~bzr-pqm/bzr/bzr.dev

2052.3.2 by John Arbash Meinel
Change Copyright .. by Canonical to Copyright ... Canonical
1
# Copyright (C) 2006 Canonical Ltd
1911.2.1 by John Arbash Meinel
Cache encode/decode operations, saves memory and time. Especially when committing a new kernel tree with 7.7M new lines to annotate
2
#
3
# This program is free software; you can redistribute it and/or modify
2052.3.1 by John Arbash Meinel
Add tests to cleanup the copyright of all source files
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
1911.2.1 by John Arbash Meinel
Cache encode/decode operations, saves memory and time. Especially when committing a new kernel tree with 7.7M new lines to annotate
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
4183.7.1 by Sabin Iacob
update FSF mailing address
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
1911.2.1 by John Arbash Meinel
Cache encode/decode operations, saves memory and time. Especially when committing a new kernel tree with 7.7M new lines to annotate
16
2052.3.1 by John Arbash Meinel
Add tests to cleanup the copyright of all source files
17
1911.2.1 by John Arbash Meinel
Cache encode/decode operations, saves memory and time. Especially when committing a new kernel tree with 7.7M new lines to annotate
18
"""Tests for encoding performance."""
19
20
from bzrlib import (
1911.2.3 by John Arbash Meinel
Moving everything into a new location so that we can cache more than just revision ids
21
    cache_utf8,
1911.2.1 by John Arbash Meinel
Cache encode/decode operations, saves memory and time. Especially when committing a new kernel tree with 7.7M new lines to annotate
22
    osutils,
23
    )
24
25
from bzrlib.benchmarks import Benchmark
26
27
28
# A plain-ASCII revision id, held as a unicode object.
_normal_revision_id = (u'john@arbash-meinel.com-20060801200018'
                       u'-cafa6272d9b8cac4')
# A revision id containing non-ASCII characters (Arabic letters plus
# Latin-1 characters), so its UTF-8 form differs from its ASCII form.
_unicode_revision_id = (u'\u062c\u0648\u062c\u0648@\xe5rbash-meinel.com-'
                        u'\xb5\xb5\xb5-20060801200018-cafa6272d9b8cac4')

# Pre-encoded UTF-8 byte strings of the ids above, used as decode inputs.
_normal_revision_id_utf8 = _normal_revision_id.encode('utf-8')
_unicode_revision_id_utf8 = _unicode_revision_id.encode('utf-8')
35
36
37
class EncodingBenchmark(Benchmark):
    """Benchmark the time to encode strings.

    Each scenario comes in two flavours: a plain ``.encode()`` call and the
    same workload routed through ``cache_utf8.encode``.
    """

    def setUp(self):
        """Clear the encoding cache before the test and again on cleanup."""
        super(EncodingBenchmark, self).setUp()
        # Make sure we start and end with a clean cache
        cache_utf8.clear_encoding_cache()
        self.addCleanup(cache_utf8.clear_encoding_cache)

    def encode_1M(self, revision_id):
        """Encode the given revision id 1 million times"""
        # In a real kernel tree there are 7.7M lines of code
        # so the initial import actually has to encode a revision
        # id to store annotated lines one time for every line.
        for i in xrange(1000000):
            revision_id.encode('utf8')

    def encode_cached_1M(self, revision_id):
        """Encode the given revision id 1 million times using the cache"""
        # Bind the function to a local so the loop does not repeat the
        # module attribute lookup on every iteration.
        encode = cache_utf8.encode
        for i in xrange(1000000):
            encode(revision_id)

    def encode_multi(self, revision_list, count):
        """Encode each entry in the list count times"""
        for i in xrange(count):
            for revision_id in revision_list:
                revision_id.encode('utf-8')

    def encode_cached_multi(self, revision_list, count):
        """Encode each entry in the list count times using the cache"""
        encode = cache_utf8.encode
        for i in xrange(count):
            for revision_id in revision_list:
                encode(revision_id)

    def test_encode_1_by_1M_ascii(self):
        """Test encoding a single ascii revision id 1 million times."""
        self.time(self.encode_1M, _normal_revision_id)

    def test_encode_1_by_1M_ascii_cached(self):
        """Test encoding a single ascii revision id 1 million times, cached."""
        self.time(self.encode_cached_1M, _normal_revision_id)

    def test_encode_1_by_1M_ascii_str(self):
        """Test encoding a single plain-str revision id 1 million times."""
        # We have places that think they have a unicode revision id
        # but actually, they have a plain string. So .encode(utf8)
        # actually has to decode from ascii, and then encode into utf8
        self.time(self.encode_1M, str(_normal_revision_id))

    def test_encode_1_by_1M_ascii_str_cached(self):
        """Test encoding a single plain-str revision id 1M times, cached."""
        self.time(self.encode_cached_1M, str(_normal_revision_id))

    def test_encode_1_by_1M_unicode(self):
        """Test encoding a single non-ascii revision id 1 million times."""
        self.time(self.encode_1M, _unicode_revision_id)

    def test_encode_1_by_1M_unicode_cached(self):
        """Test encoding a single non-ascii revision id 1M times, cached."""
        self.time(self.encode_cached_1M, _unicode_revision_id)

    def test_encode_1k_by_1k_ascii(self):
        """Test encoding 1000 ascii revision ids 1000 times each"""
        revisions = [unicode(osutils.rand_chars(60)) for x in xrange(1000)]
        self.time(self.encode_multi, revisions, 1000)

    def test_encode_1k_by_1k_ascii_cached(self):
        """Test encoding 1000 ascii revision ids 1000 times each, cached"""
        revisions = [unicode(osutils.rand_chars(60)) for x in xrange(1000)]
        self.time(self.encode_cached_multi, revisions, 1000)

    def test_encode_1k_by_1k_unicode(self):
        """Test encoding 1000 non-ascii revision ids 1000 times each"""
        revisions = [u'\u062c\u0648\u062c\u0648' +
                     unicode(osutils.rand_chars(60)) for x in xrange(1000)]
        self.time(self.encode_multi, revisions, 1000)

    def test_encode_1k_by_1k_unicode_cached(self):
        """Test encoding 1000 non-ascii revision ids 1000 times each, cached"""
        revisions = [u'\u062c\u0648\u062c\u0648' +
                     unicode(osutils.rand_chars(60)) for x in xrange(1000)]
        self.time(self.encode_cached_multi, revisions, 1000)

    def test_encode_500K_by_1_ascii(self):
        """Test encoding 500,000 distinct ascii revision ids once each"""
        revisions = [unicode("test%07d" % x) for x in xrange(500000)]
        self.time(self.encode_multi, revisions, 1)

    def test_encode_500K_by_1_ascii_cached(self):
        """Test encoding 500,000 distinct ascii ids once each, cached"""
        revisions = [unicode("test%07d" % x) for x in xrange(500000)]
        self.time(self.encode_cached_multi, revisions, 1)

    def test_encode_500K_by_1_unicode(self):
        """Test encoding 500,000 distinct non-ascii ids once each"""
        revisions = [u'\u062c\u0648\u062c\u0648' +
                     unicode("%07d" % x) for x in xrange(500000)]
        self.time(self.encode_multi, revisions, 1)

    def test_encode_500K_by_1_unicode_cached(self):
        """Test encoding 500,000 distinct non-ascii ids once each, cached"""
        revisions = [u'\u062c\u0648\u062c\u0648' +
                     unicode("%07d" % x) for x in xrange(500000)]
        self.time(self.encode_cached_multi, revisions, 1)
1911.2.1 by John Arbash Meinel
Cache encode/decode operations, saves memory and time. Especially when committing a new kernel tree with 7.7M new lines to annotate
137
2399.1.7 by John Arbash Meinel
Cleanup bzrlib/benchmarks/* so that everything at least has a valid doc string.
138
1911.2.1 by John Arbash Meinel
Cache encode/decode operations, saves memory and time. Especially when committing a new kernel tree with 7.7M new lines to annotate
139
class DecodingBenchmarks(Benchmark):
    """Benchmark the time to decode strings.

    Each scenario comes in two flavours: a plain ``.decode()`` call and the
    same workload routed through ``cache_utf8.decode``.
    """

    def setUp(self):
        """Clear the encoding cache before the test and again on cleanup."""
        super(DecodingBenchmarks, self).setUp()
        # Make sure we start and end with a clean cache
        cache_utf8.clear_encoding_cache()
        self.addCleanup(cache_utf8.clear_encoding_cache)

    def decode_1M(self, revision_id):
        """Decode the given utf-8 revision id 1 million times"""
        for i in xrange(1000000):
            revision_id.decode('utf8')

    def decode_cached_1M(self, revision_id):
        """Decode the given utf-8 revision id 1 million times using the cache"""
        # Bind the function to a local so the loop does not repeat the
        # module attribute lookup on every iteration.
        decode = cache_utf8.decode
        for i in xrange(1000000):
            decode(revision_id)

    def decode_multi(self, revision_list, count):
        """Decode each entry in the list count times"""
        for i in xrange(count):
            for revision_id in revision_list:
                revision_id.decode('utf-8')

    def decode_cached_multi(self, revision_list, count):
        """Decode each entry in the list count times using the cache"""
        decode = cache_utf8.decode
        for i in xrange(count):
            for revision_id in revision_list:
                decode(revision_id)

    def test_decode_1_by_1M_ascii(self):
        """Test decoding a single ascii revision id 1 million times."""
        self.time(self.decode_1M, _normal_revision_id_utf8)

    def test_decode_1_by_1M_ascii_cached(self):
        """Test decoding a single ascii revision id 1 million times, cached."""
        self.time(self.decode_cached_1M, _normal_revision_id_utf8)

    def test_decode_1_by_1M_unicode(self):
        """Test decoding a single non-ascii revision id 1 million times."""
        self.time(self.decode_1M, _unicode_revision_id_utf8)

    def test_decode_1_by_1M_unicode_cached(self):
        """Test decoding a single non-ascii revision id 1M times, cached."""
        self.time(self.decode_cached_1M, _unicode_revision_id_utf8)

    def test_decode_1k_by_1k_ascii(self):
        """Test decoding 1000 ascii revision ids 1000 times each"""
        revisions = [osutils.rand_chars(60) for x in xrange(1000)]
        self.time(self.decode_multi, revisions, 1000)

    def test_decode_1k_by_1k_ascii_cached(self):
        """Test decoding 1000 ascii revision ids 1000 times each, cached"""
        revisions = [osutils.rand_chars(60) for x in xrange(1000)]
        self.time(self.decode_cached_multi, revisions, 1000)

    def test_decode_1k_by_1k_unicode(self):
        """Test decoding 1000 non-ascii revision ids 1000 times each"""
        revisions = [(u'\u062c\u0648\u062c\u0648' +
                      unicode(osutils.rand_chars(60))).encode('utf8')
                     for x in xrange(1000)]
        self.time(self.decode_multi, revisions, 1000)

    def test_decode_1k_by_1k_unicode_cached(self):
        """Test decoding 1000 non-ascii revision ids 1000 times each, cached"""
        revisions = [(u'\u062c\u0648\u062c\u0648' +
                      unicode(osutils.rand_chars(60))).encode('utf8')
                     for x in xrange(1000)]
        self.time(self.decode_cached_multi, revisions, 1000)

    def test_decode_500K_by_1_ascii(self):
        """Test decoding 500,000 distinct ascii revision ids once each"""
        revisions = [("test%07d" % x) for x in xrange(500000)]
        self.time(self.decode_multi, revisions, 1)

    def test_decode_500K_by_1_ascii_cached(self):
        """Test decoding 500,000 distinct ascii ids once each, cached"""
        revisions = [("test%07d" % x) for x in xrange(500000)]
        self.time(self.decode_cached_multi, revisions, 1)

    def test_decode_500K_by_1_unicode(self):
        """Test decoding 500,000 distinct non-ascii ids once each"""
        revisions = [(u'\u062c\u0648\u062c\u0648' +
                      unicode("%07d" % x)).encode('utf-8')
                     for x in xrange(500000)]
        self.time(self.decode_multi, revisions, 1)

    def test_decode_500K_by_1_unicode_cached(self):
        """Test decoding 500,000 distinct non-ascii ids once each, cached"""
        revisions = [(u'\u062c\u0648\u062c\u0648' +
                      unicode("%07d" % x)).encode('utf-8')
                     for x in xrange(500000)]
        self.time(self.decode_cached_multi, revisions, 1)