# Copyright (C) 2006 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Tests for encoding performance."""

from bzrlib import (
    cache_utf8,
    osutils,
    )

from bzrlib.benchmarks import Benchmark


# Sample revision ids shared by the benchmarks below: one that is pure ascii
# and one that mixes in non-ascii characters.
# NOTE(review): the trailing u'-cafa6272d9b8cac4' piece of the ascii id was
# missing from this copy of the file (the parenthesis was left open); it is
# reconstructed here to match the suffix of the unicode id -- confirm.
_normal_revision_id = (u'john@arbash-meinel.com-20060801200018'
                       u'-cafa6272d9b8cac4')
_unicode_revision_id = (u'\u062c\u0648\u062c\u0648@\xe5rbash-meinel.com-'
                        u'\xb5\xb5\xb5-20060801200018-cafa6272d9b8cac4')

# The same ids pre-encoded as UTF-8, used as input for the decoding benchmarks.
_normal_revision_id_utf8 = _normal_revision_id.encode('utf-8')
_unicode_revision_id_utf8 = _unicode_revision_id.encode('utf-8')


class EncodingBenchmark(Benchmark):
    """Benchmark the time to encode strings.

    The ``encode_*`` methods are the workloads; each ``test_*`` method hands
    one of them to ``self.time()`` together with its input. Every workload
    exists in two flavours: one calling ``.encode()`` on the string directly
    and one going through ``cache_utf8.encode``.
    """

    def setUp(self):
        """Set up the base benchmark and reset the encoding cache."""
        super(EncodingBenchmark, self).setUp()
        # Make sure we start and end with a clean cache
        cache_utf8.clear_encoding_cache()
        self.addCleanup(cache_utf8.clear_encoding_cache)

    def encode_1M(self, revision_id):
        """Encode the given revision id 1 million times.

        :param revision_id: The string whose ``.encode('utf8')`` is called.
        """
        # In a real kernel tree there are 7.7M lines of code
        # so the initial import actually has to encode a revision
        # id to store annotated lines one time for every line.
        for i in xrange(1000000):
            revision_id.encode('utf8')

    def encode_cached_1M(self, revision_id):
        """Encode the given revision id 1 million times using the cache.

        :param revision_id: The string passed to ``cache_utf8.encode``.
        """
        # Bind the function to a local so the loop does not repeat the
        # module attribute lookup on every iteration.
        encode = cache_utf8.encode
        for i in xrange(1000000):
            encode(revision_id)

    def encode_multi(self, revision_list, count):
        """Encode each entry in the list count times.

        :param revision_list: The strings to encode.
        :param count: How many passes to make over ``revision_list``.
        """
        for i in xrange(count):
            for revision_id in revision_list:
                revision_id.encode('utf-8')

    def encode_cached_multi(self, revision_list, count):
        """Encode each entry in the list count times using the cache.

        :param revision_list: The strings passed to ``cache_utf8.encode``.
        :param count: How many passes to make over ``revision_list``.
        """
        encode = cache_utf8.encode
        for i in xrange(count):
            for revision_id in revision_list:
                encode(revision_id)

    def test_encode_1_by_1M_ascii(self):
        """Test encoding a single ascii revision id 1 million times."""
        self.time(self.encode_1M, _normal_revision_id)

    def test_encode_1_by_1M_ascii_cached(self):
        """Test cached encoding of a single ascii revision id 1M times."""
        self.time(self.encode_cached_1M, _normal_revision_id)

    def test_encode_1_by_1M_ascii_str(self):
        """Test encoding a single plain-str revision id 1 million times."""
        # We have places that think they have a unicode revision id
        # but actually, they have a plain string. So .encode(utf8)
        # actually has to decode from ascii, and then encode into utf8
        self.time(self.encode_1M, str(_normal_revision_id))

    def test_encode_1_by_1M_ascii_str_cached(self):
        """Test cached encoding of a single plain-str revision id 1M times."""
        self.time(self.encode_cached_1M, str(_normal_revision_id))

    def test_encode_1_by_1M_unicode(self):
        """Test encoding a single non-ascii revision id 1 million times."""
        self.time(self.encode_1M, _unicode_revision_id)

    def test_encode_1_by_1M_unicode_cached(self):
        """Test cached encoding of a single non-ascii revision id 1M times."""
        self.time(self.encode_cached_1M, _unicode_revision_id)

    def test_encode_1k_by_1k_ascii(self):
        """Test encoding 1000 revision ids 1000 times each."""
        revisions = [unicode(osutils.rand_chars(60)) for x in xrange(1000)]
        self.time(self.encode_multi, revisions, 1000)

    def test_encode_1k_by_1k_ascii_cached(self):
        """Test cached encoding of 1000 revision ids 1000 times each."""
        revisions = [unicode(osutils.rand_chars(60)) for x in xrange(1000)]
        self.time(self.encode_cached_multi, revisions, 1000)

    def test_encode_1k_by_1k_unicode(self):
        """Test encoding 1000 non-ascii revision ids 1000 times each."""
        revisions = [u'\u062c\u0648\u062c\u0648' +
                     unicode(osutils.rand_chars(60)) for x in xrange(1000)]
        self.time(self.encode_multi, revisions, 1000)

    def test_encode_1k_by_1k_unicode_cached(self):
        """Test cached encoding of 1000 non-ascii revision ids 1000 times."""
        revisions = [u'\u062c\u0648\u062c\u0648' +
                     unicode(osutils.rand_chars(60)) for x in xrange(1000)]
        self.time(self.encode_cached_multi, revisions, 1000)

    def test_encode_500K_by_1_ascii(self):
        """Test encoding 500,000 distinct ascii revision ids once each."""
        revisions = [unicode("test%07d" % x) for x in xrange(500000)]
        self.time(self.encode_multi, revisions, 1)

    def test_encode_500K_by_1_ascii_cached(self):
        """Test cached encoding of 500,000 distinct ascii ids once each."""
        revisions = [unicode("test%07d" % x) for x in xrange(500000)]
        self.time(self.encode_cached_multi, revisions, 1)

    def test_encode_500K_by_1_unicode(self):
        """Test encoding 500,000 distinct non-ascii revision ids once each."""
        revisions = [u'\u062c\u0648\u062c\u0648' +
                     unicode("%07d" % x) for x in xrange(500000)]
        self.time(self.encode_multi, revisions, 1)

    def test_encode_500K_by_1_unicode_cached(self):
        """Test cached encoding of 500,000 distinct non-ascii ids once each."""
        revisions = [u'\u062c\u0648\u062c\u0648' +
                     unicode("%07d" % x) for x in xrange(500000)]
        self.time(self.encode_cached_multi, revisions, 1)


class DecodingBenchmarks(Benchmark):
    """Benchmark the time to decode strings.

    The ``decode_*`` methods are the workloads; each ``test_*`` method hands
    one of them to ``self.time()`` together with its input. Every workload
    exists in two flavours: one calling ``.decode()`` on the string directly
    and one going through ``cache_utf8.decode``.
    """

    def setUp(self):
        """Set up the base benchmark and reset the encoding cache."""
        super(DecodingBenchmarks, self).setUp()
        # Make sure we start and end with a clean cache
        cache_utf8.clear_encoding_cache()
        self.addCleanup(cache_utf8.clear_encoding_cache)

    def decode_1M(self, revision_id):
        """Decode the given utf8 revision id 1 million times.

        :param revision_id: The string whose ``.decode('utf8')`` is called.
        """
        for i in xrange(1000000):
            revision_id.decode('utf8')

    def decode_cached_1M(self, revision_id):
        """Decode the given utf8 revision id 1 million times using the cache.

        :param revision_id: The string passed to ``cache_utf8.decode``.
        """
        # Bind the function to a local so the loop does not repeat the
        # module attribute lookup on every iteration.
        decode = cache_utf8.decode
        for i in xrange(1000000):
            decode(revision_id)

    def decode_multi(self, revision_list, count):
        """Decode each entry in the list count times.

        :param revision_list: The utf8 strings to decode.
        :param count: How many passes to make over ``revision_list``.
        """
        for i in xrange(count):
            for revision_id in revision_list:
                revision_id.decode('utf-8')

    def decode_cached_multi(self, revision_list, count):
        """Decode each entry in the list count times using the cache.

        :param revision_list: The strings passed to ``cache_utf8.decode``.
        :param count: How many passes to make over ``revision_list``.
        """
        decode = cache_utf8.decode
        for i in xrange(count):
            for revision_id in revision_list:
                decode(revision_id)

    def test_decode_1_by_1M_ascii(self):
        """Test decoding a single ascii revision id 1 million times."""
        self.time(self.decode_1M, _normal_revision_id_utf8)

    def test_decode_1_by_1M_ascii_cached(self):
        """Test cached decoding of a single ascii revision id 1M times."""
        self.time(self.decode_cached_1M, _normal_revision_id_utf8)

    def test_decode_1_by_1M_unicode(self):
        """Test decoding a single non-ascii revision id 1 million times."""
        self.time(self.decode_1M, _unicode_revision_id_utf8)

    def test_decode_1_by_1M_unicode_cached(self):
        """Test cached decoding of a single non-ascii revision id 1M times."""
        self.time(self.decode_cached_1M, _unicode_revision_id_utf8)

    def test_decode_1k_by_1k_ascii(self):
        """Test decoding 1000 revision ids 1000 times each."""
        revisions = [osutils.rand_chars(60) for x in xrange(1000)]
        self.time(self.decode_multi, revisions, 1000)

    def test_decode_1k_by_1k_ascii_cached(self):
        """Test cached decoding of 1000 revision ids 1000 times each."""
        revisions = [osutils.rand_chars(60) for x in xrange(1000)]
        self.time(self.decode_cached_multi, revisions, 1000)

    def test_decode_1k_by_1k_unicode(self):
        """Test decoding 1000 non-ascii revision ids 1000 times each."""
        revisions = [(u'\u062c\u0648\u062c\u0648' +
                      unicode(osutils.rand_chars(60))).encode('utf8')
                     for x in xrange(1000)]
        self.time(self.decode_multi, revisions, 1000)

    def test_decode_1k_by_1k_unicode_cached(self):
        """Test cached decoding of 1000 non-ascii revision ids 1000 times."""
        revisions = [(u'\u062c\u0648\u062c\u0648' +
                      unicode(osutils.rand_chars(60))).encode('utf8')
                     for x in xrange(1000)]
        self.time(self.decode_cached_multi, revisions, 1000)

    def test_decode_500K_by_1_ascii(self):
        """Test decoding 500,000 distinct ascii revision ids once each."""
        revisions = [("test%07d" % x) for x in xrange(500000)]
        self.time(self.decode_multi, revisions, 1)

    def test_decode_500K_by_1_ascii_cached(self):
        """Test cached decoding of 500,000 distinct ascii ids once each."""
        revisions = [("test%07d" % x) for x in xrange(500000)]
        self.time(self.decode_cached_multi, revisions, 1)

    def test_decode_500K_by_1_unicode(self):
        """Test decoding 500,000 distinct non-ascii revision ids once each."""
        revisions = [(u'\u062c\u0648\u062c\u0648' +
                      unicode("%07d" % x)).encode('utf-8')
                     for x in xrange(500000)]
        self.time(self.decode_multi, revisions, 1)

    def test_decode_500K_by_1_unicode_cached(self):
        """Test cached decoding of 500,000 distinct non-ascii ids once each."""
        revisions = [(u'\u062c\u0648\u062c\u0648' +
                      unicode("%07d" % x)).encode('utf-8')
                     for x in xrange(500000)]
        self.time(self.decode_cached_multi, revisions, 1)