~bzr-pqm/bzr/bzr.dev : contents of bzrlib/benchmarks/bench_cache

~bzr-pqm/bzr/bzr.dev : (revision 5110)

2052.3.2 by John Arbash Meinel Change Copyright .. by Canonical to Copyright ... Canonical	1	# Copyright (C) 2006 Canonical Ltd
1911.2.1 by John Arbash Meinel Cache encode/decode operations, saves memory and time. Especially when committing a new kernel tree with 7.7M new lines to annotate	2	#
	3	# This program is free software; you can redistribute it and/or modify
2052.3.1 by John Arbash Meinel Add tests to cleanup the copyright of all source files	4	# it under the terms of the GNU General Public License as published by
	5	# the Free Software Foundation; either version 2 of the License, or
	6	# (at your option) any later version.
1911.2.1 by John Arbash Meinel Cache encode/decode operations, saves memory and time. Especially when committing a new kernel tree with 7.7M new lines to annotate	7	#
	8	# This program is distributed in the hope that it will be useful,
	9	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	10	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	11	# GNU General Public License for more details.
	12	#
	13	# You should have received a copy of the GNU General Public License
	14	# along with this program; if not, write to the Free Software
4183.7.1 by Sabin Iacob update FSF mailing address	15	# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
1911.2.1 by John Arbash Meinel Cache encode/decode operations, saves memory and time. Especially when committing a new kernel tree with 7.7M new lines to annotate	16
2052.3.1 by John Arbash Meinel Add tests to cleanup the copyright of all source files	17
1911.2.1 by John Arbash Meinel Cache encode/decode operations, saves memory and time. Especially when committing a new kernel tree with 7.7M new lines to annotate	18	"""Tests for encoding performance."""
	19
	20	from bzrlib import (
1911.2.3 by John Arbash Meinel Moving everything into a new location so that we can cache more than just revision ids	21	cache_utf8,
1911.2.1 by John Arbash Meinel Cache encode/decode operations, saves memory and time. Especially when committing a new kernel tree with 7.7M new lines to annotate	22	osutils,
	23	)
	24
	25	from bzrlib.benchmarks import Benchmark
	26
	27
	28	_normal_revision_id = (u'john@arbash-meinel.com-20060801200018'
	29	u'-cafa6272d9b8cac4')
	30	_unicode_revision_id = (u'\u062c\u0648\u062c\u0648@\xe5rbash-meinel.com-'
	31	u'\xb5\xb5\xb5-20060801200018-cafa6272d9b8cac4')
	32
	33	_normal_revision_id_utf8 = _normal_revision_id.encode('utf-8')
	34	_unicode_revision_id_utf8 = _unicode_revision_id.encode('utf-8')
	35
	36
	37	class EncodingBenchmark(Benchmark):
2399.1.7 by John Arbash Meinel Cleanup bzrlib/benchmarks/* so that everything at least has a valid doc string.	38	"""Benchmark the time to encode strings."""
1911.2.1 by John Arbash Meinel Cache encode/decode operations, saves memory and time. Especially when committing a new kernel tree with 7.7M new lines to annotate	39
	40	def setUp(self):
	41	super(EncodingBenchmark, self).setUp()
	42	# Make sure we start and end with a clean cache
1911.2.3 by John Arbash Meinel Moving everything into a new location so that we can cache more than just revision ids	43	cache_utf8.clear_encoding_cache()
	44	self.addCleanup(cache_utf8.clear_encoding_cache)
1911.2.1 by John Arbash Meinel Cache encode/decode operations, saves memory and time. Especially when committing a new kernel tree with 7.7M new lines to annotate	45
	46	def encode_1M(self, revision_id):
	47	"""Encode the given revision id 1 million times"""
	48	# In a real kernel tree there are 7.7M lines of code
	49	# so the initial import actually has to encode a revision
	50	# id to store annotated lines one time for every line.
	51	for i in xrange(1000000):
	52	revision_id.encode('utf8')
	53
	54	def encode_cached_1M(self, revision_id):
	55	"""Encode the given revision id 1 million times using the cache"""
1911.2.3 by John Arbash Meinel Moving everything into a new location so that we can cache more than just revision ids	56	encode = cache_utf8.encode
1911.2.1 by John Arbash Meinel Cache encode/decode operations, saves memory and time. Especially when committing a new kernel tree with 7.7M new lines to annotate	57	for i in xrange(1000000):
1911.2.3 by John Arbash Meinel Moving everything into a new location so that we can cache more than just revision ids	58	encode(revision_id)
1911.2.1 by John Arbash Meinel Cache encode/decode operations, saves memory and time. Especially when committing a new kernel tree with 7.7M new lines to annotate	59
	60	def encode_multi(self, revision_list, count):
	61	"""Encode each entry in the list count times"""
	62	for i in xrange(count):
	63	for revision_id in revision_list:
	64	revision_id.encode('utf-8')
	65
	66	def encode_cached_multi(self, revision_list, count):
	67	"""Encode each entry in the list count times"""
1911.2.3 by John Arbash Meinel Moving everything into a new location so that we can cache more than just revision ids	68	encode = cache_utf8.encode
1911.2.1 by John Arbash Meinel Cache encode/decode operations, saves memory and time. Especially when committing a new kernel tree with 7.7M new lines to annotate	69	for i in xrange(count):
	70	for revision_id in revision_list:
1911.2.3 by John Arbash Meinel Moving everything into a new location so that we can cache more than just revision ids	71	encode(revision_id)
1911.2.1 by John Arbash Meinel Cache encode/decode operations, saves memory and time. Especially when committing a new kernel tree with 7.7M new lines to annotate	72
	73	def test_encode_1_by_1M_ascii(self):
	74	"""Test encoding a single revision id 1 million times."""
	75	self.time(self.encode_1M, _normal_revision_id)
	76
	77	def test_encode_1_by_1M_ascii_cached(self):
	78	"""Test encoding a single revision id 1 million times."""
	79	self.time(self.encode_cached_1M, _normal_revision_id)
	80
	81	def test_encode_1_by_1M_ascii_str(self):
	82	# We have places that think they have a unicode revision id
	83	# but actually, they have a plain string. So .encode(utf8)
	84	# actually has to decode from ascii, and then encode into utf8
	85	self.time(self.encode_1M, str(_normal_revision_id))
	86
	87	def test_encode_1_by_1M_ascii_str_cached(self):
	88	self.time(self.encode_cached_1M, str(_normal_revision_id))
	89
	90	def test_encode_1_by_1M_unicode(self):
	91	"""Test encoding a single revision id 1 million times."""
	92	self.time(self.encode_1M, _unicode_revision_id)
	93
	94	def test_encode_1_by_1M_unicode_cached(self):
	95	"""Test encoding a single revision id 1 million times."""
	96	self.time(self.encode_cached_1M, _unicode_revision_id)
	97
	98	def test_encode_1k_by_1k_ascii(self):
	99	"""Test encoding 5 revisions 100k times"""
	100	revisions = [unicode(osutils.rand_chars(60)) for x in xrange(1000)]
	101	self.time(self.encode_multi, revisions, 1000)
	102
	103	def test_encode_1k_by_1k_ascii_cached(self):
	104	"""Test encoding 5 revisions 100k times"""
	105	revisions = [unicode(osutils.rand_chars(60)) for x in xrange(1000)]
	106	self.time(self.encode_cached_multi, revisions, 1000)
	107
	108	def test_encode_1k_by_1k_unicode(self):
	109	"""Test encoding 5 revisions 100k times"""
2152.1.1 by John Arbash Meinel (Dmitry Vasiliev) Update and improve benchmarks for cache_utf8	110	revisions = [u'\u062c\u0648\u062c\u0648' +
1911.2.1 by John Arbash Meinel Cache encode/decode operations, saves memory and time. Especially when committing a new kernel tree with 7.7M new lines to annotate	111	unicode(osutils.rand_chars(60)) for x in xrange(1000)]
	112	self.time(self.encode_multi, revisions, 1000)
	113
	114	def test_encode_1k_by_1k_unicode_cached(self):
	115	"""Test encoding 5 revisions 100k times"""
2152.1.1 by John Arbash Meinel (Dmitry Vasiliev) Update and improve benchmarks for cache_utf8	116	revisions = [u'\u062c\u0648\u062c\u0648' +
1911.2.1 by John Arbash Meinel Cache encode/decode operations, saves memory and time. Especially when committing a new kernel tree with 7.7M new lines to annotate	117	unicode(osutils.rand_chars(60)) for x in xrange(1000)]
	118	self.time(self.encode_cached_multi, revisions, 1000)
	119
2152.1.1 by John Arbash Meinel (Dmitry Vasiliev) Update and improve benchmarks for cache_utf8	120	def test_encode_500K_by_1_ascii(self):
	121	revisions = [unicode("test%07d" % x) for x in xrange(500000)]
	122	self.time(self.encode_multi, revisions, 1)
	123
	124	def test_encode_500K_by_1_ascii_cached(self):
	125	revisions = [unicode("test%07d" % x) for x in xrange(500000)]
	126	self.time(self.encode_cached_multi, revisions, 1)
	127
	128	def test_encode_500K_by_1_unicode(self):
	129	revisions = [u'\u062c\u0648\u062c\u0648' +
	130	unicode("%07d" % x) for x in xrange(500000)]
	131	self.time(self.encode_multi, revisions, 1)
	132
	133	def test_encode_500K_by_1_unicode_cached(self):
	134	revisions = [u'\u062c\u0648\u062c\u0648' +
	135	unicode("%07d" % x) for x in xrange(500000)]
	136	self.time(self.encode_cached_multi, revisions, 1)
1911.2.1 by John Arbash Meinel Cache encode/decode operations, saves memory and time. Especially when committing a new kernel tree with 7.7M new lines to annotate	137
2399.1.7 by John Arbash Meinel Cleanup bzrlib/benchmarks/* so that everything at least has a valid doc string.	138
class DecodingBenchmarks(Benchmark):
    """Benchmark the time to decode strings.

    Each scenario is timed twice: once calling the string's own ``decode``
    method directly, and once (the ``*_cached`` tests) going through
    ``cache_utf8.decode``.
    """

    # Non-ASCII prefix prepended to generated revision ids (before they are
    # encoded to UTF-8) for the "unicode" scenarios.
    _non_ascii_prefix = u'\u062c\u0648\u062c\u0648'

    def setUp(self):
        """Clear the cache_utf8 encoding cache before and after each test."""
        super(DecodingBenchmarks, self).setUp()
        # Make sure we start and end with a clean cache
        cache_utf8.clear_encoding_cache()
        self.addCleanup(cache_utf8.clear_encoding_cache)

    def decode_1M(self, revision_id):
        """Decode the given utf-8 revision id 1 million times"""
        for _ in xrange(1000000):
            revision_id.decode('utf8')

    def decode_cached_1M(self, revision_id):
        """Decode the given utf-8 revision id 1M times using the cache"""
        # Bind the function to a local so the loop does not pay for a
        # module attribute lookup on every iteration.
        decode = cache_utf8.decode
        for _ in xrange(1000000):
            decode(revision_id)

    def decode_multi(self, revision_list, count):
        """Decode each entry in the list count times"""
        for _ in xrange(count):
            for revision_id in revision_list:
                revision_id.decode('utf-8')

    def decode_cached_multi(self, revision_list, count):
        """Decode each entry in the list count times using the cache"""
        decode = cache_utf8.decode
        for _ in xrange(count):
            for revision_id in revision_list:
                decode(revision_id)

    def test_decode_1_by_1M_ascii(self):
        """Test decoding a single ASCII revision id 1 million times."""
        self.time(self.decode_1M, _normal_revision_id_utf8)

    def test_decode_1_by_1M_ascii_cached(self):
        """Test decoding a single ASCII revision id 1 million times, cached."""
        self.time(self.decode_cached_1M, _normal_revision_id_utf8)

    def test_decode_1_by_1M_unicode(self):
        """Test decoding a single non-ASCII revision id 1 million times."""
        self.time(self.decode_1M, _unicode_revision_id_utf8)

    def test_decode_1_by_1M_unicode_cached(self):
        """Test decoding a single non-ASCII revision id 1M times, cached."""
        self.time(self.decode_cached_1M, _unicode_revision_id_utf8)

    def test_decode_1k_by_1k_ascii(self):
        """Test decoding 1,000 ASCII revision ids 1,000 times each."""
        revisions = [osutils.rand_chars(60) for x in xrange(1000)]
        self.time(self.decode_multi, revisions, 1000)

    def test_decode_1k_by_1k_ascii_cached(self):
        """Test decoding 1,000 ASCII revision ids 1,000 times each, cached."""
        revisions = [osutils.rand_chars(60) for x in xrange(1000)]
        self.time(self.decode_cached_multi, revisions, 1000)

    def test_decode_1k_by_1k_unicode(self):
        """Test decoding 1,000 non-ASCII revision ids 1,000 times each."""
        prefix = self._non_ascii_prefix
        revisions = [(prefix + unicode(osutils.rand_chars(60))).encode('utf8')
                     for x in xrange(1000)]
        self.time(self.decode_multi, revisions, 1000)

    def test_decode_1k_by_1k_unicode_cached(self):
        """Test decoding 1,000 non-ASCII ids 1,000 times each, cached."""
        prefix = self._non_ascii_prefix
        revisions = [(prefix + unicode(osutils.rand_chars(60))).encode('utf8')
                     for x in xrange(1000)]
        self.time(self.decode_cached_multi, revisions, 1000)

    def test_decode_500K_by_1_ascii(self):
        """Test decoding 500,000 distinct ASCII revision ids once each."""
        revisions = [("test%07d" % x) for x in xrange(500000)]
        self.time(self.decode_multi, revisions, 1)

    def test_decode_500K_by_1_ascii_cached(self):
        """Test decoding 500,000 distinct ASCII ids once each, cached."""
        revisions = [("test%07d" % x) for x in xrange(500000)]
        self.time(self.decode_cached_multi, revisions, 1)

    def test_decode_500K_by_1_unicode(self):
        """Test decoding 500,000 distinct non-ASCII ids once each."""
        prefix = self._non_ascii_prefix
        revisions = [(prefix + unicode("%07d" % x)).encode('utf-8')
                     for x in xrange(500000)]
        self.time(self.decode_multi, revisions, 1)

    def test_decode_500K_by_1_unicode_cached(self):
        """Test decoding 500,000 distinct non-ASCII ids once each, cached."""
        prefix = self._non_ascii_prefix
        revisions = [(prefix + unicode("%07d" % x)).encode('utf-8')
                     for x in xrange(500000)]
        self.time(self.decode_cached_multi, revisions, 1)