~bzr-pqm/bzr/bzr.dev : contents of bzrlib/benchmarks/bench_cache

~bzr-pqm/bzr/bzr.dev : (revision 1934.1.10)

# Copyright (C) 2006 by Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as published by
# the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

"""Tests for encoding performance."""

from bzrlib import (
    cache_utf8,
    osutils,
    )

from bzrlib.benchmarks import Benchmark


# A revision id made only of ascii characters, as a unicode object.
_normal_revision_id = (
    u'john@arbash-meinel.com'
    u'-20060801200018-cafa6272d9b8cac4'
)
# A revision id mixing Arabic letters, a Latin-1 letter and micro signs,
# so that its utf-8 form contains multi-byte sequences.
_unicode_revision_id = (
    u'\u062c\u0648\u062c\u0648'
    u'@\xe5rbash-meinel.com-\xb5\xb5\xb5'
    u'-20060801200018-cafa6272d9b8cac4'
)

# The utf-8 encoded byte strings of the two ids above; these are the
# inputs handed to the decoding benchmarks.
_normal_revision_id_utf8 = _normal_revision_id.encode('utf-8')
_unicode_revision_id_utf8 = _unicode_revision_id.encode('utf-8')


class EncodingBenchmark(Benchmark):
    """Benchmarks for encoding revision ids into utf-8.

    Every scenario comes as a pair: one test calls the string's own
    ``encode`` method, its ``*_cached`` twin calls ``cache_utf8.encode``.
    """

    def setUp(self):
        """Clear the encoding cache before the test and again after it."""
        super(EncodingBenchmark, self).setUp()
        # Make sure we start and end with a clean cache
        cache_utf8.clear_encoding_cache()
        self.addCleanup(cache_utf8.clear_encoding_cache)

    def encode_1M(self, revision_id):
        """Encode the given revision id 1 million times"""
        # In a real kernel tree there are 7.7M lines of code
        # so the initial import actually has to encode a revision
        # id to store annotated lines one time for every line.
        for i in xrange(1000000):
            revision_id.encode('utf8')

    def encode_cached_1M(self, revision_id):
        """Encode the given revision id 1 million times using the cache"""
        # Bind the function to a local once so the loop body is only the
        # call itself.
        encode = cache_utf8.encode
        for i in xrange(1000000):
            encode(revision_id)

    def encode_multi(self, revision_list, count):
        """Encode each entry in the list count times"""
        for i in xrange(count):
            for revision_id in revision_list:
                revision_id.encode('utf-8')

    def encode_cached_multi(self, revision_list, count):
        """Encode each entry in the list count times using the cache"""
        encode = cache_utf8.encode
        for i in xrange(count):
            for revision_id in revision_list:
                encode(revision_id)

    def test_encode_1_by_1M_ascii(self):
        """Test encoding a single ascii revision id 1 million times."""
        self.time(self.encode_1M, _normal_revision_id)

    def test_encode_1_by_1M_ascii_cached(self):
        """Test encoding a single ascii revision id 1 million times
        using the cache.
        """
        self.time(self.encode_cached_1M, _normal_revision_id)

    def test_encode_1_by_1M_ascii_str(self):
        """Test encoding a plain (non-unicode) string 1 million times."""
        # We have places that think they have a unicode revision id
        # but actually, they have a plain string. So .encode(utf8)
        # actually has to decode from ascii, and then encode into utf8
        self.time(self.encode_1M, str(_normal_revision_id))

    def test_encode_1_by_1M_ascii_str_cached(self):
        """Test encoding a plain (non-unicode) string 1 million times
        using the cache.
        """
        self.time(self.encode_cached_1M, str(_normal_revision_id))

    def test_encode_1_by_1M_unicode(self):
        """Test encoding a single non-ascii revision id 1 million times."""
        self.time(self.encode_1M, _unicode_revision_id)

    def test_encode_1_by_1M_unicode_cached(self):
        """Test encoding a single non-ascii revision id 1 million times
        using the cache.
        """
        self.time(self.encode_cached_1M, _unicode_revision_id)

    def test_encode_1k_by_1k_ascii(self):
        """Test encoding 1k ascii revision ids 1k times each"""
        # The id list is built before self.time() is called, so only the
        # encoding loop is handed to the timer.
        revisions = [unicode(osutils.rand_chars(60)) for x in xrange(1000)]
        self.time(self.encode_multi, revisions, 1000)

    def test_encode_1k_by_1k_ascii_cached(self):
        """Test encoding 1k ascii revision ids 1k times each using the cache"""
        revisions = [unicode(osutils.rand_chars(60)) for x in xrange(1000)]
        self.time(self.encode_cached_multi, revisions, 1000)

    def test_encode_1k_by_1k_unicode(self):
        """Test encoding 1k non-ascii revision ids 1k times each"""
        # The prefix has to be a unicode literal: inside a plain string
        # literal the \u escapes are not interpreted, which would leave
        # every id pure ascii and defeat the point of this benchmark.
        revisions = [u'\u062c\u0648\u062c\u0648' +
                     unicode(osutils.rand_chars(60)) for x in xrange(1000)]
        self.time(self.encode_multi, revisions, 1000)

    def test_encode_1k_by_1k_unicode_cached(self):
        """Test encoding 1k non-ascii revision ids 1k times each using
        the cache
        """
        # The prefix has to be a unicode literal: inside a plain string
        # literal the \u escapes are not interpreted, which would leave
        # every id pure ascii and defeat the point of this benchmark.
        revisions = [u'\u062c\u0648\u062c\u0648' +
                     unicode(osutils.rand_chars(60)) for x in xrange(1000)]
        self.time(self.encode_cached_multi, revisions, 1000)


class DecodingBenchmarks(Benchmark):
    """Benchmarks for decoding utf-8 revision ids back into unicode.

    Every scenario comes as a pair: one test calls the string's own
    ``decode`` method, its ``*_cached`` twin calls ``cache_utf8.decode``.
    """

    def setUp(self):
        """Clear the encoding cache before the test and again after it."""
        super(DecodingBenchmarks, self).setUp()
        # Make sure we start and end with a clean cache
        cache_utf8.clear_encoding_cache()
        self.addCleanup(cache_utf8.clear_encoding_cache)

    def decode_1M(self, revision_id):
        """Decode the given utf-8 revision id 1 million times"""
        for i in xrange(1000000):
            revision_id.decode('utf8')

    def decode_cached_1M(self, revision_id):
        """Decode the given utf-8 revision id 1 million times using the
        cache
        """
        # Bind the function to a local once so the loop body is only the
        # call itself.
        decode = cache_utf8.decode
        for i in xrange(1000000):
            decode(revision_id)

    def decode_multi(self, revision_list, count):
        """Decode each entry in the list count times"""
        for i in xrange(count):
            for revision_id in revision_list:
                revision_id.decode('utf-8')

    def decode_cached_multi(self, revision_list, count):
        """Decode each entry in the list count times using the cache"""
        decode = cache_utf8.decode
        for i in xrange(count):
            for revision_id in revision_list:
                decode(revision_id)

    def test_decode_1_by_1M_ascii(self):
        """Test decoding a single ascii revision id 1 million times."""
        self.time(self.decode_1M, _normal_revision_id_utf8)

    def test_decode_1_by_1M_ascii_cached(self):
        """Test decoding a single ascii revision id 1 million times
        using the cache.
        """
        self.time(self.decode_cached_1M, _normal_revision_id_utf8)

    def test_decode_1_by_1M_unicode(self):
        """Test decoding a single non-ascii revision id 1 million times."""
        self.time(self.decode_1M, _unicode_revision_id_utf8)

    def test_decode_1_by_1M_unicode_cached(self):
        """Test decoding a single non-ascii revision id 1 million times
        using the cache.
        """
        self.time(self.decode_cached_1M, _unicode_revision_id_utf8)

    def test_decode_1k_by_1k_ascii(self):
        """Test decoding 1k ascii revision ids 1k times each"""
        # The id list is built before self.time() is called, so only the
        # decoding loop is handed to the timer.
        revisions = [osutils.rand_chars(60) for x in xrange(1000)]
        self.time(self.decode_multi, revisions, 1000)

    def test_decode_1k_by_1k_ascii_cached(self):
        """Test decoding 1k ascii revision ids 1k times each using the cache"""
        revisions = [osutils.rand_chars(60) for x in xrange(1000)]
        self.time(self.decode_cached_multi, revisions, 1000)

    def test_decode_1k_by_1k_unicode(self):
        """Test decoding 1k non-ascii revision ids 1k times each"""
        # The prefix has to be a unicode literal: inside a plain string
        # literal the \u escapes are not interpreted, so the encoded ids
        # would be pure ascii and contain no multi-byte sequences.
        revisions = [(u'\u062c\u0648\u062c\u0648' +
                      unicode(osutils.rand_chars(60))).encode('utf8')
                     for x in xrange(1000)]
        self.time(self.decode_multi, revisions, 1000)

    def test_decode_1k_by_1k_unicode_cached(self):
        """Test decoding 1k non-ascii revision ids 1k times each using
        the cache
        """
        # The prefix has to be a unicode literal: inside a plain string
        # literal the \u escapes are not interpreted, so the encoded ids
        # would be pure ascii and contain no multi-byte sequences.
        revisions = [(u'\u062c\u0648\u062c\u0648' +
                      unicode(osutils.rand_chars(60))).encode('utf8')
                     for x in xrange(1000)]
        self.time(self.decode_cached_multi, revisions, 1000)

1911.2.1 by John Arbash Meinel Cache encode/decode operations, saves memory and time. Especially when committing a new kernel tree with 7.7M new lines to annotate	1	# Copyright (C) 2006 by Canonical Ltd
	2	#
	3	# This program is free software; you can redistribute it and/or modify
	4	# it under the terms of the GNU General Public License version 2 as published by
	5	# the Free Software Foundation.
	6	#
	7	# This program is distributed in the hope that it will be useful,
	8	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	9	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	10	# GNU General Public License for more details.
	11	#
	12	# You should have received a copy of the GNU General Public License
	13	# along with this program; if not, write to the Free Software
	14	# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
	15
	16	"""Tests for encoding performance."""
	17
	18	from bzrlib import (
1911.2.3 by John Arbash Meinel Moving everything into a new location so that we can cache more than just revision ids	19	cache_utf8,
1911.2.1 by John Arbash Meinel Cache encode/decode operations, saves memory and time. Especially when committing a new kernel tree with 7.7M new lines to annotate	20	osutils,
	21	)
	22
	23	from bzrlib.benchmarks import Benchmark
	24
	25
	26	_normal_revision_id = (u'john@arbash-meinel.com-20060801200018'
	27	u'-cafa6272d9b8cac4')
	28	_unicode_revision_id = (u'\u062c\u0648\u062c\u0648@\xe5rbash-meinel.com-'
	29	u'\xb5\xb5\xb5-20060801200018-cafa6272d9b8cac4')
	30
	31	_normal_revision_id_utf8 = _normal_revision_id.encode('utf-8')
	32	_unicode_revision_id_utf8 = _unicode_revision_id.encode('utf-8')
	33
	34
	35	class EncodingBenchmark(Benchmark):
	36
	37	def setUp(self):
	38	super(EncodingBenchmark, self).setUp()
	39	# Make sure we start and end with a clean cache
1911.2.3 by John Arbash Meinel Moving everything into a new location so that we can cache more than just revision ids	40	cache_utf8.clear_encoding_cache()
	41	self.addCleanup(cache_utf8.clear_encoding_cache)
1911.2.1 by John Arbash Meinel Cache encode/decode operations, saves memory and time. Especially when committing a new kernel tree with 7.7M new lines to annotate	42
	43	def encode_1M(self, revision_id):
	44	"""Encode the given revision id 1 million times"""
	45	# In a real kernel tree there are 7.7M lines of code
	46	# so the initial import actually has to encode a revision
	47	# id to store annotated lines one time for every line.
	48	for i in xrange(1000000):
	49	revision_id.encode('utf8')
	50
	51	def encode_cached_1M(self, revision_id):
	52	"""Encode the given revision id 1 million times using the cache"""
1911.2.3 by John Arbash Meinel Moving everything into a new location so that we can cache more than just revision ids	53	encode = cache_utf8.encode
1911.2.1 by John Arbash Meinel Cache encode/decode operations, saves memory and time. Especially when committing a new kernel tree with 7.7M new lines to annotate	54	for i in xrange(1000000):
1911.2.3 by John Arbash Meinel Moving everything into a new location so that we can cache more than just revision ids	55	encode(revision_id)
1911.2.1 by John Arbash Meinel Cache encode/decode operations, saves memory and time. Especially when committing a new kernel tree with 7.7M new lines to annotate	56
	57	def encode_multi(self, revision_list, count):
	58	"""Encode each entry in the list count times"""
	59	for i in xrange(count):
	60	for revision_id in revision_list:
	61	revision_id.encode('utf-8')
	62
	63	def encode_cached_multi(self, revision_list, count):
	64	"""Encode each entry in the list count times"""
1911.2.3 by John Arbash Meinel Moving everything into a new location so that we can cache more than just revision ids	65	encode = cache_utf8.encode
1911.2.1 by John Arbash Meinel Cache encode/decode operations, saves memory and time. Especially when committing a new kernel tree with 7.7M new lines to annotate	66	for i in xrange(count):
	67	for revision_id in revision_list:
1911.2.3 by John Arbash Meinel Moving everything into a new location so that we can cache more than just revision ids	68	encode(revision_id)
1911.2.1 by John Arbash Meinel Cache encode/decode operations, saves memory and time. Especially when committing a new kernel tree with 7.7M new lines to annotate	69
	70	def test_encode_1_by_1M_ascii(self):
	71	"""Test encoding a single revision id 1 million times."""
	72	self.time(self.encode_1M, _normal_revision_id)
	73
	74	def test_encode_1_by_1M_ascii_cached(self):
	75	"""Test encoding a single revision id 1 million times."""
	76	self.time(self.encode_cached_1M, _normal_revision_id)
	77
	78	def test_encode_1_by_1M_ascii_str(self):
	79	# We have places that think they have a unicode revision id
	80	# but actually, they have a plain string. So .encode(utf8)
	81	# actually has to decode from ascii, and then encode into utf8
	82	self.time(self.encode_1M, str(_normal_revision_id))
	83
	84	def test_encode_1_by_1M_ascii_str_cached(self):
	85	self.time(self.encode_cached_1M, str(_normal_revision_id))
	86
	87	def test_encode_1_by_1M_unicode(self):
	88	"""Test encoding a single revision id 1 million times."""
	89	self.time(self.encode_1M, _unicode_revision_id)
	90
	91	def test_encode_1_by_1M_unicode_cached(self):
	92	"""Test encoding a single revision id 1 million times."""
	93	self.time(self.encode_cached_1M, _unicode_revision_id)
	94
	95	def test_encode_1k_by_1k_ascii(self):
	96	"""Test encoding 5 revisions 100k times"""
	97	revisions = [unicode(osutils.rand_chars(60)) for x in xrange(1000)]
	98	self.time(self.encode_multi, revisions, 1000)
	99
	100	def test_encode_1k_by_1k_ascii_cached(self):
	101	"""Test encoding 5 revisions 100k times"""
	102	revisions = [unicode(osutils.rand_chars(60)) for x in xrange(1000)]
	103	self.time(self.encode_cached_multi, revisions, 1000)
	104
	105	def test_encode_1k_by_1k_unicode(self):
	106	"""Test encoding 5 revisions 100k times"""
	107	revisions = ['\u062c\u0648\u062c\u0648' +
	108	unicode(osutils.rand_chars(60)) for x in xrange(1000)]
	109	self.time(self.encode_multi, revisions, 1000)
	110
	111	def test_encode_1k_by_1k_unicode_cached(self):
	112	"""Test encoding 5 revisions 100k times"""
	113	revisions = ['\u062c\u0648\u062c\u0648' +
	114	unicode(osutils.rand_chars(60)) for x in xrange(1000)]
	115	self.time(self.encode_cached_multi, revisions, 1000)
	116
	117
	118	class DecodingBenchmarks(Benchmark):
	119
	120	def setUp(self):
	121	super(DecodingBenchmarks, self).setUp()
	122	# Make sure we start and end with a clean cache
1911.2.3 by John Arbash Meinel Moving everything into a new location so that we can cache more than just revision ids	123	cache_utf8.clear_encoding_cache()
	124	self.addCleanup(cache_utf8.clear_encoding_cache)
1911.2.1 by John Arbash Meinel Cache encode/decode operations, saves memory and time. Especially when committing a new kernel tree with 7.7M new lines to annotate	125
	126	def decode_1M(self, revision_id):
	127	for i in xrange(1000000):
	128	revision_id.decode('utf8')
	129
	130	def decode_cached_1M(self, revision_id):
1911.2.3 by John Arbash Meinel Moving everything into a new location so that we can cache more than just revision ids	131	decode = cache_utf8.decode
1911.2.1 by John Arbash Meinel Cache encode/decode operations, saves memory and time. Especially when committing a new kernel tree with 7.7M new lines to annotate	132	for i in xrange(1000000):
1911.2.3 by John Arbash Meinel Moving everything into a new location so that we can cache more than just revision ids	133	decode(revision_id)
1911.2.1 by John Arbash Meinel Cache encode/decode operations, saves memory and time. Especially when committing a new kernel tree with 7.7M new lines to annotate	134
	135	def decode_multi(self, revision_list, count):
	136	for i in xrange(count):
	137	for revision_id in revision_list:
	138	revision_id.decode('utf-8')
	139
	140	def decode_cached_multi(self, revision_list, count):
1911.2.3 by John Arbash Meinel Moving everything into a new location so that we can cache more than just revision ids	141	decode = cache_utf8.decode
1911.2.1 by John Arbash Meinel Cache encode/decode operations, saves memory and time. Especially when committing a new kernel tree with 7.7M new lines to annotate	142	for i in xrange(count):
	143	for revision_id in revision_list:
1911.2.3 by John Arbash Meinel Moving everything into a new location so that we can cache more than just revision ids	144	decode(revision_id)
1911.2.1 by John Arbash Meinel Cache encode/decode operations, saves memory and time. Especially when committing a new kernel tree with 7.7M new lines to annotate	145
	146	def test_decode_1_by_1M_ascii(self):
	147	"""Test decoding a single revision id 1 million times."""
	148	self.time(self.decode_1M, _normal_revision_id_utf8)
	149
	150	def test_decode_1_by_1M_ascii_cached(self):
	151	"""Test decoding a single revision id 1 million times."""
	152	self.time(self.decode_cached_1M, _normal_revision_id_utf8)
	153
	154	def test_decode_1_by_1M_unicode(self):
	155	"""Test decoding a single revision id 1 million times."""
	156	self.time(self.decode_1M, _unicode_revision_id_utf8)
	157
	158	def test_decode_1_by_1M_unicode_cached(self):
	159	"""Test decoding a single revision id 1 million times."""
	160	self.time(self.decode_cached_1M, _unicode_revision_id_utf8)
	161
	162	def test_decode_1k_by_1k_ascii(self):
	163	"""Test decoding 5 revisions 100k times"""
	164	revisions = [osutils.rand_chars(60) for x in xrange(1000)]
	165	self.time(self.decode_multi, revisions, 1000)
	166
	167	def test_decode_1k_by_1k_ascii_cached(self):
	168	"""Test decoding 5 revisions 100k times"""
	169	revisions = [osutils.rand_chars(60) for x in xrange(1000)]
	170	self.time(self.decode_cached_multi, revisions, 1000)
	171
	172	def test_decode_1k_by_1k_unicode(self):
	173	"""Test decoding 5 revisions 100k times"""
	174	revisions = [('\u062c\u0648\u062c\u0648' +
	175	unicode(osutils.rand_chars(60))).encode('utf8')
	176	for x in xrange(1000)]
	177	self.time(self.decode_multi, revisions, 1000)
	178
	179	def test_decode_1k_by_1k_unicode_cached(self):
	180	"""Test decoding 5 revisions 100k times"""
	181	revisions = [('\u062c\u0648\u062c\u0648' +
	182	unicode(osutils.rand_chars(60))).encode('utf8')
	183	for x in xrange(1000)]
	184	self.time(self.decode_cached_multi, revisions, 1000)