1
# Copyright (C) 2007, 2009 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
"""Benchmarks for bzr DirState performance."""
28
from bzrlib.tests.test__dirstate_helpers import (
29
compiled_dirstate_helpers_feature,
33
class BenchmarkDirState(benchmarks.Benchmark):
34
"""Benchmarks for DirState functions."""
36
def build_helper(self, layout):
37
"""This is a helper with the common build_??_dirstate funcs.
39
:param layout: [(num_dirs, files_per_dir)]
40
The number of directories per level, and the number of files to put
42
:return: A DirState object with the given layout. The blocks will be
43
modified in memory, and the object will be write locked. (Callers
44
must save and unlock the object).
46
self.build_tree(['dir/'])
48
self.build_tree_contents([('file', contents)])
49
file_stat = os.lstat('file')
50
dir_stat = os.lstat('dir')
51
file_sha1 = osutils.sha_string(contents)
53
state = dirstate.DirState.initialize('state')
54
def create_entries(base, layout):
57
num_dirs, num_files = layout[0]
58
for dnum in xrange(num_dirs):
60
path = '%s/%02d_directory' % (base, dnum)
62
path = '%02d_directory' % (dnum,)
63
dir_id = generate_ids.gen_file_id(path)
64
state.add(path, dir_id, 'directory', dir_stat, '')
65
for fnum in xrange(num_files):
66
fname = '%s/%02d_filename' % (path, fnum)
67
file_id = generate_ids.gen_file_id(fname)
68
state.add(fname, file_id, 'file', file_stat, file_sha1)
69
create_entries(path, layout[1:])
70
create_entries(None, layout)
73
def build_10k_dirstate_dirs(self):
74
"""Build a DirState file with 10k directories"""
75
state = self.build_helper([(10, 0), (10, 0), (10, 0), (10, 1)])
80
def build_20k_dirstate(self):
81
"""Build a DirState file with 20k records.
83
This approximates a kernel tree, based on the number of directories
84
(1000), and number of files per directory (20) and depth (3).
85
Because DirState doesn't have to have actual disk records, we just add
87
We try to have reasonable filename lengths, as well as a reasonable
90
state = self.build_helper([(10, 0), (10, 0), (10, 20)])
95
def build_20k_dirstate_with_parents(self, num_parents):
96
"""Build a DirState file with 20k records and N parents.
98
With 1 parent, this is equivalent to after a simple commit. With 2 it
99
is equivalent to after a merge.
101
# All files are marked as changed in the same revision, and this occurs
102
# supposedly in the history of the current trees.
103
last_changed_id = generate_ids.gen_revision_id('joe@foo.com')
104
parent_revision_ids = [generate_ids.gen_revision_id('joe@foo.com')
105
for i in xrange(num_parents)]
106
# Start with a dirstate file with 0 parents
107
state = self.build_helper([(10, 0), (10, 0), (10, 20)])
109
# This invasively updates the internals of DirState to be fast,
110
# since we don't have an api other than passing in Revision Tree
111
# objects, but that requires having a real inventory, etc.
113
for entry in state._iter_entries():
114
minikind, fingerprint, size, is_exec, packed_stat = entry[1][0]
115
for parent_id in parent_revision_ids:
116
# Add a parent record for this record
117
entry[1].append((minikind, fingerprint, size, is_exec,
119
state._parents = parent_revision_ids
126
def test_add_20k_entries(self):
127
"""Time how long it takes to add lots of entries."""
128
state = self.time(self.build_helper, [(10, 0), (10, 0), (10, 20)])
131
def test_build_20k_dirstate(self):
132
state = self.time(self.build_20k_dirstate)
135
entries = list(state._iter_entries())
136
self.assertEqual(21111, len(entries))
140
def test_build_20k_dirstate_1_parent(self):
141
state = self.time(self.build_20k_dirstate_with_parents, 1)
145
entries = list(state._iter_entries())
146
self.assertEqual(21111, len(entries))
150
def test_build_20k_dirstate_2_parents(self):
151
state = self.time(self.build_20k_dirstate_with_parents, 2)
155
entries = list(state._iter_entries())
156
self.assertEqual(21111, len(entries))
160
def test_save_20k_tree_0_parents(self):
161
state = self.build_20k_dirstate()
164
self.assertEqual(dirstate.DirState.NOT_IN_MEMORY,
165
state._dirblock_state)
166
state._read_dirblocks_if_needed()
167
state._dirblock_state = dirstate.DirState.IN_MEMORY_MODIFIED
168
self.time(state.save)
172
def test_save_20k_tree_1_parent(self):
173
state = self.build_20k_dirstate_with_parents(1)
176
self.assertEqual(dirstate.DirState.NOT_IN_MEMORY,
177
state._dirblock_state)
178
state._read_dirblocks_if_needed()
179
state._dirblock_state = dirstate.DirState.IN_MEMORY_MODIFIED
180
self.time(state.save)
184
def test_save_20k_tree_2_parents(self):
185
state = self.build_20k_dirstate_with_parents(2)
188
self.assertEqual(dirstate.DirState.NOT_IN_MEMORY,
189
state._dirblock_state)
190
state._read_dirblocks_if_needed()
191
state._dirblock_state = dirstate.DirState.IN_MEMORY_MODIFIED
192
self.time(state.save)
196
def test__read_dirblocks_20k_tree_0_parents_py(self):
197
from bzrlib._dirstate_helpers_py import _read_dirblocks
198
state = self.build_20k_dirstate()
201
self.assertEqual(dirstate.DirState.NOT_IN_MEMORY,
202
state._dirblock_state)
203
state._read_header_if_needed()
204
self.time(_read_dirblocks, state)
208
def test__read_dirblocks_20k_tree_0_parents_pyx(self):
209
self.requireFeature(compiled_dirstate_helpers_feature)
210
from bzrlib._dirstate_helpers_pyx import _read_dirblocks
211
state = self.build_20k_dirstate()
214
self.assertEqual(dirstate.DirState.NOT_IN_MEMORY,
215
state._dirblock_state)
216
state._read_header_if_needed()
217
self.time(_read_dirblocks, state)
221
def test__read_dirblocks_20k_tree_1_parent_py(self):
222
from bzrlib._dirstate_helpers_py import _read_dirblocks
223
state = self.build_20k_dirstate_with_parents(1)
226
self.assertEqual(dirstate.DirState.NOT_IN_MEMORY,
227
state._dirblock_state)
228
state._read_header_if_needed()
229
self.time(_read_dirblocks, state)
233
def test__read_dirblocks_20k_tree_1_parent_pyx(self):
234
self.requireFeature(compiled_dirstate_helpers_feature)
235
from bzrlib._dirstate_helpers_pyx import _read_dirblocks
236
state = self.build_20k_dirstate_with_parents(1)
239
self.assertEqual(dirstate.DirState.NOT_IN_MEMORY,
240
state._dirblock_state)
241
state._read_header_if_needed()
242
self.time(_read_dirblocks, state)
246
def test__read_dirblocks_20k_tree_2_parents_py(self):
247
from bzrlib._dirstate_helpers_py import _read_dirblocks
248
state = self.build_20k_dirstate_with_parents(2)
251
self.assertEqual(dirstate.DirState.NOT_IN_MEMORY,
252
state._dirblock_state)
253
state._read_header_if_needed()
254
self.time(_read_dirblocks, state)
258
def test__read_dirblocks_20k_tree_2_parents_pyx(self):
259
self.requireFeature(compiled_dirstate_helpers_feature)
260
from bzrlib._dirstate_helpers_pyx import _read_dirblocks
261
state = self.build_20k_dirstate_with_parents(2)
264
self.assertEqual(dirstate.DirState.NOT_IN_MEMORY,
265
state._dirblock_state)
266
state._read_header_if_needed()
267
self.time(_read_dirblocks, state)
271
def do_bisect_list(self, bisect_func):
272
"""Call bisect_dirblock for each path."""
273
# We use self._paths and self._blocks because we expect it to be a very
274
# long list. And the interface for 'self.time()' causes the parameters
275
# to be printed when run with --lsprof-timed. Which is *really* ugly
276
# when the list is thousands of entries.
277
blocks = self._blocks
278
return [bisect_func(blocks, path) for path in self._paths]
280
def do_bisect_list_cached(self, bisect_func):
281
"""Same as do_bisect_list, but cache the split paths"""
283
blocks = self._blocks
284
return [bisect_func(blocks, path, cache=cache) for path in self._paths]
286
def setup_paths_and_offsets(self, state):
287
"""Get a list of paths and expected offsets.
289
This will be used to check do_bisect_list*
291
state._read_dirblocks_if_needed()
293
expected_offsets = [0]
294
for offset, info in enumerate(state._dirblocks):
296
# We already handled the empty path
299
# all paths are of the form ##_directory
300
# so search for ##_director, ##_directory
301
paths.extend([dirname[:-1], dirname])
302
expected_offsets.extend([offset, offset])
304
self._expected_offsets = expected_offsets
305
self._blocks = state._dirblocks
307
def checkOffsets(self, offsets):
308
"""Make sure offsets matches self._expected_offsets"""
309
# These are really long lists, so it is easier to compare them with
310
# assertEqualDiff. So turn them into strings.
311
expected_str = '\n'.join(str(x) for x in self._expected_offsets)
312
offset_str = '\n'.join(str(x) for x in offsets)
313
self.assertEqualDiff(expected_str, offset_str)
315
def test_bisect_dirblock_py(self):
316
from bzrlib._dirstate_helpers_py import bisect_dirblock
317
state = self.build_10k_dirstate_dirs()
320
self.setup_paths_and_offsets(state)
321
offsets = self.time(self.do_bisect_list,
323
self.checkOffsets(offsets)
327
def test_bisect_dirblock_cached_py(self):
328
from bzrlib._dirstate_helpers_py import bisect_dirblock
329
state = self.build_10k_dirstate_dirs()
332
self.setup_paths_and_offsets(state)
333
offsets = self.time(self.do_bisect_list_cached,
335
self.checkOffsets(offsets)
339
def test_bisect_dirblock_pyx(self):
340
self.requireFeature(compiled_dirstate_helpers_feature)
341
from bzrlib._dirstate_helpers_pyx import bisect_dirblock
342
state = self.build_10k_dirstate_dirs()
345
self.setup_paths_and_offsets(state)
346
offsets = self.time(self.do_bisect_list, bisect_dirblock)
347
self.checkOffsets(offsets)
351
def create_path_names(self, layout, base=''):
352
"""Create a list of paths with auto-generated names.
354
:param layout: A list of [(num_dirs, num_files)] tuples. For each
355
level, the given number of directories will be created, each
356
containing that many files.
357
So [(2, 5), (3, 4)] will create 2 top level directories, containing
358
5 files, and each top level directory will contain 3 subdirs with 4
360
:param base: The base path to prepend to all entries, most callers will
362
:return: A list of path names.
368
num_dirs, num_files = layout[0]
369
for dnum in xrange(num_dirs):
371
path = '%s/%02d_directory' % (base, dnum)
373
path = '%02d_directory' % (dnum,)
375
for fnum in xrange(num_files):
376
fname = '%s/%02d_filename' % (path, fnum)
378
paths.extend(self.create_path_names(layout[1:], base=path))
381
def test_create_path_names(self):
382
names = self.create_path_names([(2, 3), (1, 2)])
383
self.assertEqual(['00_directory',
384
'00_directory/00_filename',
385
'00_directory/01_filename',
386
'00_directory/02_filename',
387
'00_directory/00_directory',
388
'00_directory/00_directory/00_filename',
389
'00_directory/00_directory/01_filename',
391
'01_directory/00_filename',
392
'01_directory/01_filename',
393
'01_directory/02_filename',
394
'01_directory/00_directory',
395
'01_directory/00_directory/00_filename',
396
'01_directory/00_directory/01_filename',
398
names = self.time(self.create_path_names, [(10, 2), (10, 2), (10, 20)])
399
# 20 files + 1 directory name, 10 times, plus 2 filenames and 1 dir, 10
400
# times, and another 2 files + 1 dir, 10 times
401
self.assertEqual(21330, 10*(3 + 10*(3 + 10*(1 + 20))))
402
self.assertEqual(21330, len(names))
404
def compareAllPaths(self, cmp_func, layout):
405
"""Compare N^2 paths.
407
Basically, compare every path in the list against every other path.
409
paths = self.create_path_names(layout)
413
cmp_func(path1, path2)
414
self.time(compare_all)
416
def test_cmp_by_dirs_py(self):
417
"""Benchmark 103041 comparisons."""
418
from bzrlib._dirstate_helpers_py import cmp_by_dirs
419
self.compareAllPaths(cmp_by_dirs,
420
[(3, 1), (3, 1), (3, 1), (3, 2)])
422
def test_cmp_by_dirs_pyrex(self):
423
self.requireFeature(compiled_dirstate_helpers_feature)
424
from bzrlib._dirstate_helpers_pyx import cmp_by_dirs
425
self.compareAllPaths(cmp_by_dirs,
426
[(3, 1), (3, 1), (3, 1), (3, 2)])