~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/benchmarks/bench_dirstate.py

  • Committer: Canonical.com Patch Queue Manager
  • Date: 2008-09-03 22:30:56 UTC
  • mfrom: (3644.2.13 index_builder_cleanup)
  • Revision ID: pqm@pqm.ubuntu.com-20080903223056-b108iytb38xkznci
(jam) Streamline BTreeBuilder.add_node et al to make btree creation
        faster.

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
# Copyright (C) 2007 Canonical Ltd
 
2
#
 
3
# This program is free software; you can redistribute it and/or modify
 
4
# it under the terms of the GNU General Public License as published by
 
5
# the Free Software Foundation; either version 2 of the License, or
 
6
# (at your option) any later version.
 
7
#
 
8
# This program is distributed in the hope that it will be useful,
 
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
11
# GNU General Public License for more details.
 
12
#
 
13
# You should have received a copy of the GNU General Public License
 
14
# along with this program; if not, write to the Free Software
 
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
16
 
 
17
"""Benchmarks for bzr DirState performance."""
 
18
 
 
19
import os
 
20
 
 
21
from bzrlib import (
 
22
    benchmarks,
 
23
    dirstate,
 
24
    generate_ids,
 
25
    osutils,
 
26
    tests,
 
27
    )
 
28
from bzrlib.tests.test__dirstate_helpers import (
 
29
    CompiledDirstateHelpersFeature,
 
30
    )
 
31
 
 
32
 
 
33
class BenchmarkDirState(benchmarks.Benchmark):
 
34
    """Benchmarks for DirState functions."""
 
35
 
 
36
    def build_helper(self, layout):
 
37
        """This is a helper with the common build_??_dirstate funcs.
 
38
 
 
39
        :param layout: [(num_dirs, files_per_dir)]
 
40
            The number of directories per level, and the number of files to put
 
41
            in it.
 
42
        :return: A DirState object with the given layout. The blocks will be
 
43
            modified in memory, and the object will be write locked. (Callers
 
44
            must save and unlock the object).
 
45
        """
 
46
        self.build_tree(['dir/'])
 
47
        contents = 'x'*10000
 
48
        self.build_tree_contents([('file', contents)])
 
49
        file_stat = os.lstat('file')
 
50
        dir_stat = os.lstat('dir')
 
51
        file_sha1 = osutils.sha_string(contents)
 
52
 
 
53
        state = dirstate.DirState.initialize('state')
 
54
        def create_entries(base, layout):
 
55
            if not layout:
 
56
                return
 
57
            num_dirs, num_files = layout[0]
 
58
            for dnum in xrange(num_dirs):
 
59
                if base:
 
60
                    path = '%s/%02d_directory' % (base, dnum)
 
61
                else:
 
62
                    path = '%02d_directory' % (dnum,)
 
63
                dir_id = generate_ids.gen_file_id(path)
 
64
                state.add(path, dir_id, 'directory', dir_stat, '')
 
65
                for fnum in xrange(num_files):
 
66
                    fname = '%s/%02d_filename' % (path, fnum)
 
67
                    file_id = generate_ids.gen_file_id(fname)
 
68
                    state.add(fname, file_id, 'file', file_stat, file_sha1)
 
69
                create_entries(path, layout[1:])
 
70
        create_entries(None, layout)
 
71
        return state
 
72
 
 
73
    def build_10k_dirstate_dirs(self):
 
74
        """Build a DirState file with 10k directories"""
 
75
        state = self.build_helper([(10, 0), (10, 0), (10, 0), (10, 1)])
 
76
        state.save()
 
77
        state.unlock()
 
78
        return state
 
79
 
 
80
    def build_20k_dirstate(self):
 
81
        """Build a DirState file with 20k records.
 
82
 
 
83
        This approximates a kernel tree, based on the number of directories
 
84
        (1000), and number of files per directory (20) and depth (3).
 
85
        Because DirState doesn't have to have actual disk records, we just add
 
86
        random files.
 
87
        We try to have reasonable filename lengths, as well as a reasonable
 
88
        stat value, etc.
 
89
        """
 
90
        state = self.build_helper([(10, 0), (10, 0), (10, 20)])
 
91
        state.save()
 
92
        state.unlock()
 
93
        return state
 
94
 
 
95
    def build_20k_dirstate_with_parents(self, num_parents):
 
96
        """Build a DirState file with 20k records and N parents.
 
97
 
 
98
        With 1 parent, this is equivalent to after a simple commit. With 2 it
 
99
        is equivalent to after a merge.
 
100
        """
 
101
        # All files are marked as changed in the same revision, and this occurs
 
102
        # supposedly in the history of the current trees.
 
103
        last_changed_id = generate_ids.gen_revision_id('joe@foo.com')
 
104
        parent_revision_ids = [generate_ids.gen_revision_id('joe@foo.com')
 
105
                               for i in xrange(num_parents)]
 
106
        # Start with a dirstate file with 0 parents
 
107
        state = self.build_helper([(10, 0), (10, 0), (10, 20)])
 
108
        try:
 
109
            # This invasively updates the internals of DirState to be fast,
 
110
            # since we don't have an api other than passing in Revision Tree
 
111
            # objects, but that requires having a real inventory, etc.
 
112
            if num_parents > 0:
 
113
                for entry in state._iter_entries():
 
114
                    minikind, fingerprint, size, is_exec, packed_stat = entry[1][0]
 
115
                    for parent_id in parent_revision_ids:
 
116
                        # Add a parent record for this record
 
117
                        entry[1].append((minikind, fingerprint, size, is_exec,
 
118
                                         last_changed_id))
 
119
            state._parents = parent_revision_ids
 
120
            state._ghosts = []
 
121
            state.save()
 
122
        finally:
 
123
            state.unlock()
 
124
        return state
 
125
 
 
126
    def test_add_20k_entries(self):
 
127
        """Time how long it takes to add lots of entries."""
 
128
        state = self.time(self.build_helper, [(10, 0), (10, 0), (10, 20)])
 
129
        state.unlock()
 
130
 
 
131
    def test_build_20k_dirstate(self):
 
132
        state = self.time(self.build_20k_dirstate)
 
133
        state.lock_read()
 
134
        try:
 
135
            entries = list(state._iter_entries())
 
136
            self.assertEqual(21111, len(entries))
 
137
        finally:
 
138
            state.unlock()
 
139
 
 
140
    def test_build_20k_dirstate_1_parent(self):
 
141
        state = self.time(self.build_20k_dirstate_with_parents, 1)
 
142
        state.lock_read()
 
143
        try:
 
144
            state._validate()
 
145
            entries = list(state._iter_entries())
 
146
            self.assertEqual(21111, len(entries))
 
147
        finally:
 
148
            state.unlock()
 
149
 
 
150
    def test_build_20k_dirstate_2_parents(self):
 
151
        state = self.time(self.build_20k_dirstate_with_parents, 2)
 
152
        state.lock_read()
 
153
        try:
 
154
            state._validate()
 
155
            entries = list(state._iter_entries())
 
156
            self.assertEqual(21111, len(entries))
 
157
        finally:
 
158
            state.unlock()
 
159
 
 
160
    def test_save_20k_tree_0_parents(self):
 
161
        state = self.build_20k_dirstate()
 
162
        state.lock_read()
 
163
        try:
 
164
            self.assertEqual(dirstate.DirState.NOT_IN_MEMORY,
 
165
                             state._dirblock_state)
 
166
            state._read_dirblocks_if_needed()
 
167
            state._dirblock_state = dirstate.DirState.IN_MEMORY_MODIFIED
 
168
            self.time(state.save)
 
169
        finally:
 
170
            state.unlock()
 
171
 
 
172
    def test_save_20k_tree_1_parent(self):
 
173
        state = self.build_20k_dirstate_with_parents(1)
 
174
        state.lock_read()
 
175
        try:
 
176
            self.assertEqual(dirstate.DirState.NOT_IN_MEMORY,
 
177
                             state._dirblock_state)
 
178
            state._read_dirblocks_if_needed()
 
179
            state._dirblock_state = dirstate.DirState.IN_MEMORY_MODIFIED
 
180
            self.time(state.save)
 
181
        finally:
 
182
            state.unlock()
 
183
 
 
184
    def test_save_20k_tree_2_parents(self):
 
185
        state = self.build_20k_dirstate_with_parents(2)
 
186
        state.lock_read()
 
187
        try:
 
188
            self.assertEqual(dirstate.DirState.NOT_IN_MEMORY,
 
189
                             state._dirblock_state)
 
190
            state._read_dirblocks_if_needed()
 
191
            state._dirblock_state = dirstate.DirState.IN_MEMORY_MODIFIED
 
192
            self.time(state.save)
 
193
        finally:
 
194
            state.unlock()
 
195
 
 
196
    def test__read_dirblocks_20k_tree_0_parents_py(self):
 
197
        from bzrlib._dirstate_helpers_py import _read_dirblocks_py
 
198
        state = self.build_20k_dirstate()
 
199
        state.lock_read()
 
200
        try:
 
201
            self.assertEqual(dirstate.DirState.NOT_IN_MEMORY,
 
202
                             state._dirblock_state)
 
203
            state._read_header_if_needed()
 
204
            self.time(_read_dirblocks_py, state)
 
205
        finally:
 
206
            state.unlock()
 
207
 
 
208
    def test__read_dirblocks_20k_tree_0_parents_c(self):
 
209
        self.requireFeature(CompiledDirstateHelpersFeature)
 
210
        from bzrlib._dirstate_helpers_c import _read_dirblocks_c
 
211
        state = self.build_20k_dirstate()
 
212
        state.lock_read()
 
213
        try:
 
214
            self.assertEqual(dirstate.DirState.NOT_IN_MEMORY,
 
215
                             state._dirblock_state)
 
216
            state._read_header_if_needed()
 
217
            self.time(_read_dirblocks_c, state)
 
218
        finally:
 
219
            state.unlock()
 
220
 
 
221
    def test__read_dirblocks_20k_tree_1_parent_py(self):
 
222
        from bzrlib._dirstate_helpers_py import _read_dirblocks_py
 
223
        state = self.build_20k_dirstate_with_parents(1)
 
224
        state.lock_read()
 
225
        try:
 
226
            self.assertEqual(dirstate.DirState.NOT_IN_MEMORY,
 
227
                             state._dirblock_state)
 
228
            state._read_header_if_needed()
 
229
            self.time(_read_dirblocks_py, state)
 
230
        finally:
 
231
            state.unlock()
 
232
 
 
233
    def test__read_dirblocks_20k_tree_1_parent_c(self):
 
234
        self.requireFeature(CompiledDirstateHelpersFeature)
 
235
        from bzrlib._dirstate_helpers_c import _read_dirblocks_c
 
236
        state = self.build_20k_dirstate_with_parents(1)
 
237
        state.lock_read()
 
238
        try:
 
239
            self.assertEqual(dirstate.DirState.NOT_IN_MEMORY,
 
240
                             state._dirblock_state)
 
241
            state._read_header_if_needed()
 
242
            self.time(_read_dirblocks_c, state)
 
243
        finally:
 
244
            state.unlock()
 
245
 
 
246
    def test__read_dirblocks_20k_tree_2_parents_py(self):
 
247
        from bzrlib._dirstate_helpers_py import _read_dirblocks_py
 
248
        state = self.build_20k_dirstate_with_parents(2)
 
249
        state.lock_read()
 
250
        try:
 
251
            self.assertEqual(dirstate.DirState.NOT_IN_MEMORY,
 
252
                             state._dirblock_state)
 
253
            state._read_header_if_needed()
 
254
            self.time(_read_dirblocks_py, state)
 
255
        finally:
 
256
            state.unlock()
 
257
 
 
258
    def test__read_dirblocks_20k_tree_2_parents_c(self):
 
259
        self.requireFeature(CompiledDirstateHelpersFeature)
 
260
        from bzrlib._dirstate_helpers_c import _read_dirblocks_c
 
261
        state = self.build_20k_dirstate_with_parents(2)
 
262
        state.lock_read()
 
263
        try:
 
264
            self.assertEqual(dirstate.DirState.NOT_IN_MEMORY,
 
265
                             state._dirblock_state)
 
266
            state._read_header_if_needed()
 
267
            self.time(_read_dirblocks_c, state)
 
268
        finally:
 
269
            state.unlock()
 
270
 
 
271
    def do_bisect_list(self, bisect_func):
 
272
        """Call bisect_dirblock for each path."""
 
273
        # We use self._paths and self._blocks because we expect it to be a very
 
274
        # long list. And the interface for 'self.time()' causes the parameters
 
275
        # to be printed when run with --lsprof-timed. Which is *really* ugly
 
276
        # when the list is thousands of entries.
 
277
        blocks = self._blocks
 
278
        return [bisect_func(blocks, path) for path in self._paths]
 
279
 
 
280
    def do_bisect_list_cached(self, bisect_func):
 
281
        """Same as do_bisect_list, but cache the split paths"""
 
282
        cache = {}
 
283
        blocks = self._blocks
 
284
        return [bisect_func(blocks, path, cache=cache) for path in self._paths]
 
285
 
 
286
    def setup_paths_and_offsets(self, state):
 
287
        """Get a list of paths and expected offsets.
 
288
 
 
289
        This will be used to check do_bisect_list*
 
290
        """
 
291
        state._read_dirblocks_if_needed()
 
292
        paths = ['']
 
293
        expected_offsets = [0]
 
294
        for offset, info in enumerate(state._dirblocks):
 
295
            dirname = info[0]
 
296
            # We already handled the empty path
 
297
            if dirname == '':
 
298
                continue
 
299
            # all paths are of the form ##_directory
 
300
            # so search for ##_director, ##_directory
 
301
            paths.extend([dirname[:-1], dirname])
 
302
            expected_offsets.extend([offset, offset])
 
303
        self._paths = paths
 
304
        self._expected_offsets = expected_offsets
 
305
        self._blocks = state._dirblocks
 
306
 
 
307
    def checkOffsets(self, offsets):
 
308
        """Make sure offsets matches self._expected_offsets"""
 
309
        # These are really long lists, so it is easier to compare them with
 
310
        # assertEqualDiff. So turn them into strings.
 
311
        expected_str = '\n'.join(str(x) for x in self._expected_offsets)
 
312
        offset_str = '\n'.join(str(x) for x in offsets)
 
313
        self.assertEqualDiff(expected_str, offset_str)
 
314
 
 
315
    def test_bisect_dirblock_py(self):
 
316
        from bzrlib._dirstate_helpers_py import bisect_dirblock_py
 
317
        state = self.build_10k_dirstate_dirs()
 
318
        state.lock_read()
 
319
        try:
 
320
            self.setup_paths_and_offsets(state)
 
321
            offsets = self.time(self.do_bisect_list,
 
322
                                bisect_dirblock_py)
 
323
            self.checkOffsets(offsets)
 
324
        finally:
 
325
            state.unlock()
 
326
 
 
327
    def test_bisect_dirblock_cached_py(self):
 
328
        from bzrlib._dirstate_helpers_py import bisect_dirblock_py
 
329
        state = self.build_10k_dirstate_dirs()
 
330
        state.lock_read()
 
331
        try:
 
332
            self.setup_paths_and_offsets(state)
 
333
            offsets = self.time(self.do_bisect_list_cached,
 
334
                                bisect_dirblock_py)
 
335
            self.checkOffsets(offsets)
 
336
        finally:
 
337
            state.unlock()
 
338
 
 
339
    def test_bisect_dirblock_c(self):
 
340
        self.requireFeature(CompiledDirstateHelpersFeature)
 
341
        from bzrlib._dirstate_helpers_c import bisect_dirblock_c
 
342
        state = self.build_10k_dirstate_dirs()
 
343
        state.lock_read()
 
344
        try:
 
345
            self.setup_paths_and_offsets(state)
 
346
            offsets = self.time(self.do_bisect_list, bisect_dirblock_c)
 
347
            self.checkOffsets(offsets)
 
348
        finally:
 
349
            state.unlock()
 
350
 
 
351
    def create_path_names(self, layout, base=''):
 
352
        """Create a list of paths with auto-generated names.
 
353
 
 
354
        :param layout: A list of [(num_dirs, num_files)] tuples. For each
 
355
            level, the given number of directories will be created, each
 
356
            containing that many files.
 
357
            So [(2, 5), (3, 4)] will create 2 top level directories, containing
 
358
            5 files, and each top level directory will contain 3 subdirs with 4
 
359
            files.
 
360
        :param base: The base path to prepend to all entries, most callers will
 
361
            pass ''
 
362
        :return: A list of path names.
 
363
        """
 
364
        if not layout:
 
365
            return []
 
366
 
 
367
        paths = []
 
368
        num_dirs, num_files = layout[0]
 
369
        for dnum in xrange(num_dirs):
 
370
            if base:
 
371
                path = '%s/%02d_directory' % (base, dnum)
 
372
            else:
 
373
                path = '%02d_directory' % (dnum,)
 
374
            paths.append(path)
 
375
            for fnum in xrange(num_files):
 
376
                fname = '%s/%02d_filename' % (path, fnum)
 
377
                paths.append(fname)
 
378
            paths.extend(self.create_path_names(layout[1:], base=path))
 
379
        return paths
 
380
 
 
381
    def test_create_path_names(self):
 
382
        names = self.create_path_names([(2, 3), (1, 2)])
 
383
        self.assertEqual(['00_directory',
 
384
                          '00_directory/00_filename',
 
385
                          '00_directory/01_filename',
 
386
                          '00_directory/02_filename',
 
387
                          '00_directory/00_directory',
 
388
                          '00_directory/00_directory/00_filename',
 
389
                          '00_directory/00_directory/01_filename',
 
390
                          '01_directory',
 
391
                          '01_directory/00_filename',
 
392
                          '01_directory/01_filename',
 
393
                          '01_directory/02_filename',
 
394
                          '01_directory/00_directory',
 
395
                          '01_directory/00_directory/00_filename',
 
396
                          '01_directory/00_directory/01_filename',
 
397
                         ], names)
 
398
        names = self.time(self.create_path_names, [(10, 2), (10, 2), (10, 20)])
 
399
        # 20 files + 1 directory name, 10 times, plus 2 filenames and 1 dir, 10
 
400
        # times, and another 2 files + 1 dir, 10 times
 
401
        self.assertEqual(21330, 10*(3 + 10*(3 + 10*(1 + 20))))
 
402
        self.assertEqual(21330, len(names))
 
403
 
 
404
    def compareAllPaths(self, cmp_func, layout):
 
405
        """Compare N^2 paths.
 
406
 
 
407
        Basically, compare every path in the list against every other path.
 
408
        """
 
409
        paths = self.create_path_names(layout)
 
410
        def compare_all():
 
411
            for path1 in paths:
 
412
                for path2 in paths:
 
413
                    cmp_func(path1, path2)
 
414
        self.time(compare_all)
 
415
 
 
416
    def test_cmp_by_dirs_py(self):
 
417
        """Benchmark 103041 comparisons."""
 
418
        from bzrlib._dirstate_helpers_py import cmp_by_dirs_py
 
419
        self.compareAllPaths(cmp_by_dirs_py,
 
420
                             [(3, 1), (3, 1), (3, 1), (3, 2)])
 
421
 
 
422
    def test_cmp_by_dirs_c(self):
 
423
        self.requireFeature(CompiledDirstateHelpersFeature)
 
424
        from bzrlib._dirstate_helpers_c import cmp_by_dirs_c
 
425
        self.compareAllPaths(cmp_by_dirs_c,
 
426
                             [(3, 1), (3, 1), (3, 1), (3, 2)])