~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/benchmarks/bench_dirstate.py

  • Committer: John Arbash Meinel
  • Date: 2007-05-03 23:35:49 UTC
  • mto: This revision was merged to the branch mainline in revision 2643.
  • Revision ID: john@arbash-meinel.com-20070503233549-445n015iomhc8ppm
Add benchmarks for dirstate.bisect_dirblocks, and implement bisect_dirblocks in pyrex.
Shows about a 2x performance improvement being in compiled C.
Also, at least on my Mac, it is faster without extra caching.

Show diffs side-by-side

added added

removed removed

Lines of Context:
23
23
    dirstate,
24
24
    generate_ids,
25
25
    osutils,
 
26
    tests,
26
27
    )
27
28
 
28
29
 
 
30
class _CompiledDirstateHelpersFeature(tests.Feature):
 
31
    def _probe(self):
 
32
        try:
 
33
            import bzrlib.compiled.dirstate_helpers
 
34
        except ImportError:
 
35
            return False
 
36
        return True
 
37
 
 
38
    def feature_name(self):
 
39
        return 'compiled dirstate helpers'
 
40
 
 
41
CompiledDirstateHelpersFeature =_CompiledDirstateHelpersFeature()
 
42
 
 
43
 
29
44
class BenchmarkDirState(benchmarks.Benchmark):
30
45
 
 
46
    def build_helper(self, layout):
 
47
        """This is a helper with the common build_??_dirstate funcs.
 
48
 
 
49
        :param layout: [(num_dirs, files_per_dir)]
 
50
            The number of directories per level, and the number of files to put
 
51
            in it.
 
52
        :return: A DirState object with the given layout.
 
53
        """
 
54
        self.build_tree(['dir/'])
 
55
        contents = 'x'*10000
 
56
        self.build_tree_contents([('file', contents)])
 
57
        file_stat = os.lstat('file')
 
58
        dir_stat = os.lstat('dir')
 
59
        file_sha1 = osutils.sha_string(contents)
 
60
 
 
61
        state = dirstate.DirState.initialize('state')
 
62
        try:
 
63
            def create_entries(base, layout):
 
64
                if not layout:
 
65
                    return
 
66
                num_dirs, num_files = layout[0]
 
67
                for dnum in xrange(num_dirs):
 
68
                    if base:
 
69
                        path = '%s/%02d_directory' % (base, dnum)
 
70
                    else:
 
71
                        path = '%02d_directory' % (dnum,)
 
72
                    dir_id = generate_ids.gen_file_id(path)
 
73
                    state.add(path, dir_id, 'directory', dir_stat, '')
 
74
                    for fnum in xrange(num_files):
 
75
                        fname = '%s/%02d_filename' % (path, fnum)
 
76
                        file_id = generate_ids.gen_file_id(fname)
 
77
                        state.add(fname, file_id, 'file', file_stat, file_sha1)
 
78
                    create_entries(path, layout[1:])
 
79
            create_entries(None, layout)
 
80
            state.save()
 
81
        finally:
 
82
            state.unlock()
 
83
        return state
 
84
 
 
85
    def build_1k_dirstate_dirs(self):
 
86
        """Build a DirState file with 1k directories"""
 
87
        return self.build_helper([(10, 0), (10, 0), (10, 1)])
 
88
 
31
89
    def build_20k_dirstate(self):
32
90
        """Build a DirState file with 20k records.
33
91
 
38
96
        We try to have reasonable filename lengths, as well as a reasonable
39
97
        stat value, etc.
40
98
        """
41
 
        self.build_tree(['dir/'])
42
 
        self.build_tree_contents([('file', 'x'*10000)])
43
 
        file_stat = os.lstat('file')
44
 
        dir_stat = os.lstat('dir')
45
 
        file_sha1 = osutils.sha_string('testing')
46
 
 
47
 
        # the average filename length is 11 characters
48
 
        # find . | sed -e 's/.*\///' | wc -l
49
 
        #   22545   22545  237869
50
 
        # 237869 / 22545 = 10.6
51
 
        # average depth is 30 characters
52
 
        # find . | wc -l
53
 
        #   22545   22545  679884
54
 
        # 679884 / 22545 = 30.1
55
 
        state = dirstate.DirState.initialize('state')
56
 
        try:
57
 
            for lvl1 in xrange(10):
58
 
                dir_1 = '%2d_directory' % (lvl1,)
59
 
                dir_1_id = generate_ids.gen_file_id(dir_1)
60
 
                state.add(dir_1, dir_1_id, 'directory', dir_stat, '')
61
 
                for lvl2 in xrange(10):
62
 
                    dir_2 = '%s/%2d_directory' % (dir_1, lvl2)
63
 
                    dir_2_id = generate_ids.gen_file_id(dir_2)
64
 
                    state.add(dir_2, dir_2_id, 'directory', dir_stat, '')
65
 
                    for lvl3 in xrange(10):
66
 
                        dir_3 = '%s/%2d_directory' % (dir_2, lvl3)
67
 
                        dir_3_id = generate_ids.gen_file_id(dir_3)
68
 
                        state.add(dir_3, dir_3_id, 'directory', dir_stat, '')
69
 
                        for filenum in xrange(20):
70
 
                            fname = '%s/%2d_filename' % (dir_3, filenum)
71
 
                            file_id = generate_ids.gen_file_id(fname)
72
 
                            state.add(fname, file_id, 'directory', dir_stat, '')
73
 
            state.save()
74
 
        finally:
75
 
            state.unlock()
76
 
        return state
 
99
        return self.build_helper([(10, 0), (10, 0), (10, 20)])
77
100
 
78
101
    def test_build_20k_dirblocks(self):
79
102
        state = self.time(self.build_20k_dirstate)
93
116
            self.time(state._read_dirblocks_if_needed)
94
117
        finally:
95
118
            state.unlock()
 
119
 
 
120
    def do_bisect_list(self, bisect_func):
 
121
        """Call bisect_dirblock for each path."""
 
122
        # We use self._paths and self._blocks because we expect it to be a very
 
123
        # long list. And the interface for 'self.time()' causes the parameters
 
124
        # to be printed when run with --lsprof-timed. Which is *really* ugly
 
125
        # when the list is thousands of entries.
 
126
        blocks = self._blocks
 
127
        return [bisect_func(blocks, path) for path in self._paths]
 
128
 
 
129
    def do_bisect_list_cached(self, bisect_func):
 
130
        """Same as do_bisect_list, but cache the split paths"""
 
131
        cache = {}
 
132
        blocks = self._blocks
 
133
        return [bisect_func(blocks, path, cache=cache) for path in self._paths]
 
134
 
 
135
    def setup_paths_and_offsets(self, state):
 
136
        """Get a list of paths and expected offsets.
 
137
 
 
138
        This will be used to check do_bisect_list*
 
139
        """
 
140
        state._read_dirblocks_if_needed()
 
141
        paths = ['']
 
142
        expected_offsets = [0]
 
143
        for offset, info in enumerate(state._dirblocks):
 
144
            dirname = info[0]
 
145
            # We already handled the empty path
 
146
            if dirname == '':
 
147
                continue
 
148
            # all paths are of the form ##_directory
 
149
            # so search for ##_director, ##_directory
 
150
            paths.extend([dirname[:-1], dirname])
 
151
            expected_offsets.extend([offset, offset])
 
152
        self._paths = paths
 
153
        self._expected_offsets = expected_offsets
 
154
        self._blocks = state._dirblocks
 
155
 
 
156
    def checkOffsets(self, offsets):
 
157
        """Make sure offsets matches self._expected_offsets"""
 
158
        # These are really long lists, so it is easier to compare them with
 
159
        # assertEqualDiff. So turn them into strings.
 
160
        expected_str = '\n'.join(str(x) for x in self._expected_offsets)
 
161
        offset_str = '\n'.join(str(x) for x in offsets)
 
162
        self.assertEqualDiff(expected_str, offset_str)
 
163
 
 
164
    def test_bisect_dirblock(self):
 
165
        state = self.build_1k_dirstate_dirs()
 
166
        state.lock_read()
 
167
        try:
 
168
            self.setup_paths_and_offsets(state)
 
169
            offsets = self.time(self.do_bisect_list, dirstate.bisect_dirblock)
 
170
            self.checkOffsets(offsets)
 
171
        finally:
 
172
            state.unlock()
 
173
 
 
174
    def test_bisect_dirblock_cached(self):
 
175
        state = self.build_1k_dirstate_dirs()
 
176
        state.lock_read()
 
177
        try:
 
178
            self.setup_paths_and_offsets(state)
 
179
            offsets = self.time(self.do_bisect_list_cached,
 
180
                                dirstate.bisect_dirblock)
 
181
            self.checkOffsets(offsets)
 
182
        finally:
 
183
            state.unlock()
 
184
 
 
185
    def test_bisect_dirblock_compiled(self):
 
186
        self.requireFeature(CompiledDirstateHelpersFeature)
 
187
        state = self.build_1k_dirstate_dirs()
 
188
        state.lock_read()
 
189
        try:
 
190
            self.setup_paths_and_offsets(state)
 
191
            offsets = self.time(self.do_bisect_list, dirstate.bisect_dirblock)
 
192
            self.checkOffsets(offsets)
 
193
        finally:
 
194
            state.unlock()
 
195
 
 
196
    def test_bisect_dirblock_compiled_cached(self):
 
197
        self.requireFeature(CompiledDirstateHelpersFeature)
 
198
        state = self.build_1k_dirstate_dirs()
 
199
        state.lock_read()
 
200
        try:
 
201
            self.setup_paths_and_offsets(state)
 
202
            offsets = self.time(self.do_bisect_list_cached,
 
203
                                dirstate.bisect_dirblock)
 
204
            self.checkOffsets(offsets)
 
205
        finally:
 
206
            state.unlock()