~bzr-pqm/bzr/bzr.dev

2890.2.3 by Robert Collins
* New module ``bzrlib.bisect_multi`` with generic multiple-bisection-at-once
1
# Copyright (C) 2007 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Tests for bisect_multi."""
18
19
from bzrlib import errors
20
from bzrlib.bisect_multi import bisect_multi_bytes
21
from bzrlib.tests import TestCase
22
23
24
class TestBisectMultiBytes(TestCase):
25
26
    def test_lookup_no_keys_no_calls(self):
27
        calls = []
28
        def missing_content(location_keys):
29
            calls.append(location_keys)
30
            return ((location_key, False) for location_key in location_keys)
31
        self.assertEqual([],
32
            list(bisect_multi_bytes(missing_content, 100, [])))
33
        self.assertEqual([], calls)
34
35
    def test_lookup_missing_key_no_content(self):
36
        """Doing a lookup in a zero-length file still does a single request.
3943.8.1 by Marius Kruger
remove all trailing whitespace from bzr source
37
2890.2.3 by Robert Collins
* New module ``bzrlib.bisect_multi`` with generic multiple-bisection-at-once
38
        This makes sense because the bisector cannot tell how long content is
39
        and its more flexible to only stop when the content object says 'False'
40
        for a given location, key pair.
41
        """
42
        calls = []
43
        def missing_content(location_keys):
44
            calls.append(location_keys)
45
            return ((location_key, False) for location_key in location_keys)
46
        self.assertEqual([],
47
            list(bisect_multi_bytes(missing_content, 0, ['foo', 'bar'])))
48
        self.assertEqual([[(0, 'foo'), (0, 'bar')]], calls)
49
50
    def test_lookup_missing_key_before_all_others(self):
51
        calls = []
52
        def missing_first_content(location_keys):
53
            # returns -1 for all keys unless the byte offset is 0 when it
54
            # returns False
55
            calls.append(location_keys)
56
            result = []
57
            for location_key in location_keys:
58
                if location_key[0] == 0:
59
                    result.append((location_key, False))
60
                else:
61
                    result.append((location_key, -1))
62
            return result
63
        # given a 0 length file, this should terminate with one call.
64
        self.assertEqual([],
65
            list(bisect_multi_bytes(missing_first_content, 0, ['foo', 'bar'])))
66
        self.assertEqual([[(0, 'foo'), (0, 'bar')]], calls)
67
        del calls[:]
68
        # given a 2 length file, this should make two calls - 1, 0.
69
        self.assertEqual([],
70
            list(bisect_multi_bytes(missing_first_content, 2, ['foo', 'bar'])))
71
        self.assertEqual([
72
            [(1, 'foo'), (1, 'bar')],
73
            [(0, 'foo'), (0, 'bar')],
74
            ], calls)
75
        del calls[:]
76
        # given a really long file - 200MB, this should make a series of calls with the
77
        # gap between adjactent calls dropping by 50% each time. We choose a
78
        # length which just under a power of two to generate a corner case in
79
        # bisection - naively using power of two reduction in size can lead to
80
        # a very long tail in the bisection process. The current users of
81
        # the bisect_multi_bytes api are not expected to be concerned by this,
82
        # as the delta gets down to 4K (the minimum we expect to read and
83
        # parse) within 16 steps even on a 200MB index (which at 4 keys/K is
84
        # 800 thousand keys, and log2 of 800000 is 19 - so we're doing log2
85
        # steps in the worst case there.
86
        self.assertEqual([],
87
            list(bisect_multi_bytes(
88
                missing_first_content,268435456 - 1 , ['foo', 'bar'])))
89
        self.assertEqual([
90
            [(134217727, 'foo'), (134217727, 'bar')],
91
            [(67108864, 'foo'), (67108864, 'bar')],
92
            [(33554433, 'foo'), (33554433, 'bar')],
93
            [(16777218, 'foo'), (16777218, 'bar')],
94
            [(8388611, 'foo'), (8388611, 'bar')],
95
            [(4194308, 'foo'), (4194308, 'bar')],
96
            [(2097157, 'foo'), (2097157, 'bar')],
97
            [(1048582, 'foo'), (1048582, 'bar')],
98
            [(524295, 'foo'), (524295, 'bar')],
99
            [(262152, 'foo'), (262152, 'bar')],
100
            [(131081, 'foo'), (131081, 'bar')],
101
            [(65546, 'foo'), (65546, 'bar')],
102
            [(32779, 'foo'), (32779, 'bar')],
103
            [(16396, 'foo'), (16396, 'bar')],
104
            [(8205, 'foo'), (8205, 'bar')],
105
            [(4110, 'foo'), (4110, 'bar')],
106
            [(2063, 'foo'), (2063, 'bar')],
107
            [(1040, 'foo'), (1040, 'bar')],
108
            [(529, 'foo'), (529, 'bar')],
109
            [(274, 'foo'), (274, 'bar')],
110
            [(147, 'foo'), (147, 'bar')],
111
            [(84, 'foo'), (84, 'bar')],
112
            [(53, 'foo'), (53, 'bar')],
113
            [(38, 'foo'), (38, 'bar')],
114
            [(31, 'foo'), (31, 'bar')],
115
            [(28, 'foo'), (28, 'bar')],
116
            [(27, 'foo'), (27, 'bar')],
117
            [(26, 'foo'), (26, 'bar')],
118
            [(25, 'foo'), (25, 'bar')],
119
            [(24, 'foo'), (24, 'bar')],
120
            [(23, 'foo'), (23, 'bar')],
121
            [(22, 'foo'), (22, 'bar')],
122
            [(21, 'foo'), (21, 'bar')],
123
            [(20, 'foo'), (20, 'bar')],
124
            [(19, 'foo'), (19, 'bar')],
125
            [(18, 'foo'), (18, 'bar')],
126
            [(17, 'foo'), (17, 'bar')],
127
            [(16, 'foo'), (16, 'bar')],
128
            [(15, 'foo'), (15, 'bar')],
129
            [(14, 'foo'), (14, 'bar')],
130
            [(13, 'foo'), (13, 'bar')],
131
            [(12, 'foo'), (12, 'bar')],
132
            [(11, 'foo'), (11, 'bar')],
133
            [(10, 'foo'), (10, 'bar')],
134
            [(9, 'foo'), (9, 'bar')],
135
            [(8, 'foo'), (8, 'bar')],
136
            [(7, 'foo'), (7, 'bar')],
137
            [(6, 'foo'), (6, 'bar')],
138
            [(5, 'foo'), (5, 'bar')],
139
            [(4, 'foo'), (4, 'bar')],
140
            [(3, 'foo'), (3, 'bar')],
141
            [(2, 'foo'), (2, 'bar')],
142
            [(1, 'foo'), (1, 'bar')],
143
            [(0, 'foo'), (0, 'bar')],
144
            ], calls)
145
146
    def test_lookup_missing_key_after_all_others(self):
147
        calls = []
148
        end = None
149
        def missing_last_content(location_keys):
150
            # returns +1 for all keys unless the byte offset is 'end' when it
151
            # returns False
152
            calls.append(location_keys)
153
            result = []
154
            for location_key in location_keys:
155
                if location_key[0] == end:
156
                    result.append((location_key, False))
157
                else:
158
                    result.append((location_key, +1))
159
            return result
160
        # given a 0 length file, this should terminate with one call.
161
        end = 0
162
        self.assertEqual([],
163
            list(bisect_multi_bytes(missing_last_content, 0, ['foo', 'bar'])))
164
        self.assertEqual([[(0, 'foo'), (0, 'bar')]], calls)
165
        del calls[:]
166
        end = 2
167
        # given a 3 length file, this should make two calls - 1, 2.
168
        self.assertEqual([],
169
            list(bisect_multi_bytes(missing_last_content, 3, ['foo', 'bar'])))
170
        self.assertEqual([
171
            [(1, 'foo'), (1, 'bar')],
172
            [(2, 'foo'), (2, 'bar')],
173
            ], calls)
174
        del calls[:]
175
        end = 268435456 - 2
176
        # see the really-big lookup series in
177
        # test_lookup_missing_key_before_all_others for details about this
178
        # assertion.
179
        self.assertEqual([],
180
            list(bisect_multi_bytes(
181
                missing_last_content,268435456 - 1 , ['foo', 'bar'])))
182
        self.assertEqual([
183
            [(134217727, 'foo'), (134217727, 'bar')],
184
            [(201326590, 'foo'), (201326590, 'bar')],
185
            [(234881021, 'foo'), (234881021, 'bar')],
186
            [(251658236, 'foo'), (251658236, 'bar')],
187
            [(260046843, 'foo'), (260046843, 'bar')],
188
            [(264241146, 'foo'), (264241146, 'bar')],
189
            [(266338297, 'foo'), (266338297, 'bar')],
190
            [(267386872, 'foo'), (267386872, 'bar')],
191
            [(267911159, 'foo'), (267911159, 'bar')],
192
            [(268173302, 'foo'), (268173302, 'bar')],
193
            [(268304373, 'foo'), (268304373, 'bar')],
194
            [(268369908, 'foo'), (268369908, 'bar')],
195
            [(268402675, 'foo'), (268402675, 'bar')],
196
            [(268419058, 'foo'), (268419058, 'bar')],
197
            [(268427249, 'foo'), (268427249, 'bar')],
198
            [(268431344, 'foo'), (268431344, 'bar')],
199
            [(268433391, 'foo'), (268433391, 'bar')],
200
            [(268434414, 'foo'), (268434414, 'bar')],
201
            [(268434925, 'foo'), (268434925, 'bar')],
202
            [(268435180, 'foo'), (268435180, 'bar')],
203
            [(268435307, 'foo'), (268435307, 'bar')],
204
            [(268435370, 'foo'), (268435370, 'bar')],
205
            [(268435401, 'foo'), (268435401, 'bar')],
206
            [(268435416, 'foo'), (268435416, 'bar')],
207
            [(268435423, 'foo'), (268435423, 'bar')],
208
            [(268435426, 'foo'), (268435426, 'bar')],
209
            [(268435427, 'foo'), (268435427, 'bar')],
210
            [(268435428, 'foo'), (268435428, 'bar')],
211
            [(268435429, 'foo'), (268435429, 'bar')],
212
            [(268435430, 'foo'), (268435430, 'bar')],
213
            [(268435431, 'foo'), (268435431, 'bar')],
214
            [(268435432, 'foo'), (268435432, 'bar')],
215
            [(268435433, 'foo'), (268435433, 'bar')],
216
            [(268435434, 'foo'), (268435434, 'bar')],
217
            [(268435435, 'foo'), (268435435, 'bar')],
218
            [(268435436, 'foo'), (268435436, 'bar')],
219
            [(268435437, 'foo'), (268435437, 'bar')],
220
            [(268435438, 'foo'), (268435438, 'bar')],
221
            [(268435439, 'foo'), (268435439, 'bar')],
222
            [(268435440, 'foo'), (268435440, 'bar')],
223
            [(268435441, 'foo'), (268435441, 'bar')],
224
            [(268435442, 'foo'), (268435442, 'bar')],
225
            [(268435443, 'foo'), (268435443, 'bar')],
226
            [(268435444, 'foo'), (268435444, 'bar')],
227
            [(268435445, 'foo'), (268435445, 'bar')],
228
            [(268435446, 'foo'), (268435446, 'bar')],
229
            [(268435447, 'foo'), (268435447, 'bar')],
230
            [(268435448, 'foo'), (268435448, 'bar')],
231
            [(268435449, 'foo'), (268435449, 'bar')],
232
            [(268435450, 'foo'), (268435450, 'bar')],
233
            [(268435451, 'foo'), (268435451, 'bar')],
234
            [(268435452, 'foo'), (268435452, 'bar')],
235
            [(268435453, 'foo'), (268435453, 'bar')],
236
            [(268435454, 'foo'), (268435454, 'bar')]
237
            ], calls)
238
239
    def test_lookup_when_a_key_is_missing_continues(self):
240
        calls = []
241
        def missing_foo_otherwise_missing_first_content(location_keys):
242
            # returns -1 for all keys unless the byte offset is 0 when it
243
            # returns False
244
            calls.append(location_keys)
245
            result = []
246
            for location_key in location_keys:
247
                if location_key[1] == 'foo' or location_key[0] == 0:
248
                    result.append((location_key, False))
249
                else:
250
                    result.append((location_key, -1))
251
            return result
252
        # given a 2 length file, this should terminate with two calls, one for
253
        # both keys, and one for bar only.
254
        self.assertEqual([],
255
            list(bisect_multi_bytes(
256
                missing_foo_otherwise_missing_first_content, 2,
257
                ['foo', 'bar'])))
258
        self.assertEqual([
259
            [(1, 'foo'), (1, 'bar')],
260
            [(0, 'bar')],
261
            ], calls)
262
263
    def test_found_keys_returned_other_searches_continue(self):
264
        calls = []
265
        def find_bar_at_1_foo_missing_at_0(location_keys):
266
            calls.append(location_keys)
267
            result = []
268
            for location_key in location_keys:
269
                if location_key == (1, 'bar'):
270
                    result.append((location_key, 'bar-result'))
271
                elif location_key[0] == 0:
272
                    result.append((location_key, False))
273
                else:
274
                    result.append((location_key, -1))
275
            return result
276
        # given a 4 length file, this should terminate with three calls, two for
277
        # both keys, and one for foo only.
278
        self.assertEqual([('bar', 'bar-result')],
279
            list(bisect_multi_bytes(
280
                find_bar_at_1_foo_missing_at_0, 4,
281
                ['foo', 'bar'])))
282
        self.assertEqual([
283
            [(2, 'foo'), (2, 'bar')],
284
            [(1, 'foo'), (1, 'bar')],
285
            [(0, 'foo')],
286
            ], calls)
287
288
    def test_searches_different_keys_in_different_directions(self):
289
        calls = []
290
        def missing_bar_at_1_foo_at_3(location_keys):
291
            calls.append(location_keys)
292
            result = []
293
            for location_key in location_keys:
294
                if location_key[1] == 'bar':
295
                    if location_key[0] == 1:
296
                        result.append((location_key, False))
297
                    else:
298
                        # search down
299
                        result.append((location_key, -1))
300
                elif location_key[1] == 'foo':
301
                    if location_key[0] == 3:
302
                        result.append((location_key, False))
303
                    else:
304
                        # search up
305
                        result.append((location_key, +1))
306
            return result
307
        # given a 4 length file, this should terminate with two calls.
308
        self.assertEqual([],
309
            list(bisect_multi_bytes(
310
                missing_bar_at_1_foo_at_3, 4,
311
                ['foo', 'bar'])))
312
        self.assertEqual([
313
            [(2, 'foo'), (2, 'bar')],
314
            [(3, 'foo'), (1, 'bar')],
315
            ], calls)
316
317
    def test_change_direction_in_single_key_search(self):
3943.8.1 by Marius Kruger
remove all trailing whitespace from bzr source
318
        # check that we can search down, up, down again -
2890.2.3 by Robert Collins
* New module ``bzrlib.bisect_multi`` with generic multiple-bisection-at-once
319
        # so length 8, goes 4, 6, 5
320
        calls = []
321
        def missing_at_5(location_keys):
322
            calls.append(location_keys)
323
            result = []
324
            for location_key in location_keys:
325
                if location_key[0] == 5:
326
                    result.append((location_key, False))
327
                elif location_key[0] > 5:
328
                    # search down
329
                    result.append((location_key, -1))
330
                else:
331
                    # search up
332
                    result.append((location_key, +1))
333
            return result
334
        # given a 8 length file, this should terminate with three calls.
335
        self.assertEqual([],
336
            list(bisect_multi_bytes(
337
                missing_at_5, 8,
338
                ['foo', 'bar'])))
339
        self.assertEqual([
340
            [(4, 'foo'), (4, 'bar')],
341
            [(6, 'foo'), (6, 'bar')],
342
            [(5, 'foo'), (5, 'bar')],
343
            ], calls)
344