~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/tests/test_btree_index.py

  • Committer: Canonical.com Patch Queue Manager
  • Date: 2008-09-03 22:30:56 UTC
  • mfrom: (3644.2.13 index_builder_cleanup)
  • Revision ID: pqm@pqm.ubuntu.com-20080903223056-b108iytb38xkznci
(jam) Streamline BTreeBuilder.add_node et al to make btree creation
        faster.

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
# Copyright (C) 2008-2012, 2016 Canonical Ltd
 
1
# Copyright (C) 2008 Canonical Ltd
2
2
#
3
3
# This program is free software; you can redistribute it and/or modify
4
4
# it under the terms of the GNU General Public License as published by
12
12
#
13
13
# You should have received a copy of the GNU General Public License
14
14
# along with this program; if not, write to the Free Software
15
 
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
16
#
17
17
 
18
18
"""Tests for btree indices."""
23
23
from bzrlib import (
24
24
    btree_index,
25
25
    errors,
26
 
    fifo_cache,
27
 
    lru_cache,
28
 
    osutils,
29
26
    tests,
30
 
    transport,
31
27
    )
32
28
from bzrlib.tests import (
33
29
    TestCaseWithTransport,
34
 
    scenarios,
35
 
    )
36
 
from bzrlib.tests import (
37
 
    features,
38
 
    )
39
 
 
40
 
 
41
 
load_tests = scenarios.load_tests_apply_scenarios
42
 
 
43
 
 
44
 
def btreeparser_scenarios():
 
30
    TestScenarioApplier,
 
31
    adapt_tests,
 
32
    condition_isinstance,
 
33
    split_suite_by_condition,
 
34
    )
 
35
from bzrlib.transport import get_transport
 
36
 
 
37
 
 
38
def load_tests(standard_tests, module, loader):
 
39
    # parameterise the TestBTreeNodes tests
 
40
    node_tests, others = split_suite_by_condition(standard_tests,
 
41
        condition_isinstance(TestBTreeNodes))
 
42
    applier = TestScenarioApplier()
45
43
    import bzrlib._btree_serializer_py as py_module
46
 
    scenarios = [('python', {'parse_btree': py_module})]
47
 
    if compiled_btreeparser_feature.available():
48
 
        scenarios.append(('C', 
49
 
            {'parse_btree': compiled_btreeparser_feature.module}))
50
 
    return scenarios
51
 
 
52
 
 
53
 
compiled_btreeparser_feature = features.ModuleAvailableFeature(
54
 
    'bzrlib._btree_serializer_pyx')
 
44
    applier.scenarios = [('python', {'parse_btree': py_module})]
 
45
    if CompiledBtreeParserFeature.available():
 
46
        # Is there a way to do this that gets missing feature failures rather
 
47
        # than no indication to the user?
 
48
        import bzrlib._btree_serializer_c as c_module
 
49
        applier.scenarios.append(('C', {'parse_btree': c_module}))
 
50
    adapt_tests(node_tests, applier, others)
 
51
    return others
 
52
 
 
53
 
 
54
class _CompiledBtreeParserFeature(tests.Feature):
 
55
    def _probe(self):
 
56
        try:
 
57
            import bzrlib._btree_serializer_c
 
58
        except ImportError:
 
59
            return False
 
60
        return True
 
61
 
 
62
    def feature_name(self):
 
63
        return 'bzrlib._btree_serializer_c'
 
64
 
 
65
CompiledBtreeParserFeature = _CompiledBtreeParserFeature()
55
66
 
56
67
 
57
68
class BTreeTestCase(TestCaseWithTransport):
59
70
    # that they test.
60
71
 
61
72
    def setUp(self):
62
 
        super(BTreeTestCase, self).setUp()
63
 
        self.overrideAttr(btree_index, '_RESERVED_HEADER_BYTES', 100)
 
73
        TestCaseWithTransport.setUp(self)
 
74
        self._original_header = btree_index._RESERVED_HEADER_BYTES
 
75
        def restore():
 
76
            btree_index._RESERVED_HEADER_BYTES = self._original_header
 
77
        self.addCleanup(restore)
 
78
        btree_index._RESERVED_HEADER_BYTES = 100
64
79
 
65
80
    def make_nodes(self, count, key_elements, reference_lists):
66
81
        """Generate count*key_elements sample nodes."""
100
115
 
101
116
    def shrink_page_size(self):
102
117
        """Shrink the default page size so that less fits in a page."""
103
 
        self.overrideAttr(btree_index, '_PAGE_SIZE')
 
118
        old_page_size = btree_index._PAGE_SIZE
 
119
        def cleanup():
 
120
            btree_index._PAGE_SIZE = old_page_size
 
121
        self.addCleanup(cleanup)
104
122
        btree_index._PAGE_SIZE = 2048
105
123
 
106
 
    def assertEqualApproxCompressed(self, expected, actual, slop=6):
107
 
        """Check a count of compressed bytes is approximately as expected
108
 
 
109
 
        Relying on compressed length being stable even with fixed inputs is
110
 
        slightly bogus, but zlib is stable enough that this mostly works.
111
 
        """
112
 
        if not expected - slop < actual < expected + slop:
113
 
            self.fail("Expected around %d bytes compressed but got %d" %
114
 
                (expected, actual))
115
 
 
116
124
 
117
125
class TestBTreeBuilder(BTreeTestCase):
118
126
 
119
 
    def test_clear_cache(self):
120
 
        builder = btree_index.BTreeBuilder(reference_lists=0, key_elements=1)
121
 
        # This is a no-op, but we need the api to be consistent with other
122
 
        # BTreeGraphIndex apis.
123
 
        builder.clear_cache()
124
 
 
125
127
    def test_empty_1_0(self):
126
128
        builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
127
129
        # NamedTemporaryFile dies on builder.finish().read(). weird.
153
155
        temp_file = builder.finish()
154
156
        content = temp_file.read()
155
157
        del temp_file
156
 
        self.assertEqual(131, len(content))
 
158
        self.assertEqual(158, len(content))
157
159
        self.assertEqual(
158
160
            "B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=5\n"
159
161
            "row_lengths=1\n",
177
179
        temp_file = builder.finish()
178
180
        content = temp_file.read()
179
181
        del temp_file
180
 
        self.assertEqual(238, len(content))
 
182
        self.assertEqual(264, len(content))
181
183
        self.assertEqual(
182
184
            "B+Tree Graph Index 2\nnode_ref_lists=2\nkey_elements=2\nlen=10\n"
183
185
            "row_lengths=1\n",
209
211
        temp_file = builder.finish()
210
212
        content = temp_file.read()
211
213
        del temp_file
212
 
        self.assertEqualApproxCompressed(9283, len(content))
 
214
        self.assertEqual(9283, len(content))
213
215
        self.assertEqual(
214
216
            "B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=400\n"
215
217
            "row_lengths=1,2\n",
227
229
        leaf1_bytes = zlib.decompress(leaf1)
228
230
        sorted_node_keys = sorted(node[0] for node in nodes)
229
231
        node = btree_index._LeafNode(leaf1_bytes, 1, 0)
230
 
        self.assertEqual(231, len(node))
231
 
        self.assertEqual(sorted_node_keys[:231], node.all_keys())
 
232
        self.assertEqual(231, len(node.keys))
 
233
        self.assertEqual(sorted_node_keys[:231], sorted(node.keys))
232
234
        leaf2_bytes = zlib.decompress(leaf2)
233
235
        node = btree_index._LeafNode(leaf2_bytes, 1, 0)
234
 
        self.assertEqual(400 - 231, len(node))
235
 
        self.assertEqual(sorted_node_keys[231:], node.all_keys())
 
236
        self.assertEqual(400 - 231, len(node.keys))
 
237
        self.assertEqual(sorted_node_keys[231:], sorted(node.keys))
236
238
 
237
239
    def test_last_page_rounded_1_layer(self):
238
240
        builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
243
245
        temp_file = builder.finish()
244
246
        content = temp_file.read()
245
247
        del temp_file
246
 
        self.assertEqualApproxCompressed(155, len(content))
 
248
        self.assertEqual(181, len(content))
247
249
        self.assertEqual(
248
250
            "B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=10\n"
249
251
            "row_lengths=1\n",
252
254
        leaf2 = content[74:]
253
255
        leaf2_bytes = zlib.decompress(leaf2)
254
256
        node = btree_index._LeafNode(leaf2_bytes, 1, 0)
255
 
        self.assertEqual(10, len(node))
 
257
        self.assertEqual(10, len(node.keys))
256
258
        sorted_node_keys = sorted(node[0] for node in nodes)
257
 
        self.assertEqual(sorted_node_keys, node.all_keys())
 
259
        self.assertEqual(sorted_node_keys, sorted(node.keys))
258
260
 
259
261
    def test_last_page_not_rounded_2_layer(self):
260
262
        builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
265
267
        temp_file = builder.finish()
266
268
        content = temp_file.read()
267
269
        del temp_file
268
 
        self.assertEqualApproxCompressed(9283, len(content))
 
270
        self.assertEqual(9283, len(content))
269
271
        self.assertEqual(
270
272
            "B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=400\n"
271
273
            "row_lengths=1,2\n",
274
276
        leaf2 = content[8192:]
275
277
        leaf2_bytes = zlib.decompress(leaf2)
276
278
        node = btree_index._LeafNode(leaf2_bytes, 1, 0)
277
 
        self.assertEqual(400 - 231, len(node))
 
279
        self.assertEqual(400 - 231, len(node.keys))
278
280
        sorted_node_keys = sorted(node[0] for node in nodes)
279
 
        self.assertEqual(sorted_node_keys[231:], node.all_keys())
 
281
        self.assertEqual(sorted_node_keys[231:], sorted(node.keys))
280
282
 
281
283
    def test_three_level_tree_details(self):
282
284
        # The left most pointer in the second internal node in a row should
291
293
 
292
294
        for node in nodes:
293
295
            builder.add_node(*node)
294
 
        t = transport.get_transport_from_url('trace+' + self.get_url(''))
295
 
        size = t.put_file('index', self.time(builder.finish))
 
296
        transport = get_transport('trace+' + self.get_url(''))
 
297
        size = transport.put_file('index', self.time(builder.finish))
296
298
        del builder
297
 
        index = btree_index.BTreeGraphIndex(t, 'index', size)
 
299
        index = btree_index.BTreeGraphIndex(transport, 'index', size)
298
300
        # Seed the metadata, we're using internal calls now.
299
301
        index.key_count()
300
302
        self.assertEqual(3, len(index._row_lengths),
313
315
        # in the second node it points at
314
316
        pos = index._row_offsets[2] + internal_node2.offset + 1
315
317
        leaf = index._get_leaf_nodes([pos])[pos]
316
 
        self.assertTrue(internal_node2.keys[0] in leaf)
 
318
        self.assertTrue(internal_node2.keys[0] in leaf.keys)
317
319
 
318
320
    def test_2_leaves_2_2(self):
319
321
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
324
326
        temp_file = builder.finish()
325
327
        content = temp_file.read()
326
328
        del temp_file
327
 
        self.assertEqualApproxCompressed(12643, len(content))
 
329
        self.assertEqual(12643, len(content))
328
330
        self.assertEqual(
329
331
            "B+Tree Graph Index 2\nnode_ref_lists=2\nkey_elements=2\nlen=200\n"
330
332
            "row_lengths=1,3\n",
351
353
        # Test the parts of the index that take up memory are doing so
352
354
        # predictably.
353
355
        self.assertEqual(1, len(builder._nodes))
 
356
        self.assertEqual(1, len(builder._keys))
354
357
        self.assertIs(None, builder._nodes_by_key)
355
358
        builder.add_node(*nodes[1])
356
359
        self.assertEqual(0, len(builder._nodes))
 
360
        self.assertEqual(0, len(builder._keys))
357
361
        self.assertIs(None, builder._nodes_by_key)
358
362
        self.assertEqual(1, len(builder._backing_indices))
359
363
        self.assertEqual(2, builder._backing_indices[0].key_count())
360
364
        # now back to memory
361
365
        builder.add_node(*nodes[2])
362
366
        self.assertEqual(1, len(builder._nodes))
 
367
        self.assertEqual(1, len(builder._keys))
363
368
        self.assertIs(None, builder._nodes_by_key)
364
369
        # And spills to a second backing index combing all
365
370
        builder.add_node(*nodes[3])
366
371
        self.assertEqual(0, len(builder._nodes))
 
372
        self.assertEqual(0, len(builder._keys))
367
373
        self.assertIs(None, builder._nodes_by_key)
368
374
        self.assertEqual(2, len(builder._backing_indices))
369
375
        self.assertEqual(None, builder._backing_indices[0])
372
378
        builder.add_node(*nodes[4])
373
379
        builder.add_node(*nodes[5])
374
380
        self.assertEqual(0, len(builder._nodes))
 
381
        self.assertEqual(0, len(builder._keys))
375
382
        self.assertIs(None, builder._nodes_by_key)
376
383
        self.assertEqual(2, len(builder._backing_indices))
377
384
        self.assertEqual(2, builder._backing_indices[0].key_count())
420
427
        self.assertEqual(None, builder._backing_indices[2])
421
428
        self.assertEqual(16, builder._backing_indices[3].key_count())
422
429
        # Now finish, and check we got a correctly ordered tree
423
 
        t = self.get_transport('')
424
 
        size = t.put_file('index', builder.finish())
425
 
        index = btree_index.BTreeGraphIndex(t, 'index', size)
426
 
        nodes = list(index.iter_all_entries())
427
 
        self.assertEqual(sorted(nodes), nodes)
428
 
        self.assertEqual(16, len(nodes))
429
 
 
430
 
    def test_spill_index_stress_1_1_no_combine(self):
431
 
        builder = btree_index.BTreeBuilder(key_elements=1, spill_at=2)
432
 
        builder.set_optimize(for_size=False, combine_backing_indices=False)
433
 
        nodes = [node[0:2] for node in self.make_nodes(16, 1, 0)]
434
 
        builder.add_node(*nodes[0])
435
 
        # Test the parts of the index that take up memory are doing so
436
 
        # predictably.
437
 
        self.assertEqual(1, len(builder._nodes))
438
 
        self.assertIs(None, builder._nodes_by_key)
439
 
        builder.add_node(*nodes[1])
440
 
        self.assertEqual(0, len(builder._nodes))
441
 
        self.assertIs(None, builder._nodes_by_key)
442
 
        self.assertEqual(1, len(builder._backing_indices))
443
 
        self.assertEqual(2, builder._backing_indices[0].key_count())
444
 
        # now back to memory
445
 
        builder.add_node(*nodes[2])
446
 
        self.assertEqual(1, len(builder._nodes))
447
 
        self.assertIs(None, builder._nodes_by_key)
448
 
        # And spills to a second backing index but doesn't combine
449
 
        builder.add_node(*nodes[3])
450
 
        self.assertEqual(0, len(builder._nodes))
451
 
        self.assertIs(None, builder._nodes_by_key)
452
 
        self.assertEqual(2, len(builder._backing_indices))
453
 
        for backing_index in builder._backing_indices:
454
 
            self.assertEqual(2, backing_index.key_count())
455
 
        # The next spills to the 3rd slot
456
 
        builder.add_node(*nodes[4])
457
 
        builder.add_node(*nodes[5])
458
 
        self.assertEqual(0, len(builder._nodes))
459
 
        self.assertIs(None, builder._nodes_by_key)
460
 
        self.assertEqual(3, len(builder._backing_indices))
461
 
        for backing_index in builder._backing_indices:
462
 
            self.assertEqual(2, backing_index.key_count())
463
 
        # Now spill a few more, and check that we don't combine
464
 
        builder.add_node(*nodes[6])
465
 
        builder.add_node(*nodes[7])
466
 
        builder.add_node(*nodes[8])
467
 
        builder.add_node(*nodes[9])
468
 
        builder.add_node(*nodes[10])
469
 
        builder.add_node(*nodes[11])
470
 
        builder.add_node(*nodes[12])
471
 
        self.assertEqual(6, len(builder._backing_indices))
472
 
        for backing_index in builder._backing_indices:
473
 
            self.assertEqual(2, backing_index.key_count())
474
 
        # Test that memory and disk are both used for query methods; and that
475
 
        # None is skipped over happily.
476
 
        self.assertEqual([(builder,) + node for node in sorted(nodes[:13])],
477
 
            list(builder.iter_all_entries()))
478
 
        # Two nodes - one memory one disk
479
 
        self.assertEqual(set([(builder,) + node for node in nodes[11:13]]),
480
 
            set(builder.iter_entries([nodes[12][0], nodes[11][0]])))
481
 
        self.assertEqual(13, builder.key_count())
482
 
        self.assertEqual(set([(builder,) + node for node in nodes[11:13]]),
483
 
            set(builder.iter_entries_prefix([nodes[12][0], nodes[11][0]])))
484
 
        builder.add_node(*nodes[13])
485
 
        builder.add_node(*nodes[14])
486
 
        builder.add_node(*nodes[15])
487
 
        self.assertEqual(8, len(builder._backing_indices))
488
 
        for backing_index in builder._backing_indices:
489
 
            self.assertEqual(2, backing_index.key_count())
490
 
        # Now finish, and check we got a correctly ordered tree
491
430
        transport = self.get_transport('')
492
431
        size = transport.put_file('index', builder.finish())
493
432
        index = btree_index.BTreeGraphIndex(transport, 'index', size)
495
434
        self.assertEqual(sorted(nodes), nodes)
496
435
        self.assertEqual(16, len(nodes))
497
436
 
498
 
    def test_set_optimize(self):
499
 
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
500
 
        builder.set_optimize(for_size=True)
501
 
        self.assertTrue(builder._optimize_for_size)
502
 
        builder.set_optimize(for_size=False)
503
 
        self.assertFalse(builder._optimize_for_size)
504
 
        # test that we can set combine_backing_indices without effecting
505
 
        # _optimize_for_size
506
 
        obj = object()
507
 
        builder._optimize_for_size = obj
508
 
        builder.set_optimize(combine_backing_indices=False)
509
 
        self.assertFalse(builder._combine_backing_indices)
510
 
        self.assertIs(obj, builder._optimize_for_size)
511
 
        builder.set_optimize(combine_backing_indices=True)
512
 
        self.assertTrue(builder._combine_backing_indices)
513
 
        self.assertIs(obj, builder._optimize_for_size)
514
 
 
515
437
    def test_spill_index_stress_2_2(self):
516
438
        # test that references and longer keys don't confuse things.
517
439
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2,
520
442
        builder.add_node(*nodes[0])
521
443
        # Test the parts of the index that take up memory are doing so
522
444
        # predictably.
 
445
        self.assertEqual(1, len(builder._keys))
523
446
        self.assertEqual(1, len(builder._nodes))
524
447
        self.assertIs(None, builder._nodes_by_key)
525
448
        builder.add_node(*nodes[1])
 
449
        self.assertEqual(0, len(builder._keys))
526
450
        self.assertEqual(0, len(builder._nodes))
527
451
        self.assertIs(None, builder._nodes_by_key)
528
452
        self.assertEqual(1, len(builder._backing_indices))
531
455
        old = dict(builder._get_nodes_by_key()) #Build up the nodes by key dict
532
456
        builder.add_node(*nodes[2])
533
457
        self.assertEqual(1, len(builder._nodes))
 
458
        self.assertEqual(1, len(builder._keys))
534
459
        self.assertIsNot(None, builder._nodes_by_key)
535
460
        self.assertNotEqual({}, builder._nodes_by_key)
536
461
        # We should have a new entry
538
463
        # And spills to a second backing index combing all
539
464
        builder.add_node(*nodes[3])
540
465
        self.assertEqual(0, len(builder._nodes))
 
466
        self.assertEqual(0, len(builder._keys))
541
467
        self.assertIs(None, builder._nodes_by_key)
542
468
        self.assertEqual(2, len(builder._backing_indices))
543
469
        self.assertEqual(None, builder._backing_indices[0])
546
472
        builder.add_node(*nodes[4])
547
473
        builder.add_node(*nodes[5])
548
474
        self.assertEqual(0, len(builder._nodes))
 
475
        self.assertEqual(0, len(builder._keys))
549
476
        self.assertIs(None, builder._nodes_by_key)
550
477
        self.assertEqual(2, len(builder._backing_indices))
551
478
        self.assertEqual(2, builder._backing_indices[0].key_count())
618
545
        for key, value, references in nodes:
619
546
            builder.add_node(key, value, references)
620
547
        stream = builder.finish()
621
 
        trans = transport.get_transport_from_url('trace+' + self.get_url())
 
548
        trans = get_transport('trace+' + self.get_url())
622
549
        size = trans.put_file('index', stream)
623
550
        return btree_index.BTreeGraphIndex(trans, 'index', size)
624
551
 
625
 
    def make_index_with_offset(self, ref_lists=1, key_elements=1, nodes=[],
626
 
                               offset=0):
627
 
        builder = btree_index.BTreeBuilder(key_elements=key_elements,
628
 
                                           reference_lists=ref_lists)
629
 
        builder.add_nodes(nodes)
630
 
        transport = self.get_transport('')
631
 
        # NamedTemporaryFile dies on builder.finish().read(). weird.
632
 
        temp_file = builder.finish()
633
 
        content = temp_file.read()
634
 
        del temp_file
635
 
        size = len(content)
636
 
        transport.put_bytes('index', (' '*offset)+content)
637
 
        return btree_index.BTreeGraphIndex(transport, 'index', size=size,
638
 
                                           offset=offset)
639
 
 
640
 
    def test_clear_cache(self):
641
 
        nodes = self.make_nodes(160, 2, 2)
642
 
        index = self.make_index(ref_lists=2, key_elements=2, nodes=nodes)
643
 
        self.assertEqual(1, len(list(index.iter_entries([nodes[30][0]]))))
644
 
        self.assertEqual([1, 4], index._row_lengths)
645
 
        self.assertIsNot(None, index._root_node)
646
 
        internal_node_pre_clear = index._internal_node_cache.keys()
647
 
        self.assertTrue(len(index._leaf_node_cache) > 0)
648
 
        index.clear_cache()
649
 
        # We don't touch _root_node or _internal_node_cache, both should be
650
 
        # small, and can save a round trip or two
651
 
        self.assertIsNot(None, index._root_node)
652
 
        # NOTE: We don't want to affect the _internal_node_cache, as we expect
653
 
        #       it will be small, and if we ever do touch this index again, it
654
 
        #       will save round-trips.  This assertion isn't very strong,
655
 
        #       becuase without a 3-level index, we don't have any internal
656
 
        #       nodes cached.
657
 
        self.assertEqual(internal_node_pre_clear,
658
 
                         index._internal_node_cache.keys())
659
 
        self.assertEqual(0, len(index._leaf_node_cache))
660
 
 
661
552
    def test_trivial_constructor(self):
662
 
        t = transport.get_transport_from_url('trace+' + self.get_url(''))
663
 
        index = btree_index.BTreeGraphIndex(t, 'index', None)
 
553
        transport = get_transport('trace+' + self.get_url(''))
 
554
        index = btree_index.BTreeGraphIndex(transport, 'index', None)
664
555
        # Checks the page size at load, but that isn't logged yet.
665
 
        self.assertEqual([], t._activity)
 
556
        self.assertEqual([], transport._activity)
666
557
 
667
558
    def test_with_size_constructor(self):
668
 
        t = transport.get_transport_from_url('trace+' + self.get_url(''))
669
 
        index = btree_index.BTreeGraphIndex(t, 'index', 1)
 
559
        transport = get_transport('trace+' + self.get_url(''))
 
560
        index = btree_index.BTreeGraphIndex(transport, 'index', 1)
670
561
        # Checks the page size at load, but that isn't logged yet.
671
 
        self.assertEqual([], t._activity)
 
562
        self.assertEqual([], transport._activity)
672
563
 
673
564
    def test_empty_key_count_no_size(self):
674
565
        builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
675
 
        t = transport.get_transport_from_url('trace+' + self.get_url(''))
676
 
        t.put_file('index', builder.finish())
677
 
        index = btree_index.BTreeGraphIndex(t, 'index', None)
678
 
        del t._activity[:]
679
 
        self.assertEqual([], t._activity)
 
566
        transport = get_transport('trace+' + self.get_url(''))
 
567
        transport.put_file('index', builder.finish())
 
568
        index = btree_index.BTreeGraphIndex(transport, 'index', None)
 
569
        del transport._activity[:]
 
570
        self.assertEqual([], transport._activity)
680
571
        self.assertEqual(0, index.key_count())
681
572
        # The entire index should have been requested (as we generally have the
682
573
        # size available, and doing many small readvs is inappropriate).
683
574
        # We can't tell how much was actually read here, but - check the code.
684
 
        self.assertEqual([('get', 'index')], t._activity)
 
575
        self.assertEqual([('get', 'index'),
 
576
            ('readv', 'index', [(0, 72)], False, None)],
 
577
            transport._activity)
685
578
 
686
579
    def test_empty_key_count(self):
687
580
        builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
688
 
        t = transport.get_transport_from_url('trace+' + self.get_url(''))
689
 
        size = t.put_file('index', builder.finish())
 
581
        transport = get_transport('trace+' + self.get_url(''))
 
582
        size = transport.put_file('index', builder.finish())
690
583
        self.assertEqual(72, size)
691
 
        index = btree_index.BTreeGraphIndex(t, 'index', size)
692
 
        del t._activity[:]
693
 
        self.assertEqual([], t._activity)
 
584
        index = btree_index.BTreeGraphIndex(transport, 'index', size)
 
585
        del transport._activity[:]
 
586
        self.assertEqual([], transport._activity)
694
587
        self.assertEqual(0, index.key_count())
695
588
        # The entire index should have been read, as 4K > size
696
589
        self.assertEqual([('readv', 'index', [(0, 72)], False, None)],
697
 
                         t._activity)
 
590
            transport._activity)
698
591
 
699
592
    def test_non_empty_key_count_2_2(self):
700
593
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
701
594
        nodes = self.make_nodes(35, 2, 2)
702
595
        for node in nodes:
703
596
            builder.add_node(*node)
704
 
        t = transport.get_transport_from_url('trace+' + self.get_url(''))
705
 
        size = t.put_file('index', builder.finish())
706
 
        index = btree_index.BTreeGraphIndex(t, 'index', size)
707
 
        del t._activity[:]
708
 
        self.assertEqual([], t._activity)
 
597
        transport = get_transport('trace+' + self.get_url(''))
 
598
        size = transport.put_file('index', builder.finish())
 
599
        index = btree_index.BTreeGraphIndex(transport, 'index', size)
 
600
        del transport._activity[:]
 
601
        self.assertEqual([], transport._activity)
709
602
        self.assertEqual(70, index.key_count())
710
603
        # The entire index should have been read, as it is one page long.
711
604
        self.assertEqual([('readv', 'index', [(0, size)], False, None)],
712
 
            t._activity)
713
 
        self.assertEqualApproxCompressed(1173, size)
714
 
 
715
 
    def test_with_offset_no_size(self):
716
 
        index = self.make_index_with_offset(key_elements=1, ref_lists=1,
717
 
                                            offset=1234,
718
 
                                            nodes=self.make_nodes(200, 1, 1))
719
 
        index._size = None # throw away the size info
720
 
        self.assertEqual(200, index.key_count())
721
 
 
722
 
    def test_with_small_offset(self):
723
 
        index = self.make_index_with_offset(key_elements=1, ref_lists=1,
724
 
                                            offset=1234,
725
 
                                            nodes=self.make_nodes(200, 1, 1))
726
 
        self.assertEqual(200, index.key_count())
727
 
 
728
 
    def test_with_large_offset(self):
729
 
        index = self.make_index_with_offset(key_elements=1, ref_lists=1,
730
 
                                            offset=123456,
731
 
                                            nodes=self.make_nodes(200, 1, 1))
732
 
        self.assertEqual(200, index.key_count())
733
 
 
734
 
    def test__read_nodes_no_size_one_page_reads_once(self):
735
 
        self.make_index(nodes=[(('key',), 'value', ())])
736
 
        trans = transport.get_transport_from_url('trace+' + self.get_url())
737
 
        index = btree_index.BTreeGraphIndex(trans, 'index', None)
738
 
        del trans._activity[:]
739
 
        nodes = dict(index._read_nodes([0]))
740
 
        self.assertEqual([0], nodes.keys())
741
 
        node = nodes[0]
742
 
        self.assertEqual([('key',)], node.all_keys())
743
 
        self.assertEqual([('get', 'index')], trans._activity)
744
 
 
745
 
    def test__read_nodes_no_size_multiple_pages(self):
746
 
        index = self.make_index(2, 2, nodes=self.make_nodes(160, 2, 2))
747
 
        index.key_count()
748
 
        num_pages = index._row_offsets[-1]
749
 
        # Reopen with a traced transport and no size
750
 
        trans = transport.get_transport_from_url('trace+' + self.get_url())
751
 
        index = btree_index.BTreeGraphIndex(trans, 'index', None)
752
 
        del trans._activity[:]
753
 
        nodes = dict(index._read_nodes([0]))
754
 
        self.assertEqual(range(num_pages), nodes.keys())
 
605
            transport._activity)
 
606
        self.assertEqual(1199, size)
755
607
 
756
608
    def test_2_levels_key_count_2_2(self):
757
609
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
758
610
        nodes = self.make_nodes(160, 2, 2)
759
611
        for node in nodes:
760
612
            builder.add_node(*node)
761
 
        t = transport.get_transport_from_url('trace+' + self.get_url(''))
762
 
        size = t.put_file('index', builder.finish())
763
 
        self.assertEqualApproxCompressed(17692, size)
764
 
        index = btree_index.BTreeGraphIndex(t, 'index', size)
765
 
        del t._activity[:]
766
 
        self.assertEqual([], t._activity)
 
613
        transport = get_transport('trace+' + self.get_url(''))
 
614
        size = transport.put_file('index', builder.finish())
 
615
        self.assertEqual(17692, size)
 
616
        index = btree_index.BTreeGraphIndex(transport, 'index', size)
 
617
        del transport._activity[:]
 
618
        self.assertEqual([], transport._activity)
767
619
        self.assertEqual(320, index.key_count())
768
620
        # The entire index should not have been read.
769
621
        self.assertEqual([('readv', 'index', [(0, 4096)], False, None)],
770
 
                         t._activity)
 
622
            transport._activity)
771
623
 
772
624
    def test_validate_one_page(self):
773
625
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
774
626
        nodes = self.make_nodes(45, 2, 2)
775
627
        for node in nodes:
776
628
            builder.add_node(*node)
777
 
        t = transport.get_transport_from_url('trace+' + self.get_url(''))
778
 
        size = t.put_file('index', builder.finish())
779
 
        index = btree_index.BTreeGraphIndex(t, 'index', size)
780
 
        del t._activity[:]
781
 
        self.assertEqual([], t._activity)
 
629
        transport = get_transport('trace+' + self.get_url(''))
 
630
        size = transport.put_file('index', builder.finish())
 
631
        index = btree_index.BTreeGraphIndex(transport, 'index', size)
 
632
        del transport._activity[:]
 
633
        self.assertEqual([], transport._activity)
782
634
        index.validate()
783
635
        # The entire index should have been read linearly.
784
636
        self.assertEqual([('readv', 'index', [(0, size)], False, None)],
785
 
                         t._activity)
786
 
        self.assertEqualApproxCompressed(1488, size)
 
637
            transport._activity)
 
638
        self.assertEqual(1514, size)
787
639
 
788
640
    def test_validate_two_pages(self):
789
641
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
790
642
        nodes = self.make_nodes(80, 2, 2)
791
643
        for node in nodes:
792
644
            builder.add_node(*node)
793
 
        t = transport.get_transport_from_url('trace+' + self.get_url(''))
794
 
        size = t.put_file('index', builder.finish())
 
645
        transport = get_transport('trace+' + self.get_url(''))
 
646
        size = transport.put_file('index', builder.finish())
795
647
        # Root page, 2 leaf pages
796
 
        self.assertEqualApproxCompressed(9339, size)
797
 
        index = btree_index.BTreeGraphIndex(t, 'index', size)
798
 
        del t._activity[:]
799
 
        self.assertEqual([], t._activity)
 
648
        self.assertEqual(9339, size)
 
649
        index = btree_index.BTreeGraphIndex(transport, 'index', size)
 
650
        del transport._activity[:]
 
651
        self.assertEqual([], transport._activity)
800
652
        index.validate()
801
 
        rem = size - 8192 # Number of remaining bytes after second block
802
653
        # The entire index should have been read linearly.
803
 
        self.assertEqual(
804
 
            [('readv', 'index', [(0, 4096)], False, None),
805
 
             ('readv', 'index', [(4096, 4096), (8192, rem)], False, None)],
806
 
            t._activity)
 
654
        self.assertEqual([('readv', 'index', [(0, 4096)], False, None),
 
655
            ('readv', 'index', [(4096, 4096), (8192, 1147)], False, None)],
 
656
            transport._activity)
807
657
        # XXX: TODO: write some badly-ordered nodes, and some pointers-to-wrong
808
658
        # node and make validate find them.
809
659
 
810
660
    def test_eq_ne(self):
811
661
        # two indices are equal when constructed with the same parameters:
812
 
        t1 = transport.get_transport_from_url('trace+' + self.get_url(''))
813
 
        t2 = self.get_transport()
814
 
        self.assertTrue(
815
 
            btree_index.BTreeGraphIndex(t1, 'index', None) ==
816
 
            btree_index.BTreeGraphIndex(t1, 'index', None))
817
 
        self.assertTrue(
818
 
            btree_index.BTreeGraphIndex(t1, 'index', 20) ==
819
 
            btree_index.BTreeGraphIndex(t1, 'index', 20))
820
 
        self.assertFalse(
821
 
            btree_index.BTreeGraphIndex(t1, 'index', 20) ==
822
 
            btree_index.BTreeGraphIndex(t2, 'index', 20))
823
 
        self.assertFalse(
824
 
            btree_index.BTreeGraphIndex(t1, 'inde1', 20) ==
825
 
            btree_index.BTreeGraphIndex(t1, 'inde2', 20))
826
 
        self.assertFalse(
827
 
            btree_index.BTreeGraphIndex(t1, 'index', 10) ==
828
 
            btree_index.BTreeGraphIndex(t1, 'index', 20))
829
 
        self.assertFalse(
830
 
            btree_index.BTreeGraphIndex(t1, 'index', None) !=
831
 
            btree_index.BTreeGraphIndex(t1, 'index', None))
832
 
        self.assertFalse(
833
 
            btree_index.BTreeGraphIndex(t1, 'index', 20) !=
834
 
            btree_index.BTreeGraphIndex(t1, 'index', 20))
835
 
        self.assertTrue(
836
 
            btree_index.BTreeGraphIndex(t1, 'index', 20) !=
837
 
            btree_index.BTreeGraphIndex(t2, 'index', 20))
838
 
        self.assertTrue(
839
 
            btree_index.BTreeGraphIndex(t1, 'inde1', 20) !=
840
 
            btree_index.BTreeGraphIndex(t1, 'inde2', 20))
841
 
        self.assertTrue(
842
 
            btree_index.BTreeGraphIndex(t1, 'index', 10) !=
843
 
            btree_index.BTreeGraphIndex(t1, 'index', 20))
844
 
 
845
 
    def test_key_too_big(self):
846
 
        # the size that matters here is the _compressed_ size of the key, so we can't
847
 
        # do a simple character repeat.
848
 
        bigKey = ''.join(map(repr, xrange(btree_index._PAGE_SIZE)))
849
 
        self.assertRaises(errors.BadIndexKey,
850
 
                          self.make_index,
851
 
                          nodes=[((bigKey,), 'value', ())])
852
 
        
853
 
    def test_iter_all_only_root_no_size(self):
854
 
        self.make_index(nodes=[(('key',), 'value', ())])
855
 
        t = transport.get_transport_from_url('trace+' + self.get_url(''))
856
 
        index = btree_index.BTreeGraphIndex(t, 'index', None)
857
 
        del t._activity[:]
858
 
        self.assertEqual([(('key',), 'value')],
859
 
                         [x[1:] for x in index.iter_all_entries()])
860
 
        self.assertEqual([('get', 'index')], t._activity)
 
662
        transport1 = get_transport('trace+' + self.get_url(''))
 
663
        transport2 = get_transport(self.get_url(''))
 
664
        self.assertTrue(
 
665
            btree_index.BTreeGraphIndex(transport1, 'index', None) ==
 
666
            btree_index.BTreeGraphIndex(transport1, 'index', None))
 
667
        self.assertTrue(
 
668
            btree_index.BTreeGraphIndex(transport1, 'index', 20) ==
 
669
            btree_index.BTreeGraphIndex(transport1, 'index', 20))
 
670
        self.assertFalse(
 
671
            btree_index.BTreeGraphIndex(transport1, 'index', 20) ==
 
672
            btree_index.BTreeGraphIndex(transport2, 'index', 20))
 
673
        self.assertFalse(
 
674
            btree_index.BTreeGraphIndex(transport1, 'inde1', 20) ==
 
675
            btree_index.BTreeGraphIndex(transport1, 'inde2', 20))
 
676
        self.assertFalse(
 
677
            btree_index.BTreeGraphIndex(transport1, 'index', 10) ==
 
678
            btree_index.BTreeGraphIndex(transport1, 'index', 20))
 
679
        self.assertFalse(
 
680
            btree_index.BTreeGraphIndex(transport1, 'index', None) !=
 
681
            btree_index.BTreeGraphIndex(transport1, 'index', None))
 
682
        self.assertFalse(
 
683
            btree_index.BTreeGraphIndex(transport1, 'index', 20) !=
 
684
            btree_index.BTreeGraphIndex(transport1, 'index', 20))
 
685
        self.assertTrue(
 
686
            btree_index.BTreeGraphIndex(transport1, 'index', 20) !=
 
687
            btree_index.BTreeGraphIndex(transport2, 'index', 20))
 
688
        self.assertTrue(
 
689
            btree_index.BTreeGraphIndex(transport1, 'inde1', 20) !=
 
690
            btree_index.BTreeGraphIndex(transport1, 'inde2', 20))
 
691
        self.assertTrue(
 
692
            btree_index.BTreeGraphIndex(transport1, 'index', 10) !=
 
693
            btree_index.BTreeGraphIndex(transport1, 'index', 20))
861
694
 
862
695
    def test_iter_all_entries_reads(self):
863
696
        # iterating all entries reads the header, then does a linear
869
702
        nodes = self.make_nodes(10000, 2, 2)
870
703
        for node in nodes:
871
704
            builder.add_node(*node)
872
 
        t = transport.get_transport_from_url('trace+' + self.get_url(''))
873
 
        size = t.put_file('index', builder.finish())
 
705
        transport = get_transport('trace+' + self.get_url(''))
 
706
        size = transport.put_file('index', builder.finish())
 
707
        self.assertEqual(1303220, size, 'number of expected bytes in the'
 
708
                                        ' output changed')
874
709
        page_size = btree_index._PAGE_SIZE
875
710
        del builder
876
 
        index = btree_index.BTreeGraphIndex(t, 'index', size)
877
 
        del t._activity[:]
878
 
        self.assertEqual([], t._activity)
 
711
        index = btree_index.BTreeGraphIndex(transport, 'index', size)
 
712
        del transport._activity[:]
 
713
        self.assertEqual([], transport._activity)
879
714
        found_nodes = self.time(list, index.iter_all_entries())
880
715
        bare_nodes = []
881
716
        for node in found_nodes:
892
727
        # The entire index should have been read
893
728
        total_pages = sum(index._row_lengths)
894
729
        self.assertEqual(total_pages, index._row_offsets[-1])
895
 
        self.assertEqualApproxCompressed(1303220, size)
 
730
        self.assertEqual(1303220, size)
896
731
        # The start of the leaves
897
732
        first_byte = index._row_offsets[-2] * page_size
898
733
        readv_request = []
899
734
        for offset in range(first_byte, size, page_size):
900
735
            readv_request.append((offset, page_size))
901
736
        # The last page is truncated
902
 
        readv_request[-1] = (readv_request[-1][0], size % page_size)
 
737
        readv_request[-1] = (readv_request[-1][0], 1303220 % page_size)
903
738
        expected = [('readv', 'index', [(0, page_size)], False, None),
904
739
             ('readv',  'index', readv_request, False, None)]
905
 
        if expected != t._activity:
 
740
        if expected != transport._activity:
906
741
            self.assertEqualDiff(pprint.pformat(expected),
907
 
                                 pprint.pformat(t._activity))
 
742
                                 pprint.pformat(transport._activity))
908
743
 
909
744
    def _test_iter_entries_references_resolved(self):
910
745
        index = self.make_index(1, nodes=[
922
757
        nodes = self.make_nodes(160, 2, 2)
923
758
        for node in nodes:
924
759
            builder.add_node(*node)
925
 
        t = transport.get_transport_from_url('trace+' + self.get_url(''))
926
 
        size = t.put_file('index', builder.finish())
 
760
        transport = get_transport('trace+' + self.get_url(''))
 
761
        size = transport.put_file('index', builder.finish())
927
762
        del builder
928
 
        index = btree_index.BTreeGraphIndex(t, 'index', size)
929
 
        del t._activity[:]
930
 
        self.assertEqual([], t._activity)
 
763
        index = btree_index.BTreeGraphIndex(transport, 'index', size)
 
764
        del transport._activity[:]
 
765
        self.assertEqual([], transport._activity)
931
766
        # search for one key
932
767
        found_nodes = list(index.iter_entries([nodes[30][0]]))
933
768
        bare_nodes = []
941
776
        # Should have read the root node, then one leaf page:
942
777
        self.assertEqual([('readv', 'index', [(0, 4096)], False, None),
943
778
             ('readv',  'index', [(8192, 4096), ], False, None)],
944
 
            t._activity)
 
779
            transport._activity)
945
780
 
946
781
    def test_iter_key_prefix_1_element_key_None(self):
947
782
        index = self.make_index()
998
833
            (index, ('name', 'fin2'), 'beta', ((), ))]),
999
834
            set(index.iter_entries_prefix([('name', None)])))
1000
835
 
1001
 
    # XXX: external_references tests are duplicated in test_index.  We
1002
 
    # probably should have per_graph_index tests...
1003
 
    def test_external_references_no_refs(self):
1004
 
        index = self.make_index(ref_lists=0, nodes=[])
1005
 
        self.assertRaises(ValueError, index.external_references, 0)
1006
 
 
1007
 
    def test_external_references_no_results(self):
1008
 
        index = self.make_index(ref_lists=1, nodes=[
1009
 
            (('key',), 'value', ([],))])
1010
 
        self.assertEqual(set(), index.external_references(0))
1011
 
 
1012
 
    def test_external_references_missing_ref(self):
1013
 
        missing_key = ('missing',)
1014
 
        index = self.make_index(ref_lists=1, nodes=[
1015
 
            (('key',), 'value', ([missing_key],))])
1016
 
        self.assertEqual(set([missing_key]), index.external_references(0))
1017
 
 
1018
 
    def test_external_references_multiple_ref_lists(self):
1019
 
        missing_key = ('missing',)
1020
 
        index = self.make_index(ref_lists=2, nodes=[
1021
 
            (('key',), 'value', ([], [missing_key]))])
1022
 
        self.assertEqual(set([]), index.external_references(0))
1023
 
        self.assertEqual(set([missing_key]), index.external_references(1))
1024
 
 
1025
 
    def test_external_references_two_records(self):
1026
 
        index = self.make_index(ref_lists=1, nodes=[
1027
 
            (('key-1',), 'value', ([('key-2',)],)),
1028
 
            (('key-2',), 'value', ([],)),
1029
 
            ])
1030
 
        self.assertEqual(set([]), index.external_references(0))
1031
 
 
1032
 
    def test__find_ancestors_one_page(self):
1033
 
        key1 = ('key-1',)
1034
 
        key2 = ('key-2',)
1035
 
        index = self.make_index(ref_lists=1, key_elements=1, nodes=[
1036
 
            (key1, 'value', ([key2],)),
1037
 
            (key2, 'value', ([],)),
1038
 
            ])
1039
 
        parent_map = {}
1040
 
        missing_keys = set()
1041
 
        search_keys = index._find_ancestors([key1], 0, parent_map, missing_keys)
1042
 
        self.assertEqual({key1: (key2,), key2: ()}, parent_map)
1043
 
        self.assertEqual(set(), missing_keys)
1044
 
        self.assertEqual(set(), search_keys)
1045
 
 
1046
 
    def test__find_ancestors_one_page_w_missing(self):
1047
 
        key1 = ('key-1',)
1048
 
        key2 = ('key-2',)
1049
 
        key3 = ('key-3',)
1050
 
        index = self.make_index(ref_lists=1, key_elements=1, nodes=[
1051
 
            (key1, 'value', ([key2],)),
1052
 
            (key2, 'value', ([],)),
1053
 
            ])
1054
 
        parent_map = {}
1055
 
        missing_keys = set()
1056
 
        search_keys = index._find_ancestors([key2, key3], 0, parent_map,
1057
 
                                            missing_keys)
1058
 
        self.assertEqual({key2: ()}, parent_map)
1059
 
        # we know that key3 is missing because we read the page that it would
1060
 
        # otherwise be on
1061
 
        self.assertEqual(set([key3]), missing_keys)
1062
 
        self.assertEqual(set(), search_keys)
1063
 
 
1064
 
    def test__find_ancestors_one_parent_missing(self):
1065
 
        key1 = ('key-1',)
1066
 
        key2 = ('key-2',)
1067
 
        key3 = ('key-3',)
1068
 
        index = self.make_index(ref_lists=1, key_elements=1, nodes=[
1069
 
            (key1, 'value', ([key2],)),
1070
 
            (key2, 'value', ([key3],)),
1071
 
            ])
1072
 
        parent_map = {}
1073
 
        missing_keys = set()
1074
 
        search_keys = index._find_ancestors([key1], 0, parent_map,
1075
 
                                            missing_keys)
1076
 
        self.assertEqual({key1: (key2,), key2: (key3,)}, parent_map)
1077
 
        self.assertEqual(set(), missing_keys)
1078
 
        # all we know is that key3 wasn't present on the page we were reading
1079
 
        # but if you look, the last key is key2 which comes before key3, so we
1080
 
        # don't know whether key3 would land on this page or not.
1081
 
        self.assertEqual(set([key3]), search_keys)
1082
 
        search_keys = index._find_ancestors(search_keys, 0, parent_map,
1083
 
                                            missing_keys)
1084
 
        # passing it back in, we are sure it is 'missing'
1085
 
        self.assertEqual({key1: (key2,), key2: (key3,)}, parent_map)
1086
 
        self.assertEqual(set([key3]), missing_keys)
1087
 
        self.assertEqual(set([]), search_keys)
1088
 
 
1089
 
    def test__find_ancestors_dont_search_known(self):
1090
 
        key1 = ('key-1',)
1091
 
        key2 = ('key-2',)
1092
 
        key3 = ('key-3',)
1093
 
        index = self.make_index(ref_lists=1, key_elements=1, nodes=[
1094
 
            (key1, 'value', ([key2],)),
1095
 
            (key2, 'value', ([key3],)),
1096
 
            (key3, 'value', ([],)),
1097
 
            ])
1098
 
        # We already know about key2, so we won't try to search for key3
1099
 
        parent_map = {key2: (key3,)}
1100
 
        missing_keys = set()
1101
 
        search_keys = index._find_ancestors([key1], 0, parent_map,
1102
 
                                            missing_keys)
1103
 
        self.assertEqual({key1: (key2,), key2: (key3,)}, parent_map)
1104
 
        self.assertEqual(set(), missing_keys)
1105
 
        self.assertEqual(set(), search_keys)
1106
 
 
1107
 
    def test__find_ancestors_multiple_pages(self):
1108
 
        # We need to use enough keys that we actually cause a split
1109
 
        start_time = 1249671539
1110
 
        email = "joebob@example.com"
1111
 
        nodes = []
1112
 
        ref_lists = ((),)
1113
 
        rev_keys = []
1114
 
        for i in xrange(400):
1115
 
            rev_id = '%s-%s-%s' % (email,
1116
 
                                   osutils.compact_date(start_time + i),
1117
 
                                   osutils.rand_chars(16))
1118
 
            rev_key = (rev_id,)
1119
 
            nodes.append((rev_key, 'value', ref_lists))
1120
 
            # We have a ref 'list' of length 1, with a list of parents, with 1
1121
 
            # parent which is a key
1122
 
            ref_lists = ((rev_key,),)
1123
 
            rev_keys.append(rev_key)
1124
 
        index = self.make_index(ref_lists=1, key_elements=1, nodes=nodes)
1125
 
        self.assertEqual(400, index.key_count())
1126
 
        self.assertEqual(3, len(index._row_offsets))
1127
 
        nodes = dict(index._read_nodes([1, 2]))
1128
 
        l1 = nodes[1]
1129
 
        l2 = nodes[2]
1130
 
        min_l2_key = l2.min_key
1131
 
        max_l1_key = l1.max_key
1132
 
        self.assertTrue(max_l1_key < min_l2_key)
1133
 
        parents_min_l2_key = l2[min_l2_key][1][0]
1134
 
        self.assertEqual((l1.max_key,), parents_min_l2_key)
1135
 
        # Now, whatever key we select that would fall on the second page,
1136
 
        # should give us all the parents until the page break
1137
 
        key_idx = rev_keys.index(min_l2_key)
1138
 
        next_key = rev_keys[key_idx+1]
1139
 
        # So now when we get the parent map, we should get the key we are
1140
 
        # looking for, min_l2_key, and then a reference to go look for the
1141
 
        # parent of that key
1142
 
        parent_map = {}
1143
 
        missing_keys = set()
1144
 
        search_keys = index._find_ancestors([next_key], 0, parent_map,
1145
 
                                            missing_keys)
1146
 
        self.assertEqual([min_l2_key, next_key], sorted(parent_map))
1147
 
        self.assertEqual(set(), missing_keys)
1148
 
        self.assertEqual(set([max_l1_key]), search_keys)
1149
 
        parent_map = {}
1150
 
        search_keys = index._find_ancestors([max_l1_key], 0, parent_map,
1151
 
                                            missing_keys)
1152
 
        self.assertEqual(l1.all_keys(), sorted(parent_map))
1153
 
        self.assertEqual(set(), missing_keys)
1154
 
        self.assertEqual(set(), search_keys)
1155
 
 
1156
 
    def test__find_ancestors_empty_index(self):
1157
 
        index = self.make_index(ref_lists=1, key_elements=1, nodes=[])
1158
 
        parent_map = {}
1159
 
        missing_keys = set()
1160
 
        search_keys = index._find_ancestors([('one',), ('two',)], 0, parent_map,
1161
 
                                            missing_keys)
1162
 
        self.assertEqual(set(), search_keys)
1163
 
        self.assertEqual({}, parent_map)
1164
 
        self.assertEqual(set([('one',), ('two',)]), missing_keys)
1165
 
 
1166
 
    def test_supports_unlimited_cache(self):
1167
 
        builder = btree_index.BTreeBuilder(reference_lists=0, key_elements=1)
1168
 
        # We need enough nodes to cause a page split (so we have both an
1169
 
        # internal node and a couple leaf nodes. 500 seems to be enough.)
1170
 
        nodes = self.make_nodes(500, 1, 0)
1171
 
        for node in nodes:
1172
 
            builder.add_node(*node)
1173
 
        stream = builder.finish()
1174
 
        trans = self.get_transport()
1175
 
        size = trans.put_file('index', stream)
1176
 
        index = btree_index.BTreeGraphIndex(trans, 'index', size)
1177
 
        self.assertEqual(500, index.key_count())
1178
 
        # We have an internal node
1179
 
        self.assertEqual(2, len(index._row_lengths))
1180
 
        # We have at least 2 leaf nodes
1181
 
        self.assertTrue(index._row_lengths[-1] >= 2)
1182
 
        self.assertIsInstance(index._leaf_node_cache, lru_cache.LRUCache)
1183
 
        self.assertEqual(btree_index._NODE_CACHE_SIZE,
1184
 
                         index._leaf_node_cache._max_cache)
1185
 
        self.assertIsInstance(index._internal_node_cache, fifo_cache.FIFOCache)
1186
 
        self.assertEqual(100, index._internal_node_cache._max_cache)
1187
 
        # No change if unlimited_cache=False is passed
1188
 
        index = btree_index.BTreeGraphIndex(trans, 'index', size,
1189
 
                                            unlimited_cache=False)
1190
 
        self.assertIsInstance(index._leaf_node_cache, lru_cache.LRUCache)
1191
 
        self.assertEqual(btree_index._NODE_CACHE_SIZE,
1192
 
                         index._leaf_node_cache._max_cache)
1193
 
        self.assertIsInstance(index._internal_node_cache, fifo_cache.FIFOCache)
1194
 
        self.assertEqual(100, index._internal_node_cache._max_cache)
1195
 
        index = btree_index.BTreeGraphIndex(trans, 'index', size,
1196
 
                                            unlimited_cache=True)
1197
 
        self.assertIsInstance(index._leaf_node_cache, dict)
1198
 
        self.assertIs(type(index._internal_node_cache), dict)
1199
 
        # Exercise the lookup code
1200
 
        entries = set(index.iter_entries([n[0] for n in nodes]))
1201
 
        self.assertEqual(500, len(entries))
1202
 
 
1203
836
 
1204
837
class TestBTreeNodes(BTreeTestCase):
1205
838
 
1206
 
    scenarios = btreeparser_scenarios()
 
839
    def restore_parser(self):
 
840
        btree_index._btree_serializer = self.saved_parser
1207
841
 
1208
842
    def setUp(self):
1209
 
        super(TestBTreeNodes, self).setUp()
1210
 
        self.overrideAttr(btree_index, '_btree_serializer', self.parse_btree)
 
843
        BTreeTestCase.setUp(self)
 
844
        self.saved_parser = btree_index._btree_serializer
 
845
        self.addCleanup(self.restore_parser)
 
846
        btree_index._btree_serializer = self.parse_btree
1211
847
 
1212
848
    def test_LeafNode_1_0(self):
1213
849
        node_bytes = ("type=leaf\n"
1225
861
            ("2222222222222222222222222222222222222222",): ("value:2", ()),
1226
862
            ("3333333333333333333333333333333333333333",): ("value:3", ()),
1227
863
            ("4444444444444444444444444444444444444444",): ("value:4", ()),
1228
 
            }, dict(node.all_items()))
 
864
            }, node.keys)
1229
865
 
1230
866
    def test_LeafNode_2_2(self):
1231
867
        node_bytes = ("type=leaf\n"
1245
881
            ('11', '33'): ('value:3',
1246
882
                ((('11', 'ref22'),), (('11', 'ref22'), ('11', 'ref22')))),
1247
883
            ('11', '44'): ('value:4', ((), (('11', 'ref00'),)))
1248
 
            }, dict(node.all_items()))
 
884
            }, node.keys)
1249
885
 
1250
886
    def test_InternalNode_1(self):
1251
887
        node_bytes = ("type=internal\n"
1286
922
            ('11', '33'): ('value:3',
1287
923
                ((('11', 'ref22'),), (('11', 'ref22'), ('11', 'ref22')))),
1288
924
            ('11', '44'): ('value:4', ((), (('11', 'ref00'),)))
1289
 
            }, dict(node.all_items()))
 
925
            }, node.keys)
1290
926
 
1291
927
    def assertFlattened(self, expected, key, value, refs):
1292
928
        flat_key, flat_line = self.parse_btree._flatten_node(
1324
960
    def test_exists(self):
1325
961
        # This is just to let the user know if they don't have the feature
1326
962
        # available
1327
 
        self.requireFeature(compiled_btreeparser_feature)
 
963
        self.requireFeature(CompiledBtreeParserFeature)
1328
964
 
1329
965
 
1330
966
class TestMultiBisectRight(tests.TestCase):
1360
996
                                     (4, ['g', 'h'])],
1361
997
                                    ['a', 'b', 'd', 'e', 'g', 'h'],
1362
998
                                    ['c', 'd', 'f', 'g'])
1363
 
 
1364
 
 
1365
 
class TestExpandOffsets(tests.TestCase):
1366
 
 
1367
 
    def make_index(self, size, recommended_pages=None):
1368
 
        """Make an index with a generic size.
1369
 
 
1370
 
        This doesn't actually create anything on disk, it just primes a
1371
 
        BTreeGraphIndex with the recommended information.
1372
 
        """
1373
 
        index = btree_index.BTreeGraphIndex(
1374
 
            transport.get_transport_from_url('memory:///'),
1375
 
            'test-index', size=size)
1376
 
        if recommended_pages is not None:
1377
 
            index._recommended_pages = recommended_pages
1378
 
        return index
1379
 
 
1380
 
    def set_cached_offsets(self, index, cached_offsets):
1381
 
        """Monkeypatch to give a canned answer for _get_offsets_for...()."""
1382
 
        def _get_offsets_to_cached_pages():
1383
 
            cached = set(cached_offsets)
1384
 
            return cached
1385
 
        index._get_offsets_to_cached_pages = _get_offsets_to_cached_pages
1386
 
 
1387
 
    def prepare_index(self, index, node_ref_lists, key_length, key_count,
1388
 
                      row_lengths, cached_offsets):
1389
 
        """Setup the BTreeGraphIndex with some pre-canned information."""
1390
 
        index.node_ref_lists = node_ref_lists
1391
 
        index._key_length = key_length
1392
 
        index._key_count = key_count
1393
 
        index._row_lengths = row_lengths
1394
 
        index._compute_row_offsets()
1395
 
        index._root_node = btree_index._InternalNode('internal\noffset=0\n')
1396
 
        self.set_cached_offsets(index, cached_offsets)
1397
 
 
1398
 
    def make_100_node_index(self):
1399
 
        index = self.make_index(4096*100, 6)
1400
 
        # Consider we've already made a single request at the middle
1401
 
        self.prepare_index(index, node_ref_lists=0, key_length=1,
1402
 
                           key_count=1000, row_lengths=[1, 99],
1403
 
                           cached_offsets=[0, 50])
1404
 
        return index
1405
 
 
1406
 
    def make_1000_node_index(self):
1407
 
        index = self.make_index(4096*1000, 6)
1408
 
        # Pretend we've already made a single request in the middle
1409
 
        self.prepare_index(index, node_ref_lists=0, key_length=1,
1410
 
                           key_count=90000, row_lengths=[1, 9, 990],
1411
 
                           cached_offsets=[0, 5, 500])
1412
 
        return index
1413
 
 
1414
 
    def assertNumPages(self, expected_pages, index, size):
1415
 
        index._size = size
1416
 
        self.assertEqual(expected_pages, index._compute_total_pages_in_index())
1417
 
 
1418
 
    def assertExpandOffsets(self, expected, index, offsets):
1419
 
        self.assertEqual(expected, index._expand_offsets(offsets),
1420
 
                         'We did not get the expected value after expanding'
1421
 
                         ' %s' % (offsets,))
1422
 
 
1423
 
    def test_default_recommended_pages(self):
1424
 
        index = self.make_index(None)
1425
 
        # local transport recommends 4096 byte reads, which is 1 page
1426
 
        self.assertEqual(1, index._recommended_pages)
1427
 
 
1428
 
    def test__compute_total_pages_in_index(self):
1429
 
        index = self.make_index(None)
1430
 
        self.assertNumPages(1, index, 1024)
1431
 
        self.assertNumPages(1, index, 4095)
1432
 
        self.assertNumPages(1, index, 4096)
1433
 
        self.assertNumPages(2, index, 4097)
1434
 
        self.assertNumPages(2, index, 8192)
1435
 
        self.assertNumPages(76, index, 4096*75 + 10)
1436
 
 
1437
 
    def test__find_layer_start_and_stop(self):
1438
 
        index = self.make_1000_node_index()
1439
 
        self.assertEqual((0, 1), index._find_layer_first_and_end(0))
1440
 
        self.assertEqual((1, 10), index._find_layer_first_and_end(1))
1441
 
        self.assertEqual((1, 10), index._find_layer_first_and_end(9))
1442
 
        self.assertEqual((10, 1000), index._find_layer_first_and_end(10))
1443
 
        self.assertEqual((10, 1000), index._find_layer_first_and_end(99))
1444
 
        self.assertEqual((10, 1000), index._find_layer_first_and_end(999))
1445
 
 
1446
 
    def test_unknown_size(self):
1447
 
        # We should not expand if we don't know the file size
1448
 
        index = self.make_index(None, 10)
1449
 
        self.assertExpandOffsets([0], index, [0])
1450
 
        self.assertExpandOffsets([1, 4, 9], index, [1, 4, 9])
1451
 
 
1452
 
    def test_more_than_recommended(self):
1453
 
        index = self.make_index(4096*100, 2)
1454
 
        self.assertExpandOffsets([1, 10], index, [1, 10])
1455
 
        self.assertExpandOffsets([1, 10, 20], index, [1, 10, 20])
1456
 
 
1457
 
    def test_read_all_from_root(self):
1458
 
        index = self.make_index(4096*10, 20)
1459
 
        self.assertExpandOffsets(range(10), index, [0])
1460
 
 
1461
 
    def test_read_all_when_cached(self):
1462
 
        # We've read enough that we can grab all the rest in a single request
1463
 
        index = self.make_index(4096*10, 5)
1464
 
        self.prepare_index(index, node_ref_lists=0, key_length=1,
1465
 
                           key_count=1000, row_lengths=[1, 9],
1466
 
                           cached_offsets=[0, 1, 2, 5, 6])
1467
 
        # It should fill the remaining nodes, regardless of the one requested
1468
 
        self.assertExpandOffsets([3, 4, 7, 8, 9], index, [3])
1469
 
        self.assertExpandOffsets([3, 4, 7, 8, 9], index, [8])
1470
 
        self.assertExpandOffsets([3, 4, 7, 8, 9], index, [9])
1471
 
 
1472
 
    def test_no_root_node(self):
1473
 
        index = self.make_index(4096*10, 5)
1474
 
        self.assertExpandOffsets([0], index, [0])
1475
 
 
1476
 
    def test_include_neighbors(self):
1477
 
        index = self.make_100_node_index()
1478
 
        # We expand in both directions, until we have at least 'recommended'
1479
 
        # pages
1480
 
        self.assertExpandOffsets([9, 10, 11, 12, 13, 14, 15], index, [12])
1481
 
        self.assertExpandOffsets([88, 89, 90, 91, 92, 93, 94], index, [91])
1482
 
        # If we hit an 'edge' we continue in the other direction
1483
 
        self.assertExpandOffsets([1, 2, 3, 4, 5, 6], index, [2])
1484
 
        self.assertExpandOffsets([94, 95, 96, 97, 98, 99], index, [98])
1485
 
 
1486
 
        # Requesting many nodes will expand all locations equally
1487
 
        self.assertExpandOffsets([1, 2, 3, 80, 81, 82], index, [2, 81])
1488
 
        self.assertExpandOffsets([1, 2, 3, 9, 10, 11, 80, 81, 82], index,
1489
 
                               [2, 10, 81])
1490
 
 
1491
 
    def test_stop_at_cached(self):
1492
 
        index = self.make_100_node_index()
1493
 
        self.set_cached_offsets(index, [0, 10, 19])
1494
 
        self.assertExpandOffsets([11, 12, 13, 14, 15, 16], index, [11])
1495
 
        self.assertExpandOffsets([11, 12, 13, 14, 15, 16], index, [12])
1496
 
        self.assertExpandOffsets([12, 13, 14, 15, 16, 17, 18], index, [15])
1497
 
        self.assertExpandOffsets([13, 14, 15, 16, 17, 18], index, [16])
1498
 
        self.assertExpandOffsets([13, 14, 15, 16, 17, 18], index, [17])
1499
 
        self.assertExpandOffsets([13, 14, 15, 16, 17, 18], index, [18])
1500
 
 
1501
 
    def test_cannot_fully_expand(self):
1502
 
        index = self.make_100_node_index()
1503
 
        self.set_cached_offsets(index, [0, 10, 12])
1504
 
        # We don't go into an endless loop if we are bound by cached nodes
1505
 
        self.assertExpandOffsets([11], index, [11])
1506
 
 
1507
 
    def test_overlap(self):
1508
 
        index = self.make_100_node_index()
1509
 
        self.assertExpandOffsets([10, 11, 12, 13, 14, 15], index, [12, 13])
1510
 
        self.assertExpandOffsets([10, 11, 12, 13, 14, 15], index, [11, 14])
1511
 
 
1512
 
    def test_stay_within_layer(self):
1513
 
        index = self.make_1000_node_index()
1514
 
        # When expanding a request, we won't read nodes from the next layer
1515
 
        self.assertExpandOffsets([1, 2, 3, 4], index, [2])
1516
 
        self.assertExpandOffsets([6, 7, 8, 9], index, [6])
1517
 
        self.assertExpandOffsets([6, 7, 8, 9], index, [9])
1518
 
        self.assertExpandOffsets([10, 11, 12, 13, 14, 15], index, [10])
1519
 
        self.assertExpandOffsets([10, 11, 12, 13, 14, 15, 16], index, [13])
1520
 
 
1521
 
        self.set_cached_offsets(index, [0, 4, 12])
1522
 
        self.assertExpandOffsets([5, 6, 7, 8, 9], index, [7])
1523
 
        self.assertExpandOffsets([10, 11], index, [11])
1524
 
 
1525
 
    def test_small_requests_unexpanded(self):
1526
 
        index = self.make_100_node_index()
1527
 
        self.set_cached_offsets(index, [0])
1528
 
        self.assertExpandOffsets([1], index, [1])
1529
 
        self.assertExpandOffsets([50], index, [50])
1530
 
        # If we request more than one node, then we'll expand
1531
 
        self.assertExpandOffsets([49, 50, 51, 59, 60, 61], index, [50, 60])
1532
 
 
1533
 
        # The first pass does not expand
1534
 
        index = self.make_1000_node_index()
1535
 
        self.set_cached_offsets(index, [0])
1536
 
        self.assertExpandOffsets([1], index, [1])
1537
 
        self.set_cached_offsets(index, [0, 1])
1538
 
        self.assertExpandOffsets([100], index, [100])
1539
 
        self.set_cached_offsets(index, [0, 1, 100])
1540
 
        # But after the first depth, we will expand
1541
 
        self.assertExpandOffsets([2, 3, 4, 5, 6, 7], index, [2])
1542
 
        self.assertExpandOffsets([2, 3, 4, 5, 6, 7], index, [4])
1543
 
        self.set_cached_offsets(index, [0, 1, 2, 3, 4, 5, 6, 7, 100])
1544
 
        self.assertExpandOffsets([102, 103, 104, 105, 106, 107, 108], index,
1545
 
                                 [105])