~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/tests/test_btree_index.py

  • Committer: Canonical.com Patch Queue Manager
  • Date: 2008-09-03 06:31:11 UTC
  • mfrom: (1739.2.12 readdir)
  • Revision ID: pqm@pqm.ubuntu.com-20080903063111-jr3ru4gv44xkwl2l
(robertc) Stat the contents of directories in inode order. (Robert
        Collins)

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
# Copyright (C) 2008-2012, 2016 Canonical Ltd
 
1
# Copyright (C) 2008 Canonical Ltd
2
2
#
3
3
# This program is free software; you can redistribute it and/or modify
4
4
# it under the terms of the GNU General Public License as published by
12
12
#
13
13
# You should have received a copy of the GNU General Public License
14
14
# along with this program; if not, write to the Free Software
15
 
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
16
#
17
17
 
18
18
"""Tests for btree indices."""
23
23
from bzrlib import (
24
24
    btree_index,
25
25
    errors,
26
 
    fifo_cache,
27
 
    lru_cache,
28
 
    osutils,
29
26
    tests,
30
 
    transport,
31
27
    )
32
28
from bzrlib.tests import (
33
29
    TestCaseWithTransport,
34
 
    scenarios,
35
 
    )
36
 
from bzrlib.tests import (
37
 
    features,
38
 
    )
39
 
 
40
 
 
41
 
load_tests = scenarios.load_tests_apply_scenarios
42
 
 
43
 
 
44
 
def btreeparser_scenarios():
 
30
    TestScenarioApplier,
 
31
    adapt_tests,
 
32
    condition_isinstance,
 
33
    split_suite_by_condition,
 
34
    )
 
35
from bzrlib.transport import get_transport
 
36
 
 
37
 
 
38
def load_tests(standard_tests, module, loader):
 
39
    # parameterise the TestBTreeNodes tests
 
40
    node_tests, others = split_suite_by_condition(standard_tests,
 
41
        condition_isinstance(TestBTreeNodes))
 
42
    applier = TestScenarioApplier()
45
43
    import bzrlib._btree_serializer_py as py_module
46
 
    scenarios = [('python', {'parse_btree': py_module})]
47
 
    if compiled_btreeparser_feature.available():
48
 
        scenarios.append(('C', 
49
 
            {'parse_btree': compiled_btreeparser_feature.module}))
50
 
    return scenarios
51
 
 
52
 
 
53
 
compiled_btreeparser_feature = features.ModuleAvailableFeature(
54
 
    'bzrlib._btree_serializer_pyx')
 
44
    applier.scenarios = [('python', {'parse_btree': py_module})]
 
45
    if CompiledBtreeParserFeature.available():
 
46
        # Is there a way to do this that gets missing feature failures rather
 
47
        # than no indication to the user?
 
48
        import bzrlib._btree_serializer_c as c_module
 
49
        applier.scenarios.append(('C', {'parse_btree': c_module}))
 
50
    adapt_tests(node_tests, applier, others)
 
51
    return others
 
52
 
 
53
 
 
54
class _CompiledBtreeParserFeature(tests.Feature):
 
55
    def _probe(self):
 
56
        try:
 
57
            import bzrlib._btree_serializer_c
 
58
        except ImportError:
 
59
            return False
 
60
        return True
 
61
 
 
62
    def feature_name(self):
 
63
        return 'bzrlib._btree_serializer_c'
 
64
 
 
65
CompiledBtreeParserFeature = _CompiledBtreeParserFeature()
55
66
 
56
67
 
57
68
class BTreeTestCase(TestCaseWithTransport):
59
70
    # that they test.
60
71
 
61
72
    def setUp(self):
62
 
        super(BTreeTestCase, self).setUp()
63
 
        self.overrideAttr(btree_index, '_RESERVED_HEADER_BYTES', 100)
 
73
        TestCaseWithTransport.setUp(self)
 
74
        self._original_header = btree_index._RESERVED_HEADER_BYTES
 
75
        def restore():
 
76
            btree_index._RESERVED_HEADER_BYTES = self._original_header
 
77
        self.addCleanup(restore)
 
78
        btree_index._RESERVED_HEADER_BYTES = 100
64
79
 
65
80
    def make_nodes(self, count, key_elements, reference_lists):
66
81
        """Generate count*key_elements sample nodes."""
100
115
 
101
116
    def shrink_page_size(self):
102
117
        """Shrink the default page size so that less fits in a page."""
103
 
        self.overrideAttr(btree_index, '_PAGE_SIZE')
 
118
        old_page_size = btree_index._PAGE_SIZE
 
119
        def cleanup():
 
120
            btree_index._PAGE_SIZE = old_page_size
 
121
        self.addCleanup(cleanup)
104
122
        btree_index._PAGE_SIZE = 2048
105
123
 
106
 
    def assertEqualApproxCompressed(self, expected, actual, slop=6):
107
 
        """Check a count of compressed bytes is approximately as expected
108
 
 
109
 
        Relying on compressed length being stable even with fixed inputs is
110
 
        slightly bogus, but zlib is stable enough that this mostly works.
111
 
        """
112
 
        if not expected - slop < actual < expected + slop:
113
 
            self.fail("Expected around %d bytes compressed but got %d" %
114
 
                (expected, actual))
115
 
 
116
124
 
117
125
class TestBTreeBuilder(BTreeTestCase):
118
126
 
119
 
    def test_clear_cache(self):
120
 
        builder = btree_index.BTreeBuilder(reference_lists=0, key_elements=1)
121
 
        # This is a no-op, but we need the api to be consistent with other
122
 
        # BTreeGraphIndex apis.
123
 
        builder.clear_cache()
124
 
 
125
127
    def test_empty_1_0(self):
126
128
        builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
127
129
        # NamedTemporaryFile dies on builder.finish().read(). weird.
153
155
        temp_file = builder.finish()
154
156
        content = temp_file.read()
155
157
        del temp_file
156
 
        self.assertEqual(131, len(content))
 
158
        self.assertEqual(158, len(content))
157
159
        self.assertEqual(
158
160
            "B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=5\n"
159
161
            "row_lengths=1\n",
177
179
        temp_file = builder.finish()
178
180
        content = temp_file.read()
179
181
        del temp_file
180
 
        self.assertEqual(238, len(content))
 
182
        self.assertEqual(264, len(content))
181
183
        self.assertEqual(
182
184
            "B+Tree Graph Index 2\nnode_ref_lists=2\nkey_elements=2\nlen=10\n"
183
185
            "row_lengths=1\n",
209
211
        temp_file = builder.finish()
210
212
        content = temp_file.read()
211
213
        del temp_file
212
 
        self.assertEqualApproxCompressed(9283, len(content))
 
214
        self.assertEqual(9283, len(content))
213
215
        self.assertEqual(
214
216
            "B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=400\n"
215
217
            "row_lengths=1,2\n",
227
229
        leaf1_bytes = zlib.decompress(leaf1)
228
230
        sorted_node_keys = sorted(node[0] for node in nodes)
229
231
        node = btree_index._LeafNode(leaf1_bytes, 1, 0)
230
 
        self.assertEqual(231, len(node))
231
 
        self.assertEqual(sorted_node_keys[:231], node.all_keys())
 
232
        self.assertEqual(231, len(node.keys))
 
233
        self.assertEqual(sorted_node_keys[:231], sorted(node.keys))
232
234
        leaf2_bytes = zlib.decompress(leaf2)
233
235
        node = btree_index._LeafNode(leaf2_bytes, 1, 0)
234
 
        self.assertEqual(400 - 231, len(node))
235
 
        self.assertEqual(sorted_node_keys[231:], node.all_keys())
 
236
        self.assertEqual(400 - 231, len(node.keys))
 
237
        self.assertEqual(sorted_node_keys[231:], sorted(node.keys))
236
238
 
237
239
    def test_last_page_rounded_1_layer(self):
238
240
        builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
243
245
        temp_file = builder.finish()
244
246
        content = temp_file.read()
245
247
        del temp_file
246
 
        self.assertEqualApproxCompressed(155, len(content))
 
248
        self.assertEqual(181, len(content))
247
249
        self.assertEqual(
248
250
            "B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=10\n"
249
251
            "row_lengths=1\n",
252
254
        leaf2 = content[74:]
253
255
        leaf2_bytes = zlib.decompress(leaf2)
254
256
        node = btree_index._LeafNode(leaf2_bytes, 1, 0)
255
 
        self.assertEqual(10, len(node))
 
257
        self.assertEqual(10, len(node.keys))
256
258
        sorted_node_keys = sorted(node[0] for node in nodes)
257
 
        self.assertEqual(sorted_node_keys, node.all_keys())
 
259
        self.assertEqual(sorted_node_keys, sorted(node.keys))
258
260
 
259
261
    def test_last_page_not_rounded_2_layer(self):
260
262
        builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
265
267
        temp_file = builder.finish()
266
268
        content = temp_file.read()
267
269
        del temp_file
268
 
        self.assertEqualApproxCompressed(9283, len(content))
 
270
        self.assertEqual(9283, len(content))
269
271
        self.assertEqual(
270
272
            "B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=400\n"
271
273
            "row_lengths=1,2\n",
274
276
        leaf2 = content[8192:]
275
277
        leaf2_bytes = zlib.decompress(leaf2)
276
278
        node = btree_index._LeafNode(leaf2_bytes, 1, 0)
277
 
        self.assertEqual(400 - 231, len(node))
 
279
        self.assertEqual(400 - 231, len(node.keys))
278
280
        sorted_node_keys = sorted(node[0] for node in nodes)
279
 
        self.assertEqual(sorted_node_keys[231:], node.all_keys())
 
281
        self.assertEqual(sorted_node_keys[231:], sorted(node.keys))
280
282
 
281
283
    def test_three_level_tree_details(self):
282
284
        # The left most pointer in the second internal node in a row should
291
293
 
292
294
        for node in nodes:
293
295
            builder.add_node(*node)
294
 
        t = transport.get_transport_from_url('trace+' + self.get_url(''))
295
 
        size = t.put_file('index', self.time(builder.finish))
 
296
        transport = get_transport('trace+' + self.get_url(''))
 
297
        size = transport.put_file('index', self.time(builder.finish))
296
298
        del builder
297
 
        index = btree_index.BTreeGraphIndex(t, 'index', size)
 
299
        index = btree_index.BTreeGraphIndex(transport, 'index', size)
298
300
        # Seed the metadata, we're using internal calls now.
299
301
        index.key_count()
300
302
        self.assertEqual(3, len(index._row_lengths),
313
315
        # in the second node it points at
314
316
        pos = index._row_offsets[2] + internal_node2.offset + 1
315
317
        leaf = index._get_leaf_nodes([pos])[pos]
316
 
        self.assertTrue(internal_node2.keys[0] in leaf)
 
318
        self.assertTrue(internal_node2.keys[0] in leaf.keys)
317
319
 
318
320
    def test_2_leaves_2_2(self):
319
321
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
324
326
        temp_file = builder.finish()
325
327
        content = temp_file.read()
326
328
        del temp_file
327
 
        self.assertEqualApproxCompressed(12643, len(content))
 
329
        self.assertEqual(12643, len(content))
328
330
        self.assertEqual(
329
331
            "B+Tree Graph Index 2\nnode_ref_lists=2\nkey_elements=2\nlen=200\n"
330
332
            "row_lengths=1,3\n",
351
353
        # Test the parts of the index that take up memory are doing so
352
354
        # predictably.
353
355
        self.assertEqual(1, len(builder._nodes))
354
 
        self.assertIs(None, builder._nodes_by_key)
 
356
        self.assertEqual(1, len(builder._keys))
 
357
        self.assertEqual({}, builder._nodes_by_key)
355
358
        builder.add_node(*nodes[1])
356
359
        self.assertEqual(0, len(builder._nodes))
357
 
        self.assertIs(None, builder._nodes_by_key)
 
360
        self.assertEqual(0, len(builder._keys))
 
361
        self.assertEqual({}, builder._nodes_by_key)
358
362
        self.assertEqual(1, len(builder._backing_indices))
359
363
        self.assertEqual(2, builder._backing_indices[0].key_count())
360
364
        # now back to memory
361
365
        builder.add_node(*nodes[2])
362
366
        self.assertEqual(1, len(builder._nodes))
363
 
        self.assertIs(None, builder._nodes_by_key)
 
367
        self.assertEqual(1, len(builder._keys))
 
368
        self.assertEqual({}, builder._nodes_by_key)
364
369
        # And spills to a second backing index combing all
365
370
        builder.add_node(*nodes[3])
366
371
        self.assertEqual(0, len(builder._nodes))
367
 
        self.assertIs(None, builder._nodes_by_key)
 
372
        self.assertEqual(0, len(builder._keys))
 
373
        self.assertEqual({}, builder._nodes_by_key)
368
374
        self.assertEqual(2, len(builder._backing_indices))
369
375
        self.assertEqual(None, builder._backing_indices[0])
370
376
        self.assertEqual(4, builder._backing_indices[1].key_count())
372
378
        builder.add_node(*nodes[4])
373
379
        builder.add_node(*nodes[5])
374
380
        self.assertEqual(0, len(builder._nodes))
375
 
        self.assertIs(None, builder._nodes_by_key)
 
381
        self.assertEqual(0, len(builder._keys))
 
382
        self.assertEqual({}, builder._nodes_by_key)
376
383
        self.assertEqual(2, len(builder._backing_indices))
377
384
        self.assertEqual(2, builder._backing_indices[0].key_count())
378
385
        self.assertEqual(4, builder._backing_indices[1].key_count())
420
427
        self.assertEqual(None, builder._backing_indices[2])
421
428
        self.assertEqual(16, builder._backing_indices[3].key_count())
422
429
        # Now finish, and check we got a correctly ordered tree
423
 
        t = self.get_transport('')
424
 
        size = t.put_file('index', builder.finish())
425
 
        index = btree_index.BTreeGraphIndex(t, 'index', size)
426
 
        nodes = list(index.iter_all_entries())
427
 
        self.assertEqual(sorted(nodes), nodes)
428
 
        self.assertEqual(16, len(nodes))
429
 
 
430
 
    def test_spill_index_stress_1_1_no_combine(self):
431
 
        builder = btree_index.BTreeBuilder(key_elements=1, spill_at=2)
432
 
        builder.set_optimize(for_size=False, combine_backing_indices=False)
433
 
        nodes = [node[0:2] for node in self.make_nodes(16, 1, 0)]
434
 
        builder.add_node(*nodes[0])
435
 
        # Test the parts of the index that take up memory are doing so
436
 
        # predictably.
437
 
        self.assertEqual(1, len(builder._nodes))
438
 
        self.assertIs(None, builder._nodes_by_key)
439
 
        builder.add_node(*nodes[1])
440
 
        self.assertEqual(0, len(builder._nodes))
441
 
        self.assertIs(None, builder._nodes_by_key)
442
 
        self.assertEqual(1, len(builder._backing_indices))
443
 
        self.assertEqual(2, builder._backing_indices[0].key_count())
444
 
        # now back to memory
445
 
        builder.add_node(*nodes[2])
446
 
        self.assertEqual(1, len(builder._nodes))
447
 
        self.assertIs(None, builder._nodes_by_key)
448
 
        # And spills to a second backing index but doesn't combine
449
 
        builder.add_node(*nodes[3])
450
 
        self.assertEqual(0, len(builder._nodes))
451
 
        self.assertIs(None, builder._nodes_by_key)
452
 
        self.assertEqual(2, len(builder._backing_indices))
453
 
        for backing_index in builder._backing_indices:
454
 
            self.assertEqual(2, backing_index.key_count())
455
 
        # The next spills to the 3rd slot
456
 
        builder.add_node(*nodes[4])
457
 
        builder.add_node(*nodes[5])
458
 
        self.assertEqual(0, len(builder._nodes))
459
 
        self.assertIs(None, builder._nodes_by_key)
460
 
        self.assertEqual(3, len(builder._backing_indices))
461
 
        for backing_index in builder._backing_indices:
462
 
            self.assertEqual(2, backing_index.key_count())
463
 
        # Now spill a few more, and check that we don't combine
464
 
        builder.add_node(*nodes[6])
465
 
        builder.add_node(*nodes[7])
466
 
        builder.add_node(*nodes[8])
467
 
        builder.add_node(*nodes[9])
468
 
        builder.add_node(*nodes[10])
469
 
        builder.add_node(*nodes[11])
470
 
        builder.add_node(*nodes[12])
471
 
        self.assertEqual(6, len(builder._backing_indices))
472
 
        for backing_index in builder._backing_indices:
473
 
            self.assertEqual(2, backing_index.key_count())
474
 
        # Test that memory and disk are both used for query methods; and that
475
 
        # None is skipped over happily.
476
 
        self.assertEqual([(builder,) + node for node in sorted(nodes[:13])],
477
 
            list(builder.iter_all_entries()))
478
 
        # Two nodes - one memory one disk
479
 
        self.assertEqual(set([(builder,) + node for node in nodes[11:13]]),
480
 
            set(builder.iter_entries([nodes[12][0], nodes[11][0]])))
481
 
        self.assertEqual(13, builder.key_count())
482
 
        self.assertEqual(set([(builder,) + node for node in nodes[11:13]]),
483
 
            set(builder.iter_entries_prefix([nodes[12][0], nodes[11][0]])))
484
 
        builder.add_node(*nodes[13])
485
 
        builder.add_node(*nodes[14])
486
 
        builder.add_node(*nodes[15])
487
 
        self.assertEqual(8, len(builder._backing_indices))
488
 
        for backing_index in builder._backing_indices:
489
 
            self.assertEqual(2, backing_index.key_count())
490
 
        # Now finish, and check we got a correctly ordered tree
491
430
        transport = self.get_transport('')
492
431
        size = transport.put_file('index', builder.finish())
493
432
        index = btree_index.BTreeGraphIndex(transport, 'index', size)
495
434
        self.assertEqual(sorted(nodes), nodes)
496
435
        self.assertEqual(16, len(nodes))
497
436
 
498
 
    def test_set_optimize(self):
499
 
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
500
 
        builder.set_optimize(for_size=True)
501
 
        self.assertTrue(builder._optimize_for_size)
502
 
        builder.set_optimize(for_size=False)
503
 
        self.assertFalse(builder._optimize_for_size)
504
 
        # test that we can set combine_backing_indices without effecting
505
 
        # _optimize_for_size
506
 
        obj = object()
507
 
        builder._optimize_for_size = obj
508
 
        builder.set_optimize(combine_backing_indices=False)
509
 
        self.assertFalse(builder._combine_backing_indices)
510
 
        self.assertIs(obj, builder._optimize_for_size)
511
 
        builder.set_optimize(combine_backing_indices=True)
512
 
        self.assertTrue(builder._combine_backing_indices)
513
 
        self.assertIs(obj, builder._optimize_for_size)
514
 
 
515
437
    def test_spill_index_stress_2_2(self):
516
438
        # test that references and longer keys don't confuse things.
517
439
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2,
520
442
        builder.add_node(*nodes[0])
521
443
        # Test the parts of the index that take up memory are doing so
522
444
        # predictably.
523
 
        self.assertEqual(1, len(builder._nodes))
524
 
        self.assertIs(None, builder._nodes_by_key)
 
445
        self.assertEqual(1, len(builder._keys))
 
446
        self.assertEqual(2, len(builder._nodes))
 
447
        self.assertNotEqual({}, builder._nodes_by_key)
525
448
        builder.add_node(*nodes[1])
 
449
        self.assertEqual(0, len(builder._keys))
526
450
        self.assertEqual(0, len(builder._nodes))
527
 
        self.assertIs(None, builder._nodes_by_key)
 
451
        self.assertEqual({}, builder._nodes_by_key)
528
452
        self.assertEqual(1, len(builder._backing_indices))
529
453
        self.assertEqual(2, builder._backing_indices[0].key_count())
530
454
        # now back to memory
531
 
        old = dict(builder._get_nodes_by_key()) #Build up the nodes by key dict
532
455
        builder.add_node(*nodes[2])
533
 
        self.assertEqual(1, len(builder._nodes))
534
 
        self.assertIsNot(None, builder._nodes_by_key)
 
456
        self.assertEqual(2, len(builder._nodes))
 
457
        self.assertEqual(1, len(builder._keys))
535
458
        self.assertNotEqual({}, builder._nodes_by_key)
536
 
        # We should have a new entry
537
 
        self.assertNotEqual(old, builder._nodes_by_key)
538
459
        # And spills to a second backing index combing all
539
460
        builder.add_node(*nodes[3])
540
461
        self.assertEqual(0, len(builder._nodes))
541
 
        self.assertIs(None, builder._nodes_by_key)
 
462
        self.assertEqual(0, len(builder._keys))
 
463
        self.assertEqual({}, builder._nodes_by_key)
542
464
        self.assertEqual(2, len(builder._backing_indices))
543
465
        self.assertEqual(None, builder._backing_indices[0])
544
466
        self.assertEqual(4, builder._backing_indices[1].key_count())
546
468
        builder.add_node(*nodes[4])
547
469
        builder.add_node(*nodes[5])
548
470
        self.assertEqual(0, len(builder._nodes))
549
 
        self.assertIs(None, builder._nodes_by_key)
 
471
        self.assertEqual(0, len(builder._keys))
 
472
        self.assertEqual({}, builder._nodes_by_key)
550
473
        self.assertEqual(2, len(builder._backing_indices))
551
474
        self.assertEqual(2, builder._backing_indices[0].key_count())
552
475
        self.assertEqual(4, builder._backing_indices[1].key_count())
618
541
        for key, value, references in nodes:
619
542
            builder.add_node(key, value, references)
620
543
        stream = builder.finish()
621
 
        trans = transport.get_transport_from_url('trace+' + self.get_url())
 
544
        trans = get_transport('trace+' + self.get_url())
622
545
        size = trans.put_file('index', stream)
623
546
        return btree_index.BTreeGraphIndex(trans, 'index', size)
624
547
 
625
 
    def make_index_with_offset(self, ref_lists=1, key_elements=1, nodes=[],
626
 
                               offset=0):
627
 
        builder = btree_index.BTreeBuilder(key_elements=key_elements,
628
 
                                           reference_lists=ref_lists)
629
 
        builder.add_nodes(nodes)
630
 
        transport = self.get_transport('')
631
 
        # NamedTemporaryFile dies on builder.finish().read(). weird.
632
 
        temp_file = builder.finish()
633
 
        content = temp_file.read()
634
 
        del temp_file
635
 
        size = len(content)
636
 
        transport.put_bytes('index', (' '*offset)+content)
637
 
        return btree_index.BTreeGraphIndex(transport, 'index', size=size,
638
 
                                           offset=offset)
639
 
 
640
 
    def test_clear_cache(self):
641
 
        nodes = self.make_nodes(160, 2, 2)
642
 
        index = self.make_index(ref_lists=2, key_elements=2, nodes=nodes)
643
 
        self.assertEqual(1, len(list(index.iter_entries([nodes[30][0]]))))
644
 
        self.assertEqual([1, 4], index._row_lengths)
645
 
        self.assertIsNot(None, index._root_node)
646
 
        internal_node_pre_clear = index._internal_node_cache.keys()
647
 
        self.assertTrue(len(index._leaf_node_cache) > 0)
648
 
        index.clear_cache()
649
 
        # We don't touch _root_node or _internal_node_cache, both should be
650
 
        # small, and can save a round trip or two
651
 
        self.assertIsNot(None, index._root_node)
652
 
        # NOTE: We don't want to affect the _internal_node_cache, as we expect
653
 
        #       it will be small, and if we ever do touch this index again, it
654
 
        #       will save round-trips.  This assertion isn't very strong,
655
 
        #       becuase without a 3-level index, we don't have any internal
656
 
        #       nodes cached.
657
 
        self.assertEqual(internal_node_pre_clear,
658
 
                         index._internal_node_cache.keys())
659
 
        self.assertEqual(0, len(index._leaf_node_cache))
660
 
 
661
548
    def test_trivial_constructor(self):
662
 
        t = transport.get_transport_from_url('trace+' + self.get_url(''))
663
 
        index = btree_index.BTreeGraphIndex(t, 'index', None)
 
549
        transport = get_transport('trace+' + self.get_url(''))
 
550
        index = btree_index.BTreeGraphIndex(transport, 'index', None)
664
551
        # Checks the page size at load, but that isn't logged yet.
665
 
        self.assertEqual([], t._activity)
 
552
        self.assertEqual([], transport._activity)
666
553
 
667
554
    def test_with_size_constructor(self):
668
 
        t = transport.get_transport_from_url('trace+' + self.get_url(''))
669
 
        index = btree_index.BTreeGraphIndex(t, 'index', 1)
 
555
        transport = get_transport('trace+' + self.get_url(''))
 
556
        index = btree_index.BTreeGraphIndex(transport, 'index', 1)
670
557
        # Checks the page size at load, but that isn't logged yet.
671
 
        self.assertEqual([], t._activity)
 
558
        self.assertEqual([], transport._activity)
672
559
 
673
560
    def test_empty_key_count_no_size(self):
674
561
        builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
675
 
        t = transport.get_transport_from_url('trace+' + self.get_url(''))
676
 
        t.put_file('index', builder.finish())
677
 
        index = btree_index.BTreeGraphIndex(t, 'index', None)
678
 
        del t._activity[:]
679
 
        self.assertEqual([], t._activity)
 
562
        transport = get_transport('trace+' + self.get_url(''))
 
563
        transport.put_file('index', builder.finish())
 
564
        index = btree_index.BTreeGraphIndex(transport, 'index', None)
 
565
        del transport._activity[:]
 
566
        self.assertEqual([], transport._activity)
680
567
        self.assertEqual(0, index.key_count())
681
568
        # The entire index should have been requested (as we generally have the
682
569
        # size available, and doing many small readvs is inappropriate).
683
570
        # We can't tell how much was actually read here, but - check the code.
684
 
        self.assertEqual([('get', 'index')], t._activity)
 
571
        self.assertEqual([('get', 'index'),
 
572
            ('readv', 'index', [(0, 72)], False, None)],
 
573
            transport._activity)
685
574
 
686
575
    def test_empty_key_count(self):
687
576
        builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
688
 
        t = transport.get_transport_from_url('trace+' + self.get_url(''))
689
 
        size = t.put_file('index', builder.finish())
 
577
        transport = get_transport('trace+' + self.get_url(''))
 
578
        size = transport.put_file('index', builder.finish())
690
579
        self.assertEqual(72, size)
691
 
        index = btree_index.BTreeGraphIndex(t, 'index', size)
692
 
        del t._activity[:]
693
 
        self.assertEqual([], t._activity)
 
580
        index = btree_index.BTreeGraphIndex(transport, 'index', size)
 
581
        del transport._activity[:]
 
582
        self.assertEqual([], transport._activity)
694
583
        self.assertEqual(0, index.key_count())
695
584
        # The entire index should have been read, as 4K > size
696
585
        self.assertEqual([('readv', 'index', [(0, 72)], False, None)],
697
 
                         t._activity)
 
586
            transport._activity)
698
587
 
699
588
    def test_non_empty_key_count_2_2(self):
700
589
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
701
590
        nodes = self.make_nodes(35, 2, 2)
702
591
        for node in nodes:
703
592
            builder.add_node(*node)
704
 
        t = transport.get_transport_from_url('trace+' + self.get_url(''))
705
 
        size = t.put_file('index', builder.finish())
706
 
        index = btree_index.BTreeGraphIndex(t, 'index', size)
707
 
        del t._activity[:]
708
 
        self.assertEqual([], t._activity)
 
593
        transport = get_transport('trace+' + self.get_url(''))
 
594
        size = transport.put_file('index', builder.finish())
 
595
        index = btree_index.BTreeGraphIndex(transport, 'index', size)
 
596
        del transport._activity[:]
 
597
        self.assertEqual([], transport._activity)
709
598
        self.assertEqual(70, index.key_count())
710
599
        # The entire index should have been read, as it is one page long.
711
600
        self.assertEqual([('readv', 'index', [(0, size)], False, None)],
712
 
            t._activity)
713
 
        self.assertEqualApproxCompressed(1173, size)
714
 
 
715
 
    def test_with_offset_no_size(self):
716
 
        index = self.make_index_with_offset(key_elements=1, ref_lists=1,
717
 
                                            offset=1234,
718
 
                                            nodes=self.make_nodes(200, 1, 1))
719
 
        index._size = None # throw away the size info
720
 
        self.assertEqual(200, index.key_count())
721
 
 
722
 
    def test_with_small_offset(self):
723
 
        index = self.make_index_with_offset(key_elements=1, ref_lists=1,
724
 
                                            offset=1234,
725
 
                                            nodes=self.make_nodes(200, 1, 1))
726
 
        self.assertEqual(200, index.key_count())
727
 
 
728
 
    def test_with_large_offset(self):
729
 
        index = self.make_index_with_offset(key_elements=1, ref_lists=1,
730
 
                                            offset=123456,
731
 
                                            nodes=self.make_nodes(200, 1, 1))
732
 
        self.assertEqual(200, index.key_count())
733
 
 
734
 
    def test__read_nodes_no_size_one_page_reads_once(self):
735
 
        self.make_index(nodes=[(('key',), 'value', ())])
736
 
        trans = transport.get_transport_from_url('trace+' + self.get_url())
737
 
        index = btree_index.BTreeGraphIndex(trans, 'index', None)
738
 
        del trans._activity[:]
739
 
        nodes = dict(index._read_nodes([0]))
740
 
        self.assertEqual([0], nodes.keys())
741
 
        node = nodes[0]
742
 
        self.assertEqual([('key',)], node.all_keys())
743
 
        self.assertEqual([('get', 'index')], trans._activity)
744
 
 
745
 
    def test__read_nodes_no_size_multiple_pages(self):
746
 
        index = self.make_index(2, 2, nodes=self.make_nodes(160, 2, 2))
747
 
        index.key_count()
748
 
        num_pages = index._row_offsets[-1]
749
 
        # Reopen with a traced transport and no size
750
 
        trans = transport.get_transport_from_url('trace+' + self.get_url())
751
 
        index = btree_index.BTreeGraphIndex(trans, 'index', None)
752
 
        del trans._activity[:]
753
 
        nodes = dict(index._read_nodes([0]))
754
 
        self.assertEqual(range(num_pages), nodes.keys())
 
601
            transport._activity)
 
602
        self.assertEqual(1199, size)
755
603
 
756
604
    def test_2_levels_key_count_2_2(self):
757
605
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
758
606
        nodes = self.make_nodes(160, 2, 2)
759
607
        for node in nodes:
760
608
            builder.add_node(*node)
761
 
        t = transport.get_transport_from_url('trace+' + self.get_url(''))
762
 
        size = t.put_file('index', builder.finish())
763
 
        self.assertEqualApproxCompressed(17692, size)
764
 
        index = btree_index.BTreeGraphIndex(t, 'index', size)
765
 
        del t._activity[:]
766
 
        self.assertEqual([], t._activity)
 
609
        transport = get_transport('trace+' + self.get_url(''))
 
610
        size = transport.put_file('index', builder.finish())
 
611
        self.assertEqual(17692, size)
 
612
        index = btree_index.BTreeGraphIndex(transport, 'index', size)
 
613
        del transport._activity[:]
 
614
        self.assertEqual([], transport._activity)
767
615
        self.assertEqual(320, index.key_count())
768
616
        # The entire index should not have been read.
769
617
        self.assertEqual([('readv', 'index', [(0, 4096)], False, None)],
770
 
                         t._activity)
 
618
            transport._activity)
771
619
 
772
620
    def test_validate_one_page(self):
773
621
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
774
622
        nodes = self.make_nodes(45, 2, 2)
775
623
        for node in nodes:
776
624
            builder.add_node(*node)
777
 
        t = transport.get_transport_from_url('trace+' + self.get_url(''))
778
 
        size = t.put_file('index', builder.finish())
779
 
        index = btree_index.BTreeGraphIndex(t, 'index', size)
780
 
        del t._activity[:]
781
 
        self.assertEqual([], t._activity)
 
625
        transport = get_transport('trace+' + self.get_url(''))
 
626
        size = transport.put_file('index', builder.finish())
 
627
        index = btree_index.BTreeGraphIndex(transport, 'index', size)
 
628
        del transport._activity[:]
 
629
        self.assertEqual([], transport._activity)
782
630
        index.validate()
783
631
        # The entire index should have been read linearly.
784
632
        self.assertEqual([('readv', 'index', [(0, size)], False, None)],
785
 
                         t._activity)
786
 
        self.assertEqualApproxCompressed(1488, size)
 
633
            transport._activity)
 
634
        self.assertEqual(1514, size)
787
635
 
788
636
    def test_validate_two_pages(self):
789
637
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
790
638
        nodes = self.make_nodes(80, 2, 2)
791
639
        for node in nodes:
792
640
            builder.add_node(*node)
793
 
        t = transport.get_transport_from_url('trace+' + self.get_url(''))
794
 
        size = t.put_file('index', builder.finish())
 
641
        transport = get_transport('trace+' + self.get_url(''))
 
642
        size = transport.put_file('index', builder.finish())
795
643
        # Root page, 2 leaf pages
796
 
        self.assertEqualApproxCompressed(9339, size)
797
 
        index = btree_index.BTreeGraphIndex(t, 'index', size)
798
 
        del t._activity[:]
799
 
        self.assertEqual([], t._activity)
 
644
        self.assertEqual(9339, size)
 
645
        index = btree_index.BTreeGraphIndex(transport, 'index', size)
 
646
        del transport._activity[:]
 
647
        self.assertEqual([], transport._activity)
800
648
        index.validate()
801
 
        rem = size - 8192 # Number of remaining bytes after second block
802
649
        # The entire index should have been read linearly.
803
 
        self.assertEqual(
804
 
            [('readv', 'index', [(0, 4096)], False, None),
805
 
             ('readv', 'index', [(4096, 4096), (8192, rem)], False, None)],
806
 
            t._activity)
 
650
        self.assertEqual([('readv', 'index', [(0, 4096)], False, None),
 
651
            ('readv', 'index', [(4096, 4096), (8192, 1147)], False, None)],
 
652
            transport._activity)
807
653
        # XXX: TODO: write some badly-ordered nodes, and some pointers-to-wrong
808
654
        # node and make validate find them.
809
655
 
810
656
    def test_eq_ne(self):
811
657
        # two indices are equal when constructed with the same parameters:
812
 
        t1 = transport.get_transport_from_url('trace+' + self.get_url(''))
813
 
        t2 = self.get_transport()
814
 
        self.assertTrue(
815
 
            btree_index.BTreeGraphIndex(t1, 'index', None) ==
816
 
            btree_index.BTreeGraphIndex(t1, 'index', None))
817
 
        self.assertTrue(
818
 
            btree_index.BTreeGraphIndex(t1, 'index', 20) ==
819
 
            btree_index.BTreeGraphIndex(t1, 'index', 20))
820
 
        self.assertFalse(
821
 
            btree_index.BTreeGraphIndex(t1, 'index', 20) ==
822
 
            btree_index.BTreeGraphIndex(t2, 'index', 20))
823
 
        self.assertFalse(
824
 
            btree_index.BTreeGraphIndex(t1, 'inde1', 20) ==
825
 
            btree_index.BTreeGraphIndex(t1, 'inde2', 20))
826
 
        self.assertFalse(
827
 
            btree_index.BTreeGraphIndex(t1, 'index', 10) ==
828
 
            btree_index.BTreeGraphIndex(t1, 'index', 20))
829
 
        self.assertFalse(
830
 
            btree_index.BTreeGraphIndex(t1, 'index', None) !=
831
 
            btree_index.BTreeGraphIndex(t1, 'index', None))
832
 
        self.assertFalse(
833
 
            btree_index.BTreeGraphIndex(t1, 'index', 20) !=
834
 
            btree_index.BTreeGraphIndex(t1, 'index', 20))
835
 
        self.assertTrue(
836
 
            btree_index.BTreeGraphIndex(t1, 'index', 20) !=
837
 
            btree_index.BTreeGraphIndex(t2, 'index', 20))
838
 
        self.assertTrue(
839
 
            btree_index.BTreeGraphIndex(t1, 'inde1', 20) !=
840
 
            btree_index.BTreeGraphIndex(t1, 'inde2', 20))
841
 
        self.assertTrue(
842
 
            btree_index.BTreeGraphIndex(t1, 'index', 10) !=
843
 
            btree_index.BTreeGraphIndex(t1, 'index', 20))
844
 
 
845
 
    def test_key_too_big(self):
846
 
        # the size that matters here is the _compressed_ size of the key, so we can't
847
 
        # do a simple character repeat.
848
 
        bigKey = ''.join(map(repr, xrange(btree_index._PAGE_SIZE)))
849
 
        self.assertRaises(errors.BadIndexKey,
850
 
                          self.make_index,
851
 
                          nodes=[((bigKey,), 'value', ())])
852
 
        
853
 
    def test_iter_all_only_root_no_size(self):
854
 
        self.make_index(nodes=[(('key',), 'value', ())])
855
 
        t = transport.get_transport_from_url('trace+' + self.get_url(''))
856
 
        index = btree_index.BTreeGraphIndex(t, 'index', None)
857
 
        del t._activity[:]
858
 
        self.assertEqual([(('key',), 'value')],
859
 
                         [x[1:] for x in index.iter_all_entries()])
860
 
        self.assertEqual([('get', 'index')], t._activity)
 
658
        transport1 = get_transport('trace+' + self.get_url(''))
 
659
        transport2 = get_transport(self.get_url(''))
 
660
        self.assertTrue(
 
661
            btree_index.BTreeGraphIndex(transport1, 'index', None) ==
 
662
            btree_index.BTreeGraphIndex(transport1, 'index', None))
 
663
        self.assertTrue(
 
664
            btree_index.BTreeGraphIndex(transport1, 'index', 20) ==
 
665
            btree_index.BTreeGraphIndex(transport1, 'index', 20))
 
666
        self.assertFalse(
 
667
            btree_index.BTreeGraphIndex(transport1, 'index', 20) ==
 
668
            btree_index.BTreeGraphIndex(transport2, 'index', 20))
 
669
        self.assertFalse(
 
670
            btree_index.BTreeGraphIndex(transport1, 'inde1', 20) ==
 
671
            btree_index.BTreeGraphIndex(transport1, 'inde2', 20))
 
672
        self.assertFalse(
 
673
            btree_index.BTreeGraphIndex(transport1, 'index', 10) ==
 
674
            btree_index.BTreeGraphIndex(transport1, 'index', 20))
 
675
        self.assertFalse(
 
676
            btree_index.BTreeGraphIndex(transport1, 'index', None) !=
 
677
            btree_index.BTreeGraphIndex(transport1, 'index', None))
 
678
        self.assertFalse(
 
679
            btree_index.BTreeGraphIndex(transport1, 'index', 20) !=
 
680
            btree_index.BTreeGraphIndex(transport1, 'index', 20))
 
681
        self.assertTrue(
 
682
            btree_index.BTreeGraphIndex(transport1, 'index', 20) !=
 
683
            btree_index.BTreeGraphIndex(transport2, 'index', 20))
 
684
        self.assertTrue(
 
685
            btree_index.BTreeGraphIndex(transport1, 'inde1', 20) !=
 
686
            btree_index.BTreeGraphIndex(transport1, 'inde2', 20))
 
687
        self.assertTrue(
 
688
            btree_index.BTreeGraphIndex(transport1, 'index', 10) !=
 
689
            btree_index.BTreeGraphIndex(transport1, 'index', 20))
861
690
 
862
691
    def test_iter_all_entries_reads(self):
863
692
        # iterating all entries reads the header, then does a linear
869
698
        nodes = self.make_nodes(10000, 2, 2)
870
699
        for node in nodes:
871
700
            builder.add_node(*node)
872
 
        t = transport.get_transport_from_url('trace+' + self.get_url(''))
873
 
        size = t.put_file('index', builder.finish())
 
701
        transport = get_transport('trace+' + self.get_url(''))
 
702
        size = transport.put_file('index', builder.finish())
 
703
        self.assertEqual(1303220, size, 'number of expected bytes in the'
 
704
                                        ' output changed')
874
705
        page_size = btree_index._PAGE_SIZE
875
706
        del builder
876
 
        index = btree_index.BTreeGraphIndex(t, 'index', size)
877
 
        del t._activity[:]
878
 
        self.assertEqual([], t._activity)
 
707
        index = btree_index.BTreeGraphIndex(transport, 'index', size)
 
708
        del transport._activity[:]
 
709
        self.assertEqual([], transport._activity)
879
710
        found_nodes = self.time(list, index.iter_all_entries())
880
711
        bare_nodes = []
881
712
        for node in found_nodes:
892
723
        # The entire index should have been read
893
724
        total_pages = sum(index._row_lengths)
894
725
        self.assertEqual(total_pages, index._row_offsets[-1])
895
 
        self.assertEqualApproxCompressed(1303220, size)
 
726
        self.assertEqual(1303220, size)
896
727
        # The start of the leaves
897
728
        first_byte = index._row_offsets[-2] * page_size
898
729
        readv_request = []
899
730
        for offset in range(first_byte, size, page_size):
900
731
            readv_request.append((offset, page_size))
901
732
        # The last page is truncated
902
 
        readv_request[-1] = (readv_request[-1][0], size % page_size)
 
733
        readv_request[-1] = (readv_request[-1][0], 1303220 % page_size)
903
734
        expected = [('readv', 'index', [(0, page_size)], False, None),
904
735
             ('readv',  'index', readv_request, False, None)]
905
 
        if expected != t._activity:
 
736
        if expected != transport._activity:
906
737
            self.assertEqualDiff(pprint.pformat(expected),
907
 
                                 pprint.pformat(t._activity))
 
738
                                 pprint.pformat(transport._activity))
908
739
 
909
740
    def _test_iter_entries_references_resolved(self):
910
741
        index = self.make_index(1, nodes=[
922
753
        nodes = self.make_nodes(160, 2, 2)
923
754
        for node in nodes:
924
755
            builder.add_node(*node)
925
 
        t = transport.get_transport_from_url('trace+' + self.get_url(''))
926
 
        size = t.put_file('index', builder.finish())
 
756
        transport = get_transport('trace+' + self.get_url(''))
 
757
        size = transport.put_file('index', builder.finish())
927
758
        del builder
928
 
        index = btree_index.BTreeGraphIndex(t, 'index', size)
929
 
        del t._activity[:]
930
 
        self.assertEqual([], t._activity)
 
759
        index = btree_index.BTreeGraphIndex(transport, 'index', size)
 
760
        del transport._activity[:]
 
761
        self.assertEqual([], transport._activity)
931
762
        # search for one key
932
763
        found_nodes = list(index.iter_entries([nodes[30][0]]))
933
764
        bare_nodes = []
941
772
        # Should have read the root node, then one leaf page:
942
773
        self.assertEqual([('readv', 'index', [(0, 4096)], False, None),
943
774
             ('readv',  'index', [(8192, 4096), ], False, None)],
944
 
            t._activity)
 
775
            transport._activity)
945
776
 
946
777
    def test_iter_key_prefix_1_element_key_None(self):
947
778
        index = self.make_index()
998
829
            (index, ('name', 'fin2'), 'beta', ((), ))]),
999
830
            set(index.iter_entries_prefix([('name', None)])))
1000
831
 
1001
 
    # XXX: external_references tests are duplicated in test_index.  We
1002
 
    # probably should have per_graph_index tests...
1003
 
    def test_external_references_no_refs(self):
1004
 
        index = self.make_index(ref_lists=0, nodes=[])
1005
 
        self.assertRaises(ValueError, index.external_references, 0)
1006
 
 
1007
 
    def test_external_references_no_results(self):
1008
 
        index = self.make_index(ref_lists=1, nodes=[
1009
 
            (('key',), 'value', ([],))])
1010
 
        self.assertEqual(set(), index.external_references(0))
1011
 
 
1012
 
    def test_external_references_missing_ref(self):
1013
 
        missing_key = ('missing',)
1014
 
        index = self.make_index(ref_lists=1, nodes=[
1015
 
            (('key',), 'value', ([missing_key],))])
1016
 
        self.assertEqual(set([missing_key]), index.external_references(0))
1017
 
 
1018
 
    def test_external_references_multiple_ref_lists(self):
1019
 
        missing_key = ('missing',)
1020
 
        index = self.make_index(ref_lists=2, nodes=[
1021
 
            (('key',), 'value', ([], [missing_key]))])
1022
 
        self.assertEqual(set([]), index.external_references(0))
1023
 
        self.assertEqual(set([missing_key]), index.external_references(1))
1024
 
 
1025
 
    def test_external_references_two_records(self):
1026
 
        index = self.make_index(ref_lists=1, nodes=[
1027
 
            (('key-1',), 'value', ([('key-2',)],)),
1028
 
            (('key-2',), 'value', ([],)),
1029
 
            ])
1030
 
        self.assertEqual(set([]), index.external_references(0))
1031
 
 
1032
 
    def test__find_ancestors_one_page(self):
1033
 
        key1 = ('key-1',)
1034
 
        key2 = ('key-2',)
1035
 
        index = self.make_index(ref_lists=1, key_elements=1, nodes=[
1036
 
            (key1, 'value', ([key2],)),
1037
 
            (key2, 'value', ([],)),
1038
 
            ])
1039
 
        parent_map = {}
1040
 
        missing_keys = set()
1041
 
        search_keys = index._find_ancestors([key1], 0, parent_map, missing_keys)
1042
 
        self.assertEqual({key1: (key2,), key2: ()}, parent_map)
1043
 
        self.assertEqual(set(), missing_keys)
1044
 
        self.assertEqual(set(), search_keys)
1045
 
 
1046
 
    def test__find_ancestors_one_page_w_missing(self):
1047
 
        key1 = ('key-1',)
1048
 
        key2 = ('key-2',)
1049
 
        key3 = ('key-3',)
1050
 
        index = self.make_index(ref_lists=1, key_elements=1, nodes=[
1051
 
            (key1, 'value', ([key2],)),
1052
 
            (key2, 'value', ([],)),
1053
 
            ])
1054
 
        parent_map = {}
1055
 
        missing_keys = set()
1056
 
        search_keys = index._find_ancestors([key2, key3], 0, parent_map,
1057
 
                                            missing_keys)
1058
 
        self.assertEqual({key2: ()}, parent_map)
1059
 
        # we know that key3 is missing because we read the page that it would
1060
 
        # otherwise be on
1061
 
        self.assertEqual(set([key3]), missing_keys)
1062
 
        self.assertEqual(set(), search_keys)
1063
 
 
1064
 
    def test__find_ancestors_one_parent_missing(self):
1065
 
        key1 = ('key-1',)
1066
 
        key2 = ('key-2',)
1067
 
        key3 = ('key-3',)
1068
 
        index = self.make_index(ref_lists=1, key_elements=1, nodes=[
1069
 
            (key1, 'value', ([key2],)),
1070
 
            (key2, 'value', ([key3],)),
1071
 
            ])
1072
 
        parent_map = {}
1073
 
        missing_keys = set()
1074
 
        search_keys = index._find_ancestors([key1], 0, parent_map,
1075
 
                                            missing_keys)
1076
 
        self.assertEqual({key1: (key2,), key2: (key3,)}, parent_map)
1077
 
        self.assertEqual(set(), missing_keys)
1078
 
        # all we know is that key3 wasn't present on the page we were reading
1079
 
        # but if you look, the last key is key2 which comes before key3, so we
1080
 
        # don't know whether key3 would land on this page or not.
1081
 
        self.assertEqual(set([key3]), search_keys)
1082
 
        search_keys = index._find_ancestors(search_keys, 0, parent_map,
1083
 
                                            missing_keys)
1084
 
        # passing it back in, we are sure it is 'missing'
1085
 
        self.assertEqual({key1: (key2,), key2: (key3,)}, parent_map)
1086
 
        self.assertEqual(set([key3]), missing_keys)
1087
 
        self.assertEqual(set([]), search_keys)
1088
 
 
1089
 
    def test__find_ancestors_dont_search_known(self):
1090
 
        key1 = ('key-1',)
1091
 
        key2 = ('key-2',)
1092
 
        key3 = ('key-3',)
1093
 
        index = self.make_index(ref_lists=1, key_elements=1, nodes=[
1094
 
            (key1, 'value', ([key2],)),
1095
 
            (key2, 'value', ([key3],)),
1096
 
            (key3, 'value', ([],)),
1097
 
            ])
1098
 
        # We already know about key2, so we won't try to search for key3
1099
 
        parent_map = {key2: (key3,)}
1100
 
        missing_keys = set()
1101
 
        search_keys = index._find_ancestors([key1], 0, parent_map,
1102
 
                                            missing_keys)
1103
 
        self.assertEqual({key1: (key2,), key2: (key3,)}, parent_map)
1104
 
        self.assertEqual(set(), missing_keys)
1105
 
        self.assertEqual(set(), search_keys)
1106
 
 
1107
 
    def test__find_ancestors_multiple_pages(self):
1108
 
        # We need to use enough keys that we actually cause a split
1109
 
        start_time = 1249671539
1110
 
        email = "joebob@example.com"
1111
 
        nodes = []
1112
 
        ref_lists = ((),)
1113
 
        rev_keys = []
1114
 
        for i in xrange(400):
1115
 
            rev_id = '%s-%s-%s' % (email,
1116
 
                                   osutils.compact_date(start_time + i),
1117
 
                                   osutils.rand_chars(16))
1118
 
            rev_key = (rev_id,)
1119
 
            nodes.append((rev_key, 'value', ref_lists))
1120
 
            # We have a ref 'list' of length 1, with a list of parents, with 1
1121
 
            # parent which is a key
1122
 
            ref_lists = ((rev_key,),)
1123
 
            rev_keys.append(rev_key)
1124
 
        index = self.make_index(ref_lists=1, key_elements=1, nodes=nodes)
1125
 
        self.assertEqual(400, index.key_count())
1126
 
        self.assertEqual(3, len(index._row_offsets))
1127
 
        nodes = dict(index._read_nodes([1, 2]))
1128
 
        l1 = nodes[1]
1129
 
        l2 = nodes[2]
1130
 
        min_l2_key = l2.min_key
1131
 
        max_l1_key = l1.max_key
1132
 
        self.assertTrue(max_l1_key < min_l2_key)
1133
 
        parents_min_l2_key = l2[min_l2_key][1][0]
1134
 
        self.assertEqual((l1.max_key,), parents_min_l2_key)
1135
 
        # Now, whatever key we select that would fall on the second page,
1136
 
        # should give us all the parents until the page break
1137
 
        key_idx = rev_keys.index(min_l2_key)
1138
 
        next_key = rev_keys[key_idx+1]
1139
 
        # So now when we get the parent map, we should get the key we are
1140
 
        # looking for, min_l2_key, and then a reference to go look for the
1141
 
        # parent of that key
1142
 
        parent_map = {}
1143
 
        missing_keys = set()
1144
 
        search_keys = index._find_ancestors([next_key], 0, parent_map,
1145
 
                                            missing_keys)
1146
 
        self.assertEqual([min_l2_key, next_key], sorted(parent_map))
1147
 
        self.assertEqual(set(), missing_keys)
1148
 
        self.assertEqual(set([max_l1_key]), search_keys)
1149
 
        parent_map = {}
1150
 
        search_keys = index._find_ancestors([max_l1_key], 0, parent_map,
1151
 
                                            missing_keys)
1152
 
        self.assertEqual(l1.all_keys(), sorted(parent_map))
1153
 
        self.assertEqual(set(), missing_keys)
1154
 
        self.assertEqual(set(), search_keys)
1155
 
 
1156
 
    def test__find_ancestors_empty_index(self):
1157
 
        index = self.make_index(ref_lists=1, key_elements=1, nodes=[])
1158
 
        parent_map = {}
1159
 
        missing_keys = set()
1160
 
        search_keys = index._find_ancestors([('one',), ('two',)], 0, parent_map,
1161
 
                                            missing_keys)
1162
 
        self.assertEqual(set(), search_keys)
1163
 
        self.assertEqual({}, parent_map)
1164
 
        self.assertEqual(set([('one',), ('two',)]), missing_keys)
1165
 
 
1166
 
    def test_supports_unlimited_cache(self):
1167
 
        builder = btree_index.BTreeBuilder(reference_lists=0, key_elements=1)
1168
 
        # We need enough nodes to cause a page split (so we have both an
1169
 
        # internal node and a couple leaf nodes. 500 seems to be enough.)
1170
 
        nodes = self.make_nodes(500, 1, 0)
1171
 
        for node in nodes:
1172
 
            builder.add_node(*node)
1173
 
        stream = builder.finish()
1174
 
        trans = self.get_transport()
1175
 
        size = trans.put_file('index', stream)
1176
 
        index = btree_index.BTreeGraphIndex(trans, 'index', size)
1177
 
        self.assertEqual(500, index.key_count())
1178
 
        # We have an internal node
1179
 
        self.assertEqual(2, len(index._row_lengths))
1180
 
        # We have at least 2 leaf nodes
1181
 
        self.assertTrue(index._row_lengths[-1] >= 2)
1182
 
        self.assertIsInstance(index._leaf_node_cache, lru_cache.LRUCache)
1183
 
        self.assertEqual(btree_index._NODE_CACHE_SIZE,
1184
 
                         index._leaf_node_cache._max_cache)
1185
 
        self.assertIsInstance(index._internal_node_cache, fifo_cache.FIFOCache)
1186
 
        self.assertEqual(100, index._internal_node_cache._max_cache)
1187
 
        # No change if unlimited_cache=False is passed
1188
 
        index = btree_index.BTreeGraphIndex(trans, 'index', size,
1189
 
                                            unlimited_cache=False)
1190
 
        self.assertIsInstance(index._leaf_node_cache, lru_cache.LRUCache)
1191
 
        self.assertEqual(btree_index._NODE_CACHE_SIZE,
1192
 
                         index._leaf_node_cache._max_cache)
1193
 
        self.assertIsInstance(index._internal_node_cache, fifo_cache.FIFOCache)
1194
 
        self.assertEqual(100, index._internal_node_cache._max_cache)
1195
 
        index = btree_index.BTreeGraphIndex(trans, 'index', size,
1196
 
                                            unlimited_cache=True)
1197
 
        self.assertIsInstance(index._leaf_node_cache, dict)
1198
 
        self.assertIs(type(index._internal_node_cache), dict)
1199
 
        # Exercise the lookup code
1200
 
        entries = set(index.iter_entries([n[0] for n in nodes]))
1201
 
        self.assertEqual(500, len(entries))
1202
 
 
1203
832
 
1204
833
class TestBTreeNodes(BTreeTestCase):
1205
834
 
1206
 
    scenarios = btreeparser_scenarios()
 
835
    def restore_parser(self):
 
836
        btree_index._btree_serializer = self.saved_parser
1207
837
 
1208
838
    def setUp(self):
1209
 
        super(TestBTreeNodes, self).setUp()
1210
 
        self.overrideAttr(btree_index, '_btree_serializer', self.parse_btree)
 
839
        BTreeTestCase.setUp(self)
 
840
        self.saved_parser = btree_index._btree_serializer
 
841
        self.addCleanup(self.restore_parser)
 
842
        btree_index._btree_serializer = self.parse_btree
1211
843
 
1212
844
    def test_LeafNode_1_0(self):
1213
845
        node_bytes = ("type=leaf\n"
1225
857
            ("2222222222222222222222222222222222222222",): ("value:2", ()),
1226
858
            ("3333333333333333333333333333333333333333",): ("value:3", ()),
1227
859
            ("4444444444444444444444444444444444444444",): ("value:4", ()),
1228
 
            }, dict(node.all_items()))
 
860
            }, node.keys)
1229
861
 
1230
862
    def test_LeafNode_2_2(self):
1231
863
        node_bytes = ("type=leaf\n"
1245
877
            ('11', '33'): ('value:3',
1246
878
                ((('11', 'ref22'),), (('11', 'ref22'), ('11', 'ref22')))),
1247
879
            ('11', '44'): ('value:4', ((), (('11', 'ref00'),)))
1248
 
            }, dict(node.all_items()))
 
880
            }, node.keys)
1249
881
 
1250
882
    def test_InternalNode_1(self):
1251
883
        node_bytes = ("type=internal\n"
1286
918
            ('11', '33'): ('value:3',
1287
919
                ((('11', 'ref22'),), (('11', 'ref22'), ('11', 'ref22')))),
1288
920
            ('11', '44'): ('value:4', ((), (('11', 'ref00'),)))
1289
 
            }, dict(node.all_items()))
 
921
            }, node.keys)
1290
922
 
1291
923
    def assertFlattened(self, expected, key, value, refs):
1292
924
        flat_key, flat_line = self.parse_btree._flatten_node(
1324
956
    def test_exists(self):
1325
957
        # This is just to let the user know if they don't have the feature
1326
958
        # available
1327
 
        self.requireFeature(compiled_btreeparser_feature)
 
959
        self.requireFeature(CompiledBtreeParserFeature)
1328
960
 
1329
961
 
1330
962
class TestMultiBisectRight(tests.TestCase):
1360
992
                                     (4, ['g', 'h'])],
1361
993
                                    ['a', 'b', 'd', 'e', 'g', 'h'],
1362
994
                                    ['c', 'd', 'f', 'g'])
1363
 
 
1364
 
 
1365
 
class TestExpandOffsets(tests.TestCase):
1366
 
 
1367
 
    def make_index(self, size, recommended_pages=None):
1368
 
        """Make an index with a generic size.
1369
 
 
1370
 
        This doesn't actually create anything on disk, it just primes a
1371
 
        BTreeGraphIndex with the recommended information.
1372
 
        """
1373
 
        index = btree_index.BTreeGraphIndex(
1374
 
            transport.get_transport_from_url('memory:///'),
1375
 
            'test-index', size=size)
1376
 
        if recommended_pages is not None:
1377
 
            index._recommended_pages = recommended_pages
1378
 
        return index
1379
 
 
1380
 
    def set_cached_offsets(self, index, cached_offsets):
1381
 
        """Monkeypatch to give a canned answer for _get_offsets_for...()."""
1382
 
        def _get_offsets_to_cached_pages():
1383
 
            cached = set(cached_offsets)
1384
 
            return cached
1385
 
        index._get_offsets_to_cached_pages = _get_offsets_to_cached_pages
1386
 
 
1387
 
    def prepare_index(self, index, node_ref_lists, key_length, key_count,
1388
 
                      row_lengths, cached_offsets):
1389
 
        """Setup the BTreeGraphIndex with some pre-canned information."""
1390
 
        index.node_ref_lists = node_ref_lists
1391
 
        index._key_length = key_length
1392
 
        index._key_count = key_count
1393
 
        index._row_lengths = row_lengths
1394
 
        index._compute_row_offsets()
1395
 
        index._root_node = btree_index._InternalNode('internal\noffset=0\n')
1396
 
        self.set_cached_offsets(index, cached_offsets)
1397
 
 
1398
 
    def make_100_node_index(self):
1399
 
        index = self.make_index(4096*100, 6)
1400
 
        # Consider we've already made a single request at the middle
1401
 
        self.prepare_index(index, node_ref_lists=0, key_length=1,
1402
 
                           key_count=1000, row_lengths=[1, 99],
1403
 
                           cached_offsets=[0, 50])
1404
 
        return index
1405
 
 
1406
 
    def make_1000_node_index(self):
1407
 
        index = self.make_index(4096*1000, 6)
1408
 
        # Pretend we've already made a single request in the middle
1409
 
        self.prepare_index(index, node_ref_lists=0, key_length=1,
1410
 
                           key_count=90000, row_lengths=[1, 9, 990],
1411
 
                           cached_offsets=[0, 5, 500])
1412
 
        return index
1413
 
 
1414
 
    def assertNumPages(self, expected_pages, index, size):
1415
 
        index._size = size
1416
 
        self.assertEqual(expected_pages, index._compute_total_pages_in_index())
1417
 
 
1418
 
    def assertExpandOffsets(self, expected, index, offsets):
1419
 
        self.assertEqual(expected, index._expand_offsets(offsets),
1420
 
                         'We did not get the expected value after expanding'
1421
 
                         ' %s' % (offsets,))
1422
 
 
1423
 
    def test_default_recommended_pages(self):
1424
 
        index = self.make_index(None)
1425
 
        # local transport recommends 4096 byte reads, which is 1 page
1426
 
        self.assertEqual(1, index._recommended_pages)
1427
 
 
1428
 
    def test__compute_total_pages_in_index(self):
1429
 
        index = self.make_index(None)
1430
 
        self.assertNumPages(1, index, 1024)
1431
 
        self.assertNumPages(1, index, 4095)
1432
 
        self.assertNumPages(1, index, 4096)
1433
 
        self.assertNumPages(2, index, 4097)
1434
 
        self.assertNumPages(2, index, 8192)
1435
 
        self.assertNumPages(76, index, 4096*75 + 10)
1436
 
 
1437
 
    def test__find_layer_start_and_stop(self):
1438
 
        index = self.make_1000_node_index()
1439
 
        self.assertEqual((0, 1), index._find_layer_first_and_end(0))
1440
 
        self.assertEqual((1, 10), index._find_layer_first_and_end(1))
1441
 
        self.assertEqual((1, 10), index._find_layer_first_and_end(9))
1442
 
        self.assertEqual((10, 1000), index._find_layer_first_and_end(10))
1443
 
        self.assertEqual((10, 1000), index._find_layer_first_and_end(99))
1444
 
        self.assertEqual((10, 1000), index._find_layer_first_and_end(999))
1445
 
 
1446
 
    def test_unknown_size(self):
1447
 
        # We should not expand if we don't know the file size
1448
 
        index = self.make_index(None, 10)
1449
 
        self.assertExpandOffsets([0], index, [0])
1450
 
        self.assertExpandOffsets([1, 4, 9], index, [1, 4, 9])
1451
 
 
1452
 
    def test_more_than_recommended(self):
1453
 
        index = self.make_index(4096*100, 2)
1454
 
        self.assertExpandOffsets([1, 10], index, [1, 10])
1455
 
        self.assertExpandOffsets([1, 10, 20], index, [1, 10, 20])
1456
 
 
1457
 
    def test_read_all_from_root(self):
1458
 
        index = self.make_index(4096*10, 20)
1459
 
        self.assertExpandOffsets(range(10), index, [0])
1460
 
 
1461
 
    def test_read_all_when_cached(self):
1462
 
        # We've read enough that we can grab all the rest in a single request
1463
 
        index = self.make_index(4096*10, 5)
1464
 
        self.prepare_index(index, node_ref_lists=0, key_length=1,
1465
 
                           key_count=1000, row_lengths=[1, 9],
1466
 
                           cached_offsets=[0, 1, 2, 5, 6])
1467
 
        # It should fill the remaining nodes, regardless of the one requested
1468
 
        self.assertExpandOffsets([3, 4, 7, 8, 9], index, [3])
1469
 
        self.assertExpandOffsets([3, 4, 7, 8, 9], index, [8])
1470
 
        self.assertExpandOffsets([3, 4, 7, 8, 9], index, [9])
1471
 
 
1472
 
    def test_no_root_node(self):
1473
 
        index = self.make_index(4096*10, 5)
1474
 
        self.assertExpandOffsets([0], index, [0])
1475
 
 
1476
 
    def test_include_neighbors(self):
1477
 
        index = self.make_100_node_index()
1478
 
        # We expand in both directions, until we have at least 'recommended'
1479
 
        # pages
1480
 
        self.assertExpandOffsets([9, 10, 11, 12, 13, 14, 15], index, [12])
1481
 
        self.assertExpandOffsets([88, 89, 90, 91, 92, 93, 94], index, [91])
1482
 
        # If we hit an 'edge' we continue in the other direction
1483
 
        self.assertExpandOffsets([1, 2, 3, 4, 5, 6], index, [2])
1484
 
        self.assertExpandOffsets([94, 95, 96, 97, 98, 99], index, [98])
1485
 
 
1486
 
        # Requesting many nodes will expand all locations equally
1487
 
        self.assertExpandOffsets([1, 2, 3, 80, 81, 82], index, [2, 81])
1488
 
        self.assertExpandOffsets([1, 2, 3, 9, 10, 11, 80, 81, 82], index,
1489
 
                               [2, 10, 81])
1490
 
 
1491
 
    def test_stop_at_cached(self):
1492
 
        index = self.make_100_node_index()
1493
 
        self.set_cached_offsets(index, [0, 10, 19])
1494
 
        self.assertExpandOffsets([11, 12, 13, 14, 15, 16], index, [11])
1495
 
        self.assertExpandOffsets([11, 12, 13, 14, 15, 16], index, [12])
1496
 
        self.assertExpandOffsets([12, 13, 14, 15, 16, 17, 18], index, [15])
1497
 
        self.assertExpandOffsets([13, 14, 15, 16, 17, 18], index, [16])
1498
 
        self.assertExpandOffsets([13, 14, 15, 16, 17, 18], index, [17])
1499
 
        self.assertExpandOffsets([13, 14, 15, 16, 17, 18], index, [18])
1500
 
 
1501
 
    def test_cannot_fully_expand(self):
1502
 
        index = self.make_100_node_index()
1503
 
        self.set_cached_offsets(index, [0, 10, 12])
1504
 
        # We don't go into an endless loop if we are bound by cached nodes
1505
 
        self.assertExpandOffsets([11], index, [11])
1506
 
 
1507
 
    def test_overlap(self):
1508
 
        index = self.make_100_node_index()
1509
 
        self.assertExpandOffsets([10, 11, 12, 13, 14, 15], index, [12, 13])
1510
 
        self.assertExpandOffsets([10, 11, 12, 13, 14, 15], index, [11, 14])
1511
 
 
1512
 
    def test_stay_within_layer(self):
1513
 
        index = self.make_1000_node_index()
1514
 
        # When expanding a request, we won't read nodes from the next layer
1515
 
        self.assertExpandOffsets([1, 2, 3, 4], index, [2])
1516
 
        self.assertExpandOffsets([6, 7, 8, 9], index, [6])
1517
 
        self.assertExpandOffsets([6, 7, 8, 9], index, [9])
1518
 
        self.assertExpandOffsets([10, 11, 12, 13, 14, 15], index, [10])
1519
 
        self.assertExpandOffsets([10, 11, 12, 13, 14, 15, 16], index, [13])
1520
 
 
1521
 
        self.set_cached_offsets(index, [0, 4, 12])
1522
 
        self.assertExpandOffsets([5, 6, 7, 8, 9], index, [7])
1523
 
        self.assertExpandOffsets([10, 11], index, [11])
1524
 
 
1525
 
    def test_small_requests_unexpanded(self):
1526
 
        index = self.make_100_node_index()
1527
 
        self.set_cached_offsets(index, [0])
1528
 
        self.assertExpandOffsets([1], index, [1])
1529
 
        self.assertExpandOffsets([50], index, [50])
1530
 
        # If we request more than one node, then we'll expand
1531
 
        self.assertExpandOffsets([49, 50, 51, 59, 60, 61], index, [50, 60])
1532
 
 
1533
 
        # The first pass does not expand
1534
 
        index = self.make_1000_node_index()
1535
 
        self.set_cached_offsets(index, [0])
1536
 
        self.assertExpandOffsets([1], index, [1])
1537
 
        self.set_cached_offsets(index, [0, 1])
1538
 
        self.assertExpandOffsets([100], index, [100])
1539
 
        self.set_cached_offsets(index, [0, 1, 100])
1540
 
        # But after the first depth, we will expand
1541
 
        self.assertExpandOffsets([2, 3, 4, 5, 6, 7], index, [2])
1542
 
        self.assertExpandOffsets([2, 3, 4, 5, 6, 7], index, [4])
1543
 
        self.set_cached_offsets(index, [0, 1, 2, 3, 4, 5, 6, 7, 100])
1544
 
        self.assertExpandOffsets([102, 103, 104, 105, 106, 107, 108], index,
1545
 
                                 [105])