Branch: ~bzr-pqm/bzr/bzr.dev

Viewing changes to bzrlib/tests/test_btree_index.py

  • Committer: Ian Clatworthy
  • Date: 2010-02-19 03:02:07 UTC
  • mto: (4797.23.1 integration-2.1)
  • mto: This revision was merged to the branch mainline in revision 5055.
  • Revision ID: ian.clatworthy@canonical.com-20100219030207-zpbzx021zavx4sqt
Commit message: What's New in 2.1 - a summary of changes since 2.0

--- bzrlib/tests/test_btree_index.py (old)
+++ bzrlib/tests/test_btree_index.py (new)
@@ -1,4 +1,4 @@
-# Copyright (C) 2008 Canonical Ltd
+# Copyright (C) 2008, 2009, 2010 Canonical Ltd
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -23,6 +23,9 @@
 from bzrlib import (
     btree_index,
     errors,
+    fifo_cache,
+    lru_cache,
+    osutils,
     tests,
     )
 from bzrlib.tests import (
@@ -40,26 +43,14 @@
         condition_isinstance(TestBTreeNodes))
     import bzrlib._btree_serializer_py as py_module
     scenarios = [('python', {'parse_btree': py_module})]
-    if CompiledBtreeParserFeature.available():
-        # Is there a way to do this that gets missing feature failures rather
-        # than no indication to the user?
-        import bzrlib._btree_serializer_pyx as c_module
-        scenarios.append(('C', {'parse_btree': c_module}))
+    if compiled_btreeparser_feature.available():
+        scenarios.append(('C', {'parse_btree':
+                                compiled_btreeparser_feature.module}))
     return multiply_tests(node_tests, scenarios, others)
 
 
-class _CompiledBtreeParserFeature(tests.Feature):
-    def _probe(self):
-        try:
-            import bzrlib._btree_serializer_pyx
-        except ImportError:
-            return False
-        return True
-
-    def feature_name(self):
-        return 'bzrlib._btree_serializer_pyx'
-
-CompiledBtreeParserFeature = _CompiledBtreeParserFeature()
+compiled_btreeparser_feature = tests.ModuleAvailableFeature(
+                                'bzrlib._btree_serializer_pyx')
 
 
 class BTreeTestCase(TestCaseWithTransport):
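
The removed _CompiledBtreeParserFeature was a hand-rolled probe for the compiled extension; the new code replaces it with tests.ModuleAvailableFeature, which also exposes the imported module via .module, so the scenario list no longer needs its own import. A minimal sketch of what such a feature does follows (an illustration only, not bzrlib's actual implementation; the class name here is made up):

import importlib


class ModuleAvailableFeatureSketch(object):
    """Probe an import once, cache the result, and expose the module."""

    def __init__(self, module_name):
        self.module_name = module_name
        self._module = None
        self._probed = False

    def available(self):
        # Probe lazily and remember the outcome so repeated checks are cheap.
        if not self._probed:
            try:
                self._module = importlib.import_module(self.module_name)
            except ImportError:
                self._module = None
            self._probed = True
        return self._module is not None

    @property
    def module(self):
        return self._module if self.available() else None


# Usage mirroring the new scenario code above:
compiled = ModuleAvailableFeatureSketch('bzrlib._btree_serializer_pyx')
scenarios = []
if compiled.available():
    scenarios.append(('C', {'parse_btree': compiled.module}))
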
@@ -121,6 +112,12 @@
 
 class TestBTreeBuilder(BTreeTestCase):
 
+    def test_clear_cache(self):
+        builder = btree_index.BTreeBuilder(reference_lists=0, key_elements=1)
+        # This is a no-op, but we need the api to be consistent with other
+        # BTreeGraphIndex apis.
+        builder.clear_cache()
+
     def test_empty_1_0(self):
         builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
         # NamedTemporaryFile dies on builder.finish().read(). weird.
@@ -152,7 +149,7 @@
         temp_file = builder.finish()
         content = temp_file.read()
         del temp_file
-        self.assertEqual(158, len(content))
+        self.assertEqual(131, len(content))
         self.assertEqual(
             "B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=5\n"
             "row_lengths=1\n",
@@ -176,7 +173,7 @@
         temp_file = builder.finish()
         content = temp_file.read()
         del temp_file
-        self.assertEqual(264, len(content))
+        self.assertEqual(238, len(content))
         self.assertEqual(
             "B+Tree Graph Index 2\nnode_ref_lists=2\nkey_elements=2\nlen=10\n"
             "row_lengths=1\n",
@@ -242,7 +239,7 @@
         temp_file = builder.finish()
         content = temp_file.read()
         del temp_file
-        self.assertEqual(181, len(content))
+        self.assertEqual(155, len(content))
         self.assertEqual(
             "B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=10\n"
             "row_lengths=1\n",
@@ -350,23 +347,19 @@
         # Test the parts of the index that take up memory are doing so
         # predictably.
         self.assertEqual(1, len(builder._nodes))
-        self.assertEqual(1, len(builder._keys))
         self.assertIs(None, builder._nodes_by_key)
         builder.add_node(*nodes[1])
         self.assertEqual(0, len(builder._nodes))
-        self.assertEqual(0, len(builder._keys))
         self.assertIs(None, builder._nodes_by_key)
         self.assertEqual(1, len(builder._backing_indices))
         self.assertEqual(2, builder._backing_indices[0].key_count())
         # now back to memory
         builder.add_node(*nodes[2])
         self.assertEqual(1, len(builder._nodes))
-        self.assertEqual(1, len(builder._keys))
         self.assertIs(None, builder._nodes_by_key)
         # And spills to a second backing index combing all
         builder.add_node(*nodes[3])
         self.assertEqual(0, len(builder._nodes))
-        self.assertEqual(0, len(builder._keys))
         self.assertIs(None, builder._nodes_by_key)
         self.assertEqual(2, len(builder._backing_indices))
         self.assertEqual(None, builder._backing_indices[0])
@@ -375,7 +368,6 @@
         builder.add_node(*nodes[4])
         builder.add_node(*nodes[5])
         self.assertEqual(0, len(builder._nodes))
-        self.assertEqual(0, len(builder._keys))
         self.assertIs(None, builder._nodes_by_key)
         self.assertEqual(2, len(builder._backing_indices))
         self.assertEqual(2, builder._backing_indices[0].key_count())
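
The dropped builder._keys assertions track an internal change: the builder no longer maintains a separate _keys set, so these tests now only watch _nodes and _backing_indices as entries spill to disk. A rough sketch of the spill behaviour the remaining assertions exercise, assuming spill_at is accepted by the BTreeBuilder constructor (an assumption, not shown in this diff) and forces a spill after two in-memory nodes:

from bzrlib import btree_index

# spill_at=2 is assumed; it would flush to a backing index every 2 nodes.
builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0,
                                   spill_at=2)
builder.add_node(('key-0',), 'value-0')
# The first node is held in memory.
assert len(builder._nodes) == 1
builder.add_node(('key-1',), 'value-1')
# Hitting the threshold spills both nodes into a backing index on disk.
assert len(builder._nodes) == 0
assert len(builder._backing_indices) == 1
assert builder._backing_indices[0].key_count() == 2
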
@@ -439,23 +431,19 @@
         # Test the parts of the index that take up memory are doing so
         # predictably.
         self.assertEqual(1, len(builder._nodes))
-        self.assertEqual(1, len(builder._keys))
         self.assertIs(None, builder._nodes_by_key)
         builder.add_node(*nodes[1])
         self.assertEqual(0, len(builder._nodes))
-        self.assertEqual(0, len(builder._keys))
         self.assertIs(None, builder._nodes_by_key)
         self.assertEqual(1, len(builder._backing_indices))
         self.assertEqual(2, builder._backing_indices[0].key_count())
         # now back to memory
         builder.add_node(*nodes[2])
         self.assertEqual(1, len(builder._nodes))
-        self.assertEqual(1, len(builder._keys))
         self.assertIs(None, builder._nodes_by_key)
         # And spills to a second backing index but doesn't combine
         builder.add_node(*nodes[3])
         self.assertEqual(0, len(builder._nodes))
-        self.assertEqual(0, len(builder._keys))
         self.assertIs(None, builder._nodes_by_key)
         self.assertEqual(2, len(builder._backing_indices))
         for backing_index in builder._backing_indices:
@@ -464,7 +452,6 @@
         builder.add_node(*nodes[4])
         builder.add_node(*nodes[5])
         self.assertEqual(0, len(builder._nodes))
-        self.assertEqual(0, len(builder._keys))
         self.assertIs(None, builder._nodes_by_key)
         self.assertEqual(3, len(builder._backing_indices))
         for backing_index in builder._backing_indices:
@@ -529,11 +516,9 @@
         builder.add_node(*nodes[0])
         # Test the parts of the index that take up memory are doing so
         # predictably.
-        self.assertEqual(1, len(builder._keys))
         self.assertEqual(1, len(builder._nodes))
         self.assertIs(None, builder._nodes_by_key)
         builder.add_node(*nodes[1])
-        self.assertEqual(0, len(builder._keys))
         self.assertEqual(0, len(builder._nodes))
         self.assertIs(None, builder._nodes_by_key)
         self.assertEqual(1, len(builder._backing_indices))
@@ -542,7 +527,6 @@
         old = dict(builder._get_nodes_by_key()) #Build up the nodes by key dict
         builder.add_node(*nodes[2])
         self.assertEqual(1, len(builder._nodes))
-        self.assertEqual(1, len(builder._keys))
         self.assertIsNot(None, builder._nodes_by_key)
         self.assertNotEqual({}, builder._nodes_by_key)
         # We should have a new entry
@@ -550,7 +534,6 @@
         # And spills to a second backing index combing all
         builder.add_node(*nodes[3])
         self.assertEqual(0, len(builder._nodes))
-        self.assertEqual(0, len(builder._keys))
         self.assertIs(None, builder._nodes_by_key)
         self.assertEqual(2, len(builder._backing_indices))
         self.assertEqual(None, builder._backing_indices[0])
@@ -559,7 +542,6 @@
         builder.add_node(*nodes[4])
         builder.add_node(*nodes[5])
         self.assertEqual(0, len(builder._nodes))
-        self.assertEqual(0, len(builder._keys))
         self.assertIs(None, builder._nodes_by_key)
         self.assertEqual(2, len(builder._backing_indices))
         self.assertEqual(2, builder._backing_indices[0].key_count())
@@ -636,6 +618,27 @@
         size = trans.put_file('index', stream)
         return btree_index.BTreeGraphIndex(trans, 'index', size)
 
+    def test_clear_cache(self):
+        nodes = self.make_nodes(160, 2, 2)
+        index = self.make_index(ref_lists=2, key_elements=2, nodes=nodes)
+        self.assertEqual(1, len(list(index.iter_entries([nodes[30][0]]))))
+        self.assertEqual([1, 4], index._row_lengths)
+        self.assertIsNot(None, index._root_node)
+        internal_node_pre_clear = index._internal_node_cache.keys()
+        self.assertTrue(len(index._leaf_node_cache) > 0)
+        index.clear_cache()
+        # We don't touch _root_node or _internal_node_cache, both should be
+        # small, and can save a round trip or two
+        self.assertIsNot(None, index._root_node)
+        # NOTE: We don't want to affect the _internal_node_cache, as we expect
+        #       it will be small, and if we ever do touch this index again, it
+        #       will save round-trips.  This assertion isn't very strong,
+        #       becuase without a 3-level index, we don't have any internal
+        #       nodes cached.
+        self.assertEqual(internal_node_pre_clear,
+                         index._internal_node_cache.keys())
+        self.assertEqual(0, len(index._leaf_node_cache))
+
     def test_trivial_constructor(self):
         transport = get_transport('trace+' + self.get_url(''))
         index = btree_index.BTreeGraphIndex(transport, 'index', None)
@@ -688,7 +691,7 @@
         # The entire index should have been read, as it is one page long.
         self.assertEqual([('readv', 'index', [(0, size)], False, None)],
             transport._activity)
-        self.assertEqual(1199, size)
+        self.assertEqual(1173, size)
 
     def test__read_nodes_no_size_one_page_reads_once(self):
         self.make_index(nodes=[(('key',), 'value', ())])
@@ -742,7 +745,7 @@
         # The entire index should have been read linearly.
         self.assertEqual([('readv', 'index', [(0, size)], False, None)],
             transport._activity)
-        self.assertEqual(1514, size)
+        self.assertEqual(1488, size)
 
     def test_validate_two_pages(self):
         builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
@@ -980,6 +983,177 @@
             ])
         self.assertEqual(set([]), index.external_references(0))
 
+    def test__find_ancestors_one_page(self):
+        key1 = ('key-1',)
+        key2 = ('key-2',)
+        index = self.make_index(ref_lists=1, key_elements=1, nodes=[
+            (key1, 'value', ([key2],)),
+            (key2, 'value', ([],)),
+            ])
+        parent_map = {}
+        missing_keys = set()
+        search_keys = index._find_ancestors([key1], 0, parent_map, missing_keys)
+        self.assertEqual({key1: (key2,), key2: ()}, parent_map)
+        self.assertEqual(set(), missing_keys)
+        self.assertEqual(set(), search_keys)
+
+    def test__find_ancestors_one_page_w_missing(self):
+        key1 = ('key-1',)
+        key2 = ('key-2',)
+        key3 = ('key-3',)
+        index = self.make_index(ref_lists=1, key_elements=1, nodes=[
+            (key1, 'value', ([key2],)),
+            (key2, 'value', ([],)),
+            ])
+        parent_map = {}
+        missing_keys = set()
+        search_keys = index._find_ancestors([key2, key3], 0, parent_map,
+                                            missing_keys)
+        self.assertEqual({key2: ()}, parent_map)
+        # we know that key3 is missing because we read the page that it would
+        # otherwise be on
+        self.assertEqual(set([key3]), missing_keys)
+        self.assertEqual(set(), search_keys)
+
+    def test__find_ancestors_one_parent_missing(self):
+        key1 = ('key-1',)
+        key2 = ('key-2',)
+        key3 = ('key-3',)
+        index = self.make_index(ref_lists=1, key_elements=1, nodes=[
+            (key1, 'value', ([key2],)),
+            (key2, 'value', ([key3],)),
+            ])
+        parent_map = {}
+        missing_keys = set()
+        search_keys = index._find_ancestors([key1], 0, parent_map,
+                                            missing_keys)
+        self.assertEqual({key1: (key2,), key2: (key3,)}, parent_map)
+        self.assertEqual(set(), missing_keys)
+        # all we know is that key3 wasn't present on the page we were reading
+        # but if you look, the last key is key2 which comes before key3, so we
+        # don't know whether key3 would land on this page or not.
+        self.assertEqual(set([key3]), search_keys)
+        search_keys = index._find_ancestors(search_keys, 0, parent_map,
+                                            missing_keys)
+        # passing it back in, we are sure it is 'missing'
+        self.assertEqual({key1: (key2,), key2: (key3,)}, parent_map)
+        self.assertEqual(set([key3]), missing_keys)
+        self.assertEqual(set([]), search_keys)
+
+    def test__find_ancestors_dont_search_known(self):
+        key1 = ('key-1',)
+        key2 = ('key-2',)
+        key3 = ('key-3',)
+        index = self.make_index(ref_lists=1, key_elements=1, nodes=[
+            (key1, 'value', ([key2],)),
+            (key2, 'value', ([key3],)),
+            (key3, 'value', ([],)),
+            ])
+        # We already know about key2, so we won't try to search for key3
+        parent_map = {key2: (key3,)}
+        missing_keys = set()
+        search_keys = index._find_ancestors([key1], 0, parent_map,
+                                            missing_keys)
+        self.assertEqual({key1: (key2,), key2: (key3,)}, parent_map)
+        self.assertEqual(set(), missing_keys)
+        self.assertEqual(set(), search_keys)
+
+    def test__find_ancestors_multiple_pages(self):
+        # We need to use enough keys that we actually cause a split
+        start_time = 1249671539
+        email = "joebob@example.com"
+        nodes = []
+        ref_lists = ((),)
+        rev_keys = []
+        for i in xrange(400):
+            rev_id = '%s-%s-%s' % (email,
+                                   osutils.compact_date(start_time + i),
+                                   osutils.rand_chars(16))
+            rev_key = (rev_id,)
+            nodes.append((rev_key, 'value', ref_lists))
+            # We have a ref 'list' of length 1, with a list of parents, with 1
+            # parent which is a key
+            ref_lists = ((rev_key,),)
+            rev_keys.append(rev_key)
+        index = self.make_index(ref_lists=1, key_elements=1, nodes=nodes)
+        self.assertEqual(400, index.key_count())
+        self.assertEqual(3, len(index._row_offsets))
+        nodes = dict(index._read_nodes([1, 2]))
+        l1 = nodes[1]
+        l2 = nodes[2]
+        min_l2_key = l2.min_key
+        max_l1_key = l1.max_key
+        self.assertTrue(max_l1_key < min_l2_key)
+        parents_min_l2_key = l2.keys[min_l2_key][1][0]
+        self.assertEqual((l1.max_key,), parents_min_l2_key)
+        # Now, whatever key we select that would fall on the second page,
+        # should give us all the parents until the page break
+        key_idx = rev_keys.index(min_l2_key)
+        next_key = rev_keys[key_idx+1]
+        # So now when we get the parent map, we should get the key we are
+        # looking for, min_l2_key, and then a reference to go look for the
+        # parent of that key
+        parent_map = {}
+        missing_keys = set()
+        search_keys = index._find_ancestors([next_key], 0, parent_map,
+                                            missing_keys)
+        self.assertEqual([min_l2_key, next_key], sorted(parent_map))
+        self.assertEqual(set(), missing_keys)
+        self.assertEqual(set([max_l1_key]), search_keys)
+        parent_map = {}
+        search_keys = index._find_ancestors([max_l1_key], 0, parent_map,
+                                            missing_keys)
+        self.assertEqual(sorted(l1.keys), sorted(parent_map))
+        self.assertEqual(set(), missing_keys)
+        self.assertEqual(set(), search_keys)
+
+    def test__find_ancestors_empty_index(self):
+        index = self.make_index(ref_lists=1, key_elements=1, nodes=[])
+        parent_map = {}
+        missing_keys = set()
+        search_keys = index._find_ancestors([('one',), ('two',)], 0, parent_map,
+                                            missing_keys)
+        self.assertEqual(set(), search_keys)
+        self.assertEqual({}, parent_map)
+        self.assertEqual(set([('one',), ('two',)]), missing_keys)
+
+    def test_supports_unlimited_cache(self):
+        builder = btree_index.BTreeBuilder(reference_lists=0, key_elements=1)
+        # We need enough nodes to cause a page split (so we have both an
+        # internal node and a couple leaf nodes. 500 seems to be enough.)
+        nodes = self.make_nodes(500, 1, 0)
+        for node in nodes:
+            builder.add_node(*node)
+        stream = builder.finish()
+        trans = get_transport(self.get_url())
+        size = trans.put_file('index', stream)
+        index = btree_index.BTreeGraphIndex(trans, 'index', size)
+        self.assertEqual(500, index.key_count())
+        # We have an internal node
+        self.assertEqual(2, len(index._row_lengths))
+        # We have at least 2 leaf nodes
+        self.assertTrue(index._row_lengths[-1] >= 2)
+        self.assertIsInstance(index._leaf_node_cache, lru_cache.LRUCache)
+        self.assertEqual(btree_index._NODE_CACHE_SIZE,
+                         index._leaf_node_cache._max_cache)
+        self.assertIsInstance(index._internal_node_cache, fifo_cache.FIFOCache)
+        self.assertEqual(100, index._internal_node_cache._max_cache)
+        # No change if unlimited_cache=False is passed
+        index = btree_index.BTreeGraphIndex(trans, 'index', size,
+                                            unlimited_cache=False)
+        self.assertIsInstance(index._leaf_node_cache, lru_cache.LRUCache)
+        self.assertEqual(btree_index._NODE_CACHE_SIZE,
+                         index._leaf_node_cache._max_cache)
+        self.assertIsInstance(index._internal_node_cache, fifo_cache.FIFOCache)
+        self.assertEqual(100, index._internal_node_cache._max_cache)
+        index = btree_index.BTreeGraphIndex(trans, 'index', size,
+                                            unlimited_cache=True)
+        self.assertIsInstance(index._leaf_node_cache, dict)
+        self.assertIs(type(index._internal_node_cache), dict)
+        # Exercise the lookup code
+        entries = set(index.iter_entries([n[0] for n in nodes]))
+        self.assertEqual(500, len(entries))
+
 
 class TestBTreeNodes(BTreeTestCase):
 
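
The new _find_ancestors tests above establish its contract: it fills parent_map with the parents it could read from the pages it fetched, adds keys it knows are absent to missing_keys, and returns the keys that still need to be looked up on other pages. A hedged sketch of how a caller could drive it to transitive closure, assuming index is a BTreeGraphIndex with one reference list as built by the tests above:

def find_all_ancestors(index, start_keys, ref_list_num=0):
    # Repeatedly feed the returned search_keys back in until every reachable
    # key has either been resolved into parent_map or declared missing.
    parent_map = {}
    missing_keys = set()
    search_keys = set(start_keys)
    while search_keys:
        search_keys = index._find_ancestors(search_keys, ref_list_num,
                                            parent_map, missing_keys)
    return parent_map, missing_keys
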
@@ -1107,7 +1281,7 @@
     def test_exists(self):
         # This is just to let the user know if they don't have the feature
         # available
-        self.requireFeature(CompiledBtreeParserFeature)
+        self.requireFeature(compiled_btreeparser_feature)
 
 
 class TestMultiBisectRight(tests.TestCase):