~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/tests/test_btree_index.py

  • Committer: Sidnei da Silva
  • Date: 2009-05-29 14:19:29 UTC
  • mto: (4531.1.1 integration)
  • mto: This revision was merged to the branch mainline in revision 4532.
  • Revision ID: sidnei.da.silva@canonical.com-20090529141929-3heywbvj36po72a5
- Add initial config

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
# Copyright (C) 2008, 2009 Canonical Ltd
 
1
# Copyright (C) 2008 Canonical Ltd
2
2
#
3
3
# This program is free software; you can redistribute it and/or modify
4
4
# it under the terms of the GNU General Public License as published by
23
23
from bzrlib import (
24
24
    btree_index,
25
25
    errors,
26
 
    fifo_cache,
27
 
    lru_cache,
28
 
    osutils,
29
26
    tests,
30
27
    )
31
28
from bzrlib.tests import (
43
40
        condition_isinstance(TestBTreeNodes))
44
41
    import bzrlib._btree_serializer_py as py_module
45
42
    scenarios = [('python', {'parse_btree': py_module})]
46
 
    if compiled_btreeparser_feature.available():
47
 
        scenarios.append(('C', {'parse_btree':
48
 
                                compiled_btreeparser_feature.module}))
 
43
    if CompiledBtreeParserFeature.available():
 
44
        # Is there a way to do this that gets missing feature failures rather
 
45
        # than no indication to the user?
 
46
        import bzrlib._btree_serializer_c as c_module
 
47
        scenarios.append(('C', {'parse_btree': c_module}))
49
48
    return multiply_tests(node_tests, scenarios, others)
50
49
 
51
50
 
52
 
compiled_btreeparser_feature = tests.ModuleAvailableFeature(
53
 
                                'bzrlib._btree_serializer_pyx')
 
51
class _CompiledBtreeParserFeature(tests.Feature):
 
52
    def _probe(self):
 
53
        try:
 
54
            import bzrlib._btree_serializer_c
 
55
        except ImportError:
 
56
            return False
 
57
        return True
 
58
 
 
59
    def feature_name(self):
 
60
        return 'bzrlib._btree_serializer_c'
 
61
 
 
62
CompiledBtreeParserFeature = _CompiledBtreeParserFeature()
54
63
 
55
64
 
56
65
class BTreeTestCase(TestCaseWithTransport):
59
68
 
60
69
    def setUp(self):
61
70
        TestCaseWithTransport.setUp(self)
62
 
        self.overrideAttr(btree_index, '_RESERVED_HEADER_BYTES', 100)
 
71
        self._original_header = btree_index._RESERVED_HEADER_BYTES
 
72
        def restore():
 
73
            btree_index._RESERVED_HEADER_BYTES = self._original_header
 
74
        self.addCleanup(restore)
 
75
        btree_index._RESERVED_HEADER_BYTES = 100
63
76
 
64
77
    def make_nodes(self, count, key_elements, reference_lists):
65
78
        """Generate count*key_elements sample nodes."""
99
112
 
100
113
    def shrink_page_size(self):
101
114
        """Shrink the default page size so that less fits in a page."""
102
 
        self.overrideAttr(btree_index, '_PAGE_SIZE')
 
115
        old_page_size = btree_index._PAGE_SIZE
 
116
        def cleanup():
 
117
            btree_index._PAGE_SIZE = old_page_size
 
118
        self.addCleanup(cleanup)
103
119
        btree_index._PAGE_SIZE = 2048
104
120
 
105
121
 
106
122
class TestBTreeBuilder(BTreeTestCase):
107
123
 
108
 
    def test_clear_cache(self):
109
 
        builder = btree_index.BTreeBuilder(reference_lists=0, key_elements=1)
110
 
        # This is a no-op, but we need the api to be consistent with other
111
 
        # BTreeGraphIndex apis.
112
 
        builder.clear_cache()
113
 
 
114
124
    def test_empty_1_0(self):
115
125
        builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
116
126
        # NamedTemporaryFile dies on builder.finish().read(). weird.
142
152
        temp_file = builder.finish()
143
153
        content = temp_file.read()
144
154
        del temp_file
145
 
        self.assertEqual(131, len(content))
 
155
        self.assertEqual(158, len(content))
146
156
        self.assertEqual(
147
157
            "B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=5\n"
148
158
            "row_lengths=1\n",
166
176
        temp_file = builder.finish()
167
177
        content = temp_file.read()
168
178
        del temp_file
169
 
        self.assertEqual(238, len(content))
 
179
        self.assertEqual(264, len(content))
170
180
        self.assertEqual(
171
181
            "B+Tree Graph Index 2\nnode_ref_lists=2\nkey_elements=2\nlen=10\n"
172
182
            "row_lengths=1\n",
232
242
        temp_file = builder.finish()
233
243
        content = temp_file.read()
234
244
        del temp_file
235
 
        self.assertEqual(155, len(content))
 
245
        self.assertEqual(181, len(content))
236
246
        self.assertEqual(
237
247
            "B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=10\n"
238
248
            "row_lengths=1\n",
340
350
        # Test the parts of the index that take up memory are doing so
341
351
        # predictably.
342
352
        self.assertEqual(1, len(builder._nodes))
 
353
        self.assertEqual(1, len(builder._keys))
343
354
        self.assertIs(None, builder._nodes_by_key)
344
355
        builder.add_node(*nodes[1])
345
356
        self.assertEqual(0, len(builder._nodes))
 
357
        self.assertEqual(0, len(builder._keys))
346
358
        self.assertIs(None, builder._nodes_by_key)
347
359
        self.assertEqual(1, len(builder._backing_indices))
348
360
        self.assertEqual(2, builder._backing_indices[0].key_count())
349
361
        # now back to memory
350
362
        builder.add_node(*nodes[2])
351
363
        self.assertEqual(1, len(builder._nodes))
 
364
        self.assertEqual(1, len(builder._keys))
352
365
        self.assertIs(None, builder._nodes_by_key)
353
366
        # And spills to a second backing index combining all
354
367
        builder.add_node(*nodes[3])
355
368
        self.assertEqual(0, len(builder._nodes))
 
369
        self.assertEqual(0, len(builder._keys))
356
370
        self.assertIs(None, builder._nodes_by_key)
357
371
        self.assertEqual(2, len(builder._backing_indices))
358
372
        self.assertEqual(None, builder._backing_indices[0])
361
375
        builder.add_node(*nodes[4])
362
376
        builder.add_node(*nodes[5])
363
377
        self.assertEqual(0, len(builder._nodes))
 
378
        self.assertEqual(0, len(builder._keys))
364
379
        self.assertIs(None, builder._nodes_by_key)
365
380
        self.assertEqual(2, len(builder._backing_indices))
366
381
        self.assertEqual(2, builder._backing_indices[0].key_count())
424
439
        # Test the parts of the index that take up memory are doing so
425
440
        # predictably.
426
441
        self.assertEqual(1, len(builder._nodes))
 
442
        self.assertEqual(1, len(builder._keys))
427
443
        self.assertIs(None, builder._nodes_by_key)
428
444
        builder.add_node(*nodes[1])
429
445
        self.assertEqual(0, len(builder._nodes))
 
446
        self.assertEqual(0, len(builder._keys))
430
447
        self.assertIs(None, builder._nodes_by_key)
431
448
        self.assertEqual(1, len(builder._backing_indices))
432
449
        self.assertEqual(2, builder._backing_indices[0].key_count())
433
450
        # now back to memory
434
451
        builder.add_node(*nodes[2])
435
452
        self.assertEqual(1, len(builder._nodes))
 
453
        self.assertEqual(1, len(builder._keys))
436
454
        self.assertIs(None, builder._nodes_by_key)
437
455
        # And spills to a second backing index but doesn't combine
438
456
        builder.add_node(*nodes[3])
439
457
        self.assertEqual(0, len(builder._nodes))
 
458
        self.assertEqual(0, len(builder._keys))
440
459
        self.assertIs(None, builder._nodes_by_key)
441
460
        self.assertEqual(2, len(builder._backing_indices))
442
461
        for backing_index in builder._backing_indices:
445
464
        builder.add_node(*nodes[4])
446
465
        builder.add_node(*nodes[5])
447
466
        self.assertEqual(0, len(builder._nodes))
 
467
        self.assertEqual(0, len(builder._keys))
448
468
        self.assertIs(None, builder._nodes_by_key)
449
469
        self.assertEqual(3, len(builder._backing_indices))
450
470
        for backing_index in builder._backing_indices:
509
529
        builder.add_node(*nodes[0])
510
530
        # Test the parts of the index that take up memory are doing so
511
531
        # predictably.
 
532
        self.assertEqual(1, len(builder._keys))
512
533
        self.assertEqual(1, len(builder._nodes))
513
534
        self.assertIs(None, builder._nodes_by_key)
514
535
        builder.add_node(*nodes[1])
 
536
        self.assertEqual(0, len(builder._keys))
515
537
        self.assertEqual(0, len(builder._nodes))
516
538
        self.assertIs(None, builder._nodes_by_key)
517
539
        self.assertEqual(1, len(builder._backing_indices))
520
542
        old = dict(builder._get_nodes_by_key()) #Build up the nodes by key dict
521
543
        builder.add_node(*nodes[2])
522
544
        self.assertEqual(1, len(builder._nodes))
 
545
        self.assertEqual(1, len(builder._keys))
523
546
        self.assertIsNot(None, builder._nodes_by_key)
524
547
        self.assertNotEqual({}, builder._nodes_by_key)
525
548
        # We should have a new entry
527
550
        # And spills to a second backing index combining all
528
551
        builder.add_node(*nodes[3])
529
552
        self.assertEqual(0, len(builder._nodes))
 
553
        self.assertEqual(0, len(builder._keys))
530
554
        self.assertIs(None, builder._nodes_by_key)
531
555
        self.assertEqual(2, len(builder._backing_indices))
532
556
        self.assertEqual(None, builder._backing_indices[0])
535
559
        builder.add_node(*nodes[4])
536
560
        builder.add_node(*nodes[5])
537
561
        self.assertEqual(0, len(builder._nodes))
 
562
        self.assertEqual(0, len(builder._keys))
538
563
        self.assertIs(None, builder._nodes_by_key)
539
564
        self.assertEqual(2, len(builder._backing_indices))
540
565
        self.assertEqual(2, builder._backing_indices[0].key_count())
611
636
        size = trans.put_file('index', stream)
612
637
        return btree_index.BTreeGraphIndex(trans, 'index', size)
613
638
 
614
 
    def test_clear_cache(self):
615
 
        nodes = self.make_nodes(160, 2, 2)
616
 
        index = self.make_index(ref_lists=2, key_elements=2, nodes=nodes)
617
 
        self.assertEqual(1, len(list(index.iter_entries([nodes[30][0]]))))
618
 
        self.assertEqual([1, 4], index._row_lengths)
619
 
        self.assertIsNot(None, index._root_node)
620
 
        internal_node_pre_clear = index._internal_node_cache.keys()
621
 
        self.assertTrue(len(index._leaf_node_cache) > 0)
622
 
        index.clear_cache()
623
 
        # We don't touch _root_node or _internal_node_cache, both should be
624
 
        # small, and can save a round trip or two
625
 
        self.assertIsNot(None, index._root_node)
626
 
        # NOTE: We don't want to affect the _internal_node_cache, as we expect
627
 
        #       it will be small, and if we ever do touch this index again, it
628
 
        #       will save round-trips.  This assertion isn't very strong,
629
 
        #       because without a 3-level index, we don't have any internal
630
 
        #       nodes cached.
631
 
        self.assertEqual(internal_node_pre_clear,
632
 
                         index._internal_node_cache.keys())
633
 
        self.assertEqual(0, len(index._leaf_node_cache))
634
 
 
635
639
    def test_trivial_constructor(self):
636
640
        transport = get_transport('trace+' + self.get_url(''))
637
641
        index = btree_index.BTreeGraphIndex(transport, 'index', None)
684
688
        # The entire index should have been read, as it is one page long.
685
689
        self.assertEqual([('readv', 'index', [(0, size)], False, None)],
686
690
            transport._activity)
687
 
        self.assertEqual(1173, size)
 
691
        self.assertEqual(1199, size)
688
692
 
689
693
    def test__read_nodes_no_size_one_page_reads_once(self):
690
694
        self.make_index(nodes=[(('key',), 'value', ())])
738
742
        # The entire index should have been read linearly.
739
743
        self.assertEqual([('readv', 'index', [(0, size)], False, None)],
740
744
            transport._activity)
741
 
        self.assertEqual(1488, size)
 
745
        self.assertEqual(1514, size)
742
746
 
743
747
    def test_validate_two_pages(self):
744
748
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
976
980
            ])
977
981
        self.assertEqual(set([]), index.external_references(0))
978
982
 
979
 
    def test__find_ancestors_one_page(self):
980
 
        key1 = ('key-1',)
981
 
        key2 = ('key-2',)
982
 
        index = self.make_index(ref_lists=1, key_elements=1, nodes=[
983
 
            (key1, 'value', ([key2],)),
984
 
            (key2, 'value', ([],)),
985
 
            ])
986
 
        parent_map = {}
987
 
        missing_keys = set()
988
 
        search_keys = index._find_ancestors([key1], 0, parent_map, missing_keys)
989
 
        self.assertEqual({key1: (key2,), key2: ()}, parent_map)
990
 
        self.assertEqual(set(), missing_keys)
991
 
        self.assertEqual(set(), search_keys)
992
 
 
993
 
    def test__find_ancestors_one_page_w_missing(self):
994
 
        key1 = ('key-1',)
995
 
        key2 = ('key-2',)
996
 
        key3 = ('key-3',)
997
 
        index = self.make_index(ref_lists=1, key_elements=1, nodes=[
998
 
            (key1, 'value', ([key2],)),
999
 
            (key2, 'value', ([],)),
1000
 
            ])
1001
 
        parent_map = {}
1002
 
        missing_keys = set()
1003
 
        search_keys = index._find_ancestors([key2, key3], 0, parent_map,
1004
 
                                            missing_keys)
1005
 
        self.assertEqual({key2: ()}, parent_map)
1006
 
        # we know that key3 is missing because we read the page that it would
1007
 
        # otherwise be on
1008
 
        self.assertEqual(set([key3]), missing_keys)
1009
 
        self.assertEqual(set(), search_keys)
1010
 
 
1011
 
    def test__find_ancestors_one_parent_missing(self):
1012
 
        key1 = ('key-1',)
1013
 
        key2 = ('key-2',)
1014
 
        key3 = ('key-3',)
1015
 
        index = self.make_index(ref_lists=1, key_elements=1, nodes=[
1016
 
            (key1, 'value', ([key2],)),
1017
 
            (key2, 'value', ([key3],)),
1018
 
            ])
1019
 
        parent_map = {}
1020
 
        missing_keys = set()
1021
 
        search_keys = index._find_ancestors([key1], 0, parent_map,
1022
 
                                            missing_keys)
1023
 
        self.assertEqual({key1: (key2,), key2: (key3,)}, parent_map)
1024
 
        self.assertEqual(set(), missing_keys)
1025
 
        # all we know is that key3 wasn't present on the page we were reading
1026
 
        # but if you look, the last key is key2 which comes before key3, so we
1027
 
        # don't know whether key3 would land on this page or not.
1028
 
        self.assertEqual(set([key3]), search_keys)
1029
 
        search_keys = index._find_ancestors(search_keys, 0, parent_map,
1030
 
                                            missing_keys)
1031
 
        # passing it back in, we are sure it is 'missing'
1032
 
        self.assertEqual({key1: (key2,), key2: (key3,)}, parent_map)
1033
 
        self.assertEqual(set([key3]), missing_keys)
1034
 
        self.assertEqual(set([]), search_keys)
1035
 
 
1036
 
    def test__find_ancestors_dont_search_known(self):
1037
 
        key1 = ('key-1',)
1038
 
        key2 = ('key-2',)
1039
 
        key3 = ('key-3',)
1040
 
        index = self.make_index(ref_lists=1, key_elements=1, nodes=[
1041
 
            (key1, 'value', ([key2],)),
1042
 
            (key2, 'value', ([key3],)),
1043
 
            (key3, 'value', ([],)),
1044
 
            ])
1045
 
        # We already know about key2, so we won't try to search for key3
1046
 
        parent_map = {key2: (key3,)}
1047
 
        missing_keys = set()
1048
 
        search_keys = index._find_ancestors([key1], 0, parent_map,
1049
 
                                            missing_keys)
1050
 
        self.assertEqual({key1: (key2,), key2: (key3,)}, parent_map)
1051
 
        self.assertEqual(set(), missing_keys)
1052
 
        self.assertEqual(set(), search_keys)
1053
 
 
1054
 
    def test__find_ancestors_multiple_pages(self):
1055
 
        # We need to use enough keys that we actually cause a split
1056
 
        start_time = 1249671539
1057
 
        email = "joebob@example.com"
1058
 
        nodes = []
1059
 
        ref_lists = ((),)
1060
 
        rev_keys = []
1061
 
        for i in xrange(400):
1062
 
            rev_id = '%s-%s-%s' % (email,
1063
 
                                   osutils.compact_date(start_time + i),
1064
 
                                   osutils.rand_chars(16))
1065
 
            rev_key = (rev_id,)
1066
 
            nodes.append((rev_key, 'value', ref_lists))
1067
 
            # We have a ref 'list' of length 1, with a list of parents, with 1
1068
 
            # parent which is a key
1069
 
            ref_lists = ((rev_key,),)
1070
 
            rev_keys.append(rev_key)
1071
 
        index = self.make_index(ref_lists=1, key_elements=1, nodes=nodes)
1072
 
        self.assertEqual(400, index.key_count())
1073
 
        self.assertEqual(3, len(index._row_offsets))
1074
 
        nodes = dict(index._read_nodes([1, 2]))
1075
 
        l1 = nodes[1]
1076
 
        l2 = nodes[2]
1077
 
        min_l2_key = l2.min_key
1078
 
        max_l1_key = l1.max_key
1079
 
        self.assertTrue(max_l1_key < min_l2_key)
1080
 
        parents_min_l2_key = l2.keys[min_l2_key][1][0]
1081
 
        self.assertEqual((l1.max_key,), parents_min_l2_key)
1082
 
        # Now, whatever key we select that would fall on the second page,
1083
 
        # should give us all the parents until the page break
1084
 
        key_idx = rev_keys.index(min_l2_key)
1085
 
        next_key = rev_keys[key_idx+1]
1086
 
        # So now when we get the parent map, we should get the key we are
1087
 
        # looking for, min_l2_key, and then a reference to go look for the
1088
 
        # parent of that key
1089
 
        parent_map = {}
1090
 
        missing_keys = set()
1091
 
        search_keys = index._find_ancestors([next_key], 0, parent_map,
1092
 
                                            missing_keys)
1093
 
        self.assertEqual([min_l2_key, next_key], sorted(parent_map))
1094
 
        self.assertEqual(set(), missing_keys)
1095
 
        self.assertEqual(set([max_l1_key]), search_keys)
1096
 
        parent_map = {}
1097
 
        search_keys = index._find_ancestors([max_l1_key], 0, parent_map,
1098
 
                                            missing_keys)
1099
 
        self.assertEqual(sorted(l1.keys), sorted(parent_map))
1100
 
        self.assertEqual(set(), missing_keys)
1101
 
        self.assertEqual(set(), search_keys)
1102
 
 
1103
 
    def test__find_ancestors_empty_index(self):
1104
 
        index = self.make_index(ref_lists=1, key_elements=1, nodes=[])
1105
 
        parent_map = {}
1106
 
        missing_keys = set()
1107
 
        search_keys = index._find_ancestors([('one',), ('two',)], 0, parent_map,
1108
 
                                            missing_keys)
1109
 
        self.assertEqual(set(), search_keys)
1110
 
        self.assertEqual({}, parent_map)
1111
 
        self.assertEqual(set([('one',), ('two',)]), missing_keys)
1112
 
 
1113
 
    def test_supports_unlimited_cache(self):
1114
 
        builder = btree_index.BTreeBuilder(reference_lists=0, key_elements=1)
1115
 
        # We need enough nodes to cause a page split (so we have both an
1116
 
        # internal node and a couple leaf nodes. 500 seems to be enough.)
1117
 
        nodes = self.make_nodes(500, 1, 0)
1118
 
        for node in nodes:
1119
 
            builder.add_node(*node)
1120
 
        stream = builder.finish()
1121
 
        trans = get_transport(self.get_url())
1122
 
        size = trans.put_file('index', stream)
1123
 
        index = btree_index.BTreeGraphIndex(trans, 'index', size)
1124
 
        self.assertEqual(500, index.key_count())
1125
 
        # We have an internal node
1126
 
        self.assertEqual(2, len(index._row_lengths))
1127
 
        # We have at least 2 leaf nodes
1128
 
        self.assertTrue(index._row_lengths[-1] >= 2)
1129
 
        self.assertIsInstance(index._leaf_node_cache, lru_cache.LRUCache)
1130
 
        self.assertEqual(btree_index._NODE_CACHE_SIZE,
1131
 
                         index._leaf_node_cache._max_cache)
1132
 
        self.assertIsInstance(index._internal_node_cache, fifo_cache.FIFOCache)
1133
 
        self.assertEqual(100, index._internal_node_cache._max_cache)
1134
 
        # No change if unlimited_cache=False is passed
1135
 
        index = btree_index.BTreeGraphIndex(trans, 'index', size,
1136
 
                                            unlimited_cache=False)
1137
 
        self.assertIsInstance(index._leaf_node_cache, lru_cache.LRUCache)
1138
 
        self.assertEqual(btree_index._NODE_CACHE_SIZE,
1139
 
                         index._leaf_node_cache._max_cache)
1140
 
        self.assertIsInstance(index._internal_node_cache, fifo_cache.FIFOCache)
1141
 
        self.assertEqual(100, index._internal_node_cache._max_cache)
1142
 
        index = btree_index.BTreeGraphIndex(trans, 'index', size,
1143
 
                                            unlimited_cache=True)
1144
 
        self.assertIsInstance(index._leaf_node_cache, dict)
1145
 
        self.assertIs(type(index._internal_node_cache), dict)
1146
 
        # Exercise the lookup code
1147
 
        entries = set(index.iter_entries([n[0] for n in nodes]))
1148
 
        self.assertEqual(500, len(entries))
1149
 
 
1150
983
 
1151
984
class TestBTreeNodes(BTreeTestCase):
1152
985
 
 
986
    def restore_parser(self):
 
987
        btree_index._btree_serializer = self.saved_parser
 
988
 
1153
989
    def setUp(self):
1154
990
        BTreeTestCase.setUp(self)
1155
 
        self.overrideAttr(btree_index, '_btree_serializer', self.parse_btree)
 
991
        self.saved_parser = btree_index._btree_serializer
 
992
        self.addCleanup(self.restore_parser)
 
993
        btree_index._btree_serializer = self.parse_btree
1156
994
 
1157
995
    def test_LeafNode_1_0(self):
1158
996
        node_bytes = ("type=leaf\n"
1269
1107
    def test_exists(self):
1270
1108
        # This is just to let the user know if they don't have the feature
1271
1109
        # available
1272
 
        self.requireFeature(compiled_btreeparser_feature)
 
1110
        self.requireFeature(CompiledBtreeParserFeature)
1273
1111
 
1274
1112
 
1275
1113
class TestMultiBisectRight(tests.TestCase):