~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/tests/test_btree_index.py

  • Committer: Canonical.com Patch Queue Manager
  • Date: 2008-08-28 03:01:04 UTC
  • mfrom: (3641.5.19 btree)
  • Revision ID: pqm@pqm.ubuntu.com-20080828030104-6a87mmhafprj1prs
(jam) Updates to the btree indexing code.

Show diffs side-by-side

added added

removed removed

Lines of Context:
113
113
                keys.append((key, value, refs))
114
114
        return keys
115
115
 
 
116
    def shrink_page_size(self):
 
117
        """Shrink the default page size so that less fits in a page."""
 
118
        old_page_size = btree_index._PAGE_SIZE
 
119
        def cleanup():
 
120
            btree_index._PAGE_SIZE = old_page_size
 
121
        self.addCleanup(cleanup)
 
122
        btree_index._PAGE_SIZE = 2048
 
123
 
116
124
 
117
125
class TestBTreeBuilder(BTreeTestCase):
118
126
 
196
204
 
197
205
    def test_2_leaves_1_0(self):
198
206
        builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
199
 
        nodes = self.make_nodes(800, 1, 0)
 
207
        nodes = self.make_nodes(400, 1, 0)
200
208
        for node in nodes:
201
209
            builder.add_node(*node)
202
210
        # NamedTemporaryFile dies on builder.finish().read(). weird.
203
211
        temp_file = builder.finish()
204
212
        content = temp_file.read()
205
213
        del temp_file
206
 
        self.assertEqual(10646, len(content))
 
214
        self.assertEqual(9283, len(content))
207
215
        self.assertEqual(
208
 
            "B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=800\n"
 
216
            "B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=400\n"
209
217
            "row_lengths=1,2\n",
210
218
            content[:77])
211
219
        root = content[77:4096]
215
223
        expected_root = (
216
224
            "type=internal\n"
217
225
            "offset=0\n"
218
 
            "503503503503503503503503503503503503503503503503503503503503503503503503503503503503503503503503503503503503503503503503\n"
219
 
            )
 
226
            ) + ("307" * 40) + "\n"
220
227
        self.assertEqual(expected_root, root_bytes)
221
228
        # We already know serialisation works for leaves, check key selection:
222
229
        leaf1_bytes = zlib.decompress(leaf1)
223
230
        sorted_node_keys = sorted(node[0] for node in nodes)
224
231
        node = btree_index._LeafNode(leaf1_bytes, 1, 0)
225
 
        self.assertEqual(448, len(node.keys))
226
 
        self.assertEqual(sorted_node_keys[:448], sorted(node.keys))
 
232
        self.assertEqual(231, len(node.keys))
 
233
        self.assertEqual(sorted_node_keys[:231], sorted(node.keys))
227
234
        leaf2_bytes = zlib.decompress(leaf2)
228
235
        node = btree_index._LeafNode(leaf2_bytes, 1, 0)
229
 
        self.assertEqual(800 - 448, len(node.keys))
230
 
        self.assertEqual(sorted_node_keys[448:], sorted(node.keys))
 
236
        self.assertEqual(400 - 231, len(node.keys))
 
237
        self.assertEqual(sorted_node_keys[231:], sorted(node.keys))
231
238
 
232
239
    def test_last_page_rounded_1_layer(self):
233
240
        builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
253
260
 
254
261
    def test_last_page_not_rounded_2_layer(self):
255
262
        builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
256
 
        nodes = self.make_nodes(800, 1, 0)
 
263
        nodes = self.make_nodes(400, 1, 0)
257
264
        for node in nodes:
258
265
            builder.add_node(*node)
259
266
        # NamedTemporaryFile dies on builder.finish().read(). weird.
260
267
        temp_file = builder.finish()
261
268
        content = temp_file.read()
262
269
        del temp_file
263
 
        self.assertEqual(10646, len(content))
 
270
        self.assertEqual(9283, len(content))
264
271
        self.assertEqual(
265
 
            "B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=800\n"
 
272
            "B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=400\n"
266
273
            "row_lengths=1,2\n",
267
274
            content[:77])
268
275
        # Check the last page is well formed
269
276
        leaf2 = content[8192:]
270
277
        leaf2_bytes = zlib.decompress(leaf2)
271
278
        node = btree_index._LeafNode(leaf2_bytes, 1, 0)
272
 
        self.assertEqual(800 - 448, len(node.keys))
 
279
        self.assertEqual(400 - 231, len(node.keys))
273
280
        sorted_node_keys = sorted(node[0] for node in nodes)
274
 
        self.assertEqual(sorted_node_keys[448:], sorted(node.keys))
 
281
        self.assertEqual(sorted_node_keys[231:], sorted(node.keys))
275
282
 
276
283
    def test_three_level_tree_details(self):
277
284
        # The left most pointer in the second internal node in a row should
278
285
        # pointer to the second node that the internal node is for, _not_
279
286
        # the first, otherwise the first node overlaps with the last node of
280
287
        # the prior internal node on that row.
281
 
        # We will be adding 100,000 nodes, so spill at 100,001 to prevent
282
 
        # having to flush anything out to disk.
283
 
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2,
284
 
                                           spill_at=100001)
285
 
        # 100K nodes is *just* enough to create a two internal nodes on the
286
 
        # second level
287
 
        nodes = self.make_nodes(50000, 2, 2)
 
288
        self.shrink_page_size()
 
289
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
 
290
        # 40K nodes is enough to create a two internal nodes on the second
 
291
        # level, with a 2K page size
 
292
        nodes = self.make_nodes(20000, 2, 2)
288
293
 
289
294
        for node in nodes:
290
295
            builder.add_node(*node)
314
319
 
315
320
    def test_2_leaves_2_2(self):
316
321
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
317
 
        nodes = self.make_nodes(200, 2, 2)
 
322
        nodes = self.make_nodes(100, 2, 2)
318
323
        for node in nodes:
319
324
            builder.add_node(*node)
320
325
        # NamedTemporaryFile dies on builder.finish().read(). weird.
321
326
        temp_file = builder.finish()
322
327
        content = temp_file.read()
323
328
        del temp_file
324
 
        self.assertEqual(10574, len(content))
 
329
        self.assertEqual(12643, len(content))
325
330
        self.assertEqual(
326
 
            "B+Tree Graph Index 2\nnode_ref_lists=2\nkey_elements=2\nlen=400\n"
327
 
            "row_lengths=1,2\n",
 
331
            "B+Tree Graph Index 2\nnode_ref_lists=2\nkey_elements=2\nlen=200\n"
 
332
            "row_lengths=1,3\n",
328
333
            content[:77])
329
334
        root = content[77:4096]
330
335
        leaf1 = content[4096:8192]
331
 
        leaf2 = content[8192:]
 
336
        leaf2 = content[8192:12288]
 
337
        leaf3 = content[12288:]
332
338
        root_bytes = zlib.decompress(root)
333
339
        expected_root = (
334
340
            "type=internal\n"
335
341
            "offset=0\n"
336
 
            "1111111111111111111111111111111111111111\x00"
337
 
            "126126126126126126126126126126126126126126126126126126126"
338
 
            "126126126126126126126126126126126126126126126126126126126126126\n"
 
342
            + ("0" * 40) + "\x00" + ("91" * 40) + "\n"
 
343
            + ("1" * 40) + "\x00" + ("81" * 40) + "\n"
339
344
            )
340
345
        self.assertEqual(expected_root, root_bytes)
341
346
        # We assume the other leaf nodes have been written correctly - layering
594
599
        # The entire index should have been read, as it is one page long.
595
600
        self.assertEqual([('readv', 'index', [(0, size)], False, None)],
596
601
            transport._activity)
597
 
        self.assertEqual(1593, size)
 
602
        self.assertEqual(1199, size)
598
603
 
599
604
    def test_2_levels_key_count_2_2(self):
600
605
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
603
608
            builder.add_node(*node)
604
609
        transport = get_transport('trace+' + self.get_url(''))
605
610
        size = transport.put_file('index', builder.finish())
606
 
        self.assertEqual(10242, size)
 
611
        self.assertEqual(17692, size)
607
612
        index = btree_index.BTreeGraphIndex(transport, 'index', size)
608
613
        del transport._activity[:]
609
614
        self.assertEqual([], transport._activity)
614
619
 
615
620
    def test_validate_one_page(self):
616
621
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
 
622
        nodes = self.make_nodes(45, 2, 2)
 
623
        for node in nodes:
 
624
            builder.add_node(*node)
 
625
        transport = get_transport('trace+' + self.get_url(''))
 
626
        size = transport.put_file('index', builder.finish())
 
627
        index = btree_index.BTreeGraphIndex(transport, 'index', size)
 
628
        del transport._activity[:]
 
629
        self.assertEqual([], transport._activity)
 
630
        index.validate()
 
631
        # The entire index should have been read linearly.
 
632
        self.assertEqual([('readv', 'index', [(0, size)], False, None)],
 
633
            transport._activity)
 
634
        self.assertEqual(1514, size)
 
635
 
 
636
    def test_validate_two_pages(self):
 
637
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
617
638
        nodes = self.make_nodes(80, 2, 2)
618
639
        for node in nodes:
619
640
            builder.add_node(*node)
620
641
        transport = get_transport('trace+' + self.get_url(''))
621
642
        size = transport.put_file('index', builder.finish())
622
 
        index = btree_index.BTreeGraphIndex(transport, 'index', size)
623
 
        del transport._activity[:]
624
 
        self.assertEqual([], transport._activity)
625
 
        index.validate()
626
 
        # The entire index should have been read linearly.
627
 
        self.assertEqual([('readv', 'index', [(0, size)], False, None)],
628
 
            transport._activity)
629
 
        self.assertEqual(3846, size)
630
 
 
631
 
    def test_validate_two_pages(self):
632
 
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
633
 
        nodes = self.make_nodes(160, 2, 2)
634
 
        for node in nodes:
635
 
            builder.add_node(*node)
636
 
        transport = get_transport('trace+' + self.get_url(''))
637
 
        size = transport.put_file('index', builder.finish())
638
643
        # Root page, 2 leaf pages
639
 
        self.assertEqual(10242, size)
 
644
        self.assertEqual(9339, size)
640
645
        index = btree_index.BTreeGraphIndex(transport, 'index', size)
641
646
        del transport._activity[:]
642
647
        self.assertEqual([], transport._activity)
643
648
        index.validate()
644
649
        # The entire index should have been read linearly.
645
650
        self.assertEqual([('readv', 'index', [(0, 4096)], False, None),
646
 
            ('readv', 'index', [(4096, 4096), (8192, 2050)], False, None)],
 
651
            ('readv', 'index', [(4096, 4096), (8192, 1147)], False, None)],
647
652
            transport._activity)
648
653
        # XXX: TODO: write some badly-ordered nodes, and some pointers-to-wrong
649
654
        # node and make validate find them.
686
691
    def test_iter_all_entries_reads(self):
687
692
        # iterating all entries reads the header, then does a linear
688
693
        # read.
689
 
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2,
690
 
                                           spill_at=100001)
691
 
        # 100k nodes is enough to create a three-level index, which shows that
692
 
        # we skip the internal nodes and just read the leaf nodes.
693
 
        nodes = self.make_nodes(50000, 2, 2)
 
694
        self.shrink_page_size()
 
695
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
 
696
        # 20k nodes is enough to create a two internal nodes on the second
 
697
        # level, with a 2K page size
 
698
        nodes = self.make_nodes(10000, 2, 2)
694
699
        for node in nodes:
695
700
            builder.add_node(*node)
696
701
        transport = get_transport('trace+' + self.get_url(''))
697
702
        size = transport.put_file('index', builder.finish())
698
 
        self.assertEqual(2026722, size, 'number of expected bytes in the'
 
703
        self.assertEqual(1303220, size, 'number of expected bytes in the'
699
704
                                        ' output changed')
 
705
        page_size = btree_index._PAGE_SIZE
700
706
        del builder
701
707
        index = btree_index.BTreeGraphIndex(transport, 'index', size)
702
708
        del transport._activity[:]
703
709
        self.assertEqual([], transport._activity)
704
 
        found_nodes = list(index.iter_all_entries())
 
710
        found_nodes = self.time(list, index.iter_all_entries())
705
711
        bare_nodes = []
706
712
        for node in found_nodes:
707
713
            self.assertTrue(node[0] is index)
709
715
        self.assertEqual(3, len(index._row_lengths),
710
716
            "Not enough rows: %r" % index._row_lengths)
711
717
        # Should be as long as the nodes we supplied
712
 
        self.assertEqual(100000, len(found_nodes))
 
718
        self.assertEqual(20000, len(found_nodes))
713
719
        # Should have the same content
714
720
        self.assertEqual(set(nodes), set(bare_nodes))
715
721
        # Should have done linear scan IO up the index, ignoring
717
723
        # The entire index should have been read
718
724
        total_pages = sum(index._row_lengths)
719
725
        self.assertEqual(total_pages, index._row_offsets[-1])
720
 
        self.assertEqual(2026722, size)
 
726
        self.assertEqual(1303220, size)
721
727
        # The start of the leaves
722
 
        first_byte = index._row_offsets[-2] * btree_index._PAGE_SIZE
 
728
        first_byte = index._row_offsets[-2] * page_size
723
729
        readv_request = []
724
 
        for offset in range(first_byte, size, 4096):
725
 
            readv_request.append((offset, 4096))
 
730
        for offset in range(first_byte, size, page_size):
 
731
            readv_request.append((offset, page_size))
726
732
        # The last page is truncated
727
 
        readv_request[-1] = (readv_request[-1][0], 3298)
728
 
        expected = [('readv', 'index', [(0, 4096)], False, None),
 
733
        readv_request[-1] = (readv_request[-1][0], 1303220 % page_size)
 
734
        expected = [('readv', 'index', [(0, page_size)], False, None),
729
735
             ('readv',  'index', readv_request, False, None)]
730
736
        if expected != transport._activity:
731
737
            self.assertEqualDiff(pprint.pformat(expected),
765
771
        self.assertEqual(nodes[30], bare_nodes[0])
766
772
        # Should have read the root node, then one leaf page:
767
773
        self.assertEqual([('readv', 'index', [(0, 4096)], False, None),
768
 
             ('readv',  'index', [(4096, 4096), ], False, None)],
 
774
             ('readv',  'index', [(8192, 4096), ], False, None)],
769
775
            transport._activity)
770
776
 
771
777
    def test_iter_key_prefix_1_element_key_None(self):