~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/tests/test_btree_index.py

  • Committer: John Arbash Meinel
  • Date: 2008-08-28 20:13:31 UTC
  • mfrom: (3658 +trunk)
  • mto: This revision was merged to the branch mainline in revision 3688.
  • Revision ID: john@arbash-meinel.com-20080828201331-dqffxf54l2heokll
Merge bzr.dev 3658

Show diffs side-by-side

added added

removed removed

Lines of Context:
113
113
                keys.append((key, value, refs))
114
114
        return keys
115
115
 
 
116
    def shrink_page_size(self):
 
117
        """Shrink the default page size so that less fits in a page."""
 
118
        old_page_size = btree_index._PAGE_SIZE
 
119
        def cleanup():
 
120
            btree_index._PAGE_SIZE = old_page_size
 
121
        self.addCleanup(cleanup)
 
122
        btree_index._PAGE_SIZE = 2048
 
123
 
116
124
 
117
125
class TestBTreeBuilder(BTreeTestCase):
118
126
 
196
204
 
197
205
    def test_2_leaves_1_0(self):
198
206
        builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
199
 
        nodes = self.make_nodes(800, 1, 0)
 
207
        nodes = self.make_nodes(400, 1, 0)
200
208
        for node in nodes:
201
209
            builder.add_node(*node)
202
210
        # NamedTemporaryFile dies on builder.finish().read(). weird.
203
211
        temp_file = builder.finish()
204
212
        content = temp_file.read()
205
213
        del temp_file
206
 
        self.assertEqual(10646, len(content))
 
214
        self.assertEqual(9283, len(content))
207
215
        self.assertEqual(
208
 
            "B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=800\n"
 
216
            "B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=400\n"
209
217
            "row_lengths=1,2\n",
210
218
            content[:77])
211
219
        root = content[77:4096]
215
223
        expected_root = (
216
224
            "type=internal\n"
217
225
            "offset=0\n"
218
 
            "503503503503503503503503503503503503503503503503503503503503503503503503503503503503503503503503503503503503503503503503\n"
219
 
            )
 
226
            ) + ("307" * 40) + "\n"
220
227
        self.assertEqual(expected_root, root_bytes)
221
228
        # We already know serialisation works for leaves, check key selection:
222
229
        leaf1_bytes = zlib.decompress(leaf1)
223
230
        sorted_node_keys = sorted(node[0] for node in nodes)
224
231
        node = btree_index._LeafNode(leaf1_bytes, 1, 0)
225
 
        self.assertEqual(448, len(node.keys))
226
 
        self.assertEqual(sorted_node_keys[:448], sorted(node.keys))
 
232
        self.assertEqual(231, len(node.keys))
 
233
        self.assertEqual(sorted_node_keys[:231], sorted(node.keys))
227
234
        leaf2_bytes = zlib.decompress(leaf2)
228
235
        node = btree_index._LeafNode(leaf2_bytes, 1, 0)
229
 
        self.assertEqual(800 - 448, len(node.keys))
230
 
        self.assertEqual(sorted_node_keys[448:], sorted(node.keys))
 
236
        self.assertEqual(400 - 231, len(node.keys))
 
237
        self.assertEqual(sorted_node_keys[231:], sorted(node.keys))
231
238
 
232
239
    def test_last_page_rounded_1_layer(self):
233
240
        builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
253
260
 
254
261
    def test_last_page_not_rounded_2_layer(self):
255
262
        builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
256
 
        nodes = self.make_nodes(800, 1, 0)
 
263
        nodes = self.make_nodes(400, 1, 0)
257
264
        for node in nodes:
258
265
            builder.add_node(*node)
259
266
        # NamedTemporaryFile dies on builder.finish().read(). weird.
260
267
        temp_file = builder.finish()
261
268
        content = temp_file.read()
262
269
        del temp_file
263
 
        self.assertEqual(10646, len(content))
 
270
        self.assertEqual(9283, len(content))
264
271
        self.assertEqual(
265
 
            "B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=800\n"
 
272
            "B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=400\n"
266
273
            "row_lengths=1,2\n",
267
274
            content[:77])
268
275
        # Check the last page is well formed
269
276
        leaf2 = content[8192:]
270
277
        leaf2_bytes = zlib.decompress(leaf2)
271
278
        node = btree_index._LeafNode(leaf2_bytes, 1, 0)
272
 
        self.assertEqual(800 - 448, len(node.keys))
 
279
        self.assertEqual(400 - 231, len(node.keys))
273
280
        sorted_node_keys = sorted(node[0] for node in nodes)
274
 
        self.assertEqual(sorted_node_keys[448:], sorted(node.keys))
 
281
        self.assertEqual(sorted_node_keys[231:], sorted(node.keys))
275
282
 
276
283
    def test_three_level_tree_details(self):
277
284
        # The left most pointer in the second internal node in a row should
278
285
        # pointer to the second node that the internal node is for, _not_
279
286
        # the first, otherwise the first node overlaps with the last node of
280
287
        # the prior internal node on that row.
281
 
        # We will be adding 100,000 nodes, so spill at 100,001 to prevent
282
 
        # having to flush anything out to disk.
283
 
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2,
284
 
                                           spill_at=100001)
285
 
        # 100K nodes is *just* enough to create a two internal nodes on the
286
 
        # second level
287
 
        nodes = self.make_nodes(50000, 2, 2)
 
288
        self.shrink_page_size()
 
289
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
 
290
        # 40K nodes is enough to create a two internal nodes on the second
 
291
        # level, with a 2K page size
 
292
        nodes = self.make_nodes(20000, 2, 2)
288
293
 
289
294
        for node in nodes:
290
295
            builder.add_node(*node)
314
319
 
315
320
    def test_2_leaves_2_2(self):
316
321
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
317
 
        nodes = self.make_nodes(200, 2, 2)
 
322
        nodes = self.make_nodes(100, 2, 2)
318
323
        for node in nodes:
319
324
            builder.add_node(*node)
320
325
        # NamedTemporaryFile dies on builder.finish().read(). weird.
321
326
        temp_file = builder.finish()
322
327
        content = temp_file.read()
323
328
        del temp_file
324
 
        self.assertEqual(10574, len(content))
 
329
        self.assertEqual(12643, len(content))
325
330
        self.assertEqual(
326
 
            "B+Tree Graph Index 2\nnode_ref_lists=2\nkey_elements=2\nlen=400\n"
327
 
            "row_lengths=1,2\n",
 
331
            "B+Tree Graph Index 2\nnode_ref_lists=2\nkey_elements=2\nlen=200\n"
 
332
            "row_lengths=1,3\n",
328
333
            content[:77])
329
334
        root = content[77:4096]
330
335
        leaf1 = content[4096:8192]
331
 
        leaf2 = content[8192:]
 
336
        leaf2 = content[8192:12288]
 
337
        leaf3 = content[12288:]
332
338
        root_bytes = zlib.decompress(root)
333
339
        expected_root = (
334
340
            "type=internal\n"
335
341
            "offset=0\n"
336
 
            "1111111111111111111111111111111111111111\x00"
337
 
            "126126126126126126126126126126126126126126126126126126126"
338
 
            "126126126126126126126126126126126126126126126126126126126126126\n"
 
342
            + ("0" * 40) + "\x00" + ("91" * 40) + "\n"
 
343
            + ("1" * 40) + "\x00" + ("81" * 40) + "\n"
339
344
            )
340
345
        self.assertEqual(expected_root, root_bytes)
341
346
        # We assume the other leaf nodes have been written correctly - layering
598
603
        # The entire index should have been read, as it is one page long.
599
604
        self.assertEqual([('readv', 'index', [(0, size)], False, None)],
600
605
            transport._activity)
601
 
        self.assertEqual(1593, size)
 
606
        self.assertEqual(1199, size)
602
607
 
603
608
    def test_2_levels_key_count_2_2(self):
604
609
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
607
612
            builder.add_node(*node)
608
613
        transport = get_transport('trace+' + self.get_url(''))
609
614
        size = transport.put_file('index', builder.finish())
610
 
        self.assertEqual(10242, size)
 
615
        self.assertEqual(17692, size)
611
616
        index = btree_index.BTreeGraphIndex(transport, 'index', size)
612
617
        del transport._activity[:]
613
618
        self.assertEqual([], transport._activity)
618
623
 
619
624
    def test_validate_one_page(self):
620
625
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
 
626
        nodes = self.make_nodes(45, 2, 2)
 
627
        for node in nodes:
 
628
            builder.add_node(*node)
 
629
        transport = get_transport('trace+' + self.get_url(''))
 
630
        size = transport.put_file('index', builder.finish())
 
631
        index = btree_index.BTreeGraphIndex(transport, 'index', size)
 
632
        del transport._activity[:]
 
633
        self.assertEqual([], transport._activity)
 
634
        index.validate()
 
635
        # The entire index should have been read linearly.
 
636
        self.assertEqual([('readv', 'index', [(0, size)], False, None)],
 
637
            transport._activity)
 
638
        self.assertEqual(1514, size)
 
639
 
 
640
    def test_validate_two_pages(self):
 
641
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
621
642
        nodes = self.make_nodes(80, 2, 2)
622
643
        for node in nodes:
623
644
            builder.add_node(*node)
624
645
        transport = get_transport('trace+' + self.get_url(''))
625
646
        size = transport.put_file('index', builder.finish())
626
 
        index = btree_index.BTreeGraphIndex(transport, 'index', size)
627
 
        del transport._activity[:]
628
 
        self.assertEqual([], transport._activity)
629
 
        index.validate()
630
 
        # The entire index should have been read linearly.
631
 
        self.assertEqual([('readv', 'index', [(0, size)], False, None)],
632
 
            transport._activity)
633
 
        self.assertEqual(3846, size)
634
 
 
635
 
    def test_validate_two_pages(self):
636
 
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
637
 
        nodes = self.make_nodes(160, 2, 2)
638
 
        for node in nodes:
639
 
            builder.add_node(*node)
640
 
        transport = get_transport('trace+' + self.get_url(''))
641
 
        size = transport.put_file('index', builder.finish())
642
647
        # Root page, 2 leaf pages
643
 
        self.assertEqual(10242, size)
 
648
        self.assertEqual(9339, size)
644
649
        index = btree_index.BTreeGraphIndex(transport, 'index', size)
645
650
        del transport._activity[:]
646
651
        self.assertEqual([], transport._activity)
647
652
        index.validate()
648
653
        # The entire index should have been read linearly.
649
654
        self.assertEqual([('readv', 'index', [(0, 4096)], False, None),
650
 
            ('readv', 'index', [(4096, 4096), (8192, 2050)], False, None)],
 
655
            ('readv', 'index', [(4096, 4096), (8192, 1147)], False, None)],
651
656
            transport._activity)
652
657
        # XXX: TODO: write some badly-ordered nodes, and some pointers-to-wrong
653
658
        # node and make validate find them.
690
695
    def test_iter_all_entries_reads(self):
691
696
        # iterating all entries reads the header, then does a linear
692
697
        # read.
693
 
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2,
694
 
                                           spill_at=100001)
695
 
        # 100k nodes is enough to create a three-level index, which shows that
696
 
        # we skip the internal nodes and just read the leaf nodes.
697
 
        nodes = self.make_nodes(50000, 2, 2)
 
698
        self.shrink_page_size()
 
699
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
 
700
        # 20k nodes is enough to create a two internal nodes on the second
 
701
        # level, with a 2K page size
 
702
        nodes = self.make_nodes(10000, 2, 2)
698
703
        for node in nodes:
699
704
            builder.add_node(*node)
700
705
        transport = get_transport('trace+' + self.get_url(''))
701
706
        size = transport.put_file('index', builder.finish())
702
 
        self.assertEqual(2026722, size, 'number of expected bytes in the'
 
707
        self.assertEqual(1303220, size, 'number of expected bytes in the'
703
708
                                        ' output changed')
 
709
        page_size = btree_index._PAGE_SIZE
704
710
        del builder
705
711
        index = btree_index.BTreeGraphIndex(transport, 'index', size)
706
712
        del transport._activity[:]
707
713
        self.assertEqual([], transport._activity)
708
 
        found_nodes = list(index.iter_all_entries())
 
714
        found_nodes = self.time(list, index.iter_all_entries())
709
715
        bare_nodes = []
710
716
        for node in found_nodes:
711
717
            self.assertTrue(node[0] is index)
713
719
        self.assertEqual(3, len(index._row_lengths),
714
720
            "Not enough rows: %r" % index._row_lengths)
715
721
        # Should be as long as the nodes we supplied
716
 
        self.assertEqual(100000, len(found_nodes))
 
722
        self.assertEqual(20000, len(found_nodes))
717
723
        # Should have the same content
718
724
        self.assertEqual(set(nodes), set(bare_nodes))
719
725
        # Should have done linear scan IO up the index, ignoring
721
727
        # The entire index should have been read
722
728
        total_pages = sum(index._row_lengths)
723
729
        self.assertEqual(total_pages, index._row_offsets[-1])
724
 
        self.assertEqual(2026722, size)
 
730
        self.assertEqual(1303220, size)
725
731
        # The start of the leaves
726
 
        first_byte = index._row_offsets[-2] * btree_index._PAGE_SIZE
 
732
        first_byte = index._row_offsets[-2] * page_size
727
733
        readv_request = []
728
 
        for offset in range(first_byte, size, 4096):
729
 
            readv_request.append((offset, 4096))
 
734
        for offset in range(first_byte, size, page_size):
 
735
            readv_request.append((offset, page_size))
730
736
        # The last page is truncated
731
 
        readv_request[-1] = (readv_request[-1][0], 3298)
732
 
        expected = [('readv', 'index', [(0, 4096)], False, None),
 
737
        readv_request[-1] = (readv_request[-1][0], 1303220 % page_size)
 
738
        expected = [('readv', 'index', [(0, page_size)], False, None),
733
739
             ('readv',  'index', readv_request, False, None)]
734
740
        if expected != transport._activity:
735
741
            self.assertEqualDiff(pprint.pformat(expected),
769
775
        self.assertEqual(nodes[30], bare_nodes[0])
770
776
        # Should have read the root node, then one leaf page:
771
777
        self.assertEqual([('readv', 'index', [(0, 4096)], False, None),
772
 
             ('readv',  'index', [(4096, 4096), ], False, None)],
 
778
             ('readv',  'index', [(8192, 4096), ], False, None)],
773
779
            transport._activity)
774
780
 
775
781
    def test_iter_key_prefix_1_element_key_None(self):