197
205
def test_2_leaves_1_0(self):
198
206
builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
199
nodes = self.make_nodes(800, 1, 0)
207
nodes = self.make_nodes(400, 1, 0)
200
208
for node in nodes:
201
209
builder.add_node(*node)
202
210
# NamedTemporaryFile dies on builder.finish().read(). weird.
203
211
temp_file = builder.finish()
204
212
content = temp_file.read()
206
self.assertEqual(10646, len(content))
214
self.assertEqual(9283, len(content))
207
215
self.assertEqual(
208
"B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=800\n"
216
"B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=400\n"
209
217
"row_lengths=1,2\n",
211
219
root = content[77:4096]
215
223
expected_root = (
216
224
"type=internal\n"
218
"503503503503503503503503503503503503503503503503503503503503503503503503503503503503503503503503503503503503503503503503\n"
226
) + ("307" * 40) + "\n"
220
227
self.assertEqual(expected_root, root_bytes)
221
228
# We already know serialisation works for leaves, check key selection:
222
229
leaf1_bytes = zlib.decompress(leaf1)
223
230
sorted_node_keys = sorted(node[0] for node in nodes)
224
231
node = btree_index._LeafNode(leaf1_bytes, 1, 0)
225
self.assertEqual(448, len(node.keys))
226
self.assertEqual(sorted_node_keys[:448], sorted(node.keys))
232
self.assertEqual(231, len(node.keys))
233
self.assertEqual(sorted_node_keys[:231], sorted(node.keys))
227
234
leaf2_bytes = zlib.decompress(leaf2)
228
235
node = btree_index._LeafNode(leaf2_bytes, 1, 0)
229
self.assertEqual(800 - 448, len(node.keys))
230
self.assertEqual(sorted_node_keys[448:], sorted(node.keys))
236
self.assertEqual(400 - 231, len(node.keys))
237
self.assertEqual(sorted_node_keys[231:], sorted(node.keys))
232
239
def test_last_page_rounded_1_layer(self):
233
240
builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
254
261
def test_last_page_not_rounded_2_layer(self):
255
262
builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
256
nodes = self.make_nodes(800, 1, 0)
263
nodes = self.make_nodes(400, 1, 0)
257
264
for node in nodes:
258
265
builder.add_node(*node)
259
266
# NamedTemporaryFile dies on builder.finish().read(). weird.
260
267
temp_file = builder.finish()
261
268
content = temp_file.read()
263
self.assertEqual(10646, len(content))
270
self.assertEqual(9283, len(content))
264
271
self.assertEqual(
265
"B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=800\n"
272
"B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=400\n"
266
273
"row_lengths=1,2\n",
268
275
# Check the last page is well formed
269
276
leaf2 = content[8192:]
270
277
leaf2_bytes = zlib.decompress(leaf2)
271
278
node = btree_index._LeafNode(leaf2_bytes, 1, 0)
272
self.assertEqual(800 - 448, len(node.keys))
279
self.assertEqual(400 - 231, len(node.keys))
273
280
sorted_node_keys = sorted(node[0] for node in nodes)
274
self.assertEqual(sorted_node_keys[448:], sorted(node.keys))
281
self.assertEqual(sorted_node_keys[231:], sorted(node.keys))
276
283
def test_three_level_tree_details(self):
277
284
# The left most pointer in the second internal node in a row should
278
285
# pointer to the second node that the internal node is for, _not_
279
286
# the first, otherwise the first node overlaps with the last node of
280
287
# the prior internal node on that row.
281
# We will be adding 100,000 nodes, so spill at 100,001 to prevent
282
# having to flush anything out to disk.
283
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2,
285
# 100K nodes is *just* enough to create a two internal nodes on the
287
nodes = self.make_nodes(50000, 2, 2)
288
self.shrink_page_size()
289
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
290
# 40K nodes is enough to create a two internal nodes on the second
291
# level, with a 2K page size
292
nodes = self.make_nodes(20000, 2, 2)
289
294
for node in nodes:
290
295
builder.add_node(*node)
315
320
def test_2_leaves_2_2(self):
316
321
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
317
nodes = self.make_nodes(200, 2, 2)
322
nodes = self.make_nodes(100, 2, 2)
318
323
for node in nodes:
319
324
builder.add_node(*node)
320
325
# NamedTemporaryFile dies on builder.finish().read(). weird.
321
326
temp_file = builder.finish()
322
327
content = temp_file.read()
324
self.assertEqual(10574, len(content))
329
self.assertEqual(12643, len(content))
325
330
self.assertEqual(
326
"B+Tree Graph Index 2\nnode_ref_lists=2\nkey_elements=2\nlen=400\n"
331
"B+Tree Graph Index 2\nnode_ref_lists=2\nkey_elements=2\nlen=200\n"
329
334
root = content[77:4096]
330
335
leaf1 = content[4096:8192]
331
leaf2 = content[8192:]
336
leaf2 = content[8192:12288]
337
leaf3 = content[12288:]
332
338
root_bytes = zlib.decompress(root)
333
339
expected_root = (
334
340
"type=internal\n"
336
"1111111111111111111111111111111111111111\x00"
337
"126126126126126126126126126126126126126126126126126126126"
338
"126126126126126126126126126126126126126126126126126126126126126\n"
342
+ ("0" * 40) + "\x00" + ("91" * 40) + "\n"
343
+ ("1" * 40) + "\x00" + ("81" * 40) + "\n"
340
345
self.assertEqual(expected_root, root_bytes)
341
346
# We assume the other leaf nodes have been written correctly - layering
598
603
# The entire index should have been read, as it is one page long.
599
604
self.assertEqual([('readv', 'index', [(0, size)], False, None)],
600
605
transport._activity)
601
self.assertEqual(1593, size)
606
self.assertEqual(1199, size)
603
608
def test_2_levels_key_count_2_2(self):
604
609
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
619
624
def test_validate_one_page(self):
620
625
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
626
nodes = self.make_nodes(45, 2, 2)
628
builder.add_node(*node)
629
transport = get_transport('trace+' + self.get_url(''))
630
size = transport.put_file('index', builder.finish())
631
index = btree_index.BTreeGraphIndex(transport, 'index', size)
632
del transport._activity[:]
633
self.assertEqual([], transport._activity)
635
# The entire index should have been read linearly.
636
self.assertEqual([('readv', 'index', [(0, size)], False, None)],
638
self.assertEqual(1514, size)
640
def test_validate_two_pages(self):
641
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
621
642
nodes = self.make_nodes(80, 2, 2)
622
643
for node in nodes:
623
644
builder.add_node(*node)
624
645
transport = get_transport('trace+' + self.get_url(''))
625
646
size = transport.put_file('index', builder.finish())
626
index = btree_index.BTreeGraphIndex(transport, 'index', size)
627
del transport._activity[:]
628
self.assertEqual([], transport._activity)
630
# The entire index should have been read linearly.
631
self.assertEqual([('readv', 'index', [(0, size)], False, None)],
633
self.assertEqual(3846, size)
635
def test_validate_two_pages(self):
636
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
637
nodes = self.make_nodes(160, 2, 2)
639
builder.add_node(*node)
640
transport = get_transport('trace+' + self.get_url(''))
641
size = transport.put_file('index', builder.finish())
642
647
# Root page, 2 leaf pages
643
self.assertEqual(10242, size)
648
self.assertEqual(9339, size)
644
649
index = btree_index.BTreeGraphIndex(transport, 'index', size)
645
650
del transport._activity[:]
646
651
self.assertEqual([], transport._activity)
648
653
# The entire index should have been read linearly.
649
654
self.assertEqual([('readv', 'index', [(0, 4096)], False, None),
650
('readv', 'index', [(4096, 4096), (8192, 2050)], False, None)],
655
('readv', 'index', [(4096, 4096), (8192, 1147)], False, None)],
651
656
transport._activity)
652
657
# XXX: TODO: write some badly-ordered nodes, and some pointers-to-wrong
653
658
# node and make validate find them.
690
695
def test_iter_all_entries_reads(self):
691
696
# iterating all entries reads the header, then does a linear
693
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2,
695
# 100k nodes is enough to create a three-level index, which shows that
696
# we skip the internal nodes and just read the leaf nodes.
697
nodes = self.make_nodes(50000, 2, 2)
698
self.shrink_page_size()
699
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
700
# 20k nodes is enough to create a two internal nodes on the second
701
# level, with a 2K page size
702
nodes = self.make_nodes(10000, 2, 2)
698
703
for node in nodes:
699
704
builder.add_node(*node)
700
705
transport = get_transport('trace+' + self.get_url(''))
701
706
size = transport.put_file('index', builder.finish())
702
self.assertEqual(2026722, size, 'number of expected bytes in the'
707
self.assertEqual(1303220, size, 'number of expected bytes in the'
703
708
' output changed')
709
page_size = btree_index._PAGE_SIZE
705
711
index = btree_index.BTreeGraphIndex(transport, 'index', size)
706
712
del transport._activity[:]
707
713
self.assertEqual([], transport._activity)
708
found_nodes = list(index.iter_all_entries())
714
found_nodes = self.time(list, index.iter_all_entries())
710
716
for node in found_nodes:
711
717
self.assertTrue(node[0] is index)
713
719
self.assertEqual(3, len(index._row_lengths),
714
720
"Not enough rows: %r" % index._row_lengths)
715
721
# Should be as long as the nodes we supplied
716
self.assertEqual(100000, len(found_nodes))
722
self.assertEqual(20000, len(found_nodes))
717
723
# Should have the same content
718
724
self.assertEqual(set(nodes), set(bare_nodes))
719
725
# Should have done linear scan IO up the index, ignoring
721
727
# The entire index should have been read
722
728
total_pages = sum(index._row_lengths)
723
729
self.assertEqual(total_pages, index._row_offsets[-1])
724
self.assertEqual(2026722, size)
730
self.assertEqual(1303220, size)
725
731
# The start of the leaves
726
first_byte = index._row_offsets[-2] * btree_index._PAGE_SIZE
732
first_byte = index._row_offsets[-2] * page_size
727
733
readv_request = []
728
for offset in range(first_byte, size, 4096):
729
readv_request.append((offset, 4096))
734
for offset in range(first_byte, size, page_size):
735
readv_request.append((offset, page_size))
730
736
# The last page is truncated
731
readv_request[-1] = (readv_request[-1][0], 3298)
732
expected = [('readv', 'index', [(0, 4096)], False, None),
737
readv_request[-1] = (readv_request[-1][0], 1303220 % page_size)
738
expected = [('readv', 'index', [(0, page_size)], False, None),
733
739
('readv', 'index', readv_request, False, None)]
734
740
if expected != transport._activity:
735
741
self.assertEqualDiff(pprint.pformat(expected),
769
775
self.assertEqual(nodes[30], bare_nodes[0])
770
776
# Should have read the root node, then one leaf page:
771
777
self.assertEqual([('readv', 'index', [(0, 4096)], False, None),
772
('readv', 'index', [(4096, 4096), ], False, None)],
778
('readv', 'index', [(8192, 4096), ], False, None)],
773
779
transport._activity)
775
781
def test_iter_key_prefix_1_element_key_None(self):