197
205
def test_2_leaves_1_0(self):
198
206
builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
199
nodes = self.make_nodes(800, 1, 0)
207
nodes = self.make_nodes(400, 1, 0)
200
208
for node in nodes:
201
209
builder.add_node(*node)
202
210
# NamedTemporaryFile dies on builder.finish().read(). weird.
203
211
temp_file = builder.finish()
204
212
content = temp_file.read()
206
self.assertEqual(10646, len(content))
214
self.assertEqual(9283, len(content))
207
215
self.assertEqual(
208
"B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=800\n"
216
"B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=400\n"
209
217
"row_lengths=1,2\n",
211
219
root = content[77:4096]
215
223
expected_root = (
216
224
"type=internal\n"
218
"503503503503503503503503503503503503503503503503503503503503503503503503503503503503503503503503503503503503503503503503\n"
226
) + ("307" * 40) + "\n"
220
227
self.assertEqual(expected_root, root_bytes)
221
228
# We already know serialisation works for leaves, check key selection:
222
229
leaf1_bytes = zlib.decompress(leaf1)
223
230
sorted_node_keys = sorted(node[0] for node in nodes)
224
231
node = btree_index._LeafNode(leaf1_bytes, 1, 0)
225
self.assertEqual(448, len(node.keys))
226
self.assertEqual(sorted_node_keys[:448], sorted(node.keys))
232
self.assertEqual(231, len(node.keys))
233
self.assertEqual(sorted_node_keys[:231], sorted(node.keys))
227
234
leaf2_bytes = zlib.decompress(leaf2)
228
235
node = btree_index._LeafNode(leaf2_bytes, 1, 0)
229
self.assertEqual(800 - 448, len(node.keys))
230
self.assertEqual(sorted_node_keys[448:], sorted(node.keys))
236
self.assertEqual(400 - 231, len(node.keys))
237
self.assertEqual(sorted_node_keys[231:], sorted(node.keys))
232
239
def test_last_page_rounded_1_layer(self):
233
240
builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
254
261
def test_last_page_not_rounded_2_layer(self):
255
262
builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
256
nodes = self.make_nodes(800, 1, 0)
263
nodes = self.make_nodes(400, 1, 0)
257
264
for node in nodes:
258
265
builder.add_node(*node)
259
266
# NamedTemporaryFile dies on builder.finish().read(). weird.
260
267
temp_file = builder.finish()
261
268
content = temp_file.read()
263
self.assertEqual(10646, len(content))
270
self.assertEqual(9283, len(content))
264
271
self.assertEqual(
265
"B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=800\n"
272
"B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=400\n"
266
273
"row_lengths=1,2\n",
268
275
# Check the last page is well formed
269
276
leaf2 = content[8192:]
270
277
leaf2_bytes = zlib.decompress(leaf2)
271
278
node = btree_index._LeafNode(leaf2_bytes, 1, 0)
272
self.assertEqual(800 - 448, len(node.keys))
279
self.assertEqual(400 - 231, len(node.keys))
273
280
sorted_node_keys = sorted(node[0] for node in nodes)
274
self.assertEqual(sorted_node_keys[448:], sorted(node.keys))
281
self.assertEqual(sorted_node_keys[231:], sorted(node.keys))
276
283
def test_three_level_tree_details(self):
277
284
# The left most pointer in the second internal node in a row should
278
285
# pointer to the second node that the internal node is for, _not_
279
286
# the first, otherwise the first node overlaps with the last node of
280
287
# the prior internal node on that row.
281
# We will be adding 100,000 nodes, so spill at 100,001 to prevent
282
# having to flush anything out to disk.
283
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2,
285
# 100K nodes is *just* enough to create a two internal nodes on the
287
nodes = self.make_nodes(50000, 2, 2)
288
self.shrink_page_size()
289
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
290
# 40K nodes is enough to create a two internal nodes on the second
291
# level, with a 2K page size
292
nodes = self.make_nodes(20000, 2, 2)
289
294
for node in nodes:
290
295
builder.add_node(*node)
315
320
def test_2_leaves_2_2(self):
316
321
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
317
nodes = self.make_nodes(200, 2, 2)
322
nodes = self.make_nodes(100, 2, 2)
318
323
for node in nodes:
319
324
builder.add_node(*node)
320
325
# NamedTemporaryFile dies on builder.finish().read(). weird.
321
326
temp_file = builder.finish()
322
327
content = temp_file.read()
324
self.assertEqual(10574, len(content))
329
self.assertEqual(12643, len(content))
325
330
self.assertEqual(
326
"B+Tree Graph Index 2\nnode_ref_lists=2\nkey_elements=2\nlen=400\n"
331
"B+Tree Graph Index 2\nnode_ref_lists=2\nkey_elements=2\nlen=200\n"
329
334
root = content[77:4096]
330
335
leaf1 = content[4096:8192]
331
leaf2 = content[8192:]
336
leaf2 = content[8192:12288]
337
leaf3 = content[12288:]
332
338
root_bytes = zlib.decompress(root)
333
339
expected_root = (
334
340
"type=internal\n"
336
"1111111111111111111111111111111111111111\x00"
337
"126126126126126126126126126126126126126126126126126126126"
338
"126126126126126126126126126126126126126126126126126126126126126\n"
342
+ ("0" * 40) + "\x00" + ("91" * 40) + "\n"
343
+ ("1" * 40) + "\x00" + ("81" * 40) + "\n"
340
345
self.assertEqual(expected_root, root_bytes)
341
346
# We assume the other leaf nodes have been written correctly - layering
594
599
# The entire index should have been read, as it is one page long.
595
600
self.assertEqual([('readv', 'index', [(0, size)], False, None)],
596
601
transport._activity)
597
self.assertEqual(1593, size)
602
self.assertEqual(1199, size)
599
604
def test_2_levels_key_count_2_2(self):
600
605
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
615
620
def test_validate_one_page(self):
616
621
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
622
nodes = self.make_nodes(45, 2, 2)
624
builder.add_node(*node)
625
transport = get_transport('trace+' + self.get_url(''))
626
size = transport.put_file('index', builder.finish())
627
index = btree_index.BTreeGraphIndex(transport, 'index', size)
628
del transport._activity[:]
629
self.assertEqual([], transport._activity)
631
# The entire index should have been read linearly.
632
self.assertEqual([('readv', 'index', [(0, size)], False, None)],
634
self.assertEqual(1514, size)
636
def test_validate_two_pages(self):
637
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
617
638
nodes = self.make_nodes(80, 2, 2)
618
639
for node in nodes:
619
640
builder.add_node(*node)
620
641
transport = get_transport('trace+' + self.get_url(''))
621
642
size = transport.put_file('index', builder.finish())
622
index = btree_index.BTreeGraphIndex(transport, 'index', size)
623
del transport._activity[:]
624
self.assertEqual([], transport._activity)
626
# The entire index should have been read linearly.
627
self.assertEqual([('readv', 'index', [(0, size)], False, None)],
629
self.assertEqual(3846, size)
631
def test_validate_two_pages(self):
632
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
633
nodes = self.make_nodes(160, 2, 2)
635
builder.add_node(*node)
636
transport = get_transport('trace+' + self.get_url(''))
637
size = transport.put_file('index', builder.finish())
638
643
# Root page, 2 leaf pages
639
self.assertEqual(10242, size)
644
self.assertEqual(9339, size)
640
645
index = btree_index.BTreeGraphIndex(transport, 'index', size)
641
646
del transport._activity[:]
642
647
self.assertEqual([], transport._activity)
644
649
# The entire index should have been read linearly.
645
650
self.assertEqual([('readv', 'index', [(0, 4096)], False, None),
646
('readv', 'index', [(4096, 4096), (8192, 2050)], False, None)],
651
('readv', 'index', [(4096, 4096), (8192, 1147)], False, None)],
647
652
transport._activity)
648
653
# XXX: TODO: write some badly-ordered nodes, and some pointers-to-wrong
649
654
# node and make validate find them.
686
691
def test_iter_all_entries_reads(self):
687
692
# iterating all entries reads the header, then does a linear
689
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2,
691
# 100k nodes is enough to create a three-level index, which shows that
692
# we skip the internal nodes and just read the leaf nodes.
693
nodes = self.make_nodes(50000, 2, 2)
694
self.shrink_page_size()
695
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
696
# 20k nodes is enough to create a two internal nodes on the second
697
# level, with a 2K page size
698
nodes = self.make_nodes(10000, 2, 2)
694
699
for node in nodes:
695
700
builder.add_node(*node)
696
701
transport = get_transport('trace+' + self.get_url(''))
697
702
size = transport.put_file('index', builder.finish())
698
self.assertEqual(2026722, size, 'number of expected bytes in the'
703
self.assertEqual(1303220, size, 'number of expected bytes in the'
699
704
' output changed')
705
page_size = btree_index._PAGE_SIZE
701
707
index = btree_index.BTreeGraphIndex(transport, 'index', size)
702
708
del transport._activity[:]
703
709
self.assertEqual([], transport._activity)
704
found_nodes = list(index.iter_all_entries())
710
found_nodes = self.time(list, index.iter_all_entries())
706
712
for node in found_nodes:
707
713
self.assertTrue(node[0] is index)
709
715
self.assertEqual(3, len(index._row_lengths),
710
716
"Not enough rows: %r" % index._row_lengths)
711
717
# Should be as long as the nodes we supplied
712
self.assertEqual(100000, len(found_nodes))
718
self.assertEqual(20000, len(found_nodes))
713
719
# Should have the same content
714
720
self.assertEqual(set(nodes), set(bare_nodes))
715
721
# Should have done linear scan IO up the index, ignoring
717
723
# The entire index should have been read
718
724
total_pages = sum(index._row_lengths)
719
725
self.assertEqual(total_pages, index._row_offsets[-1])
720
self.assertEqual(2026722, size)
726
self.assertEqual(1303220, size)
721
727
# The start of the leaves
722
first_byte = index._row_offsets[-2] * btree_index._PAGE_SIZE
728
first_byte = index._row_offsets[-2] * page_size
723
729
readv_request = []
724
for offset in range(first_byte, size, 4096):
725
readv_request.append((offset, 4096))
730
for offset in range(first_byte, size, page_size):
731
readv_request.append((offset, page_size))
726
732
# The last page is truncated
727
readv_request[-1] = (readv_request[-1][0], 3298)
728
expected = [('readv', 'index', [(0, 4096)], False, None),
733
readv_request[-1] = (readv_request[-1][0], 1303220 % page_size)
734
expected = [('readv', 'index', [(0, page_size)], False, None),
729
735
('readv', 'index', readv_request, False, None)]
730
736
if expected != transport._activity:
731
737
self.assertEqualDiff(pprint.pformat(expected),
765
771
self.assertEqual(nodes[30], bare_nodes[0])
766
772
# Should have read the root node, then one leaf page:
767
773
self.assertEqual([('readv', 'index', [(0, 4096)], False, None),
768
('readv', 'index', [(4096, 4096), ], False, None)],
774
('readv', 'index', [(8192, 4096), ], False, None)],
769
775
transport._activity)
771
777
def test_iter_key_prefix_1_element_key_None(self):