1
# Copyright (C) 2008-2011 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
"""Tests for btree indices."""
32
from bzrlib.tests import (
33
TestCaseWithTransport,
36
from bzrlib.tests import (
41
load_tests = scenarios.load_tests_apply_scenarios
44
def btreeparser_scenarios():
45
import bzrlib._btree_serializer_py as py_module
46
scenarios = [('python', {'parse_btree': py_module})]
47
if compiled_btreeparser_feature.available():
48
scenarios.append(('C',
49
{'parse_btree': compiled_btreeparser_feature.module}))
53
compiled_btreeparser_feature = features.ModuleAvailableFeature(
54
'bzrlib._btree_serializer_pyx')
57
class BTreeTestCase(TestCaseWithTransport):
58
# test names here are suffixed by the key length and reference list count
62
TestCaseWithTransport.setUp(self)
63
self.overrideAttr(btree_index, '_RESERVED_HEADER_BYTES', 100)
65
def make_nodes(self, count, key_elements, reference_lists):
66
"""Generate count*key_elements sample nodes."""
68
for prefix_pos in range(key_elements):
70
prefix = (str(prefix_pos) * 40,)
73
for pos in xrange(count):
74
# TODO: This creates odd keys. When count == 100,000, it
75
# creates a 240 byte key
76
key = prefix + (str(pos) * 40,)
77
value = "value:%s" % pos
79
# generate some references
81
for list_pos in range(reference_lists):
82
# as many keys in each list as its index + the key depth
83
# mod 2 - this generates both 0 length lists and
84
# ones slightly longer than the number of lists.
85
# It also ensures we have non homogeneous lists.
87
for ref_pos in range(list_pos + pos % 2):
89
# refer to a nearby key
90
refs[-1].append(prefix + ("ref" + str(pos - 1) * 40,))
92
# serial of this ref in the ref list
93
refs[-1].append(prefix + ("ref" + str(ref_pos) * 40,))
94
refs[-1] = tuple(refs[-1])
98
keys.append((key, value, refs))
101
def shrink_page_size(self):
102
"""Shrink the default page size so that less fits in a page."""
103
self.overrideAttr(btree_index, '_PAGE_SIZE')
104
btree_index._PAGE_SIZE = 2048
106
def assertEqualsApproxCompressed(self, expected, actual, slop=6):
107
"""Check a count of compressed bytes is approximately as expected
109
Relying on compressed length being stable even with fixed inputs is
110
slightly bogus, but zlib is stable enough that this mostly works.
112
if not expected - slop < actual < expected + slop:
113
self.fail("Expected around %d bytes compressed but got %d" %
117
class TestBTreeBuilder(BTreeTestCase):
119
def test_clear_cache(self):
120
builder = btree_index.BTreeBuilder(reference_lists=0, key_elements=1)
121
# This is a no-op, but we need the api to be consistent with other
122
# BTreeGraphIndex apis.
123
builder.clear_cache()
125
def test_empty_1_0(self):
126
builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
127
# NamedTemporaryFile dies on builder.finish().read(). weird.
128
temp_file = builder.finish()
129
content = temp_file.read()
132
"B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=0\n"
136
def test_empty_2_1(self):
137
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=1)
138
# NamedTemporaryFile dies on builder.finish().read(). weird.
139
temp_file = builder.finish()
140
content = temp_file.read()
143
"B+Tree Graph Index 2\nnode_ref_lists=1\nkey_elements=2\nlen=0\n"
147
def test_root_leaf_1_0(self):
148
builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
149
nodes = self.make_nodes(5, 1, 0)
151
builder.add_node(*node)
152
# NamedTemporaryFile dies on builder.finish().read(). weird.
153
temp_file = builder.finish()
154
content = temp_file.read()
156
self.assertEqual(131, len(content))
158
"B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=5\n"
161
node_content = content[73:]
162
node_bytes = zlib.decompress(node_content)
163
expected_node = ("type=leaf\n"
164
"0000000000000000000000000000000000000000\x00\x00value:0\n"
165
"1111111111111111111111111111111111111111\x00\x00value:1\n"
166
"2222222222222222222222222222222222222222\x00\x00value:2\n"
167
"3333333333333333333333333333333333333333\x00\x00value:3\n"
168
"4444444444444444444444444444444444444444\x00\x00value:4\n")
169
self.assertEqual(expected_node, node_bytes)
171
def test_root_leaf_2_2(self):
172
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
173
nodes = self.make_nodes(5, 2, 2)
175
builder.add_node(*node)
176
# NamedTemporaryFile dies on builder.finish().read(). weird.
177
temp_file = builder.finish()
178
content = temp_file.read()
180
self.assertEqual(238, len(content))
182
"B+Tree Graph Index 2\nnode_ref_lists=2\nkey_elements=2\nlen=10\n"
185
node_content = content[74:]
186
node_bytes = zlib.decompress(node_content)
189
"0000000000000000000000000000000000000000\x000000000000000000000000000000000000000000\x00\t0000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\x00value:0\n"
190
"0000000000000000000000000000000000000000\x001111111111111111111111111111111111111111\x000000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\t0000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\r0000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\x00value:1\n"
191
"0000000000000000000000000000000000000000\x002222222222222222222222222222222222222222\x00\t0000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\x00value:2\n"
192
"0000000000000000000000000000000000000000\x003333333333333333333333333333333333333333\x000000000000000000000000000000000000000000\x00ref2222222222222222222222222222222222222222\t0000000000000000000000000000000000000000\x00ref2222222222222222222222222222222222222222\r0000000000000000000000000000000000000000\x00ref2222222222222222222222222222222222222222\x00value:3\n"
193
"0000000000000000000000000000000000000000\x004444444444444444444444444444444444444444\x00\t0000000000000000000000000000000000000000\x00ref0000000000000000000000000000000000000000\x00value:4\n"
194
"1111111111111111111111111111111111111111\x000000000000000000000000000000000000000000\x00\t1111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\x00value:0\n"
195
"1111111111111111111111111111111111111111\x001111111111111111111111111111111111111111\x001111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\t1111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\r1111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\x00value:1\n"
196
"1111111111111111111111111111111111111111\x002222222222222222222222222222222222222222\x00\t1111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\x00value:2\n"
197
"1111111111111111111111111111111111111111\x003333333333333333333333333333333333333333\x001111111111111111111111111111111111111111\x00ref2222222222222222222222222222222222222222\t1111111111111111111111111111111111111111\x00ref2222222222222222222222222222222222222222\r1111111111111111111111111111111111111111\x00ref2222222222222222222222222222222222222222\x00value:3\n"
198
"1111111111111111111111111111111111111111\x004444444444444444444444444444444444444444\x00\t1111111111111111111111111111111111111111\x00ref0000000000000000000000000000000000000000\x00value:4\n"
201
self.assertEqual(expected_node, node_bytes)
203
def test_2_leaves_1_0(self):
204
builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
205
nodes = self.make_nodes(400, 1, 0)
207
builder.add_node(*node)
208
# NamedTemporaryFile dies on builder.finish().read(). weird.
209
temp_file = builder.finish()
210
content = temp_file.read()
212
self.assertEqualsApproxCompressed(9283, len(content))
214
"B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=400\n"
217
root = content[77:4096]
218
leaf1 = content[4096:8192]
219
leaf2 = content[8192:]
220
root_bytes = zlib.decompress(root)
224
) + ("307" * 40) + "\n"
225
self.assertEqual(expected_root, root_bytes)
226
# We already know serialisation works for leaves, check key selection:
227
leaf1_bytes = zlib.decompress(leaf1)
228
sorted_node_keys = sorted(node[0] for node in nodes)
229
node = btree_index._LeafNode(leaf1_bytes, 1, 0)
230
self.assertEqual(231, len(node))
231
self.assertEqual(sorted_node_keys[:231], node.all_keys())
232
leaf2_bytes = zlib.decompress(leaf2)
233
node = btree_index._LeafNode(leaf2_bytes, 1, 0)
234
self.assertEqual(400 - 231, len(node))
235
self.assertEqual(sorted_node_keys[231:], node.all_keys())
237
def test_last_page_rounded_1_layer(self):
238
builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
239
nodes = self.make_nodes(10, 1, 0)
241
builder.add_node(*node)
242
# NamedTemporaryFile dies on builder.finish().read(). weird.
243
temp_file = builder.finish()
244
content = temp_file.read()
246
self.assertEqualsApproxCompressed(155, len(content))
248
"B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=10\n"
251
# Check thelast page is well formed
253
leaf2_bytes = zlib.decompress(leaf2)
254
node = btree_index._LeafNode(leaf2_bytes, 1, 0)
255
self.assertEqual(10, len(node))
256
sorted_node_keys = sorted(node[0] for node in nodes)
257
self.assertEqual(sorted_node_keys, node.all_keys())
259
def test_last_page_not_rounded_2_layer(self):
260
builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
261
nodes = self.make_nodes(400, 1, 0)
263
builder.add_node(*node)
264
# NamedTemporaryFile dies on builder.finish().read(). weird.
265
temp_file = builder.finish()
266
content = temp_file.read()
268
self.assertEqualsApproxCompressed(9283, len(content))
270
"B+Tree Graph Index 2\nnode_ref_lists=0\nkey_elements=1\nlen=400\n"
273
# Check the last page is well formed
274
leaf2 = content[8192:]
275
leaf2_bytes = zlib.decompress(leaf2)
276
node = btree_index._LeafNode(leaf2_bytes, 1, 0)
277
self.assertEqual(400 - 231, len(node))
278
sorted_node_keys = sorted(node[0] for node in nodes)
279
self.assertEqual(sorted_node_keys[231:], node.all_keys())
281
def test_three_level_tree_details(self):
282
# The left most pointer in the second internal node in a row should
283
# pointer to the second node that the internal node is for, _not_
284
# the first, otherwise the first node overlaps with the last node of
285
# the prior internal node on that row.
286
self.shrink_page_size()
287
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
288
# 40K nodes is enough to create a two internal nodes on the second
289
# level, with a 2K page size
290
nodes = self.make_nodes(20000, 2, 2)
293
builder.add_node(*node)
294
t = transport.get_transport_from_url('trace+' + self.get_url(''))
295
size = t.put_file('index', self.time(builder.finish))
297
index = btree_index.BTreeGraphIndex(t, 'index', size)
298
# Seed the metadata, we're using internal calls now.
300
self.assertEqual(3, len(index._row_lengths),
301
"Not enough rows: %r" % index._row_lengths)
302
self.assertEqual(4, len(index._row_offsets))
303
self.assertEqual(sum(index._row_lengths), index._row_offsets[-1])
304
internal_nodes = index._get_internal_nodes([0, 1, 2])
305
root_node = internal_nodes[0]
306
internal_node1 = internal_nodes[1]
307
internal_node2 = internal_nodes[2]
308
# The left most node node2 points at should be one after the right most
309
# node pointed at by node1.
310
self.assertEqual(internal_node2.offset, 1 + len(internal_node1.keys))
311
# The left most key of the second node pointed at by internal_node2
312
# should be its first key. We can check this by looking for its first key
313
# in the second node it points at
314
pos = index._row_offsets[2] + internal_node2.offset + 1
315
leaf = index._get_leaf_nodes([pos])[pos]
316
self.assertTrue(internal_node2.keys[0] in leaf)
318
def test_2_leaves_2_2(self):
319
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
320
nodes = self.make_nodes(100, 2, 2)
322
builder.add_node(*node)
323
# NamedTemporaryFile dies on builder.finish().read(). weird.
324
temp_file = builder.finish()
325
content = temp_file.read()
327
self.assertEqualsApproxCompressed(12643, len(content))
329
"B+Tree Graph Index 2\nnode_ref_lists=2\nkey_elements=2\nlen=200\n"
332
root = content[77:4096]
333
leaf1 = content[4096:8192]
334
leaf2 = content[8192:12288]
335
leaf3 = content[12288:]
336
root_bytes = zlib.decompress(root)
340
+ ("0" * 40) + "\x00" + ("91" * 40) + "\n"
341
+ ("1" * 40) + "\x00" + ("81" * 40) + "\n"
343
self.assertEqual(expected_root, root_bytes)
344
# We assume the other leaf nodes have been written correctly - layering
347
def test_spill_index_stress_1_1(self):
348
builder = btree_index.BTreeBuilder(key_elements=1, spill_at=2)
349
nodes = [node[0:2] for node in self.make_nodes(16, 1, 0)]
350
builder.add_node(*nodes[0])
351
# Test the parts of the index that take up memory are doing so
353
self.assertEqual(1, len(builder._nodes))
354
self.assertIs(None, builder._nodes_by_key)
355
builder.add_node(*nodes[1])
356
self.assertEqual(0, len(builder._nodes))
357
self.assertIs(None, builder._nodes_by_key)
358
self.assertEqual(1, len(builder._backing_indices))
359
self.assertEqual(2, builder._backing_indices[0].key_count())
361
builder.add_node(*nodes[2])
362
self.assertEqual(1, len(builder._nodes))
363
self.assertIs(None, builder._nodes_by_key)
364
# And spills to a second backing index combing all
365
builder.add_node(*nodes[3])
366
self.assertEqual(0, len(builder._nodes))
367
self.assertIs(None, builder._nodes_by_key)
368
self.assertEqual(2, len(builder._backing_indices))
369
self.assertEqual(None, builder._backing_indices[0])
370
self.assertEqual(4, builder._backing_indices[1].key_count())
371
# The next spills to the 2-len slot
372
builder.add_node(*nodes[4])
373
builder.add_node(*nodes[5])
374
self.assertEqual(0, len(builder._nodes))
375
self.assertIs(None, builder._nodes_by_key)
376
self.assertEqual(2, len(builder._backing_indices))
377
self.assertEqual(2, builder._backing_indices[0].key_count())
378
self.assertEqual(4, builder._backing_indices[1].key_count())
379
# Next spill combines
380
builder.add_node(*nodes[6])
381
builder.add_node(*nodes[7])
382
self.assertEqual(3, len(builder._backing_indices))
383
self.assertEqual(None, builder._backing_indices[0])
384
self.assertEqual(None, builder._backing_indices[1])
385
self.assertEqual(8, builder._backing_indices[2].key_count())
386
# And so forth - counting up in binary.
387
builder.add_node(*nodes[8])
388
builder.add_node(*nodes[9])
389
self.assertEqual(3, len(builder._backing_indices))
390
self.assertEqual(2, builder._backing_indices[0].key_count())
391
self.assertEqual(None, builder._backing_indices[1])
392
self.assertEqual(8, builder._backing_indices[2].key_count())
393
builder.add_node(*nodes[10])
394
builder.add_node(*nodes[11])
395
self.assertEqual(3, len(builder._backing_indices))
396
self.assertEqual(None, builder._backing_indices[0])
397
self.assertEqual(4, builder._backing_indices[1].key_count())
398
self.assertEqual(8, builder._backing_indices[2].key_count())
399
builder.add_node(*nodes[12])
400
# Test that memory and disk are both used for query methods; and that
401
# None is skipped over happily.
402
self.assertEqual([(builder,) + node for node in sorted(nodes[:13])],
403
list(builder.iter_all_entries()))
404
# Two nodes - one memory one disk
405
self.assertEqual(set([(builder,) + node for node in nodes[11:13]]),
406
set(builder.iter_entries([nodes[12][0], nodes[11][0]])))
407
self.assertEqual(13, builder.key_count())
408
self.assertEqual(set([(builder,) + node for node in nodes[11:13]]),
409
set(builder.iter_entries_prefix([nodes[12][0], nodes[11][0]])))
410
builder.add_node(*nodes[13])
411
self.assertEqual(3, len(builder._backing_indices))
412
self.assertEqual(2, builder._backing_indices[0].key_count())
413
self.assertEqual(4, builder._backing_indices[1].key_count())
414
self.assertEqual(8, builder._backing_indices[2].key_count())
415
builder.add_node(*nodes[14])
416
builder.add_node(*nodes[15])
417
self.assertEqual(4, len(builder._backing_indices))
418
self.assertEqual(None, builder._backing_indices[0])
419
self.assertEqual(None, builder._backing_indices[1])
420
self.assertEqual(None, builder._backing_indices[2])
421
self.assertEqual(16, builder._backing_indices[3].key_count())
422
# Now finish, and check we got a correctly ordered tree
423
t = self.get_transport('')
424
size = t.put_file('index', builder.finish())
425
index = btree_index.BTreeGraphIndex(t, 'index', size)
426
nodes = list(index.iter_all_entries())
427
self.assertEqual(sorted(nodes), nodes)
428
self.assertEqual(16, len(nodes))
430
def test_spill_index_stress_1_1_no_combine(self):
431
builder = btree_index.BTreeBuilder(key_elements=1, spill_at=2)
432
builder.set_optimize(for_size=False, combine_backing_indices=False)
433
nodes = [node[0:2] for node in self.make_nodes(16, 1, 0)]
434
builder.add_node(*nodes[0])
435
# Test the parts of the index that take up memory are doing so
437
self.assertEqual(1, len(builder._nodes))
438
self.assertIs(None, builder._nodes_by_key)
439
builder.add_node(*nodes[1])
440
self.assertEqual(0, len(builder._nodes))
441
self.assertIs(None, builder._nodes_by_key)
442
self.assertEqual(1, len(builder._backing_indices))
443
self.assertEqual(2, builder._backing_indices[0].key_count())
445
builder.add_node(*nodes[2])
446
self.assertEqual(1, len(builder._nodes))
447
self.assertIs(None, builder._nodes_by_key)
448
# And spills to a second backing index but doesn't combine
449
builder.add_node(*nodes[3])
450
self.assertEqual(0, len(builder._nodes))
451
self.assertIs(None, builder._nodes_by_key)
452
self.assertEqual(2, len(builder._backing_indices))
453
for backing_index in builder._backing_indices:
454
self.assertEqual(2, backing_index.key_count())
455
# The next spills to the 3rd slot
456
builder.add_node(*nodes[4])
457
builder.add_node(*nodes[5])
458
self.assertEqual(0, len(builder._nodes))
459
self.assertIs(None, builder._nodes_by_key)
460
self.assertEqual(3, len(builder._backing_indices))
461
for backing_index in builder._backing_indices:
462
self.assertEqual(2, backing_index.key_count())
463
# Now spill a few more, and check that we don't combine
464
builder.add_node(*nodes[6])
465
builder.add_node(*nodes[7])
466
builder.add_node(*nodes[8])
467
builder.add_node(*nodes[9])
468
builder.add_node(*nodes[10])
469
builder.add_node(*nodes[11])
470
builder.add_node(*nodes[12])
471
self.assertEqual(6, len(builder._backing_indices))
472
for backing_index in builder._backing_indices:
473
self.assertEqual(2, backing_index.key_count())
474
# Test that memory and disk are both used for query methods; and that
475
# None is skipped over happily.
476
self.assertEqual([(builder,) + node for node in sorted(nodes[:13])],
477
list(builder.iter_all_entries()))
478
# Two nodes - one memory one disk
479
self.assertEqual(set([(builder,) + node for node in nodes[11:13]]),
480
set(builder.iter_entries([nodes[12][0], nodes[11][0]])))
481
self.assertEqual(13, builder.key_count())
482
self.assertEqual(set([(builder,) + node for node in nodes[11:13]]),
483
set(builder.iter_entries_prefix([nodes[12][0], nodes[11][0]])))
484
builder.add_node(*nodes[13])
485
builder.add_node(*nodes[14])
486
builder.add_node(*nodes[15])
487
self.assertEqual(8, len(builder._backing_indices))
488
for backing_index in builder._backing_indices:
489
self.assertEqual(2, backing_index.key_count())
490
# Now finish, and check we got a correctly ordered tree
491
transport = self.get_transport('')
492
size = transport.put_file('index', builder.finish())
493
index = btree_index.BTreeGraphIndex(transport, 'index', size)
494
nodes = list(index.iter_all_entries())
495
self.assertEqual(sorted(nodes), nodes)
496
self.assertEqual(16, len(nodes))
498
def test_set_optimize(self):
499
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
500
builder.set_optimize(for_size=True)
501
self.assertTrue(builder._optimize_for_size)
502
builder.set_optimize(for_size=False)
503
self.assertFalse(builder._optimize_for_size)
504
# test that we can set combine_backing_indices without effecting
507
builder._optimize_for_size = obj
508
builder.set_optimize(combine_backing_indices=False)
509
self.assertFalse(builder._combine_backing_indices)
510
self.assertIs(obj, builder._optimize_for_size)
511
builder.set_optimize(combine_backing_indices=True)
512
self.assertTrue(builder._combine_backing_indices)
513
self.assertIs(obj, builder._optimize_for_size)
515
def test_spill_index_stress_2_2(self):
516
# test that references and longer keys don't confuse things.
517
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2,
519
nodes = self.make_nodes(16, 2, 2)
520
builder.add_node(*nodes[0])
521
# Test the parts of the index that take up memory are doing so
523
self.assertEqual(1, len(builder._nodes))
524
self.assertIs(None, builder._nodes_by_key)
525
builder.add_node(*nodes[1])
526
self.assertEqual(0, len(builder._nodes))
527
self.assertIs(None, builder._nodes_by_key)
528
self.assertEqual(1, len(builder._backing_indices))
529
self.assertEqual(2, builder._backing_indices[0].key_count())
531
old = dict(builder._get_nodes_by_key()) #Build up the nodes by key dict
532
builder.add_node(*nodes[2])
533
self.assertEqual(1, len(builder._nodes))
534
self.assertIsNot(None, builder._nodes_by_key)
535
self.assertNotEqual({}, builder._nodes_by_key)
536
# We should have a new entry
537
self.assertNotEqual(old, builder._nodes_by_key)
538
# And spills to a second backing index combing all
539
builder.add_node(*nodes[3])
540
self.assertEqual(0, len(builder._nodes))
541
self.assertIs(None, builder._nodes_by_key)
542
self.assertEqual(2, len(builder._backing_indices))
543
self.assertEqual(None, builder._backing_indices[0])
544
self.assertEqual(4, builder._backing_indices[1].key_count())
545
# The next spills to the 2-len slot
546
builder.add_node(*nodes[4])
547
builder.add_node(*nodes[5])
548
self.assertEqual(0, len(builder._nodes))
549
self.assertIs(None, builder._nodes_by_key)
550
self.assertEqual(2, len(builder._backing_indices))
551
self.assertEqual(2, builder._backing_indices[0].key_count())
552
self.assertEqual(4, builder._backing_indices[1].key_count())
553
# Next spill combines
554
builder.add_node(*nodes[6])
555
builder.add_node(*nodes[7])
556
self.assertEqual(3, len(builder._backing_indices))
557
self.assertEqual(None, builder._backing_indices[0])
558
self.assertEqual(None, builder._backing_indices[1])
559
self.assertEqual(8, builder._backing_indices[2].key_count())
560
# And so forth - counting up in binary.
561
builder.add_node(*nodes[8])
562
builder.add_node(*nodes[9])
563
self.assertEqual(3, len(builder._backing_indices))
564
self.assertEqual(2, builder._backing_indices[0].key_count())
565
self.assertEqual(None, builder._backing_indices[1])
566
self.assertEqual(8, builder._backing_indices[2].key_count())
567
builder.add_node(*nodes[10])
568
builder.add_node(*nodes[11])
569
self.assertEqual(3, len(builder._backing_indices))
570
self.assertEqual(None, builder._backing_indices[0])
571
self.assertEqual(4, builder._backing_indices[1].key_count())
572
self.assertEqual(8, builder._backing_indices[2].key_count())
573
builder.add_node(*nodes[12])
574
# Test that memory and disk are both used for query methods; and that
575
# None is skipped over happily.
576
self.assertEqual([(builder,) + node for node in sorted(nodes[:13])],
577
list(builder.iter_all_entries()))
578
# Two nodes - one memory one disk
579
self.assertEqual(set([(builder,) + node for node in nodes[11:13]]),
580
set(builder.iter_entries([nodes[12][0], nodes[11][0]])))
581
self.assertEqual(13, builder.key_count())
582
self.assertEqual(set([(builder,) + node for node in nodes[11:13]]),
583
set(builder.iter_entries_prefix([nodes[12][0], nodes[11][0]])))
584
builder.add_node(*nodes[13])
585
self.assertEqual(3, len(builder._backing_indices))
586
self.assertEqual(2, builder._backing_indices[0].key_count())
587
self.assertEqual(4, builder._backing_indices[1].key_count())
588
self.assertEqual(8, builder._backing_indices[2].key_count())
589
builder.add_node(*nodes[14])
590
builder.add_node(*nodes[15])
591
self.assertEqual(4, len(builder._backing_indices))
592
self.assertEqual(None, builder._backing_indices[0])
593
self.assertEqual(None, builder._backing_indices[1])
594
self.assertEqual(None, builder._backing_indices[2])
595
self.assertEqual(16, builder._backing_indices[3].key_count())
596
# Now finish, and check we got a correctly ordered tree
597
transport = self.get_transport('')
598
size = transport.put_file('index', builder.finish())
599
index = btree_index.BTreeGraphIndex(transport, 'index', size)
600
nodes = list(index.iter_all_entries())
601
self.assertEqual(sorted(nodes), nodes)
602
self.assertEqual(16, len(nodes))
604
def test_spill_index_duplicate_key_caught_on_finish(self):
605
builder = btree_index.BTreeBuilder(key_elements=1, spill_at=2)
606
nodes = [node[0:2] for node in self.make_nodes(16, 1, 0)]
607
builder.add_node(*nodes[0])
608
builder.add_node(*nodes[1])
609
builder.add_node(*nodes[0])
610
self.assertRaises(errors.BadIndexDuplicateKey, builder.finish)
613
class TestBTreeIndex(BTreeTestCase):
615
def make_index(self, ref_lists=0, key_elements=1, nodes=[]):
616
builder = btree_index.BTreeBuilder(reference_lists=ref_lists,
617
key_elements=key_elements)
618
for key, value, references in nodes:
619
builder.add_node(key, value, references)
620
stream = builder.finish()
621
trans = transport.get_transport_from_url('trace+' + self.get_url())
622
size = trans.put_file('index', stream)
623
return btree_index.BTreeGraphIndex(trans, 'index', size)
625
def make_index_with_offset(self, ref_lists=1, key_elements=1, nodes=[],
627
builder = btree_index.BTreeBuilder(key_elements=key_elements,
628
reference_lists=ref_lists)
629
builder.add_nodes(nodes)
630
transport = self.get_transport('')
631
# NamedTemporaryFile dies on builder.finish().read(). weird.
632
temp_file = builder.finish()
633
content = temp_file.read()
636
transport.put_bytes('index', (' '*offset)+content)
637
return btree_index.BTreeGraphIndex(transport, 'index', size=size,
640
def test_clear_cache(self):
641
nodes = self.make_nodes(160, 2, 2)
642
index = self.make_index(ref_lists=2, key_elements=2, nodes=nodes)
643
self.assertEqual(1, len(list(index.iter_entries([nodes[30][0]]))))
644
self.assertEqual([1, 4], index._row_lengths)
645
self.assertIsNot(None, index._root_node)
646
internal_node_pre_clear = index._internal_node_cache.keys()
647
self.assertTrue(len(index._leaf_node_cache) > 0)
649
# We don't touch _root_node or _internal_node_cache, both should be
650
# small, and can save a round trip or two
651
self.assertIsNot(None, index._root_node)
652
# NOTE: We don't want to affect the _internal_node_cache, as we expect
653
# it will be small, and if we ever do touch this index again, it
654
# will save round-trips. This assertion isn't very strong,
655
# becuase without a 3-level index, we don't have any internal
657
self.assertEqual(internal_node_pre_clear,
658
index._internal_node_cache.keys())
659
self.assertEqual(0, len(index._leaf_node_cache))
661
def test_trivial_constructor(self):
662
t = transport.get_transport_from_url('trace+' + self.get_url(''))
663
index = btree_index.BTreeGraphIndex(t, 'index', None)
664
# Checks the page size at load, but that isn't logged yet.
665
self.assertEqual([], t._activity)
667
def test_with_size_constructor(self):
668
t = transport.get_transport_from_url('trace+' + self.get_url(''))
669
index = btree_index.BTreeGraphIndex(t, 'index', 1)
670
# Checks the page size at load, but that isn't logged yet.
671
self.assertEqual([], t._activity)
673
def test_empty_key_count_no_size(self):
674
builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
675
t = transport.get_transport_from_url('trace+' + self.get_url(''))
676
t.put_file('index', builder.finish())
677
index = btree_index.BTreeGraphIndex(t, 'index', None)
679
self.assertEqual([], t._activity)
680
self.assertEqual(0, index.key_count())
681
# The entire index should have been requested (as we generally have the
682
# size available, and doing many small readvs is inappropriate).
683
# We can't tell how much was actually read here, but - check the code.
684
self.assertEqual([('get', 'index')], t._activity)
686
def test_empty_key_count(self):
687
builder = btree_index.BTreeBuilder(key_elements=1, reference_lists=0)
688
t = transport.get_transport_from_url('trace+' + self.get_url(''))
689
size = t.put_file('index', builder.finish())
690
self.assertEqual(72, size)
691
index = btree_index.BTreeGraphIndex(t, 'index', size)
693
self.assertEqual([], t._activity)
694
self.assertEqual(0, index.key_count())
695
# The entire index should have been read, as 4K > size
696
self.assertEqual([('readv', 'index', [(0, 72)], False, None)],
699
def test_non_empty_key_count_2_2(self):
700
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
701
nodes = self.make_nodes(35, 2, 2)
703
builder.add_node(*node)
704
t = transport.get_transport_from_url('trace+' + self.get_url(''))
705
size = t.put_file('index', builder.finish())
706
index = btree_index.BTreeGraphIndex(t, 'index', size)
708
self.assertEqual([], t._activity)
709
self.assertEqual(70, index.key_count())
710
# The entire index should have been read, as it is one page long.
711
self.assertEqual([('readv', 'index', [(0, size)], False, None)],
713
self.assertEqualsApproxCompressed(1173, size)
715
def test_with_offset_no_size(self):
716
index = self.make_index_with_offset(key_elements=1, ref_lists=1,
718
nodes=self.make_nodes(200, 1, 1))
719
index._size = None # throw away the size info
720
self.assertEqual(200, index.key_count())
722
def test_with_small_offset(self):
723
index = self.make_index_with_offset(key_elements=1, ref_lists=1,
725
nodes=self.make_nodes(200, 1, 1))
726
self.assertEqual(200, index.key_count())
728
def test_with_large_offset(self):
729
index = self.make_index_with_offset(key_elements=1, ref_lists=1,
731
nodes=self.make_nodes(200, 1, 1))
732
self.assertEqual(200, index.key_count())
734
def test__read_nodes_no_size_one_page_reads_once(self):
735
self.make_index(nodes=[(('key',), 'value', ())])
736
trans = transport.get_transport_from_url('trace+' + self.get_url())
737
index = btree_index.BTreeGraphIndex(trans, 'index', None)
738
del trans._activity[:]
739
nodes = dict(index._read_nodes([0]))
740
self.assertEqual([0], nodes.keys())
742
self.assertEqual([('key',)], node.all_keys())
743
self.assertEqual([('get', 'index')], trans._activity)
745
def test__read_nodes_no_size_multiple_pages(self):
746
index = self.make_index(2, 2, nodes=self.make_nodes(160, 2, 2))
748
num_pages = index._row_offsets[-1]
749
# Reopen with a traced transport and no size
750
trans = transport.get_transport_from_url('trace+' + self.get_url())
751
index = btree_index.BTreeGraphIndex(trans, 'index', None)
752
del trans._activity[:]
753
nodes = dict(index._read_nodes([0]))
754
self.assertEqual(range(num_pages), nodes.keys())
756
def test_2_levels_key_count_2_2(self):
757
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
758
nodes = self.make_nodes(160, 2, 2)
760
builder.add_node(*node)
761
t = transport.get_transport_from_url('trace+' + self.get_url(''))
762
size = t.put_file('index', builder.finish())
763
self.assertEqualsApproxCompressed(17692, size)
764
index = btree_index.BTreeGraphIndex(t, 'index', size)
766
self.assertEqual([], t._activity)
767
self.assertEqual(320, index.key_count())
768
# The entire index should not have been read.
769
self.assertEqual([('readv', 'index', [(0, 4096)], False, None)],
772
def test_validate_one_page(self):
773
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
774
nodes = self.make_nodes(45, 2, 2)
776
builder.add_node(*node)
777
t = transport.get_transport_from_url('trace+' + self.get_url(''))
778
size = t.put_file('index', builder.finish())
779
index = btree_index.BTreeGraphIndex(t, 'index', size)
781
self.assertEqual([], t._activity)
783
# The entire index should have been read linearly.
784
self.assertEqual([('readv', 'index', [(0, size)], False, None)],
786
self.assertEqualsApproxCompressed(1488, size)
788
def test_validate_two_pages(self):
789
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
790
nodes = self.make_nodes(80, 2, 2)
792
builder.add_node(*node)
793
t = transport.get_transport_from_url('trace+' + self.get_url(''))
794
size = t.put_file('index', builder.finish())
795
# Root page, 2 leaf pages
796
self.assertEqualsApproxCompressed(9339, size)
797
index = btree_index.BTreeGraphIndex(t, 'index', size)
799
self.assertEqual([], t._activity)
801
rem = size - 8192 # Number of remaining bytes after second block
802
# The entire index should have been read linearly.
804
[('readv', 'index', [(0, 4096)], False, None),
805
('readv', 'index', [(4096, 4096), (8192, rem)], False, None)],
807
# XXX: TODO: write some badly-ordered nodes, and some pointers-to-wrong
808
# node and make validate find them.
810
def test_eq_ne(self):
811
# two indices are equal when constructed with the same parameters:
812
t1 = transport.get_transport_from_url('trace+' + self.get_url(''))
813
t2 = self.get_transport()
815
btree_index.BTreeGraphIndex(t1, 'index', None) ==
816
btree_index.BTreeGraphIndex(t1, 'index', None))
818
btree_index.BTreeGraphIndex(t1, 'index', 20) ==
819
btree_index.BTreeGraphIndex(t1, 'index', 20))
821
btree_index.BTreeGraphIndex(t1, 'index', 20) ==
822
btree_index.BTreeGraphIndex(t2, 'index', 20))
824
btree_index.BTreeGraphIndex(t1, 'inde1', 20) ==
825
btree_index.BTreeGraphIndex(t1, 'inde2', 20))
827
btree_index.BTreeGraphIndex(t1, 'index', 10) ==
828
btree_index.BTreeGraphIndex(t1, 'index', 20))
830
btree_index.BTreeGraphIndex(t1, 'index', None) !=
831
btree_index.BTreeGraphIndex(t1, 'index', None))
833
btree_index.BTreeGraphIndex(t1, 'index', 20) !=
834
btree_index.BTreeGraphIndex(t1, 'index', 20))
836
btree_index.BTreeGraphIndex(t1, 'index', 20) !=
837
btree_index.BTreeGraphIndex(t2, 'index', 20))
839
btree_index.BTreeGraphIndex(t1, 'inde1', 20) !=
840
btree_index.BTreeGraphIndex(t1, 'inde2', 20))
842
btree_index.BTreeGraphIndex(t1, 'index', 10) !=
843
btree_index.BTreeGraphIndex(t1, 'index', 20))
845
def test_key_too_big(self):
846
# the size that matters here is the _compressed_ size of the key, so we can't
847
# do a simple character repeat.
848
bigKey = ''.join(map(repr, xrange(btree_index._PAGE_SIZE)))
849
self.assertRaises(errors.BadIndexKey,
851
nodes=[((bigKey,), 'value', ())])
853
def test_iter_all_only_root_no_size(self):
854
self.make_index(nodes=[(('key',), 'value', ())])
855
t = transport.get_transport_from_url('trace+' + self.get_url(''))
856
index = btree_index.BTreeGraphIndex(t, 'index', None)
858
self.assertEqual([(('key',), 'value')],
859
[x[1:] for x in index.iter_all_entries()])
860
self.assertEqual([('get', 'index')], t._activity)
862
def test_iter_all_entries_reads(self):
863
# iterating all entries reads the header, then does a linear
865
self.shrink_page_size()
866
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
867
# 20k nodes is enough to create a two internal nodes on the second
868
# level, with a 2K page size
869
nodes = self.make_nodes(10000, 2, 2)
871
builder.add_node(*node)
872
t = transport.get_transport_from_url('trace+' + self.get_url(''))
873
size = t.put_file('index', builder.finish())
874
page_size = btree_index._PAGE_SIZE
876
index = btree_index.BTreeGraphIndex(t, 'index', size)
878
self.assertEqual([], t._activity)
879
found_nodes = self.time(list, index.iter_all_entries())
881
for node in found_nodes:
882
self.assertTrue(node[0] is index)
883
bare_nodes.append(node[1:])
884
self.assertEqual(3, len(index._row_lengths),
885
"Not enough rows: %r" % index._row_lengths)
886
# Should be as long as the nodes we supplied
887
self.assertEqual(20000, len(found_nodes))
888
# Should have the same content
889
self.assertEqual(set(nodes), set(bare_nodes))
890
# Should have done linear scan IO up the index, ignoring
891
# the internal nodes:
892
# The entire index should have been read
893
total_pages = sum(index._row_lengths)
894
self.assertEqual(total_pages, index._row_offsets[-1])
895
self.assertEqualsApproxCompressed(1303220, size)
896
# The start of the leaves
897
first_byte = index._row_offsets[-2] * page_size
899
for offset in range(first_byte, size, page_size):
900
readv_request.append((offset, page_size))
901
# The last page is truncated
902
readv_request[-1] = (readv_request[-1][0], size % page_size)
903
expected = [('readv', 'index', [(0, page_size)], False, None),
904
('readv', 'index', readv_request, False, None)]
905
if expected != t._activity:
906
self.assertEqualDiff(pprint.pformat(expected),
907
pprint.pformat(t._activity))
909
def _test_iter_entries_references_resolved(self):
910
index = self.make_index(1, nodes=[
911
(('name', ), 'data', ([('ref', ), ('ref', )], )),
912
(('ref', ), 'refdata', ([], ))])
913
self.assertEqual(set([(index, ('name', ), 'data', ((('ref',),('ref',)),)),
914
(index, ('ref', ), 'refdata', ((), ))]),
915
set(index.iter_entries([('name',), ('ref',)])))
917
def test_iter_entries_references_2_refs_resolved(self):
918
# iterating some entries reads just the pages needed. For now, to
919
# get it working and start measuring, only 4K pages are read.
920
builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
921
# 80 nodes is enough to create a two-level index.
922
nodes = self.make_nodes(160, 2, 2)
924
builder.add_node(*node)
925
t = transport.get_transport_from_url('trace+' + self.get_url(''))
926
size = t.put_file('index', builder.finish())
928
index = btree_index.BTreeGraphIndex(t, 'index', size)
930
self.assertEqual([], t._activity)
932
found_nodes = list(index.iter_entries([nodes[30][0]]))
934
for node in found_nodes:
935
self.assertTrue(node[0] is index)
936
bare_nodes.append(node[1:])
937
# Should be as long as the nodes we supplied
938
self.assertEqual(1, len(found_nodes))
939
# Should have the same content
940
self.assertEqual(nodes[30], bare_nodes[0])
941
# Should have read the root node, then one leaf page:
942
self.assertEqual([('readv', 'index', [(0, 4096)], False, None),
943
('readv', 'index', [(8192, 4096), ], False, None)],
946
def test_iter_key_prefix_1_element_key_None(self):
947
index = self.make_index()
948
self.assertRaises(errors.BadIndexKey, list,
949
index.iter_entries_prefix([(None, )]))
951
def test_iter_key_prefix_wrong_length(self):
952
index = self.make_index()
953
self.assertRaises(errors.BadIndexKey, list,
954
index.iter_entries_prefix([('foo', None)]))
955
index = self.make_index(key_elements=2)
956
self.assertRaises(errors.BadIndexKey, list,
957
index.iter_entries_prefix([('foo', )]))
958
self.assertRaises(errors.BadIndexKey, list,
959
index.iter_entries_prefix([('foo', None, None)]))
961
def test_iter_key_prefix_1_key_element_no_refs(self):
962
index = self.make_index( nodes=[
963
(('name', ), 'data', ()),
964
(('ref', ), 'refdata', ())])
965
self.assertEqual(set([(index, ('name', ), 'data'),
966
(index, ('ref', ), 'refdata')]),
967
set(index.iter_entries_prefix([('name', ), ('ref', )])))
969
def test_iter_key_prefix_1_key_element_refs(self):
970
index = self.make_index(1, nodes=[
971
(('name', ), 'data', ([('ref', )], )),
972
(('ref', ), 'refdata', ([], ))])
973
self.assertEqual(set([(index, ('name', ), 'data', ((('ref',),),)),
974
(index, ('ref', ), 'refdata', ((), ))]),
975
set(index.iter_entries_prefix([('name', ), ('ref', )])))
977
def test_iter_key_prefix_2_key_element_no_refs(self):
978
index = self.make_index(key_elements=2, nodes=[
979
(('name', 'fin1'), 'data', ()),
980
(('name', 'fin2'), 'beta', ()),
981
(('ref', 'erence'), 'refdata', ())])
982
self.assertEqual(set([(index, ('name', 'fin1'), 'data'),
983
(index, ('ref', 'erence'), 'refdata')]),
984
set(index.iter_entries_prefix([('name', 'fin1'), ('ref', 'erence')])))
985
self.assertEqual(set([(index, ('name', 'fin1'), 'data'),
986
(index, ('name', 'fin2'), 'beta')]),
987
set(index.iter_entries_prefix([('name', None)])))
989
def test_iter_key_prefix_2_key_element_refs(self):
990
index = self.make_index(1, key_elements=2, nodes=[
991
(('name', 'fin1'), 'data', ([('ref', 'erence')], )),
992
(('name', 'fin2'), 'beta', ([], )),
993
(('ref', 'erence'), 'refdata', ([], ))])
994
self.assertEqual(set([(index, ('name', 'fin1'), 'data', ((('ref', 'erence'),),)),
995
(index, ('ref', 'erence'), 'refdata', ((), ))]),
996
set(index.iter_entries_prefix([('name', 'fin1'), ('ref', 'erence')])))
997
self.assertEqual(set([(index, ('name', 'fin1'), 'data', ((('ref', 'erence'),),)),
998
(index, ('name', 'fin2'), 'beta', ((), ))]),
999
set(index.iter_entries_prefix([('name', None)])))
1001
# XXX: external_references tests are duplicated in test_index. We
1002
# probably should have per_graph_index tests...
1003
def test_external_references_no_refs(self):
1004
index = self.make_index(ref_lists=0, nodes=[])
1005
self.assertRaises(ValueError, index.external_references, 0)
1007
def test_external_references_no_results(self):
1008
index = self.make_index(ref_lists=1, nodes=[
1009
(('key',), 'value', ([],))])
1010
self.assertEqual(set(), index.external_references(0))
1012
def test_external_references_missing_ref(self):
1013
missing_key = ('missing',)
1014
index = self.make_index(ref_lists=1, nodes=[
1015
(('key',), 'value', ([missing_key],))])
1016
self.assertEqual(set([missing_key]), index.external_references(0))
1018
def test_external_references_multiple_ref_lists(self):
1019
missing_key = ('missing',)
1020
index = self.make_index(ref_lists=2, nodes=[
1021
(('key',), 'value', ([], [missing_key]))])
1022
self.assertEqual(set([]), index.external_references(0))
1023
self.assertEqual(set([missing_key]), index.external_references(1))
1025
def test_external_references_two_records(self):
1026
index = self.make_index(ref_lists=1, nodes=[
1027
(('key-1',), 'value', ([('key-2',)],)),
1028
(('key-2',), 'value', ([],)),
1030
self.assertEqual(set([]), index.external_references(0))
1032
def test__find_ancestors_one_page(self):
1035
index = self.make_index(ref_lists=1, key_elements=1, nodes=[
1036
(key1, 'value', ([key2],)),
1037
(key2, 'value', ([],)),
1040
missing_keys = set()
1041
search_keys = index._find_ancestors([key1], 0, parent_map, missing_keys)
1042
self.assertEqual({key1: (key2,), key2: ()}, parent_map)
1043
self.assertEqual(set(), missing_keys)
1044
self.assertEqual(set(), search_keys)
1046
def test__find_ancestors_one_page_w_missing(self):
1050
index = self.make_index(ref_lists=1, key_elements=1, nodes=[
1051
(key1, 'value', ([key2],)),
1052
(key2, 'value', ([],)),
1055
missing_keys = set()
1056
search_keys = index._find_ancestors([key2, key3], 0, parent_map,
1058
self.assertEqual({key2: ()}, parent_map)
1059
# we know that key3 is missing because we read the page that it would
1061
self.assertEqual(set([key3]), missing_keys)
1062
self.assertEqual(set(), search_keys)
1064
def test__find_ancestors_one_parent_missing(self):
1068
index = self.make_index(ref_lists=1, key_elements=1, nodes=[
1069
(key1, 'value', ([key2],)),
1070
(key2, 'value', ([key3],)),
1073
missing_keys = set()
1074
search_keys = index._find_ancestors([key1], 0, parent_map,
1076
self.assertEqual({key1: (key2,), key2: (key3,)}, parent_map)
1077
self.assertEqual(set(), missing_keys)
1078
# all we know is that key3 wasn't present on the page we were reading
1079
# but if you look, the last key is key2 which comes before key3, so we
1080
# don't know whether key3 would land on this page or not.
1081
self.assertEqual(set([key3]), search_keys)
1082
search_keys = index._find_ancestors(search_keys, 0, parent_map,
1084
# passing it back in, we are sure it is 'missing'
1085
self.assertEqual({key1: (key2,), key2: (key3,)}, parent_map)
1086
self.assertEqual(set([key3]), missing_keys)
1087
self.assertEqual(set([]), search_keys)
1089
def test__find_ancestors_dont_search_known(self):
1093
index = self.make_index(ref_lists=1, key_elements=1, nodes=[
1094
(key1, 'value', ([key2],)),
1095
(key2, 'value', ([key3],)),
1096
(key3, 'value', ([],)),
1098
# We already know about key2, so we won't try to search for key3
1099
parent_map = {key2: (key3,)}
1100
missing_keys = set()
1101
search_keys = index._find_ancestors([key1], 0, parent_map,
1103
self.assertEqual({key1: (key2,), key2: (key3,)}, parent_map)
1104
self.assertEqual(set(), missing_keys)
1105
self.assertEqual(set(), search_keys)
1107
def test__find_ancestors_multiple_pages(self):
1108
# We need to use enough keys that we actually cause a split
1109
start_time = 1249671539
1110
email = "joebob@example.com"
1114
for i in xrange(400):
1115
rev_id = '%s-%s-%s' % (email,
1116
osutils.compact_date(start_time + i),
1117
osutils.rand_chars(16))
1119
nodes.append((rev_key, 'value', ref_lists))
1120
# We have a ref 'list' of length 1, with a list of parents, with 1
1121
# parent which is a key
1122
ref_lists = ((rev_key,),)
1123
rev_keys.append(rev_key)
1124
index = self.make_index(ref_lists=1, key_elements=1, nodes=nodes)
1125
self.assertEqual(400, index.key_count())
1126
self.assertEqual(3, len(index._row_offsets))
1127
nodes = dict(index._read_nodes([1, 2]))
1130
min_l2_key = l2.min_key
1131
max_l1_key = l1.max_key
1132
self.assertTrue(max_l1_key < min_l2_key)
1133
parents_min_l2_key = l2[min_l2_key][1][0]
1134
self.assertEqual((l1.max_key,), parents_min_l2_key)
1135
# Now, whatever key we select that would fall on the second page,
1136
# should give us all the parents until the page break
1137
key_idx = rev_keys.index(min_l2_key)
1138
next_key = rev_keys[key_idx+1]
1139
# So now when we get the parent map, we should get the key we are
1140
# looking for, min_l2_key, and then a reference to go look for the
1141
# parent of that key
1143
missing_keys = set()
1144
search_keys = index._find_ancestors([next_key], 0, parent_map,
1146
self.assertEqual([min_l2_key, next_key], sorted(parent_map))
1147
self.assertEqual(set(), missing_keys)
1148
self.assertEqual(set([max_l1_key]), search_keys)
1150
search_keys = index._find_ancestors([max_l1_key], 0, parent_map,
1152
self.assertEqual(l1.all_keys(), sorted(parent_map))
1153
self.assertEqual(set(), missing_keys)
1154
self.assertEqual(set(), search_keys)
1156
def test__find_ancestors_empty_index(self):
1157
index = self.make_index(ref_lists=1, key_elements=1, nodes=[])
1159
missing_keys = set()
1160
search_keys = index._find_ancestors([('one',), ('two',)], 0, parent_map,
1162
self.assertEqual(set(), search_keys)
1163
self.assertEqual({}, parent_map)
1164
self.assertEqual(set([('one',), ('two',)]), missing_keys)
1166
def test_supports_unlimited_cache(self):
1167
builder = btree_index.BTreeBuilder(reference_lists=0, key_elements=1)
1168
# We need enough nodes to cause a page split (so we have both an
1169
# internal node and a couple leaf nodes. 500 seems to be enough.)
1170
nodes = self.make_nodes(500, 1, 0)
1172
builder.add_node(*node)
1173
stream = builder.finish()
1174
trans = self.get_transport()
1175
size = trans.put_file('index', stream)
1176
index = btree_index.BTreeGraphIndex(trans, 'index', size)
1177
self.assertEqual(500, index.key_count())
1178
# We have an internal node
1179
self.assertEqual(2, len(index._row_lengths))
1180
# We have at least 2 leaf nodes
1181
self.assertTrue(index._row_lengths[-1] >= 2)
1182
self.assertIsInstance(index._leaf_node_cache, lru_cache.LRUCache)
1183
self.assertEqual(btree_index._NODE_CACHE_SIZE,
1184
index._leaf_node_cache._max_cache)
1185
self.assertIsInstance(index._internal_node_cache, fifo_cache.FIFOCache)
1186
self.assertEqual(100, index._internal_node_cache._max_cache)
1187
# No change if unlimited_cache=False is passed
1188
index = btree_index.BTreeGraphIndex(trans, 'index', size,
1189
unlimited_cache=False)
1190
self.assertIsInstance(index._leaf_node_cache, lru_cache.LRUCache)
1191
self.assertEqual(btree_index._NODE_CACHE_SIZE,
1192
index._leaf_node_cache._max_cache)
1193
self.assertIsInstance(index._internal_node_cache, fifo_cache.FIFOCache)
1194
self.assertEqual(100, index._internal_node_cache._max_cache)
1195
index = btree_index.BTreeGraphIndex(trans, 'index', size,
1196
unlimited_cache=True)
1197
self.assertIsInstance(index._leaf_node_cache, dict)
1198
self.assertIs(type(index._internal_node_cache), dict)
1199
# Exercise the lookup code
1200
entries = set(index.iter_entries([n[0] for n in nodes]))
1201
self.assertEqual(500, len(entries))
1204
class TestBTreeNodes(BTreeTestCase):
1206
scenarios = btreeparser_scenarios()
1209
BTreeTestCase.setUp(self)
1210
self.overrideAttr(btree_index, '_btree_serializer', self.parse_btree)
1212
def test_LeafNode_1_0(self):
1213
node_bytes = ("type=leaf\n"
1214
"0000000000000000000000000000000000000000\x00\x00value:0\n"
1215
"1111111111111111111111111111111111111111\x00\x00value:1\n"
1216
"2222222222222222222222222222222222222222\x00\x00value:2\n"
1217
"3333333333333333333333333333333333333333\x00\x00value:3\n"
1218
"4444444444444444444444444444444444444444\x00\x00value:4\n")
1219
node = btree_index._LeafNode(node_bytes, 1, 0)
1220
# We do direct access, or don't care about order, to leaf nodes most of
1221
# the time, so a dict is useful:
1223
("0000000000000000000000000000000000000000",): ("value:0", ()),
1224
("1111111111111111111111111111111111111111",): ("value:1", ()),
1225
("2222222222222222222222222222222222222222",): ("value:2", ()),
1226
("3333333333333333333333333333333333333333",): ("value:3", ()),
1227
("4444444444444444444444444444444444444444",): ("value:4", ()),
1228
}, dict(node.all_items()))
1230
def test_LeafNode_2_2(self):
1231
node_bytes = ("type=leaf\n"
1232
"00\x0000\x00\t00\x00ref00\x00value:0\n"
1233
"00\x0011\x0000\x00ref00\t00\x00ref00\r01\x00ref01\x00value:1\n"
1234
"11\x0033\x0011\x00ref22\t11\x00ref22\r11\x00ref22\x00value:3\n"
1235
"11\x0044\x00\t11\x00ref00\x00value:4\n"
1238
node = btree_index._LeafNode(node_bytes, 2, 2)
1239
# We do direct access, or don't care about order, to leaf nodes most of
1240
# the time, so a dict is useful:
1242
('00', '00'): ('value:0', ((), (('00', 'ref00'),))),
1243
('00', '11'): ('value:1',
1244
((('00', 'ref00'),), (('00', 'ref00'), ('01', 'ref01')))),
1245
('11', '33'): ('value:3',
1246
((('11', 'ref22'),), (('11', 'ref22'), ('11', 'ref22')))),
1247
('11', '44'): ('value:4', ((), (('11', 'ref00'),)))
1248
}, dict(node.all_items()))
1250
def test_InternalNode_1(self):
1251
node_bytes = ("type=internal\n"
1253
"0000000000000000000000000000000000000000\n"
1254
"1111111111111111111111111111111111111111\n"
1255
"2222222222222222222222222222222222222222\n"
1256
"3333333333333333333333333333333333333333\n"
1257
"4444444444444444444444444444444444444444\n"
1259
node = btree_index._InternalNode(node_bytes)
1260
# We want to bisect to find the right children from this node, so a
1261
# vector is most useful.
1263
("0000000000000000000000000000000000000000",),
1264
("1111111111111111111111111111111111111111",),
1265
("2222222222222222222222222222222222222222",),
1266
("3333333333333333333333333333333333333333",),
1267
("4444444444444444444444444444444444444444",),
1269
self.assertEqual(1, node.offset)
1271
def test_LeafNode_2_2(self):
1272
node_bytes = ("type=leaf\n"
1273
"00\x0000\x00\t00\x00ref00\x00value:0\n"
1274
"00\x0011\x0000\x00ref00\t00\x00ref00\r01\x00ref01\x00value:1\n"
1275
"11\x0033\x0011\x00ref22\t11\x00ref22\r11\x00ref22\x00value:3\n"
1276
"11\x0044\x00\t11\x00ref00\x00value:4\n"
1279
node = btree_index._LeafNode(node_bytes, 2, 2)
1280
# We do direct access, or don't care about order, to leaf nodes most of
1281
# the time, so a dict is useful:
1283
('00', '00'): ('value:0', ((), (('00', 'ref00'),))),
1284
('00', '11'): ('value:1',
1285
((('00', 'ref00'),), (('00', 'ref00'), ('01', 'ref01')))),
1286
('11', '33'): ('value:3',
1287
((('11', 'ref22'),), (('11', 'ref22'), ('11', 'ref22')))),
1288
('11', '44'): ('value:4', ((), (('11', 'ref00'),)))
1289
}, dict(node.all_items()))
1291
def assertFlattened(self, expected, key, value, refs):
1292
flat_key, flat_line = self.parse_btree._flatten_node(
1293
(None, key, value, refs), bool(refs))
1294
self.assertEqual('\x00'.join(key), flat_key)
1295
self.assertEqual(expected, flat_line)
1297
def test__flatten_node(self):
1298
self.assertFlattened('key\0\0value\n', ('key',), 'value', [])
1299
self.assertFlattened('key\0tuple\0\0value str\n',
1300
('key', 'tuple'), 'value str', [])
1301
self.assertFlattened('key\0tuple\0triple\0\0value str\n',
1302
('key', 'tuple', 'triple'), 'value str', [])
1303
self.assertFlattened('k\0t\0s\0ref\0value str\n',
1304
('k', 't', 's'), 'value str', [[('ref',)]])
1305
self.assertFlattened('key\0tuple\0ref\0key\0value str\n',
1306
('key', 'tuple'), 'value str', [[('ref', 'key')]])
1307
self.assertFlattened("00\x0000\x00\t00\x00ref00\x00value:0\n",
1308
('00', '00'), 'value:0', ((), (('00', 'ref00'),)))
1309
self.assertFlattened(
1310
"00\x0011\x0000\x00ref00\t00\x00ref00\r01\x00ref01\x00value:1\n",
1311
('00', '11'), 'value:1',
1312
((('00', 'ref00'),), (('00', 'ref00'), ('01', 'ref01'))))
1313
self.assertFlattened(
1314
"11\x0033\x0011\x00ref22\t11\x00ref22\r11\x00ref22\x00value:3\n",
1315
('11', '33'), 'value:3',
1316
((('11', 'ref22'),), (('11', 'ref22'), ('11', 'ref22'))))
1317
self.assertFlattened(
1318
"11\x0044\x00\t11\x00ref00\x00value:4\n",
1319
('11', '44'), 'value:4', ((), (('11', 'ref00'),)))
1322
class TestCompiledBtree(tests.TestCase):
1324
def test_exists(self):
1325
# This is just to let the user know if they don't have the feature
1327
self.requireFeature(compiled_btreeparser_feature)
1330
class TestMultiBisectRight(tests.TestCase):
1332
def assertMultiBisectRight(self, offsets, search_keys, fixed_keys):
1333
self.assertEqual(offsets,
1334
btree_index.BTreeGraphIndex._multi_bisect_right(
1335
search_keys, fixed_keys))
1337
def test_after(self):
1338
self.assertMultiBisectRight([(1, ['b'])], ['b'], ['a'])
1339
self.assertMultiBisectRight([(3, ['e', 'f', 'g'])],
1340
['e', 'f', 'g'], ['a', 'b', 'c'])
1342
def test_before(self):
1343
self.assertMultiBisectRight([(0, ['a'])], ['a'], ['b'])
1344
self.assertMultiBisectRight([(0, ['a', 'b', 'c', 'd'])],
1345
['a', 'b', 'c', 'd'], ['e', 'f', 'g'])
1347
def test_exact(self):
1348
self.assertMultiBisectRight([(1, ['a'])], ['a'], ['a'])
1349
self.assertMultiBisectRight([(1, ['a']), (2, ['b'])], ['a', 'b'], ['a', 'b'])
1350
self.assertMultiBisectRight([(1, ['a']), (3, ['c'])],
1351
['a', 'c'], ['a', 'b', 'c'])
1353
def test_inbetween(self):
1354
self.assertMultiBisectRight([(1, ['b'])], ['b'], ['a', 'c'])
1355
self.assertMultiBisectRight([(1, ['b', 'c', 'd']), (2, ['f', 'g'])],
1356
['b', 'c', 'd', 'f', 'g'], ['a', 'e', 'h'])
1358
def test_mixed(self):
1359
self.assertMultiBisectRight([(0, ['a', 'b']), (2, ['d', 'e']),
1361
['a', 'b', 'd', 'e', 'g', 'h'],
1362
['c', 'd', 'f', 'g'])
1365
class TestExpandOffsets(tests.TestCase):
1367
def make_index(self, size, recommended_pages=None):
1368
"""Make an index with a generic size.
1370
This doesn't actually create anything on disk, it just primes a
1371
BTreeGraphIndex with the recommended information.
1373
index = btree_index.BTreeGraphIndex(
1374
transport.get_transport_from_url('memory:///'),
1375
'test-index', size=size)
1376
if recommended_pages is not None:
1377
index._recommended_pages = recommended_pages
1380
def set_cached_offsets(self, index, cached_offsets):
1381
"""Monkeypatch to give a canned answer for _get_offsets_for...()."""
1382
def _get_offsets_to_cached_pages():
1383
cached = set(cached_offsets)
1385
index._get_offsets_to_cached_pages = _get_offsets_to_cached_pages
1387
def prepare_index(self, index, node_ref_lists, key_length, key_count,
1388
row_lengths, cached_offsets):
1389
"""Setup the BTreeGraphIndex with some pre-canned information."""
1390
index.node_ref_lists = node_ref_lists
1391
index._key_length = key_length
1392
index._key_count = key_count
1393
index._row_lengths = row_lengths
1394
index._compute_row_offsets()
1395
index._root_node = btree_index._InternalNode('internal\noffset=0\n')
1396
self.set_cached_offsets(index, cached_offsets)
1398
def make_100_node_index(self):
1399
index = self.make_index(4096*100, 6)
1400
# Consider we've already made a single request at the middle
1401
self.prepare_index(index, node_ref_lists=0, key_length=1,
1402
key_count=1000, row_lengths=[1, 99],
1403
cached_offsets=[0, 50])
1406
def make_1000_node_index(self):
1407
index = self.make_index(4096*1000, 6)
1408
# Pretend we've already made a single request in the middle
1409
self.prepare_index(index, node_ref_lists=0, key_length=1,
1410
key_count=90000, row_lengths=[1, 9, 990],
1411
cached_offsets=[0, 5, 500])
1414
def assertNumPages(self, expected_pages, index, size):
1416
self.assertEqual(expected_pages, index._compute_total_pages_in_index())
1418
def assertExpandOffsets(self, expected, index, offsets):
1419
self.assertEqual(expected, index._expand_offsets(offsets),
1420
'We did not get the expected value after expanding'
1423
def test_default_recommended_pages(self):
1424
index = self.make_index(None)
1425
# local transport recommends 4096 byte reads, which is 1 page
1426
self.assertEqual(1, index._recommended_pages)
1428
def test__compute_total_pages_in_index(self):
1429
index = self.make_index(None)
1430
self.assertNumPages(1, index, 1024)
1431
self.assertNumPages(1, index, 4095)
1432
self.assertNumPages(1, index, 4096)
1433
self.assertNumPages(2, index, 4097)
1434
self.assertNumPages(2, index, 8192)
1435
self.assertNumPages(76, index, 4096*75 + 10)
1437
def test__find_layer_start_and_stop(self):
1438
index = self.make_1000_node_index()
1439
self.assertEqual((0, 1), index._find_layer_first_and_end(0))
1440
self.assertEqual((1, 10), index._find_layer_first_and_end(1))
1441
self.assertEqual((1, 10), index._find_layer_first_and_end(9))
1442
self.assertEqual((10, 1000), index._find_layer_first_and_end(10))
1443
self.assertEqual((10, 1000), index._find_layer_first_and_end(99))
1444
self.assertEqual((10, 1000), index._find_layer_first_and_end(999))
1446
def test_unknown_size(self):
1447
# We should not expand if we don't know the file size
1448
index = self.make_index(None, 10)
1449
self.assertExpandOffsets([0], index, [0])
1450
self.assertExpandOffsets([1, 4, 9], index, [1, 4, 9])
1452
def test_more_than_recommended(self):
1453
index = self.make_index(4096*100, 2)
1454
self.assertExpandOffsets([1, 10], index, [1, 10])
1455
self.assertExpandOffsets([1, 10, 20], index, [1, 10, 20])
1457
def test_read_all_from_root(self):
1458
index = self.make_index(4096*10, 20)
1459
self.assertExpandOffsets(range(10), index, [0])
1461
def test_read_all_when_cached(self):
1462
# We've read enough that we can grab all the rest in a single request
1463
index = self.make_index(4096*10, 5)
1464
self.prepare_index(index, node_ref_lists=0, key_length=1,
1465
key_count=1000, row_lengths=[1, 9],
1466
cached_offsets=[0, 1, 2, 5, 6])
1467
# It should fill the remaining nodes, regardless of the one requested
1468
self.assertExpandOffsets([3, 4, 7, 8, 9], index, [3])
1469
self.assertExpandOffsets([3, 4, 7, 8, 9], index, [8])
1470
self.assertExpandOffsets([3, 4, 7, 8, 9], index, [9])
1472
def test_no_root_node(self):
1473
index = self.make_index(4096*10, 5)
1474
self.assertExpandOffsets([0], index, [0])
1476
def test_include_neighbors(self):
1477
index = self.make_100_node_index()
1478
# We expand in both directions, until we have at least 'recommended'
1480
self.assertExpandOffsets([9, 10, 11, 12, 13, 14, 15], index, [12])
1481
self.assertExpandOffsets([88, 89, 90, 91, 92, 93, 94], index, [91])
1482
# If we hit an 'edge' we continue in the other direction
1483
self.assertExpandOffsets([1, 2, 3, 4, 5, 6], index, [2])
1484
self.assertExpandOffsets([94, 95, 96, 97, 98, 99], index, [98])
1486
# Requesting many nodes will expand all locations equally
1487
self.assertExpandOffsets([1, 2, 3, 80, 81, 82], index, [2, 81])
1488
self.assertExpandOffsets([1, 2, 3, 9, 10, 11, 80, 81, 82], index,
1491
def test_stop_at_cached(self):
1492
index = self.make_100_node_index()
1493
self.set_cached_offsets(index, [0, 10, 19])
1494
self.assertExpandOffsets([11, 12, 13, 14, 15, 16], index, [11])
1495
self.assertExpandOffsets([11, 12, 13, 14, 15, 16], index, [12])
1496
self.assertExpandOffsets([12, 13, 14, 15, 16, 17, 18], index, [15])
1497
self.assertExpandOffsets([13, 14, 15, 16, 17, 18], index, [16])
1498
self.assertExpandOffsets([13, 14, 15, 16, 17, 18], index, [17])
1499
self.assertExpandOffsets([13, 14, 15, 16, 17, 18], index, [18])
1501
def test_cannot_fully_expand(self):
1502
index = self.make_100_node_index()
1503
self.set_cached_offsets(index, [0, 10, 12])
1504
# We don't go into an endless loop if we are bound by cached nodes
1505
self.assertExpandOffsets([11], index, [11])
1507
def test_overlap(self):
1508
index = self.make_100_node_index()
1509
self.assertExpandOffsets([10, 11, 12, 13, 14, 15], index, [12, 13])
1510
self.assertExpandOffsets([10, 11, 12, 13, 14, 15], index, [11, 14])
1512
def test_stay_within_layer(self):
1513
index = self.make_1000_node_index()
1514
# When expanding a request, we won't read nodes from the next layer
1515
self.assertExpandOffsets([1, 2, 3, 4], index, [2])
1516
self.assertExpandOffsets([6, 7, 8, 9], index, [6])
1517
self.assertExpandOffsets([6, 7, 8, 9], index, [9])
1518
self.assertExpandOffsets([10, 11, 12, 13, 14, 15], index, [10])
1519
self.assertExpandOffsets([10, 11, 12, 13, 14, 15, 16], index, [13])
1521
self.set_cached_offsets(index, [0, 4, 12])
1522
self.assertExpandOffsets([5, 6, 7, 8, 9], index, [7])
1523
self.assertExpandOffsets([10, 11], index, [11])
1525
def test_small_requests_unexpanded(self):
1526
index = self.make_100_node_index()
1527
self.set_cached_offsets(index, [0])
1528
self.assertExpandOffsets([1], index, [1])
1529
self.assertExpandOffsets([50], index, [50])
1530
# If we request more than one node, then we'll expand
1531
self.assertExpandOffsets([49, 50, 51, 59, 60, 61], index, [50, 60])
1533
# The first pass does not expand
1534
index = self.make_1000_node_index()
1535
self.set_cached_offsets(index, [0])
1536
self.assertExpandOffsets([1], index, [1])
1537
self.set_cached_offsets(index, [0, 1])
1538
self.assertExpandOffsets([100], index, [100])
1539
self.set_cached_offsets(index, [0, 1, 100])
1540
# But after the first depth, we will expand
1541
self.assertExpandOffsets([2, 3, 4, 5, 6, 7], index, [2])
1542
self.assertExpandOffsets([2, 3, 4, 5, 6, 7], index, [4])
1543
self.set_cached_offsets(index, [0, 1, 2, 3, 4, 5, 6, 7, 100])
1544
self.assertExpandOffsets([102, 103, 104, 105, 106, 107, 108], index,