434
423
self.assertEqual(sorted(nodes), nodes)
435
424
self.assertEqual(16, len(nodes))
426
def test_spill_index_stress_1_1_no_combine(self):
    # With spill_at=2 and combine_backing_indices=False, every second
    # add_node() is expected to empty the in-memory nodes into a new
    # backing index of two keys, and backing indices are never merged.
    builder = btree_index.BTreeBuilder(key_elements=1, spill_at=2)
    builder.set_optimize(for_size=False, combine_backing_indices=False)
    nodes = [made[0:2] for made in self.make_nodes(16, 1, 0)]

    def check_memory(in_memory_count):
        # The memory-resident part of the builder must be predictable.
        self.assertEqual(in_memory_count, len(builder._nodes))
        self.assertIs(None, builder._nodes_by_key)

    def check_backing(backing_count):
        # Every spilled index holds exactly two keys.
        spilled_indices = builder._backing_indices
        self.assertEqual(backing_count, len(spilled_indices))
        for spilled in spilled_indices:
            self.assertEqual(2, spilled.key_count())

    def add_range(start, stop):
        for pending in nodes[start:stop]:
            builder.add_node(*pending)

    # First node stays in memory.
    add_range(0, 1)
    check_memory(1)
    # Second node triggers the first spill.
    add_range(1, 2)
    check_memory(0)
    check_backing(1)
    # Third node is held in memory again.
    add_range(2, 3)
    check_memory(1)
    # And spills to a second backing index but doesn't combine
    add_range(3, 4)
    check_memory(0)
    check_backing(2)
    # The next spills to the 3rd slot
    add_range(4, 6)
    check_memory(0)
    check_backing(3)
    # Now spill a few more, and check that we don't combine
    add_range(6, 13)
    check_backing(6)
    # Query methods have to merge the in-memory node with the spilled
    # indices.
    expected_all = [(builder,) + entry for entry in sorted(nodes[:13])]
    self.assertEqual(expected_all, list(builder.iter_all_entries()))
    # Two nodes - one memory one disk
    expected_pair = set([(builder,) + entry for entry in nodes[11:13]])
    wanted_keys = (nodes[12][0], nodes[11][0])
    self.assertEqual(expected_pair,
                     set(builder.iter_entries(list(wanted_keys))))
    self.assertEqual(13, builder.key_count())
    self.assertEqual(expected_pair,
                     set(builder.iter_entries_prefix(list(wanted_keys))))
    add_range(13, 16)
    check_backing(8)
    # Now finish, and check we got a correctly ordered tree
    transport = self.get_transport('')
    written = transport.put_file('index', builder.finish())
    final_index = btree_index.BTreeGraphIndex(transport, 'index', written)
    entries = list(final_index.iter_all_entries())
    self.assertEqual(sorted(entries), entries)
    self.assertEqual(16, len(entries))
437
494
def test_set_optimize(self):
    # set_optimize() must update only the options it is given.
    builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
    builder.set_optimize(for_size=True)
    self.assertTrue(builder._optimize_for_size)
    builder.set_optimize(for_size=False)
    self.assertFalse(builder._optimize_for_size)
    # test that we can set combine_backing_indices without affecting
    # _optimize_for_size
    # A unique sentinel lets assertIs() prove the attribute was left
    # untouched rather than reset to an equal value.
    obj = object()
    builder._optimize_for_size = obj
    builder.set_optimize(combine_backing_indices=False)
    self.assertFalse(builder._combine_backing_indices)
    self.assertIs(obj, builder._optimize_for_size)
    builder.set_optimize(combine_backing_indices=True)
    self.assertTrue(builder._combine_backing_indices)
    self.assertIs(obj, builder._optimize_for_size)
444
511
def test_spill_index_stress_2_2(self):
445
512
# test that references and longer keys don't confuse things.
869
952
(index, ('name', 'fin2'), 'beta', ((), ))]),
870
953
set(index.iter_entries_prefix([('name', None)])))
955
# XXX: external_references tests are duplicated in test_index. We
956
# probably should have per_graph_index tests...
957
def test_external_references_no_refs(self):
    # Asking for reference list 0 of an index created with ref_lists=0
    # must raise ValueError.
    refless_index = self.make_index(ref_lists=0, nodes=[])
    lookup = refless_index.external_references
    self.assertRaises(ValueError, lookup, 0)
961
def test_external_references_no_results(self):
    # A single key whose only reference list is empty yields an empty
    # set of external references.
    only_entry = (('key',), 'value', ([],))
    idx = self.make_index(ref_lists=1, nodes=[only_entry])
    external = idx.external_references(0)
    self.assertEqual(set(), external)
966
def test_external_references_missing_ref(self):
    # 'key' refers to a key that is not itself stored in the index, so
    # that key is reported as an external reference.
    absent = ('missing',)
    referring_entry = (('key',), 'value', ([absent],))
    idx = self.make_index(ref_lists=1, nodes=[referring_entry])
    external = idx.external_references(0)
    self.assertEqual(set([absent]), external)
972
def test_external_references_multiple_ref_lists(self):
    # Only the second reference list mentions the absent key, so it is
    # reported for list 1 and not for list 0.
    absent = ('missing',)
    entry = (('key',), 'value', ([], [absent]))
    idx = self.make_index(ref_lists=2, nodes=[entry])
    expected_by_list = [(0, set([])), (1, set([absent]))]
    for ref_list_num, expected in expected_by_list:
        self.assertEqual(expected, idx.external_references(ref_list_num))
979
def test_external_references_two_records(self):
    # 'key-1' refers to 'key-2', which is itself present in the index,
    # so nothing is external.
    index = self.make_index(ref_lists=1, nodes=[
        (('key-1',), 'value', ([('key-2',)],)),
        (('key-2',), 'value', ([],)),
        ])
    self.assertEqual(set([]), index.external_references(0))
986
def test__find_ancestors_one_page(self):
    # Both keys are in the index; searching from key1 fills parent_map
    # for key1 and key2 and leaves nothing missing or left to search.
    key1 = ('key-1',)
    key2 = ('key-2',)
    index = self.make_index(ref_lists=1, key_elements=1, nodes=[
        (key1, 'value', ([key2],)),
        (key2, 'value', ([],)),
        ])
    # parent_map and missing_keys are filled in by _find_ancestors.
    parent_map = {}
    missing_keys = set()
    search_keys = index._find_ancestors([key1], 0, parent_map, missing_keys)
    self.assertEqual({key1: (key2,), key2: ()}, parent_map)
    self.assertEqual(set(), missing_keys)
    self.assertEqual(set(), search_keys)
1000
def test__find_ancestors_one_page_w_missing(self):
    # key3 is never added to the index; searching for it alongside key2
    # must report it in missing_keys.
    key1 = ('key-1',)
    key2 = ('key-2',)
    key3 = ('key-3',)
    index = self.make_index(ref_lists=1, key_elements=1, nodes=[
        (key1, 'value', ([key2],)),
        (key2, 'value', ([],)),
        ])
    parent_map = {}
    missing_keys = set()
    search_keys = index._find_ancestors([key2, key3], 0, parent_map,
                                        missing_keys)
    self.assertEqual({key2: ()}, parent_map)
    # we know that key3 is missing because we read the page that it would
    # otherwise be on
    self.assertEqual(set([key3]), missing_keys)
    self.assertEqual(set(), search_keys)
1018
def test__find_ancestors_one_parent_missing(self):
    # key2 refers to key3, which is not in the index. The first search
    # only hands key3 back as a key to search; the second search
    # confirms that it is missing.
    key1 = ('key-1',)
    key2 = ('key-2',)
    key3 = ('key-3',)
    index = self.make_index(ref_lists=1, key_elements=1, nodes=[
        (key1, 'value', ([key2],)),
        (key2, 'value', ([key3],)),
        ])
    parent_map = {}
    missing_keys = set()
    search_keys = index._find_ancestors([key1], 0, parent_map,
                                        missing_keys)
    self.assertEqual({key1: (key2,), key2: (key3,)}, parent_map)
    self.assertEqual(set(), missing_keys)
    # all we know is that key3 wasn't present on the page we were reading
    # but if you look, the last key is key2 which comes before key3, so we
    # don't know whether key3 would land on this page or not.
    self.assertEqual(set([key3]), search_keys)
    search_keys = index._find_ancestors(search_keys, 0, parent_map,
                                        missing_keys)
    # passing it back in, we are sure it is 'missing'
    self.assertEqual({key1: (key2,), key2: (key3,)}, parent_map)
    self.assertEqual(set([key3]), missing_keys)
    self.assertEqual(set([]), search_keys)
1043
def test__find_ancestors_dont_search_known(self):
    # parent_map already holds key2 before the search starts; the
    # search from key1 must add key1 only and not follow on to key3.
    key1 = ('key-1',)
    key2 = ('key-2',)
    key3 = ('key-3',)
    index = self.make_index(ref_lists=1, key_elements=1, nodes=[
        (key1, 'value', ([key2],)),
        (key2, 'value', ([key3],)),
        (key3, 'value', ([],)),
        ])
    # We already know about key2, so we won't try to search for key3
    parent_map = {key2: (key3,)}
    missing_keys = set()
    search_keys = index._find_ancestors([key1], 0, parent_map,
                                        missing_keys)
    self.assertEqual({key1: (key2,), key2: (key3,)}, parent_map)
    self.assertEqual(set(), missing_keys)
    self.assertEqual(set(), search_keys)
1061
def test__find_ancestors_multiple_pages(self):
    # We need to use enough keys that we actually cause a split
    start_time = 1249671539
    email = "joebob@example.com"
    nodes = []
    # The first key has no parent: one reference list, and it is empty.
    ref_lists = ((),)
    rev_keys = []
    for i in xrange(400):
        rev_id = '%s-%s-%s' % (email,
                               osutils.compact_date(start_time + i),
                               osutils.rand_chars(16))
        rev_key = (rev_id,)
        nodes.append((rev_key, 'value', ref_lists))
        # We have a ref 'list' of length 1, with a list of parents, with 1
        # parent which is a key
        ref_lists = ((rev_key,),)
        rev_keys.append(rev_key)
    index = self.make_index(ref_lists=1, key_elements=1, nodes=nodes)
    self.assertEqual(400, index.key_count())
    self.assertEqual(3, len(index._row_offsets))
    # Nodes 1 and 2 are compared below as two adjacent pages of keys.
    nodes = dict(index._read_nodes([1, 2]))
    l1 = nodes[1]
    l2 = nodes[2]
    min_l2_key = l2.min_key
    max_l1_key = l1.max_key
    self.assertTrue(max_l1_key < min_l2_key)
    parents_min_l2_key = l2.keys[min_l2_key][1][0]
    self.assertEqual((l1.max_key,), parents_min_l2_key)
    # Now, whatever key we select that would fall on the second page,
    # should give us all the parents until the page break
    key_idx = rev_keys.index(min_l2_key)
    next_key = rev_keys[key_idx+1]
    # So now when we get the parent map, we should get the key we are
    # looking for, min_l2_key, and then a reference to go look for the
    # parent of that key
    parent_map = {}
    missing_keys = set()
    search_keys = index._find_ancestors([next_key], 0, parent_map,
                                        missing_keys)
    self.assertEqual([min_l2_key, next_key], sorted(parent_map))
    self.assertEqual(set(), missing_keys)
    self.assertEqual(set([max_l1_key]), search_keys)
    # Start from an empty map so that only keys found by the second
    # search are compared against the first page's keys.
    parent_map = {}
    search_keys = index._find_ancestors([max_l1_key], 0, parent_map,
                                        missing_keys)
    self.assertEqual(sorted(l1.keys), sorted(parent_map))
    self.assertEqual(set(), missing_keys)
    self.assertEqual(set(), search_keys)
1110
def test__find_ancestors_empty_index(self):
    # Searching an index with no nodes reports every requested key as
    # missing, with nothing found and nothing left to search.
    index = self.make_index(ref_lists=1, key_elements=1, nodes=[])
    parent_map = {}
    missing_keys = set()
    search_keys = index._find_ancestors([('one',), ('two',)], 0, parent_map,
                                        missing_keys)
    self.assertEqual(set(), search_keys)
    self.assertEqual({}, parent_map)
    self.assertEqual(set([('one',), ('two',)]), missing_keys)
1120
def test_supports_unlimited_cache(self):
    # BTreeGraphIndex uses size-limited caches by default and plain
    # dicts when unlimited_cache=True is passed.
    builder = btree_index.BTreeBuilder(reference_lists=0, key_elements=1)
    # We need enough nodes to cause a page split (so we have both an
    # internal node and a couple leaf nodes. 500 seems to be enough.)
    nodes = self.make_nodes(500, 1, 0)
    for node in nodes:
        builder.add_node(*node)
    stream = builder.finish()
    trans = get_transport(self.get_url())
    size = trans.put_file('index', stream)
    index = btree_index.BTreeGraphIndex(trans, 'index', size)
    self.assertEqual(500, index.key_count())
    # We have an internal node
    self.assertEqual(2, len(index._row_lengths))
    # We have at least 2 leaf nodes
    self.assertTrue(index._row_lengths[-1] >= 2)
    self.assertIsInstance(index._leaf_node_cache, lru_cache.LRUCache)
    self.assertEqual(btree_index._NODE_CACHE_SIZE,
                     index._leaf_node_cache._max_cache)
    self.assertIsInstance(index._internal_node_cache, fifo_cache.FIFOCache)
    self.assertEqual(100, index._internal_node_cache._max_cache)
    # No change if unlimited_cache=False is passed
    index = btree_index.BTreeGraphIndex(trans, 'index', size,
                                        unlimited_cache=False)
    self.assertIsInstance(index._leaf_node_cache, lru_cache.LRUCache)
    self.assertEqual(btree_index._NODE_CACHE_SIZE,
                     index._leaf_node_cache._max_cache)
    self.assertIsInstance(index._internal_node_cache, fifo_cache.FIFOCache)
    self.assertEqual(100, index._internal_node_cache._max_cache)
    index = btree_index.BTreeGraphIndex(trans, 'index', size,
                                        unlimited_cache=True)
    self.assertIsInstance(index._leaf_node_cache, dict)
    # Exact type check: the internal node cache must be a plain dict.
    self.assertIs(type(index._internal_node_cache), dict)
    # Exercise the lookup code
    entries = set(index.iter_entries([n[0] for n in nodes]))
    self.assertEqual(500, len(entries))
873
1158
class TestBTreeNodes(BTreeTestCase):