        self.assertEqual(sorted(nodes), nodes)
        self.assertEqual(16, len(nodes))

    def test_spill_index_stress_1_1_no_combine(self):
        builder = btree_index.BTreeBuilder(key_elements=1, spill_at=2)
        builder.set_optimize(for_size=False, combine_backing_indices=False)
        nodes = [node[0:2] for node in self.make_nodes(16, 1, 0)]
        builder.add_node(*nodes[0])
        # Test the parts of the index that take up memory are doing so
        # predictably.
        self.assertEqual(1, len(builder._nodes))
        self.assertEqual(1, len(builder._keys))
        self.assertIs(None, builder._nodes_by_key)
        builder.add_node(*nodes[1])
        self.assertEqual(0, len(builder._nodes))
        self.assertEqual(0, len(builder._keys))
        self.assertIs(None, builder._nodes_by_key)
        self.assertEqual(1, len(builder._backing_indices))
        self.assertEqual(2, builder._backing_indices[0].key_count())
        # now back to memory
        builder.add_node(*nodes[2])
        self.assertEqual(1, len(builder._nodes))
        self.assertEqual(1, len(builder._keys))
        self.assertIs(None, builder._nodes_by_key)
        # And spills to a second backing index but doesn't combine
        builder.add_node(*nodes[3])
        self.assertEqual(0, len(builder._nodes))
        self.assertEqual(0, len(builder._keys))
        self.assertIs(None, builder._nodes_by_key)
        self.assertEqual(2, len(builder._backing_indices))
        for backing_index in builder._backing_indices:
            self.assertEqual(2, backing_index.key_count())
        # The next spills to the 3rd slot
        builder.add_node(*nodes[4])
        builder.add_node(*nodes[5])
        self.assertEqual(0, len(builder._nodes))
        self.assertEqual(0, len(builder._keys))
        self.assertIs(None, builder._nodes_by_key)
        self.assertEqual(3, len(builder._backing_indices))
        for backing_index in builder._backing_indices:
            self.assertEqual(2, backing_index.key_count())
        # Now spill a few more, and check that we don't combine
        builder.add_node(*nodes[6])
        builder.add_node(*nodes[7])
        builder.add_node(*nodes[8])
        builder.add_node(*nodes[9])
        builder.add_node(*nodes[10])
        builder.add_node(*nodes[11])
        builder.add_node(*nodes[12])
        self.assertEqual(6, len(builder._backing_indices))
        for backing_index in builder._backing_indices:
            self.assertEqual(2, backing_index.key_count())
        # Test that memory and disk are both used for query methods; and that
        # None is skipped over happily.
        self.assertEqual([(builder,) + node for node in sorted(nodes[:13])],
                         list(builder.iter_all_entries()))
        # Two nodes - one in memory, one on disk
        self.assertEqual(set([(builder,) + node for node in nodes[11:13]]),
                         set(builder.iter_entries([nodes[12][0], nodes[11][0]])))
        self.assertEqual(13, builder.key_count())
        self.assertEqual(set([(builder,) + node for node in nodes[11:13]]),
                         set(builder.iter_entries_prefix([nodes[12][0], nodes[11][0]])))
        builder.add_node(*nodes[13])
        builder.add_node(*nodes[14])
        builder.add_node(*nodes[15])
        self.assertEqual(8, len(builder._backing_indices))
        for backing_index in builder._backing_indices:
            self.assertEqual(2, backing_index.key_count())
        # Now finish, and check we got a correctly ordered tree
        transport = self.get_transport('')
        size = transport.put_file('index', builder.finish())
        index = btree_index.BTreeGraphIndex(transport, 'index', size)
        nodes = list(index.iter_all_entries())
        self.assertEqual(sorted(nodes), nodes)
        self.assertEqual(16, len(nodes))
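
    # A minimal sketch of the arithmetic behind the test above, assuming
    # spill_at=2 with combine_backing_indices=False: every second add_node()
    # flushes the two in-memory keys to a fresh backing index, so the
    # expected counts follow directly from the number of nodes added. The
    # helper name is illustrative, not part of the builder API.
    @staticmethod
    def _expected_spill_state(nodes_added, spill_at=2):
        backing_indices = nodes_added // spill_at  # one new index per spill
        in_memory = nodes_added % spill_at         # remainder stays in _nodes
        return backing_indices, in_memory
    # e.g. _expected_spill_state(13) == (6, 1) and
    # _expected_spill_state(16) == (8, 0), matching the assertions above.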

    def test_set_optimize(self):
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2)
        builder.set_optimize(for_size=True)
        self.assertTrue(builder._optimize_for_size)
        builder.set_optimize(for_size=False)
        self.assertFalse(builder._optimize_for_size)
        # test that we can set combine_backing_indices without affecting
        # _optimize_for_size
        obj = object()
        builder._optimize_for_size = obj
        builder.set_optimize(combine_backing_indices=False)
        self.assertFalse(builder._combine_backing_indices)
        self.assertIs(obj, builder._optimize_for_size)
        builder.set_optimize(combine_backing_indices=True)
        self.assertTrue(builder._combine_backing_indices)
        self.assertIs(obj, builder._optimize_for_size)

    def test_spill_index_stress_2_2(self):
        # test that references and longer keys don't confuse things.
        builder = btree_index.BTreeBuilder(key_elements=2, reference_lists=2,
                                           spill_at=2)
                                     (4, ['g', 'h'])],
                                    ['a', 'b', 'd', 'e', 'g', 'h'],
                                    ['c', 'd', 'f', 'g'])


class TestExpandOffsets(tests.TestCase):

    def make_index(self, size, recommended_pages=None):
        """Make an index with a generic size.

        This doesn't actually create anything on disk, it just primes a
        BTreeGraphIndex with the recommended information.
        """
        index = btree_index.BTreeGraphIndex(get_transport('memory:///'),
                                            'test-index', size=size)
        if recommended_pages is not None:
            index._recommended_pages = recommended_pages
        return index

    def set_cached_offsets(self, index, cached_offsets):
        """Monkeypatch to give a canned answer for _get_offsets_for...()."""
        def _get_offsets_to_cached_pages():
            cached = set(cached_offsets)
            return cached
        index._get_offsets_to_cached_pages = _get_offsets_to_cached_pages
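
    # A minimal standalone sketch of the monkeypatch pattern used above,
    # relying only on Python's attribute lookup order: an attribute bound on
    # the instance shadows the method found on the class, so a closure can
    # supply a canned answer without touching the real implementation. The
    # helper name is illustrative.
    @staticmethod
    def _patch_with_canned_answer(obj, attribute_name, answer):
        setattr(obj, attribute_name, lambda: set(answer))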

    def prepare_index(self, index, node_ref_lists, key_length, key_count,
                      row_lengths, cached_offsets):
        """Set up the BTreeGraphIndex with some pre-canned information."""
        index.node_ref_lists = node_ref_lists
        index._key_length = key_length
        index._key_count = key_count
        index._row_lengths = row_lengths
        index._compute_row_offsets()
        index._root_node = btree_index._InternalNode('internal\noffset=0\n')
        self.set_cached_offsets(index, cached_offsets)

    def make_100_node_index(self):
        index = self.make_index(4096*100, 6)
        # Consider we've already made a single request at the middle
        self.prepare_index(index, node_ref_lists=0, key_length=1,
                           key_count=1000, row_lengths=[1, 99],
                           cached_offsets=[0, 50])
        return index

    def make_1000_node_index(self):
        index = self.make_index(4096*1000, 6)
        # Pretend we've already made a single request in the middle
        self.prepare_index(index, node_ref_lists=0, key_length=1,
                           key_count=90000, row_lengths=[1, 9, 990],
                           cached_offsets=[0, 5, 500])
        return index

    def assertNumPages(self, expected_pages, index, size):
        index._size = size
        self.assertEqual(expected_pages, index._compute_total_pages_in_index())

    def assertExpandOffsets(self, expected, index, offsets):
        self.assertEqual(expected, index._expand_offsets(offsets),
                         'We did not get the expected value after expanding'
                         ' %s' % (offsets,))

    def test_default_recommended_pages(self):
        index = self.make_index(None)
        # local transport recommends 4096 byte reads, which is 1 page
        self.assertEqual(1, index._recommended_pages)

    def test__compute_total_pages_in_index(self):
        index = self.make_index(None)
        self.assertNumPages(1, index, 1024)
        self.assertNumPages(1, index, 4095)
        self.assertNumPages(1, index, 4096)
        self.assertNumPages(2, index, 4097)
        self.assertNumPages(2, index, 8192)
        self.assertNumPages(76, index, 4096*75 + 10)
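
    # A minimal sketch of the page arithmetic asserted above, assuming the
    # fixed 4096-byte page size implied by the expected values: the total
    # page count is the byte size divided by the page size, rounded up. The
    # helper name is illustrative.
    @staticmethod
    def _expected_total_pages(size, page_size=4096):
        return (size + page_size - 1) // page_size  # ceiling division
    # e.g. _expected_total_pages(4097) == 2 and
    # _expected_total_pages(4096*75 + 10) == 76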

    def test__find_layer_start_and_stop(self):
        index = self.make_1000_node_index()
        self.assertEqual((0, 1), index._find_layer_first_and_end(0))
        self.assertEqual((1, 10), index._find_layer_first_and_end(1))
        self.assertEqual((1, 10), index._find_layer_first_and_end(9))
        self.assertEqual((10, 1000), index._find_layer_first_and_end(10))
        self.assertEqual((10, 1000), index._find_layer_first_and_end(99))
        self.assertEqual((10, 1000), index._find_layer_first_and_end(999))
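
    # A minimal sketch, assuming the row_lengths=[1, 9, 990] set up by
    # make_1000_node_index: running sums of the row lengths yield each
    # layer's half-open [first, end) offset range, which is what the
    # assertions above expect from _find_layer_first_and_end. The helper
    # name is illustrative.
    @staticmethod
    def _layer_ranges(row_lengths):
        ranges, first = [], 0
        for length in row_lengths:
            ranges.append((first, first + length))
            first += length
        return ranges
    # _layer_ranges([1, 9, 990]) == [(0, 1), (1, 10), (10, 1000)]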

    def test_unknown_size(self):
        # We should not expand if we don't know the file size
        index = self.make_index(None, 10)
        self.assertExpandOffsets([0], index, [0])
        self.assertExpandOffsets([1, 4, 9], index, [1, 4, 9])

    def test_more_than_recommended(self):
        index = self.make_index(4096*100, 2)
        self.assertExpandOffsets([1, 10], index, [1, 10])
        self.assertExpandOffsets([1, 10, 20], index, [1, 10, 20])

    def test_read_all_from_root(self):
        index = self.make_index(4096*10, 20)
        self.assertExpandOffsets(range(10), index, [0])

    def test_read_all_when_cached(self):
        # We've read enough that we can grab all the rest in a single request
        index = self.make_index(4096*10, 5)
        self.prepare_index(index, node_ref_lists=0, key_length=1,
                           key_count=1000, row_lengths=[1, 9],
                           cached_offsets=[0, 1, 2, 5, 6])
        # It should fill the remaining nodes, regardless of the one requested
        self.assertExpandOffsets([3, 4, 7, 8, 9], index, [3])
        self.assertExpandOffsets([3, 4, 7, 8, 9], index, [8])
        self.assertExpandOffsets([3, 4, 7, 8, 9], index, [9])
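
    # A minimal sketch of the "grab all the rest" case asserted above,
    # assuming expansion simply returns every not-yet-cached offset once the
    # remainder fits in a single request. The helper name is illustrative.
    @staticmethod
    def _remaining_offsets(total_pages, cached_offsets):
        return sorted(set(range(total_pages)) - set(cached_offsets))
    # _remaining_offsets(10, [0, 1, 2, 5, 6]) == [3, 4, 7, 8, 9]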

    def test_no_root_node(self):
        index = self.make_index(4096*10, 5)
        self.assertExpandOffsets([0], index, [0])

    def test_include_neighbors(self):
        index = self.make_100_node_index()
        # We expand in both directions, until we have at least 'recommended'
        # pages
        self.assertExpandOffsets([9, 10, 11, 12, 13, 14, 15], index, [12])
        self.assertExpandOffsets([88, 89, 90, 91, 92, 93, 94], index, [91])
        # If we hit an 'edge' we continue in the other direction
        self.assertExpandOffsets([1, 2, 3, 4, 5, 6], index, [2])
        self.assertExpandOffsets([94, 95, 96, 97, 98, 99], index, [98])

        # Requesting many nodes will expand all locations equally
        self.assertExpandOffsets([1, 2, 3, 80, 81, 82], index, [2, 81])
        self.assertExpandOffsets([1, 2, 3, 9, 10, 11, 80, 81, 82], index,
                                 [2, 10, 81])

    def test_stop_at_cached(self):
        index = self.make_100_node_index()
        self.set_cached_offsets(index, [0, 10, 19])
        self.assertExpandOffsets([11, 12, 13, 14, 15, 16], index, [11])
        self.assertExpandOffsets([11, 12, 13, 14, 15, 16], index, [12])
        self.assertExpandOffsets([12, 13, 14, 15, 16, 17, 18], index, [15])
        self.assertExpandOffsets([13, 14, 15, 16, 17, 18], index, [16])
        self.assertExpandOffsets([13, 14, 15, 16, 17, 18], index, [17])
        self.assertExpandOffsets([13, 14, 15, 16, 17, 18], index, [18])

    def test_cannot_fully_expand(self):
        index = self.make_100_node_index()
        self.set_cached_offsets(index, [0, 10, 12])
        # We don't go into an endless loop if we are bound by cached nodes
        self.assertExpandOffsets([11], index, [11])
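
    # A minimal sketch consistent with the single-offset expansions asserted
    # in test_include_neighbors, test_stop_at_cached and
    # test_cannot_fully_expand: grow the window by one neighbour per side
    # per round until at least `recommended` offsets are collected, never
    # crossing the layer bounds [first, end) or an already-cached offset.
    # This illustrates the observed behaviour only; the real _expand_offsets
    # also handles multi-offset requests and the unexpanded first pass shown
    # in test_small_requests_unexpanded below.
    @staticmethod
    def _grow_window(offset, recommended, first, end, cached=()):
        cached = set(cached)
        offsets = [offset]
        while len(offsets) < recommended:
            grew = False
            low, high = offsets[0], offsets[-1]
            if low - 1 >= first and low - 1 not in cached:
                offsets.insert(0, low - 1)
                grew = True
            if high + 1 < end and high + 1 not in cached:
                offsets.append(high + 1)
                grew = True
            if not grew:
                break  # boxed in by cached pages; avoids an endless loop
        return offsets
    # _grow_window(12, 6, 1, 100, cached=[0, 50]) == [9, 10, 11, 12, 13, 14, 15]
    # _grow_window(11, 6, 1, 100, cached=[0, 10, 12]) == [11]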

    def test_overlap(self):
        index = self.make_100_node_index()
        self.assertExpandOffsets([10, 11, 12, 13, 14, 15], index, [12, 13])
        self.assertExpandOffsets([10, 11, 12, 13, 14, 15], index, [11, 14])

    def test_stay_within_layer(self):
        index = self.make_1000_node_index()
        # When expanding a request, we won't read nodes from the next layer
        self.assertExpandOffsets([1, 2, 3, 4], index, [2])
        self.assertExpandOffsets([6, 7, 8, 9], index, [6])
        self.assertExpandOffsets([6, 7, 8, 9], index, [9])
        self.assertExpandOffsets([10, 11, 12, 13, 14, 15], index, [10])
        self.assertExpandOffsets([10, 11, 12, 13, 14, 15, 16], index, [13])

        self.set_cached_offsets(index, [0, 4, 12])
        self.assertExpandOffsets([5, 6, 7, 8, 9], index, [7])
        self.assertExpandOffsets([10, 11], index, [11])

    def test_small_requests_unexpanded(self):
        index = self.make_100_node_index()
        self.set_cached_offsets(index, [0])
        self.assertExpandOffsets([1], index, [1])
        self.assertExpandOffsets([50], index, [50])
        # If we request more than one node, then we'll expand
        self.assertExpandOffsets([49, 50, 51, 59, 60, 61], index, [50, 60])

        # The first pass does not expand
        index = self.make_1000_node_index()
        self.set_cached_offsets(index, [0])
        self.assertExpandOffsets([1], index, [1])
        self.set_cached_offsets(index, [0, 1])
        self.assertExpandOffsets([100], index, [100])
        self.set_cached_offsets(index, [0, 1, 100])
        # But after the first depth, we will expand
        self.assertExpandOffsets([2, 3, 4, 5, 6, 7], index, [2])
        self.assertExpandOffsets([2, 3, 4, 5, 6, 7], index, [4])
        self.set_cached_offsets(index, [0, 1, 2, 3, 4, 5, 6, 7, 100])
        self.assertExpandOffsets([102, 103, 104, 105, 106, 107, 108], index,
                                 [105])