350
350
builder.add_node(('k', 'ey'), 'data', ([('reference', 'tokey')], ))
351
351
builder.add_node(('reference', 'tokey'), 'data', ([],))
353
def test_set_optimize(self):
    """set_optimize(for_size=...) is reflected in _optimize_for_size."""
    index_builder = GraphIndexBuilder(reference_lists=1, key_elements=2)
    # Turn the flag on, then off again, checking the private attribute
    # after each call.
    expectations = ((True, self.assertTrue), (False, self.assertFalse))
    for flag, check in expectations:
        index_builder.set_optimize(for_size=flag)
        check(index_builder._optimize_for_size)
361
354
class TestGraphIndex(TestCaseWithMemoryTransport):
363
def make_key(self, number):
    """Return a 1-tuple key: str(number) followed by 100 'X' characters."""
    padding = 'X' * 100
    key_text = str(number) + padding
    return (key_text,)
366
def make_value(self, number):
    """Return a value string: str(number) followed by 100 'Y' characters."""
    padding = 'Y' * 100
    return ''.join([str(number), padding])
369
def make_nodes(self, count=64):
370
# generate a big enough index that we only read some of it on a typical
373
for counter in range(count):
374
nodes.append((self.make_key(counter), self.make_value(counter), ()))
377
356
def make_index(self, ref_lists=0, key_elements=1, nodes=[]):
378
357
builder = GraphIndexBuilder(ref_lists, key_elements=key_elements)
379
358
for key, value, references in nodes:
425
390
# is a trivial index.
426
391
self.assertEqual([((index._size // 2, ('missing', )), False)],
428
# And this should have caused the file to be fully buffered
429
self.assertIsNot(None, index._nodes)
430
self.assertEqual([], index._parsed_byte_map)
393
# And the regions of the file that have been parsed - in this case the
394
# entire file - should be in the parsed region map.
395
self.assertEqual([(0, 60)], index._parsed_byte_map)
396
self.assertEqual([(None, None)], index._parsed_key_map)
432
def test_first_lookup_key_via_location(self):
433
# We need enough data so that the _HEADER_READV doesn't consume the
434
# whole file. We always read 800 bytes for every key, and the local
435
# transport natural expansion is 4096 bytes. So we have to have >8192
436
# bytes or we will trigger "buffer_all".
437
# We also want the 'missing' key to fall within the range that *did*
440
index = self.make_index(nodes=self.make_nodes(64))
398
def test_parsing_parses_data_adjacent_to_parsed_regions(self):
399
# we trim data we receive to remove the first and trailing
400
# partial lines, except when they start at the end/finish at the start
401
# of a region we've already parsed / the end of the file. The trivial
402
# test for this is an index with 1 key.
403
index = self.make_index(nodes=[(('name', ), 'data', ())])
441
404
# reset the transport log
442
405
del index._transport._activity[:]
443
# do a _lookup_keys_via_location call for the middle of the file, which
444
# is what bisection uses.
445
start_lookup = index._size // 2
446
406
result = index._lookup_keys_via_location(
447
[(start_lookup, ('40missing', ))])
407
[(index._size // 2, ('missing', ))])
448
408
# this should have asked for a readv request, with adjust_for_latency,
449
409
# and two regions: the header, and half-way into the file.
450
410
self.assertEqual([
452
[(start_lookup, 800), (0, 200)], True, index._size),
411
('readv', 'index', [(36, 36), (0, 200)], True, 72),
454
413
index._transport._activity)
455
414
# and the result should be that the key cannot be present, because this
456
# is a trivial index.
457
self.assertEqual([((start_lookup, ('40missing', )), False)],
415
# is a trivial index and we should not have to do more round trips.
416
self.assertEqual([((index._size // 2, ('missing', )), False)],
459
# And this should not have caused the file to be fully buffered
460
self.assertIs(None, index._nodes)
461
# And the regions of the file that have been parsed should be in the
462
# parsed_byte_map and the parsed_key_map
463
self.assertEqual([(0, 4008), (5046, 8996)], index._parsed_byte_map)
464
self.assertEqual([(None, self.make_key(26)),
465
(self.make_key(31), self.make_key(48))],
466
index._parsed_key_map)
418
# The whole file should be parsed at this point.
419
self.assertEqual([(0, 72)], index._parsed_byte_map)
420
self.assertEqual([(None, ('name',))], index._parsed_key_map)
468
422
def test_parsing_non_adjacent_data_trims(self):
469
index = self.make_index(nodes=self.make_nodes(64))
423
# generate a big enough index that we only read some of it on a typical
426
def make_key(number):
427
return (str(number) + 'X'*100,)
428
for counter in range(64):
429
nodes.append((make_key(counter), 'Y'*100, ()))
430
index = self.make_index(nodes=nodes)
470
431
result = index._lookup_keys_via_location(
471
432
[(index._size // 2, ('40', ))])
472
433
# and the result should be that the key cannot be present, because key is
488
448
# which then trims the start and end so the parsed size is < readv
490
450
# then a dual lookup (or a reference lookup for that matter) which
491
# abuts or overlaps the parsed region on both sides will need to
451
# abuts or overlaps the parsed region on both sides will need to
492
452
# discard the data in the middle, but parse the end as well.
494
# we test this by doing a single lookup to seed the data, then
495
# a lookup for two keys that are present, and adjacent -
454
# we test this by doing a single lookup to seed the data, then
455
# a lookup for two keys that are present, and adjacent -
496
456
# we expect both to be found, and the parsed byte map to include the
497
457
# locations of both keys.
498
index = self.make_index(nodes=self.make_nodes(128))
459
def make_key(number):
460
return (str(number) + 'X'*100,)
461
def make_value(number):
463
for counter in range(128):
464
nodes.append((make_key(counter), make_value(counter), ()))
465
index = self.make_index(nodes=nodes)
499
466
result = index._lookup_keys_via_location(
500
467
[(index._size // 2, ('40', ))])
501
468
# and we should have a parse map that includes the header and the
502
469
# region that was parsed after trimming.
503
self.assertEqual([(0, 4045), (11759, 15707)], index._parsed_byte_map)
504
self.assertEqual([(None, self.make_key(116)),
505
(self.make_key(35), self.make_key(51))],
470
self.assertEqual([(0, 3991), (11622, 15534)], index._parsed_byte_map)
471
self.assertEqual([(None, make_key(116)), (make_key(35), make_key(51))],
506
472
index._parsed_key_map)
507
473
# now ask for two keys, right before and after the parsed region
508
474
result = index._lookup_keys_via_location(
509
[(11450, self.make_key(34)), (15707, self.make_key(52))])
475
[(11450, make_key(34)), (15534, make_key(52))])
510
476
self.assertEqual([
511
((11450, self.make_key(34)),
512
(index, self.make_key(34), self.make_value(34))),
513
((15707, self.make_key(52)),
514
(index, self.make_key(52), self.make_value(52))),
477
((11450, make_key(34)), (index, make_key(34), make_value(34))),
478
((15534, make_key(52)), (index, make_key(52), make_value(52))),
517
self.assertEqual([(0, 4045), (9889, 17993)], index._parsed_byte_map)
481
self.assertEqual([(0, 3991), (9975, 17799)], index._parsed_byte_map)
519
483
def test_lookup_missing_key_answers_without_io_when_map_permits(self):
520
484
# generate a big enough index that we only read some of it on a typical
521
485
# bisection lookup.
522
index = self.make_index(nodes=self.make_nodes(64))
487
def make_key(number):
488
return (str(number) + 'X'*100,)
489
for counter in range(64):
490
nodes.append((make_key(counter), 'Y'*100, ()))
491
index = self.make_index(nodes=nodes)
523
492
# lookup the keys in the middle of the file
524
493
result =index._lookup_keys_via_location(
525
494
[(index._size // 2, ('40', ))])
526
495
# check the parse map, this determines the test validity
527
self.assertEqual([(0, 4008), (5046, 8996)], index._parsed_byte_map)
528
self.assertEqual([(None, self.make_key(26)),
529
(self.make_key(31), self.make_key(48))],
496
self.assertEqual([(0, 3972), (5001, 8914)], index._parsed_byte_map)
497
self.assertEqual([(None, make_key(26)), (make_key(31), make_key(48))],
530
498
index._parsed_key_map)
531
499
# reset the transport log
532
500
del index._transport._activity[:]
559
534
# be in the index) - even when the byte location we ask for is outside
560
535
# the parsed region
562
result = index._lookup_keys_via_location([(4000, self.make_key(40))])
537
result = index._lookup_keys_via_location([(4000, make_key(40))])
563
538
self.assertEqual(
564
[((4000, self.make_key(40)),
565
(index, self.make_key(40), self.make_value(40)))],
539
[((4000, make_key(40)), (index, make_key(40), make_value(40)))],
567
541
self.assertEqual([], index._transport._activity)
569
543
def test_lookup_key_below_probed_area(self):
570
544
# generate a big enough index that we only read some of it on a typical
571
545
# bisection lookup.
572
index = self.make_index(nodes=self.make_nodes(64))
547
def make_key(number):
548
return (str(number) + 'X'*100,)
549
for counter in range(64):
550
nodes.append((make_key(counter), 'Y'*100, ()))
551
index = self.make_index(nodes=nodes)
573
552
# ask for the key in the middle, but a key that is located in the
574
553
# unparsed region before the middle.
575
554
result =index._lookup_keys_via_location(
576
555
[(index._size // 2, ('30', ))])
577
556
# check the parse map, this determines the test validity
578
self.assertEqual([(0, 4008), (5046, 8996)], index._parsed_byte_map)
579
self.assertEqual([(None, self.make_key(26)),
580
(self.make_key(31), self.make_key(48))],
557
self.assertEqual([(0, 3972), (5001, 8914)], index._parsed_byte_map)
558
self.assertEqual([(None, make_key(26)), (make_key(31), make_key(48))],
581
559
index._parsed_key_map)
582
560
self.assertEqual([((index._size // 2, ('30', )), -1)],
585
563
def test_lookup_key_above_probed_area(self):
586
564
# generate a big enough index that we only read some of it on a typical
587
565
# bisection lookup.
588
index = self.make_index(nodes=self.make_nodes(64))
567
def make_key(number):
568
return (str(number) + 'X'*100,)
569
for counter in range(64):
570
nodes.append((make_key(counter), 'Y'*100, ()))
571
index = self.make_index(nodes=nodes)
589
572
# ask for the key in the middle, but a key that is located in the
590
573
# unparsed region after the middle.
591
574
result =index._lookup_keys_via_location(
592
575
[(index._size // 2, ('50', ))])
593
576
# check the parse map, this determines the test validity
594
self.assertEqual([(0, 4008), (5046, 8996)], index._parsed_byte_map)
595
self.assertEqual([(None, self.make_key(26)),
596
(self.make_key(31), self.make_key(48))],
577
self.assertEqual([(0, 3972), (5001, 8914)], index._parsed_byte_map)
578
self.assertEqual([(None, make_key(26)), (make_key(31), make_key(48))],
597
579
index._parsed_key_map)
598
580
self.assertEqual([((index._size // 2, ('50', )), +1)],
602
584
# generate a big enough index that we only read some of it on a typical
603
585
# bisection lookup.
605
for counter in range(99):
606
nodes.append((self.make_key(counter), self.make_value(counter),
607
((self.make_key(counter + 20),),) ))
608
index = self.make_index(ref_lists=1, nodes=nodes)
609
# lookup a key in the middle that does not exist, so that when we can
610
# check that the referred-to-keys are not accessed automatically.
611
index_size = index._size
612
index_center = index_size // 2
613
result = index._lookup_keys_via_location(
614
[(index_center, ('40', ))])
615
# check the parse map - only the start and middle should have been
617
self.assertEqual([(0, 4027), (10198, 14028)], index._parsed_byte_map)
618
self.assertEqual([(None, self.make_key(17)),
619
(self.make_key(44), self.make_key(5))],
620
index._parsed_key_map)
621
# and check the transport activity likewise.
623
[('readv', 'index', [(index_center, 800), (0, 200)], True,
625
index._transport._activity)
626
# reset the transport log for testing the reference lookup
627
del index._transport._activity[:]
628
# now looking up a key in the portion of the file already parsed should
629
# only perform IO to resolve its key references.
630
result = index._lookup_keys_via_location([(11000, self.make_key(45))])
632
[((11000, self.make_key(45)),
633
(index, self.make_key(45), self.make_value(45),
634
((self.make_key(65),),)))],
636
self.assertEqual([('readv', 'index', [(15093, 800)], True, index_size)],
637
index._transport._activity)
639
def test_lookup_key_can_buffer_all(self):
587
def make_key(number):
588
return (str(number) + 'X'*100,)
589
def make_value(number):
590
return str(number) + 'Y'*100
641
591
for counter in range(64):
642
nodes.append((self.make_key(counter), self.make_value(counter),
643
((self.make_key(counter + 20),),) ))
592
nodes.append((make_key(counter), make_value(counter),
593
((make_key(counter + 20),),) ))
644
594
index = self.make_index(ref_lists=1, nodes=nodes)
645
595
# lookup a key in the middle that does not exist, so that when we can
646
596
# check that the referred-to-keys are not accessed automatically.
647
index_size = index._size
648
index_center = index_size // 2
649
result = index._lookup_keys_via_location([(index_center, ('40', ))])
597
result =index._lookup_keys_via_location(
598
[(index._size // 2, ('40', ))])
650
599
# check the parse map - only the start and middle should have been
652
601
self.assertEqual([(0, 3890), (6444, 10274)], index._parsed_byte_map)
653
self.assertEqual([(None, self.make_key(25)),
654
(self.make_key(37), self.make_key(52))],
602
self.assertEqual([(None, make_key(25)), (make_key(37), make_key(52))],
655
603
index._parsed_key_map)
656
604
# and check the transport activity likewise.
657
605
self.assertEqual(
658
[('readv', 'index', [(index_center, 800), (0, 200)], True,
606
[('readv', 'index', [(7906, 800), (0, 200)], True, 15813)],
660
607
index._transport._activity)
661
608
# reset the transport log for testing the reference lookup
662
609
del index._transport._activity[:]
663
610
# now looking up a key in the portion of the file already parsed should
664
611
# only perform IO to resolve its key references.
665
result = index._lookup_keys_via_location([(7000, self.make_key(40))])
612
result = index._lookup_keys_via_location([(4000, make_key(40))])
666
613
self.assertEqual(
667
[((7000, self.make_key(40)),
668
(index, self.make_key(40), self.make_value(40),
669
((self.make_key(60),),)))],
614
[((4000, make_key(40)),
615
(index, make_key(40), make_value(40), ((make_key(60),),)))],
671
# Resolving the references would have required more data read, and we
672
# are already above the 50% threshold, so it triggered a _buffer_all
673
self.assertEqual([('get', 'index')], index._transport._activity)
617
self.assertEqual([('readv', 'index', [(11976, 800)], True, 15813)],
618
index._transport._activity)
675
620
def test_iter_all_entries_empty(self):
676
621
index = self.make_index()
695
640
(index, ('ref', ), 'refdata', ((), ))]),
696
641
set(index.iter_all_entries()))
698
def test_iter_entries_buffers_once(self):
699
index = self.make_index(nodes=self.make_nodes(2))
700
# reset the transport log
701
del index._transport._activity[:]
702
self.assertEqual(set([(index, self.make_key(1), self.make_value(1))]),
703
set(index.iter_entries([self.make_key(1)])))
704
# We should have requested reading the header bytes
705
# But not needed any more than that because it would have triggered a
708
('readv', 'index', [(0, 200)], True, index._size),
710
index._transport._activity)
711
# And that should have been enough to trigger reading the whole index
713
self.assertIsNot(None, index._nodes)
715
def test_iter_entries_buffers_by_bytes_read(self):
716
index = self.make_index(nodes=self.make_nodes(64))
717
list(index.iter_entries([self.make_key(10)]))
718
# The first time through isn't enough to trigger a buffer all
719
self.assertIs(None, index._nodes)
720
self.assertEqual(4096, index._bytes_read)
721
# Grabbing a key in that same page won't trigger a buffer all, as we
722
# still haven't read 50% of the file
723
list(index.iter_entries([self.make_key(11)]))
724
self.assertIs(None, index._nodes)
725
self.assertEqual(4096, index._bytes_read)
726
# We haven't read more data, so reading outside the range won't trigger
727
# a buffer all right away
728
list(index.iter_entries([self.make_key(40)]))
729
self.assertIs(None, index._nodes)
730
self.assertEqual(8192, index._bytes_read)
731
# On the next pass, we will not trigger buffer all if the key is
732
# available without reading more
733
list(index.iter_entries([self.make_key(32)]))
734
self.assertIs(None, index._nodes)
735
# But if we *would* need to read more to resolve it, then we will
737
list(index.iter_entries([self.make_key(60)]))
738
self.assertIsNot(None, index._nodes)
740
643
def test_iter_entries_references_resolved(self):
741
644
index = self.make_index(1, nodes=[
742
645
(('name', ), 'data', ([('ref', ), ('ref', )], )),
860
763
(('name', ), '', ()), (('foo', ), '', ())])
861
764
self.assertEqual(2, index.key_count())
863
def test_read_and_parse_tracks_real_read_value(self):
864
index = self.make_index(nodes=self.make_nodes(10))
865
del index._transport._activity[:]
866
index._read_and_parse([(0, 200)])
868
('readv', 'index', [(0, 200)], True, index._size),
870
index._transport._activity)
871
# The readv expansion code will expand the initial request to 4096
872
# bytes, which is more than enough to read the entire index, and we
873
# will track the fact that we read that many bytes.
874
self.assertEqual(index._size, index._bytes_read)
876
def test_read_and_parse_triggers_buffer_all(self):
    """_read_and_parse over the whole file leaves index._nodes populated."""
    entries = [
        (('name', 'fin1'), 'data', ()),
        (('name', 'fin2'), 'beta', ()),
        (('ref', 'erence'), 'refdata', ()),
        ]
    graph_index = self.make_index(key_elements=2, nodes=entries)
    self.assertTrue(graph_index._size > 0)
    # Before the read, _nodes is expected to still be unset.
    self.assertIs(None, graph_index._nodes)
    # Request a single range spanning every byte of the index.
    whole_file = (0, graph_index._size)
    graph_index._read_and_parse([whole_file])
    self.assertIsNot(None, graph_index._nodes)
886
766
def test_validate_bad_index_errors(self):
887
767
trans = self.get_transport()
888
768
trans.put_bytes('name', "not an index\n")
934
814
size = trans.put_file(name, stream)
935
815
return GraphIndex(trans, name, size)
937
def make_combined_index_with_missing(self, missing=['1', '2']):
938
"""Create a CombinedGraphIndex which will have missing indexes.
940
This creates a CGI which thinks it has 2 indexes, however they have
941
been deleted. If CGI._reload_func() is called, then it will repopulate
944
:param missing: The underlying indexes to delete
945
:return: (CombinedGraphIndex, reload_counter)
947
index1 = self.make_index('1', nodes=[(('1',), '', ())])
948
index2 = self.make_index('2', nodes=[(('2',), '', ())])
949
index3 = self.make_index('3', nodes=[
953
# total_reloads, num_changed, num_unchanged
954
reload_counter = [0, 0, 0]
956
reload_counter[0] += 1
957
new_indices = [index3]
958
if index._indices == new_indices:
959
reload_counter[2] += 1
961
reload_counter[1] += 1
962
index._indices[:] = new_indices
964
index = CombinedGraphIndex([index1, index2], reload_func=reload)
965
trans = self.get_transport()
966
for fname in missing:
968
return index, reload_counter
970
817
def test_open_missing_index_no_error(self):
971
818
trans = self.get_transport()
972
819
index1 = GraphIndex(trans, 'missing', 100)
1110
957
index = CombinedGraphIndex([])
1111
958
index.validate()
1113
def test_key_count_reloads(self):
    """key_count() returns 2 and the reload counter ends at [1, 1, 0]."""
    combined, reloads = self.make_combined_index_with_missing()
    total_keys = combined.key_count()
    self.assertEqual(2, total_keys)
    self.assertEqual([1, 1, 0], reloads)
1118
def test_key_count_no_reload(self):
    """With _reload_func cleared, key_count() raises NoSuchFile."""
    combined, _ = self.make_combined_index_with_missing()
    # With no reload hook installed the exception is simply raised.
    combined._reload_func = None
    self.assertRaises(errors.NoSuchFile, combined.key_count)
1124
def test_key_count_reloads_and_fails(self):
1125
# We have deleted all underlying indexes, so we will try to reload, but
1126
# still fail. This is mostly to test we don't get stuck in an infinite
1127
# loop trying to reload
1128
index, reload_counter = self.make_combined_index_with_missing(
1130
self.assertRaises(errors.NoSuchFile, index.key_count)
1131
self.assertEqual([2, 1, 1], reload_counter)
1133
def test_iter_entries_reloads(self):
1134
index, reload_counter = self.make_combined_index_with_missing()
1135
result = list(index.iter_entries([('1',), ('2',), ('3',)]))
1136
index3 = index._indices[0]
1137
self.assertEqual([(index3, ('1',), ''), (index3, ('2',), '')],
1139
self.assertEqual([1, 1, 0], reload_counter)
1141
def test_iter_entries_reloads_midway(self):
1142
# The first index still looks present, so we get interrupted mid-way
1144
index, reload_counter = self.make_combined_index_with_missing(['2'])
1145
index1, index2 = index._indices
1146
result = list(index.iter_entries([('1',), ('2',), ('3',)]))
1147
index3 = index._indices[0]
1148
# We had already yielded '1', so we just go on to the next, we should
1149
# not yield '1' twice.
1150
self.assertEqual([(index1, ('1',), ''), (index3, ('2',), '')],
1152
self.assertEqual([1, 1, 0], reload_counter)
1154
def test_iter_entries_no_reload(self):
    """With _reload_func cleared, iter_entries() raises NoSuchFile."""
    combined, _ = self.make_combined_index_with_missing()
    # With no reload hook installed the exception is simply raised.
    combined._reload_func = None
    wanted_keys = [('3',)]
    self.assertListRaises(
        errors.NoSuchFile, combined.iter_entries, wanted_keys)
1160
def test_iter_entries_reloads_and_fails(self):
1161
index, reload_counter = self.make_combined_index_with_missing(
1163
self.assertListRaises(errors.NoSuchFile, index.iter_entries, [('3',)])
1164
self.assertEqual([2, 1, 1], reload_counter)
1166
def test_iter_all_entries_reloads(self):
1167
index, reload_counter = self.make_combined_index_with_missing()
1168
result = list(index.iter_all_entries())
1169
index3 = index._indices[0]
1170
self.assertEqual([(index3, ('1',), ''), (index3, ('2',), '')],
1172
self.assertEqual([1, 1, 0], reload_counter)
1174
def test_iter_all_entries_reloads_midway(self):
1175
index, reload_counter = self.make_combined_index_with_missing(['2'])
1176
index1, index2 = index._indices
1177
result = list(index.iter_all_entries())
1178
index3 = index._indices[0]
1179
# We had already yielded '1', so we just go on to the next, we should
1180
# not yield '1' twice.
1181
self.assertEqual([(index1, ('1',), ''), (index3, ('2',), '')],
1183
self.assertEqual([1, 1, 0], reload_counter)
1185
def test_iter_all_entries_no_reload(self):
    """With _reload_func cleared, iter_all_entries() raises NoSuchFile."""
    combined, _ = self.make_combined_index_with_missing()
    combined._reload_func = None
    self.assertListRaises(errors.NoSuchFile, combined.iter_all_entries)
1190
def test_iter_all_entries_reloads_and_fails(self):
1191
index, reload_counter = self.make_combined_index_with_missing(
1193
self.assertListRaises(errors.NoSuchFile, index.iter_all_entries)
1195
def test_iter_entries_prefix_reloads(self):
    """iter_entries_prefix() yields key '1' from the index found after reload."""
    combined, reloads = self.make_combined_index_with_missing()
    found = list(combined.iter_entries_prefix([('1',)]))
    # Fetched only after iterating: the expected entry refers to whatever
    # index is first in _indices at this point.
    current_first = combined._indices[0]
    expected = [(current_first, ('1',), '')]
    self.assertEqual(expected, found)
    self.assertEqual([1, 1, 0], reloads)
1202
def test_iter_entries_prefix_reloads_midway(self):
    """Key '1' is yielded once, from the original first index."""
    combined, reloads = self.make_combined_index_with_missing(['2'])
    # Capture the pre-iteration indices; unpacking also requires that
    # there are exactly two of them.
    original_first, _ = combined._indices
    found = list(combined.iter_entries_prefix([('1',)]))
    # Not used in the assertions below; the lookup is kept so the test
    # still errors if _indices is empty after iteration.
    current_first = combined._indices[0]
    # We had already yielded '1', so we just go on to the next, we should
    # not yield '1' twice.
    expected = [(original_first, ('1',), '')]
    self.assertEqual(expected, found)
    self.assertEqual([1, 1, 0], reloads)
1212
def test_iter_entries_prefix_no_reload(self):
1213
index, reload_counter = self.make_combined_index_with_missing()
1214
index._reload_func = None
1215
self.assertListRaises(errors.NoSuchFile, index.iter_entries_prefix,
1218
def test_iter_entries_prefix_reloads_and_fails(self):
1219
index, reload_counter = self.make_combined_index_with_missing(
1221
self.assertListRaises(errors.NoSuchFile, index.iter_entries_prefix,
1224
def test_validate_reloads(self):
1225
index, reload_counter = self.make_combined_index_with_missing()
1227
self.assertEqual([1, 1, 0], reload_counter)
1229
def test_validate_reloads_midway(self):
1230
index, reload_counter = self.make_combined_index_with_missing(['2'])
1233
def test_validate_no_reload(self):
    """With _reload_func cleared, validate() raises NoSuchFile."""
    combined, _ = self.make_combined_index_with_missing()
    combined._reload_func = None
    self.assertRaises(errors.NoSuchFile, combined.validate)
1238
def test_validate_reloads_and_fails(self):
1239
index, reload_counter = self.make_combined_index_with_missing(
1241
self.assertRaises(errors.NoSuchFile, index.validate)
1244
961
class TestInMemoryGraphIndex(TestCaseWithMemoryTransport):