372
386
self.assertEqual([], index._parsed_byte_map)
373
387
self.assertEqual([], index._parsed_key_map)
389
def test_key_count_buffers(self):
    # key_count() on a two-node index: a single header readv is expected,
    # and afterwards the index must hold its nodes in memory (_nodes set).
    index = self.make_index(nodes=self.make_nodes(2))
    # reset the transport log
    del index._transport._activity[:]
    self.assertEqual(2, index.key_count())
    # We should have requested reading the header bytes
    self.assertEqual([
        ('readv', 'index', [(0, 200)], True, index._size),
        ],
        index._transport._activity)
    # And that should have been enough to trigger reading the whole index
    self.assertIsNot(None, index._nodes)
403
def test_lookup_key_via_location_buffers(self):
    # A location lookup on an index built with no nodes: one readv is
    # logged, the key is reported absent, and the index ends up fully
    # buffered with an empty parsed byte map.
    index = self.make_index()
    # reset the transport log
    del index._transport._activity[:]
    # do a _lookup_keys_via_location call for the middle of the file, which
    # is what bisection uses.
    result = index._lookup_keys_via_location(
        [(index._size // 2, ('missing', ))])
    # this should have asked for a readv request, with adjust_for_latency,
    # and two regions: the header, and half-way into the file.
    self.assertEqual([
        ('readv', 'index', [(30, 30), (0, 200)], True, 60),
        ],
        index._transport._activity)
    # and the result should be that the key cannot be present, because this
    # is a trivial index.
    self.assertEqual([((index._size // 2, ('missing', )), False)],
                     result)
    # And this should have caused the file to be fully buffered
    self.assertIsNot(None, index._nodes)
    self.assertEqual([], index._parsed_byte_map)
375
425
def test_first_lookup_key_via_location(self):
376
index = self.make_index()
426
# We need enough data so that the _HEADER_READV doesn't consume the
427
# whole file. We always read 800 bytes for every key, and the local
428
# transport natural expansion is 4096 bytes. So we have to have >8192
429
# bytes or we will trigger "buffer_all".
430
# We also want the 'missing' key to fall within the range that *did*
433
index = self.make_index(nodes=self.make_nodes(64))
377
434
# reset the transport log
378
435
del index._transport._activity[:]
379
436
# do a _lookup_keys_via_location call for the middle of the file, which
380
437
# is what bisection uses.
438
start_lookup = index._size // 2
381
439
result = index._lookup_keys_via_location(
382
[(index._size // 2, ('missing', ))])
440
[(start_lookup, ('40missing', ))])
383
441
# this should have asked for a readv request, with adjust_for_latency,
384
442
# and two regions: the header, and half-way into the file.
385
443
self.assertEqual([
386
('readv', 'index', [(30, 30), (0, 200)], True, 60),
445
[(start_lookup, 800), (0, 200)], True, index._size),
388
447
index._transport._activity)
389
448
# and the result should be that the key cannot be present, because this
390
449
# is a trivial index.
391
self.assertEqual([((index._size // 2, ('missing', )), False)],
393
# And the regions of the file that have been parsed - in this case the
394
# entire file - should be in the parsed region map.
395
self.assertEqual([(0, 60)], index._parsed_byte_map)
396
self.assertEqual([(None, None)], index._parsed_key_map)
398
def test_parsing_parses_data_adjacent_to_parsed_regions(self):
399
# we trim data we receive to remove the first and trailing
400
# partial lines, except when they start at the end/finish at the start
401
# of a region we've already parsed/ the end of the file. The trivial
402
# test for this is an index with 1 key.
403
index = self.make_index(nodes=[(('name', ), 'data', ())])
404
# reset the transport log
405
del index._transport._activity[:]
406
result = index._lookup_keys_via_location(
407
[(index._size // 2, ('missing', ))])
408
# this should have asked for a readv request, with adjust_for_latency,
409
# and two regions: the header, and half-way into the file.
411
('readv', 'index', [(36, 36), (0, 200)], True, 72),
413
index._transport._activity)
414
# and the result should be that the key cannot be present, because this
415
# is a trivial index and we should not have to do more round trips.
416
self.assertEqual([((index._size // 2, ('missing', )), False)],
418
# The whole file should be parsed at this point.
419
self.assertEqual([(0, 72)], index._parsed_byte_map)
420
self.assertEqual([(None, ('name',))], index._parsed_key_map)
450
self.assertEqual([((start_lookup, ('40missing', )), False)],
452
# And this should not have caused the file to be fully buffered
453
self.assertIs(None, index._nodes)
454
# And the regions of the file that have been parsed should be in the
455
# parsed_byte_map and the parsed_key_map
456
self.assertEqual([(0, 4008), (5046, 8996)], index._parsed_byte_map)
457
self.assertEqual([(None, self.make_key(26)),
458
(self.make_key(31), self.make_key(48))],
459
index._parsed_key_map)
422
461
def test_parsing_non_adjacent_data_trims(self):
423
# generate a big enough index that we only read some of it on a typical
426
def make_key(number):
427
return (str(number) + 'X'*100,)
428
for counter in range(64):
429
nodes.append((make_key(counter), 'Y'*100, ()))
430
index = self.make_index(nodes=nodes)
462
index = self.make_index(nodes=self.make_nodes(64))
431
463
result = index._lookup_keys_via_location(
432
464
[(index._size // 2, ('40', ))])
433
465
# and the result should be that the key cannot be present, because key is
448
481
# which then trims the start and end so the parsed size is < readv
450
483
# then a dual lookup (or a reference lookup for that matter) which
451
# abuts or overlaps the parsed region on both sides will need to
484
# abuts or overlaps the parsed region on both sides will need to
452
485
# discard the data in the middle, but parse the end as well.
454
# we test this by doing a single lookup to seed the data, then
455
# a lookup for two keys that are present, and adjacent -
487
# we test this by doing a single lookup to seed the data, then
488
# a lookup for two keys that are present, and adjacent -
456
489
# we expect both to be found, and the parsed byte map to include the
457
490
# locations of both keys.
459
def make_key(number):
460
return (str(number) + 'X'*100,)
461
def make_value(number):
463
for counter in range(128):
464
nodes.append((make_key(counter), make_value(counter), ()))
465
index = self.make_index(nodes=nodes)
491
index = self.make_index(nodes=self.make_nodes(128))
466
492
result = index._lookup_keys_via_location(
467
493
[(index._size // 2, ('40', ))])
468
494
# and we should have a parse map that includes the header and the
469
495
# region that was parsed after trimming.
470
self.assertEqual([(0, 3991), (11622, 15534)], index._parsed_byte_map)
471
self.assertEqual([(None, make_key(116)), (make_key(35), make_key(51))],
496
self.assertEqual([(0, 4045), (11759, 15707)], index._parsed_byte_map)
497
self.assertEqual([(None, self.make_key(116)),
498
(self.make_key(35), self.make_key(51))],
472
499
index._parsed_key_map)
473
500
# now ask for two keys, right before and after the parsed region
474
501
result = index._lookup_keys_via_location(
475
[(11450, make_key(34)), (15534, make_key(52))])
502
[(11450, self.make_key(34)), (15707, self.make_key(52))])
476
503
self.assertEqual([
477
((11450, make_key(34)), (index, make_key(34), make_value(34))),
478
((15534, make_key(52)), (index, make_key(52), make_value(52))),
504
((11450, self.make_key(34)),
505
(index, self.make_key(34), self.make_value(34))),
506
((15707, self.make_key(52)),
507
(index, self.make_key(52), self.make_value(52))),
481
self.assertEqual([(0, 3991), (9975, 17799)], index._parsed_byte_map)
510
self.assertEqual([(0, 4045), (9889, 17993)], index._parsed_byte_map)
483
512
def test_lookup_missing_key_answers_without_io_when_map_permits(self):
484
513
# generate a big enough index that we only read some of it on a typical
485
514
# bisection lookup.
487
def make_key(number):
488
return (str(number) + 'X'*100,)
489
for counter in range(64):
490
nodes.append((make_key(counter), 'Y'*100, ()))
491
index = self.make_index(nodes=nodes)
515
index = self.make_index(nodes=self.make_nodes(64))
492
516
# lookup the keys in the middle of the file
493
517
result =index._lookup_keys_via_location(
494
518
[(index._size // 2, ('40', ))])
495
519
# check the parse map, this determines the test validity
496
self.assertEqual([(0, 3972), (5001, 8914)], index._parsed_byte_map)
497
self.assertEqual([(None, make_key(26)), (make_key(31), make_key(48))],
520
self.assertEqual([(0, 4008), (5046, 8996)], index._parsed_byte_map)
521
self.assertEqual([(None, self.make_key(26)),
522
(self.make_key(31), self.make_key(48))],
498
523
index._parsed_key_map)
499
524
# reset the transport log
500
525
del index._transport._activity[:]
512
536
def test_lookup_present_key_answers_without_io_when_map_permits(self):
513
537
# generate a big enough index that we only read some of it on a typical
514
538
# bisection lookup.
516
def make_key(number):
517
return (str(number) + 'X'*100,)
518
def make_value(number):
519
return str(number) + 'Y'*100
520
for counter in range(64):
521
nodes.append((make_key(counter), make_value(counter), ()))
522
index = self.make_index(nodes=nodes)
539
index = self.make_index(nodes=self.make_nodes(64))
523
540
# lookup the keys in the middle of the file
524
541
result =index._lookup_keys_via_location(
525
542
[(index._size // 2, ('40', ))])
526
543
# check the parse map, this determines the test validity
527
544
self.assertEqual([(0, 4008), (5046, 8996)], index._parsed_byte_map)
528
self.assertEqual([(None, make_key(26)), (make_key(31), make_key(48))],
545
self.assertEqual([(None, self.make_key(26)),
546
(self.make_key(31), self.make_key(48))],
529
547
index._parsed_key_map)
530
548
# reset the transport log
531
549
del index._transport._activity[:]
534
552
# be in the index) - even when the byte location we ask for is outside
535
553
# the parsed region
537
result = index._lookup_keys_via_location([(4000, make_key(40))])
555
result = index._lookup_keys_via_location([(4000, self.make_key(40))])
538
556
self.assertEqual(
539
[((4000, make_key(40)), (index, make_key(40), make_value(40)))],
557
[((4000, self.make_key(40)),
558
(index, self.make_key(40), self.make_value(40)))],
541
560
self.assertEqual([], index._transport._activity)
543
562
def test_lookup_key_below_probed_area(self):
544
563
# generate a big enough index that we only read some of it on a typical
545
564
# bisection lookup.
547
def make_key(number):
548
return (str(number) + 'X'*100,)
549
for counter in range(64):
550
nodes.append((make_key(counter), 'Y'*100, ()))
551
index = self.make_index(nodes=nodes)
565
index = self.make_index(nodes=self.make_nodes(64))
552
566
# ask for the key in the middle, but a key that is located in the
553
567
# unparsed region before the middle.
554
568
result =index._lookup_keys_via_location(
555
569
[(index._size // 2, ('30', ))])
556
570
# check the parse map, this determines the test validity
557
self.assertEqual([(0, 3972), (5001, 8914)], index._parsed_byte_map)
558
self.assertEqual([(None, make_key(26)), (make_key(31), make_key(48))],
571
self.assertEqual([(0, 4008), (5046, 8996)], index._parsed_byte_map)
572
self.assertEqual([(None, self.make_key(26)),
573
(self.make_key(31), self.make_key(48))],
559
574
index._parsed_key_map)
560
575
self.assertEqual([((index._size // 2, ('30', )), -1)],
563
578
def test_lookup_key_above_probed_area(self):
564
579
# generate a big enough index that we only read some of it on a typical
565
580
# bisection lookup.
567
def make_key(number):
568
return (str(number) + 'X'*100,)
569
for counter in range(64):
570
nodes.append((make_key(counter), 'Y'*100, ()))
571
index = self.make_index(nodes=nodes)
581
index = self.make_index(nodes=self.make_nodes(64))
572
582
# ask for the key in the middle, but a key that is located in the
573
583
# unparsed region after the middle.
574
584
result =index._lookup_keys_via_location(
575
585
[(index._size // 2, ('50', ))])
576
586
# check the parse map, this determines the test validity
577
self.assertEqual([(0, 3972), (5001, 8914)], index._parsed_byte_map)
578
self.assertEqual([(None, make_key(26)), (make_key(31), make_key(48))],
587
self.assertEqual([(0, 4008), (5046, 8996)], index._parsed_byte_map)
588
self.assertEqual([(None, self.make_key(26)),
589
(self.make_key(31), self.make_key(48))],
579
590
index._parsed_key_map)
580
591
self.assertEqual([((index._size // 2, ('50', )), +1)],
584
595
# generate a big enough index that we only read some of it on a typical
585
596
# bisection lookup.
587
def make_key(number):
588
return (str(number) + 'X'*100,)
589
def make_value(number):
590
return str(number) + 'Y'*100
598
for counter in range(99):
599
nodes.append((self.make_key(counter), self.make_value(counter),
600
((self.make_key(counter + 20),),) ))
601
index = self.make_index(ref_lists=1, nodes=nodes)
602
# lookup a key in the middle that does not exist, so that we can
603
# check that the referred-to-keys are not accessed automatically.
604
index_size = index._size
605
index_center = index_size // 2
606
result = index._lookup_keys_via_location(
607
[(index_center, ('40', ))])
608
# check the parse map - only the start and middle should have been
610
self.assertEqual([(0, 4027), (10198, 14028)], index._parsed_byte_map)
611
self.assertEqual([(None, self.make_key(17)),
612
(self.make_key(44), self.make_key(5))],
613
index._parsed_key_map)
614
# and check the transport activity likewise.
616
[('readv', 'index', [(index_center, 800), (0, 200)], True,
618
index._transport._activity)
619
# reset the transport log for testing the reference lookup
620
del index._transport._activity[:]
621
# now looking up a key in the portion of the file already parsed should
622
# only perform IO to resolve its key references.
623
result = index._lookup_keys_via_location([(11000, self.make_key(45))])
625
[((11000, self.make_key(45)),
626
(index, self.make_key(45), self.make_value(45),
627
((self.make_key(65),),)))],
629
self.assertEqual([('readv', 'index', [(15093, 800)], True, index_size)],
630
index._transport._activity)
632
def test_lookup_key_can_buffer_all(self):
591
634
for counter in range(64):
592
nodes.append((make_key(counter), make_value(counter),
593
((make_key(counter + 20),),) ))
635
nodes.append((self.make_key(counter), self.make_value(counter),
636
((self.make_key(counter + 20),),) ))
594
637
index = self.make_index(ref_lists=1, nodes=nodes)
595
638
# lookup a key in the middle that does not exist, so that we can
596
639
# check that the referred-to-keys are not accessed automatically.
597
result =index._lookup_keys_via_location(
598
[(index._size // 2, ('40', ))])
640
index_size = index._size
641
index_center = index_size // 2
642
result = index._lookup_keys_via_location([(index_center, ('40', ))])
599
643
# check the parse map - only the start and middle should have been
601
645
self.assertEqual([(0, 3890), (6444, 10274)], index._parsed_byte_map)
602
self.assertEqual([(None, make_key(25)), (make_key(37), make_key(52))],
646
self.assertEqual([(None, self.make_key(25)),
647
(self.make_key(37), self.make_key(52))],
603
648
index._parsed_key_map)
604
649
# and check the transport activity likewise.
605
650
self.assertEqual(
606
[('readv', 'index', [(7906, 800), (0, 200)], True, 15813)],
651
[('readv', 'index', [(index_center, 800), (0, 200)], True,
607
653
index._transport._activity)
608
654
# reset the transport log for testing the reference lookup
609
655
del index._transport._activity[:]
610
656
# now looking up a key in the portion of the file already parsed should
611
657
# only perform IO to resolve its key references.
612
result = index._lookup_keys_via_location([(4000, make_key(40))])
658
result = index._lookup_keys_via_location([(7000, self.make_key(40))])
613
659
self.assertEqual(
614
[((4000, make_key(40)),
615
(index, make_key(40), make_value(40), ((make_key(60),),)))],
660
[((7000, self.make_key(40)),
661
(index, self.make_key(40), self.make_value(40),
662
((self.make_key(60),),)))],
617
self.assertEqual([('readv', 'index', [(11976, 800)], True, 15813)],
618
index._transport._activity)
664
# Resolving the references would have required more data read, and we
665
# are already above the 50% threshold, so it triggered a _buffer_all
666
self.assertEqual([('get', 'index')], index._transport._activity)
620
668
def test_iter_all_entries_empty(self):
621
669
index = self.make_index()
640
688
(index, ('ref', ), 'refdata', ((), ))]),
641
689
set(index.iter_all_entries()))
691
def test_iter_entries_buffers_once(self):
    # iter_entries() for one key of a two-node index: the entry is
    # returned, only the header readv is logged, and _nodes is populated.
    index = self.make_index(nodes=self.make_nodes(2))
    # reset the transport log
    del index._transport._activity[:]
    self.assertEqual(set([(index, self.make_key(1), self.make_value(1))]),
                     set(index.iter_entries([self.make_key(1)])))
    # We should have requested reading the header bytes
    # But not needed any more than that because it would have triggered a
    # buffer all
    self.assertEqual([
        ('readv', 'index', [(0, 200)], True, index._size),
        ],
        index._transport._activity)
    # And that should have been enough to trigger reading the whole index
    self.assertIsNot(None, index._nodes)
708
def test_iter_entries_buffers_by_bytes_read(self):
    # Walks a 64-node index through successive iter_entries() calls,
    # checking _bytes_read and whether _nodes has been populated after
    # each one.
    index = self.make_index(nodes=self.make_nodes(64))
    list(index.iter_entries([self.make_key(10)]))
    # The first time through isn't enough to trigger a buffer all
    self.assertIs(None, index._nodes)
    self.assertEqual(4096, index._bytes_read)
    # Grabbing a key in that same page won't trigger a buffer all, as we
    # still haven't read 50% of the file
    list(index.iter_entries([self.make_key(11)]))
    self.assertIs(None, index._nodes)
    self.assertEqual(4096, index._bytes_read)
    # We haven't read more data, so reading outside the range won't trigger
    # a buffer all right away
    list(index.iter_entries([self.make_key(40)]))
    self.assertIs(None, index._nodes)
    self.assertEqual(8192, index._bytes_read)
    # On the next pass, we will not trigger buffer all if the key is
    # available without reading more
    list(index.iter_entries([self.make_key(32)]))
    self.assertIs(None, index._nodes)
    # But if we *would* need to read more to resolve it, then we will
    # buffer all.
    list(index.iter_entries([self.make_key(60)]))
    self.assertIsNot(None, index._nodes)
643
733
def test_iter_entries_references_resolved(self):
644
734
index = self.make_index(1, nodes=[
645
735
(('name', ), 'data', ([('ref', ), ('ref', )], )),
763
853
(('name', ), '', ()), (('foo', ), '', ())])
764
854
self.assertEqual(2, index.key_count())
856
def test_read_and_parse_tracks_real_read_value(self):
    # _read_and_parse() must record the number of bytes actually read,
    # which here equals the whole index size rather than the 200 requested.
    index = self.make_index(nodes=self.make_nodes(10))
    del index._transport._activity[:]
    index._read_and_parse([(0, 200)])
    self.assertEqual([
        ('readv', 'index', [(0, 200)], True, index._size),
        ],
        index._transport._activity)
    # The readv expansion code will expand the initial request to 4096
    # bytes, which is more than enough to read the entire index, and we
    # will track the fact that we read that many bytes.
    self.assertEqual(index._size, index._bytes_read)
869
def test_read_and_parse_triggers_buffer_all(self):
    # Asking _read_and_parse() for the full byte range of the index should
    # leave _nodes populated; it is None before the read.
    index = self.make_index(key_elements=2, nodes=[
        (('name', 'fin1'), 'data', ()),
        (('name', 'fin2'), 'beta', ()),
        (('ref', 'erence'), 'refdata', ())])
    # Sanity-check the starting state: a non-empty file, nothing buffered.
    self.assertTrue(index._size > 0)
    self.assertIs(None, index._nodes)
    index._read_and_parse([(0, index._size)])
    self.assertIsNot(None, index._nodes)
766
879
def test_validate_bad_index_errors(self):
767
880
trans = self.get_transport()
768
881
trans.put_bytes('name', "not an index\n")