386
372
self.assertEqual([], index._parsed_byte_map)
387
373
self.assertEqual([], index._parsed_key_map)
389
def test_key_count_buffers(self):
390
index = self.make_index(nodes=self.make_nodes(2))
391
# reset the transport log
392
del index._transport._activity[:]
393
self.assertEqual(2, index.key_count())
394
# We should have requested reading the header bytes
396
('readv', 'index', [(0, 200)], True, index._size),
398
index._transport._activity)
399
# And that should have been enough to trigger reading the whole index
401
self.assertIsNot(None, index._nodes)
403
def test_lookup_key_via_location_buffers(self):
375
def test_first_lookup_key_via_location(self):
404
376
index = self.make_index()
405
377
# reset the transport log
406
378
del index._transport._activity[:]
418
390
# is a trivial index.
419
391
self.assertEqual([((index._size // 2, ('missing', )), False)],
421
# And this should have caused the file to be fully buffered
422
self.assertIsNot(None, index._nodes)
423
self.assertEqual([], index._parsed_byte_map)
393
# And the regions of the file that have been parsed - in this case the
394
# entire file - should be in the parsed region map.
395
self.assertEqual([(0, 60)], index._parsed_byte_map)
396
self.assertEqual([(None, None)], index._parsed_key_map)
425
def test_first_lookup_key_via_location(self):
426
# We need enough data so that the _HEADER_READV doesn't consume the
427
# whole file. We always read 800 bytes for every key, and the local
428
# transport natural expansion is 4096 bytes. So we have to have >8192
429
# bytes or we will trigger "buffer_all".
430
# We also want the 'missing' key to fall within the range that *did*
433
index = self.make_index(nodes=self.make_nodes(64))
398
def test_parsing_parses_data_adjacent_to_parsed_regions(self):
399
# we trim data we recieve to remove the first and trailing
400
# partial lines, except when they start at the end/finish at the start
401
# of a region we've alread parsed/ the end of the file. The trivial
402
# test for this is an index with 1 key.
403
index = self.make_index(nodes=[(('name', ), 'data', ())])
434
404
# reset the transport log
435
405
del index._transport._activity[:]
436
# do a _lookup_keys_via_location call for the middle of the file, which
437
# is what bisection uses.
438
start_lookup = index._size // 2
439
406
result = index._lookup_keys_via_location(
440
[(start_lookup, ('40missing', ))])
407
[(index._size // 2, ('missing', ))])
441
408
# this should have asked for a readv request, with adjust_for_latency,
442
409
# and two regions: the header, and half-way into the file.
443
410
self.assertEqual([
445
[(start_lookup, 800), (0, 200)], True, index._size),
411
('readv', 'index', [(36, 36), (0, 200)], True, 72),
447
413
index._transport._activity)
448
414
# and the result should be that the key cannot be present, because this
449
# is a trivial index.
450
self.assertEqual([((start_lookup, ('40missing', )), False)],
415
# is a trivial index and we should not have to do more round trips.
416
self.assertEqual([((index._size // 2, ('missing', )), False)],
452
# And this should not have caused the file to be fully buffered
453
self.assertIs(None, index._nodes)
454
# And the regions of the file that have been parsed should be in the
455
# parsed_byte_map and the parsed_key_map
456
self.assertEqual([(0, 4008), (5046, 8996)], index._parsed_byte_map)
457
self.assertEqual([(None, self.make_key(26)),
458
(self.make_key(31), self.make_key(48))],
459
index._parsed_key_map)
418
# The whole file should be parsed at this point.
419
self.assertEqual([(0, 72)], index._parsed_byte_map)
420
self.assertEqual([(None, ('name',))], index._parsed_key_map)
461
422
def test_parsing_non_adjacent_data_trims(self):
462
index = self.make_index(nodes=self.make_nodes(64))
423
# generate a big enough index that we only read some of it on a typical
426
def make_key(number):
427
return (str(number) + 'X'*100,)
428
for counter in range(64):
429
nodes.append((make_key(counter), 'Y'*100, ()))
430
index = self.make_index(nodes=nodes)
463
431
result = index._lookup_keys_via_location(
464
432
[(index._size // 2, ('40', ))])
465
433
# and the result should be that the key cannot be present, because key is
481
448
# which then trims the start and end so the parsed size is < readv
483
450
# then a dual lookup (or a reference lookup for that matter) which
484
# abuts or overlaps the parsed region on both sides will need to
451
# abuts or overlaps the parsed region on both sides will need to
485
452
# discard the data in the middle, but parse the end as well.
487
# we test this by doing a single lookup to seed the data, then
488
# a lookup for two keys that are present, and adjacent -
454
# we test this by doing a single lookup to seed the data, then
455
# a lookup for two keys that are present, and adjacent -
489
456
# we except both to be found, and the parsed byte map to include the
490
457
# locations of both keys.
491
index = self.make_index(nodes=self.make_nodes(128))
459
def make_key(number):
460
return (str(number) + 'X'*100,)
461
def make_value(number):
463
for counter in range(128):
464
nodes.append((make_key(counter), make_value(counter), ()))
465
index = self.make_index(nodes=nodes)
492
466
result = index._lookup_keys_via_location(
493
467
[(index._size // 2, ('40', ))])
494
468
# and we should have a parse map that includes the header and the
495
469
# region that was parsed after trimming.
496
self.assertEqual([(0, 4045), (11759, 15707)], index._parsed_byte_map)
497
self.assertEqual([(None, self.make_key(116)),
498
(self.make_key(35), self.make_key(51))],
470
self.assertEqual([(0, 3991), (11622, 15534)], index._parsed_byte_map)
471
self.assertEqual([(None, make_key(116)), (make_key(35), make_key(51))],
499
472
index._parsed_key_map)
500
473
# now ask for two keys, right before and after the parsed region
501
474
result = index._lookup_keys_via_location(
502
[(11450, self.make_key(34)), (15707, self.make_key(52))])
475
[(11450, make_key(34)), (15534, make_key(52))])
503
476
self.assertEqual([
504
((11450, self.make_key(34)),
505
(index, self.make_key(34), self.make_value(34))),
506
((15707, self.make_key(52)),
507
(index, self.make_key(52), self.make_value(52))),
477
((11450, make_key(34)), (index, make_key(34), make_value(34))),
478
((15534, make_key(52)), (index, make_key(52), make_value(52))),
510
self.assertEqual([(0, 4045), (9889, 17993)], index._parsed_byte_map)
481
self.assertEqual([(0, 3991), (9975, 17799)], index._parsed_byte_map)
512
483
def test_lookup_missing_key_answers_without_io_when_map_permits(self):
513
484
# generate a big enough index that we only read some of it on a typical
514
485
# bisection lookup.
515
index = self.make_index(nodes=self.make_nodes(64))
487
def make_key(number):
488
return (str(number) + 'X'*100,)
489
for counter in range(64):
490
nodes.append((make_key(counter), 'Y'*100, ()))
491
index = self.make_index(nodes=nodes)
516
492
# lookup the keys in the middle of the file
517
493
result =index._lookup_keys_via_location(
518
494
[(index._size // 2, ('40', ))])
519
495
# check the parse map, this determines the test validity
520
self.assertEqual([(0, 4008), (5046, 8996)], index._parsed_byte_map)
521
self.assertEqual([(None, self.make_key(26)),
522
(self.make_key(31), self.make_key(48))],
496
self.assertEqual([(0, 3972), (5001, 8914)], index._parsed_byte_map)
497
self.assertEqual([(None, make_key(26)), (make_key(31), make_key(48))],
523
498
index._parsed_key_map)
524
499
# reset the transport log
525
500
del index._transport._activity[:]
536
512
def test_lookup_present_key_answers_without_io_when_map_permits(self):
537
513
# generate a big enough index that we only read some of it on a typical
538
514
# bisection lookup.
539
index = self.make_index(nodes=self.make_nodes(64))
516
def make_key(number):
517
return (str(number) + 'X'*100,)
518
def make_value(number):
519
return str(number) + 'Y'*100
520
for counter in range(64):
521
nodes.append((make_key(counter), make_value(counter), ()))
522
index = self.make_index(nodes=nodes)
540
523
# lookup the keys in the middle of the file
541
524
result =index._lookup_keys_via_location(
542
525
[(index._size // 2, ('40', ))])
543
526
# check the parse map, this determines the test validity
544
527
self.assertEqual([(0, 4008), (5046, 8996)], index._parsed_byte_map)
545
self.assertEqual([(None, self.make_key(26)),
546
(self.make_key(31), self.make_key(48))],
528
self.assertEqual([(None, make_key(26)), (make_key(31), make_key(48))],
547
529
index._parsed_key_map)
548
530
# reset the transport log
549
531
del index._transport._activity[:]
552
534
# be in the index) - even when the byte location we ask for is outside
553
535
# the parsed region
555
result = index._lookup_keys_via_location([(4000, self.make_key(40))])
537
result = index._lookup_keys_via_location([(4000, make_key(40))])
556
538
self.assertEqual(
557
[((4000, self.make_key(40)),
558
(index, self.make_key(40), self.make_value(40)))],
539
[((4000, make_key(40)), (index, make_key(40), make_value(40)))],
560
541
self.assertEqual([], index._transport._activity)
562
543
def test_lookup_key_below_probed_area(self):
563
544
# generate a big enough index that we only read some of it on a typical
564
545
# bisection lookup.
565
index = self.make_index(nodes=self.make_nodes(64))
547
def make_key(number):
548
return (str(number) + 'X'*100,)
549
for counter in range(64):
550
nodes.append((make_key(counter), 'Y'*100, ()))
551
index = self.make_index(nodes=nodes)
566
552
# ask for the key in the middle, but a key that is located in the
567
553
# unparsed region before the middle.
568
554
result =index._lookup_keys_via_location(
569
555
[(index._size // 2, ('30', ))])
570
556
# check the parse map, this determines the test validity
571
self.assertEqual([(0, 4008), (5046, 8996)], index._parsed_byte_map)
572
self.assertEqual([(None, self.make_key(26)),
573
(self.make_key(31), self.make_key(48))],
557
self.assertEqual([(0, 3972), (5001, 8914)], index._parsed_byte_map)
558
self.assertEqual([(None, make_key(26)), (make_key(31), make_key(48))],
574
559
index._parsed_key_map)
575
560
self.assertEqual([((index._size // 2, ('30', )), -1)],
578
563
def test_lookup_key_above_probed_area(self):
579
564
# generate a big enough index that we only read some of it on a typical
580
565
# bisection lookup.
581
index = self.make_index(nodes=self.make_nodes(64))
567
def make_key(number):
568
return (str(number) + 'X'*100,)
569
for counter in range(64):
570
nodes.append((make_key(counter), 'Y'*100, ()))
571
index = self.make_index(nodes=nodes)
582
572
# ask for the key in the middle, but a key that is located in the
583
573
# unparsed region after the middle.
584
574
result =index._lookup_keys_via_location(
585
575
[(index._size // 2, ('50', ))])
586
576
# check the parse map, this determines the test validity
587
self.assertEqual([(0, 4008), (5046, 8996)], index._parsed_byte_map)
588
self.assertEqual([(None, self.make_key(26)),
589
(self.make_key(31), self.make_key(48))],
577
self.assertEqual([(0, 3972), (5001, 8914)], index._parsed_byte_map)
578
self.assertEqual([(None, make_key(26)), (make_key(31), make_key(48))],
590
579
index._parsed_key_map)
591
580
self.assertEqual([((index._size // 2, ('50', )), +1)],
595
584
# generate a big enough index that we only read some of it on a typical
596
585
# bisection lookup.
598
for counter in range(99):
599
nodes.append((self.make_key(counter), self.make_value(counter),
600
((self.make_key(counter + 20),),) ))
601
index = self.make_index(ref_lists=1, nodes=nodes)
602
# lookup a key in the middle that does not exist, so that when we can
603
# check that the referred-to-keys are not accessed automatically.
604
index_size = index._size
605
index_center = index_size // 2
606
result = index._lookup_keys_via_location(
607
[(index_center, ('40', ))])
608
# check the parse map - only the start and middle should have been
610
self.assertEqual([(0, 4027), (10198, 14028)], index._parsed_byte_map)
611
self.assertEqual([(None, self.make_key(17)),
612
(self.make_key(44), self.make_key(5))],
613
index._parsed_key_map)
614
# and check the transport activity likewise.
616
[('readv', 'index', [(index_center, 800), (0, 200)], True,
618
index._transport._activity)
619
# reset the transport log for testing the reference lookup
620
del index._transport._activity[:]
621
# now looking up a key in the portion of the file already parsed should
622
# only perform IO to resolve its key references.
623
result = index._lookup_keys_via_location([(11000, self.make_key(45))])
625
[((11000, self.make_key(45)),
626
(index, self.make_key(45), self.make_value(45),
627
((self.make_key(65),),)))],
629
self.assertEqual([('readv', 'index', [(15093, 800)], True, index_size)],
630
index._transport._activity)
632
def test_lookup_key_can_buffer_all(self):
587
def make_key(number):
588
return (str(number) + 'X'*100,)
589
def make_value(number):
590
return str(number) + 'Y'*100
634
591
for counter in range(64):
635
nodes.append((self.make_key(counter), self.make_value(counter),
636
((self.make_key(counter + 20),),) ))
592
nodes.append((make_key(counter), make_value(counter),
593
((make_key(counter + 20),),) ))
637
594
index = self.make_index(ref_lists=1, nodes=nodes)
638
595
# lookup a key in the middle that does not exist, so that when we can
639
596
# check that the referred-to-keys are not accessed automatically.
640
index_size = index._size
641
index_center = index_size // 2
642
result = index._lookup_keys_via_location([(index_center, ('40', ))])
597
result =index._lookup_keys_via_location(
598
[(index._size // 2, ('40', ))])
643
599
# check the parse map - only the start and middle should have been
645
601
self.assertEqual([(0, 3890), (6444, 10274)], index._parsed_byte_map)
646
self.assertEqual([(None, self.make_key(25)),
647
(self.make_key(37), self.make_key(52))],
602
self.assertEqual([(None, make_key(25)), (make_key(37), make_key(52))],
648
603
index._parsed_key_map)
649
604
# and check the transport activity likewise.
650
605
self.assertEqual(
651
[('readv', 'index', [(index_center, 800), (0, 200)], True,
606
[('readv', 'index', [(7906, 800), (0, 200)], True, 15813)],
653
607
index._transport._activity)
654
608
# reset the transport log for testing the reference lookup
655
609
del index._transport._activity[:]
656
610
# now looking up a key in the portion of the file already parsed should
657
611
# only perform IO to resolve its key references.
658
result = index._lookup_keys_via_location([(7000, self.make_key(40))])
612
result = index._lookup_keys_via_location([(4000, make_key(40))])
659
613
self.assertEqual(
660
[((7000, self.make_key(40)),
661
(index, self.make_key(40), self.make_value(40),
662
((self.make_key(60),),)))],
614
[((4000, make_key(40)),
615
(index, make_key(40), make_value(40), ((make_key(60),),)))],
664
# Resolving the references would have required more data read, and we
665
# are already above the 50% threshold, so it triggered a _buffer_all
666
self.assertEqual([('get', 'index')], index._transport._activity)
617
self.assertEqual([('readv', 'index', [(11976, 800)], True, 15813)],
618
index._transport._activity)
668
620
def test_iter_all_entries_empty(self):
669
621
index = self.make_index()
688
640
(index, ('ref', ), 'refdata', ((), ))]),
689
641
set(index.iter_all_entries()))
691
def test_iter_entries_buffers_once(self):
692
index = self.make_index(nodes=self.make_nodes(2))
693
# reset the transport log
694
del index._transport._activity[:]
695
self.assertEqual(set([(index, self.make_key(1), self.make_value(1))]),
696
set(index.iter_entries([self.make_key(1)])))
697
# We should have requested reading the header bytes
698
# But not needed any more than that because it would have triggered a
701
('readv', 'index', [(0, 200)], True, index._size),
703
index._transport._activity)
704
# And that should have been enough to trigger reading the whole index
706
self.assertIsNot(None, index._nodes)
708
def test_iter_entries_buffers_by_bytes_read(self):
709
index = self.make_index(nodes=self.make_nodes(64))
710
list(index.iter_entries([self.make_key(10)]))
711
# The first time through isn't enough to trigger a buffer all
712
self.assertIs(None, index._nodes)
713
self.assertEqual(4096, index._bytes_read)
714
# Grabbing a key in that same page won't trigger a buffer all, as we
715
# still haven't read 50% of the file
716
list(index.iter_entries([self.make_key(11)]))
717
self.assertIs(None, index._nodes)
718
self.assertEqual(4096, index._bytes_read)
719
# We haven't read more data, so reading outside the range won't trigger
720
# a buffer all right away
721
list(index.iter_entries([self.make_key(40)]))
722
self.assertIs(None, index._nodes)
723
self.assertEqual(8192, index._bytes_read)
724
# On the next pass, we will not trigger buffer all if the key is
725
# available without reading more
726
list(index.iter_entries([self.make_key(32)]))
727
self.assertIs(None, index._nodes)
728
# But if we *would* need to read more to resolve it, then we will
730
list(index.iter_entries([self.make_key(60)]))
731
self.assertIsNot(None, index._nodes)
733
643
def test_iter_entries_references_resolved(self):
734
644
index = self.make_index(1, nodes=[
735
645
(('name', ), 'data', ([('ref', ), ('ref', )], )),
853
763
(('name', ), '', ()), (('foo', ), '', ())])
854
764
self.assertEqual(2, index.key_count())
856
def test_read_and_parse_tracks_real_read_value(self):
857
index = self.make_index(nodes=self.make_nodes(10))
858
del index._transport._activity[:]
859
index._read_and_parse([(0, 200)])
861
('readv', 'index', [(0, 200)], True, index._size),
863
index._transport._activity)
864
# The readv expansion code will expand the initial request to 4096
865
# bytes, which is more than enough to read the entire index, and we
866
# will track the fact that we read that many bytes.
867
self.assertEqual(index._size, index._bytes_read)
869
def test_read_and_parse_triggers_buffer_all(self):
870
index = self.make_index(key_elements=2, nodes=[
871
(('name', 'fin1'), 'data', ()),
872
(('name', 'fin2'), 'beta', ()),
873
(('ref', 'erence'), 'refdata', ())])
874
self.assertTrue(index._size > 0)
875
self.assertIs(None, index._nodes)
876
index._read_and_parse([(0, index._size)])
877
self.assertIsNot(None, index._nodes)
879
766
def test_validate_bad_index_errors(self):
880
767
trans = self.get_transport()
881
768
trans.put_bytes('name', "not an index\n")