        block._z_content = z_content
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = 158634
        self.assertIs(None, block._content)
        block._ensure_content(100)
        self.assertIsNot(None, block._content)
        # We have decompressed at least 100 bytes
        self.assertTrue(len(block._content) >= 100)
        # We have not decompressed the whole content
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # ensuring content that we already have shouldn't cause any more data
        # to be extracted
        cur_len = len(block._content)
        block._ensure_content(cur_len - 10)
        self.assertEqual(cur_len, len(block._content))
        # Now we want a bit more content
        cur_len += 10
        block._ensure_content(cur_len)
        self.assertTrue(len(block._content) >= cur_len)
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # And now lets finish; the final _ensure_content gets all of the
        # required data
        block._ensure_content(158634)
        self.assertEqualDiff(content, block._content)
        # And we should have released the _z_content_decompressor since it was
        # fully consumed
        self.assertIs(None, block._z_content_decompressor)
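
        # For context: the `content`/`z_content` fixture used above is built
        # earlier in this test (elided from this excerpt). A sketch of that
        # setup, assuming the osutils.sha_string helper and the Python 2
        # idioms used throughout this module:
        #
        #   content_chunks = []
        #   for i in xrange(2048):
        #       next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
        #       content_chunks.append(next_content)
        #       content_chunks.append(osutils.sha_string(next_content) + '\n')
        #   content = ''.join(content_chunks)  # the 158634 bytes asserted above
        #   z_content = zlib.compress(content)
        #
        # The sha hashes keep the data from compressing too well, so zlib
        # genuinely has to decompress it incrementally.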

    def test__dump(self):
        dup_content = 'some duplicate content\nwhich is sufficiently long\n'
        key_to_text = {('1',): dup_content + '1 unique\n',
                       ('2',): dup_content + '2 extra special\n'}
        locs, block = self.make_block(key_to_text)
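        # A rough decoding of the expected records below, inferred from this
        # test rather than from groupcompress documentation: 'f' is a fulltext
        # record, 'd' a delta record (delta size, expanded text size,
        # instructions), 'c' a copy-from-earlier-bytes instruction, and 'i' an
        # insert of literal new bytes.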
        self.assertEqual([('f', len(key_to_text[('1',)])),
                          ('d', 21, len(key_to_text[('2',)]),
                           [('c', 2, len(dup_content)),
                            ('i', len('2 extra special\n'), '')
                           ]),
                         ], block._dump())


class TestCaseWithGroupCompressVersionedFiles(
        tests.TestCaseWithMemoryTransport):

    def make_test_vf(self, create_graph, keylength=1, do_cleanup=True,
                     dir='.', inconsistency_fatal=True):
        t = self.get_transport(dir)
        t.ensure_base()
        vf = groupcompress.make_pack_factory(graph=create_graph,
            delta=False, keylength=keylength,
            inconsistency_fatal=inconsistency_fatal)(t)
        if do_cleanup:
            self.addCleanup(groupcompress.cleanup_pack_group, vf)
        return vf
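
    # Typical use, as in make_source_with_b below:
    #
    #   vf = self.make_test_vf(True, dir='source')
    #   vf.add_lines(('a',), (), ['lines\n'])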

        self.assertIs(block, record._manager._block)

    def test_add_missing_noncompression_parent_unvalidated_index(self):
        unvalidated = self.make_g_index_missing_parent()
        combined = _mod_index.CombinedGraphIndex([unvalidated])
        index = groupcompress._GCGraphIndex(combined,
            is_locked=lambda: True, parents=True,
            track_external_parent_refs=True)
        index.scan_unvalidated_index(unvalidated)
        self.assertEqual(
            frozenset([('missing-parent',)]), index.get_missing_parents())

    def test_track_external_parent_refs(self):
        g_index = self.make_g_index('empty', 1, [])
        mod_index = btree_index.BTreeBuilder(1, 1)
        combined = _mod_index.CombinedGraphIndex([g_index, mod_index])
        index = groupcompress._GCGraphIndex(combined,
            is_locked=lambda: True, parents=True,
            add_callback=mod_index.add_nodes,
            track_external_parent_refs=True)
        index.add_records([
            (('new-key',), '2 10 2 10', [(('parent-1',), ('parent-2',))])])
        self.assertEqual(
            frozenset([('parent-1',), ('parent-2',)]),
            index.get_missing_parents())

    def make_source_with_b(self, a_parent, path):
        source = self.make_test_vf(True, dir=path)
        source.add_lines(('a',), (), ['lines\n'])
        if a_parent:
            b_parents = (('a',),)
        else:
            b_parents = ()
        source.add_lines(('b',), b_parents, ['lines\n'])
        return source

    def do_inconsistent_inserts(self, inconsistency_fatal):
        target = self.make_test_vf(True, dir='target',
                                   inconsistency_fatal=inconsistency_fatal)
        for x in range(2):
            source = self.make_source_with_b(x==1, 'source%s' % x)
            target.insert_record_stream(source.get_record_stream(
                [('b',)], 'unordered', False))
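        # Why this is 'inconsistent': the first source records ('b',) with no
        # parents (a_parent is False for x=0), while the second records it
        # with ('a',) as a parent (x=1). Streaming both into one target means
        # the second insert disagrees with the record already present.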

    def test_inconsistent_redundant_inserts_warn(self):
        """Should not insert a record that is already present."""
        warnings = []
        def warning(template, args):
            warnings.append(template % args)
        _trace_warning = trace.warning
        trace.warning = warning
        try:
            self.do_inconsistent_inserts(inconsistency_fatal=False)
        finally:
            trace.warning = _trace_warning # restore the real warning function
        self.assertEqual(["inconsistent details in skipped record: ('b',)"
                          " ('42 32 0 8', ((),)) ('74 32 0 8', ((('a',),),))"],
                         warnings)

    def test_inconsistent_redundant_inserts_raises(self):
        e = self.assertRaises(errors.KnitCorrupt, self.do_inconsistent_inserts,
                              inconsistency_fatal=True)
        self.assertContainsRe(str(e), "Knit.* corrupt: inconsistent details"
                              " in add_records:"
                              " \('b',\) \('42 32 0 8', \(\(\),\)\) \('74 32"
                              " 0 8', \(\(\('a',\),\),\)\)")

    def test_clear_cache(self):
        vf = self.make_source_with_b(True, 'source')
        vf.writer.end()
        for record in vf.get_record_stream([('a',), ('b',)], 'unordered',
                                           True):
            pass
        self.assertTrue(len(vf._group_cache) > 0)
        vf.clear_cache()
        self.assertEqual(0, len(vf._group_cache))


class StubGCVF(object):
    """Minimal stand-in for the GroupCompressVersionedFiles API that
    _BatchingBlockFetcher relies on: a _group_cache dict and a _get_blocks()
    method returning canned results.
    """

    def __init__(self, canned_get_blocks=None):
        self._group_cache = {}
        self._canned_get_blocks = canned_get_blocks or []

    def _get_blocks(self, read_memos):
        return iter(self._canned_get_blocks)


class Test_BatchingBlockFetcher(TestCaseWithGroupCompressVersionedFiles):
    """Simple whitebox unit tests for _BatchingBlockFetcher."""

    def test_add_key_new_read_memo(self):
        """Adding a key with an uncached read_memo new to this batch adds that
        read_memo to the list of memos to fetch.
        """
        # locations are: index_memo, ignored, parents, ignored
        # where index_memo is: (idx, offset, len, factory_start, factory_end)
        # and (idx, offset, size) is known as the 'read_memo', identifying the
        # raw bytes needed.
        read_memo = ('fake index', 100, 50)
        locations = {
            ('key',): (read_memo + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), locations)
        total_size = batcher.add_key(('key',))
        self.assertEqual(50, total_size)
        self.assertEqual([('key',)], batcher.keys)
        self.assertEqual([read_memo], batcher.memos_to_get)
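        # Note how the numbers line up: the read_memo is index_memo[0:3], and
        # its third element (50) is the byte size of the raw group, which is
        # what add_key reports back as total_size.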

    def test_add_key_duplicate_read_memo(self):
        """read_memos that occur multiple times in a batch will only be fetched
        once.
        """
        read_memo = ('fake index', 100, 50)
        # Two keys, both sharing the same read memo (but different overall
        # index_memos).
        locations = {
            ('key1',): (read_memo + (0, 1), None, None, None),
            ('key2',): (read_memo + (1, 2), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), locations)
        total_size = batcher.add_key(('key1',))
        total_size = batcher.add_key(('key2',))
        self.assertEqual(50, total_size)
        self.assertEqual([('key1',), ('key2',)], batcher.keys)
        self.assertEqual([read_memo], batcher.memos_to_get)

    def test_add_key_cached_read_memo(self):
        """Adding a key with a cached read_memo will not cause that read_memo
        to be added to the list to fetch.
        """
        read_memo = ('fake index', 100, 50)
        gcvf = StubGCVF()
        gcvf._group_cache[read_memo] = 'fake block'
        locations = {
            ('key',): (read_memo + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
        total_size = batcher.add_key(('key',))
        self.assertEqual(0, total_size)
        self.assertEqual([('key',)], batcher.keys)
        self.assertEqual([], batcher.memos_to_get)

    def test_yield_factories_empty(self):
        """An empty batch yields no factories."""
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), {})
        self.assertEqual([], list(batcher.yield_factories()))

    def test_yield_factories_calls_get_blocks(self):
        """Uncached memos are retrieved via get_blocks."""
        read_memo1 = ('fake index', 100, 50)
        read_memo2 = ('fake index', 150, 40)
        gcvf = StubGCVF(
            canned_get_blocks=[
                (read_memo1, groupcompress.GroupCompressBlock()),
                (read_memo2, groupcompress.GroupCompressBlock())])
        locations = {
            ('key1',): (read_memo1 + (None, None), None, None, None),
            ('key2',): (read_memo2 + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
        batcher.add_key(('key1',))
        batcher.add_key(('key2',))
        factories = list(batcher.yield_factories(full_flush=True))
        self.assertLength(2, factories)
        keys = [f.key for f in factories]
        kinds = [f.storage_kind for f in factories]
        self.assertEqual([('key1',), ('key2',)], keys)
        self.assertEqual(['groupcompress-block', 'groupcompress-block'], kinds)

    def test_yield_factories_flushing(self):
        """yield_factories holds back on yielding results from the final block
        unless passed full_flush=True.
        """
        fake_block = groupcompress.GroupCompressBlock()
        read_memo = ('fake index', 100, 50)
        gcvf = StubGCVF()
        gcvf._group_cache[read_memo] = fake_block
        locations = {
            ('key',): (read_memo + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
        batcher.add_key(('key',))
        self.assertEqual([], list(batcher.yield_factories()))
        factories = list(batcher.yield_factories(full_flush=True))
        self.assertLength(1, factories)
        self.assertEqual(('key',), factories[0].key)
        self.assertEqual('groupcompress-block', factories[0].storage_kind)
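        # As the docstring above suggests, holding back the final block is
        # what makes batching work: until full_flush, more keys may still be
        # added that land in that same block, so the fetcher defers yielding
        # it rather than handing out a block it is still accumulating against.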


class TestLazyGroupCompress(tests.TestCaseWithTransport):

    _texts = {
        ('key1',): "this is a text\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
        ('key2',): "another text\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
        ('key3',): "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
        ('key4',): "this will be extracted\n"
                   "but references most of its bytes from\n"
                   "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
    }

    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts."""

        self.assertEqual(('key4',), record.key)
        self.assertEqual(self._texts[record.key],
                         record.get_bytes_as('fulltext'))

    def test_check_is_well_utilized_all_keys(self):
        block, manager = self.make_block_and_full_manager(self._texts)
        self.assertFalse(manager.check_is_well_utilized())
        # Though we can fake it by changing the recommended minimum size
        manager._full_enough_block_size = block._content_length
        self.assertTrue(manager.check_is_well_utilized())
        # Setting it just above causes it to fail
        manager._full_enough_block_size = block._content_length + 1
        self.assertFalse(manager.check_is_well_utilized())
        # Setting the mixed-block size doesn't do anything, because the content
        # is considered to not be 'mixed'
        manager._full_enough_mixed_block_size = block._content_length
        self.assertFalse(manager.check_is_well_utilized())

    def test_check_is_well_utilized_mixed_keys(self):
        texts = {}
        f1k1 = ('f1', 'k1')
        f1k2 = ('f1', 'k2')
        f2k1 = ('f2', 'k1')
        f2k2 = ('f2', 'k2')
        texts[f1k1] = self._texts[('key1',)]
        texts[f1k2] = self._texts[('key2',)]
        texts[f2k1] = self._texts[('key3',)]
        texts[f2k2] = self._texts[('key4',)]
        block, manager = self.make_block_and_full_manager(texts)
        self.assertFalse(manager.check_is_well_utilized())
        manager._full_enough_block_size = block._content_length
        self.assertTrue(manager.check_is_well_utilized())
        manager._full_enough_block_size = block._content_length + 1
        self.assertFalse(manager.check_is_well_utilized())
        manager._full_enough_mixed_block_size = block._content_length
        self.assertTrue(manager.check_is_well_utilized())
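        # Unlike test_check_is_well_utilized_all_keys above, the keys here
        # span two prefixes ('f1', 'f2'), so the block counts as 'mixed' and
        # the _full_enough_mixed_block_size threshold now takes effect.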

    def test_check_is_well_utilized_partial_use(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        manager._full_enough_block_size = block._content_length
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key2',), locations, block, manager)
        # Just using the content from key1 and 2 is not enough to be considered
        # 'complete'
        self.assertFalse(manager.check_is_well_utilized())
        # However if we add key4, then we have enough, as we only require 75%
        # consumption
        self.add_key_to_manager(('key4',), locations, block, manager)
        self.assertTrue(manager.check_is_well_utilized())


class Test_GCBuildDetails(tests.TestCase):

    def test_acts_like_tuple(self):
        # _GCBuildDetails inlines some of the data that used to be spread out
        # across a bunch of tuples
        bd = groupcompress._GCBuildDetails((('parent1',), ('parent2',)),
            ('INDEX', 10, 20, 0, 5))
        self.assertEqual(4, len(bd))
        self.assertEqual(('INDEX', 10, 20, 0, 5), bd[0])
        self.assertEqual(None, bd[1]) # Compression Parent is always None
        self.assertEqual((('parent1',), ('parent2',)), bd[2])
        self.assertEqual(('group', None), bd[3]) # Record details
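        # For reference, the four positions appear to mirror the older
        # get_build_details tuple layout: (index_memo, compression_parent,
        # parents, record_details).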

    def test__repr__(self):
        bd = groupcompress._GCBuildDetails((('parent1',), ('parent2',)),
            ('INDEX', 10, 20, 0, 5))
        self.assertEqual("_GCBuildDetails(('INDEX', 10, 20, 0, 5),"
                         " (('parent1',), ('parent2',)))",
                         repr(bd))