        self.assertEqual(frozenset([('parent-1',), ('parent-2',)]),
                         index.get_missing_parents())

    def make_source_with_b(self, a_parent, path):
        source = self.make_test_vf(True, dir=path)
        source.add_lines(('a',), (), ['lines\n'])
        if a_parent:
            b_parents = (('a',),)
        else:
            b_parents = ()
        source.add_lines(('b',), b_parents, ['lines\n'])
        return source

    def do_inconsistent_inserts(self, inconsistency_fatal):
        target = self.make_test_vf(True, dir='target',
                                   inconsistency_fatal=inconsistency_fatal)
        for x in range(2):
            source = self.make_source_with_b(x == 1, 'source%s' % x)
            target.insert_record_stream(source.get_record_stream(
                [('b',)], 'unordered', False))
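        # The two iterations record ('b',) with different parent details
        # (first with no parents, then with ('a',) as a parent), so the
        # second insert presents details that conflict with what 'target'
        # already stored for ('b',).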

    def test_inconsistent_redundant_inserts_warn(self):
        """Should not insert a record that is already present."""
        warnings = []
        def warning(template, args):
            warnings.append(template % args)
        _trace_warning = trace.warning
        trace.warning = warning
        try:
            self.do_inconsistent_inserts(inconsistency_fatal=False)
        finally:
            trace.warning = _trace_warning
        self.assertEqual(["inconsistent details in skipped record: ('b',)"
                          " ('42 32 0 8', ((),)) ('74 32 0 8', ((('a',),),))"],
                         warnings)

    def test_inconsistent_redundant_inserts_raises(self):
        e = self.assertRaises(errors.KnitCorrupt, self.do_inconsistent_inserts,
                              inconsistency_fatal=True)
        self.assertContainsRe(str(e), "Knit.* corrupt: inconsistent details"
                              " in add_records:"
                              " \('b',\) \('42 32 0 8', \(\(\),\)\) \('74 32"
                              " 0 8', \(\(\('a',\),\),\)\)")

    def test_clear_cache(self):
        vf = self.make_source_with_b(True, 'source')
        vf.writer.end()
        for record in vf.get_record_stream([('a',), ('b',)], 'unordered',
                                           True):
            pass
        self.assertTrue(len(vf._group_cache) > 0)
        vf.clear_cache()
        self.assertEqual(0, len(vf._group_cache))


class TestGroupCompressConfig(tests.TestCaseWithTransport):

    def make_test_vf(self):
        t = self.get_transport('.')
        t.ensure_base()
        factory = groupcompress.make_pack_factory(graph=True,
            delta=False, keylength=1, inconsistency_fatal=True)
        vf = factory(t)
        self.addCleanup(groupcompress.cleanup_pack_group, vf)
        return vf

    def test_max_bytes_to_index_default(self):
        vf = self.make_test_vf()
        gc = vf._make_group_compressor()
        self.assertEqual(vf._DEFAULT_MAX_BYTES_TO_INDEX,
                         vf._max_bytes_to_index)
        if isinstance(gc, groupcompress.PyrexGroupCompressor):
            self.assertEqual(vf._DEFAULT_MAX_BYTES_TO_INDEX,
                             gc._delta_index._max_bytes_to_index)
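        # (The guard exists because only the compiled compressor's delta
        # index tracks _max_bytes_to_index; the pure-python fallback has
        # nothing equivalent to inspect.)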

    def test_max_bytes_to_index_in_config(self):
        c = config.GlobalConfig()
        c.set_user_option('bzr.groupcompress.max_bytes_to_index', '10000')
        vf = self.make_test_vf()
        gc = vf._make_group_compressor()
        self.assertEqual(10000, vf._max_bytes_to_index)
        if isinstance(gc, groupcompress.PyrexGroupCompressor):
            self.assertEqual(10000, gc._delta_index._max_bytes_to_index)

    def test_max_bytes_to_index_bad_config(self):
        c = config.GlobalConfig()
        c.set_user_option('bzr.groupcompress.max_bytes_to_index', 'boogah')
        vf = self.make_test_vf()
        # TODO: This is triggering a warning, we might want to trap and make
        #       sure it is readable.
        gc = vf._make_group_compressor()
        self.assertEqual(vf._DEFAULT_MAX_BYTES_TO_INDEX,
                         vf._max_bytes_to_index)
        if isinstance(gc, groupcompress.PyrexGroupCompressor):
            self.assertEqual(vf._DEFAULT_MAX_BYTES_TO_INDEX,
                             gc._delta_index._max_bytes_to_index)


class StubGCVF(object):
    def __init__(self, canned_get_blocks=None):
        self._group_cache = {}
        self._canned_get_blocks = canned_get_blocks or []
    def _get_blocks(self, read_memos):
        return iter(self._canned_get_blocks)
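
    # Note: the tests below only exercise _BatchingBlockFetcher through
    # ._group_cache (to find blocks already in memory) and ._get_blocks()
    # (to fetch everything else), so this stub doesn't need to fake any more
    # of the GroupCompressVersionedFiles API.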


class Test_BatchingBlockFetcher(TestCaseWithGroupCompressVersionedFiles):
    """Simple whitebox unit tests for _BatchingBlockFetcher."""

    def test_add_key_new_read_memo(self):
        """Adding a key with an uncached read_memo new to this batch adds that
        read_memo to the list of memos to fetch.
        """
        # locations are: index_memo, ignored, parents, ignored
        # where index_memo is: (idx, offset, len, factory_start, factory_end)
        # and (idx, offset, size) is known as the 'read_memo', identifying the
        # raw bytes needed.
        read_memo = ('fake index', 100, 50)
        locations = {
            ('key',): (read_memo + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), locations)
        total_size = batcher.add_key(('key',))
        self.assertEqual(50, total_size)
        self.assertEqual([('key',)], batcher.keys)
        self.assertEqual([read_memo], batcher.memos_to_get)
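        # (add_key derives the read_memo by slicing the index_memo,
        # index_memo[0:3]; only (idx, offset, len) are needed to identify
        # the raw bytes to read.)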

    def test_add_key_duplicate_read_memo(self):
        """read_memos that occur multiple times in a batch will only be fetched
        once.
        """
        read_memo = ('fake index', 100, 50)
        # Two keys, both sharing the same read memo (but different overall
        # index memos).
        locations = {
            ('key1',): (read_memo + (0, 1), None, None, None),
            ('key2',): (read_memo + (1, 2), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), locations)
        total_size = batcher.add_key(('key1',))
        total_size = batcher.add_key(('key2',))
        self.assertEqual(50, total_size)
        self.assertEqual([('key1',), ('key2',)], batcher.keys)
        self.assertEqual([read_memo], batcher.memos_to_get)

    def test_add_key_cached_read_memo(self):
        """Adding a key with a cached read_memo will not cause that read_memo
        to be added to the list to fetch.
        """
        read_memo = ('fake index', 100, 50)
        gcvf = StubGCVF()
        gcvf._group_cache[read_memo] = 'fake block'
        locations = {
            ('key',): (read_memo + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
        total_size = batcher.add_key(('key',))
        self.assertEqual(0, total_size)
        self.assertEqual([('key',)], batcher.keys)
        self.assertEqual([], batcher.memos_to_get)

    def test_yield_factories_empty(self):
        """An empty batch yields no factories."""
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), {})
        self.assertEqual([], list(batcher.yield_factories()))

    def test_yield_factories_calls_get_blocks(self):
        """Uncached memos are retrieved via get_blocks."""
        read_memo1 = ('fake index', 100, 50)
        read_memo2 = ('fake index', 150, 40)
        gcvf = StubGCVF(canned_get_blocks=[
            (read_memo1, groupcompress.GroupCompressBlock()),
            (read_memo2, groupcompress.GroupCompressBlock())])
        locations = {
            ('key1',): (read_memo1 + (None, None), None, None, None),
            ('key2',): (read_memo2 + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
        batcher.add_key(('key1',))
        batcher.add_key(('key2',))
        factories = list(batcher.yield_factories(full_flush=True))
        self.assertLength(2, factories)
        keys = [f.key for f in factories]
        kinds = [f.storage_kind for f in factories]
        self.assertEqual([('key1',), ('key2',)], keys)
        self.assertEqual(['groupcompress-block', 'groupcompress-block'], kinds)

    def test_yield_factories_flushing(self):
        """yield_factories holds back on yielding results from the final block
        unless passed full_flush=True.
        """
        fake_block = groupcompress.GroupCompressBlock()
        read_memo = ('fake index', 100, 50)
        gcvf = StubGCVF()
        gcvf._group_cache[read_memo] = fake_block
        locations = {
            ('key',): (read_memo + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
        batcher.add_key(('key',))
        self.assertEqual([], list(batcher.yield_factories()))
        factories = list(batcher.yield_factories(full_flush=True))
        self.assertLength(1, factories)
        self.assertEqual(('key',), factories[0].key)
        self.assertEqual('groupcompress-block', factories[0].storage_kind)
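        # (The final block is held back because, until the caller forces a
        # flush, more keys living in that same block may still join the
        # batch.)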


class TestLazyGroupCompress(tests.TestCaseWithTransport):

    _texts = {
        ('key1',): "this is a text\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
        ('key2',): "another text\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
        ('key3',): "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
        ('key4',): "this will be extracted\n"
                   "but references most of its bytes from\n"
                   "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
    }

    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts."""
        self.assertEqual(('key4',), record.key)
        self.assertEqual(self._texts[record.key],
                         record.get_bytes_as('fulltext'))

    def test_manager_default_compressor_settings(self):
        locations, old_block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(old_block)
        gcvf = groupcompress.GroupCompressVersionedFiles
        # It doesn't greedily evaluate _max_bytes_to_index
        self.assertIs(None, manager._compressor_settings)
        self.assertEqual(gcvf._DEFAULT_COMPRESSOR_SETTINGS,
                         manager._get_compressor_settings())

    def test_manager_custom_compressor_settings(self):
        locations, old_block = self.make_block(self._texts)
        called = []
        def compressor_settings():
            called.append('called')
            return (10,)
        manager = groupcompress._LazyGroupContentManager(old_block,
            get_compressor_settings=compressor_settings)
        gcvf = groupcompress.GroupCompressVersionedFiles
        # It doesn't greedily evaluate compressor_settings
        self.assertIs(None, manager._compressor_settings)
        self.assertEqual((10,), manager._get_compressor_settings())
        self.assertEqual((10,), manager._get_compressor_settings())
        self.assertEqual((10,), manager._compressor_settings)
        # Only called 1 time
        self.assertEqual(['called'], called)

    def test__rebuild_handles_compressor_settings(self):
        if (groupcompress.GroupCompressor
                is not groupcompress.PyrexGroupCompressor):
            raise tests.TestNotApplicable('pure-python compressor'
                ' does not handle compressor_settings')
        locations, old_block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(old_block,
            get_compressor_settings=lambda: dict(max_bytes_to_index=32))
        gc = manager._make_group_compressor()
        self.assertEqual(32, gc._delta_index._max_bytes_to_index)
        self.add_key_to_manager(('key3',), locations, old_block, manager)
        self.add_key_to_manager(('key4',), locations, old_block, manager)
        action, last_byte, total_bytes = manager._check_rebuild_action()
        self.assertEqual('rebuild', action)
        manager._rebuild_block()
        new_block = manager._block
        self.assertIsNot(old_block, new_block)
        # Because of the new max_bytes_to_index, we do a poor job of
        # rebuilding. This is a side-effect of the change, but at least it
        # shows the setting had an effect.
        self.assertTrue(old_block._content_length < new_block._content_length)

    def test_check_is_well_utilized_all_keys(self):
        block, manager = self.make_block_and_full_manager(self._texts)
        self.assertFalse(manager.check_is_well_utilized())
        # Though we can fake it by changing the recommended minimum size
        manager._full_enough_block_size = block._content_length
        self.assertTrue(manager.check_is_well_utilized())
        # Setting it just above causes it to fail
        manager._full_enough_block_size = block._content_length + 1
        self.assertFalse(manager.check_is_well_utilized())
        # Setting the mixed-block size doesn't do anything, because the content
        # is considered to not be 'mixed'
        manager._full_enough_mixed_block_size = block._content_length
        self.assertFalse(manager.check_is_well_utilized())

    def test_check_is_well_utilized_mixed_keys(self):
        texts = {}
        f1k1 = ('f1', 'k1')
        f1k2 = ('f1', 'k2')
        f2k1 = ('f2', 'k1')
        f2k2 = ('f2', 'k2')
        texts[f1k1] = self._texts[('key1',)]
        texts[f1k2] = self._texts[('key2',)]
        texts[f2k1] = self._texts[('key3',)]
        texts[f2k2] = self._texts[('key4',)]
        block, manager = self.make_block_and_full_manager(texts)
        self.assertFalse(manager.check_is_well_utilized())
        manager._full_enough_block_size = block._content_length
        self.assertTrue(manager.check_is_well_utilized())
        manager._full_enough_block_size = block._content_length + 1
        self.assertFalse(manager.check_is_well_utilized())
        manager._full_enough_mixed_block_size = block._content_length
        self.assertTrue(manager.check_is_well_utilized())
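        # ('Mixed' here means keys spanning more than one prefix, f1 and f2
        # above, which is why the mixed-block threshold applies to this
        # block, unlike in the all_keys test.)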

    def test_check_is_well_utilized_partial_use(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        manager._full_enough_block_size = block._content_length
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key2',), locations, block, manager)
        # Just using the content from key1 and 2 is not enough to be considered
        # 'well utilized'
        self.assertFalse(manager.check_is_well_utilized())
        # However if we add key4, then we have enough, as we only require 75%
        # consumption
        self.add_key_to_manager(('key4',), locations, block, manager)
        self.assertTrue(manager.check_is_well_utilized())


class Test_GCBuildDetails(tests.TestCase):

    def test_acts_like_tuple(self):
        # _GCBuildDetails inlines some of the data that used to be spread out
        # across a bunch of tuples
        bd = groupcompress._GCBuildDetails((('parent1',), ('parent2',)),
            ('INDEX', 10, 20, 0, 5))
        self.assertEqual(4, len(bd))
        self.assertEqual(('INDEX', 10, 20, 0, 5), bd[0])
        self.assertEqual(None, bd[1]) # Compression Parent is always None
        self.assertEqual((('parent1',), ('parent2',)), bd[2])
        self.assertEqual(('group', None), bd[3]) # Record details
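        # Taken together, bd unpacks like the old 4-tuple:
        #   (index_memo, compression_parent, parents, record_details)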

    def test__repr__(self):
        bd = groupcompress._GCBuildDetails((('parent1',), ('parent2',)),
            ('INDEX', 10, 20, 0, 5))
        self.assertEqual("_GCBuildDetails(('INDEX', 10, 20, 0, 5),"
                         " (('parent1',), ('parent2',)))",
                         repr(bd))