594
600
record._manager._block._z_content)
595
601
self.assertEqual(8, num_records)
603
def test_insert_record_stream_packs_on_the_fly(self):
604
vf = self.make_test_vf(True, dir='source')
605
def grouped_stream(revision_ids, first_parents=()):
606
parents = first_parents
607
for revision_id in revision_ids:
609
record = versionedfile.FulltextContentFactory(
611
'some content that is\n'
612
'identical except for\n'
613
'revision_id:%s\n' % (revision_id,))
617
vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
619
vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
620
first_parents=(('d',),)))
621
# Now copy the blocks into another vf, and see that the
622
# insert_record_stream rebuilt a new block on-the-fly because of
624
vf2 = self.make_test_vf(True, dir='target')
625
vf2.insert_record_stream(vf.get_record_stream(
626
[(r,) for r in 'abcdefgh'], 'groupcompress', False))
627
stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
628
'groupcompress', False)
631
# All of the records should be recombined into a single block
633
for record in stream:
636
block = record._manager._block
638
self.assertIs(block, record._manager._block)
639
self.assertEqual(8, num_records)
597
641
def test__insert_record_stream_no_reuse_block(self):
598
642
vf = self.make_test_vf(True, dir='source')
599
643
def grouped_stream(revision_ids, first_parents=()):
702
746
" 0 8', \(\(\('a',\),\),\)\)")
749
class StubGCVF(object):
    """A canned stand-in for a GroupCompressVersionedFiles object.

    Exposes only the two members that _BatchingBlockFetcher touches: an
    (initially empty) ``_group_cache`` dict, and a ``_get_blocks`` method
    that replays a pre-recorded sequence of results.
    """

    def __init__(self, canned_get_blocks=None):
        # Tests poke cached blocks directly into this dict.
        self._group_cache = {}
        self._canned_get_blocks = canned_get_blocks or []

    def _get_blocks(self, read_memos):
        # The requested memos are deliberately ignored: whatever was
        # canned at construction time is replayed as-is.
        return iter(self._canned_get_blocks)
757
class Test_BatchingBlockFetcher(TestCaseWithGroupCompressVersionedFiles):
758
"""Simple whitebox unit tests for _BatchingBlockFetcher."""
760
def test_add_key_new_read_memo(self):
761
"""Adding a key with an uncached read_memo new to this batch adds that
762
read_memo to the list of memos to fetch.
764
# locations are: index_memo, ignored, parents, ignored
765
# where index_memo is: (idx, offset, len, factory_start, factory_end)
766
# and (idx, offset, size) is known as the 'read_memo', identifying the
768
read_memo = ('fake index', 100, 50)
770
('key',): (read_memo + (None, None), None, None, None)}
771
batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), locations)
772
total_size = batcher.add_key(('key',))
773
self.assertEqual(50, total_size)
774
self.assertEqual([('key',)], batcher.keys)
775
self.assertEqual([read_memo], batcher.memos_to_get)
777
def test_add_key_duplicate_read_memo(self):
778
"""read_memos that occur multiple times in a batch will only be fetched
781
read_memo = ('fake index', 100, 50)
782
# Two keys, both sharing the same read memo (but different overall
785
('key1',): (read_memo + (0, 1), None, None, None),
786
('key2',): (read_memo + (1, 2), None, None, None)}
787
batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), locations)
788
total_size = batcher.add_key(('key1',))
789
total_size = batcher.add_key(('key2',))
790
self.assertEqual(50, total_size)
791
self.assertEqual([('key1',), ('key2',)], batcher.keys)
792
self.assertEqual([read_memo], batcher.memos_to_get)
794
def test_add_key_cached_read_memo(self):
795
"""Adding a key with a cached read_memo will not cause that read_memo
796
to be added to the list to fetch.
798
read_memo = ('fake index', 100, 50)
800
gcvf._group_cache[read_memo] = 'fake block'
802
('key',): (read_memo + (None, None), None, None, None)}
803
batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
804
total_size = batcher.add_key(('key',))
805
self.assertEqual(0, total_size)
806
self.assertEqual([('key',)], batcher.keys)
807
self.assertEqual([], batcher.memos_to_get)
809
def test_yield_factories_empty(self):
    """A batcher that has had no keys added yields no factories."""
    fetcher = groupcompress._BatchingBlockFetcher(StubGCVF(), {})
    factories = list(fetcher.yield_factories())
    self.assertEqual([], factories)
814
def test_yield_factories_calls_get_blocks(self):
815
"""Uncached memos are retrieved via get_blocks."""
816
read_memo1 = ('fake index', 100, 50)
817
read_memo2 = ('fake index', 150, 40)
820
(read_memo1, groupcompress.GroupCompressBlock()),
821
(read_memo2, groupcompress.GroupCompressBlock())])
823
('key1',): (read_memo1 + (None, None), None, None, None),
824
('key2',): (read_memo2 + (None, None), None, None, None)}
825
batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
826
batcher.add_key(('key1',))
827
batcher.add_key(('key2',))
828
factories = list(batcher.yield_factories(full_flush=True))
829
self.assertLength(2, factories)
830
keys = [f.key for f in factories]
831
kinds = [f.storage_kind for f in factories]
832
self.assertEqual([('key1',), ('key2',)], keys)
833
self.assertEqual(['groupcompress-block', 'groupcompress-block'], kinds)
835
def test_yield_factories_flushing(self):
836
"""yield_factories holds back on yielding results from the final block
837
unless passed full_flush=True.
839
fake_block = groupcompress.GroupCompressBlock()
840
read_memo = ('fake index', 100, 50)
842
gcvf._group_cache[read_memo] = fake_block
844
('key',): (read_memo + (None, None), None, None, None)}
845
batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
846
batcher.add_key(('key',))
847
self.assertEqual([], list(batcher.yield_factories()))
848
factories = list(batcher.yield_factories(full_flush=True))
849
self.assertLength(1, factories)
850
self.assertEqual(('key',), factories[0].key)
851
self.assertEqual('groupcompress-block', factories[0].storage_kind)
705
854
class TestLazyGroupCompress(tests.TestCaseWithTransport):
708
857
('key1',): "this is a text\n"
709
"with a reasonable amount of compressible bytes\n",
858
"with a reasonable amount of compressible bytes\n"
859
"which can be shared between various other texts\n",
710
860
('key2',): "another text\n"
711
"with a reasonable amount of compressible bytes\n",
861
"with a reasonable amount of compressible bytes\n"
862
"which can be shared between various other texts\n",
712
863
('key3',): "yet another text which won't be extracted\n"
713
"with a reasonable amount of compressible bytes\n",
864
"with a reasonable amount of compressible bytes\n"
865
"which can be shared between various other texts\n",
714
866
('key4',): "this will be extracted\n"
715
867
"but references most of its bytes from\n"
716
868
"yet another text which won't be extracted\n"
717
"with a reasonable amount of compressible bytes\n",
869
"with a reasonable amount of compressible bytes\n"
870
"which can be shared between various other texts\n",
719
872
def make_block(self, key_to_text):
720
873
"""Create a GroupCompressBlock, filling it with the given texts."""
866
1020
self.assertEqual(('key4',), record.key)
867
1021
self.assertEqual(self._texts[record.key],
868
1022
record.get_bytes_as('fulltext'))
1024
def test_check_is_well_utilized_all_keys(self):
    """A full single-prefix block is 'well utilized' only once its content
    length reaches the recommended minimum block size."""
    blk, mgr = self.make_block_and_full_manager(self._texts)
    self.assertFalse(mgr.check_is_well_utilized())
    # Dropping the recommended minimum to exactly the content length
    # makes the block qualify.
    mgr._full_enough_block_size = blk._content_length
    self.assertTrue(mgr.check_is_well_utilized())
    # One byte above the content length and it no longer qualifies.
    mgr._full_enough_block_size = blk._content_length + 1
    self.assertFalse(mgr.check_is_well_utilized())
    # The mixed-block threshold has no effect here: all keys share a
    # prefix, so this content is not considered 'mixed'.
    mgr._full_enough_mixed_block_size = blk._content_length
    self.assertFalse(mgr.check_is_well_utilized())
1038
def test_check_is_well_utilized_mixed_keys(self):
1044
texts[f1k1] = self._texts[('key1',)]
1045
texts[f1k2] = self._texts[('key2',)]
1046
texts[f2k1] = self._texts[('key3',)]
1047
texts[f2k2] = self._texts[('key4',)]
1048
block, manager = self.make_block_and_full_manager(texts)
1049
self.assertFalse(manager.check_is_well_utilized())
1050
manager._full_enough_block_size = block._content_length
1051
self.assertTrue(manager.check_is_well_utilized())
1052
manager._full_enough_block_size = block._content_length + 1
1053
self.assertFalse(manager.check_is_well_utilized())
1054
manager._full_enough_mixed_block_size = block._content_length
1055
self.assertTrue(manager.check_is_well_utilized())
1057
def test_check_is_well_utilized_partial_use(self):
1058
locations, block = self.make_block(self._texts)
1059
manager = groupcompress._LazyGroupContentManager(block)
1060
manager._full_enough_block_size = block._content_length
1061
self.add_key_to_manager(('key1',), locations, block, manager)
1062
self.add_key_to_manager(('key2',), locations, block, manager)
1063
# Just using the content from key1 and 2 is not enough to be considered
1065
self.assertFalse(manager.check_is_well_utilized())
1066
# However if we add key3, then we have enough, as we only require 75%
1068
self.add_key_to_manager(('key4',), locations, block, manager)
1069
self.assertTrue(manager.check_is_well_utilized())