813
813
('key1',): "this is a text\n"
814
"with a reasonable amount of compressible bytes\n",
814
"with a reasonable amount of compressible bytes\n"
815
"which can be shared between various other texts\n",
815
816
('key2',): "another text\n"
816
"with a reasonable amount of compressible bytes\n",
817
"with a reasonable amount of compressible bytes\n"
818
"which can be shared between various other texts\n",
817
819
('key3',): "yet another text which won't be extracted\n"
818
"with a reasonable amount of compressible bytes\n",
820
"with a reasonable amount of compressible bytes\n"
821
"which can be shared between various other texts\n",
819
822
('key4',): "this will be extracted\n"
820
823
"but references most of its bytes from\n"
821
824
"yet another text which won't be extracted\n"
822
"with a reasonable amount of compressible bytes\n",
825
"with a reasonable amount of compressible bytes\n"
826
"which can be shared between various other texts\n",
824
828
def make_block(self, key_to_text):
825
829
"""Create a GroupCompressBlock, filling it with the given texts."""
837
841
start, end = locations[key]
838
842
manager.add_factory(key, (), start, end)
844
def make_block_and_full_manager(self, texts):
    """Build a block from texts plus a manager that requests every key.

    :param texts: Mapping of key => text, handed to make_block().
    :return: (block, manager), where every key of texts, taken in sorted
        order, has been added to the manager.
    """
    key_locations, gc_block = self.make_block(texts)
    full_manager = groupcompress._LazyGroupContentManager(gc_block)
    ordered_keys = list(texts)
    ordered_keys.sort()
    for text_key in ordered_keys:
        self.add_key_to_manager(text_key, key_locations, gc_block,
                                full_manager)
    return gc_block, full_manager
840
851
def test_get_fulltexts(self):
841
852
locations, block = self.make_block(self._texts)
842
853
manager = groupcompress._LazyGroupContentManager(block)
934
945
self.assertEqual([('key1',), ('key4',)], result_order)
936
947
def test__check_rebuild_no_changes(self):
    """_check_rebuild_block() keeps the block when all keys are requested."""
    # make_block_and_full_manager adds every key to the manager, which
    # ensures that we won't rebuild. A separate hand-built block/manager is
    # not needed: it would be discarded as soon as the helper's result is
    # bound to the same names.
    block, manager = self.make_block_and_full_manager(self._texts)
    manager._check_rebuild_block()
    # The manager must still reference the very same block object.
    self.assertIs(block, manager._block)
971
976
self.assertEqual(('key4',), record.key)
972
977
self.assertEqual(self._texts[record.key],
973
978
record.get_bytes_as('fulltext'))
980
def test_check_is_well_utilized_all_keys(self):
    """Exercise check_is_well_utilized() on a manager holding every key."""
    gc_block, full_manager = self.make_block_and_full_manager(self._texts)
    # Before any threshold is adjusted the block is not well utilized.
    self.assertFalse(full_manager.check_is_well_utilized())
    # We can fake it, though, by lowering the recommended minimum size to
    # exactly the content length.
    full_manager._full_enough_block_size = gc_block._content_length
    self.assertTrue(full_manager.check_is_well_utilized())
    # One byte above the content length makes the check fail again.
    full_manager._full_enough_block_size = gc_block._content_length + 1
    self.assertFalse(full_manager.check_is_well_utilized())
    # Changing the mixed-block size has no effect, because this content is
    # considered to not be 'mixed'.
    full_manager._full_enough_mixed_block_size = gc_block._content_length
    self.assertFalse(full_manager.check_is_well_utilized())
994
def test_check_is_well_utilized_mixed_keys(self):
1000
texts[f1k1] = self._texts[('key1',)]
1001
texts[f1k2] = self._texts[('key2',)]
1002
texts[f2k1] = self._texts[('key3',)]
1003
texts[f2k2] = self._texts[('key4',)]
1004
block, manager = self.make_block_and_full_manager(texts)
1005
self.assertFalse(manager.check_is_well_utilized())
1006
manager._full_enough_block_size = block._content_length
1007
self.assertTrue(manager.check_is_well_utilized())
1008
manager._full_enough_block_size = block._content_length + 1
1009
self.assertFalse(manager.check_is_well_utilized())
1010
manager._full_enough_mixed_block_size = block._content_length
1011
self.assertTrue(manager.check_is_well_utilized())
1013
def test_check_is_well_utilized_partial_use(self):
1014
locations, block = self.make_block(self._texts)
1015
manager = groupcompress._LazyGroupContentManager(block)
1016
manager._full_enough_block_size = block._content_length
1017
self.add_key_to_manager(('key1',), locations, block, manager)
1018
self.add_key_to_manager(('key2',), locations, block, manager)
1019
# Just using the content from key1 and 2 is not enough to be considered
1021
self.assertFalse(manager.check_is_well_utilized())
1022
# However if we add key4, then we have enough, as we only require 75%
1024
self.add_key_to_manager(('key4',), locations, block, manager)
1025
self.assertTrue(manager.check_is_well_utilized())