        self.assertEqual(z_content, block._z_content)
        self.assertEqual(content, block._content)

    def test_to_chunks(self):
        content_chunks = ['this is some content\n',
                          'this content will be compressed\n']
        content_len = sum(map(len, content_chunks))
        content = ''.join(content_chunks)
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_chunked_content(content_chunks, content_len)
        total_len, block_chunks = gcb.to_chunks()
        block_bytes = ''.join(block_chunks)
        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
        self.assertEqual(total_len, len(block_bytes))
        self.assertEqual(gcb._content_length, content_len)
        expected_header = ('gcb1z\n' # group compress block v1 zlib
                           '%d\n' # Length of compressed content
                           '%d\n' # Length of uncompressed content
                          ) % (gcb._z_content_length, gcb._content_length)
        # The first chunk should be the header chunk. It is small, fixed
        # size, and there is no compelling reason to split it up.
        self.assertEqual(expected_header, block_chunks[0])
        self.assertStartsWith(block_bytes, expected_header)
        remaining_bytes = block_bytes[len(expected_header):]
        raw_bytes = zlib.decompress(remaining_bytes)
        self.assertEqual(content, raw_bytes)
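
    # A minimal sketch, not part of the GroupCompressBlock API, of how the
    # three-line 'gcb1z' header checked above can be taken apart again
    # (GroupCompressBlock.from_bytes is the real reader):
    def _parse_gcb1z_sketch(self, block_bytes):
        header, z_len, c_len, payload = block_bytes.split('\n', 3)
        self.assertEqual('gcb1z', header)             # v1, zlib compressed
        self.assertEqual(int(z_len), len(payload))    # compressed length
        content = zlib.decompress(payload)
        self.assertEqual(int(c_len), len(content))    # uncompressed length
        return content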

    def test_to_bytes(self):
        content = ('this is some content\n'
                   'this content will be compressed\n')
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_content(content)
        # to_bytes() produces the same 'gcb1z' header-plus-zlib layout that
        # test_to_chunks verifies chunk by chunk, just as a single string.
        self.assertStartsWith(gcb.to_bytes(), 'gcb1z\n')

    def test_partial_decomp_no_known_length(self):
        content_chunks = []
        # We need enough content that zlib has to return it incrementally;
        # auto-generated text compresses a bit too well, so mix sha1 hashes
        # in with the compressible text.
        for i in xrange(2048):
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
            content_chunks.append(next_content)
            next_sha1 = osutils.sha_string(next_content)
            content_chunks.append(next_sha1 + '\n')
        content = ''.join(content_chunks)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content_chunks = (z_content,)
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = None # Don't tell the decompressed length
        self.assertIs(None, block._content)
        block._ensure_content(100)
        self.assertIsNot(None, block._content)
        # We have decompressed at least 100 bytes
        self.assertTrue(len(block._content) >= 100)
        # We have not decompressed the whole content
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # Ensuring content that we already have shouldn't cause any more data
        # to be extracted
        cur_len = len(block._content)
        block._ensure_content(cur_len - 10)
        self.assertEqual(cur_len, len(block._content))
        # Now we want a bit more content
        cur_len += 10
        block._ensure_content(cur_len)
        self.assertTrue(len(block._content) >= cur_len)
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # And now lets finish
        block._ensure_content()
        self.assertEqualDiff(content, block._content)
        # And we should have released the _z_content_decompressor since it
        # was fully consumed
        self.assertIs(None, block._z_content_decompressor)
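
    # For illustration: _ensure_content is built on zlib's incremental
    # decompression. The same idea as a standalone sketch (the helper name
    # is hypothetical, not bzrlib API):
    def _partial_decompress_sketch(self, z_bytes, min_size):
        decompressor = zlib.decompressobj()
        # max_length caps how much output decompress() returns; the rest
        # stays buffered inside the decompressor until asked for again.
        data = decompressor.decompress(z_bytes, min_size)
        return data, decompressor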

    def test__dump(self):

            self.assertEqual(block_bytes[record.key],
                             record._manager._block._z_content)
        self.assertEqual(8, num_records)

    def test_insert_record_stream_packs_on_the_fly(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        # Now copy the blocks into another vf, and see that the
        # insert_record_stream rebuilt a new block on-the-fly because of
        # under-utilization
        vf2 = self.make_test_vf(True, dir='target')
        vf2.insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False))
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        vf2.writer.end()
        num_records = 0
        # All of the records should be recombined into a single block
        block = None
        for record in stream:
            num_records += 1
            if block is None:
                block = record._manager._block
            else:
                self.assertIs(block, record._manager._block)
        self.assertEqual(8, num_records)
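
    # (The on-the-fly rebuild above relies on the same utilization check that
    # the check_is_well_utilized tests exercise directly further down:
    # roughly, a copied block is only reused when the requested texts cover
    # most of its uncompressed content.)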

    def test__insert_record_stream_no_reuse_block(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):

            " \('b',\) \('42 32 0 8', \(\(\),\)\) \('74 32"
            " 0 8', \(\(\('a',\),\),\)\)")

    def test_clear_cache(self):
        vf = self.make_source_with_b(True, 'source')
        vf.writer.end()
        for record in vf.get_record_stream([('a',), ('b',)], 'unordered',
                                           True):
            pass
        self.assertTrue(len(vf._group_cache) > 0)
        vf.clear_cache()
        self.assertEqual(0, len(vf._group_cache))


class StubGCVF(object):

    def __init__(self, canned_get_blocks=None):
        self._group_cache = {}
        self._canned_get_blocks = canned_get_blocks or []

    def _get_blocks(self, read_memos):
        return iter(self._canned_get_blocks)
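
# StubGCVF stands in for a full GroupCompressVersionedFiles: the batcher only
# touches the two pieces faked here (the group cache and _get_blocks).
# Typical wiring, as used in the tests below:
#   gcvf = StubGCVF(canned_get_blocks=[(read_memo, block)])
#   batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)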


class Test_BatchingBlockFetcher(TestCaseWithGroupCompressVersionedFiles):
    """Simple whitebox unit tests for _BatchingBlockFetcher."""

    def test_add_key_new_read_memo(self):
        """Adding a key with an uncached read_memo new to this batch adds that
        read_memo to the list of memos to fetch.
        """
        # locations are: index_memo, ignored, parents, ignored
        # where index_memo is: (idx, offset, len, factory_start, factory_end)
        # and (idx, offset, size) is known as the 'read_memo', identifying the
        # raw bytes needed.
        read_memo = ('fake index', 100, 50)
        locations = {
            ('key',): (read_memo + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), locations)
        total_size = batcher.add_key(('key',))
        self.assertEqual(50, total_size)
        self.assertEqual([('key',)], batcher.keys)
        self.assertEqual([read_memo], batcher.memos_to_get)
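
    # The slicing relationship described in the comments above, spelled out
    # with the fake values these tests use:
    #   index_memo = ('fake index', 100, 50, None, None)
    #   read_memo = index_memo[0:3]    # -> ('fake index', 100, 50)
    # i.e. the read_memo is just (idx, offset, size): enough to identify the
    # raw bytes to read, regardless of where a factory sits inside them.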

    def test_add_key_duplicate_read_memo(self):
        """read_memos that occur multiple times in a batch will only be
        fetched once.
        """
        read_memo = ('fake index', 100, 50)
        # Two keys, both sharing the same read memo (but different overall
        # index_memos).
        locations = {
            ('key1',): (read_memo + (0, 1), None, None, None),
            ('key2',): (read_memo + (1, 2), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), locations)
        total_size = batcher.add_key(('key1',))
        total_size = batcher.add_key(('key2',))
        self.assertEqual(50, total_size)
        self.assertEqual([('key1',), ('key2',)], batcher.keys)
        self.assertEqual([read_memo], batcher.memos_to_get)

    def test_add_key_cached_read_memo(self):
        """Adding a key with a cached read_memo will not cause that read_memo
        to be added to the list to fetch.
        """
        read_memo = ('fake index', 100, 50)
        gcvf = StubGCVF()
        gcvf._group_cache[read_memo] = 'fake block'
        locations = {
            ('key',): (read_memo + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
        total_size = batcher.add_key(('key',))
        self.assertEqual(0, total_size)
        self.assertEqual([('key',)], batcher.keys)
        self.assertEqual([], batcher.memos_to_get)

    def test_yield_factories_empty(self):
        """An empty batch yields no factories."""
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), {})
        self.assertEqual([], list(batcher.yield_factories()))

    def test_yield_factories_calls_get_blocks(self):
        """Uncached memos are retrieved via get_blocks."""
        read_memo1 = ('fake index', 100, 50)
        read_memo2 = ('fake index', 150, 40)
        gcvf = StubGCVF(canned_get_blocks=[
            (read_memo1, groupcompress.GroupCompressBlock()),
            (read_memo2, groupcompress.GroupCompressBlock())])
        locations = {
            ('key1',): (read_memo1 + (None, None), None, None, None),
            ('key2',): (read_memo2 + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
        batcher.add_key(('key1',))
        batcher.add_key(('key2',))
        factories = list(batcher.yield_factories(full_flush=True))
        self.assertLength(2, factories)
        keys = [f.key for f in factories]
        kinds = [f.storage_kind for f in factories]
        self.assertEqual([('key1',), ('key2',)], keys)
        self.assertEqual(['groupcompress-block', 'groupcompress-block'], kinds)

    def test_yield_factories_flushing(self):
        """yield_factories holds back on yielding results from the final block
        unless passed full_flush=True.
        """
        fake_block = groupcompress.GroupCompressBlock()
        read_memo = ('fake index', 100, 50)
        gcvf = StubGCVF()
        gcvf._group_cache[read_memo] = fake_block
        locations = {
            ('key',): (read_memo + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
        batcher.add_key(('key',))
        self.assertEqual([], list(batcher.yield_factories()))
        factories = list(batcher.yield_factories(full_flush=True))
        self.assertLength(1, factories)
        self.assertEqual(('key',), factories[0].key)
        self.assertEqual('groupcompress-block', factories[0].storage_kind)
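
    # How a caller is expected to drive the batcher, sketched from the
    # behaviour verified above (BATCH_SIZE and the loop shape here are
    # illustrative, not the exact production code):
    #   for key in keys:
    #       if batcher.add_key(key) > BATCH_SIZE:
    #           for factory in batcher.yield_factories():
    #               yield factory
    #   for factory in batcher.yield_factories(full_flush=True):
    #       yield factory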


class TestLazyGroupCompress(tests.TestCaseWithTransport):

    _texts = {
        ('key1',): "this is a text\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
        ('key2',): "another text\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
        ('key3',): "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
        ('key4',): "this will be extracted\n"
                   "but references most of its bytes from\n"
                   "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
    }

    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts."""
        self.assertEqual(('key4',), record.key)
        self.assertEqual(self._texts[record.key],
                         record.get_bytes_as('fulltext'))

    def test_check_is_well_utilized_all_keys(self):
        block, manager = self.make_block_and_full_manager(self._texts)
        self.assertFalse(manager.check_is_well_utilized())
        # Though we can fake it by changing the recommended minimum size
        manager._full_enough_block_size = block._content_length
        self.assertTrue(manager.check_is_well_utilized())
        # Setting it just above causes it to fail
        manager._full_enough_block_size = block._content_length + 1
        self.assertFalse(manager.check_is_well_utilized())
        # Setting the mixed-block size doesn't do anything, because the
        # content is considered to not be 'mixed'
        manager._full_enough_mixed_block_size = block._content_length
        self.assertFalse(manager.check_is_well_utilized())

    def test_check_is_well_utilized_mixed_keys(self):
        texts = {}
        f1k1 = ('f1', 'k1')
        f1k2 = ('f1', 'k2')
        f2k1 = ('f2', 'k1')
        f2k2 = ('f2', 'k2')
        texts[f1k1] = self._texts[('key1',)]
        texts[f1k2] = self._texts[('key2',)]
        texts[f2k1] = self._texts[('key3',)]
        texts[f2k2] = self._texts[('key4',)]
        block, manager = self.make_block_and_full_manager(texts)
        self.assertFalse(manager.check_is_well_utilized())
        manager._full_enough_block_size = block._content_length
        self.assertTrue(manager.check_is_well_utilized())
        manager._full_enough_block_size = block._content_length + 1
        self.assertFalse(manager.check_is_well_utilized())
        manager._full_enough_mixed_block_size = block._content_length
        self.assertTrue(manager.check_is_well_utilized())

    def test_check_is_well_utilized_partial_use(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        manager._full_enough_block_size = block._content_length
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key2',), locations, block, manager)
        # Just using the content from key1 and 2 is not enough to be
        # considered 'well utilized'
        self.assertFalse(manager.check_is_well_utilized())
        # However if we add key4, then we have enough, as we only require
        # 75% usage
        self.add_key_to_manager(('key4',), locations, block, manager)
        self.assertTrue(manager.check_is_well_utilized())
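
    # The arithmetic behind the partial-use test, roughly: key1 and key2
    # cover only about half of the four-text block, which is well under the
    # 75% cutoff, while pulling in key4 (the longest text) pushes the used
    # fraction just past it.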


class Test_GCBuildDetails(tests.TestCase):

    def test_acts_like_tuple(self):
        # _GCBuildDetails inlines some of the data that used to be spread out
        # across a bunch of tuples
        bd = groupcompress._GCBuildDetails((('parent1',), ('parent2',)),
            ('INDEX', 10, 20, 0, 5))
        self.assertEqual(4, len(bd))
        self.assertEqual(('INDEX', 10, 20, 0, 5), bd[0])
        self.assertEqual(None, bd[1]) # Compression Parent is always None
        self.assertEqual((('parent1',), ('parent2',)), bd[2])
        self.assertEqual(('group', None), bd[3]) # Record details
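
    # What "acts like a tuple" means here, as a minimal standalone sketch
    # (not the real _GCBuildDetails implementation):
    #   class TupleLike(object):
    #       def __len__(self):
    #           return 4
    #       def __getitem__(self, offset):
    #           return (self.index_memo, None, self.parents,
    #                   ('group', None))[offset]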

    def test__repr__(self):
        bd = groupcompress._GCBuildDetails((('parent1',), ('parent2',)),
            ('INDEX', 10, 20, 0, 5))
        self.assertEqual("_GCBuildDetails(('INDEX', 10, 20, 0, 5),"
                         " (('parent1',), ('parent2',)))",
                         repr(bd))