~bzr-pqm/bzr/bzr.dev

Viewing changes to bzrlib/tests/test_groupcompress.py

  • Committer: Martin Pool
  • Date: 2009-07-10 06:46:10 UTC
  • mto: (4525.1.1 integration)
  • mto: This revision was merged to the branch mainline in revision 4526.
  • Revision ID: mbp@sourcefrog.net-20090710064610-sqviksbqp5i34sw2
Rename to per_interrepository

@@ -1,4 +1,4 @@
-# Copyright (C) 2008, 2009, 2010 Canonical Ltd
+# Copyright (C) 2008, 2009 Canonical Ltd
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -29,7 +29,7 @@
     versionedfile,
     )
 from bzrlib.osutils import sha_string
-from bzrlib.tests.test__groupcompress import compiled_groupcompress_feature
+from bzrlib.tests.test__groupcompress import CompiledGroupCompressFeature
 
 
 def load_tests(standard_tests, module, loader):
@@ -39,7 +39,7 @@
     scenarios = [
         ('python', {'compressor': groupcompress.PythonGroupCompressor}),
         ]
-    if compiled_groupcompress_feature.available():
+    if CompiledGroupCompressFeature.available():
         scenarios.append(('C',
             {'compressor': groupcompress.PyrexGroupCompressor}))
     return tests.multiply_tests(to_adapt, scenarios, result)
@@ -135,7 +135,7 @@
 
 class TestPyrexGroupCompressor(TestGroupCompressor):
 
-    _test_needs_features = [compiled_groupcompress_feature]
+    _test_needs_features = [CompiledGroupCompressFeature]
     compressor = groupcompress.PyrexGroupCompressor
 
     def test_stats(self):
@@ -347,30 +347,6 @@
         self.assertEqual(z_content, block._z_content)
         self.assertEqual(content, block._content)
 
-    def test_to_chunks(self):
-        content_chunks = ['this is some content\n',
-                          'this content will be compressed\n']
-        content_len = sum(map(len, content_chunks))
-        content = ''.join(content_chunks)
-        gcb = groupcompress.GroupCompressBlock()
-        gcb.set_chunked_content(content_chunks, content_len)
-        total_len, block_chunks = gcb.to_chunks()
-        block_bytes = ''.join(block_chunks)
-        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
-        self.assertEqual(total_len, len(block_bytes))
-        self.assertEqual(gcb._content_length, content_len)
-        expected_header = ('gcb1z\n' # group compress block v1 zlib
-                           '%d\n' # Length of compressed content
-                           '%d\n' # Length of uncompressed content
-                          ) % (gcb._z_content_length, gcb._content_length)
-        # The first chunk should be the header chunk. It is small, fixed size,
-        # and there is no compelling reason to split it up
-        self.assertEqual(expected_header, block_chunks[0])
-        self.assertStartsWith(block_bytes, expected_header)
-        remaining_bytes = block_bytes[len(expected_header):]
-        raw_bytes = zlib.decompress(remaining_bytes)
-        self.assertEqual(content, raw_bytes)
-
     def test_to_bytes(self):
         content = ('this is some content\n'
                    'this content will be compressed\n')
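Note: the test_to_chunks test on the left-hand side documents the block layout the assertions above rely on: a 'gcb1z\n' label (group-compress block v1, zlib), the compressed and uncompressed lengths each on their own line, then the zlib stream. A minimal sketch of a reader for that layout (parse_gcb1z is a hypothetical helper, not part of bzrlib):

    import zlib

    def parse_gcb1z(block_bytes):
        # Header is three '\n'-terminated fields: the 'gcb1z' label, the
        # compressed length, and the uncompressed length; zlib data follows.
        label, z_len, c_len, rest = block_bytes.split('\n', 3)
        if label != 'gcb1z':
            raise ValueError('not a zlib group-compress block: %r' % (label,))
        content = zlib.decompress(rest[:int(z_len)])
        if len(content) != int(c_len):
            raise ValueError('uncompressed length mismatch')
        return content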
@@ -413,7 +389,7 @@
         z_content = zlib.compress(content)
         self.assertEqual(57182, len(z_content))
         block = groupcompress.GroupCompressBlock()
-        block._z_content_chunks = (z_content,)
+        block._z_content = z_content
         block._z_content_length = len(z_content)
         block._compressor_name = 'zlib'
         block._content_length = 158634
@@ -442,12 +418,8 @@
         # And the decompressor is finalized
         self.assertIs(None, block._z_content_decompressor)
 
-    def test__ensure_all_content(self):
+    def test_partial_decomp_no_known_length(self):
         content_chunks = []
-        # We need a sufficient amount of data so that zlib.decompress has
-        # partial decompression to work with. Most auto-generated data
-        # compresses a bit too well; we want a combination, so we combine a
-        # sha hash with compressible data.
         for i in xrange(2048):
             next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
             content_chunks.append(next_content)
@@ -458,16 +430,33 @@
         z_content = zlib.compress(content)
         self.assertEqual(57182, len(z_content))
         block = groupcompress.GroupCompressBlock()
-        block._z_content_chunks = (z_content,)
+        block._z_content = z_content
         block._z_content_length = len(z_content)
         block._compressor_name = 'zlib'
-        block._content_length = 158634
+        block._content_length = None # Don't tell the decompressed length
         self.assertIs(None, block._content)
-        # The first _ensure_content got all of the required data
-        block._ensure_content(158634)
+        block._ensure_content(100)
+        self.assertIsNot(None, block._content)
+        # We have decompressed at least 100 bytes
+        self.assertTrue(len(block._content) >= 100)
+        # We have not decompressed the whole content
+        self.assertTrue(len(block._content) < 158634)
+        self.assertEqualDiff(content[:len(block._content)], block._content)
+        # Ensuring content that we already have shouldn't cause any more data
+        # to be extracted
+        cur_len = len(block._content)
+        block._ensure_content(cur_len - 10)
+        self.assertEqual(cur_len, len(block._content))
+        # Now we want a bit more content
+        cur_len += 10
+        block._ensure_content(cur_len)
+        self.assertTrue(len(block._content) >= cur_len)
+        self.assertTrue(len(block._content) < 158634)
+        self.assertEqualDiff(content[:len(block._content)], block._content)
+        # And now let's finish
+        block._ensure_content()
         self.assertEqualDiff(content, block._content)
-        # And we should have released the _z_content_decompressor since it was
-        # fully consumed
+        # And the decompressor is finalized
         self.assertIs(None, block._z_content_decompressor)
 
     def test__dump(self):
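Note: the partial decompression exercised by test_partial_decomp_no_known_length rests on zlib's incremental API: inflate with a max_length cap and keep the unconsumed tail for later. A rough standalone sketch of the same idea (decompress_at_least is a made-up name, not bzrlib's _ensure_content):

    import zlib

    def decompress_at_least(z_bytes, min_size):
        # Inflate incrementally until at least min_size bytes of plaintext
        # are available, without decompressing the whole stream up front.
        decomp = zlib.decompressobj()
        out = decomp.decompress(z_bytes, min_size)
        while len(out) < min_size and decomp.unconsumed_tail:
            out += decomp.decompress(decomp.unconsumed_tail,
                                     min_size - len(out))
        return out, decomp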
@@ -483,8 +472,7 @@
                          ], block._dump())
 
 
-class TestCaseWithGroupCompressVersionedFiles(
-        tests.TestCaseWithMemoryTransport):
+class TestCaseWithGroupCompressVersionedFiles(tests.TestCaseWithTransport):
 
     def make_test_vf(self, create_graph, keylength=1, do_cleanup=True,
                      dir='.', inconsistency_fatal=True):
@@ -550,7 +538,7 @@
                     'as-requested', False)]
         self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)
 
-    def test_insert_record_stream_reuses_blocks(self):
+    def test_insert_record_stream_re_uses_blocks(self):
         vf = self.make_test_vf(True, dir='source')
         def grouped_stream(revision_ids, first_parents=()):
             parents = first_parents
@@ -594,14 +582,8 @@
         vf2 = self.make_test_vf(True, dir='target')
         # ordering in 'groupcompress' order, should actually swap the groups in
         # the target vf, but the groups themselves should not be disturbed.
-        def small_size_stream():
-            for record in vf.get_record_stream([(r,) for r in 'abcdefgh'],
-                                               'groupcompress', False):
-                record._manager._full_enough_block_size = \
-                    record._manager._block._content_length
-                yield record
-
-        vf2.insert_record_stream(small_size_stream())
+        vf2.insert_record_stream(vf.get_record_stream(
+            [(r,) for r in 'abcdefgh'], 'groupcompress', False))
         stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                        'groupcompress', False)
         vf2.writer.end()
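Note: the small_size_stream wrapper on the left-hand side shows a handy pattern: a generator that adapts records in flight, here lowering each manager's "full enough" threshold so the incoming blocks count as well utilized and are reused verbatim rather than rebuilt. In sketch form (attribute names as in the diff above):

    def reuse_friendly_stream(record_stream):
        # Pass records through unchanged, but tune each lazy manager so its
        # block is judged 'full enough' and therefore reused, not rebuilt.
        for record in record_stream:
            manager = record._manager
            manager._full_enough_block_size = manager._block._content_length
            yield record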
@@ -612,44 +594,6 @@
                              record._manager._block._z_content)
         self.assertEqual(8, num_records)
 
-    def test_insert_record_stream_packs_on_the_fly(self):
-        vf = self.make_test_vf(True, dir='source')
-        def grouped_stream(revision_ids, first_parents=()):
-            parents = first_parents
-            for revision_id in revision_ids:
-                key = (revision_id,)
-                record = versionedfile.FulltextContentFactory(
-                    key, parents, None,
-                    'some content that is\n'
-                    'identical except for\n'
-                    'revision_id:%s\n' % (revision_id,))
-                yield record
-                parents = (key,)
-        # One group, a-d
-        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
-        # Second group, e-h
-        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
-                                               first_parents=(('d',),)))
-        # Now copy the blocks into another vf, and see that the
-        # insert_record_stream rebuilt a new block on-the-fly because of
-        # under-utilization
-        vf2 = self.make_test_vf(True, dir='target')
-        vf2.insert_record_stream(vf.get_record_stream(
-            [(r,) for r in 'abcdefgh'], 'groupcompress', False))
-        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
-                                       'groupcompress', False)
-        vf2.writer.end()
-        num_records = 0
-        # All of the records should be recombined into a single block
-        block = None
-        for record in stream:
-            num_records += 1
-            if block is None:
-                block = record._manager._block
-            else:
-                self.assertIs(block, record._manager._block)
-        self.assertEqual(8, num_records)
-
     def test__insert_record_stream_no_reuse_block(self):
         vf = self.make_test_vf(True, dir='source')
         def grouped_stream(revision_ids, first_parents=()):
@@ -757,140 +701,20 @@
                               " \('b',\) \('42 32 0 8', \(\(\),\)\) \('74 32"
                               " 0 8', \(\(\('a',\),\),\)\)")
 
-    def test_clear_cache(self):
-        vf = self.make_source_with_b(True, 'source')
-        vf.writer.end()
-        for record in vf.get_record_stream([('a',), ('b',)], 'unordered',
-                                           True):
-            pass
-        self.assertTrue(len(vf._group_cache) > 0)
-        vf.clear_cache()
-        self.assertEqual(0, len(vf._group_cache))
-
-
-
-class StubGCVF(object):
-    def __init__(self, canned_get_blocks=None):
-        self._group_cache = {}
-        self._canned_get_blocks = canned_get_blocks or []
-    def _get_blocks(self, read_memos):
-        return iter(self._canned_get_blocks)
-
-
-class Test_BatchingBlockFetcher(TestCaseWithGroupCompressVersionedFiles):
-    """Simple whitebox unit tests for _BatchingBlockFetcher."""
-
-    def test_add_key_new_read_memo(self):
-        """Adding a key with an uncached read_memo new to this batch adds that
-        read_memo to the list of memos to fetch.
-        """
-        # locations are: index_memo, ignored, parents, ignored
-        # where index_memo is: (idx, offset, len, factory_start, factory_end)
-        # and (idx, offset, size) is known as the 'read_memo', identifying the
-        # raw bytes needed.
-        read_memo = ('fake index', 100, 50)
-        locations = {
-            ('key',): (read_memo + (None, None), None, None, None)}
-        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), locations)
-        total_size = batcher.add_key(('key',))
-        self.assertEqual(50, total_size)
-        self.assertEqual([('key',)], batcher.keys)
-        self.assertEqual([read_memo], batcher.memos_to_get)
-
-    def test_add_key_duplicate_read_memo(self):
-        """read_memos that occur multiple times in a batch will only be fetched
-        once.
-        """
-        read_memo = ('fake index', 100, 50)
-        # Two keys, both sharing the same read memo (but different overall
-        # index_memos).
-        locations = {
-            ('key1',): (read_memo + (0, 1), None, None, None),
-            ('key2',): (read_memo + (1, 2), None, None, None)}
-        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), locations)
-        total_size = batcher.add_key(('key1',))
-        total_size = batcher.add_key(('key2',))
-        self.assertEqual(50, total_size)
-        self.assertEqual([('key1',), ('key2',)], batcher.keys)
-        self.assertEqual([read_memo], batcher.memos_to_get)
-
-    def test_add_key_cached_read_memo(self):
-        """Adding a key with a cached read_memo will not cause that read_memo
-        to be added to the list to fetch.
-        """
-        read_memo = ('fake index', 100, 50)
-        gcvf = StubGCVF()
-        gcvf._group_cache[read_memo] = 'fake block'
-        locations = {
-            ('key',): (read_memo + (None, None), None, None, None)}
-        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
-        total_size = batcher.add_key(('key',))
-        self.assertEqual(0, total_size)
-        self.assertEqual([('key',)], batcher.keys)
-        self.assertEqual([], batcher.memos_to_get)
-
-    def test_yield_factories_empty(self):
-        """An empty batch yields no factories."""
-        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), {})
-        self.assertEqual([], list(batcher.yield_factories()))
-
-    def test_yield_factories_calls_get_blocks(self):
-        """Uncached memos are retrieved via get_blocks."""
-        read_memo1 = ('fake index', 100, 50)
-        read_memo2 = ('fake index', 150, 40)
-        gcvf = StubGCVF(
-            canned_get_blocks=[
-                (read_memo1, groupcompress.GroupCompressBlock()),
-                (read_memo2, groupcompress.GroupCompressBlock())])
-        locations = {
-            ('key1',): (read_memo1 + (None, None), None, None, None),
-            ('key2',): (read_memo2 + (None, None), None, None, None)}
-        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
-        batcher.add_key(('key1',))
-        batcher.add_key(('key2',))
-        factories = list(batcher.yield_factories(full_flush=True))
-        self.assertLength(2, factories)
-        keys = [f.key for f in factories]
-        kinds = [f.storage_kind for f in factories]
-        self.assertEqual([('key1',), ('key2',)], keys)
-        self.assertEqual(['groupcompress-block', 'groupcompress-block'], kinds)
-
-    def test_yield_factories_flushing(self):
-        """yield_factories holds back on yielding results from the final block
-        unless passed full_flush=True.
-        """
-        fake_block = groupcompress.GroupCompressBlock()
-        read_memo = ('fake index', 100, 50)
-        gcvf = StubGCVF()
-        gcvf._group_cache[read_memo] = fake_block
-        locations = {
-            ('key',): (read_memo + (None, None), None, None, None)}
-        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
-        batcher.add_key(('key',))
-        self.assertEqual([], list(batcher.yield_factories()))
-        factories = list(batcher.yield_factories(full_flush=True))
-        self.assertLength(1, factories)
-        self.assertEqual(('key',), factories[0].key)
-        self.assertEqual('groupcompress-block', factories[0].storage_kind)
-
 
 class TestLazyGroupCompress(tests.TestCaseWithTransport):
 
     _texts = {
         ('key1',): "this is a text\n"
-                   "with a reasonable amount of compressible bytes\n"
-                   "which can be shared between various other texts\n",
+                   "with a reasonable amount of compressible bytes\n",
         ('key2',): "another text\n"
-                   "with a reasonable amount of compressible bytes\n"
-                   "which can be shared between various other texts\n",
+                   "with a reasonable amount of compressible bytes\n",
         ('key3',): "yet another text which won't be extracted\n"
-                   "with a reasonable amount of compressible bytes\n"
-                   "which can be shared between various other texts\n",
+                   "with a reasonable amount of compressible bytes\n",
         ('key4',): "this will be extracted\n"
                    "but references most of its bytes from\n"
                    "yet another text which won't be extracted\n"
-                   "with a reasonable amount of compressible bytes\n"
-                   "which can be shared between various other texts\n",
+                   "with a reasonable amount of compressible bytes\n",
     }
     def make_block(self, key_to_text):
         """Create a GroupCompressBlock, filling it with the given texts."""
@@ -908,13 +732,6 @@
         start, end = locations[key]
         manager.add_factory(key, (), start, end)
 
-    def make_block_and_full_manager(self, texts):
-        locations, block = self.make_block(texts)
-        manager = groupcompress._LazyGroupContentManager(block)
-        for key in sorted(texts):
-            self.add_key_to_manager(key, locations, block, manager)
-        return block, manager
-
     def test_get_fulltexts(self):
         locations, block = self.make_block(self._texts)
         manager = groupcompress._LazyGroupContentManager(block)
@@ -971,8 +788,8 @@
         header_len = int(header_len)
         block_len = int(block_len)
         self.assertEqual('groupcompress-block', storage_kind)
-        self.assertEqual(34, z_header_len)
-        self.assertEqual(26, header_len)
+        self.assertEqual(33, z_header_len)
+        self.assertEqual(25, header_len)
         self.assertEqual(len(block_bytes), block_len)
         z_header = rest[:z_header_len]
         header = zlib.decompress(z_header)
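Note: per the surrounding assertions, a 'groupcompress-block' network record carries the storage kind plus three length fields, then a zlib-compressed header, then the block bytes; the expected header lengths change by one here apparently because the shortened _texts shrink the header. A rough sketch of splitting such a record, under the layout implied by this test (split_network_block is hypothetical):

    import zlib

    def split_network_block(wire_bytes):
        # Implied layout: storage kind and three lengths on '\n'-terminated
        # lines, then the compressed header, then the raw block bytes.
        kind, z_header_len, header_len, block_len, rest = \
            wire_bytes.split('\n', 4)
        z_header_len = int(z_header_len)
        header = zlib.decompress(rest[:z_header_len])
        assert len(header) == int(header_len)
        block_bytes = rest[z_header_len:]
        assert len(block_bytes) == int(block_len)
        return kind, header, block_bytes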
@@ -1012,7 +829,13 @@
         self.assertEqual([('key1',), ('key4',)], result_order)
 
     def test__check_rebuild_no_changes(self):
-        block, manager = self.make_block_and_full_manager(self._texts)
+        locations, block = self.make_block(self._texts)
+        manager = groupcompress._LazyGroupContentManager(block)
+        # Request all the keys, which ensures that we won't rebuild
+        self.add_key_to_manager(('key1',), locations, block, manager)
+        self.add_key_to_manager(('key2',), locations, block, manager)
+        self.add_key_to_manager(('key3',), locations, block, manager)
+        self.add_key_to_manager(('key4',), locations, block, manager)
         manager._check_rebuild_block()
         self.assertIs(block, manager._block)
 
@@ -1043,71 +866,3 @@
             self.assertEqual(('key4',), record.key)
             self.assertEqual(self._texts[record.key],
                              record.get_bytes_as('fulltext'))
-
-    def test_check_is_well_utilized_all_keys(self):
-        block, manager = self.make_block_and_full_manager(self._texts)
-        self.assertFalse(manager.check_is_well_utilized())
-        # Though we can fake it by changing the recommended minimum size
-        manager._full_enough_block_size = block._content_length
-        self.assertTrue(manager.check_is_well_utilized())
-        # Setting it just above causes it to fail
-        manager._full_enough_block_size = block._content_length + 1
-        self.assertFalse(manager.check_is_well_utilized())
-        # Setting the mixed-block size doesn't do anything, because the content
-        # is considered not to be 'mixed'
-        manager._full_enough_mixed_block_size = block._content_length
-        self.assertFalse(manager.check_is_well_utilized())
-
-    def test_check_is_well_utilized_mixed_keys(self):
-        texts = {}
-        f1k1 = ('f1', 'k1')
-        f1k2 = ('f1', 'k2')
-        f2k1 = ('f2', 'k1')
-        f2k2 = ('f2', 'k2')
-        texts[f1k1] = self._texts[('key1',)]
-        texts[f1k2] = self._texts[('key2',)]
-        texts[f2k1] = self._texts[('key3',)]
-        texts[f2k2] = self._texts[('key4',)]
-        block, manager = self.make_block_and_full_manager(texts)
-        self.assertFalse(manager.check_is_well_utilized())
-        manager._full_enough_block_size = block._content_length
-        self.assertTrue(manager.check_is_well_utilized())
-        manager._full_enough_block_size = block._content_length + 1
-        self.assertFalse(manager.check_is_well_utilized())
-        manager._full_enough_mixed_block_size = block._content_length
-        self.assertTrue(manager.check_is_well_utilized())
-
-    def test_check_is_well_utilized_partial_use(self):
-        locations, block = self.make_block(self._texts)
-        manager = groupcompress._LazyGroupContentManager(block)
-        manager._full_enough_block_size = block._content_length
-        self.add_key_to_manager(('key1',), locations, block, manager)
-        self.add_key_to_manager(('key2',), locations, block, manager)
-        # Just using the content from key1 and 2 is not enough to be considered
-        # 'complete'
-        self.assertFalse(manager.check_is_well_utilized())
-        # However, if we add key4, then we have enough, as we only require 75%
-        # consumption
-        self.add_key_to_manager(('key4',), locations, block, manager)
-        self.assertTrue(manager.check_is_well_utilized())
-
-
-class Test_GCBuildDetails(tests.TestCase):
-
-    def test_acts_like_tuple(self):
-        # _GCBuildDetails inlines some of the data that used to be spread out
-        # across a bunch of tuples
-        bd = groupcompress._GCBuildDetails((('parent1',), ('parent2',)),
-            ('INDEX', 10, 20, 0, 5))
-        self.assertEqual(4, len(bd))
-        self.assertEqual(('INDEX', 10, 20, 0, 5), bd[0])
-        self.assertEqual(None, bd[1]) # Compression parent is always None
-        self.assertEqual((('parent1',), ('parent2',)), bd[2])
-        self.assertEqual(('group', None), bd[3]) # Record details
-
-    def test__repr__(self):
-        bd = groupcompress._GCBuildDetails((('parent1',), ('parent2',)),
-            ('INDEX', 10, 20, 0, 5))
-        self.assertEqual("_GCBuildDetails(('INDEX', 10, 20, 0, 5),"
-                         " (('parent1',), ('parent2',)))",
-                         repr(bd))
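Note: the Test_GCBuildDetails tests above illustrate a memory optimization: a __slots__ class that stands in for the old 4-tuple of build details, answering __len__ and __getitem__ like a tuple. A simplified sketch of that pattern, matching only the behaviour the tests assert (BuildDetails is a made-up name, not the bzrlib class):

    class BuildDetails(object):
        """Tuple-like view: (index_memo, None, parents, ('group', None))."""

        __slots__ = ('_parents', '_index_memo')

        def __init__(self, parents, index_memo):
            self._parents = parents
            self._index_memo = index_memo

        def __len__(self):
            # The old representation was a 4-tuple.
            return 4

        def __getitem__(self, offset):
            # Materialize the equivalent tuple on demand; the compression
            # parent slot is always None for group-compress records.
            return (self._index_memo, None, self._parents,
                    ('group', None))[offset]

        def __repr__(self):
            return '%s(%r, %r)' % (self.__class__.__name__,
                                   self._index_memo, self._parents)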