~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/tests/test_groupcompress.py

  • Committer: Ian Clatworthy
  • Date: 2009-09-02 16:03:51 UTC
  • mto: (4634.39.1 pdf-chm-docs)
  • mto: This revision was merged to the branch mainline in revision 4689.
  • Revision ID: ian.clatworthy@canonical.com-20090902160351-sxptcz3ttc1aencw
first cut at pdf docs via sphinx

Show diffs side-by-side

added added

removed removed

Lines of Context:
29
29
    versionedfile,
30
30
    )
31
31
from bzrlib.osutils import sha_string
32
 
from bzrlib.tests.test__groupcompress import compiled_groupcompress_feature
 
32
from bzrlib.tests.test__groupcompress import CompiledGroupCompressFeature
33
33
 
34
34
 
35
35
def load_tests(standard_tests, module, loader):
39
39
    scenarios = [
40
40
        ('python', {'compressor': groupcompress.PythonGroupCompressor}),
41
41
        ]
42
 
    if compiled_groupcompress_feature.available():
 
42
    if CompiledGroupCompressFeature.available():
43
43
        scenarios.append(('C',
44
44
            {'compressor': groupcompress.PyrexGroupCompressor}))
45
45
    return tests.multiply_tests(to_adapt, scenarios, result)
135
135
 
136
136
class TestPyrexGroupCompressor(TestGroupCompressor):
137
137
 
138
 
    _test_needs_features = [compiled_groupcompress_feature]
 
138
    _test_needs_features = [CompiledGroupCompressFeature]
139
139
    compressor = groupcompress.PyrexGroupCompressor
140
140
 
141
141
    def test_stats(self):
418
418
        # And the decompressor is finalized
419
419
        self.assertIs(None, block._z_content_decompressor)
420
420
 
421
 
    def test__ensure_all_content(self):
 
421
    def test_partial_decomp_no_known_length(self):
422
422
        content_chunks = []
423
 
        # We need a sufficient amount of data so that zlib.decompress has
424
 
        # partial decompression to work with. Most auto-generated data
425
 
        # compresses a bit too well, we want a combination, so we combine a sha
426
 
        # hash with compressible data.
427
423
        for i in xrange(2048):
428
424
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
429
425
            content_chunks.append(next_content)
437
433
        block._z_content = z_content
438
434
        block._z_content_length = len(z_content)
439
435
        block._compressor_name = 'zlib'
440
 
        block._content_length = 158634
 
436
        block._content_length = None # Don't tell the decompressed length
441
437
        self.assertIs(None, block._content)
442
 
        # The first _ensure_content got all of the required data
443
 
        block._ensure_content(158634)
 
438
        block._ensure_content(100)
 
439
        self.assertIsNot(None, block._content)
 
440
        # We have decompressed at least 100 bytes
 
441
        self.assertTrue(len(block._content) >= 100)
 
442
        # We have not decompressed the whole content
 
443
        self.assertTrue(len(block._content) < 158634)
 
444
        self.assertEqualDiff(content[:len(block._content)], block._content)
 
445
        # ensuring content that we already have shouldn't cause any more data
 
446
        # to be extracted
 
447
        cur_len = len(block._content)
 
448
        block._ensure_content(cur_len - 10)
 
449
        self.assertEqual(cur_len, len(block._content))
 
450
        # Now we want a bit more content
 
451
        cur_len += 10
 
452
        block._ensure_content(cur_len)
 
453
        self.assertTrue(len(block._content) >= cur_len)
 
454
        self.assertTrue(len(block._content) < 158634)
 
455
        self.assertEqualDiff(content[:len(block._content)], block._content)
 
456
        # And now let's finish
 
457
        block._ensure_content()
444
458
        self.assertEqualDiff(content, block._content)
445
 
        # And we should have released the _z_content_decompressor since it was
446
 
        # fully consumed
 
459
        # And the decompressor is finalized
447
460
        self.assertIs(None, block._z_content_decompressor)
448
461
 
449
462
    def test__dump(self):
459
472
                         ], block._dump())
460
473
 
461
474
 
462
 
class TestCaseWithGroupCompressVersionedFiles(
463
 
        tests.TestCaseWithMemoryTransport):
 
475
class TestCaseWithGroupCompressVersionedFiles(tests.TestCaseWithTransport):
464
476
 
465
477
    def make_test_vf(self, create_graph, keylength=1, do_cleanup=True,
466
478
                     dir='.', inconsistency_fatal=True):
526
538
                    'as-requested', False)]
527
539
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)
528
540
 
529
 
    def test_insert_record_stream_reuses_blocks(self):
 
541
    def test_insert_record_stream_re_uses_blocks(self):
530
542
        vf = self.make_test_vf(True, dir='source')
531
543
        def grouped_stream(revision_ids, first_parents=()):
532
544
            parents = first_parents
570
582
        vf2 = self.make_test_vf(True, dir='target')
571
583
        # ordering in 'groupcompress' order, should actually swap the groups in
572
584
        # the target vf, but the groups themselves should not be disturbed.
573
 
        def small_size_stream():
574
 
            for record in vf.get_record_stream([(r,) for r in 'abcdefgh'],
575
 
                                               'groupcompress', False):
576
 
                record._manager._full_enough_block_size = \
577
 
                    record._manager._block._content_length
578
 
                yield record
579
 
                        
580
 
        vf2.insert_record_stream(small_size_stream())
 
585
        vf2.insert_record_stream(vf.get_record_stream(
 
586
            [(r,) for r in 'abcdefgh'], 'groupcompress', False))
581
587
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
582
588
                                       'groupcompress', False)
583
589
        vf2.writer.end()
588
594
                             record._manager._block._z_content)
589
595
        self.assertEqual(8, num_records)
590
596
 
591
 
    def test_insert_record_stream_packs_on_the_fly(self):
592
 
        vf = self.make_test_vf(True, dir='source')
593
 
        def grouped_stream(revision_ids, first_parents=()):
594
 
            parents = first_parents
595
 
            for revision_id in revision_ids:
596
 
                key = (revision_id,)
597
 
                record = versionedfile.FulltextContentFactory(
598
 
                    key, parents, None,
599
 
                    'some content that is\n'
600
 
                    'identical except for\n'
601
 
                    'revision_id:%s\n' % (revision_id,))
602
 
                yield record
603
 
                parents = (key,)
604
 
        # One group, a-d
605
 
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
606
 
        # Second group, e-h
607
 
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
608
 
                                               first_parents=(('d',),)))
609
 
        # Now copy the blocks into another vf, and see that the
610
 
        # insert_record_stream rebuilt a new block on-the-fly because of
611
 
        # under-utilization
612
 
        vf2 = self.make_test_vf(True, dir='target')
613
 
        vf2.insert_record_stream(vf.get_record_stream(
614
 
            [(r,) for r in 'abcdefgh'], 'groupcompress', False))
615
 
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
616
 
                                       'groupcompress', False)
617
 
        vf2.writer.end()
618
 
        num_records = 0
619
 
        # All of the records should be recombined into a single block
620
 
        block = None
621
 
        for record in stream:
622
 
            num_records += 1
623
 
            if block is None:
624
 
                block = record._manager._block
625
 
            else:
626
 
                self.assertIs(block, record._manager._block)
627
 
        self.assertEqual(8, num_records)
628
 
 
629
597
    def test__insert_record_stream_no_reuse_block(self):
630
598
        vf = self.make_test_vf(True, dir='source')
631
599
        def grouped_stream(revision_ids, first_parents=()):
733
701
                              " \('b',\) \('42 32 0 8', \(\(\),\)\) \('74 32"
734
702
                              " 0 8', \(\(\('a',\),\),\)\)")
735
703
 
736
 
    def test_clear_cache(self):
737
 
        vf = self.make_source_with_b(True, 'source')
738
 
        vf.writer.end()
739
 
        for record in vf.get_record_stream([('a',), ('b',)], 'unordered',
740
 
                                           True):
741
 
            pass
742
 
        self.assertTrue(len(vf._group_cache) > 0)
743
 
        vf.clear_cache()
744
 
        self.assertEqual(0, len(vf._group_cache))
745
 
 
746
 
 
747
 
 
748
 
class StubGCVF(object):
749
 
    def __init__(self, canned_get_blocks=None):
750
 
        self._group_cache = {}
751
 
        self._canned_get_blocks = canned_get_blocks or []
752
 
    def _get_blocks(self, read_memos):
753
 
        return iter(self._canned_get_blocks)
754
 
    
755
 
 
756
 
class Test_BatchingBlockFetcher(TestCaseWithGroupCompressVersionedFiles):
757
 
    """Simple whitebox unit tests for _BatchingBlockFetcher."""
758
 
    
759
 
    def test_add_key_new_read_memo(self):
760
 
        """Adding a key with an uncached read_memo new to this batch adds that
761
 
        read_memo to the list of memos to fetch.
762
 
        """
763
 
        # locations are: index_memo, ignored, parents, ignored
764
 
        # where index_memo is: (idx, offset, len, factory_start, factory_end)
765
 
        # and (idx, offset, size) is known as the 'read_memo', identifying the
766
 
        # raw bytes needed.
767
 
        read_memo = ('fake index', 100, 50)
768
 
        locations = {
769
 
            ('key',): (read_memo + (None, None), None, None, None)}
770
 
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), locations)
771
 
        total_size = batcher.add_key(('key',))
772
 
        self.assertEqual(50, total_size)
773
 
        self.assertEqual([('key',)], batcher.keys)
774
 
        self.assertEqual([read_memo], batcher.memos_to_get)
775
 
 
776
 
    def test_add_key_duplicate_read_memo(self):
777
 
        """read_memos that occur multiple times in a batch will only be fetched
778
 
        once.
779
 
        """
780
 
        read_memo = ('fake index', 100, 50)
781
 
        # Two keys, both sharing the same read memo (but different overall
782
 
        # index_memos).
783
 
        locations = {
784
 
            ('key1',): (read_memo + (0, 1), None, None, None),
785
 
            ('key2',): (read_memo + (1, 2), None, None, None)}
786
 
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), locations)
787
 
        total_size = batcher.add_key(('key1',))
788
 
        total_size = batcher.add_key(('key2',))
789
 
        self.assertEqual(50, total_size)
790
 
        self.assertEqual([('key1',), ('key2',)], batcher.keys)
791
 
        self.assertEqual([read_memo], batcher.memos_to_get)
792
 
 
793
 
    def test_add_key_cached_read_memo(self):
794
 
        """Adding a key with a cached read_memo will not cause that read_memo
795
 
        to be added to the list to fetch.
796
 
        """
797
 
        read_memo = ('fake index', 100, 50)
798
 
        gcvf = StubGCVF()
799
 
        gcvf._group_cache[read_memo] = 'fake block'
800
 
        locations = {
801
 
            ('key',): (read_memo + (None, None), None, None, None)}
802
 
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
803
 
        total_size = batcher.add_key(('key',))
804
 
        self.assertEqual(0, total_size)
805
 
        self.assertEqual([('key',)], batcher.keys)
806
 
        self.assertEqual([], batcher.memos_to_get)
807
 
 
808
 
    def test_yield_factories_empty(self):
809
 
        """An empty batch yields no factories."""
810
 
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), {})
811
 
        self.assertEqual([], list(batcher.yield_factories()))
812
 
 
813
 
    def test_yield_factories_calls_get_blocks(self):
814
 
        """Uncached memos are retrieved via get_blocks."""
815
 
        read_memo1 = ('fake index', 100, 50)
816
 
        read_memo2 = ('fake index', 150, 40)
817
 
        gcvf = StubGCVF(
818
 
            canned_get_blocks=[
819
 
                (read_memo1, groupcompress.GroupCompressBlock()),
820
 
                (read_memo2, groupcompress.GroupCompressBlock())])
821
 
        locations = {
822
 
            ('key1',): (read_memo1 + (None, None), None, None, None),
823
 
            ('key2',): (read_memo2 + (None, None), None, None, None)}
824
 
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
825
 
        batcher.add_key(('key1',))
826
 
        batcher.add_key(('key2',))
827
 
        factories = list(batcher.yield_factories(full_flush=True))
828
 
        self.assertLength(2, factories)
829
 
        keys = [f.key for f in factories]
830
 
        kinds = [f.storage_kind for f in factories]
831
 
        self.assertEqual([('key1',), ('key2',)], keys)
832
 
        self.assertEqual(['groupcompress-block', 'groupcompress-block'], kinds)
833
 
 
834
 
    def test_yield_factories_flushing(self):
835
 
        """yield_factories holds back on yielding results from the final block
836
 
        unless passed full_flush=True.
837
 
        """
838
 
        fake_block = groupcompress.GroupCompressBlock()
839
 
        read_memo = ('fake index', 100, 50)
840
 
        gcvf = StubGCVF()
841
 
        gcvf._group_cache[read_memo] = fake_block
842
 
        locations = {
843
 
            ('key',): (read_memo + (None, None), None, None, None)}
844
 
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
845
 
        batcher.add_key(('key',))
846
 
        self.assertEqual([], list(batcher.yield_factories()))
847
 
        factories = list(batcher.yield_factories(full_flush=True))
848
 
        self.assertLength(1, factories)
849
 
        self.assertEqual(('key',), factories[0].key)
850
 
        self.assertEqual('groupcompress-block', factories[0].storage_kind)
851
 
 
852
704
 
853
705
class TestLazyGroupCompress(tests.TestCaseWithTransport):
854
706
 
855
707
    _texts = {
856
708
        ('key1',): "this is a text\n"
857
 
                   "with a reasonable amount of compressible bytes\n"
858
 
                   "which can be shared between various other texts\n",
 
709
                   "with a reasonable amount of compressible bytes\n",
859
710
        ('key2',): "another text\n"
860
 
                   "with a reasonable amount of compressible bytes\n"
861
 
                   "which can be shared between various other texts\n",
 
711
                   "with a reasonable amount of compressible bytes\n",
862
712
        ('key3',): "yet another text which won't be extracted\n"
863
 
                   "with a reasonable amount of compressible bytes\n"
864
 
                   "which can be shared between various other texts\n",
 
713
                   "with a reasonable amount of compressible bytes\n",
865
714
        ('key4',): "this will be extracted\n"
866
715
                   "but references most of its bytes from\n"
867
716
                   "yet another text which won't be extracted\n"
868
 
                   "with a reasonable amount of compressible bytes\n"
869
 
                   "which can be shared between various other texts\n",
 
717
                   "with a reasonable amount of compressible bytes\n",
870
718
    }
871
719
    def make_block(self, key_to_text):
872
720
        """Create a GroupCompressBlock, filling it with the given texts."""
884
732
        start, end = locations[key]
885
733
        manager.add_factory(key, (), start, end)
886
734
 
887
 
    def make_block_and_full_manager(self, texts):
888
 
        locations, block = self.make_block(texts)
889
 
        manager = groupcompress._LazyGroupContentManager(block)
890
 
        for key in sorted(texts):
891
 
            self.add_key_to_manager(key, locations, block, manager)
892
 
        return block, manager
893
 
 
894
735
    def test_get_fulltexts(self):
895
736
        locations, block = self.make_block(self._texts)
896
737
        manager = groupcompress._LazyGroupContentManager(block)
947
788
        header_len = int(header_len)
948
789
        block_len = int(block_len)
949
790
        self.assertEqual('groupcompress-block', storage_kind)
950
 
        self.assertEqual(34, z_header_len)
951
 
        self.assertEqual(26, header_len)
 
791
        self.assertEqual(33, z_header_len)
 
792
        self.assertEqual(25, header_len)
952
793
        self.assertEqual(len(block_bytes), block_len)
953
794
        z_header = rest[:z_header_len]
954
795
        header = zlib.decompress(z_header)
988
829
        self.assertEqual([('key1',), ('key4',)], result_order)
989
830
 
990
831
    def test__check_rebuild_no_changes(self):
991
 
        block, manager = self.make_block_and_full_manager(self._texts)
 
832
        locations, block = self.make_block(self._texts)
 
833
        manager = groupcompress._LazyGroupContentManager(block)
 
834
        # Request all the keys, which ensures that we won't rebuild
 
835
        self.add_key_to_manager(('key1',), locations, block, manager)
 
836
        self.add_key_to_manager(('key2',), locations, block, manager)
 
837
        self.add_key_to_manager(('key3',), locations, block, manager)
 
838
        self.add_key_to_manager(('key4',), locations, block, manager)
992
839
        manager._check_rebuild_block()
993
840
        self.assertIs(block, manager._block)
994
841
 
1019
866
            self.assertEqual(('key4',), record.key)
1020
867
            self.assertEqual(self._texts[record.key],
1021
868
                             record.get_bytes_as('fulltext'))
1022
 
 
1023
 
    def test_check_is_well_utilized_all_keys(self):
1024
 
        block, manager = self.make_block_and_full_manager(self._texts)
1025
 
        self.assertFalse(manager.check_is_well_utilized())
1026
 
        # Though we can fake it by changing the recommended minimum size
1027
 
        manager._full_enough_block_size = block._content_length
1028
 
        self.assertTrue(manager.check_is_well_utilized())
1029
 
        # Setting it just above causes it to fail
1030
 
        manager._full_enough_block_size = block._content_length + 1
1031
 
        self.assertFalse(manager.check_is_well_utilized())
1032
 
        # Setting the mixed-block size doesn't do anything, because the content
1033
 
        # is considered to not be 'mixed'
1034
 
        manager._full_enough_mixed_block_size = block._content_length
1035
 
        self.assertFalse(manager.check_is_well_utilized())
1036
 
 
1037
 
    def test_check_is_well_utilized_mixed_keys(self):
1038
 
        texts = {}
1039
 
        f1k1 = ('f1', 'k1')
1040
 
        f1k2 = ('f1', 'k2')
1041
 
        f2k1 = ('f2', 'k1')
1042
 
        f2k2 = ('f2', 'k2')
1043
 
        texts[f1k1] = self._texts[('key1',)]
1044
 
        texts[f1k2] = self._texts[('key2',)]
1045
 
        texts[f2k1] = self._texts[('key3',)]
1046
 
        texts[f2k2] = self._texts[('key4',)]
1047
 
        block, manager = self.make_block_and_full_manager(texts)
1048
 
        self.assertFalse(manager.check_is_well_utilized())
1049
 
        manager._full_enough_block_size = block._content_length
1050
 
        self.assertTrue(manager.check_is_well_utilized())
1051
 
        manager._full_enough_block_size = block._content_length + 1
1052
 
        self.assertFalse(manager.check_is_well_utilized())
1053
 
        manager._full_enough_mixed_block_size = block._content_length
1054
 
        self.assertTrue(manager.check_is_well_utilized())
1055
 
 
1056
 
    def test_check_is_well_utilized_partial_use(self):
1057
 
        locations, block = self.make_block(self._texts)
1058
 
        manager = groupcompress._LazyGroupContentManager(block)
1059
 
        manager._full_enough_block_size = block._content_length
1060
 
        self.add_key_to_manager(('key1',), locations, block, manager)
1061
 
        self.add_key_to_manager(('key2',), locations, block, manager)
1062
 
        # Just using the content from key1 and 2 is not enough to be considered
1063
 
        # 'complete'
1064
 
        self.assertFalse(manager.check_is_well_utilized())
1065
 
        # However if we add key4, then we have enough, as we only require 75%
1066
 
        # consumption
1067
 
        self.add_key_to_manager(('key4',), locations, block, manager)
1068
 
        self.assertTrue(manager.check_is_well_utilized())