~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/tests/test_groupcompress.py

  • Committer: Sidnei da Silva
  • Date: 2009-05-29 14:19:29 UTC
  • mto: (4531.1.1 integration)
  • mto: This revision was merged to the branch mainline in revision 4532.
  • Revision ID: sidnei.da.silva@canonical.com-20090529141929-3heywbvj36po72a5
- Add initial config

Show diffs side-by-side

added added

removed removed

Lines of Context:
25
25
    index as _mod_index,
26
26
    osutils,
27
27
    tests,
28
 
    trace,
29
28
    versionedfile,
30
29
    )
31
30
from bzrlib.osutils import sha_string
32
 
from bzrlib.tests.test__groupcompress import compiled_groupcompress_feature
 
31
from bzrlib.tests.test__groupcompress import CompiledGroupCompressFeature
33
32
 
34
33
 
35
34
def load_tests(standard_tests, module, loader):
39
38
    scenarios = [
40
39
        ('python', {'compressor': groupcompress.PythonGroupCompressor}),
41
40
        ]
42
 
    if compiled_groupcompress_feature.available():
 
41
    if CompiledGroupCompressFeature.available():
43
42
        scenarios.append(('C',
44
43
            {'compressor': groupcompress.PyrexGroupCompressor}))
45
44
    return tests.multiply_tests(to_adapt, scenarios, result)
135
134
 
136
135
class TestPyrexGroupCompressor(TestGroupCompressor):
137
136
 
138
 
    _test_needs_features = [compiled_groupcompress_feature]
 
137
    _test_needs_features = [CompiledGroupCompressFeature]
139
138
    compressor = groupcompress.PyrexGroupCompressor
140
139
 
141
140
    def test_stats(self):
364
363
        raw_bytes = zlib.decompress(remaining_bytes)
365
364
        self.assertEqual(content, raw_bytes)
366
365
 
367
 
        # we should get the same results if using the chunked version
368
 
        gcb = groupcompress.GroupCompressBlock()
369
 
        gcb.set_chunked_content(['this is some content\n'
370
 
                                 'this content will be compressed\n'],
371
 
                                 len(content))
372
 
        old_bytes = bytes
373
 
        bytes = gcb.to_bytes()
374
 
        self.assertEqual(old_bytes, bytes)
375
 
 
376
366
    def test_partial_decomp(self):
377
367
        content_chunks = []
378
368
        # We need a sufficient amount of data so that zlib.decompress has
418
408
        # And the decompressor is finalized
419
409
        self.assertIs(None, block._z_content_decompressor)
420
410
 
421
 
    def test__ensure_all_content(self):
 
411
    def test_partial_decomp_no_known_length(self):
422
412
        content_chunks = []
423
 
        # We need a sufficient amount of data so that zlib.decompress has
424
 
        # partial decompression to work with. Most auto-generated data
425
 
        # compresses a bit too well, we want a combination, so we combine a sha
426
 
        # hash with compressible data.
427
413
        for i in xrange(2048):
428
414
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
429
415
            content_chunks.append(next_content)
437
423
        block._z_content = z_content
438
424
        block._z_content_length = len(z_content)
439
425
        block._compressor_name = 'zlib'
440
 
        block._content_length = 158634
 
426
        block._content_length = None # Don't tell the decompressed length
441
427
        self.assertIs(None, block._content)
442
 
        # The first _ensure_content got all of the required data
443
 
        block._ensure_content(158634)
 
428
        block._ensure_content(100)
 
429
        self.assertIsNot(None, block._content)
 
430
        # We have decompressed at least 100 bytes
 
431
        self.assertTrue(len(block._content) >= 100)
 
432
        # We have not decompressed the whole content
 
433
        self.assertTrue(len(block._content) < 158634)
 
434
        self.assertEqualDiff(content[:len(block._content)], block._content)
 
435
        # ensuring content that we already have shouldn't cause any more data
 
436
        # to be extracted
 
437
        cur_len = len(block._content)
 
438
        block._ensure_content(cur_len - 10)
 
439
        self.assertEqual(cur_len, len(block._content))
 
440
        # Now we want a bit more content
 
441
        cur_len += 10
 
442
        block._ensure_content(cur_len)
 
443
        self.assertTrue(len(block._content) >= cur_len)
 
444
        self.assertTrue(len(block._content) < 158634)
 
445
        self.assertEqualDiff(content[:len(block._content)], block._content)
 
446
        # And now lets finish
 
447
        block._ensure_content()
444
448
        self.assertEqualDiff(content, block._content)
445
 
        # And we should have released the _z_content_decompressor since it was
446
 
        # fully consumed
 
449
        # And the decompressor is finalized
447
450
        self.assertIs(None, block._z_content_decompressor)
448
451
 
449
452
    def test__dump(self):
459
462
                         ], block._dump())
460
463
 
461
464
 
462
 
class TestCaseWithGroupCompressVersionedFiles(
463
 
        tests.TestCaseWithMemoryTransport):
 
465
class TestCaseWithGroupCompressVersionedFiles(tests.TestCaseWithTransport):
464
466
 
465
467
    def make_test_vf(self, create_graph, keylength=1, do_cleanup=True,
466
 
                     dir='.', inconsistency_fatal=True):
 
468
                     dir='.'):
467
469
        t = self.get_transport(dir)
468
470
        t.ensure_base()
469
471
        vf = groupcompress.make_pack_factory(graph=create_graph,
470
 
            delta=False, keylength=keylength,
471
 
            inconsistency_fatal=inconsistency_fatal)(t)
 
472
            delta=False, keylength=keylength)(t)
472
473
        if do_cleanup:
473
474
            self.addCleanup(groupcompress.cleanup_pack_group, vf)
474
475
        return vf
526
527
                    'as-requested', False)]
527
528
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)
528
529
 
529
 
    def test_insert_record_stream_reuses_blocks(self):
 
530
    def test_insert_record_stream_re_uses_blocks(self):
530
531
        vf = self.make_test_vf(True, dir='source')
531
532
        def grouped_stream(revision_ids, first_parents=()):
532
533
            parents = first_parents
570
571
        vf2 = self.make_test_vf(True, dir='target')
571
572
        # ordering in 'groupcompress' order, should actually swap the groups in
572
573
        # the target vf, but the groups themselves should not be disturbed.
573
 
        def small_size_stream():
574
 
            for record in vf.get_record_stream([(r,) for r in 'abcdefgh'],
575
 
                                               'groupcompress', False):
576
 
                record._manager._full_enough_block_size = \
577
 
                    record._manager._block._content_length
578
 
                yield record
579
 
                        
580
 
        vf2.insert_record_stream(small_size_stream())
 
574
        vf2.insert_record_stream(vf.get_record_stream(
 
575
            [(r,) for r in 'abcdefgh'], 'groupcompress', False))
581
576
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
582
577
                                       'groupcompress', False)
583
578
        vf2.writer.end()
588
583
                             record._manager._block._z_content)
589
584
        self.assertEqual(8, num_records)
590
585
 
591
 
    def test_insert_record_stream_packs_on_the_fly(self):
592
 
        vf = self.make_test_vf(True, dir='source')
593
 
        def grouped_stream(revision_ids, first_parents=()):
594
 
            parents = first_parents
595
 
            for revision_id in revision_ids:
596
 
                key = (revision_id,)
597
 
                record = versionedfile.FulltextContentFactory(
598
 
                    key, parents, None,
599
 
                    'some content that is\n'
600
 
                    'identical except for\n'
601
 
                    'revision_id:%s\n' % (revision_id,))
602
 
                yield record
603
 
                parents = (key,)
604
 
        # One group, a-d
605
 
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
606
 
        # Second group, e-h
607
 
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
608
 
                                               first_parents=(('d',),)))
609
 
        # Now copy the blocks into another vf, and see that the
610
 
        # insert_record_stream rebuilt a new block on-the-fly because of
611
 
        # under-utilization
612
 
        vf2 = self.make_test_vf(True, dir='target')
613
 
        vf2.insert_record_stream(vf.get_record_stream(
614
 
            [(r,) for r in 'abcdefgh'], 'groupcompress', False))
615
 
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
616
 
                                       'groupcompress', False)
617
 
        vf2.writer.end()
618
 
        num_records = 0
619
 
        # All of the records should be recombined into a single block
620
 
        block = None
621
 
        for record in stream:
622
 
            num_records += 1
623
 
            if block is None:
624
 
                block = record._manager._block
625
 
            else:
626
 
                self.assertIs(block, record._manager._block)
627
 
        self.assertEqual(8, num_records)
628
 
 
629
586
    def test__insert_record_stream_no_reuse_block(self):
630
587
        vf = self.make_test_vf(True, dir='source')
631
588
        def grouped_stream(revision_ids, first_parents=()):
692
649
            frozenset([('parent-1',), ('parent-2',)]),
693
650
            index.get_missing_parents())
694
651
 
695
 
    def make_source_with_b(self, a_parent, path):
696
 
        source = self.make_test_vf(True, dir=path)
697
 
        source.add_lines(('a',), (), ['lines\n'])
698
 
        if a_parent:
699
 
            b_parents = (('a',),)
700
 
        else:
701
 
            b_parents = ()
702
 
        source.add_lines(('b',), b_parents, ['lines\n'])
703
 
        return source
704
 
 
705
 
    def do_inconsistent_inserts(self, inconsistency_fatal):
706
 
        target = self.make_test_vf(True, dir='target',
707
 
                                   inconsistency_fatal=inconsistency_fatal)
708
 
        for x in range(2):
709
 
            source = self.make_source_with_b(x==1, 'source%s' % x)
710
 
            target.insert_record_stream(source.get_record_stream(
711
 
                [('b',)], 'unordered', False))
712
 
 
713
 
    def test_inconsistent_redundant_inserts_warn(self):
714
 
        """Should not insert a record that is already present."""
715
 
        warnings = []
716
 
        def warning(template, args):
717
 
            warnings.append(template % args)
718
 
        _trace_warning = trace.warning
719
 
        trace.warning = warning
720
 
        try:
721
 
            self.do_inconsistent_inserts(inconsistency_fatal=False)
722
 
        finally:
723
 
            trace.warning = _trace_warning
724
 
        self.assertEqual(["inconsistent details in skipped record: ('b',)"
725
 
                          " ('42 32 0 8', ((),)) ('74 32 0 8', ((('a',),),))"],
726
 
                         warnings)
727
 
 
728
 
    def test_inconsistent_redundant_inserts_raises(self):
729
 
        e = self.assertRaises(errors.KnitCorrupt, self.do_inconsistent_inserts,
730
 
                              inconsistency_fatal=True)
731
 
        self.assertContainsRe(str(e), "Knit.* corrupt: inconsistent details"
732
 
                              " in add_records:"
733
 
                              " \('b',\) \('42 32 0 8', \(\(\),\)\) \('74 32"
734
 
                              " 0 8', \(\(\('a',\),\),\)\)")
735
 
 
736
 
    def test_clear_cache(self):
737
 
        vf = self.make_source_with_b(True, 'source')
738
 
        vf.writer.end()
739
 
        for record in vf.get_record_stream([('a',), ('b',)], 'unordered',
740
 
                                           True):
741
 
            pass
742
 
        self.assertTrue(len(vf._group_cache) > 0)
743
 
        vf.clear_cache()
744
 
        self.assertEqual(0, len(vf._group_cache))
745
 
 
746
 
 
747
 
 
748
 
class StubGCVF(object):
749
 
    def __init__(self, canned_get_blocks=None):
750
 
        self._group_cache = {}
751
 
        self._canned_get_blocks = canned_get_blocks or []
752
 
    def _get_blocks(self, read_memos):
753
 
        return iter(self._canned_get_blocks)
754
 
    
755
 
 
756
 
class Test_BatchingBlockFetcher(TestCaseWithGroupCompressVersionedFiles):
757
 
    """Simple whitebox unit tests for _BatchingBlockFetcher."""
758
 
    
759
 
    def test_add_key_new_read_memo(self):
760
 
        """Adding a key with an uncached read_memo new to this batch adds that
761
 
        read_memo to the list of memos to fetch.
762
 
        """
763
 
        # locations are: index_memo, ignored, parents, ignored
764
 
        # where index_memo is: (idx, offset, len, factory_start, factory_end)
765
 
        # and (idx, offset, size) is known as the 'read_memo', identifying the
766
 
        # raw bytes needed.
767
 
        read_memo = ('fake index', 100, 50)
768
 
        locations = {
769
 
            ('key',): (read_memo + (None, None), None, None, None)}
770
 
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), locations)
771
 
        total_size = batcher.add_key(('key',))
772
 
        self.assertEqual(50, total_size)
773
 
        self.assertEqual([('key',)], batcher.keys)
774
 
        self.assertEqual([read_memo], batcher.memos_to_get)
775
 
 
776
 
    def test_add_key_duplicate_read_memo(self):
777
 
        """read_memos that occur multiple times in a batch will only be fetched
778
 
        once.
779
 
        """
780
 
        read_memo = ('fake index', 100, 50)
781
 
        # Two keys, both sharing the same read memo (but different overall
782
 
        # index_memos).
783
 
        locations = {
784
 
            ('key1',): (read_memo + (0, 1), None, None, None),
785
 
            ('key2',): (read_memo + (1, 2), None, None, None)}
786
 
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), locations)
787
 
        total_size = batcher.add_key(('key1',))
788
 
        total_size = batcher.add_key(('key2',))
789
 
        self.assertEqual(50, total_size)
790
 
        self.assertEqual([('key1',), ('key2',)], batcher.keys)
791
 
        self.assertEqual([read_memo], batcher.memos_to_get)
792
 
 
793
 
    def test_add_key_cached_read_memo(self):
794
 
        """Adding a key with a cached read_memo will not cause that read_memo
795
 
        to be added to the list to fetch.
796
 
        """
797
 
        read_memo = ('fake index', 100, 50)
798
 
        gcvf = StubGCVF()
799
 
        gcvf._group_cache[read_memo] = 'fake block'
800
 
        locations = {
801
 
            ('key',): (read_memo + (None, None), None, None, None)}
802
 
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
803
 
        total_size = batcher.add_key(('key',))
804
 
        self.assertEqual(0, total_size)
805
 
        self.assertEqual([('key',)], batcher.keys)
806
 
        self.assertEqual([], batcher.memos_to_get)
807
 
 
808
 
    def test_yield_factories_empty(self):
809
 
        """An empty batch yields no factories."""
810
 
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), {})
811
 
        self.assertEqual([], list(batcher.yield_factories()))
812
 
 
813
 
    def test_yield_factories_calls_get_blocks(self):
814
 
        """Uncached memos are retrieved via get_blocks."""
815
 
        read_memo1 = ('fake index', 100, 50)
816
 
        read_memo2 = ('fake index', 150, 40)
817
 
        gcvf = StubGCVF(
818
 
            canned_get_blocks=[
819
 
                (read_memo1, groupcompress.GroupCompressBlock()),
820
 
                (read_memo2, groupcompress.GroupCompressBlock())])
821
 
        locations = {
822
 
            ('key1',): (read_memo1 + (None, None), None, None, None),
823
 
            ('key2',): (read_memo2 + (None, None), None, None, None)}
824
 
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
825
 
        batcher.add_key(('key1',))
826
 
        batcher.add_key(('key2',))
827
 
        factories = list(batcher.yield_factories(full_flush=True))
828
 
        self.assertLength(2, factories)
829
 
        keys = [f.key for f in factories]
830
 
        kinds = [f.storage_kind for f in factories]
831
 
        self.assertEqual([('key1',), ('key2',)], keys)
832
 
        self.assertEqual(['groupcompress-block', 'groupcompress-block'], kinds)
833
 
 
834
 
    def test_yield_factories_flushing(self):
835
 
        """yield_factories holds back on yielding results from the final block
836
 
        unless passed full_flush=True.
837
 
        """
838
 
        fake_block = groupcompress.GroupCompressBlock()
839
 
        read_memo = ('fake index', 100, 50)
840
 
        gcvf = StubGCVF()
841
 
        gcvf._group_cache[read_memo] = fake_block
842
 
        locations = {
843
 
            ('key',): (read_memo + (None, None), None, None, None)}
844
 
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
845
 
        batcher.add_key(('key',))
846
 
        self.assertEqual([], list(batcher.yield_factories()))
847
 
        factories = list(batcher.yield_factories(full_flush=True))
848
 
        self.assertLength(1, factories)
849
 
        self.assertEqual(('key',), factories[0].key)
850
 
        self.assertEqual('groupcompress-block', factories[0].storage_kind)
851
 
 
852
652
 
853
653
class TestLazyGroupCompress(tests.TestCaseWithTransport):
854
654
 
855
655
    _texts = {
856
656
        ('key1',): "this is a text\n"
857
 
                   "with a reasonable amount of compressible bytes\n"
858
 
                   "which can be shared between various other texts\n",
 
657
                   "with a reasonable amount of compressible bytes\n",
859
658
        ('key2',): "another text\n"
860
 
                   "with a reasonable amount of compressible bytes\n"
861
 
                   "which can be shared between various other texts\n",
 
659
                   "with a reasonable amount of compressible bytes\n",
862
660
        ('key3',): "yet another text which won't be extracted\n"
863
 
                   "with a reasonable amount of compressible bytes\n"
864
 
                   "which can be shared between various other texts\n",
 
661
                   "with a reasonable amount of compressible bytes\n",
865
662
        ('key4',): "this will be extracted\n"
866
663
                   "but references most of its bytes from\n"
867
664
                   "yet another text which won't be extracted\n"
868
 
                   "with a reasonable amount of compressible bytes\n"
869
 
                   "which can be shared between various other texts\n",
 
665
                   "with a reasonable amount of compressible bytes\n",
870
666
    }
871
667
    def make_block(self, key_to_text):
872
668
        """Create a GroupCompressBlock, filling it with the given texts."""
884
680
        start, end = locations[key]
885
681
        manager.add_factory(key, (), start, end)
886
682
 
887
 
    def make_block_and_full_manager(self, texts):
888
 
        locations, block = self.make_block(texts)
889
 
        manager = groupcompress._LazyGroupContentManager(block)
890
 
        for key in sorted(texts):
891
 
            self.add_key_to_manager(key, locations, block, manager)
892
 
        return block, manager
893
 
 
894
683
    def test_get_fulltexts(self):
895
684
        locations, block = self.make_block(self._texts)
896
685
        manager = groupcompress._LazyGroupContentManager(block)
947
736
        header_len = int(header_len)
948
737
        block_len = int(block_len)
949
738
        self.assertEqual('groupcompress-block', storage_kind)
950
 
        self.assertEqual(34, z_header_len)
951
 
        self.assertEqual(26, header_len)
 
739
        self.assertEqual(33, z_header_len)
 
740
        self.assertEqual(25, header_len)
952
741
        self.assertEqual(len(block_bytes), block_len)
953
742
        z_header = rest[:z_header_len]
954
743
        header = zlib.decompress(z_header)
988
777
        self.assertEqual([('key1',), ('key4',)], result_order)
989
778
 
990
779
    def test__check_rebuild_no_changes(self):
991
 
        block, manager = self.make_block_and_full_manager(self._texts)
 
780
        locations, block = self.make_block(self._texts)
 
781
        manager = groupcompress._LazyGroupContentManager(block)
 
782
        # Request all the keys, which ensures that we won't rebuild
 
783
        self.add_key_to_manager(('key1',), locations, block, manager)
 
784
        self.add_key_to_manager(('key2',), locations, block, manager)
 
785
        self.add_key_to_manager(('key3',), locations, block, manager)
 
786
        self.add_key_to_manager(('key4',), locations, block, manager)
992
787
        manager._check_rebuild_block()
993
788
        self.assertIs(block, manager._block)
994
789
 
1019
814
            self.assertEqual(('key4',), record.key)
1020
815
            self.assertEqual(self._texts[record.key],
1021
816
                             record.get_bytes_as('fulltext'))
1022
 
 
1023
 
    def test_check_is_well_utilized_all_keys(self):
1024
 
        block, manager = self.make_block_and_full_manager(self._texts)
1025
 
        self.assertFalse(manager.check_is_well_utilized())
1026
 
        # Though we can fake it by changing the recommended minimum size
1027
 
        manager._full_enough_block_size = block._content_length
1028
 
        self.assertTrue(manager.check_is_well_utilized())
1029
 
        # Setting it just above causes it to fail
1030
 
        manager._full_enough_block_size = block._content_length + 1
1031
 
        self.assertFalse(manager.check_is_well_utilized())
1032
 
        # Setting the mixed-block size doesn't do anything, because the content
1033
 
        # is considered to not be 'mixed'
1034
 
        manager._full_enough_mixed_block_size = block._content_length
1035
 
        self.assertFalse(manager.check_is_well_utilized())
1036
 
 
1037
 
    def test_check_is_well_utilized_mixed_keys(self):
1038
 
        texts = {}
1039
 
        f1k1 = ('f1', 'k1')
1040
 
        f1k2 = ('f1', 'k2')
1041
 
        f2k1 = ('f2', 'k1')
1042
 
        f2k2 = ('f2', 'k2')
1043
 
        texts[f1k1] = self._texts[('key1',)]
1044
 
        texts[f1k2] = self._texts[('key2',)]
1045
 
        texts[f2k1] = self._texts[('key3',)]
1046
 
        texts[f2k2] = self._texts[('key4',)]
1047
 
        block, manager = self.make_block_and_full_manager(texts)
1048
 
        self.assertFalse(manager.check_is_well_utilized())
1049
 
        manager._full_enough_block_size = block._content_length
1050
 
        self.assertTrue(manager.check_is_well_utilized())
1051
 
        manager._full_enough_block_size = block._content_length + 1
1052
 
        self.assertFalse(manager.check_is_well_utilized())
1053
 
        manager._full_enough_mixed_block_size = block._content_length
1054
 
        self.assertTrue(manager.check_is_well_utilized())
1055
 
 
1056
 
    def test_check_is_well_utilized_partial_use(self):
1057
 
        locations, block = self.make_block(self._texts)
1058
 
        manager = groupcompress._LazyGroupContentManager(block)
1059
 
        manager._full_enough_block_size = block._content_length
1060
 
        self.add_key_to_manager(('key1',), locations, block, manager)
1061
 
        self.add_key_to_manager(('key2',), locations, block, manager)
1062
 
        # Just using the content from key1 and 2 is not enough to be considered
1063
 
        # 'complete'
1064
 
        self.assertFalse(manager.check_is_well_utilized())
1065
 
        # However if we add key3, then we have enough, as we only require 75%
1066
 
        # consumption
1067
 
        self.add_key_to_manager(('key4',), locations, block, manager)
1068
 
        self.assertTrue(manager.check_is_well_utilized())