~bzr-pqm/bzr/bzr.dev

Viewing changes to bzrlib/tests/test_groupcompress.py

  • Committer: Martin Pool
  • Date: 2009-09-14 01:48:28 UTC
  • mfrom: (4685 +trunk)
  • mto: This revision was merged to the branch mainline in revision 4688.
  • Revision ID: mbp@sourcefrog.net-20090914014828-ydr9rlkdfq2sv57z
Merge news

@@ -538,7 +538,7 @@
                      'as-requested', False)]
          self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)
 
-    def test_insert_record_stream_re_uses_blocks(self):
+    def test_insert_record_stream_reuses_blocks(self):
         vf = self.make_test_vf(True, dir='source')
         def grouped_stream(revision_ids, first_parents=()):
             parents = first_parents
@@ -582,8 +582,14 @@
         vf2 = self.make_test_vf(True, dir='target')
         # ordering in 'groupcompress' order, should actually swap the groups in
         # the target vf, but the groups themselves should not be disturbed.
-        vf2.insert_record_stream(vf.get_record_stream(
-            [(r,) for r in 'abcdefgh'], 'groupcompress', False))
+        def small_size_stream():
+            for record in vf.get_record_stream([(r,) for r in 'abcdefgh'],
+                                               'groupcompress', False):
+                record._manager._full_enough_block_size = \
+                    record._manager._block._content_length
+                yield record
+
+        vf2.insert_record_stream(small_size_stream())
         stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                        'groupcompress', False)
         vf2.writer.end()
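
The small_size_stream() wrapper above steers the block-reuse heuristic: by
pinning each manager's _full_enough_block_size to the block's own content
length, every incoming block qualifies as "full enough" and is copied into
the target unchanged. A minimal model of the threshold being steered (an
inference from the assertions in this diff, not the real bzrlib code):

    def block_is_full_enough(manager):
        # A block counts as "full enough" when its content reaches the
        # manager's recommended minimum size; small_size_stream() makes
        # this trivially true by setting the threshold to the block's
        # own length.
        return (manager._block._content_length
                >= manager._full_enough_block_size)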
@@ -594,6 +600,44 @@
                              record._manager._block._z_content)
         self.assertEqual(8, num_records)
 
+    def test_insert_record_stream_packs_on_the_fly(self):
+        vf = self.make_test_vf(True, dir='source')
+        def grouped_stream(revision_ids, first_parents=()):
+            parents = first_parents
+            for revision_id in revision_ids:
+                key = (revision_id,)
+                record = versionedfile.FulltextContentFactory(
+                    key, parents, None,
+                    'some content that is\n'
+                    'identical except for\n'
+                    'revision_id:%s\n' % (revision_id,))
+                yield record
+                parents = (key,)
+        # One group, a-d
+        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
+        # Second group, e-h
+        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
+                                               first_parents=(('d',),)))
+        # Now copy the blocks into another vf, and see that the
+        # insert_record_stream rebuilt a new block on-the-fly because of
+        # under-utilization
+        vf2 = self.make_test_vf(True, dir='target')
+        vf2.insert_record_stream(vf.get_record_stream(
+            [(r,) for r in 'abcdefgh'], 'groupcompress', False))
+        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
+                                       'groupcompress', False)
+        vf2.writer.end()
+        num_records = 0
+        # All of the records should be recombined into a single block
+        block = None
+        for record in stream:
+            num_records += 1
+            if block is None:
+                block = record._manager._block
+            else:
+                self.assertIs(block, record._manager._block)
+        self.assertEqual(8, num_records)
+
     def test__insert_record_stream_no_reuse_block(self):
         vf = self.make_test_vf(True, dir='source')
         def grouped_stream(revision_ids, first_parents=()):
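
Together with test_insert_record_stream_reuses_blocks above, this test pins
down both sides of the pack-on-the-fly decision. A sketch of that decision
as these tests imply it (the helpers are hypothetical; the real logic lives
inside insert_record_stream):

    for record in stream:
        if (record.storage_kind == 'groupcompress-block'
                and record._manager.check_is_well_utilized()):
            # Keep the incoming block intact and just index its keys.
            reuse_block(record)                  # hypothetical helper
        else:
            # Under-utilized: recompress the text into the block
            # currently being built, combining small groups into one.
            repack_into_current_block(record)    # hypothetical helper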
@@ -702,19 +746,128 @@
                               " 0 8', \(\(\('a',\),\),\)\)")
 
 
+class StubGCVF(object):
+    def __init__(self, canned_get_blocks=None):
+        self._group_cache = {}
+        self._canned_get_blocks = canned_get_blocks or []
+    def _get_blocks(self, read_memos):
+        return iter(self._canned_get_blocks)
+
+
+class Test_BatchingBlockFetcher(TestCaseWithGroupCompressVersionedFiles):
+    """Simple whitebox unit tests for _BatchingBlockFetcher."""
+
+    def test_add_key_new_read_memo(self):
+        """Adding a key with an uncached read_memo new to this batch adds that
+        read_memo to the list of memos to fetch.
+        """
+        # locations are: index_memo, ignored, parents, ignored
+        # where index_memo is: (idx, offset, len, factory_start, factory_end)
+        # and (idx, offset, size) is known as the 'read_memo', identifying the
+        # raw bytes needed.
+        read_memo = ('fake index', 100, 50)
+        locations = {
+            ('key',): (read_memo + (None, None), None, None, None)}
+        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), locations)
+        total_size = batcher.add_key(('key',))
+        self.assertEqual(50, total_size)
+        self.assertEqual([('key',)], batcher.keys)
+        self.assertEqual([read_memo], batcher.memos_to_get)
+
+    def test_add_key_duplicate_read_memo(self):
+        """read_memos that occur multiple times in a batch will only be fetched
+        once.
+        """
+        read_memo = ('fake index', 100, 50)
+        # Two keys, both sharing the same read memo (but different overall
+        # index_memos).
+        locations = {
+            ('key1',): (read_memo + (0, 1), None, None, None),
+            ('key2',): (read_memo + (1, 2), None, None, None)}
+        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), locations)
+        total_size = batcher.add_key(('key1',))
+        total_size = batcher.add_key(('key2',))
+        self.assertEqual(50, total_size)
+        self.assertEqual([('key1',), ('key2',)], batcher.keys)
+        self.assertEqual([read_memo], batcher.memos_to_get)
+
+    def test_add_key_cached_read_memo(self):
+        """Adding a key with a cached read_memo will not cause that read_memo
+        to be added to the list to fetch.
+        """
+        read_memo = ('fake index', 100, 50)
+        gcvf = StubGCVF()
+        gcvf._group_cache[read_memo] = 'fake block'
+        locations = {
+            ('key',): (read_memo + (None, None), None, None, None)}
+        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
+        total_size = batcher.add_key(('key',))
+        self.assertEqual(0, total_size)
+        self.assertEqual([('key',)], batcher.keys)
+        self.assertEqual([], batcher.memos_to_get)
+
+    def test_yield_factories_empty(self):
+        """An empty batch yields no factories."""
+        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), {})
+        self.assertEqual([], list(batcher.yield_factories()))
+
+    def test_yield_factories_calls_get_blocks(self):
+        """Uncached memos are retrieved via get_blocks."""
+        read_memo1 = ('fake index', 100, 50)
+        read_memo2 = ('fake index', 150, 40)
+        gcvf = StubGCVF(
+            canned_get_blocks=[
+                (read_memo1, groupcompress.GroupCompressBlock()),
+                (read_memo2, groupcompress.GroupCompressBlock())])
+        locations = {
+            ('key1',): (read_memo1 + (None, None), None, None, None),
+            ('key2',): (read_memo2 + (None, None), None, None, None)}
+        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
+        batcher.add_key(('key1',))
+        batcher.add_key(('key2',))
+        factories = list(batcher.yield_factories(full_flush=True))
+        self.assertLength(2, factories)
+        keys = [f.key for f in factories]
+        kinds = [f.storage_kind for f in factories]
+        self.assertEqual([('key1',), ('key2',)], keys)
+        self.assertEqual(['groupcompress-block', 'groupcompress-block'], kinds)
+
+    def test_yield_factories_flushing(self):
+        """yield_factories holds back on yielding results from the final block
+        unless passed full_flush=True.
+        """
+        fake_block = groupcompress.GroupCompressBlock()
+        read_memo = ('fake index', 100, 50)
+        gcvf = StubGCVF()
+        gcvf._group_cache[read_memo] = fake_block
+        locations = {
+            ('key',): (read_memo + (None, None), None, None, None)}
+        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
+        batcher.add_key(('key',))
+        self.assertEqual([], list(batcher.yield_factories()))
+        factories = list(batcher.yield_factories(full_flush=True))
+        self.assertLength(1, factories)
+        self.assertEqual(('key',), factories[0].key)
+        self.assertEqual('groupcompress-block', factories[0].storage_kind)
+
+
 class TestLazyGroupCompress(tests.TestCaseWithTransport):
 
     _texts = {
         ('key1',): "this is a text\n"
-                   "with a reasonable amount of compressible bytes\n",
+                   "with a reasonable amount of compressible bytes\n"
+                   "which can be shared between various other texts\n",
         ('key2',): "another text\n"
-                   "with a reasonable amount of compressible bytes\n",
+                   "with a reasonable amount of compressible bytes\n"
+                   "which can be shared between various other texts\n",
         ('key3',): "yet another text which won't be extracted\n"
-                   "with a reasonable amount of compressible bytes\n",
+                   "with a reasonable amount of compressible bytes\n"
+                   "which can be shared between various other texts\n",
         ('key4',): "this will be extracted\n"
                    "but references most of its bytes from\n"
                    "yet another text which won't be extracted\n"
-                   "with a reasonable amount of compressible bytes\n",
+                   "with a reasonable amount of compressible bytes\n"
+                   "which can be shared between various other texts\n",
     }
     def make_block(self, key_to_text):
         """Create a GroupCompressBlock, filling it with the given texts."""
@@ -732,6 +885,13 @@
         start, end = locations[key]
         manager.add_factory(key, (), start, end)
 
+    def make_block_and_full_manager(self, texts):
+        locations, block = self.make_block(texts)
+        manager = groupcompress._LazyGroupContentManager(block)
+        for key in sorted(texts):
+            self.add_key_to_manager(key, locations, block, manager)
+        return block, manager
+
     def test_get_fulltexts(self):
         locations, block = self.make_block(self._texts)
         manager = groupcompress._LazyGroupContentManager(block)
@@ -788,8 +948,8 @@
         header_len = int(header_len)
         block_len = int(block_len)
         self.assertEqual('groupcompress-block', storage_kind)
-        self.assertEqual(33, z_header_len)
-        self.assertEqual(25, header_len)
+        self.assertEqual(34, z_header_len)
+        self.assertEqual(26, header_len)
         self.assertEqual(len(block_bytes), block_len)
         z_header = rest[:z_header_len]
         header = zlib.decompress(z_header)
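
Both expected header lengths grow by exactly one byte here. A plausible
explanation (an inference from the longer sample texts above, not verified
against the wire-format code): the header records start/end offsets as
decimal text, so a longer block can push an offset into an extra digit:

    # One extra digit in a recorded offset costs one uncompressed header
    # byte, and here also one byte after zlib compression.
    assert len('0 100\n') == len('0 99\n') + 1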
@@ -829,13 +989,7 @@
         self.assertEqual([('key1',), ('key4',)], result_order)
 
     def test__check_rebuild_no_changes(self):
-        locations, block = self.make_block(self._texts)
-        manager = groupcompress._LazyGroupContentManager(block)
-        # Request all the keys, which ensures that we won't rebuild
-        self.add_key_to_manager(('key1',), locations, block, manager)
-        self.add_key_to_manager(('key2',), locations, block, manager)
-        self.add_key_to_manager(('key3',), locations, block, manager)
-        self.add_key_to_manager(('key4',), locations, block, manager)
+        block, manager = self.make_block_and_full_manager(self._texts)
         manager._check_rebuild_block()
         self.assertIs(block, manager._block)
 
@@ -866,3 +1020,50 @@
             self.assertEqual(('key4',), record.key)
             self.assertEqual(self._texts[record.key],
                              record.get_bytes_as('fulltext'))
+
+    def test_check_is_well_utilized_all_keys(self):
+        block, manager = self.make_block_and_full_manager(self._texts)
+        self.assertFalse(manager.check_is_well_utilized())
+        # Though we can fake it by changing the recommended minimum size
+        manager._full_enough_block_size = block._content_length
+        self.assertTrue(manager.check_is_well_utilized())
+        # Setting it just above causes it to fail
+        manager._full_enough_block_size = block._content_length + 1
+        self.assertFalse(manager.check_is_well_utilized())
+        # Setting the mixed-block size doesn't do anything, because the content
+        # is considered to not be 'mixed'
+        manager._full_enough_mixed_block_size = block._content_length
+        self.assertFalse(manager.check_is_well_utilized())
+
+    def test_check_is_well_utilized_mixed_keys(self):
+        texts = {}
+        f1k1 = ('f1', 'k1')
+        f1k2 = ('f1', 'k2')
+        f2k1 = ('f2', 'k1')
+        f2k2 = ('f2', 'k2')
+        texts[f1k1] = self._texts[('key1',)]
+        texts[f1k2] = self._texts[('key2',)]
+        texts[f2k1] = self._texts[('key3',)]
+        texts[f2k2] = self._texts[('key4',)]
+        block, manager = self.make_block_and_full_manager(texts)
+        self.assertFalse(manager.check_is_well_utilized())
+        manager._full_enough_block_size = block._content_length
+        self.assertTrue(manager.check_is_well_utilized())
+        manager._full_enough_block_size = block._content_length + 1
+        self.assertFalse(manager.check_is_well_utilized())
+        manager._full_enough_mixed_block_size = block._content_length
+        self.assertTrue(manager.check_is_well_utilized())
+
+    def test_check_is_well_utilized_partial_use(self):
+        locations, block = self.make_block(self._texts)
+        manager = groupcompress._LazyGroupContentManager(block)
+        manager._full_enough_block_size = block._content_length
+        self.add_key_to_manager(('key1',), locations, block, manager)
+        self.add_key_to_manager(('key2',), locations, block, manager)
+        # Just using the content from key1 and 2 is not enough to be considered
+        # 'complete'
+        self.assertFalse(manager.check_is_well_utilized())
+        # However if we add key4, then we have enough, as we only require 75%
+        # consumption
+        self.add_key_to_manager(('key4',), locations, block, manager)
+        self.assertTrue(manager.check_is_well_utilized())
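
These three tests pin down the check_is_well_utilized() policy from the
outside. A model consistent with all of the assertions above (an inference
from this diff, not the real groupcompress implementation; the _factories,
_start and _end attribute names are assumptions):

    def is_well_utilized(manager):
        # Less than 75% of the block's bytes requested: under-utilized
        # regardless of any size threshold (the partial_use test).
        size = manager._block._content_length
        used = sum(f._end - f._start for f in manager._factories)
        if used < 0.75 * size:
            return False
        # At or above the recommended minimum size: well utilized.
        if size >= manager._full_enough_block_size:
            return True
        # Blocks whose keys span several prefixes ('mixed' content) are
        # granted a second, separate threshold (the mixed_keys test).
        prefixes = set(f.key[:-1] for f in manager._factories)
        return (len(prefixes) > 1
                and size >= manager._full_enough_mixed_block_size)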