~bzr-pqm/bzr/bzr.dev

Viewing changes to bzrlib/tests/test_groupcompress.py

merge 2.0 trunk (rev 4660)

=== modified file 'bzrlib/tests/test_groupcompress.py'
@@ -538,7 +538,7 @@
                     'as-requested', False)]
         self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)
 
-    def test_insert_record_stream_re_uses_blocks(self):
+    def test_insert_record_stream_reuses_blocks(self):
         vf = self.make_test_vf(True, dir='source')
         def grouped_stream(revision_ids, first_parents=()):
             parents = first_parents
@@ -582,8 +582,14 @@
         vf2 = self.make_test_vf(True, dir='target')
         # ordering in 'groupcompress' order, should actually swap the groups in
         # the target vf, but the groups themselves should not be disturbed.
-        vf2.insert_record_stream(vf.get_record_stream(
-            [(r,) for r in 'abcdefgh'], 'groupcompress', False))
+        def small_size_stream():
+            for record in vf.get_record_stream([(r,) for r in 'abcdefgh'],
+                                               'groupcompress', False):
+                record._manager._full_enough_block_size = \
+                    record._manager._block._content_length
+                yield record
+
+        vf2.insert_record_stream(small_size_stream())
         stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                        'groupcompress', False)
         vf2.writer.end()
@@ -594,6 +600,44 @@
                              record._manager._block._z_content)
         self.assertEqual(8, num_records)
 
+    def test_insert_record_stream_packs_on_the_fly(self):
+        vf = self.make_test_vf(True, dir='source')
+        def grouped_stream(revision_ids, first_parents=()):
+            parents = first_parents
+            for revision_id in revision_ids:
+                key = (revision_id,)
+                record = versionedfile.FulltextContentFactory(
+                    key, parents, None,
+                    'some content that is\n'
+                    'identical except for\n'
+                    'revision_id:%s\n' % (revision_id,))
+                yield record
+                parents = (key,)
+        # One group, a-d
+        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
+        # Second group, e-h
+        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
+                                               first_parents=(('d',),)))
+        # Now copy the blocks into another vf, and see that the
+        # insert_record_stream rebuilt a new block on-the-fly because of
+        # under-utilization
+        vf2 = self.make_test_vf(True, dir='target')
+        vf2.insert_record_stream(vf.get_record_stream(
+            [(r,) for r in 'abcdefgh'], 'groupcompress', False))
+        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
+                                       'groupcompress', False)
+        vf2.writer.end()
+        num_records = 0
+        # All of the records should be recombined into a single block
+        block = None
+        for record in stream:
+            num_records += 1
+            if block is None:
+                block = record._manager._block
+            else:
+                self.assertIs(block, record._manager._block)
+        self.assertEqual(8, num_records)
+
     def test__insert_record_stream_no_reuse_block(self):
         vf = self.make_test_vf(True, dir='source')
         def grouped_stream(revision_ids, first_parents=()):
@@ -706,15 +750,19 @@
 
     _texts = {
         ('key1',): "this is a text\n"
-                   "with a reasonable amount of compressible bytes\n",
+                   "with a reasonable amount of compressible bytes\n"
+                   "which can be shared between various other texts\n",
         ('key2',): "another text\n"
-                   "with a reasonable amount of compressible bytes\n",
+                   "with a reasonable amount of compressible bytes\n"
+                   "which can be shared between various other texts\n",
         ('key3',): "yet another text which won't be extracted\n"
-                   "with a reasonable amount of compressible bytes\n",
+                   "with a reasonable amount of compressible bytes\n"
+                   "which can be shared between various other texts\n",
         ('key4',): "this will be extracted\n"
                    "but references most of its bytes from\n"
                    "yet another text which won't be extracted\n"
-                   "with a reasonable amount of compressible bytes\n",
+                   "with a reasonable amount of compressible bytes\n"
+                   "which can be shared between various other texts\n",
     }
     def make_block(self, key_to_text):
         """Create a GroupCompressBlock, filling it with the given texts."""
@@ -732,6 +780,13 @@
         start, end = locations[key]
         manager.add_factory(key, (), start, end)
 
+    def make_block_and_full_manager(self, texts):
+        locations, block = self.make_block(texts)
+        manager = groupcompress._LazyGroupContentManager(block)
+        for key in sorted(texts):
+            self.add_key_to_manager(key, locations, block, manager)
+        return block, manager
+
     def test_get_fulltexts(self):
         locations, block = self.make_block(self._texts)
         manager = groupcompress._LazyGroupContentManager(block)
@@ -788,8 +843,8 @@
         header_len = int(header_len)
         block_len = int(block_len)
         self.assertEqual('groupcompress-block', storage_kind)
-        self.assertEqual(33, z_header_len)
-        self.assertEqual(25, header_len)
+        self.assertEqual(34, z_header_len)
+        self.assertEqual(26, header_len)
         self.assertEqual(len(block_bytes), block_len)
         z_header = rest[:z_header_len]
         header = zlib.decompress(z_header)
@@ -829,13 +884,7 @@
         self.assertEqual([('key1',), ('key4',)], result_order)
 
     def test__check_rebuild_no_changes(self):
-        locations, block = self.make_block(self._texts)
-        manager = groupcompress._LazyGroupContentManager(block)
-        # Request all the keys, which ensures that we won't rebuild
-        self.add_key_to_manager(('key1',), locations, block, manager)
-        self.add_key_to_manager(('key2',), locations, block, manager)
-        self.add_key_to_manager(('key3',), locations, block, manager)
-        self.add_key_to_manager(('key4',), locations, block, manager)
+        block, manager = self.make_block_and_full_manager(self._texts)
         manager._check_rebuild_block()
         self.assertIs(block, manager._block)
 
@@ -866,3 +915,50 @@
             self.assertEqual(('key4',), record.key)
             self.assertEqual(self._texts[record.key],
                              record.get_bytes_as('fulltext'))
+
+    def test_check_is_well_utilized_all_keys(self):
+        block, manager = self.make_block_and_full_manager(self._texts)
+        self.assertFalse(manager.check_is_well_utilized())
+        # Though we can fake it by changing the recommended minimum size
+        manager._full_enough_block_size = block._content_length
+        self.assertTrue(manager.check_is_well_utilized())
+        # Setting it just above causes it to fail
+        manager._full_enough_block_size = block._content_length + 1
+        self.assertFalse(manager.check_is_well_utilized())
+        # Setting the mixed-block size doesn't do anything, because the content
+        # is considered to not be 'mixed'
+        manager._full_enough_mixed_block_size = block._content_length
+        self.assertFalse(manager.check_is_well_utilized())
+
+    def test_check_is_well_utilized_mixed_keys(self):
+        texts = {}
+        f1k1 = ('f1', 'k1')
+        f1k2 = ('f1', 'k2')
+        f2k1 = ('f2', 'k1')
+        f2k2 = ('f2', 'k2')
+        texts[f1k1] = self._texts[('key1',)]
+        texts[f1k2] = self._texts[('key2',)]
+        texts[f2k1] = self._texts[('key3',)]
+        texts[f2k2] = self._texts[('key4',)]
+        block, manager = self.make_block_and_full_manager(texts)
+        self.assertFalse(manager.check_is_well_utilized())
+        manager._full_enough_block_size = block._content_length
+        self.assertTrue(manager.check_is_well_utilized())
+        manager._full_enough_block_size = block._content_length + 1
+        self.assertFalse(manager.check_is_well_utilized())
+        manager._full_enough_mixed_block_size = block._content_length
+        self.assertTrue(manager.check_is_well_utilized())
+
+    def test_check_is_well_utilized_partial_use(self):
+        locations, block = self.make_block(self._texts)
+        manager = groupcompress._LazyGroupContentManager(block)
+        manager._full_enough_block_size = block._content_length
+        self.add_key_to_manager(('key1',), locations, block, manager)
+        self.add_key_to_manager(('key2',), locations, block, manager)
+        # Just using the content from key1 and 2 is not enough to be considered
+        # 'complete'
+        self.assertFalse(manager.check_is_well_utilized())
+        # However if we add key3, then we have enough, as we only require 75%
+        # consumption
+        self.add_key_to_manager(('key4',), locations, block, manager)
+        self.assertTrue(manager.check_is_well_utilized())
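
The sketch below is not part of the changeset; it only strings together, outside the test class, the calls the new tests rely on. groupcompress._LazyGroupContentManager, check_is_well_utilized(), _full_enough_block_size, make_block() and add_key_to_manager() are taken verbatim from the diff above, while the standalone probe_utilization() helper and its test_case parameter are hypothetical glue for illustration only.

    from bzrlib import groupcompress

    def probe_utilization(test_case, texts):
        # Mirror the new make_block_and_full_manager() helper: build a block
        # from the fixture texts, then register every key with a lazy
        # content manager.
        locations, block = test_case.make_block(texts)
        manager = groupcompress._LazyGroupContentManager(block)
        for key in sorted(texts):
            test_case.add_key_to_manager(key, locations, block, manager)
        # As in test_check_is_well_utilized_all_keys: the block only reports
        # itself as well utilized once the recommended minimum block size is
        # no larger than the content it actually holds.
        before = manager.check_is_well_utilized()
        manager._full_enough_block_size = block._content_length
        after = manager.check_is_well_utilized()
        return before, after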