~bzr-pqm/bzr/bzr.dev

Viewing changes to bzrlib/tests/test_groupcompress.py

  • Committer: Joe Julian
  • Date: 2010-01-10 02:25:31 UTC
  • mto: (4634.119.7 2.0)
  • mto: This revision was merged to the branch mainline in revision 4959.
  • Revision ID: joe@julianfamily.org-20100110022531-wqk61rsagz8xsiga
Added MANIFEST.in so that bdist_rpm has all the required include files and tools. bdist_rpm will still fail to build correctly on some distributions due to a distutils bug: http://bugs.python.org/issue644744
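(For reference: MANIFEST.in is a distutils template file listing extra files to ship in source and binary distributions, which bdist_rpm picks up via the sdist step. A minimal sketch along these lines would serve the purpose described; the entries are illustrative, not the exact contents added in this revision.)

    # Illustrative sketch only -- not the exact MANIFEST.in from this revision.
    include bzr README COPYING.txt
    recursive-include tools *
    recursive-include bzrlib *.py *.txt *.c *.h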

=== modified file 'bzrlib/tests/test_groupcompress.py'
@@ -19,10 +19,13 @@
 import zlib
 
 from bzrlib import (
+    btree_index,
     groupcompress,
     errors,
+    index as _mod_index,
     osutils,
     tests,
+    trace,
     versionedfile,
     )
 from bzrlib.osutils import sha_string
@@ -361,6 +364,15 @@
         raw_bytes = zlib.decompress(remaining_bytes)
         self.assertEqual(content, raw_bytes)
 
+        # we should get the same results if using the chunked version
+        gcb = groupcompress.GroupCompressBlock()
+        gcb.set_chunked_content(['this is some content\n'
+                                 'this content will be compressed\n'],
+                                 len(content))
+        old_bytes = bytes
+        bytes = gcb.to_bytes()
+        self.assertEqual(old_bytes, bytes)
+
     def test_partial_decomp(self):
         content_chunks = []
         # We need a sufficient amount of data so that zlib.decompress has
@@ -463,11 +475,12 @@
 class TestCaseWithGroupCompressVersionedFiles(tests.TestCaseWithTransport):
 
     def make_test_vf(self, create_graph, keylength=1, do_cleanup=True,
-                     dir='.'):
+                     dir='.', inconsistency_fatal=True):
         t = self.get_transport(dir)
         t.ensure_base()
         vf = groupcompress.make_pack_factory(graph=create_graph,
-            delta=False, keylength=keylength)(t)
+            delta=False, keylength=keylength,
+            inconsistency_fatal=inconsistency_fatal)(t)
         if do_cleanup:
             self.addCleanup(groupcompress.cleanup_pack_group, vf)
         return vf
@@ -475,6 +488,23 @@
 
 class TestGroupCompressVersionedFiles(TestCaseWithGroupCompressVersionedFiles):
 
+    def make_g_index(self, name, ref_lists=0, nodes=[]):
+        builder = btree_index.BTreeBuilder(ref_lists)
+        for node, references, value in nodes:
+            builder.add_node(node, references, value)
+        stream = builder.finish()
+        trans = self.get_transport()
+        size = trans.put_file(name, stream)
+        return btree_index.BTreeGraphIndex(trans, name, size)
+
+    def make_g_index_missing_parent(self):
+        graph_index = self.make_g_index('missing_parent', 1,
+            [(('parent', ), '2 78 2 10', ([],)),
+             (('tip', ), '2 78 2 10',
+              ([('parent', ), ('missing-parent', )],)),
+              ])
+        return graph_index
+
     def test_get_record_stream_as_requested(self):
         # Consider promoting 'as-requested' to general availability, and
         # make this a VF interface test
@@ -508,7 +538,7 @@
                     'as-requested', False)]
         self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)
 
-    def test_insert_record_stream_re_uses_blocks(self):
+    def test_insert_record_stream_reuses_blocks(self):
         vf = self.make_test_vf(True, dir='source')
         def grouped_stream(revision_ids, first_parents=()):
             parents = first_parents
@@ -552,8 +582,14 @@
         vf2 = self.make_test_vf(True, dir='target')
         # ordering in 'groupcompress' order, should actually swap the groups in
         # the target vf, but the groups themselves should not be disturbed.
-        vf2.insert_record_stream(vf.get_record_stream(
-            [(r,) for r in 'abcdefgh'], 'groupcompress', False))
+        def small_size_stream():
+            for record in vf.get_record_stream([(r,) for r in 'abcdefgh'],
+                                               'groupcompress', False):
+                record._manager._full_enough_block_size = \
+                    record._manager._block._content_length
+                yield record
+
+        vf2.insert_record_stream(small_size_stream())
         stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                        'groupcompress', False)
         vf2.writer.end()
@@ -564,6 +600,44 @@
                              record._manager._block._z_content)
         self.assertEqual(8, num_records)
 
+    def test_insert_record_stream_packs_on_the_fly(self):
+        vf = self.make_test_vf(True, dir='source')
+        def grouped_stream(revision_ids, first_parents=()):
+            parents = first_parents
+            for revision_id in revision_ids:
+                key = (revision_id,)
+                record = versionedfile.FulltextContentFactory(
+                    key, parents, None,
+                    'some content that is\n'
+                    'identical except for\n'
+                    'revision_id:%s\n' % (revision_id,))
+                yield record
+                parents = (key,)
+        # One group, a-d
+        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
+        # Second group, e-h
+        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
+                                               first_parents=(('d',),)))
+        # Now copy the blocks into another vf, and see that the
+        # insert_record_stream rebuilt a new block on-the-fly because of
+        # under-utilization
+        vf2 = self.make_test_vf(True, dir='target')
+        vf2.insert_record_stream(vf.get_record_stream(
+            [(r,) for r in 'abcdefgh'], 'groupcompress', False))
+        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
+                                       'groupcompress', False)
+        vf2.writer.end()
+        num_records = 0
+        # All of the records should be recombined into a single block
+        block = None
+        for record in stream:
+            num_records += 1
+            if block is None:
+                block = record._manager._block
+            else:
+                self.assertIs(block, record._manager._block)
+        self.assertEqual(8, num_records)
+
     def test__insert_record_stream_no_reuse_block(self):
         vf = self.make_test_vf(True, dir='source')
         def grouped_stream(revision_ids, first_parents=()):
@@ -606,20 +680,89 @@
             else:
                 self.assertIs(block, record._manager._block)
 
+    def test_add_missing_noncompression_parent_unvalidated_index(self):
+        unvalidated = self.make_g_index_missing_parent()
+        combined = _mod_index.CombinedGraphIndex([unvalidated])
+        index = groupcompress._GCGraphIndex(combined,
+            is_locked=lambda: True, parents=True,
+            track_external_parent_refs=True)
+        index.scan_unvalidated_index(unvalidated)
+        self.assertEqual(
+            frozenset([('missing-parent',)]), index.get_missing_parents())
+
+    def test_track_external_parent_refs(self):
+        g_index = self.make_g_index('empty', 1, [])
+        mod_index = btree_index.BTreeBuilder(1, 1)
+        combined = _mod_index.CombinedGraphIndex([g_index, mod_index])
+        index = groupcompress._GCGraphIndex(combined,
+            is_locked=lambda: True, parents=True,
+            add_callback=mod_index.add_nodes,
+            track_external_parent_refs=True)
+        index.add_records([
+            (('new-key',), '2 10 2 10', [(('parent-1',), ('parent-2',))])])
+        self.assertEqual(
+            frozenset([('parent-1',), ('parent-2',)]),
+            index.get_missing_parents())
+
+    def make_source_with_b(self, a_parent, path):
+        source = self.make_test_vf(True, dir=path)
+        source.add_lines(('a',), (), ['lines\n'])
+        if a_parent:
+            b_parents = (('a',),)
+        else:
+            b_parents = ()
+        source.add_lines(('b',), b_parents, ['lines\n'])
+        return source
+
+    def do_inconsistent_inserts(self, inconsistency_fatal):
+        target = self.make_test_vf(True, dir='target',
+                                   inconsistency_fatal=inconsistency_fatal)
+        for x in range(2):
+            source = self.make_source_with_b(x==1, 'source%s' % x)
+            target.insert_record_stream(source.get_record_stream(
+                [('b',)], 'unordered', False))
+
+    def test_inconsistent_redundant_inserts_warn(self):
+        """Should not insert a record that is already present."""
+        warnings = []
+        def warning(template, args):
+            warnings.append(template % args)
+        _trace_warning = trace.warning
+        trace.warning = warning
+        try:
+            self.do_inconsistent_inserts(inconsistency_fatal=False)
+        finally:
+            trace.warning = _trace_warning
+        self.assertEqual(["inconsistent details in skipped record: ('b',)"
+                          " ('42 32 0 8', ((),)) ('74 32 0 8', ((('a',),),))"],
+                         warnings)
+
+    def test_inconsistent_redundant_inserts_raises(self):
+        e = self.assertRaises(errors.KnitCorrupt, self.do_inconsistent_inserts,
+                              inconsistency_fatal=True)
+        self.assertContainsRe(str(e), "Knit.* corrupt: inconsistent details"
+                              " in add_records:"
+                              " \('b',\) \('42 32 0 8', \(\(\),\)\) \('74 32"
+                              " 0 8', \(\(\('a',\),\),\)\)")
+
 
 class TestLazyGroupCompress(tests.TestCaseWithTransport):
 
     _texts = {
         ('key1',): "this is a text\n"
-                   "with a reasonable amount of compressible bytes\n",
+                   "with a reasonable amount of compressible bytes\n"
+                   "which can be shared between various other texts\n",
         ('key2',): "another text\n"
-                   "with a reasonable amount of compressible bytes\n",
+                   "with a reasonable amount of compressible bytes\n"
+                   "which can be shared between various other texts\n",
         ('key3',): "yet another text which won't be extracted\n"
-                   "with a reasonable amount of compressible bytes\n",
+                   "with a reasonable amount of compressible bytes\n"
+                   "which can be shared between various other texts\n",
         ('key4',): "this will be extracted\n"
                    "but references most of its bytes from\n"
                    "yet another text which won't be extracted\n"
-                   "with a reasonable amount of compressible bytes\n",
+                   "with a reasonable amount of compressible bytes\n"
+                   "which can be shared between various other texts\n",
     }
     def make_block(self, key_to_text):
         """Create a GroupCompressBlock, filling it with the given texts."""
@@ -637,6 +780,13 @@
         start, end = locations[key]
         manager.add_factory(key, (), start, end)
 
+    def make_block_and_full_manager(self, texts):
+        locations, block = self.make_block(texts)
+        manager = groupcompress._LazyGroupContentManager(block)
+        for key in sorted(texts):
+            self.add_key_to_manager(key, locations, block, manager)
+        return block, manager
+
     def test_get_fulltexts(self):
         locations, block = self.make_block(self._texts)
         manager = groupcompress._LazyGroupContentManager(block)
@@ -693,8 +843,8 @@
         header_len = int(header_len)
         block_len = int(block_len)
         self.assertEqual('groupcompress-block', storage_kind)
-        self.assertEqual(33, z_header_len)
-        self.assertEqual(25, header_len)
+        self.assertEqual(34, z_header_len)
+        self.assertEqual(26, header_len)
         self.assertEqual(len(block_bytes), block_len)
         z_header = rest[:z_header_len]
         header = zlib.decompress(z_header)
@@ -734,13 +884,7 @@
         self.assertEqual([('key1',), ('key4',)], result_order)
 
     def test__check_rebuild_no_changes(self):
-        locations, block = self.make_block(self._texts)
-        manager = groupcompress._LazyGroupContentManager(block)
-        # Request all the keys, which ensures that we won't rebuild
-        self.add_key_to_manager(('key1',), locations, block, manager)
-        self.add_key_to_manager(('key2',), locations, block, manager)
-        self.add_key_to_manager(('key3',), locations, block, manager)
-        self.add_key_to_manager(('key4',), locations, block, manager)
+        block, manager = self.make_block_and_full_manager(self._texts)
         manager._check_rebuild_block()
         self.assertIs(block, manager._block)
 
@@ -771,3 +915,50 @@
             self.assertEqual(('key4',), record.key)
             self.assertEqual(self._texts[record.key],
                              record.get_bytes_as('fulltext'))
+
+    def test_check_is_well_utilized_all_keys(self):
+        block, manager = self.make_block_and_full_manager(self._texts)
+        self.assertFalse(manager.check_is_well_utilized())
+        # Though we can fake it by changing the recommended minimum size
+        manager._full_enough_block_size = block._content_length
+        self.assertTrue(manager.check_is_well_utilized())
+        # Setting it just above causes it to fail
+        manager._full_enough_block_size = block._content_length + 1
+        self.assertFalse(manager.check_is_well_utilized())
+        # Setting the mixed-block size doesn't do anything, because the content
+        # is considered to not be 'mixed'
+        manager._full_enough_mixed_block_size = block._content_length
+        self.assertFalse(manager.check_is_well_utilized())
+
+    def test_check_is_well_utilized_mixed_keys(self):
+        texts = {}
+        f1k1 = ('f1', 'k1')
+        f1k2 = ('f1', 'k2')
+        f2k1 = ('f2', 'k1')
+        f2k2 = ('f2', 'k2')
+        texts[f1k1] = self._texts[('key1',)]
+        texts[f1k2] = self._texts[('key2',)]
+        texts[f2k1] = self._texts[('key3',)]
+        texts[f2k2] = self._texts[('key4',)]
+        block, manager = self.make_block_and_full_manager(texts)
+        self.assertFalse(manager.check_is_well_utilized())
+        manager._full_enough_block_size = block._content_length
+        self.assertTrue(manager.check_is_well_utilized())
+        manager._full_enough_block_size = block._content_length + 1
+        self.assertFalse(manager.check_is_well_utilized())
+        manager._full_enough_mixed_block_size = block._content_length
+        self.assertTrue(manager.check_is_well_utilized())
+
+    def test_check_is_well_utilized_partial_use(self):
+        locations, block = self.make_block(self._texts)
+        manager = groupcompress._LazyGroupContentManager(block)
+        manager._full_enough_block_size = block._content_length
+        self.add_key_to_manager(('key1',), locations, block, manager)
+        self.add_key_to_manager(('key2',), locations, block, manager)
+        # Just using the content from key1 and 2 is not enough to be considered
+        # 'complete'
+        self.assertFalse(manager.check_is_well_utilized())
+        # However if we add key3, then we have enough, as we only require 75%
+        # consumption
+        self.add_key_to_manager(('key4',), locations, block, manager)
+        self.assertTrue(manager.check_is_well_utilized())
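
As context for the new inconsistency_fatal flag exercised above, here is a minimal sketch (not part of this revision) of how a caller might build a group-compress store with the flag turned off, so that redundant inserts with mismatched details call trace.warning instead of raising errors.KnitCorrupt. The make_pack_factory keywords match the diff; the transport setup is illustrative only.

    from bzrlib import groupcompress
    from bzrlib.transport import get_transport

    # Illustrative setup: any writable transport will do.
    t = get_transport('target')
    t.ensure_base()
    # Factory keywords as extended in this revision; inconsistency_fatal=False
    # downgrades mismatched redundant inserts from KnitCorrupt to a warning.
    factory = groupcompress.make_pack_factory(
        graph=True, delta=False, keylength=1,
        inconsistency_fatal=False)
    vf = factory(t)
    vf.add_lines(('a',), (), ['lines\n'])
    groupcompress.cleanup_pack_group(vf)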