~bzr-pqm/bzr/bzr.dev


Viewing changes to bzrlib/tests/test_groupcompress.py

  • Committer: Tarmac
  • Author(s): Vincent Ladeuil
  • Date: 2017-01-30 14:42:05 UTC
  • mfrom: (6620.1.1 trunk)
  • Revision ID: tarmac-20170130144205-r8fh2xpmiuxyozpv
Merge 2.7 into trunk including fix for bug #1657238 [r=vila]

=== modified file 'bzrlib/tests/test_groupcompress.py'
@@ -1,4 +1,4 @@
-# Copyright (C) 2008, 2009 Canonical Ltd
+# Copyright (C) 2008-2011 Canonical Ltd
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -20,6 +20,7 @@
 
 from bzrlib import (
     btree_index,
+    config,
     groupcompress,
     errors,
     index as _mod_index,
@@ -29,20 +30,21 @@
     versionedfile,
     )
 from bzrlib.osutils import sha_string
-from bzrlib.tests.test__groupcompress import CompiledGroupCompressFeature
-
-
-def load_tests(standard_tests, module, loader):
-    """Parameterize tests for all versions of groupcompress."""
-    to_adapt, result = tests.split_suite_by_condition(
-        standard_tests, tests.condition_isinstance(TestAllGroupCompressors))
+from bzrlib.tests.test__groupcompress import compiled_groupcompress_feature
+from bzrlib.tests.scenarios import load_tests_apply_scenarios
+
+
+def group_compress_implementation_scenarios():
     scenarios = [
         ('python', {'compressor': groupcompress.PythonGroupCompressor}),
         ]
-    if CompiledGroupCompressFeature.available():
+    if compiled_groupcompress_feature.available():
         scenarios.append(('C',
             {'compressor': groupcompress.PyrexGroupCompressor}))
-    return tests.multiply_tests(to_adapt, scenarios, result)
+    return scenarios
+
+
+load_tests = load_tests_apply_scenarios
 
 
 class TestGroupCompressor(tests.TestCase):
@@ -66,7 +68,8 @@
 class TestAllGroupCompressors(TestGroupCompressor):
     """Tests for GroupCompressor"""
 
-    compressor = None # Set by multiply_tests
+    scenarios = group_compress_implementation_scenarios()
+    compressor = None # Set by scenario
 
     def test_empty_delta(self):
         compressor = self.compressor()
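
The hunks above replace the old load_tests/multiply_tests machinery with bzrlib's declarative scenario framework: load_tests_apply_scenarios multiplies the tests of any class carrying a scenarios list of (name, attributes) pairs, one clone per scenario. A minimal sketch of that mechanism using only the stdlib (apply_scenarios_sketch is a hypothetical helper, not bzrlib's implementation):

    import copy

    def apply_scenarios_sketch(test, scenarios):
        # Yield one shallow clone of `test` per (name, attrs) pair, with the
        # scenario's attributes (here 'compressor') set on the clone.
        for name, attrs in scenarios:
            clone = copy.copy(test)
            for key, value in attrs.items():
                setattr(clone, key, value)
            yield clone

Each TestAllGroupCompressors test therefore runs once with the 'python' compressor and, when the compiled extension is available, once more with the 'C' one.
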
@@ -135,7 +138,7 @@
 
 class TestPyrexGroupCompressor(TestGroupCompressor):
 
-    _test_needs_features = [CompiledGroupCompressFeature]
+    _test_needs_features = [compiled_groupcompress_feature]
     compressor = groupcompress.PyrexGroupCompressor
 
     def test_stats(self):
@@ -347,6 +350,30 @@
         self.assertEqual(z_content, block._z_content)
         self.assertEqual(content, block._content)
 
+    def test_to_chunks(self):
+        content_chunks = ['this is some content\n',
+                          'this content will be compressed\n']
+        content_len = sum(map(len, content_chunks))
+        content = ''.join(content_chunks)
+        gcb = groupcompress.GroupCompressBlock()
+        gcb.set_chunked_content(content_chunks, content_len)
+        total_len, block_chunks = gcb.to_chunks()
+        block_bytes = ''.join(block_chunks)
+        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
+        self.assertEqual(total_len, len(block_bytes))
+        self.assertEqual(gcb._content_length, content_len)
+        expected_header =('gcb1z\n' # group compress block v1 zlib
+                          '%d\n' # Length of compressed content
+                          '%d\n' # Length of uncompressed content
+                         ) % (gcb._z_content_length, gcb._content_length)
+        # The first chunk should be the header chunk. It is small, fixed size,
+        # and there is no compelling reason to split it up
+        self.assertEqual(expected_header, block_chunks[0])
+        self.assertStartsWith(block_bytes, expected_header)
+        remaining_bytes = block_bytes[len(expected_header):]
+        raw_bytes = zlib.decompress(remaining_bytes)
+        self.assertEqual(content, raw_bytes)
+
     def test_to_bytes(self):
         content = ('this is some content\n'
                    'this content will be compressed\n')
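
The new test_to_chunks pins down the block layout: the magic 'gcb1z\n' (group-compress block v1, zlib), then the compressed and uncompressed lengths, each '\n'-terminated, then the zlib stream. A reader-side sketch of that layout (an illustration derived from the test above, not bzrlib's actual parser):

    import zlib

    def parse_gcb1z_sketch(block_bytes):
        # 'gcb1z\n<z_len>\n<len>\n<zlib data>' -- peel off the three header lines.
        magic, z_len, length, z_data = block_bytes.split('\n', 3)
        assert magic == 'gcb1z'
        content = zlib.decompress(z_data[:int(z_len)])
        assert len(content) == int(length)
        return content
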
@@ -389,7 +416,7 @@
         z_content = zlib.compress(content)
         self.assertEqual(57182, len(z_content))
         block = groupcompress.GroupCompressBlock()
-        block._z_content = z_content
+        block._z_content_chunks = (z_content,)
         block._z_content_length = len(z_content)
         block._compressor_name = 'zlib'
         block._content_length = 158634
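
The switch from block._z_content = ... to block._z_content_chunks = (z_content,) matches the 2.7 fix this merge brings in: compressed bytes are now held as a sequence of chunks. Since assertions elsewhere still read block._z_content, the single-string view is presumably derived on demand, along these lines (a sketch under that assumption, not the real GroupCompressBlock):

    class ChunkedZContentSketch(object):
        # Keep compressed bytes as chunks; join lazily, so writers can append
        # chunks without repeatedly copying one large string.
        def __init__(self, chunks):
            self._z_content_chunks = tuple(chunks)

        @property
        def _z_content(self):
            return ''.join(self._z_content_chunks)
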
@@ -418,8 +445,12 @@
         # And the decompressor is finalized
         self.assertIs(None, block._z_content_decompressor)
 
-    def test_partial_decomp_no_known_length(self):
+    def test__ensure_all_content(self):
         content_chunks = []
+        # We need a sufficient amount of data so that zlib.decompress has
+        # partial decompression to work with. Most auto-generated data
+        # compresses a bit too well, we want a combination, so we combine a sha
+        # hash with compressible data.
         for i in xrange(2048):
             next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
             content_chunks.append(next_content)
@@ -430,33 +461,16 @@
         z_content = zlib.compress(content)
         self.assertEqual(57182, len(z_content))
         block = groupcompress.GroupCompressBlock()
-        block._z_content = z_content
+        block._z_content_chunks = (z_content,)
         block._z_content_length = len(z_content)
         block._compressor_name = 'zlib'
-        block._content_length = None # Don't tell the decompressed length
+        block._content_length = 158634
         self.assertIs(None, block._content)
-        block._ensure_content(100)
-        self.assertIsNot(None, block._content)
-        # We have decompressed at least 100 bytes
-        self.assertTrue(len(block._content) >= 100)
-        # We have not decompressed the whole content
-        self.assertTrue(len(block._content) < 158634)
-        self.assertEqualDiff(content[:len(block._content)], block._content)
-        # ensuring content that we already have shouldn't cause any more data
-        # to be extracted
-        cur_len = len(block._content)
-        block._ensure_content(cur_len - 10)
-        self.assertEqual(cur_len, len(block._content))
-        # Now we want a bit more content
-        cur_len += 10
-        block._ensure_content(cur_len)
-        self.assertTrue(len(block._content) >= cur_len)
-        self.assertTrue(len(block._content) < 158634)
-        self.assertEqualDiff(content[:len(block._content)], block._content)
-        # And now lets finish
-        block._ensure_content()
+        # The first _ensure_content got all of the required data
+        block._ensure_content(158634)
         self.assertEqualDiff(content, block._content)
-        # And the decompressor is finalized
+        # And we should have released the _z_content_decompressor since it was
+        # fully consumed
         self.assertIs(None, block._z_content_decompressor)
 
     def test__dump(self):
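
test__ensure_all_content asks _ensure_content for the full 158634 bytes in one call, but the _z_content_decompressor assertions show the block still inflates lazily via zlib's incremental API. Roughly, that technique looks like this (a stdlib-only sketch with a hypothetical helper name):

    import zlib

    def decompress_at_least_sketch(z_bytes, min_bytes):
        # Inflate incrementally until at least min_bytes of plaintext is
        # available, leaving the rest of the input compressed.
        d = zlib.decompressobj()
        out = d.decompress(z_bytes, min_bytes)
        while len(out) < min_bytes and d.unconsumed_tail:
            out += d.decompress(d.unconsumed_tail, min_bytes - len(out))
        return out, d
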
@@ -472,7 +486,8 @@
                          ], block._dump())
 
 
-class TestCaseWithGroupCompressVersionedFiles(tests.TestCaseWithTransport):
+class TestCaseWithGroupCompressVersionedFiles(
+        tests.TestCaseWithMemoryTransport):
 
     def make_test_vf(self, create_graph, keylength=1, do_cleanup=True,
                      dir='.', inconsistency_fatal=True):
@@ -538,7 +553,24 @@
                     'as-requested', False)]
         self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)
 
-    def test_insert_record_stream_re_uses_blocks(self):
+    def test_get_record_stream_max_bytes_to_index_default(self):
+        vf = self.make_test_vf(True, dir='source')
+        vf.add_lines(('a',), (), ['lines\n'])
+        vf.writer.end()
+        record = vf.get_record_stream([('a',)], 'unordered', True).next()
+        self.assertEqual(vf._DEFAULT_COMPRESSOR_SETTINGS,
+                         record._manager._get_compressor_settings())
+
+    def test_get_record_stream_accesses_compressor_settings(self):
+        vf = self.make_test_vf(True, dir='source')
+        vf.add_lines(('a',), (), ['lines\n'])
+        vf.writer.end()
+        vf._max_bytes_to_index = 1234
+        record = vf.get_record_stream([('a',)], 'unordered', True).next()
+        self.assertEqual(dict(max_bytes_to_index=1234),
+                         record._manager._get_compressor_settings())
+
+    def test_insert_record_stream_reuses_blocks(self):
         vf = self.make_test_vf(True, dir='source')
         def grouped_stream(revision_ids, first_parents=()):
             parents = first_parents
@@ -582,8 +614,14 @@
         vf2 = self.make_test_vf(True, dir='target')
         # ordering in 'groupcompress' order, should actually swap the groups in
         # the target vf, but the groups themselves should not be disturbed.
-        vf2.insert_record_stream(vf.get_record_stream(
-            [(r,) for r in 'abcdefgh'], 'groupcompress', False))
+        def small_size_stream():
+            for record in vf.get_record_stream([(r,) for r in 'abcdefgh'],
+                                               'groupcompress', False):
+                record._manager._full_enough_block_size = \
+                    record._manager._block._content_length
+                yield record
+
+        vf2.insert_record_stream(small_size_stream())
         stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                        'groupcompress', False)
         vf2.writer.end()
@@ -594,6 +632,44 @@
                              record._manager._block._z_content)
         self.assertEqual(8, num_records)
 
+    def test_insert_record_stream_packs_on_the_fly(self):
+        vf = self.make_test_vf(True, dir='source')
+        def grouped_stream(revision_ids, first_parents=()):
+            parents = first_parents
+            for revision_id in revision_ids:
+                key = (revision_id,)
+                record = versionedfile.FulltextContentFactory(
+                    key, parents, None,
+                    'some content that is\n'
+                    'identical except for\n'
+                    'revision_id:%s\n' % (revision_id,))
+                yield record
+                parents = (key,)
+        # One group, a-d
+        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
+        # Second group, e-h
+        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
+                                               first_parents=(('d',),)))
+        # Now copy the blocks into another vf, and see that the
+        # insert_record_stream rebuilt a new block on-the-fly because of
+        # under-utilization
+        vf2 = self.make_test_vf(True, dir='target')
+        vf2.insert_record_stream(vf.get_record_stream(
+            [(r,) for r in 'abcdefgh'], 'groupcompress', False))
+        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
+                                       'groupcompress', False)
+        vf2.writer.end()
+        num_records = 0
+        # All of the records should be recombined into a single block
+        block = None
+        for record in stream:
+            num_records += 1
+            if block is None:
+                block = record._manager._block
+            else:
+                self.assertIs(block, record._manager._block)
+        self.assertEqual(8, num_records)
+
     def test__insert_record_stream_no_reuse_block(self):
         vf = self.make_test_vf(True, dir='source')
         def grouped_stream(revision_ids, first_parents=()):
@@ -701,3 +777,12 @@
                               " \('b',\) \('42 32 0 8', \(\(\),\)\) \('74 32"
                               " 0 8', \(\(\('a',\),\),\)\)")
 
+    def test_clear_cache(self):
+        vf = self.make_source_with_b(True, 'source')
+        vf.writer.end()
+        for record in vf.get_record_stream([('a',), ('b',)], 'unordered',
+                                           True):
+            pass
+        self.assertTrue(len(vf._group_cache) > 0)
+        vf.clear_cache()
+        self.assertEqual(0, len(vf._group_cache))
@@ -703,0 +789,45 @@
+
+
+class TestGroupCompressConfig(tests.TestCaseWithTransport):
+
+    def make_test_vf(self):
+        t = self.get_transport('.')
+        t.ensure_base()
+        factory = groupcompress.make_pack_factory(graph=True,
+            delta=False, keylength=1, inconsistency_fatal=True)
+        vf = factory(t)
+        self.addCleanup(groupcompress.cleanup_pack_group, vf)
+        return vf
+
+    def test_max_bytes_to_index_default(self):
+        vf = self.make_test_vf()
+        gc = vf._make_group_compressor()
+        self.assertEqual(vf._DEFAULT_MAX_BYTES_TO_INDEX,
+                         vf._max_bytes_to_index)
+        if isinstance(gc, groupcompress.PyrexGroupCompressor):
+            self.assertEqual(vf._DEFAULT_MAX_BYTES_TO_INDEX,
+                             gc._delta_index._max_bytes_to_index)
+
+    def test_max_bytes_to_index_in_config(self):
+        c = config.GlobalConfig()
+        c.set_user_option('bzr.groupcompress.max_bytes_to_index', '10000')
+        vf = self.make_test_vf()
+        gc = vf._make_group_compressor()
+        self.assertEqual(10000, vf._max_bytes_to_index)
+        if isinstance(gc, groupcompress.PyrexGroupCompressor):
+            self.assertEqual(10000, gc._delta_index._max_bytes_to_index)
+
+    def test_max_bytes_to_index_bad_config(self):
+        c = config.GlobalConfig()
+        c.set_user_option('bzr.groupcompress.max_bytes_to_index', 'boogah')
+        vf = self.make_test_vf()
+        # TODO: This is triggering a warning, we might want to trap and make
+        #       sure it is readable.
+        gc = vf._make_group_compressor()
+        self.assertEqual(vf._DEFAULT_MAX_BYTES_TO_INDEX,
+                         vf._max_bytes_to_index)
+        if isinstance(gc, groupcompress.PyrexGroupCompressor):
+            self.assertEqual(vf._DEFAULT_MAX_BYTES_TO_INDEX,
+                             gc._delta_index._max_bytes_to_index)
+
+
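
test_max_bytes_to_index_bad_config fixes the contract for a malformed bzr.groupcompress.max_bytes_to_index value: warn and fall back to the default rather than raise. The pattern in isolation (a generic sketch, not bzrlib's config code):

    import warnings

    def int_option_sketch(raw_value, default):
        # A missing or malformed value falls back to the default.
        if raw_value is None:
            return default
        try:
            return int(raw_value)
        except ValueError:
            warnings.warn('ignoring invalid value %r' % (raw_value,))
            return default
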
@@ -703,0 +834,8 @@
+class StubGCVF(object):
+    def __init__(self, canned_get_blocks=None):
+        self._group_cache = {}
+        self._canned_get_blocks = canned_get_blocks or []
+    def _get_blocks(self, read_memos):
+        return iter(self._canned_get_blocks)
+
+
@@ -703,0 +842,96 @@
+class Test_BatchingBlockFetcher(TestCaseWithGroupCompressVersionedFiles):
+    """Simple whitebox unit tests for _BatchingBlockFetcher."""
+
+    def test_add_key_new_read_memo(self):
+        """Adding a key with an uncached read_memo new to this batch adds that
+        read_memo to the list of memos to fetch.
+        """
+        # locations are: index_memo, ignored, parents, ignored
+        # where index_memo is: (idx, offset, len, factory_start, factory_end)
+        # and (idx, offset, size) is known as the 'read_memo', identifying the
+        # raw bytes needed.
+        read_memo = ('fake index', 100, 50)
+        locations = {
+            ('key',): (read_memo + (None, None), None, None, None)}
+        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), locations)
+        total_size = batcher.add_key(('key',))
+        self.assertEqual(50, total_size)
+        self.assertEqual([('key',)], batcher.keys)
+        self.assertEqual([read_memo], batcher.memos_to_get)
+
+    def test_add_key_duplicate_read_memo(self):
+        """read_memos that occur multiple times in a batch will only be fetched
+        once.
+        """
+        read_memo = ('fake index', 100, 50)
+        # Two keys, both sharing the same read memo (but different overall
+        # index_memos).
+        locations = {
+            ('key1',): (read_memo + (0, 1), None, None, None),
+            ('key2',): (read_memo + (1, 2), None, None, None)}
+        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), locations)
+        total_size = batcher.add_key(('key1',))
+        total_size = batcher.add_key(('key2',))
+        self.assertEqual(50, total_size)
+        self.assertEqual([('key1',), ('key2',)], batcher.keys)
+        self.assertEqual([read_memo], batcher.memos_to_get)
+
+    def test_add_key_cached_read_memo(self):
+        """Adding a key with a cached read_memo will not cause that read_memo
+        to be added to the list to fetch.
+        """
+        read_memo = ('fake index', 100, 50)
+        gcvf = StubGCVF()
+        gcvf._group_cache[read_memo] = 'fake block'
+        locations = {
+            ('key',): (read_memo + (None, None), None, None, None)}
+        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
+        total_size = batcher.add_key(('key',))
+        self.assertEqual(0, total_size)
+        self.assertEqual([('key',)], batcher.keys)
+        self.assertEqual([], batcher.memos_to_get)
+
+    def test_yield_factories_empty(self):
+        """An empty batch yields no factories."""
+        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), {})
+        self.assertEqual([], list(batcher.yield_factories()))
+
+    def test_yield_factories_calls_get_blocks(self):
+        """Uncached memos are retrieved via get_blocks."""
+        read_memo1 = ('fake index', 100, 50)
+        read_memo2 = ('fake index', 150, 40)
+        gcvf = StubGCVF(
+            canned_get_blocks=[
+                (read_memo1, groupcompress.GroupCompressBlock()),
+                (read_memo2, groupcompress.GroupCompressBlock())])
+        locations = {
+            ('key1',): (read_memo1 + (None, None), None, None, None),
+            ('key2',): (read_memo2 + (None, None), None, None, None)}
+        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
+        batcher.add_key(('key1',))
+        batcher.add_key(('key2',))
+        factories = list(batcher.yield_factories(full_flush=True))
+        self.assertLength(2, factories)
+        keys = [f.key for f in factories]
+        kinds = [f.storage_kind for f in factories]
+        self.assertEqual([('key1',), ('key2',)], keys)
+        self.assertEqual(['groupcompress-block', 'groupcompress-block'], kinds)
+
+    def test_yield_factories_flushing(self):
+        """yield_factories holds back on yielding results from the final block
+        unless passed full_flush=True.
+        """
+        fake_block = groupcompress.GroupCompressBlock()
+        read_memo = ('fake index', 100, 50)
+        gcvf = StubGCVF()
+        gcvf._group_cache[read_memo] = fake_block
+        locations = {
+            ('key',): (read_memo + (None, None), None, None, None)}
+        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
+        batcher.add_key(('key',))
+        self.assertEqual([], list(batcher.yield_factories()))
+        factories = list(batcher.yield_factories(full_flush=True))
+        self.assertLength(1, factories)
+        self.assertEqual(('key',), factories[0].key)
+        self.assertEqual('groupcompress-block', factories[0].storage_kind)
+
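
The Test_BatchingBlockFetcher tests pin three behaviours of add_key: a new read_memo is queued and its size counted, a read_memo duplicated within the batch is only fetched once, and a cached one is not fetched at all. A toy model of just that bookkeeping (not bzrlib's class; locations is simplified here to key -> index_memo):

    class BatchingFetcherSketch(object):

        def __init__(self, cache, locations):
            self._cache = cache          # read_memo -> block
            self._locations = locations  # key -> index_memo (5-tuple)
            self.keys = []
            self.memos_to_get = []
            self.total_bytes = 0

        def add_key(self, key):
            self.keys.append(key)
            read_memo = self._locations[key][:3]  # (idx, offset, size)
            if read_memo in self._cache:
                return self.total_bytes          # cached: nothing to fetch
            if read_memo not in self.memos_to_get:
                self.memos_to_get.append(read_memo)
                self.total_bytes += read_memo[2]
            return self.total_bytes
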
@@ -704,17 +938,21 @@
 
 class TestLazyGroupCompress(tests.TestCaseWithTransport):
 
     _texts = {
         ('key1',): "this is a text\n"
-                   "with a reasonable amount of compressible bytes\n",
+                   "with a reasonable amount of compressible bytes\n"
+                   "which can be shared between various other texts\n",
         ('key2',): "another text\n"
-                   "with a reasonable amount of compressible bytes\n",
+                   "with a reasonable amount of compressible bytes\n"
+                   "which can be shared between various other texts\n",
         ('key3',): "yet another text which won't be extracted\n"
-                   "with a reasonable amount of compressible bytes\n",
+                   "with a reasonable amount of compressible bytes\n"
+                   "which can be shared between various other texts\n",
         ('key4',): "this will be extracted\n"
                    "but references most of its bytes from\n"
                    "yet another text which won't be extracted\n"
-                   "with a reasonable amount of compressible bytes\n",
+                   "with a reasonable amount of compressible bytes\n"
+                   "which can be shared between various other texts\n",
     }
     def make_block(self, key_to_text):
         """Create a GroupCompressBlock, filling it with the given texts."""
@@ -732,6 +970,13 @@
         start, end = locations[key]
         manager.add_factory(key, (), start, end)
 
+    def make_block_and_full_manager(self, texts):
+        locations, block = self.make_block(texts)
+        manager = groupcompress._LazyGroupContentManager(block)
+        for key in sorted(texts):
+            self.add_key_to_manager(key, locations, block, manager)
+        return block, manager
+
     def test_get_fulltexts(self):
         locations, block = self.make_block(self._texts)
         manager = groupcompress._LazyGroupContentManager(block)
@@ -788,8 +1033,8 @@
         header_len = int(header_len)
         block_len = int(block_len)
         self.assertEqual('groupcompress-block', storage_kind)
-        self.assertEqual(33, z_header_len)
-        self.assertEqual(25, header_len)
+        self.assertEqual(34, z_header_len)
+        self.assertEqual(26, header_len)
         self.assertEqual(len(block_bytes), block_len)
         z_header = rest[:z_header_len]
         header = zlib.decompress(z_header)
@@ -829,13 +1074,7 @@
         self.assertEqual([('key1',), ('key4',)], result_order)
 
     def test__check_rebuild_no_changes(self):
-        locations, block = self.make_block(self._texts)
-        manager = groupcompress._LazyGroupContentManager(block)
-        # Request all the keys, which ensures that we won't rebuild
-        self.add_key_to_manager(('key1',), locations, block, manager)
-        self.add_key_to_manager(('key2',), locations, block, manager)
-        self.add_key_to_manager(('key3',), locations, block, manager)
-        self.add_key_to_manager(('key4',), locations, block, manager)
+        block, manager = self.make_block_and_full_manager(self._texts)
         manager._check_rebuild_block()
         self.assertIs(block, manager._block)
 
@@ -866,3 +1105,13 @@
             self.assertEqual(('key4',), record.key)
             self.assertEqual(self._texts[record.key],
                              record.get_bytes_as('fulltext'))
+
+    def test_manager_default_compressor_settings(self):
+        locations, old_block = self.make_block(self._texts)
+        manager = groupcompress._LazyGroupContentManager(old_block)
+        gcvf = groupcompress.GroupCompressVersionedFiles
+        # It doesn't greedily evaluate _max_bytes_to_index
+        self.assertIs(None, manager._compressor_settings)
+        self.assertEqual(gcvf._DEFAULT_COMPRESSOR_SETTINGS,
+                         manager._get_compressor_settings())
+
@@ -868,0 +1118,17 @@
+    def test_manager_custom_compressor_settings(self):
+        locations, old_block = self.make_block(self._texts)
+        called = []
+        def compressor_settings():
+            called.append('called')
+            return (10,)
+        manager = groupcompress._LazyGroupContentManager(old_block,
+            get_compressor_settings=compressor_settings)
+        gcvf = groupcompress.GroupCompressVersionedFiles
+        # It doesn't greedily evaluate compressor_settings
+        self.assertIs(None, manager._compressor_settings)
+        self.assertEqual((10,), manager._get_compressor_settings())
+        self.assertEqual((10,), manager._get_compressor_settings())
+        self.assertEqual((10,), manager._compressor_settings)
+        # Only called 1 time
+        self.assertEqual(['called'], called)
+
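
test_manager_custom_compressor_settings asserts lazy, memoized evaluation: the callback is untouched at construction time and invoked exactly once, however often the settings are read. That pattern, extracted (hypothetical class name):

    class LazySettingsSketch(object):

        def __init__(self, get_settings):
            self._get_settings = get_settings
            self._settings = None

        def settings(self):
            # Evaluate the callback on first use only, then cache the result.
            if self._settings is None:
                self._settings = self._get_settings()
            return self._settings
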
@@ -868,0 +1135,22 @@
+    def test__rebuild_handles_compressor_settings(self):
+        if not isinstance(groupcompress.GroupCompressor,
+                          groupcompress.PyrexGroupCompressor):
+            raise tests.TestNotApplicable('pure-python compressor'
+                ' does not handle compressor_settings')
+        locations, old_block = self.make_block(self._texts)
+        manager = groupcompress._LazyGroupContentManager(old_block,
+            get_compressor_settings=lambda: dict(max_bytes_to_index=32))
+        gc = manager._make_group_compressor()
+        self.assertEqual(32, gc._delta_index._max_bytes_to_index)
+        self.add_key_to_manager(('key3',), locations, old_block, manager)
+        self.add_key_to_manager(('key4',), locations, old_block, manager)
+        action, last_byte, total_bytes = manager._check_rebuild_action()
+        self.assertEqual('rebuild', action)
+        manager._rebuild_block()
+        new_block = manager._block
+        self.assertIsNot(old_block, new_block)
+        # Because of the new max_bytes_to_index, we do a poor job of
+        # rebuilding. This is a side-effect of the change, but at least it does
+        # show the setting had an effect.
+        self.assertTrue(old_block._content_length < new_block._content_length)
+
@@ -868,0 +1157,14 @@
+    def test_check_is_well_utilized_all_keys(self):
+        block, manager = self.make_block_and_full_manager(self._texts)
+        self.assertFalse(manager.check_is_well_utilized())
+        # Though we can fake it by changing the recommended minimum size
+        manager._full_enough_block_size = block._content_length
+        self.assertTrue(manager.check_is_well_utilized())
+        # Setting it just above causes it to fail
+        manager._full_enough_block_size = block._content_length + 1
+        self.assertFalse(manager.check_is_well_utilized())
+        # Setting the mixed-block size doesn't do anything, because the content
+        # is considered to not be 'mixed'
+        manager._full_enough_mixed_block_size = block._content_length
+        self.assertFalse(manager.check_is_well_utilized())
+
@@ -868,0 +1171,19 @@
+    def test_check_is_well_utilized_mixed_keys(self):
+        texts = {}
+        f1k1 = ('f1', 'k1')
+        f1k2 = ('f1', 'k2')
+        f2k1 = ('f2', 'k1')
+        f2k2 = ('f2', 'k2')
+        texts[f1k1] = self._texts[('key1',)]
+        texts[f1k2] = self._texts[('key2',)]
+        texts[f2k1] = self._texts[('key3',)]
+        texts[f2k2] = self._texts[('key4',)]
+        block, manager = self.make_block_and_full_manager(texts)
+        self.assertFalse(manager.check_is_well_utilized())
+        manager._full_enough_block_size = block._content_length
+        self.assertTrue(manager.check_is_well_utilized())
+        manager._full_enough_block_size = block._content_length + 1
+        self.assertFalse(manager.check_is_well_utilized())
+        manager._full_enough_mixed_block_size = block._content_length
+        self.assertTrue(manager.check_is_well_utilized())
+
@@ -868,0 +1190,15 @@
+    def test_check_is_well_utilized_partial_use(self):
+        locations, block = self.make_block(self._texts)
+        manager = groupcompress._LazyGroupContentManager(block)
+        manager._full_enough_block_size = block._content_length
+        self.add_key_to_manager(('key1',), locations, block, manager)
+        self.add_key_to_manager(('key2',), locations, block, manager)
+        # Just using the content from key1 and 2 is not enough to be considered
+        # 'complete'
+        self.assertFalse(manager.check_is_well_utilized())
+        # However if we add key3, then we have enough, as we only require 75%
+        # consumption
+        self.add_key_to_manager(('key4',), locations, block, manager)
+        self.assertTrue(manager.check_is_well_utilized())
+
+
@@ -868,0 +1205,19 @@
+class Test_GCBuildDetails(tests.TestCase):
+
+    def test_acts_like_tuple(self):
+        # _GCBuildDetails inlines some of the data that used to be spread out
+        # across a bunch of tuples
+        bd = groupcompress._GCBuildDetails((('parent1',), ('parent2',)),
+            ('INDEX', 10, 20, 0, 5))
+        self.assertEqual(4, len(bd))
+        self.assertEqual(('INDEX', 10, 20, 0, 5), bd[0])
+        self.assertEqual(None, bd[1]) # Compression Parent is always None
+        self.assertEqual((('parent1',), ('parent2',)), bd[2])
+        self.assertEqual(('group', None), bd[3]) # Record details
+
+    def test__repr__(self):
+        bd = groupcompress._GCBuildDetails((('parent1',), ('parent2',)),
+            ('INDEX', 10, 20, 0, 5))
+        self.assertEqual("_GCBuildDetails(('INDEX', 10, 20, 0, 5),"
+                         " (('parent1',), ('parent2',)))",
+                         repr(bd))
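
Test_GCBuildDetails documents the trick behind _GCBuildDetails: replace a tuple-of-tuples with an object that acts like the 4-tuple (index_memo, compression_parent, parents, record_details) while storing only the two fields that vary. A standalone sketch mirroring just what the tests assert (hypothetical class, not bzrlib's):

    class BuildDetailsSketch(object):

        __slots__ = ('_parents', '_index_memo')

        def __init__(self, parents, index_memo):
            self._parents = parents
            self._index_memo = index_memo

        def __len__(self):
            return 4

        def __getitem__(self, i):
            # Compression parent is always None for group-compress records;
            # record details are always ('group', None).
            return (self._index_memo, None, self._parents, ('group', None))[i]

With __slots__ and only two stored references, each instance is smaller than the nested tuples it replaces, which matters when build details are kept for many keys at once.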