~bzr-pqm/bzr/bzr.dev

Viewing changes to bzrlib/tests/test_groupcompress.py

  • Committer: Aaron Bentley
  • Date: 2009-06-19 21:16:31 UTC
  • mto: This revision was merged to the branch mainline in revision 4481.
  • Revision ID: aaron@aaronbentley.com-20090619211631-4fnkv2uui98xj7ux
Provide control over switch and shelver messaging.

--- bzrlib/tests/test_groupcompress.py
+++ bzrlib/tests/test_groupcompress.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2008-2011 Canonical Ltd
+# Copyright (C) 2008, 2009 Canonical Ltd
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -20,31 +20,28 @@
 
 from bzrlib import (
     btree_index,
-    config,
     groupcompress,
     errors,
     index as _mod_index,
     osutils,
     tests,
-    trace,
     versionedfile,
     )
 from bzrlib.osutils import sha_string
-from bzrlib.tests.test__groupcompress import compiled_groupcompress_feature
-from bzrlib.tests.scenarios import load_tests_apply_scenarios
-
-
-def group_compress_implementation_scenarios():
+from bzrlib.tests.test__groupcompress import CompiledGroupCompressFeature
+
+
+def load_tests(standard_tests, module, loader):
+    """Parameterize tests for all versions of groupcompress."""
+    to_adapt, result = tests.split_suite_by_condition(
+        standard_tests, tests.condition_isinstance(TestAllGroupCompressors))
     scenarios = [
         ('python', {'compressor': groupcompress.PythonGroupCompressor}),
         ]
-    if compiled_groupcompress_feature.available():
+    if CompiledGroupCompressFeature.available():
         scenarios.append(('C',
             {'compressor': groupcompress.PyrexGroupCompressor}))
-    return scenarios
-
-
-load_tests = load_tests_apply_scenarios
+    return tests.multiply_tests(to_adapt, scenarios, result)
 
 
 class TestGroupCompressor(tests.TestCase):
@@ -68,8 +65,7 @@
 class TestAllGroupCompressors(TestGroupCompressor):
     """Tests for GroupCompressor"""
 
-    scenarios = group_compress_implementation_scenarios()
-    compressor = None # Set by scenario
+    compressor = None # Set by multiply_tests
 
     def test_empty_delta(self):
         compressor = self.compressor()
@@ -138,7 +134,7 @@
 
 class TestPyrexGroupCompressor(TestGroupCompressor):
 
-    _test_needs_features = [compiled_groupcompress_feature]
+    _test_needs_features = [CompiledGroupCompressFeature]
     compressor = groupcompress.PyrexGroupCompressor
 
     def test_stats(self):
@@ -350,30 +346,6 @@
         self.assertEqual(z_content, block._z_content)
         self.assertEqual(content, block._content)
 
-    def test_to_chunks(self):
-        content_chunks = ['this is some content\n',
-                          'this content will be compressed\n']
-        content_len = sum(map(len, content_chunks))
-        content = ''.join(content_chunks)
-        gcb = groupcompress.GroupCompressBlock()
-        gcb.set_chunked_content(content_chunks, content_len)
-        total_len, block_chunks = gcb.to_chunks()
-        block_bytes = ''.join(block_chunks)
-        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
-        self.assertEqual(total_len, len(block_bytes))
-        self.assertEqual(gcb._content_length, content_len)
-        expected_header =('gcb1z\n' # group compress block v1 zlib
-                          '%d\n' # Length of compressed content
-                          '%d\n' # Length of uncompressed content
-                         ) % (gcb._z_content_length, gcb._content_length)
-        # The first chunk should be the header chunk. It is small, fixed size,
-        # and there is no compelling reason to split it up
-        self.assertEqual(expected_header, block_chunks[0])
-        self.assertStartsWith(block_bytes, expected_header)
-        remaining_bytes = block_bytes[len(expected_header):]
-        raw_bytes = zlib.decompress(remaining_bytes)
-        self.assertEqual(content, raw_bytes)
-
     def test_to_bytes(self):
         content = ('this is some content\n'
                    'this content will be compressed\n')
@@ -391,15 +363,6 @@
         raw_bytes = zlib.decompress(remaining_bytes)
         self.assertEqual(content, raw_bytes)
 
-        # we should get the same results if using the chunked version
-        gcb = groupcompress.GroupCompressBlock()
-        gcb.set_chunked_content(['this is some content\n'
-                                 'this content will be compressed\n'],
-                                 len(content))
-        old_bytes = bytes
-        bytes = gcb.to_bytes()
-        self.assertEqual(old_bytes, bytes)
-
     def test_partial_decomp(self):
         content_chunks = []
         # We need a sufficient amount of data so that zlib.decompress has
@@ -416,7 +379,7 @@
         z_content = zlib.compress(content)
         self.assertEqual(57182, len(z_content))
         block = groupcompress.GroupCompressBlock()
-        block._z_content_chunks = (z_content,)
+        block._z_content = z_content
         block._z_content_length = len(z_content)
         block._compressor_name = 'zlib'
         block._content_length = 158634
@@ -445,12 +408,8 @@
         # And the decompressor is finalized
         self.assertIs(None, block._z_content_decompressor)
 
-    def test__ensure_all_content(self):
+    def test_partial_decomp_no_known_length(self):
         content_chunks = []
-        # We need a sufficient amount of data so that zlib.decompress has
-        # partial decompression to work with. Most auto-generated data
-        # compresses a bit too well, we want a combination, so we combine a sha
-        # hash with compressible data.
         for i in xrange(2048):
             next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
             content_chunks.append(next_content)
@@ -461,16 +420,33 @@
         z_content = zlib.compress(content)
         self.assertEqual(57182, len(z_content))
         block = groupcompress.GroupCompressBlock()
-        block._z_content_chunks = (z_content,)
+        block._z_content = z_content
         block._z_content_length = len(z_content)
         block._compressor_name = 'zlib'
-        block._content_length = 158634
+        block._content_length = None # Don't tell the decompressed length
         self.assertIs(None, block._content)
-        # The first _ensure_content got all of the required data
-        block._ensure_content(158634)
+        block._ensure_content(100)
+        self.assertIsNot(None, block._content)
+        # We have decompressed at least 100 bytes
+        self.assertTrue(len(block._content) >= 100)
+        # We have not decompressed the whole content
+        self.assertTrue(len(block._content) < 158634)
+        self.assertEqualDiff(content[:len(block._content)], block._content)
+        # ensuring content that we already have shouldn't cause any more data
+        # to be extracted
+        cur_len = len(block._content)
+        block._ensure_content(cur_len - 10)
+        self.assertEqual(cur_len, len(block._content))
+        # Now we want a bit more content
+        cur_len += 10
+        block._ensure_content(cur_len)
+        self.assertTrue(len(block._content) >= cur_len)
+        self.assertTrue(len(block._content) < 158634)
+        self.assertEqualDiff(content[:len(block._content)], block._content)
+        # And now lets finish
+        block._ensure_content()
         self.assertEqualDiff(content, block._content)
-        # And we should have released the _z_content_decompressor since it was
-        # fully consumed
+        # And the decompressor is finalized
         self.assertIs(None, block._z_content_decompressor)
 
     def test__dump(self):
@@ -486,16 +462,14 @@
                          ], block._dump())
 
 
-class TestCaseWithGroupCompressVersionedFiles(
-        tests.TestCaseWithMemoryTransport):
+class TestCaseWithGroupCompressVersionedFiles(tests.TestCaseWithTransport):
 
     def make_test_vf(self, create_graph, keylength=1, do_cleanup=True,
-                     dir='.', inconsistency_fatal=True):
+                     dir='.'):
         t = self.get_transport(dir)
         t.ensure_base()
         vf = groupcompress.make_pack_factory(graph=create_graph,
-            delta=False, keylength=keylength,
-            inconsistency_fatal=inconsistency_fatal)(t)
+            delta=False, keylength=keylength)(t)
         if do_cleanup:
             self.addCleanup(groupcompress.cleanup_pack_group, vf)
         return vf
@@ -553,24 +527,7 @@
                     'as-requested', False)]
         self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)
 
-    def test_get_record_stream_max_bytes_to_index_default(self):
-        vf = self.make_test_vf(True, dir='source')
-        vf.add_lines(('a',), (), ['lines\n'])
-        vf.writer.end()
-        record = vf.get_record_stream([('a',)], 'unordered', True).next()
-        self.assertEqual(vf._DEFAULT_COMPRESSOR_SETTINGS,
-                         record._manager._get_compressor_settings())
-
-    def test_get_record_stream_accesses_compressor_settings(self):
-        vf = self.make_test_vf(True, dir='source')
-        vf.add_lines(('a',), (), ['lines\n'])
-        vf.writer.end()
-        vf._max_bytes_to_index = 1234
-        record = vf.get_record_stream([('a',)], 'unordered', True).next()
-        self.assertEqual(dict(max_bytes_to_index=1234),
-                         record._manager._get_compressor_settings())
-
-    def test_insert_record_stream_reuses_blocks(self):
+    def test_insert_record_stream_re_uses_blocks(self):
         vf = self.make_test_vf(True, dir='source')
         def grouped_stream(revision_ids, first_parents=()):
             parents = first_parents
@@ -614,14 +571,8 @@
         vf2 = self.make_test_vf(True, dir='target')
         # ordering in 'groupcompress' order, should actually swap the groups in
         # the target vf, but the groups themselves should not be disturbed.
-        def small_size_stream():
-            for record in vf.get_record_stream([(r,) for r in 'abcdefgh'],
-                                               'groupcompress', False):
-                record._manager._full_enough_block_size = \
-                    record._manager._block._content_length
-                yield record
-
-        vf2.insert_record_stream(small_size_stream())
+        vf2.insert_record_stream(vf.get_record_stream(
+            [(r,) for r in 'abcdefgh'], 'groupcompress', False))
         stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                        'groupcompress', False)
         vf2.writer.end()
@@ -632,44 +583,6 @@
                              record._manager._block._z_content)
         self.assertEqual(8, num_records)
 
-    def test_insert_record_stream_packs_on_the_fly(self):
-        vf = self.make_test_vf(True, dir='source')
-        def grouped_stream(revision_ids, first_parents=()):
-            parents = first_parents
-            for revision_id in revision_ids:
-                key = (revision_id,)
-                record = versionedfile.FulltextContentFactory(
-                    key, parents, None,
-                    'some content that is\n'
-                    'identical except for\n'
-                    'revision_id:%s\n' % (revision_id,))
-                yield record
-                parents = (key,)
-        # One group, a-d
-        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
-        # Second group, e-h
-        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
-                                               first_parents=(('d',),)))
-        # Now copy the blocks into another vf, and see that the
-        # insert_record_stream rebuilt a new block on-the-fly because of
-        # under-utilization
-        vf2 = self.make_test_vf(True, dir='target')
-        vf2.insert_record_stream(vf.get_record_stream(
-            [(r,) for r in 'abcdefgh'], 'groupcompress', False))
-        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
-                                       'groupcompress', False)
-        vf2.writer.end()
-        num_records = 0
-        # All of the records should be recombined into a single block
-        block = None
-        for record in stream:
-            num_records += 1
-            if block is None:
-                block = record._manager._block
-            else:
-                self.assertIs(block, record._manager._block)
-        self.assertEqual(8, num_records)
-
     def test__insert_record_stream_no_reuse_block(self):
         vf = self.make_test_vf(True, dir='source')
         def grouped_stream(revision_ids, first_parents=()):
@@ -736,223 +649,20 @@
             frozenset([('parent-1',), ('parent-2',)]),
             index.get_missing_parents())
 
-    def make_source_with_b(self, a_parent, path):
-        source = self.make_test_vf(True, dir=path)
-        source.add_lines(('a',), (), ['lines\n'])
-        if a_parent:
-            b_parents = (('a',),)
-        else:
-            b_parents = ()
-        source.add_lines(('b',), b_parents, ['lines\n'])
-        return source
-
-    def do_inconsistent_inserts(self, inconsistency_fatal):
-        target = self.make_test_vf(True, dir='target',
-                                   inconsistency_fatal=inconsistency_fatal)
-        for x in range(2):
-            source = self.make_source_with_b(x==1, 'source%s' % x)
-            target.insert_record_stream(source.get_record_stream(
-                [('b',)], 'unordered', False))
-
-    def test_inconsistent_redundant_inserts_warn(self):
-        """Should not insert a record that is already present."""
-        warnings = []
-        def warning(template, args):
-            warnings.append(template % args)
-        _trace_warning = trace.warning
-        trace.warning = warning
-        try:
-            self.do_inconsistent_inserts(inconsistency_fatal=False)
-        finally:
-            trace.warning = _trace_warning
-        self.assertEqual(["inconsistent details in skipped record: ('b',)"
-                          " ('42 32 0 8', ((),)) ('74 32 0 8', ((('a',),),))"],
-                         warnings)
-
-    def test_inconsistent_redundant_inserts_raises(self):
-        e = self.assertRaises(errors.KnitCorrupt, self.do_inconsistent_inserts,
-                              inconsistency_fatal=True)
-        self.assertContainsRe(str(e), "Knit.* corrupt: inconsistent details"
-                              " in add_records:"
-                              " \('b',\) \('42 32 0 8', \(\(\),\)\) \('74 32"
-                              " 0 8', \(\(\('a',\),\),\)\)")
-
-    def test_clear_cache(self):
-        vf = self.make_source_with_b(True, 'source')
-        vf.writer.end()
-        for record in vf.get_record_stream([('a',), ('b',)], 'unordered',
-                                           True):
-            pass
-        self.assertTrue(len(vf._group_cache) > 0)
-        vf.clear_cache()
-        self.assertEqual(0, len(vf._group_cache))
-
-
-class TestGroupCompressConfig(tests.TestCaseWithTransport):
-
-    def make_test_vf(self):
-        t = self.get_transport('.')
-        t.ensure_base()
-        factory = groupcompress.make_pack_factory(graph=True,
-            delta=False, keylength=1, inconsistency_fatal=True)
-        vf = factory(t)
-        self.addCleanup(groupcompress.cleanup_pack_group, vf)
-        return vf
-
-    def test_max_bytes_to_index_default(self):
-        vf = self.make_test_vf()
-        gc = vf._make_group_compressor()
-        self.assertEqual(vf._DEFAULT_MAX_BYTES_TO_INDEX,
-                         vf._max_bytes_to_index)
-        if isinstance(gc, groupcompress.PyrexGroupCompressor):
-            self.assertEqual(vf._DEFAULT_MAX_BYTES_TO_INDEX,
-                             gc._delta_index._max_bytes_to_index)
-
-    def test_max_bytes_to_index_in_config(self):
-        c = config.GlobalConfig()
-        c.set_user_option('bzr.groupcompress.max_bytes_to_index', '10000')
-        vf = self.make_test_vf()
-        gc = vf._make_group_compressor()
-        self.assertEqual(10000, vf._max_bytes_to_index)
-        if isinstance(gc, groupcompress.PyrexGroupCompressor):
-            self.assertEqual(10000, gc._delta_index._max_bytes_to_index)
-
-    def test_max_bytes_to_index_bad_config(self):
-        c = config.GlobalConfig()
-        c.set_user_option('bzr.groupcompress.max_bytes_to_index', 'boogah')
-        vf = self.make_test_vf()
-        # TODO: This is triggering a warning, we might want to trap and make
-        #       sure it is readable.
-        gc = vf._make_group_compressor()
-        self.assertEqual(vf._DEFAULT_MAX_BYTES_TO_INDEX,
-                         vf._max_bytes_to_index)
-        if isinstance(gc, groupcompress.PyrexGroupCompressor):
-            self.assertEqual(vf._DEFAULT_MAX_BYTES_TO_INDEX,
-                             gc._delta_index._max_bytes_to_index)
-
-
-class StubGCVF(object):
-    def __init__(self, canned_get_blocks=None):
-        self._group_cache = {}
-        self._canned_get_blocks = canned_get_blocks or []
-    def _get_blocks(self, read_memos):
-        return iter(self._canned_get_blocks)
-
-
-class Test_BatchingBlockFetcher(TestCaseWithGroupCompressVersionedFiles):
-    """Simple whitebox unit tests for _BatchingBlockFetcher."""
-
-    def test_add_key_new_read_memo(self):
-        """Adding a key with an uncached read_memo new to this batch adds that
-        read_memo to the list of memos to fetch.
-        """
-        # locations are: index_memo, ignored, parents, ignored
-        # where index_memo is: (idx, offset, len, factory_start, factory_end)
-        # and (idx, offset, size) is known as the 'read_memo', identifying the
-        # raw bytes needed.
-        read_memo = ('fake index', 100, 50)
-        locations = {
-            ('key',): (read_memo + (None, None), None, None, None)}
-        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), locations)
-        total_size = batcher.add_key(('key',))
-        self.assertEqual(50, total_size)
-        self.assertEqual([('key',)], batcher.keys)
-        self.assertEqual([read_memo], batcher.memos_to_get)
-
-    def test_add_key_duplicate_read_memo(self):
-        """read_memos that occur multiple times in a batch will only be fetched
-        once.
-        """
-        read_memo = ('fake index', 100, 50)
-        # Two keys, both sharing the same read memo (but different overall
-        # index_memos).
-        locations = {
-            ('key1',): (read_memo + (0, 1), None, None, None),
-            ('key2',): (read_memo + (1, 2), None, None, None)}
-        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), locations)
-        total_size = batcher.add_key(('key1',))
-        total_size = batcher.add_key(('key2',))
-        self.assertEqual(50, total_size)
-        self.assertEqual([('key1',), ('key2',)], batcher.keys)
-        self.assertEqual([read_memo], batcher.memos_to_get)
-
-    def test_add_key_cached_read_memo(self):
-        """Adding a key with a cached read_memo will not cause that read_memo
-        to be added to the list to fetch.
-        """
-        read_memo = ('fake index', 100, 50)
-        gcvf = StubGCVF()
-        gcvf._group_cache[read_memo] = 'fake block'
-        locations = {
-            ('key',): (read_memo + (None, None), None, None, None)}
-        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
-        total_size = batcher.add_key(('key',))
-        self.assertEqual(0, total_size)
-        self.assertEqual([('key',)], batcher.keys)
-        self.assertEqual([], batcher.memos_to_get)
-
-    def test_yield_factories_empty(self):
-        """An empty batch yields no factories."""
-        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), {})
-        self.assertEqual([], list(batcher.yield_factories()))
-
-    def test_yield_factories_calls_get_blocks(self):
-        """Uncached memos are retrieved via get_blocks."""
-        read_memo1 = ('fake index', 100, 50)
-        read_memo2 = ('fake index', 150, 40)
-        gcvf = StubGCVF(
-            canned_get_blocks=[
-                (read_memo1, groupcompress.GroupCompressBlock()),
-                (read_memo2, groupcompress.GroupCompressBlock())])
-        locations = {
-            ('key1',): (read_memo1 + (None, None), None, None, None),
-            ('key2',): (read_memo2 + (None, None), None, None, None)}
-        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
-        batcher.add_key(('key1',))
-        batcher.add_key(('key2',))
-        factories = list(batcher.yield_factories(full_flush=True))
-        self.assertLength(2, factories)
-        keys = [f.key for f in factories]
-        kinds = [f.storage_kind for f in factories]
-        self.assertEqual([('key1',), ('key2',)], keys)
-        self.assertEqual(['groupcompress-block', 'groupcompress-block'], kinds)
-
-    def test_yield_factories_flushing(self):
-        """yield_factories holds back on yielding results from the final block
-        unless passed full_flush=True.
-        """
-        fake_block = groupcompress.GroupCompressBlock()
-        read_memo = ('fake index', 100, 50)
-        gcvf = StubGCVF()
-        gcvf._group_cache[read_memo] = fake_block
-        locations = {
-            ('key',): (read_memo + (None, None), None, None, None)}
-        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
-        batcher.add_key(('key',))
-        self.assertEqual([], list(batcher.yield_factories()))
-        factories = list(batcher.yield_factories(full_flush=True))
-        self.assertLength(1, factories)
-        self.assertEqual(('key',), factories[0].key)
-        self.assertEqual('groupcompress-block', factories[0].storage_kind)
-
 
 class TestLazyGroupCompress(tests.TestCaseWithTransport):
 
     _texts = {
         ('key1',): "this is a text\n"
-                   "with a reasonable amount of compressible bytes\n"
-                   "which can be shared between various other texts\n",
+                   "with a reasonable amount of compressible bytes\n",
         ('key2',): "another text\n"
-                   "with a reasonable amount of compressible bytes\n"
-                   "which can be shared between various other texts\n",
+                   "with a reasonable amount of compressible bytes\n",
         ('key3',): "yet another text which won't be extracted\n"
-                   "with a reasonable amount of compressible bytes\n"
-                   "which can be shared between various other texts\n",
+                   "with a reasonable amount of compressible bytes\n",
         ('key4',): "this will be extracted\n"
                    "but references most of its bytes from\n"
                    "yet another text which won't be extracted\n"
-                   "with a reasonable amount of compressible bytes\n"
-                   "which can be shared between various other texts\n",
+                   "with a reasonable amount of compressible bytes\n",
     }
     def make_block(self, key_to_text):
         """Create a GroupCompressBlock, filling it with the given texts."""
@@ -970,13 +680,6 @@
         start, end = locations[key]
         manager.add_factory(key, (), start, end)
 
-    def make_block_and_full_manager(self, texts):
-        locations, block = self.make_block(texts)
-        manager = groupcompress._LazyGroupContentManager(block)
-        for key in sorted(texts):
-            self.add_key_to_manager(key, locations, block, manager)
-        return block, manager
-
     def test_get_fulltexts(self):
         locations, block = self.make_block(self._texts)
         manager = groupcompress._LazyGroupContentManager(block)
@@ -1033,8 +736,8 @@
         header_len = int(header_len)
         block_len = int(block_len)
         self.assertEqual('groupcompress-block', storage_kind)
-        self.assertEqual(34, z_header_len)
-        self.assertEqual(26, header_len)
+        self.assertEqual(33, z_header_len)
+        self.assertEqual(25, header_len)
         self.assertEqual(len(block_bytes), block_len)
         z_header = rest[:z_header_len]
         header = zlib.decompress(z_header)
@@ -1074,7 +777,13 @@
         self.assertEqual([('key1',), ('key4',)], result_order)
 
     def test__check_rebuild_no_changes(self):
-        block, manager = self.make_block_and_full_manager(self._texts)
+        locations, block = self.make_block(self._texts)
+        manager = groupcompress._LazyGroupContentManager(block)
+        # Request all the keys, which ensures that we won't rebuild
+        self.add_key_to_manager(('key1',), locations, block, manager)
+        self.add_key_to_manager(('key2',), locations, block, manager)
+        self.add_key_to_manager(('key3',), locations, block, manager)
+        self.add_key_to_manager(('key4',), locations, block, manager)
         manager._check_rebuild_block()
         self.assertIs(block, manager._block)
 
@@ -1105,119 +814,3 @@
             self.assertEqual(('key4',), record.key)
             self.assertEqual(self._texts[record.key],
                              record.get_bytes_as('fulltext'))
-
-    def test_manager_default_compressor_settings(self):
-        locations, old_block = self.make_block(self._texts)
-        manager = groupcompress._LazyGroupContentManager(old_block)
-        gcvf = groupcompress.GroupCompressVersionedFiles
-        # It doesn't greedily evaluate _max_bytes_to_index
-        self.assertIs(None, manager._compressor_settings)
-        self.assertEqual(gcvf._DEFAULT_COMPRESSOR_SETTINGS,
-                         manager._get_compressor_settings())
-
-    def test_manager_custom_compressor_settings(self):
-        locations, old_block = self.make_block(self._texts)
-        called = []
-        def compressor_settings():
-            called.append('called')
-            return (10,)
-        manager = groupcompress._LazyGroupContentManager(old_block,
-            get_compressor_settings=compressor_settings)
-        gcvf = groupcompress.GroupCompressVersionedFiles
-        # It doesn't greedily evaluate compressor_settings
-        self.assertIs(None, manager._compressor_settings)
-        self.assertEqual((10,), manager._get_compressor_settings())
-        self.assertEqual((10,), manager._get_compressor_settings())
-        self.assertEqual((10,), manager._compressor_settings)
-        # Only called 1 time
-        self.assertEqual(['called'], called)
-
-    def test__rebuild_handles_compressor_settings(self):
-        if not isinstance(groupcompress.GroupCompressor,
-                          groupcompress.PyrexGroupCompressor):
-            raise tests.TestNotApplicable('pure-python compressor'
-                ' does not handle compressor_settings')
-        locations, old_block = self.make_block(self._texts)
-        manager = groupcompress._LazyGroupContentManager(old_block,
-            get_compressor_settings=lambda: dict(max_bytes_to_index=32))
-        gc = manager._make_group_compressor()
-        self.assertEqual(32, gc._delta_index._max_bytes_to_index)
-        self.add_key_to_manager(('key3',), locations, old_block, manager)
-        self.add_key_to_manager(('key4',), locations, old_block, manager)
-        action, last_byte, total_bytes = manager._check_rebuild_action()
-        self.assertEqual('rebuild', action)
-        manager._rebuild_block()
-        new_block = manager._block
-        self.assertIsNot(old_block, new_block)
-        # Because of the new max_bytes_to_index, we do a poor job of
-        # rebuilding. This is a side-effect of the change, but at least it does
-        # show the setting had an effect.
-        self.assertTrue(old_block._content_length < new_block._content_length)
-
-    def test_check_is_well_utilized_all_keys(self):
-        block, manager = self.make_block_and_full_manager(self._texts)
-        self.assertFalse(manager.check_is_well_utilized())
-        # Though we can fake it by changing the recommended minimum size
-        manager._full_enough_block_size = block._content_length
-        self.assertTrue(manager.check_is_well_utilized())
-        # Setting it just above causes it to fail
-        manager._full_enough_block_size = block._content_length + 1
-        self.assertFalse(manager.check_is_well_utilized())
-        # Setting the mixed-block size doesn't do anything, because the content
-        # is considered to not be 'mixed'
-        manager._full_enough_mixed_block_size = block._content_length
-        self.assertFalse(manager.check_is_well_utilized())
-
-    def test_check_is_well_utilized_mixed_keys(self):
-        texts = {}
-        f1k1 = ('f1', 'k1')
-        f1k2 = ('f1', 'k2')
-        f2k1 = ('f2', 'k1')
-        f2k2 = ('f2', 'k2')
-        texts[f1k1] = self._texts[('key1',)]
-        texts[f1k2] = self._texts[('key2',)]
-        texts[f2k1] = self._texts[('key3',)]
-        texts[f2k2] = self._texts[('key4',)]
-        block, manager = self.make_block_and_full_manager(texts)
-        self.assertFalse(manager.check_is_well_utilized())
-        manager._full_enough_block_size = block._content_length
-        self.assertTrue(manager.check_is_well_utilized())
-        manager._full_enough_block_size = block._content_length + 1
-        self.assertFalse(manager.check_is_well_utilized())
-        manager._full_enough_mixed_block_size = block._content_length
-        self.assertTrue(manager.check_is_well_utilized())
-
-    def test_check_is_well_utilized_partial_use(self):
-        locations, block = self.make_block(self._texts)
-        manager = groupcompress._LazyGroupContentManager(block)
-        manager._full_enough_block_size = block._content_length
-        self.add_key_to_manager(('key1',), locations, block, manager)
-        self.add_key_to_manager(('key2',), locations, block, manager)
-        # Just using the content from key1 and 2 is not enough to be considered
-        # 'complete'
-        self.assertFalse(manager.check_is_well_utilized())
-        # However if we add key3, then we have enough, as we only require 75%
-        # consumption
-        self.add_key_to_manager(('key4',), locations, block, manager)
-        self.assertTrue(manager.check_is_well_utilized())
-
-
-class Test_GCBuildDetails(tests.TestCase):
-
-    def test_acts_like_tuple(self):
-        # _GCBuildDetails inlines some of the data that used to be spread out
-        # across a bunch of tuples
-        bd = groupcompress._GCBuildDetails((('parent1',), ('parent2',)),
-            ('INDEX', 10, 20, 0, 5))
-        self.assertEqual(4, len(bd))
-        self.assertEqual(('INDEX', 10, 20, 0, 5), bd[0])
-        self.assertEqual(None, bd[1]) # Compression Parent is always None
-        self.assertEqual((('parent1',), ('parent2',)), bd[2])
-        self.assertEqual(('group', None), bd[3]) # Record details
-
-    def test__repr__(self):
-        bd = groupcompress._GCBuildDetails((('parent1',), ('parent2',)),
-            ('INDEX', 10, 20, 0, 5))
-        self.assertEqual("_GCBuildDetails(('INDEX', 10, 20, 0, 5),"
-                         " (('parent1',), ('parent2',)))",
-                         repr(bd))