~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/tests/test_groupcompress.py

  • Committer: Canonical.com Patch Queue Manager
  • Date: 2010-09-01 08:02:42 UTC
  • mfrom: (5390.3.3 faster-revert-593560)
  • Revision ID: pqm@pqm.ubuntu.com-20100901080242-esg62ody4frwmy66
(spiv) Avoid repeatedly calling self.target.all_file_ids() in
 InterTree.iter_changes. (Andrew Bennetts)

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
# Copyright (C) 2008, 2009 Canonical Ltd
 
1
# Copyright (C) 2008, 2009, 2010 Canonical Ltd
2
2
#
3
3
# This program is free software; you can redistribute it and/or modify
4
4
# it under the terms of the GNU General Public License as published by
19
19
import zlib
20
20
 
21
21
from bzrlib import (
 
22
    btree_index,
22
23
    groupcompress,
23
24
    errors,
 
25
    index as _mod_index,
24
26
    osutils,
25
27
    tests,
 
28
    trace,
26
29
    versionedfile,
27
30
    )
28
31
from bzrlib.osutils import sha_string
29
 
from bzrlib.tests.test__groupcompress import CompiledGroupCompressFeature
 
32
from bzrlib.tests.test__groupcompress import compiled_groupcompress_feature
30
33
 
31
34
 
32
35
def load_tests(standard_tests, module, loader):
36
39
    scenarios = [
37
40
        ('python', {'compressor': groupcompress.PythonGroupCompressor}),
38
41
        ]
39
 
    if CompiledGroupCompressFeature.available():
 
42
    if compiled_groupcompress_feature.available():
40
43
        scenarios.append(('C',
41
44
            {'compressor': groupcompress.PyrexGroupCompressor}))
42
45
    return tests.multiply_tests(to_adapt, scenarios, result)
72
75
    def test_one_nosha_delta(self):
73
76
        # diff against NULL
74
77
        compressor = self.compressor()
75
 
        sha1, start_point, end_point, _, _ = compressor.compress(('label',),
 
78
        sha1, start_point, end_point, _ = compressor.compress(('label',),
76
79
            'strange\ncommon\n', None)
77
80
        self.assertEqual(sha_string('strange\ncommon\n'), sha1)
78
81
        expected_lines = 'f' '\x0f' 'strange\ncommon\n'
83
86
    def test_empty_content(self):
84
87
        compressor = self.compressor()
85
88
        # Adding empty bytes should return the 'null' record
86
 
        sha1, start_point, end_point, kind, _ = compressor.compress(('empty',),
87
 
            '', None)
 
89
        sha1, start_point, end_point, kind = compressor.compress(('empty',),
 
90
                                                                 '', None)
88
91
        self.assertEqual(0, start_point)
89
92
        self.assertEqual(0, end_point)
90
93
        self.assertEqual('fulltext', kind)
94
97
        # Even after adding some content
95
98
        compressor.compress(('content',), 'some\nbytes\n', None)
96
99
        self.assertTrue(compressor.endpoint > 0)
97
 
        sha1, start_point, end_point, kind, _ = compressor.compress(('empty2',),
98
 
            '', None)
 
100
        sha1, start_point, end_point, kind = compressor.compress(('empty2',),
 
101
                                                                 '', None)
99
102
        self.assertEqual(0, start_point)
100
103
        self.assertEqual(0, end_point)
101
104
        self.assertEqual('fulltext', kind)
105
108
        # Knit fetching will try to reconstruct texts locally which results in
106
109
        # reading something that is in the compressor stream already.
107
110
        compressor = self.compressor()
108
 
        sha1_1, _, _, _, _ = compressor.compress(('label',),
 
111
        sha1_1, _, _, _ = compressor.compress(('label',),
109
112
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
110
113
        expected_lines = list(compressor.chunks)
111
 
        sha1_2, _, end_point, _, _ = compressor.compress(('newlabel',),
 
114
        sha1_2, _, end_point, _ = compressor.compress(('newlabel',),
112
115
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
113
116
        # get the first out
114
117
        self.assertEqual(('strange\ncommon long line\n'
119
122
                          'different\n', sha1_2),
120
123
                         compressor.extract(('newlabel',)))
121
124
 
 
125
    def test_pop_last(self):
 
126
        compressor = self.compressor()
 
127
        _, _, _, _ = compressor.compress(('key1',),
 
128
            'some text\nfor the first entry\n', None)
 
129
        expected_lines = list(compressor.chunks)
 
130
        _, _, _, _ = compressor.compress(('key2',),
 
131
            'some text\nfor the second entry\n', None)
 
132
        compressor.pop_last()
 
133
        self.assertEqual(expected_lines, compressor.chunks)
 
134
 
122
135
 
123
136
class TestPyrexGroupCompressor(TestGroupCompressor):
124
137
 
125
 
    _test_needs_features = [CompiledGroupCompressFeature]
 
138
    _test_needs_features = [compiled_groupcompress_feature]
126
139
    compressor = groupcompress.PyrexGroupCompressor
127
140
 
128
141
    def test_stats(self):
146
159
 
147
160
    def test_two_nosha_delta(self):
148
161
        compressor = self.compressor()
149
 
        sha1_1, _, _, _, _ = compressor.compress(('label',),
 
162
        sha1_1, _, _, _ = compressor.compress(('label',),
150
163
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
151
164
        expected_lines = list(compressor.chunks)
152
 
        sha1_2, start_point, end_point, _, _ = compressor.compress(('newlabel',),
 
165
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
153
166
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
154
167
        self.assertEqual(sha_string('common long line\n'
155
168
                                    'that needs a 16 byte match\n'
171
184
        # The first interesting test: make a change that should use lines from
172
185
        # both parents.
173
186
        compressor = self.compressor()
174
 
        sha1_1, _, _, _, _ = compressor.compress(('label',),
 
187
        sha1_1, _, _, _ = compressor.compress(('label',),
175
188
            'strange\ncommon very very long line\nwith some extra text\n', None)
176
 
        sha1_2, _, _, _, _ = compressor.compress(('newlabel',),
 
189
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
177
190
            'different\nmoredifferent\nand then some more\n', None)
178
191
        expected_lines = list(compressor.chunks)
179
 
        sha1_3, start_point, end_point, _, _ = compressor.compress(('label3',),
 
192
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
180
193
            'new\ncommon very very long line\nwith some extra text\n'
181
194
            'different\nmoredifferent\nand then some more\n',
182
195
            None)
225
238
 
226
239
    def test_two_nosha_delta(self):
227
240
        compressor = self.compressor()
228
 
        sha1_1, _, _, _, _ = compressor.compress(('label',),
 
241
        sha1_1, _, _, _ = compressor.compress(('label',),
229
242
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
230
243
        expected_lines = list(compressor.chunks)
231
 
        sha1_2, start_point, end_point, _, _ = compressor.compress(('newlabel',),
 
244
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
232
245
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
233
246
        self.assertEqual(sha_string('common long line\n'
234
247
                                    'that needs a 16 byte match\n'
250
263
        # The first interesting test: make a change that should use lines from
251
264
        # both parents.
252
265
        compressor = self.compressor()
253
 
        sha1_1, _, _, _, _ = compressor.compress(('label',),
 
266
        sha1_1, _, _, _ = compressor.compress(('label',),
254
267
            'strange\ncommon very very long line\nwith some extra text\n', None)
255
 
        sha1_2, _, _, _, _ = compressor.compress(('newlabel',),
 
268
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
256
269
            'different\nmoredifferent\nand then some more\n', None)
257
270
        expected_lines = list(compressor.chunks)
258
 
        sha1_3, start_point, end_point, _, _ = compressor.compress(('label3',),
 
271
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
259
272
            'new\ncommon very very long line\nwith some extra text\n'
260
273
            'different\nmoredifferent\nand then some more\n',
261
274
            None)
310
323
        self.assertEqual('', block._z_content)
311
324
        block._ensure_content() # Ensure content is safe to call 2x
312
325
 
 
326
    def test_from_invalid(self):
 
327
        self.assertRaises(ValueError,
 
328
                          groupcompress.GroupCompressBlock.from_bytes,
 
329
                          'this is not a valid header')
 
330
 
313
331
    def test_from_bytes(self):
314
332
        content = ('a tiny bit of content\n')
315
333
        z_content = zlib.compress(content)
346
364
        raw_bytes = zlib.decompress(remaining_bytes)
347
365
        self.assertEqual(content, raw_bytes)
348
366
 
 
367
        # we should get the same results if using the chunked version
 
368
        gcb = groupcompress.GroupCompressBlock()
 
369
        gcb.set_chunked_content(['this is some content\n'
 
370
                                 'this content will be compressed\n'],
 
371
                                 len(content))
 
372
        old_bytes = bytes
 
373
        bytes = gcb.to_bytes()
 
374
        self.assertEqual(old_bytes, bytes)
 
375
 
349
376
    def test_partial_decomp(self):
350
377
        content_chunks = []
351
378
        # We need a sufficient amount of data so that zlib.decompress has
391
418
        # And the decompressor is finalized
392
419
        self.assertIs(None, block._z_content_decompressor)
393
420
 
394
 
    def test_partial_decomp_no_known_length(self):
 
421
    def test__ensure_all_content(self):
395
422
        content_chunks = []
 
423
        # We need a sufficient amount of data so that zlib.decompress has
 
424
        # partial decompression to work with. Most auto-generated data
 
425
        # compresses a bit too well, we want a combination, so we combine a sha
 
426
        # hash with compressible data.
396
427
        for i in xrange(2048):
397
428
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
398
429
            content_chunks.append(next_content)
406
437
        block._z_content = z_content
407
438
        block._z_content_length = len(z_content)
408
439
        block._compressor_name = 'zlib'
409
 
        block._content_length = None # Don't tell the decompressed length
 
440
        block._content_length = 158634
410
441
        self.assertIs(None, block._content)
411
 
        block._ensure_content(100)
412
 
        self.assertIsNot(None, block._content)
413
 
        # We have decompressed at least 100 bytes
414
 
        self.assertTrue(len(block._content) >= 100)
415
 
        # We have not decompressed the whole content
416
 
        self.assertTrue(len(block._content) < 158634)
417
 
        self.assertEqualDiff(content[:len(block._content)], block._content)
418
 
        # ensuring content that we already have shouldn't cause any more data
419
 
        # to be extracted
420
 
        cur_len = len(block._content)
421
 
        block._ensure_content(cur_len - 10)
422
 
        self.assertEqual(cur_len, len(block._content))
423
 
        # Now we want a bit more content
424
 
        cur_len += 10
425
 
        block._ensure_content(cur_len)
426
 
        self.assertTrue(len(block._content) >= cur_len)
427
 
        self.assertTrue(len(block._content) < 158634)
428
 
        self.assertEqualDiff(content[:len(block._content)], block._content)
429
 
        # And now let's finish
430
 
        block._ensure_content()
 
442
        # The first _ensure_content got all of the required data
 
443
        block._ensure_content(158634)
431
444
        self.assertEqualDiff(content, block._content)
432
 
        # And the decompressor is finalized
 
445
        # And we should have released the _z_content_decompressor since it was
 
446
        # fully consumed
433
447
        self.assertIs(None, block._z_content_decompressor)
434
448
 
435
 
 
436
 
class TestCaseWithGroupCompressVersionedFiles(tests.TestCaseWithTransport):
 
449
    def test__dump(self):
 
450
        dup_content = 'some duplicate content\nwhich is sufficiently long\n'
 
451
        key_to_text = {('1',): dup_content + '1 unique\n',
 
452
                       ('2',): dup_content + '2 extra special\n'}
 
453
        locs, block = self.make_block(key_to_text)
 
454
        self.assertEqual([('f', len(key_to_text[('1',)])),
 
455
                          ('d', 21, len(key_to_text[('2',)]),
 
456
                           [('c', 2, len(dup_content)),
 
457
                            ('i', len('2 extra special\n'), '')
 
458
                           ]),
 
459
                         ], block._dump())
 
460
 
 
461
 
 
462
class TestCaseWithGroupCompressVersionedFiles(
 
463
        tests.TestCaseWithMemoryTransport):
437
464
 
438
465
    def make_test_vf(self, create_graph, keylength=1, do_cleanup=True,
439
 
                     dir='.'):
 
466
                     dir='.', inconsistency_fatal=True):
440
467
        t = self.get_transport(dir)
441
468
        t.ensure_base()
442
469
        vf = groupcompress.make_pack_factory(graph=create_graph,
443
 
            delta=False, keylength=keylength)(t)
 
470
            delta=False, keylength=keylength,
 
471
            inconsistency_fatal=inconsistency_fatal)(t)
444
472
        if do_cleanup:
445
473
            self.addCleanup(groupcompress.cleanup_pack_group, vf)
446
474
        return vf
448
476
 
449
477
class TestGroupCompressVersionedFiles(TestCaseWithGroupCompressVersionedFiles):
450
478
 
 
479
    def make_g_index(self, name, ref_lists=0, nodes=[]):
 
480
        builder = btree_index.BTreeBuilder(ref_lists)
 
481
        for node, references, value in nodes:
 
482
            builder.add_node(node, references, value)
 
483
        stream = builder.finish()
 
484
        trans = self.get_transport()
 
485
        size = trans.put_file(name, stream)
 
486
        return btree_index.BTreeGraphIndex(trans, name, size)
 
487
 
 
488
    def make_g_index_missing_parent(self):
 
489
        graph_index = self.make_g_index('missing_parent', 1,
 
490
            [(('parent', ), '2 78 2 10', ([],)),
 
491
             (('tip', ), '2 78 2 10',
 
492
              ([('parent', ), ('missing-parent', )],)),
 
493
              ])
 
494
        return graph_index
 
495
 
451
496
    def test_get_record_stream_as_requested(self):
452
497
        # Consider promoting 'as-requested' to general availability, and
453
498
        # make this a VF interface test
481
526
                    'as-requested', False)]
482
527
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)
483
528
 
484
 
    def test_insert_record_stream_re_uses_blocks(self):
 
529
    def test_insert_record_stream_reuses_blocks(self):
485
530
        vf = self.make_test_vf(True, dir='source')
486
531
        def grouped_stream(revision_ids, first_parents=()):
487
532
            parents = first_parents
525
570
        vf2 = self.make_test_vf(True, dir='target')
526
571
        # ordering in 'groupcompress' order, should actually swap the groups in
527
572
        # the target vf, but the groups themselves should not be disturbed.
528
 
        vf2.insert_record_stream(vf.get_record_stream(
529
 
            [(r,) for r in 'abcdefgh'], 'groupcompress', False))
 
573
        def small_size_stream():
 
574
            for record in vf.get_record_stream([(r,) for r in 'abcdefgh'],
 
575
                                               'groupcompress', False):
 
576
                record._manager._full_enough_block_size = \
 
577
                    record._manager._block._content_length
 
578
                yield record
 
579
                        
 
580
        vf2.insert_record_stream(small_size_stream())
530
581
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
531
582
                                       'groupcompress', False)
532
583
        vf2.writer.end()
537
588
                             record._manager._block._z_content)
538
589
        self.assertEqual(8, num_records)
539
590
 
 
591
    def test_insert_record_stream_packs_on_the_fly(self):
 
592
        vf = self.make_test_vf(True, dir='source')
 
593
        def grouped_stream(revision_ids, first_parents=()):
 
594
            parents = first_parents
 
595
            for revision_id in revision_ids:
 
596
                key = (revision_id,)
 
597
                record = versionedfile.FulltextContentFactory(
 
598
                    key, parents, None,
 
599
                    'some content that is\n'
 
600
                    'identical except for\n'
 
601
                    'revision_id:%s\n' % (revision_id,))
 
602
                yield record
 
603
                parents = (key,)
 
604
        # One group, a-d
 
605
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
 
606
        # Second group, e-h
 
607
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
 
608
                                               first_parents=(('d',),)))
 
609
        # Now copy the blocks into another vf, and see that the
 
610
        # insert_record_stream rebuilt a new block on-the-fly because of
 
611
        # under-utilization
 
612
        vf2 = self.make_test_vf(True, dir='target')
 
613
        vf2.insert_record_stream(vf.get_record_stream(
 
614
            [(r,) for r in 'abcdefgh'], 'groupcompress', False))
 
615
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
 
616
                                       'groupcompress', False)
 
617
        vf2.writer.end()
 
618
        num_records = 0
 
619
        # All of the records should be recombined into a single block
 
620
        block = None
 
621
        for record in stream:
 
622
            num_records += 1
 
623
            if block is None:
 
624
                block = record._manager._block
 
625
            else:
 
626
                self.assertIs(block, record._manager._block)
 
627
        self.assertEqual(8, num_records)
 
628
 
540
629
    def test__insert_record_stream_no_reuse_block(self):
541
630
        vf = self.make_test_vf(True, dir='source')
542
631
        def grouped_stream(revision_ids, first_parents=()):
579
668
            else:
580
669
                self.assertIs(block, record._manager._block)
581
670
 
 
671
    def test_add_missing_noncompression_parent_unvalidated_index(self):
 
672
        unvalidated = self.make_g_index_missing_parent()
 
673
        combined = _mod_index.CombinedGraphIndex([unvalidated])
 
674
        index = groupcompress._GCGraphIndex(combined,
 
675
            is_locked=lambda: True, parents=True,
 
676
            track_external_parent_refs=True)
 
677
        index.scan_unvalidated_index(unvalidated)
 
678
        self.assertEqual(
 
679
            frozenset([('missing-parent',)]), index.get_missing_parents())
 
680
 
 
681
    def test_track_external_parent_refs(self):
 
682
        g_index = self.make_g_index('empty', 1, [])
 
683
        mod_index = btree_index.BTreeBuilder(1, 1)
 
684
        combined = _mod_index.CombinedGraphIndex([g_index, mod_index])
 
685
        index = groupcompress._GCGraphIndex(combined,
 
686
            is_locked=lambda: True, parents=True,
 
687
            add_callback=mod_index.add_nodes,
 
688
            track_external_parent_refs=True)
 
689
        index.add_records([
 
690
            (('new-key',), '2 10 2 10', [(('parent-1',), ('parent-2',))])])
 
691
        self.assertEqual(
 
692
            frozenset([('parent-1',), ('parent-2',)]),
 
693
            index.get_missing_parents())
 
694
 
 
695
    def make_source_with_b(self, a_parent, path):
 
696
        source = self.make_test_vf(True, dir=path)
 
697
        source.add_lines(('a',), (), ['lines\n'])
 
698
        if a_parent:
 
699
            b_parents = (('a',),)
 
700
        else:
 
701
            b_parents = ()
 
702
        source.add_lines(('b',), b_parents, ['lines\n'])
 
703
        return source
 
704
 
 
705
    def do_inconsistent_inserts(self, inconsistency_fatal):
 
706
        target = self.make_test_vf(True, dir='target',
 
707
                                   inconsistency_fatal=inconsistency_fatal)
 
708
        for x in range(2):
 
709
            source = self.make_source_with_b(x==1, 'source%s' % x)
 
710
            target.insert_record_stream(source.get_record_stream(
 
711
                [('b',)], 'unordered', False))
 
712
 
 
713
    def test_inconsistent_redundant_inserts_warn(self):
 
714
        """Should not insert a record that is already present."""
 
715
        warnings = []
 
716
        def warning(template, args):
 
717
            warnings.append(template % args)
 
718
        _trace_warning = trace.warning
 
719
        trace.warning = warning
 
720
        try:
 
721
            self.do_inconsistent_inserts(inconsistency_fatal=False)
 
722
        finally:
 
723
            trace.warning = _trace_warning
 
724
        self.assertEqual(["inconsistent details in skipped record: ('b',)"
 
725
                          " ('42 32 0 8', ((),)) ('74 32 0 8', ((('a',),),))"],
 
726
                         warnings)
 
727
 
 
728
    def test_inconsistent_redundant_inserts_raises(self):
 
729
        e = self.assertRaises(errors.KnitCorrupt, self.do_inconsistent_inserts,
 
730
                              inconsistency_fatal=True)
 
731
        self.assertContainsRe(str(e), "Knit.* corrupt: inconsistent details"
 
732
                              " in add_records:"
 
733
                              " \('b',\) \('42 32 0 8', \(\(\),\)\) \('74 32"
 
734
                              " 0 8', \(\(\('a',\),\),\)\)")
 
735
 
 
736
    def test_clear_cache(self):
 
737
        vf = self.make_source_with_b(True, 'source')
 
738
        vf.writer.end()
 
739
        for record in vf.get_record_stream([('a',), ('b',)], 'unordered',
 
740
                                           True):
 
741
            pass
 
742
        self.assertTrue(len(vf._group_cache) > 0)
 
743
        vf.clear_cache()
 
744
        self.assertEqual(0, len(vf._group_cache))
 
745
 
 
746
 
 
747
 
 
748
class StubGCVF(object):
 
749
    def __init__(self, canned_get_blocks=None):
 
750
        self._group_cache = {}
 
751
        self._canned_get_blocks = canned_get_blocks or []
 
752
    def _get_blocks(self, read_memos):
 
753
        return iter(self._canned_get_blocks)
 
754
    
 
755
 
 
756
class Test_BatchingBlockFetcher(TestCaseWithGroupCompressVersionedFiles):
 
757
    """Simple whitebox unit tests for _BatchingBlockFetcher."""
 
758
    
 
759
    def test_add_key_new_read_memo(self):
 
760
        """Adding a key with an uncached read_memo new to this batch adds that
 
761
        read_memo to the list of memos to fetch.
 
762
        """
 
763
        # locations are: index_memo, ignored, parents, ignored
 
764
        # where index_memo is: (idx, offset, len, factory_start, factory_end)
 
765
        # and (idx, offset, len) is known as the 'read_memo', identifying the
 
766
        # raw bytes needed.
 
767
        read_memo = ('fake index', 100, 50)
 
768
        locations = {
 
769
            ('key',): (read_memo + (None, None), None, None, None)}
 
770
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), locations)
 
771
        total_size = batcher.add_key(('key',))
 
772
        self.assertEqual(50, total_size)
 
773
        self.assertEqual([('key',)], batcher.keys)
 
774
        self.assertEqual([read_memo], batcher.memos_to_get)
 
775
 
 
776
    def test_add_key_duplicate_read_memo(self):
 
777
        """read_memos that occur multiple times in a batch will only be fetched
 
778
        once.
 
779
        """
 
780
        read_memo = ('fake index', 100, 50)
 
781
        # Two keys, both sharing the same read memo (but different overall
 
782
        # index_memos).
 
783
        locations = {
 
784
            ('key1',): (read_memo + (0, 1), None, None, None),
 
785
            ('key2',): (read_memo + (1, 2), None, None, None)}
 
786
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), locations)
 
787
        total_size = batcher.add_key(('key1',))
 
788
        total_size = batcher.add_key(('key2',))
 
789
        self.assertEqual(50, total_size)
 
790
        self.assertEqual([('key1',), ('key2',)], batcher.keys)
 
791
        self.assertEqual([read_memo], batcher.memos_to_get)
 
792
 
 
793
    def test_add_key_cached_read_memo(self):
 
794
        """Adding a key with a cached read_memo will not cause that read_memo
 
795
        to be added to the list to fetch.
 
796
        """
 
797
        read_memo = ('fake index', 100, 50)
 
798
        gcvf = StubGCVF()
 
799
        gcvf._group_cache[read_memo] = 'fake block'
 
800
        locations = {
 
801
            ('key',): (read_memo + (None, None), None, None, None)}
 
802
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
 
803
        total_size = batcher.add_key(('key',))
 
804
        self.assertEqual(0, total_size)
 
805
        self.assertEqual([('key',)], batcher.keys)
 
806
        self.assertEqual([], batcher.memos_to_get)
 
807
 
 
808
    def test_yield_factories_empty(self):
 
809
        """An empty batch yields no factories."""
 
810
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), {})
 
811
        self.assertEqual([], list(batcher.yield_factories()))
 
812
 
 
813
    def test_yield_factories_calls_get_blocks(self):
 
814
        """Uncached memos are retrieved via get_blocks."""
 
815
        read_memo1 = ('fake index', 100, 50)
 
816
        read_memo2 = ('fake index', 150, 40)
 
817
        gcvf = StubGCVF(
 
818
            canned_get_blocks=[
 
819
                (read_memo1, groupcompress.GroupCompressBlock()),
 
820
                (read_memo2, groupcompress.GroupCompressBlock())])
 
821
        locations = {
 
822
            ('key1',): (read_memo1 + (None, None), None, None, None),
 
823
            ('key2',): (read_memo2 + (None, None), None, None, None)}
 
824
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
 
825
        batcher.add_key(('key1',))
 
826
        batcher.add_key(('key2',))
 
827
        factories = list(batcher.yield_factories(full_flush=True))
 
828
        self.assertLength(2, factories)
 
829
        keys = [f.key for f in factories]
 
830
        kinds = [f.storage_kind for f in factories]
 
831
        self.assertEqual([('key1',), ('key2',)], keys)
 
832
        self.assertEqual(['groupcompress-block', 'groupcompress-block'], kinds)
 
833
 
 
834
    def test_yield_factories_flushing(self):
 
835
        """yield_factories holds back on yielding results from the final block
 
836
        unless passed full_flush=True.
 
837
        """
 
838
        fake_block = groupcompress.GroupCompressBlock()
 
839
        read_memo = ('fake index', 100, 50)
 
840
        gcvf = StubGCVF()
 
841
        gcvf._group_cache[read_memo] = fake_block
 
842
        locations = {
 
843
            ('key',): (read_memo + (None, None), None, None, None)}
 
844
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
 
845
        batcher.add_key(('key',))
 
846
        self.assertEqual([], list(batcher.yield_factories()))
 
847
        factories = list(batcher.yield_factories(full_flush=True))
 
848
        self.assertLength(1, factories)
 
849
        self.assertEqual(('key',), factories[0].key)
 
850
        self.assertEqual('groupcompress-block', factories[0].storage_kind)
 
851
 
582
852
 
583
853
class TestLazyGroupCompress(tests.TestCaseWithTransport):
584
854
 
585
855
    _texts = {
586
856
        ('key1',): "this is a text\n"
587
 
                   "with a reasonable amount of compressible bytes\n",
 
857
                   "with a reasonable amount of compressible bytes\n"
 
858
                   "which can be shared between various other texts\n",
588
859
        ('key2',): "another text\n"
589
 
                   "with a reasonable amount of compressible bytes\n",
 
860
                   "with a reasonable amount of compressible bytes\n"
 
861
                   "which can be shared between various other texts\n",
590
862
        ('key3',): "yet another text which won't be extracted\n"
591
 
                   "with a reasonable amount of compressible bytes\n",
 
863
                   "with a reasonable amount of compressible bytes\n"
 
864
                   "which can be shared between various other texts\n",
592
865
        ('key4',): "this will be extracted\n"
593
866
                   "but references most of its bytes from\n"
594
867
                   "yet another text which won't be extracted\n"
595
 
                   "with a reasonable amount of compressible bytes\n",
 
868
                   "with a reasonable amount of compressible bytes\n"
 
869
                   "which can be shared between various other texts\n",
596
870
    }
597
871
    def make_block(self, key_to_text):
598
872
        """Create a GroupCompressBlock, filling it with the given texts."""
610
884
        start, end = locations[key]
611
885
        manager.add_factory(key, (), start, end)
612
886
 
 
887
    def make_block_and_full_manager(self, texts):
 
888
        locations, block = self.make_block(texts)
 
889
        manager = groupcompress._LazyGroupContentManager(block)
 
890
        for key in sorted(texts):
 
891
            self.add_key_to_manager(key, locations, block, manager)
 
892
        return block, manager
 
893
 
613
894
    def test_get_fulltexts(self):
614
895
        locations, block = self.make_block(self._texts)
615
896
        manager = groupcompress._LazyGroupContentManager(block)
666
947
        header_len = int(header_len)
667
948
        block_len = int(block_len)
668
949
        self.assertEqual('groupcompress-block', storage_kind)
669
 
        self.assertEqual(33, z_header_len)
670
 
        self.assertEqual(25, header_len)
 
950
        self.assertEqual(34, z_header_len)
 
951
        self.assertEqual(26, header_len)
671
952
        self.assertEqual(len(block_bytes), block_len)
672
953
        z_header = rest[:z_header_len]
673
954
        header = zlib.decompress(z_header)
707
988
        self.assertEqual([('key1',), ('key4',)], result_order)
708
989
 
709
990
    def test__check_rebuild_no_changes(self):
710
 
        locations, block = self.make_block(self._texts)
711
 
        manager = groupcompress._LazyGroupContentManager(block)
712
 
        # Request all the keys, which ensures that we won't rebuild
713
 
        self.add_key_to_manager(('key1',), locations, block, manager)
714
 
        self.add_key_to_manager(('key2',), locations, block, manager)
715
 
        self.add_key_to_manager(('key3',), locations, block, manager)
716
 
        self.add_key_to_manager(('key4',), locations, block, manager)
 
991
        block, manager = self.make_block_and_full_manager(self._texts)
717
992
        manager._check_rebuild_block()
718
993
        self.assertIs(block, manager._block)
719
994
 
744
1019
            self.assertEqual(('key4',), record.key)
745
1020
            self.assertEqual(self._texts[record.key],
746
1021
                             record.get_bytes_as('fulltext'))
 
1022
 
 
1023
    def test_check_is_well_utilized_all_keys(self):
 
1024
        block, manager = self.make_block_and_full_manager(self._texts)
 
1025
        self.assertFalse(manager.check_is_well_utilized())
 
1026
        # Though we can fake it by changing the recommended minimum size
 
1027
        manager._full_enough_block_size = block._content_length
 
1028
        self.assertTrue(manager.check_is_well_utilized())
 
1029
        # Setting it just above causes it to fail
 
1030
        manager._full_enough_block_size = block._content_length + 1
 
1031
        self.assertFalse(manager.check_is_well_utilized())
 
1032
        # Setting the mixed-block size doesn't do anything, because the content
 
1033
        # is considered to not be 'mixed'
 
1034
        manager._full_enough_mixed_block_size = block._content_length
 
1035
        self.assertFalse(manager.check_is_well_utilized())
 
1036
 
 
1037
    def test_check_is_well_utilized_mixed_keys(self):
 
1038
        texts = {}
 
1039
        f1k1 = ('f1', 'k1')
 
1040
        f1k2 = ('f1', 'k2')
 
1041
        f2k1 = ('f2', 'k1')
 
1042
        f2k2 = ('f2', 'k2')
 
1043
        texts[f1k1] = self._texts[('key1',)]
 
1044
        texts[f1k2] = self._texts[('key2',)]
 
1045
        texts[f2k1] = self._texts[('key3',)]
 
1046
        texts[f2k2] = self._texts[('key4',)]
 
1047
        block, manager = self.make_block_and_full_manager(texts)
 
1048
        self.assertFalse(manager.check_is_well_utilized())
 
1049
        manager._full_enough_block_size = block._content_length
 
1050
        self.assertTrue(manager.check_is_well_utilized())
 
1051
        manager._full_enough_block_size = block._content_length + 1
 
1052
        self.assertFalse(manager.check_is_well_utilized())
 
1053
        manager._full_enough_mixed_block_size = block._content_length
 
1054
        self.assertTrue(manager.check_is_well_utilized())
 
1055
 
 
1056
    def test_check_is_well_utilized_partial_use(self):
 
1057
        locations, block = self.make_block(self._texts)
 
1058
        manager = groupcompress._LazyGroupContentManager(block)
 
1059
        manager._full_enough_block_size = block._content_length
 
1060
        self.add_key_to_manager(('key1',), locations, block, manager)
 
1061
        self.add_key_to_manager(('key2',), locations, block, manager)
 
1062
        # Just using the content from key1 and 2 is not enough to be considered
 
1063
        # 'complete'
 
1064
        self.assertFalse(manager.check_is_well_utilized())
 
1065
        # However if we add key4, then we have enough, as we only require 75%
 
1066
        # consumption
 
1067
        self.add_key_to_manager(('key4',), locations, block, manager)
 
1068
        self.assertTrue(manager.check_is_well_utilized())
 
1069
 
 
1070
 
 
1071
class Test_GCBuildDetails(tests.TestCase):
 
1072
 
 
1073
    def test_acts_like_tuple(self):
 
1074
        # _GCBuildDetails inlines some of the data that used to be spread out
 
1075
        # across a bunch of tuples
 
1076
        bd = groupcompress._GCBuildDetails((('parent1',), ('parent2',)),
 
1077
            ('INDEX', 10, 20, 0, 5))
 
1078
        self.assertEqual(4, len(bd))
 
1079
        self.assertEqual(('INDEX', 10, 20, 0, 5), bd[0])
 
1080
        self.assertEqual(None, bd[1]) # Compression Parent is always None
 
1081
        self.assertEqual((('parent1',), ('parent2',)), bd[2])
 
1082
        self.assertEqual(('group', None), bd[3]) # Record details
 
1083
 
 
1084
    def test__repr__(self):
 
1085
        bd = groupcompress._GCBuildDetails((('parent1',), ('parent2',)),
 
1086
            ('INDEX', 10, 20, 0, 5))
 
1087
        self.assertEqual("_GCBuildDetails(('INDEX', 10, 20, 0, 5),"
 
1088
                         " (('parent1',), ('parent2',)))",
 
1089
                         repr(bd))
 
1090