~bzr-pqm/bzr/bzr.dev

Viewing changes to bzrlib/tests/test_groupcompress.py

  • Committer: John Arbash Meinel
  • Date: 2009-03-27 22:29:55 UTC
  • mto: (3735.39.2 clean)
  • mto: This revision was merged to the branch mainline in revision 4280.
  • Revision ID: john@arbash-meinel.com-20090327222955-utifmfm888zerixt
Implement apply_delta_to_source which doesn't have to malloc another string.
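
For context, the idea behind apply_delta_to_source is to read a delta directly out of the offsets where it already lives in a block, instead of slicing it into a new string first. The sketch below is illustrative only: the real routine is the compiled _groupcompress extension and uses the groupcompress binary delta encoding, whereas this toy uses a made-up fixed-width instruction format, and the argument names are an assumption based on the commit message.

    def apply_delta_to_source(source, delta_start, delta_end):
        """Apply the delta stored at source[delta_start:delta_end] to source.

        Taking (source, delta_start, delta_end) instead of a separate delta
        string means the caller never has to malloc/copy the delta out of the
        block it is already sitting in.
        """
        out = []
        pos = delta_start
        while pos < delta_end:
            kind = source[pos]
            if kind == 'c':
                # toy copy instruction: 'c' + 8-digit offset + 8-digit length
                offset = int(source[pos + 1:pos + 9])
                length = int(source[pos + 9:pos + 17])
                out.append(source[offset:offset + length])
                pos += 17
            elif kind == 'i':
                # toy insert instruction: 'i' + 4-digit length + literal bytes
                length = int(source[pos + 1:pos + 5])
                out.append(source[pos + 5:pos + 5 + length])
                pos += 5 + length
            else:
                raise ValueError('unknown delta instruction %r' % (kind,))
        return ''.join(out)

    # A fulltext followed by a toy delta that rebuilds 'hello sir' from it.
    source = 'hello' + 'c0000000000000005' + 'i0004 sir'
    assert apply_delta_to_source(source, 5, len(source)) == 'hello sir'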

@@ -1 +1 @@
-# Copyright (C) 2008, 2009, 2010 Canonical Ltd
+# Copyright (C) 2008, 2009 Canonical Ltd
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -19 +19 @@
 import zlib

 from bzrlib import (
-    btree_index,
     groupcompress,
     errors,
-    index as _mod_index,
     osutils,
     tests,
-    trace,
     versionedfile,
     )
 from bzrlib.osutils import sha_string
-from bzrlib.tests.test__groupcompress import compiled_groupcompress_feature
+from bzrlib.tests.test__groupcompress import CompiledGroupCompressFeature


 def load_tests(standard_tests, module, loader):
@@ -39 +36 @@
     scenarios = [
         ('python', {'compressor': groupcompress.PythonGroupCompressor}),
         ]
-    if compiled_groupcompress_feature.available():
+    if CompiledGroupCompressFeature.available():
         scenarios.append(('C',
             {'compressor': groupcompress.PyrexGroupCompressor}))
     return tests.multiply_tests(to_adapt, scenarios, result)
@@ -75 +72 @@
     def test_one_nosha_delta(self):
         # diff against NUKK
         compressor = self.compressor()
-        sha1, start_point, end_point, _ = compressor.compress(('label',),
+        sha1, start_point, end_point, _, _ = compressor.compress(('label',),
             'strange\ncommon\n', None)
         self.assertEqual(sha_string('strange\ncommon\n'), sha1)
         expected_lines = 'f' '\x0f' 'strange\ncommon\n'
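
The hunk above shows the pattern that repeats through the rest of this file: every call that unpacks four values from compressor.compress() on the removed side unpacks five on the added side, and the tests simply discard the extra trailing value. A minimal sketch of the added-side calling convention, assuming a bzrlib tree whose groupcompress matches that side (with a tree matching the removed side, drop the trailing _):

    from bzrlib import groupcompress
    from bzrlib.osutils import sha_string

    compressor = groupcompress.PythonGroupCompressor()
    # Added side of this diff: compress() returns five values; the tests here
    # never use the fifth, so they unpack it into _ and ignore it.
    sha1, start_point, end_point, kind, _ = compressor.compress(
        ('label',), 'strange\ncommon\n', None)
    assert sha1 == sha_string('strange\ncommon\n')
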
@@ -86 +83 @@
     def test_empty_content(self):
         compressor = self.compressor()
         # Adding empty bytes should return the 'null' record
-        sha1, start_point, end_point, kind = compressor.compress(('empty',),
-                                                                 '', None)
+        sha1, start_point, end_point, kind, _ = compressor.compress(('empty',),
+            '', None)
         self.assertEqual(0, start_point)
         self.assertEqual(0, end_point)
         self.assertEqual('fulltext', kind)
@@ -97 +94 @@
         # Even after adding some content
         compressor.compress(('content',), 'some\nbytes\n', None)
         self.assertTrue(compressor.endpoint > 0)
-        sha1, start_point, end_point, kind = compressor.compress(('empty2',),
-                                                                 '', None)
+        sha1, start_point, end_point, kind, _ = compressor.compress(('empty2',),
+            '', None)
         self.assertEqual(0, start_point)
         self.assertEqual(0, end_point)
         self.assertEqual('fulltext', kind)
@@ -108 +105 @@
         # Knit fetching will try to reconstruct texts locally which results in
         # reading something that is in the compressor stream already.
         compressor = self.compressor()
-        sha1_1, _, _, _ = compressor.compress(('label',),
+        sha1_1, _, _, _, _ = compressor.compress(('label',),
             'strange\ncommon long line\nthat needs a 16 byte match\n', None)
         expected_lines = list(compressor.chunks)
-        sha1_2, _, end_point, _ = compressor.compress(('newlabel',),
+        sha1_2, _, end_point, _, _ = compressor.compress(('newlabel',),
             'common long line\nthat needs a 16 byte match\ndifferent\n', None)
         # get the first out
         self.assertEqual(('strange\ncommon long line\n'
@@ -122 +119 @@
                           'different\n', sha1_2),
                          compressor.extract(('newlabel',)))

-    def test_pop_last(self):
-        compressor = self.compressor()
-        _, _, _, _ = compressor.compress(('key1',),
-            'some text\nfor the first entry\n', None)
-        expected_lines = list(compressor.chunks)
-        _, _, _, _ = compressor.compress(('key2',),
-            'some text\nfor the second entry\n', None)
-        compressor.pop_last()
-        self.assertEqual(expected_lines, compressor.chunks)
-

 class TestPyrexGroupCompressor(TestGroupCompressor):

-    _test_needs_features = [compiled_groupcompress_feature]
+    _test_needs_features = [CompiledGroupCompressFeature]
     compressor = groupcompress.PyrexGroupCompressor

     def test_stats(self):
@@ -159 +146 @@

     def test_two_nosha_delta(self):
         compressor = self.compressor()
-        sha1_1, _, _, _ = compressor.compress(('label',),
+        sha1_1, _, _, _, _ = compressor.compress(('label',),
             'strange\ncommon long line\nthat needs a 16 byte match\n', None)
         expected_lines = list(compressor.chunks)
-        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
+        sha1_2, start_point, end_point, _, _ = compressor.compress(('newlabel',),
             'common long line\nthat needs a 16 byte match\ndifferent\n', None)
         self.assertEqual(sha_string('common long line\n'
                                     'that needs a 16 byte match\n'
@@ -184 +171 @@
         # The first interesting test: make a change that should use lines from
         # both parents.
         compressor = self.compressor()
-        sha1_1, _, _, _ = compressor.compress(('label',),
+        sha1_1, _, _, _, _ = compressor.compress(('label',),
             'strange\ncommon very very long line\nwith some extra text\n', None)
-        sha1_2, _, _, _ = compressor.compress(('newlabel',),
+        sha1_2, _, _, _, _ = compressor.compress(('newlabel',),
             'different\nmoredifferent\nand then some more\n', None)
         expected_lines = list(compressor.chunks)
-        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
+        sha1_3, start_point, end_point, _, _ = compressor.compress(('label3',),
             'new\ncommon very very long line\nwith some extra text\n'
             'different\nmoredifferent\nand then some more\n',
             None)
@@ -238 +225 @@

     def test_two_nosha_delta(self):
         compressor = self.compressor()
-        sha1_1, _, _, _ = compressor.compress(('label',),
+        sha1_1, _, _, _, _ = compressor.compress(('label',),
             'strange\ncommon long line\nthat needs a 16 byte match\n', None)
         expected_lines = list(compressor.chunks)
-        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
+        sha1_2, start_point, end_point, _, _ = compressor.compress(('newlabel',),
             'common long line\nthat needs a 16 byte match\ndifferent\n', None)
         self.assertEqual(sha_string('common long line\n'
                                     'that needs a 16 byte match\n'
@@ -263 +250 @@
         # The first interesting test: make a change that should use lines from
         # both parents.
         compressor = self.compressor()
-        sha1_1, _, _, _ = compressor.compress(('label',),
+        sha1_1, _, _, _, _ = compressor.compress(('label',),
             'strange\ncommon very very long line\nwith some extra text\n', None)
-        sha1_2, _, _, _ = compressor.compress(('newlabel',),
+        sha1_2, _, _, _, _ = compressor.compress(('newlabel',),
             'different\nmoredifferent\nand then some more\n', None)
         expected_lines = list(compressor.chunks)
-        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
+        sha1_3, start_point, end_point, _, _ = compressor.compress(('label3',),
             'new\ncommon very very long line\nwith some extra text\n'
             'different\nmoredifferent\nand then some more\n',
             None)
@@ -323 +310 @@
         self.assertEqual('', block._z_content)
         block._ensure_content() # Ensure content is safe to call 2x

-    def test_from_invalid(self):
-        self.assertRaises(ValueError,
-                          groupcompress.GroupCompressBlock.from_bytes,
-                          'this is not a valid header')
-
     def test_from_bytes(self):
         content = ('a tiny bit of content\n')
         z_content = zlib.compress(content)
@@ -364 +346 @@
         raw_bytes = zlib.decompress(remaining_bytes)
         self.assertEqual(content, raw_bytes)

-        # we should get the same results if using the chunked version
-        gcb = groupcompress.GroupCompressBlock()
-        gcb.set_chunked_content(['this is some content\n'
-                                 'this content will be compressed\n'],
-                                 len(content))
-        old_bytes = bytes
-        bytes = gcb.to_bytes()
-        self.assertEqual(old_bytes, bytes)
-
     def test_partial_decomp(self):
         content_chunks = []
         # We need a sufficient amount of data so that zlib.decompress has
@@ -418 +391 @@
         # And the decompressor is finalized
         self.assertIs(None, block._z_content_decompressor)

-    def test__ensure_all_content(self):
+    def test_partial_decomp_no_known_length(self):
         content_chunks = []
-        # We need a sufficient amount of data so that zlib.decompress has
-        # partial decompression to work with. Most auto-generated data
-        # compresses a bit too well, we want a combination, so we combine a sha
-        # hash with compressible data.
         for i in xrange(2048):
             next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
             content_chunks.append(next_content)
@@ -437 +406 @@
         block._z_content = z_content
         block._z_content_length = len(z_content)
         block._compressor_name = 'zlib'
-        block._content_length = 158634
+        block._content_length = None # Don't tell the decompressed length
         self.assertIs(None, block._content)
-        # The first _ensure_content got all of the required data
-        block._ensure_content(158634)
+        block._ensure_content(100)
+        self.assertIsNot(None, block._content)
+        # We have decompressed at least 100 bytes
+        self.assertTrue(len(block._content) >= 100)
+        # We have not decompressed the whole content
+        self.assertTrue(len(block._content) < 158634)
+        self.assertEqualDiff(content[:len(block._content)], block._content)
+        # ensuring content that we already have shouldn't cause any more data
+        # to be extracted
+        cur_len = len(block._content)
+        block._ensure_content(cur_len - 10)
+        self.assertEqual(cur_len, len(block._content))
+        # Now we want a bit more content
+        cur_len += 10
+        block._ensure_content(cur_len)
+        self.assertTrue(len(block._content) >= cur_len)
+        self.assertTrue(len(block._content) < 158634)
+        self.assertEqualDiff(content[:len(block._content)], block._content)
+        # And now lets finish
+        block._ensure_content()
         self.assertEqualDiff(content, block._content)
-        # And we should have released the _z_content_decompressor since it was
-        # fully consumed
+        # And the decompressor is finalized
         self.assertIs(None, block._z_content_decompressor)

-    def test__dump(self):
-        dup_content = 'some duplicate content\nwhich is sufficiently long\n'
-        key_to_text = {('1',): dup_content + '1 unique\n',
-                       ('2',): dup_content + '2 extra special\n'}
-        locs, block = self.make_block(key_to_text)
-        self.assertEqual([('f', len(key_to_text[('1',)])),
-                          ('d', 21, len(key_to_text[('2',)]),
-                           [('c', 2, len(dup_content)),
-                            ('i', len('2 extra special\n'), '')
-                           ]),
-                         ], block._dump())
-
-
-class TestCaseWithGroupCompressVersionedFiles(
-        tests.TestCaseWithMemoryTransport):
+
+class TestCaseWithGroupCompressVersionedFiles(tests.TestCaseWithTransport):

     def make_test_vf(self, create_graph, keylength=1, do_cleanup=True,
-                     dir='.', inconsistency_fatal=True):
+                     dir='.'):
         t = self.get_transport(dir)
         t.ensure_base()
         vf = groupcompress.make_pack_factory(graph=create_graph,
-            delta=False, keylength=keylength,
-            inconsistency_fatal=inconsistency_fatal)(t)
+            delta=False, keylength=keylength)(t)
         if do_cleanup:
             self.addCleanup(groupcompress.cleanup_pack_group, vf)
         return vf
@@ -476 +448 @@

 class TestGroupCompressVersionedFiles(TestCaseWithGroupCompressVersionedFiles):

-    def make_g_index(self, name, ref_lists=0, nodes=[]):
-        builder = btree_index.BTreeBuilder(ref_lists)
-        for node, references, value in nodes:
-            builder.add_node(node, references, value)
-        stream = builder.finish()
-        trans = self.get_transport()
-        size = trans.put_file(name, stream)
-        return btree_index.BTreeGraphIndex(trans, name, size)
-
-    def make_g_index_missing_parent(self):
-        graph_index = self.make_g_index('missing_parent', 1,
-            [(('parent', ), '2 78 2 10', ([],)),
-             (('tip', ), '2 78 2 10',
-              ([('parent', ), ('missing-parent', )],)),
-              ])
-        return graph_index
-
     def test_get_record_stream_as_requested(self):
         # Consider promoting 'as-requested' to general availability, and
         # make this a VF interface test
481
                    'as-requested', False)]
527
482
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)
528
483
 
529
 
    def test_insert_record_stream_reuses_blocks(self):
 
484
    def test_insert_record_stream_re_uses_blocks(self):
530
485
        vf = self.make_test_vf(True, dir='source')
531
486
        def grouped_stream(revision_ids, first_parents=()):
532
487
            parents = first_parents
@@ -570 +525 @@
         vf2 = self.make_test_vf(True, dir='target')
         # ordering in 'groupcompress' order, should actually swap the groups in
         # the target vf, but the groups themselves should not be disturbed.
-        def small_size_stream():
-            for record in vf.get_record_stream([(r,) for r in 'abcdefgh'],
-                                               'groupcompress', False):
-                record._manager._full_enough_block_size = \
-                    record._manager._block._content_length
-                yield record
-
-        vf2.insert_record_stream(small_size_stream())
+        vf2.insert_record_stream(vf.get_record_stream(
+            [(r,) for r in 'abcdefgh'], 'groupcompress', False))
         stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                        'groupcompress', False)
         vf2.writer.end()
@@ -588 +537 @@
                              record._manager._block._z_content)
         self.assertEqual(8, num_records)

-    def test_insert_record_stream_packs_on_the_fly(self):
-        vf = self.make_test_vf(True, dir='source')
-        def grouped_stream(revision_ids, first_parents=()):
-            parents = first_parents
-            for revision_id in revision_ids:
-                key = (revision_id,)
-                record = versionedfile.FulltextContentFactory(
-                    key, parents, None,
-                    'some content that is\n'
-                    'identical except for\n'
-                    'revision_id:%s\n' % (revision_id,))
-                yield record
-                parents = (key,)
-        # One group, a-d
-        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
-        # Second group, e-h
-        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
-                                               first_parents=(('d',),)))
-        # Now copy the blocks into another vf, and see that the
-        # insert_record_stream rebuilt a new block on-the-fly because of
-        # under-utilization
-        vf2 = self.make_test_vf(True, dir='target')
-        vf2.insert_record_stream(vf.get_record_stream(
-            [(r,) for r in 'abcdefgh'], 'groupcompress', False))
-        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
-                                       'groupcompress', False)
-        vf2.writer.end()
-        num_records = 0
-        # All of the records should be recombined into a single block
-        block = None
-        for record in stream:
-            num_records += 1
-            if block is None:
-                block = record._manager._block
-            else:
-                self.assertIs(block, record._manager._block)
-        self.assertEqual(8, num_records)
-
     def test__insert_record_stream_no_reuse_block(self):
         vf = self.make_test_vf(True, dir='source')
         def grouped_stream(revision_ids, first_parents=()):
@@ -668 +579 @@
             else:
                 self.assertIs(block, record._manager._block)

-    def test_add_missing_noncompression_parent_unvalidated_index(self):
-        unvalidated = self.make_g_index_missing_parent()
-        combined = _mod_index.CombinedGraphIndex([unvalidated])
-        index = groupcompress._GCGraphIndex(combined,
-            is_locked=lambda: True, parents=True,
-            track_external_parent_refs=True)
-        index.scan_unvalidated_index(unvalidated)
-        self.assertEqual(
-            frozenset([('missing-parent',)]), index.get_missing_parents())
-
-    def test_track_external_parent_refs(self):
-        g_index = self.make_g_index('empty', 1, [])
-        mod_index = btree_index.BTreeBuilder(1, 1)
-        combined = _mod_index.CombinedGraphIndex([g_index, mod_index])
-        index = groupcompress._GCGraphIndex(combined,
-            is_locked=lambda: True, parents=True,
-            add_callback=mod_index.add_nodes,
-            track_external_parent_refs=True)
-        index.add_records([
-            (('new-key',), '2 10 2 10', [(('parent-1',), ('parent-2',))])])
-        self.assertEqual(
-            frozenset([('parent-1',), ('parent-2',)]),
-            index.get_missing_parents())
-
-    def make_source_with_b(self, a_parent, path):
-        source = self.make_test_vf(True, dir=path)
-        source.add_lines(('a',), (), ['lines\n'])
-        if a_parent:
-            b_parents = (('a',),)
-        else:
-            b_parents = ()
-        source.add_lines(('b',), b_parents, ['lines\n'])
-        return source
-
-    def do_inconsistent_inserts(self, inconsistency_fatal):
-        target = self.make_test_vf(True, dir='target',
-                                   inconsistency_fatal=inconsistency_fatal)
-        for x in range(2):
-            source = self.make_source_with_b(x==1, 'source%s' % x)
-            target.insert_record_stream(source.get_record_stream(
-                [('b',)], 'unordered', False))
-
-    def test_inconsistent_redundant_inserts_warn(self):
-        """Should not insert a record that is already present."""
-        warnings = []
-        def warning(template, args):
-            warnings.append(template % args)
-        _trace_warning = trace.warning
-        trace.warning = warning
-        try:
-            self.do_inconsistent_inserts(inconsistency_fatal=False)
-        finally:
-            trace.warning = _trace_warning
-        self.assertEqual(["inconsistent details in skipped record: ('b',)"
-                          " ('42 32 0 8', ((),)) ('74 32 0 8', ((('a',),),))"],
-                         warnings)
-
-    def test_inconsistent_redundant_inserts_raises(self):
-        e = self.assertRaises(errors.KnitCorrupt, self.do_inconsistent_inserts,
-                              inconsistency_fatal=True)
-        self.assertContainsRe(str(e), "Knit.* corrupt: inconsistent details"
-                              " in add_records:"
-                              " \('b',\) \('42 32 0 8', \(\(\),\)\) \('74 32"
-                              " 0 8', \(\(\('a',\),\),\)\)")
-
-    def test_clear_cache(self):
-        vf = self.make_source_with_b(True, 'source')
-        vf.writer.end()
-        for record in vf.get_record_stream([('a',), ('b',)], 'unordered',
-                                           True):
-            pass
-        self.assertTrue(len(vf._group_cache) > 0)
-        vf.clear_cache()
-        self.assertEqual(0, len(vf._group_cache))
-
-
-
-class StubGCVF(object):
-    def __init__(self, canned_get_blocks=None):
-        self._group_cache = {}
-        self._canned_get_blocks = canned_get_blocks or []
-    def _get_blocks(self, read_memos):
-        return iter(self._canned_get_blocks)
-
-
-class Test_BatchingBlockFetcher(TestCaseWithGroupCompressVersionedFiles):
-    """Simple whitebox unit tests for _BatchingBlockFetcher."""
-
-    def test_add_key_new_read_memo(self):
-        """Adding a key with an uncached read_memo new to this batch adds that
-        read_memo to the list of memos to fetch.
-        """
-        # locations are: index_memo, ignored, parents, ignored
-        # where index_memo is: (idx, offset, len, factory_start, factory_end)
-        # and (idx, offset, size) is known as the 'read_memo', identifying the
-        # raw bytes needed.
-        read_memo = ('fake index', 100, 50)
-        locations = {
-            ('key',): (read_memo + (None, None), None, None, None)}
-        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), locations)
-        total_size = batcher.add_key(('key',))
-        self.assertEqual(50, total_size)
-        self.assertEqual([('key',)], batcher.keys)
-        self.assertEqual([read_memo], batcher.memos_to_get)
-
-    def test_add_key_duplicate_read_memo(self):
-        """read_memos that occur multiple times in a batch will only be fetched
-        once.
-        """
-        read_memo = ('fake index', 100, 50)
-        # Two keys, both sharing the same read memo (but different overall
-        # index_memos).
-        locations = {
-            ('key1',): (read_memo + (0, 1), None, None, None),
-            ('key2',): (read_memo + (1, 2), None, None, None)}
-        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), locations)
-        total_size = batcher.add_key(('key1',))
-        total_size = batcher.add_key(('key2',))
-        self.assertEqual(50, total_size)
-        self.assertEqual([('key1',), ('key2',)], batcher.keys)
-        self.assertEqual([read_memo], batcher.memos_to_get)
-
-    def test_add_key_cached_read_memo(self):
-        """Adding a key with a cached read_memo will not cause that read_memo
-        to be added to the list to fetch.
-        """
-        read_memo = ('fake index', 100, 50)
-        gcvf = StubGCVF()
-        gcvf._group_cache[read_memo] = 'fake block'
-        locations = {
-            ('key',): (read_memo + (None, None), None, None, None)}
-        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
-        total_size = batcher.add_key(('key',))
-        self.assertEqual(0, total_size)
-        self.assertEqual([('key',)], batcher.keys)
-        self.assertEqual([], batcher.memos_to_get)
-
-    def test_yield_factories_empty(self):
-        """An empty batch yields no factories."""
-        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), {})
-        self.assertEqual([], list(batcher.yield_factories()))
-
-    def test_yield_factories_calls_get_blocks(self):
-        """Uncached memos are retrieved via get_blocks."""
-        read_memo1 = ('fake index', 100, 50)
-        read_memo2 = ('fake index', 150, 40)
-        gcvf = StubGCVF(
-            canned_get_blocks=[
-                (read_memo1, groupcompress.GroupCompressBlock()),
-                (read_memo2, groupcompress.GroupCompressBlock())])
-        locations = {
-            ('key1',): (read_memo1 + (None, None), None, None, None),
-            ('key2',): (read_memo2 + (None, None), None, None, None)}
-        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
-        batcher.add_key(('key1',))
-        batcher.add_key(('key2',))
-        factories = list(batcher.yield_factories(full_flush=True))
-        self.assertLength(2, factories)
-        keys = [f.key for f in factories]
-        kinds = [f.storage_kind for f in factories]
-        self.assertEqual([('key1',), ('key2',)], keys)
-        self.assertEqual(['groupcompress-block', 'groupcompress-block'], kinds)
-
-    def test_yield_factories_flushing(self):
-        """yield_factories holds back on yielding results from the final block
-        unless passed full_flush=True.
-        """
-        fake_block = groupcompress.GroupCompressBlock()
-        read_memo = ('fake index', 100, 50)
-        gcvf = StubGCVF()
-        gcvf._group_cache[read_memo] = fake_block
-        locations = {
-            ('key',): (read_memo + (None, None), None, None, None)}
-        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
-        batcher.add_key(('key',))
-        self.assertEqual([], list(batcher.yield_factories()))
-        factories = list(batcher.yield_factories(full_flush=True))
-        self.assertLength(1, factories)
-        self.assertEqual(('key',), factories[0].key)
-        self.assertEqual('groupcompress-block', factories[0].storage_kind)
-

 class TestLazyGroupCompress(tests.TestCaseWithTransport):

     _texts = {
         ('key1',): "this is a text\n"
-                   "with a reasonable amount of compressible bytes\n"
-                   "which can be shared between various other texts\n",
+                   "with a reasonable amount of compressible bytes\n",
         ('key2',): "another text\n"
-                   "with a reasonable amount of compressible bytes\n"
-                   "which can be shared between various other texts\n",
+                   "with a reasonable amount of compressible bytes\n",
         ('key3',): "yet another text which won't be extracted\n"
-                   "with a reasonable amount of compressible bytes\n"
-                   "which can be shared between various other texts\n",
+                   "with a reasonable amount of compressible bytes\n",
         ('key4',): "this will be extracted\n"
                    "but references most of its bytes from\n"
                    "yet another text which won't be extracted\n"
-                   "with a reasonable amount of compressible bytes\n"
-                   "which can be shared between various other texts\n",
+                   "with a reasonable amount of compressible bytes\n",
     }
     def make_block(self, key_to_text):
         """Create a GroupCompressBlock, filling it with the given texts."""
@@ -884 +610 @@
         start, end = locations[key]
         manager.add_factory(key, (), start, end)

-    def make_block_and_full_manager(self, texts):
-        locations, block = self.make_block(texts)
-        manager = groupcompress._LazyGroupContentManager(block)
-        for key in sorted(texts):
-            self.add_key_to_manager(key, locations, block, manager)
-        return block, manager
-
     def test_get_fulltexts(self):
         locations, block = self.make_block(self._texts)
         manager = groupcompress._LazyGroupContentManager(block)
@@ -947 +666 @@
         header_len = int(header_len)
         block_len = int(block_len)
         self.assertEqual('groupcompress-block', storage_kind)
-        self.assertEqual(34, z_header_len)
-        self.assertEqual(26, header_len)
+        self.assertEqual(33, z_header_len)
+        self.assertEqual(25, header_len)
         self.assertEqual(len(block_bytes), block_len)
         z_header = rest[:z_header_len]
         header = zlib.decompress(z_header)
@@ -988 +707 @@
         self.assertEqual([('key1',), ('key4',)], result_order)

     def test__check_rebuild_no_changes(self):
-        block, manager = self.make_block_and_full_manager(self._texts)
+        locations, block = self.make_block(self._texts)
+        manager = groupcompress._LazyGroupContentManager(block)
+        # Request all the keys, which ensures that we won't rebuild
+        self.add_key_to_manager(('key1',), locations, block, manager)
+        self.add_key_to_manager(('key2',), locations, block, manager)
+        self.add_key_to_manager(('key3',), locations, block, manager)
+        self.add_key_to_manager(('key4',), locations, block, manager)
         manager._check_rebuild_block()
         self.assertIs(block, manager._block)

@@ -1019 +744 @@
             self.assertEqual(('key4',), record.key)
             self.assertEqual(self._texts[record.key],
                              record.get_bytes_as('fulltext'))
-
-    def test_check_is_well_utilized_all_keys(self):
-        block, manager = self.make_block_and_full_manager(self._texts)
-        self.assertFalse(manager.check_is_well_utilized())
-        # Though we can fake it by changing the recommended minimum size
-        manager._full_enough_block_size = block._content_length
-        self.assertTrue(manager.check_is_well_utilized())
-        # Setting it just above causes it to fail
-        manager._full_enough_block_size = block._content_length + 1
-        self.assertFalse(manager.check_is_well_utilized())
-        # Setting the mixed-block size doesn't do anything, because the content
-        # is considered to not be 'mixed'
-        manager._full_enough_mixed_block_size = block._content_length
-        self.assertFalse(manager.check_is_well_utilized())
-
-    def test_check_is_well_utilized_mixed_keys(self):
-        texts = {}
-        f1k1 = ('f1', 'k1')
-        f1k2 = ('f1', 'k2')
-        f2k1 = ('f2', 'k1')
-        f2k2 = ('f2', 'k2')
-        texts[f1k1] = self._texts[('key1',)]
-        texts[f1k2] = self._texts[('key2',)]
-        texts[f2k1] = self._texts[('key3',)]
-        texts[f2k2] = self._texts[('key4',)]
-        block, manager = self.make_block_and_full_manager(texts)
-        self.assertFalse(manager.check_is_well_utilized())
-        manager._full_enough_block_size = block._content_length
-        self.assertTrue(manager.check_is_well_utilized())
-        manager._full_enough_block_size = block._content_length + 1
-        self.assertFalse(manager.check_is_well_utilized())
-        manager._full_enough_mixed_block_size = block._content_length
-        self.assertTrue(manager.check_is_well_utilized())
-
-    def test_check_is_well_utilized_partial_use(self):
-        locations, block = self.make_block(self._texts)
-        manager = groupcompress._LazyGroupContentManager(block)
-        manager._full_enough_block_size = block._content_length
-        self.add_key_to_manager(('key1',), locations, block, manager)
-        self.add_key_to_manager(('key2',), locations, block, manager)
-        # Just using the content from key1 and 2 is not enough to be considered
-        # 'complete'
-        self.assertFalse(manager.check_is_well_utilized())
-        # However if we add key3, then we have enough, as we only require 75%
-        # consumption
-        self.add_key_to_manager(('key4',), locations, block, manager)
-        self.assertTrue(manager.check_is_well_utilized())