~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/tests/test_groupcompress.py

  • Committer: Martin Pool
  • Date: 2005-05-17 06:56:16 UTC
  • Revision ID: mbp@sourcefrog.net-20050517065616-6f23381d6184a8aa
- add space for un-merged patches

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
# Copyright (C) 2008, 2009 Canonical Ltd
2
 
#
3
 
# This program is free software; you can redistribute it and/or modify
4
 
# it under the terms of the GNU General Public License as published by
5
 
# the Free Software Foundation; either version 2 of the License, or
6
 
# (at your option) any later version.
7
 
#
8
 
# This program is distributed in the hope that it will be useful,
9
 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
 
# GNU General Public License for more details.
12
 
#
13
 
# You should have received a copy of the GNU General Public License
14
 
# along with this program; if not, write to the Free Software
15
 
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
 
 
17
 
"""Tests for group compression."""
18
 
 
19
 
import zlib
20
 
 
21
 
from bzrlib import (
22
 
    btree_index,
23
 
    groupcompress,
24
 
    errors,
25
 
    index as _mod_index,
26
 
    osutils,
27
 
    tests,
28
 
    trace,
29
 
    versionedfile,
30
 
    )
31
 
from bzrlib.osutils import sha_string
32
 
from bzrlib.tests.test__groupcompress import compiled_groupcompress_feature
33
 
 
34
 
 
35
 
def load_tests(standard_tests, module, loader):
    """Multiply the TestAllGroupCompressors tests by compressor flavour.

    Tests that are instances of TestAllGroupCompressors are split out of
    standard_tests and repeated once per scenario; every other test is
    passed through untouched.  The 'python' scenario is always present and
    the 'C' scenario is added only when compiled_groupcompress_feature
    reports itself available.
    """
    wants_every_compressor = tests.condition_isinstance(
        TestAllGroupCompressors)
    parameterized, remainder = tests.split_suite_by_condition(
        standard_tests, wants_every_compressor)
    implementations = [('python', groupcompress.PythonGroupCompressor)]
    if compiled_groupcompress_feature.available():
        implementations.append(('C', groupcompress.PyrexGroupCompressor))
    scenarios = [(label, {'compressor': klass})
                 for label, klass in implementations]
    return tests.multiply_tests(parameterized, scenarios, remainder)
46
 
 
47
 
 
48
 
class TestGroupCompressor(tests.TestCase):
    """Base class providing chunk comparison helpers."""

    def _chunks_to_repr_lines(self, chunks):
        """Return the repr() of every line in the joined chunks.

        The chunks are concatenated, split on newline characters, and the
        repr() of each piece is joined back together with newlines.
        """
        joined = ''.join(chunks)
        return '\n'.join([repr(line) for line in joined.split('\n')])

    def assertEqualDiffEncoded(self, expected, actual):
        """Assert that two groups of chunks carry the same content.

        :param expected: The chunks we expect to see
        :param actual: The chunks that were actually produced

        Both sides are turned back into lines and passed through repr()
        before being handed to assertEqualDiff, so that non-ascii
        characters are handled.
        """
        expected_repr = self._chunks_to_repr_lines(expected)
        actual_repr = self._chunks_to_repr_lines(actual)
        self.assertEqualDiff(expected_repr, actual_repr)
64
 
 
65
 
 
66
 
class TestAllGroupCompressors(TestGroupCompressor):
    """Tests for GroupCompressor"""

    compressor = None # Set by multiply_tests

    def test_empty_delta(self):
        # A freshly constructed compressor holds no chunks.
        compressor = self.compressor()
        self.assertEqual([], compressor.chunks)

    def test_one_nosha_delta(self):
        # diff against NULL
        compressor = self.compressor()
        sha1, start_point, end_point, _ = compressor.compress(('label',),
            'strange\ncommon\n', None)
        self.assertEqual(sha_string('strange\ncommon\n'), sha1)
        # 'f' marker, one length byte (0x0f == 15 == len of the text), text
        expected_bytes = 'f' '\x0f' 'strange\ncommon\n'
        self.assertEqual(expected_bytes, ''.join(compressor.chunks))
        self.assertEqual(0, start_point)
        # expected_bytes is a single string, so its length is the end point
        self.assertEqual(len(expected_bytes), end_point)

    def test_empty_content(self):
        compressor = self.compressor()
        # Adding empty bytes should return the 'null' record
        sha1, start_point, end_point, kind = compressor.compress(('empty',),
                                                                 '', None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)
        self.assertEqual(0, compressor.endpoint)
        self.assertEqual([], compressor.chunks)
        # Even after adding some content
        compressor.compress(('content',), 'some\nbytes\n', None)
        self.assertTrue(compressor.endpoint > 0)
        sha1, start_point, end_point, kind = compressor.compress(('empty2',),
                                                                 '', None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)

    def test_extract_from_compressor(self):
        # Knit fetching will try to reconstruct texts locally which results in
        # reading something that is in the compressor stream already.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        # get the first out
        self.assertEqual(('strange\ncommon long line\n'
                          'that needs a 16 byte match\n', sha1_1),
                         compressor.extract(('label',)))
        # and the second
        self.assertEqual(('common long line\nthat needs a 16 byte match\n'
                          'different\n', sha1_2),
                         compressor.extract(('newlabel',)))

    def test_pop_last(self):
        compressor = self.compressor()
        compressor.compress(('key1',),
            'some text\nfor the first entry\n', None)
        # Snapshot the chunks before the second text is added ...
        expected_lines = list(compressor.chunks)
        compressor.compress(('key2',),
            'some text\nfor the second entry\n', None)
        # ... and check pop_last() brings the compressor back to that state.
        compressor.pop_last()
        self.assertEqual(expected_lines, compressor.chunks)
134
 
 
135
 
 
136
 
class TestPyrexGroupCompressor(TestGroupCompressor):
    """Byte-level expectations for groupcompress.PyrexGroupCompressor."""

    _test_needs_features = [compiled_groupcompress_feature]
    compressor = groupcompress.PyrexGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            'strange\n'
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'new\n'
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, _, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0f',
            # target length (0x36 == 54, the length of the second text)
            '\x36',
            # copy the line common
            '\x91\x0a\x2c', #copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, _, end_point, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        # Each instruction is its own list element; the chunks are joined
        # before comparison, so the grouping does not affect the result.
        expected_lines.extend([
            # 'delta', delta length
            'd\x0b',
            # target length (0x5f == 95, the length of the third text)
            '\x5f',
            # insert new
            '\x03new',
            # Copy of first parent 'common' range
            '\x91\x09\x31', # copy, offset 0x09, 0x31 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b', # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)
214
 
 
215
 
 
216
 
class TestPythonGroupCompressor(TestGroupCompressor):
    """Byte-level expectations for groupcompress.PythonGroupCompressor."""

    compressor = groupcompress.PythonGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            'strange\n'
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'new\n'
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, _, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0f',
            # target length
            '\x36',
            # copy the line common
            '\x91\x0a\x2c', #copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, _, end_point, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        # Each instruction is its own list element; the chunks are joined
        # before comparison, so the grouping does not affect the result.
        expected_lines.extend([
            # 'delta', delta length
            'd\x0c',
            # target length
            '\x5f',
            # insert new
            '\x04new\n',
            # Copy of first parent 'common' range
            '\x91\x0a\x30', # copy, offset 0x0a, 0x30 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b', # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)
293
 
 
294
 
 
295
 
class TestGroupCompressBlock(tests.TestCase):
    """Tests for GroupCompressBlock (de)serialisation and lazy extraction."""

    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts.

        :param key_to_text: A dict mapping key => text.  Texts are added to
            the compressor in sorted key order.
        :return: (locs, block).  locs maps each key to the first and third
            items of its compressor.labels_deltas entry, as (start, end).
            block is rebuilt from the flushed block's to_bytes() output.
        """
        compressor = groupcompress.GroupCompressor()
        for key in sorted(key_to_text):
            compressor.compress(key, key_to_text[key], None)
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.iteritems())
        block = compressor.flush()
        raw_bytes = block.to_bytes()
        # Go through from_bytes(to_bytes()) so that we start with a compressed
        # content object
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)

    def _make_undecompressed_block(self):
        """Return (content, block) shared by the _ensure_content tests.

        block is a GroupCompressBlock whose compressed fields are filled in
        by hand from zlib.compress(content) and whose _content has not been
        extracted yet.
        """
        content_chunks = []
        # We need a sufficient amount of data so that zlib.decompress has
        # partial decompression to work with. Most auto-generated data
        # compresses a bit too well, we want a combination, so we combine a sha
        # hash with compressible data.
        for i in xrange(2048):
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
            content_chunks.append(next_content)
            next_sha1 = osutils.sha_string(next_content)
            content_chunks.append(next_sha1 + '\n')
        content = ''.join(content_chunks)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content = z_content
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = len(content)
        self.assertIs(None, block._content)
        return content, block

    def test_from_empty_bytes(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes, '')

    def test_from_minimal_bytes(self):
        block = groupcompress.GroupCompressBlock.from_bytes(
            'gcb1z\n0\n0\n')
        self.assertIsInstance(block, groupcompress.GroupCompressBlock)
        self.assertIs(None, block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content()
        self.assertEqual('', block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content() # Ensure content is safe to call 2x

    def test_from_invalid(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes,
                          'this is not a valid header')

    def test_from_bytes(self):
        content = ('a tiny bit of content\n')
        z_content = zlib.compress(content)
        z_bytes = (
            'gcb1z\n' # group compress block v1 zlib
            '%d\n' # Length of compressed content
            '%d\n' # Length of uncompressed content
            '%s'   # Compressed content
            ) % (len(z_content), len(content), z_content)
        block = groupcompress.GroupCompressBlock.from_bytes(
            z_bytes)
        self.assertEqual(z_content, block._z_content)
        self.assertIs(None, block._content)
        self.assertEqual(len(z_content), block._z_content_length)
        self.assertEqual(len(content), block._content_length)
        block._ensure_content()
        self.assertEqual(z_content, block._z_content)
        self.assertEqual(content, block._content)

    def test_to_bytes(self):
        content = ('this is some content\n'
                   'this content will be compressed\n')
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_content(content)
        block_bytes = gcb.to_bytes()
        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
        self.assertEqual(gcb._content_length, len(content))
        expected_header =('gcb1z\n' # group compress block v1 zlib
                          '%d\n' # Length of compressed content
                          '%d\n' # Length of uncompressed content
                         ) % (gcb._z_content_length, gcb._content_length)
        self.assertStartsWith(block_bytes, expected_header)
        remaining_bytes = block_bytes[len(expected_header):]
        raw_bytes = zlib.decompress(remaining_bytes)
        self.assertEqual(content, raw_bytes)

        # we should get the same results if using the chunked version
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_chunked_content(['this is some content\n'
                                 'this content will be compressed\n'],
                                 len(content))
        chunked_block_bytes = gcb.to_bytes()
        self.assertEqual(block_bytes, chunked_block_bytes)

    def test_partial_decomp(self):
        content, block = self._make_undecompressed_block()
        content_length = len(content)
        block._ensure_content(100)
        self.assertIsNot(None, block._content)
        # We have decompressed at least 100 bytes
        self.assertTrue(len(block._content) >= 100)
        # We have not decompressed the whole content
        self.assertTrue(len(block._content) < content_length)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # ensuring content that we already have shouldn't cause any more data
        # to be extracted
        cur_len = len(block._content)
        block._ensure_content(cur_len - 10)
        self.assertEqual(cur_len, len(block._content))
        # Now we want a bit more content
        cur_len += 10
        block._ensure_content(cur_len)
        self.assertTrue(len(block._content) >= cur_len)
        self.assertTrue(len(block._content) < content_length)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # And now lets finish
        block._ensure_content(content_length)
        self.assertEqualDiff(content, block._content)
        # And the decompressor is finalized
        self.assertIs(None, block._z_content_decompressor)

    def test__ensure_all_content(self):
        content, block = self._make_undecompressed_block()
        # The first _ensure_content got all of the required data
        block._ensure_content(len(content))
        self.assertEqualDiff(content, block._content)
        # And we should have released the _z_content_decompressor since it was
        # fully consumed
        self.assertIs(None, block._z_content_decompressor)

    def test__dump(self):
        dup_content = 'some duplicate content\nwhich is sufficiently long\n'
        key_to_text = {('1',): dup_content + '1 unique\n',
                       ('2',): dup_content + '2 extra special\n'}
        locs, block = self.make_block(key_to_text)
        self.assertEqual([('f', len(key_to_text[('1',)])),
                          ('d', 21, len(key_to_text[('2',)]),
                           [('c', 2, len(dup_content)),
                            ('i', len('2 extra special\n'), '')
                           ]),
                         ], block._dump())
460
 
 
461
 
 
462
 
class TestCaseWithGroupCompressVersionedFiles(
        tests.TestCaseWithMemoryTransport):
    """Base class that can build groupcompress versioned files."""

    def make_test_vf(self, create_graph, keylength=1, do_cleanup=True,
                     dir='.', inconsistency_fatal=True):
        """Return the object built by groupcompress.make_pack_factory.

        :param create_graph: Passed to make_pack_factory as 'graph'.
        :param keylength: Passed through to make_pack_factory.
        :param do_cleanup: When true, register
            groupcompress.cleanup_pack_group for the result via addCleanup.
        :param dir: Location handed to self.get_transport(); its base is
            ensured before the factory is called.
        :param inconsistency_fatal: Passed through to make_pack_factory.
        """
        transport = self.get_transport(dir)
        transport.ensure_base()
        factory = groupcompress.make_pack_factory(
            graph=create_graph, delta=False, keylength=keylength,
            inconsistency_fatal=inconsistency_fatal)
        versioned_files = factory(transport)
        if not do_cleanup:
            return versioned_files
        self.addCleanup(groupcompress.cleanup_pack_group, versioned_files)
        return versioned_files
475
 
 
476
 
 
477
 
class TestGroupCompressVersionedFiles(TestCaseWithGroupCompressVersionedFiles):
478
 
 
479
 
    def make_g_index(self, name, ref_lists=0, nodes=[]):
480
 
        builder = btree_index.BTreeBuilder(ref_lists)
481
 
        for node, references, value in nodes:
482
 
            builder.add_node(node, references, value)
483
 
        stream = builder.finish()
484
 
        trans = self.get_transport()
485
 
        size = trans.put_file(name, stream)
486
 
        return btree_index.BTreeGraphIndex(trans, name, size)
487
 
 
488
 
    def make_g_index_missing_parent(self):
489
 
        graph_index = self.make_g_index('missing_parent', 1,
490
 
            [(('parent', ), '2 78 2 10', ([],)),
491
 
             (('tip', ), '2 78 2 10',
492
 
              ([('parent', ), ('missing-parent', )],)),
493
 
              ])
494
 
        return graph_index
495
 
 
496
 
    def test_get_record_stream_as_requested(self):
497
 
        # Consider promoting 'as-requested' to general availability, and
498
 
        # make this a VF interface test
499
 
        vf = self.make_test_vf(False, dir='source')
500
 
        vf.add_lines(('a',), (), ['lines\n'])
501
 
        vf.add_lines(('b',), (), ['lines\n'])
502
 
        vf.add_lines(('c',), (), ['lines\n'])
503
 
        vf.add_lines(('d',), (), ['lines\n'])
504
 
        vf.writer.end()
505
 
        keys = [record.key for record in vf.get_record_stream(
506
 
                    [('a',), ('b',), ('c',), ('d',)],
507
 
                    'as-requested', False)]
508
 
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
509
 
        keys = [record.key for record in vf.get_record_stream(
510
 
                    [('b',), ('a',), ('d',), ('c',)],
511
 
                    'as-requested', False)]
512
 
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)
513
 
 
514
 
        # It should work even after being repacked into another VF
515
 
        vf2 = self.make_test_vf(False, dir='target')
516
 
        vf2.insert_record_stream(vf.get_record_stream(
517
 
                    [('b',), ('a',), ('d',), ('c',)], 'as-requested', False))
518
 
        vf2.writer.end()
519
 
 
520
 
        keys = [record.key for record in vf2.get_record_stream(
521
 
                    [('a',), ('b',), ('c',), ('d',)],
522
 
                    'as-requested', False)]
523
 
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
524
 
        keys = [record.key for record in vf2.get_record_stream(
525
 
                    [('b',), ('a',), ('d',), ('c',)],
526
 
                    'as-requested', False)]
527
 
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)
528
 
 
529
 
    def test_insert_record_stream_reuses_blocks(self):
530
 
        vf = self.make_test_vf(True, dir='source')
531
 
        def grouped_stream(revision_ids, first_parents=()):
532
 
            parents = first_parents
533
 
            for revision_id in revision_ids:
534
 
                key = (revision_id,)
535
 
                record = versionedfile.FulltextContentFactory(
536
 
                    key, parents, None,
537
 
                    'some content that is\n'
538
 
                    'identical except for\n'
539
 
                    'revision_id:%s\n' % (revision_id,))
540
 
                yield record
541
 
                parents = (key,)
542
 
        # One group, a-d
543
 
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
544
 
        # Second group, e-h
545
 
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
546
 
                                               first_parents=(('d',),)))
547
 
        block_bytes = {}
548
 
        stream = vf.get_record_stream([(r,) for r in 'abcdefgh'],
549
 
                                      'unordered', False)
550
 
        num_records = 0
551
 
        for record in stream:
552
 
            if record.key in [('a',), ('e',)]:
553
 
                self.assertEqual('groupcompress-block', record.storage_kind)
554
 
            else:
555
 
                self.assertEqual('groupcompress-block-ref',
556
 
                                 record.storage_kind)
557
 
            block_bytes[record.key] = record._manager._block._z_content
558
 
            num_records += 1
559
 
        self.assertEqual(8, num_records)
560
 
        for r in 'abcd':
561
 
            key = (r,)
562
 
            self.assertIs(block_bytes[key], block_bytes[('a',)])
563
 
            self.assertNotEqual(block_bytes[key], block_bytes[('e',)])
564
 
        for r in 'efgh':
565
 
            key = (r,)
566
 
            self.assertIs(block_bytes[key], block_bytes[('e',)])
567
 
            self.assertNotEqual(block_bytes[key], block_bytes[('a',)])
568
 
        # Now copy the blocks into another vf, and ensure that the blocks are
569
 
        # preserved without creating new entries
570
 
        vf2 = self.make_test_vf(True, dir='target')
571
 
        # ordering in 'groupcompress' order, should actually swap the groups in
572
 
        # the target vf, but the groups themselves should not be disturbed.
573
 
        def small_size_stream():
574
 
            for record in vf.get_record_stream([(r,) for r in 'abcdefgh'],
575
 
                                               'groupcompress', False):
576
 
                record._manager._full_enough_block_size = \
577
 
                    record._manager._block._content_length
578
 
                yield record
579
 
                        
580
 
        vf2.insert_record_stream(small_size_stream())
581
 
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
582
 
                                       'groupcompress', False)
583
 
        vf2.writer.end()
584
 
        num_records = 0
585
 
        for record in stream:
586
 
            num_records += 1
587
 
            self.assertEqual(block_bytes[record.key],
588
 
                             record._manager._block._z_content)
589
 
        self.assertEqual(8, num_records)
590
 
 
591
 
    def test_insert_record_stream_packs_on_the_fly(self):
592
 
        vf = self.make_test_vf(True, dir='source')
593
 
        def grouped_stream(revision_ids, first_parents=()):
594
 
            parents = first_parents
595
 
            for revision_id in revision_ids:
596
 
                key = (revision_id,)
597
 
                record = versionedfile.FulltextContentFactory(
598
 
                    key, parents, None,
599
 
                    'some content that is\n'
600
 
                    'identical except for\n'
601
 
                    'revision_id:%s\n' % (revision_id,))
602
 
                yield record
603
 
                parents = (key,)
604
 
        # One group, a-d
605
 
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
606
 
        # Second group, e-h
607
 
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
608
 
                                               first_parents=(('d',),)))
609
 
        # Now copy the blocks into another vf, and see that the
610
 
        # insert_record_stream rebuilt a new block on-the-fly because of
611
 
        # under-utilization
612
 
        vf2 = self.make_test_vf(True, dir='target')
613
 
        vf2.insert_record_stream(vf.get_record_stream(
614
 
            [(r,) for r in 'abcdefgh'], 'groupcompress', False))
615
 
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
616
 
                                       'groupcompress', False)
617
 
        vf2.writer.end()
618
 
        num_records = 0
619
 
        # All of the records should be recombined into a single block
620
 
        block = None
621
 
        for record in stream:
622
 
            num_records += 1
623
 
            if block is None:
624
 
                block = record._manager._block
625
 
            else:
626
 
                self.assertIs(block, record._manager._block)
627
 
        self.assertEqual(8, num_records)
628
 
 
629
 
    def test__insert_record_stream_no_reuse_block(self):
630
 
        vf = self.make_test_vf(True, dir='source')
631
 
        def grouped_stream(revision_ids, first_parents=()):
632
 
            parents = first_parents
633
 
            for revision_id in revision_ids:
634
 
                key = (revision_id,)
635
 
                record = versionedfile.FulltextContentFactory(
636
 
                    key, parents, None,
637
 
                    'some content that is\n'
638
 
                    'identical except for\n'
639
 
                    'revision_id:%s\n' % (revision_id,))
640
 
                yield record
641
 
                parents = (key,)
642
 
        # One group, a-d
643
 
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
644
 
        # Second group, e-h
645
 
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
646
 
                                               first_parents=(('d',),)))
647
 
        vf.writer.end()
648
 
        self.assertEqual(8, len(list(vf.get_record_stream(
649
 
                                        [(r,) for r in 'abcdefgh'],
650
 
                                        'unordered', False))))
651
 
        # Now copy the blocks into another vf, and ensure that the blocks are
652
 
        # preserved without creating new entries
653
 
        vf2 = self.make_test_vf(True, dir='target')
654
 
        # ordering in 'groupcompress' order, should actually swap the groups in
655
 
        # the target vf, but the groups themselves should not be disturbed.
656
 
        list(vf2._insert_record_stream(vf.get_record_stream(
657
 
            [(r,) for r in 'abcdefgh'], 'groupcompress', False),
658
 
            reuse_blocks=False))
659
 
        vf2.writer.end()
660
 
        # After inserting with reuse_blocks=False, we should have everything in
661
 
        # a single new block.
662
 
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
663
 
                                       'groupcompress', False)
664
 
        block = None
665
 
        for record in stream:
666
 
            if block is None:
667
 
                block = record._manager._block
668
 
            else:
669
 
                self.assertIs(block, record._manager._block)
670
 
 
671
 
    def test_add_missing_noncompression_parent_unvalidated_index(self):
672
 
        unvalidated = self.make_g_index_missing_parent()
673
 
        combined = _mod_index.CombinedGraphIndex([unvalidated])
674
 
        index = groupcompress._GCGraphIndex(combined,
675
 
            is_locked=lambda: True, parents=True,
676
 
            track_external_parent_refs=True)
677
 
        index.scan_unvalidated_index(unvalidated)
678
 
        self.assertEqual(
679
 
            frozenset([('missing-parent',)]), index.get_missing_parents())
680
 
 
681
 
    def test_track_external_parent_refs(self):
682
 
        g_index = self.make_g_index('empty', 1, [])
683
 
        mod_index = btree_index.BTreeBuilder(1, 1)
684
 
        combined = _mod_index.CombinedGraphIndex([g_index, mod_index])
685
 
        index = groupcompress._GCGraphIndex(combined,
686
 
            is_locked=lambda: True, parents=True,
687
 
            add_callback=mod_index.add_nodes,
688
 
            track_external_parent_refs=True)
689
 
        index.add_records([
690
 
            (('new-key',), '2 10 2 10', [(('parent-1',), ('parent-2',))])])
691
 
        self.assertEqual(
692
 
            frozenset([('parent-1',), ('parent-2',)]),
693
 
            index.get_missing_parents())
694
 
 
695
 
    def make_source_with_b(self, a_parent, path):
696
 
        source = self.make_test_vf(True, dir=path)
697
 
        source.add_lines(('a',), (), ['lines\n'])
698
 
        if a_parent:
699
 
            b_parents = (('a',),)
700
 
        else:
701
 
            b_parents = ()
702
 
        source.add_lines(('b',), b_parents, ['lines\n'])
703
 
        return source
704
 
 
705
 
    def do_inconsistent_inserts(self, inconsistency_fatal):
706
 
        target = self.make_test_vf(True, dir='target',
707
 
                                   inconsistency_fatal=inconsistency_fatal)
708
 
        for x in range(2):
709
 
            source = self.make_source_with_b(x==1, 'source%s' % x)
710
 
            target.insert_record_stream(source.get_record_stream(
711
 
                [('b',)], 'unordered', False))
712
 
 
713
 
    def test_inconsistent_redundant_inserts_warn(self):
714
 
        """Should not insert a record that is already present."""
715
 
        warnings = []
716
 
        def warning(template, args):
717
 
            warnings.append(template % args)
718
 
        _trace_warning = trace.warning
719
 
        trace.warning = warning
720
 
        try:
721
 
            self.do_inconsistent_inserts(inconsistency_fatal=False)
722
 
        finally:
723
 
            trace.warning = _trace_warning
724
 
        self.assertEqual(["inconsistent details in skipped record: ('b',)"
725
 
                          " ('42 32 0 8', ((),)) ('74 32 0 8', ((('a',),),))"],
726
 
                         warnings)
727
 
 
728
 
    def test_inconsistent_redundant_inserts_raises(self):
729
 
        e = self.assertRaises(errors.KnitCorrupt, self.do_inconsistent_inserts,
730
 
                              inconsistency_fatal=True)
731
 
        self.assertContainsRe(str(e), "Knit.* corrupt: inconsistent details"
732
 
                              " in add_records:"
733
 
                              " \('b',\) \('42 32 0 8', \(\(\),\)\) \('74 32"
734
 
                              " 0 8', \(\(\('a',\),\),\)\)")
735
 
 
736
 
    def test_clear_cache(self):
737
 
        vf = self.make_source_with_b(True, 'source')
738
 
        vf.writer.end()
739
 
        for record in vf.get_record_stream([('a',), ('b',)], 'unordered',
740
 
                                           True):
741
 
            pass
742
 
        self.assertTrue(len(vf._group_cache) > 0)
743
 
        vf.clear_cache()
744
 
        self.assertEqual(0, len(vf._group_cache))
745
 
 
746
 
 
747
 
 
748
 
class StubGCVF(object):
    """Minimal stand-in exposing ``_group_cache`` and ``_get_blocks``.

    The tests in this file pass it to _BatchingBlockFetcher in place of a
    real versioned file.
    """

    def __init__(self, canned_get_blocks=None):
        """Start with an empty group cache and the given canned blocks.

        :param canned_get_blocks: Items that _get_blocks() will hand back;
            any false value (including None) is replaced by an empty list.
        """
        if not canned_get_blocks:
            canned_get_blocks = []
        self._canned_get_blocks = canned_get_blocks
        self._group_cache = {}

    def _get_blocks(self, read_memos):
        """Return an iterator over the canned blocks; read_memos is ignored."""
        return iter(self._canned_get_blocks)
754
 
    
755
 
 
756
 
class Test_BatchingBlockFetcher(TestCaseWithGroupCompressVersionedFiles):
    """Simple whitebox unit tests for _BatchingBlockFetcher."""

    def test_add_key_new_read_memo(self):
        """Adding a key with an uncached read_memo new to this batch adds that
        read_memo to the list of memos to fetch.
        """
        # Each locations value is (index_memo, ignored, parents, ignored).
        # index_memo is (idx, offset, len, factory_start, factory_end), and
        # its leading (idx, offset, size) part is the 'read_memo' that
        # identifies the raw bytes needed.
        memo = ('fake index', 100, 50)
        index_memo = memo + (None, None)
        fetcher = groupcompress._BatchingBlockFetcher(
            StubGCVF(), {('key',): (index_memo, None, None, None)})
        size_so_far = fetcher.add_key(('key',))
        self.assertEqual(50, size_so_far)
        self.assertEqual([('key',)], fetcher.keys)
        self.assertEqual([memo], fetcher.memos_to_get)

    def test_add_key_duplicate_read_memo(self):
        """read_memos that occur multiple times in a batch will only be fetched
        once.
        """
        memo = ('fake index', 100, 50)
        # Both keys share the read memo but have different overall
        # index_memos.
        key_locations = {
            ('key1',): (memo + (0, 1), None, None, None),
            ('key2',): (memo + (1, 2), None, None, None),
        }
        fetcher = groupcompress._BatchingBlockFetcher(StubGCVF(),
                                                      key_locations)
        fetcher.add_key(('key1',))
        # Only the running total after the second add is checked.
        size_so_far = fetcher.add_key(('key2',))
        self.assertEqual(50, size_so_far)
        self.assertEqual([('key1',), ('key2',)], fetcher.keys)
        self.assertEqual([memo], fetcher.memos_to_get)

    def test_add_key_cached_read_memo(self):
        """Adding a key with a cached read_memo will not cause that read_memo
        to be added to the list to fetch.
        """
        memo = ('fake index', 100, 50)
        stub = StubGCVF()
        # Pre-populate the cache so the memo counts as already fetched.
        stub._group_cache[memo] = 'fake block'
        key_locations = {('key',): (memo + (None, None), None, None, None)}
        fetcher = groupcompress._BatchingBlockFetcher(stub, key_locations)
        size_so_far = fetcher.add_key(('key',))
        self.assertEqual(0, size_so_far)
        self.assertEqual([('key',)], fetcher.keys)
        self.assertEqual([], fetcher.memos_to_get)

    def test_yield_factories_empty(self):
        """An empty batch yields no factories."""
        fetcher = groupcompress._BatchingBlockFetcher(StubGCVF(), {})
        produced = list(fetcher.yield_factories())
        self.assertEqual([], produced)

    def test_yield_factories_calls_get_blocks(self):
        """Uncached memos are retrieved via get_blocks."""
        first_memo = ('fake index', 100, 50)
        second_memo = ('fake index', 150, 40)
        canned = [
            (first_memo, groupcompress.GroupCompressBlock()),
            (second_memo, groupcompress.GroupCompressBlock()),
        ]
        stub = StubGCVF(canned_get_blocks=canned)
        key_locations = {
            ('key1',): (first_memo + (None, None), None, None, None),
            ('key2',): (second_memo + (None, None), None, None, None),
        }
        fetcher = groupcompress._BatchingBlockFetcher(stub, key_locations)
        for key in [('key1',), ('key2',)]:
            fetcher.add_key(key)
        produced = list(fetcher.yield_factories(full_flush=True))
        self.assertLength(2, produced)
        produced_keys = [factory.key for factory in produced]
        produced_kinds = [factory.storage_kind for factory in produced]
        self.assertEqual([('key1',), ('key2',)], produced_keys)
        self.assertEqual(['groupcompress-block', 'groupcompress-block'],
                         produced_kinds)

    def test_yield_factories_flushing(self):
        """yield_factories holds back on yielding results from the final block
        unless passed full_flush=True.
        """
        memo = ('fake index', 100, 50)
        stub = StubGCVF()
        stub._group_cache[memo] = groupcompress.GroupCompressBlock()
        key_locations = {('key',): (memo + (None, None), None, None, None)}
        fetcher = groupcompress._BatchingBlockFetcher(stub, key_locations)
        fetcher.add_key(('key',))
        # Without full_flush nothing comes out yet ...
        held_back = list(fetcher.yield_factories())
        self.assertEqual([], held_back)
        # ... and with it the single pending factory is produced.
        produced = list(fetcher.yield_factories(full_flush=True))
        self.assertLength(1, produced)
        only = produced[0]
        self.assertEqual(('key',), only.key)
        self.assertEqual('groupcompress-block', only.storage_kind)
851
 
 
852
 
 
853
 
class TestLazyGroupCompress(tests.TestCaseWithTransport):

    # Fixture texts, keyed by 1-tuple keys. All four end with the same two
    # lines; key4 additionally repeats key3's first line.
    _texts = {
        ('key1',): ("this is a text\n"
                    "with a reasonable amount of compressible bytes\n"
                    "which can be shared between various other texts\n"),
        ('key2',): ("another text\n"
                    "with a reasonable amount of compressible bytes\n"
                    "which can be shared between various other texts\n"),
        ('key3',): ("yet another text which won't be extracted\n"
                    "with a reasonable amount of compressible bytes\n"
                    "which can be shared between various other texts\n"),
        ('key4',): ("this will be extracted\n"
                    "but references most of its bytes from\n"
                    "yet another text which won't be extracted\n"
                    "with a reasonable amount of compressible bytes\n"
                    "which can be shared between various other texts\n"),
    }
871
 
    def make_block(self, key_to_text):
872
 
        """Create a GroupCompressBlock, filling it with the given texts."""
873
 
        compressor = groupcompress.GroupCompressor()
874
 
        start = 0
875
 
        for key in sorted(key_to_text):
876
 
            compressor.compress(key, key_to_text[key], None)
877
 
        locs = dict((key, (start, end)) for key, (start, _, end, _)
878
 
                    in compressor.labels_deltas.iteritems())
879
 
        block = compressor.flush()
880
 
        raw_bytes = block.to_bytes()
881
 
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)
882
 
 
883
 
    def add_key_to_manager(self, key, locations, block, manager):
884
 
        start, end = locations[key]
885
 
        manager.add_factory(key, (), start, end)
886
 
 
887
 
    def make_block_and_full_manager(self, texts):
888
 
        locations, block = self.make_block(texts)
889
 
        manager = groupcompress._LazyGroupContentManager(block)
890
 
        for key in sorted(texts):
891
 
            self.add_key_to_manager(key, locations, block, manager)
892
 
        return block, manager
893
 
 
894
 
    def test_get_fulltexts(self):
895
 
        locations, block = self.make_block(self._texts)
896
 
        manager = groupcompress._LazyGroupContentManager(block)
897
 
        self.add_key_to_manager(('key1',), locations, block, manager)
898
 
        self.add_key_to_manager(('key2',), locations, block, manager)
899
 
        result_order = []
900
 
        for record in manager.get_record_stream():
901
 
            result_order.append(record.key)
902
 
            text = self._texts[record.key]
903
 
            self.assertEqual(text, record.get_bytes_as('fulltext'))
904
 
        self.assertEqual([('key1',), ('key2',)], result_order)
905
 
 
906
 
        # If we build the manager in the opposite order, we should get them
907
 
        # back in the opposite order
908
 
        manager = groupcompress._LazyGroupContentManager(block)
909
 
        self.add_key_to_manager(('key2',), locations, block, manager)
910
 
        self.add_key_to_manager(('key1',), locations, block, manager)
911
 
        result_order = []
912
 
        for record in manager.get_record_stream():
913
 
            result_order.append(record.key)
914
 
            text = self._texts[record.key]
915
 
            self.assertEqual(text, record.get_bytes_as('fulltext'))
916
 
        self.assertEqual([('key2',), ('key1',)], result_order)
917
 
 
918
 
    def test__wire_bytes_no_keys(self):
919
 
        locations, block = self.make_block(self._texts)
920
 
        manager = groupcompress._LazyGroupContentManager(block)
921
 
        wire_bytes = manager._wire_bytes()
922
 
        block_length = len(block.to_bytes())
923
 
        # We should have triggered a strip, since we aren't using any content
924
 
        stripped_block = manager._block.to_bytes()
925
 
        self.assertTrue(block_length > len(stripped_block))
926
 
        empty_z_header = zlib.compress('')
927
 
        self.assertEqual('groupcompress-block\n'
928
 
                         '8\n' # len(compress(''))
929
 
                         '0\n' # len('')
930
 
                         '%d\n'# compressed block len
931
 
                         '%s'  # zheader
932
 
                         '%s'  # block
933
 
                         % (len(stripped_block), empty_z_header,
934
 
                            stripped_block),
935
 
                         wire_bytes)
936
 
 
937
 
    def test__wire_bytes(self):
938
 
        locations, block = self.make_block(self._texts)
939
 
        manager = groupcompress._LazyGroupContentManager(block)
940
 
        self.add_key_to_manager(('key1',), locations, block, manager)
941
 
        self.add_key_to_manager(('key4',), locations, block, manager)
942
 
        block_bytes = block.to_bytes()
943
 
        wire_bytes = manager._wire_bytes()
944
 
        (storage_kind, z_header_len, header_len,
945
 
         block_len, rest) = wire_bytes.split('\n', 4)
946
 
        z_header_len = int(z_header_len)
947
 
        header_len = int(header_len)
948
 
        block_len = int(block_len)
949
 
        self.assertEqual('groupcompress-block', storage_kind)
950
 
        self.assertEqual(34, z_header_len)
951
 
        self.assertEqual(26, header_len)
952
 
        self.assertEqual(len(block_bytes), block_len)
953
 
        z_header = rest[:z_header_len]
954
 
        header = zlib.decompress(z_header)
955
 
        self.assertEqual(header_len, len(header))
956
 
        entry1 = locations[('key1',)]
957
 
        entry4 = locations[('key4',)]
958
 
        self.assertEqualDiff('key1\n'
959
 
                             '\n'  # no parents
960
 
                             '%d\n' # start offset
961
 
                             '%d\n' # end offset
962
 
                             'key4\n'
963
 
                             '\n'
964
 
                             '%d\n'
965
 
                             '%d\n'
966
 
                             % (entry1[0], entry1[1],
967
 
                                entry4[0], entry4[1]),
968
 
                            header)
969
 
        z_block = rest[z_header_len:]
970
 
        self.assertEqual(block_bytes, z_block)
971
 
 
972
 
    def test_from_bytes(self):
973
 
        locations, block = self.make_block(self._texts)
974
 
        manager = groupcompress._LazyGroupContentManager(block)
975
 
        self.add_key_to_manager(('key1',), locations, block, manager)
976
 
        self.add_key_to_manager(('key4',), locations, block, manager)
977
 
        wire_bytes = manager._wire_bytes()
978
 
        self.assertStartsWith(wire_bytes, 'groupcompress-block\n')
979
 
        manager = groupcompress._LazyGroupContentManager.from_bytes(wire_bytes)
980
 
        self.assertIsInstance(manager, groupcompress._LazyGroupContentManager)
981
 
        self.assertEqual(2, len(manager._factories))
982
 
        self.assertEqual(block._z_content, manager._block._z_content)
983
 
        result_order = []
984
 
        for record in manager.get_record_stream():
985
 
            result_order.append(record.key)
986
 
            text = self._texts[record.key]
987
 
            self.assertEqual(text, record.get_bytes_as('fulltext'))
988
 
        self.assertEqual([('key1',), ('key4',)], result_order)
989
 
 
990
 
    def test__check_rebuild_no_changes(self):
991
 
        block, manager = self.make_block_and_full_manager(self._texts)
992
 
        manager._check_rebuild_block()
993
 
        self.assertIs(block, manager._block)
994
 
 
995
 
    def test__check_rebuild_only_one(self):
996
 
        locations, block = self.make_block(self._texts)
997
 
        manager = groupcompress._LazyGroupContentManager(block)
998
 
        # Request just the first key, which should trigger a 'strip' action
999
 
        self.add_key_to_manager(('key1',), locations, block, manager)
1000
 
        manager._check_rebuild_block()
1001
 
        self.assertIsNot(block, manager._block)
1002
 
        self.assertTrue(block._content_length > manager._block._content_length)
1003
 
        # We should be able to still get the content out of this block, though
1004
 
        # it should only have 1 entry
1005
 
        for record in manager.get_record_stream():
1006
 
            self.assertEqual(('key1',), record.key)
1007
 
            self.assertEqual(self._texts[record.key],
1008
 
                             record.get_bytes_as('fulltext'))
1009
 
 
1010
 
    def test__check_rebuild_middle(self):
1011
 
        locations, block = self.make_block(self._texts)
1012
 
        manager = groupcompress._LazyGroupContentManager(block)
1013
 
        # Request a small key in the middle should trigger a 'rebuild'
1014
 
        self.add_key_to_manager(('key4',), locations, block, manager)
1015
 
        manager._check_rebuild_block()
1016
 
        self.assertIsNot(block, manager._block)
1017
 
        self.assertTrue(block._content_length > manager._block._content_length)
1018
 
        for record in manager.get_record_stream():
1019
 
            self.assertEqual(('key4',), record.key)
1020
 
            self.assertEqual(self._texts[record.key],
1021
 
                             record.get_bytes_as('fulltext'))
1022
 
 
1023
 
    def test_check_is_well_utilized_all_keys(self):
1024
 
        block, manager = self.make_block_and_full_manager(self._texts)
1025
 
        self.assertFalse(manager.check_is_well_utilized())
1026
 
        # Though we can fake it by changing the recommended minimum size
1027
 
        manager._full_enough_block_size = block._content_length
1028
 
        self.assertTrue(manager.check_is_well_utilized())
1029
 
        # Setting it just above causes it to fail
1030
 
        manager._full_enough_block_size = block._content_length + 1
1031
 
        self.assertFalse(manager.check_is_well_utilized())
1032
 
        # Setting the mixed-block size doesn't do anything, because the content
1033
 
        # is considered to not be 'mixed'
1034
 
        manager._full_enough_mixed_block_size = block._content_length
1035
 
        self.assertFalse(manager.check_is_well_utilized())
1036
 
 
1037
 
    def test_check_is_well_utilized_mixed_keys(self):
1038
 
        texts = {}
1039
 
        f1k1 = ('f1', 'k1')
1040
 
        f1k2 = ('f1', 'k2')
1041
 
        f2k1 = ('f2', 'k1')
1042
 
        f2k2 = ('f2', 'k2')
1043
 
        texts[f1k1] = self._texts[('key1',)]
1044
 
        texts[f1k2] = self._texts[('key2',)]
1045
 
        texts[f2k1] = self._texts[('key3',)]
1046
 
        texts[f2k2] = self._texts[('key4',)]
1047
 
        block, manager = self.make_block_and_full_manager(texts)
1048
 
        self.assertFalse(manager.check_is_well_utilized())
1049
 
        manager._full_enough_block_size = block._content_length
1050
 
        self.assertTrue(manager.check_is_well_utilized())
1051
 
        manager._full_enough_block_size = block._content_length + 1
1052
 
        self.assertFalse(manager.check_is_well_utilized())
1053
 
        manager._full_enough_mixed_block_size = block._content_length
1054
 
        self.assertTrue(manager.check_is_well_utilized())
1055
 
 
1056
 
    def test_check_is_well_utilized_partial_use(self):
1057
 
        locations, block = self.make_block(self._texts)
1058
 
        manager = groupcompress._LazyGroupContentManager(block)
1059
 
        manager._full_enough_block_size = block._content_length
1060
 
        self.add_key_to_manager(('key1',), locations, block, manager)
1061
 
        self.add_key_to_manager(('key2',), locations, block, manager)
1062
 
        # Just using the content from key1 and 2 is not enough to be considered
1063
 
        # 'complete'
1064
 
        self.assertFalse(manager.check_is_well_utilized())
1065
 
        # However if we add key3, then we have enough, as we only require 75%
1066
 
        # consumption
1067
 
        self.add_key_to_manager(('key4',), locations, block, manager)
1068
 
        self.assertTrue(manager.check_is_well_utilized())