~bzr-pqm/bzr/bzr.dev

Viewing changes to bzrlib/tests/test_groupcompress.py

  • Committer: Robert Collins
  • Date: 2009-09-07 03:08:30 UTC
  • mto: This revision was merged to the branch mainline in revision 4690.
  • Revision ID: robertc@robertcollins.net-20090907030830-rf59kt28d550eauj
Milestones language tightening, internal consistency.

# Copyright (C) 2008, 2009 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Tests for group compression."""

import zlib

from bzrlib import (
    btree_index,
    groupcompress,
    errors,
    index as _mod_index,
    osutils,
    tests,
    trace,
    versionedfile,
    )
from bzrlib.osutils import sha_string
from bzrlib.tests.test__groupcompress import CompiledGroupCompressFeature


def load_tests(standard_tests, module, loader):
    """Parameterize tests for all versions of groupcompress."""
    to_adapt, result = tests.split_suite_by_condition(
        standard_tests, tests.condition_isinstance(TestAllGroupCompressors))
    scenarios = [
        ('python', {'compressor': groupcompress.PythonGroupCompressor}),
        ]
    if CompiledGroupCompressFeature.available():
        scenarios.append(('C',
            {'compressor': groupcompress.PyrexGroupCompressor}))
    return tests.multiply_tests(to_adapt, scenarios, result)


class TestGroupCompressor(tests.TestCase):

    def _chunks_to_repr_lines(self, chunks):
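        """Return one repr()'d line per text line for readable diffs."""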
        return '\n'.join(map(repr, ''.join(chunks).split('\n')))

    def assertEqualDiffEncoded(self, expected, actual):
        """Compare the actual content to the expected content.

        :param expected: A group of chunks that we expect to see
        :param actual: The measured 'chunks'

        We will transform the chunks back into lines, and then run 'repr()'
        over them to handle non-ascii characters.
        """
        self.assertEqualDiff(self._chunks_to_repr_lines(expected),
                             self._chunks_to_repr_lines(actual))


class TestAllGroupCompressors(TestGroupCompressor):
    """Tests for GroupCompressor"""

    compressor = None # Set by multiply_tests

    def test_empty_delta(self):
        compressor = self.compressor()
        self.assertEqual([], compressor.chunks)

    def test_one_nosha_delta(self):
        # diff against NULL
        compressor = self.compressor()
        sha1, start_point, end_point, _ = compressor.compress(('label',),
            'strange\ncommon\n', None)
        self.assertEqual(sha_string('strange\ncommon\n'), sha1)
        expected_lines = 'f' '\x0f' 'strange\ncommon\n'
        self.assertEqual(expected_lines, ''.join(compressor.chunks))
        self.assertEqual(0, start_point)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_empty_content(self):
        compressor = self.compressor()
        # Adding empty bytes should return the 'null' record
        sha1, start_point, end_point, kind = compressor.compress(('empty',),
                                                                 '', None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)
        self.assertEqual(0, compressor.endpoint)
        self.assertEqual([], compressor.chunks)
        # Even after adding some content
        compressor.compress(('content',), 'some\nbytes\n', None)
        self.assertTrue(compressor.endpoint > 0)
        sha1, start_point, end_point, kind = compressor.compress(('empty2',),
                                                                 '', None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)

    def test_extract_from_compressor(self):
        # Knit fetching will try to reconstruct texts locally which results in
        # reading something that is in the compressor stream already.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, _, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        # get the first out
        self.assertEqual(('strange\ncommon long line\n'
                          'that needs a 16 byte match\n', sha1_1),
                         compressor.extract(('label',)))
        # and the second
        self.assertEqual(('common long line\nthat needs a 16 byte match\n'
                          'different\n', sha1_2),
                         compressor.extract(('newlabel',)))

    def test_pop_last(self):
        compressor = self.compressor()
        _, _, _, _ = compressor.compress(('key1',),
            'some text\nfor the first entry\n', None)
        expected_lines = list(compressor.chunks)
        _, _, _, _ = compressor.compress(('key2',),
            'some text\nfor the second entry\n', None)
        compressor.pop_last()
        self.assertEqual(expected_lines, compressor.chunks)


class TestPyrexGroupCompressor(TestGroupCompressor):

    _test_needs_features = [CompiledGroupCompressFeature]
    compressor = groupcompress.PyrexGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            'strange\n'
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'new\n'
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0f',
            # source and target length
            '\x36',
            # copy the line common
            '\x91\x0a\x2c', #copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0b',
            # source and target length
            '\x5f'
            # insert new
            '\x03new',
            # Copy of first parent 'common' range
            '\x91\x09\x31' # copy, offset 0x09, 0x31 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)


class TestPythonGroupCompressor(TestGroupCompressor):

    compressor = groupcompress.PythonGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            'strange\n'
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'new\n'
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0f',
            # target length
            '\x36',
            # copy the line common
            '\x91\x0a\x2c', #copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0c',
            # target length
            '\x5f'
            # insert new
            '\x04new\n',
            # Copy of first parent 'common' range
            '\x91\x0a\x30' # copy, offset 0x0a, 0x30 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)


class TestGroupCompressBlock(tests.TestCase):

    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts."""
        compressor = groupcompress.GroupCompressor()
        start = 0
        for key in sorted(key_to_text):
            compressor.compress(key, key_to_text[key], None)
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.iteritems())
        block = compressor.flush()
        raw_bytes = block.to_bytes()
        # Go through from_bytes(to_bytes()) so that we start with a compressed
        # content object
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)

    def test_from_empty_bytes(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes, '')

    def test_from_minimal_bytes(self):
        block = groupcompress.GroupCompressBlock.from_bytes(
            'gcb1z\n0\n0\n')
        self.assertIsInstance(block, groupcompress.GroupCompressBlock)
        self.assertIs(None, block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content()
        self.assertEqual('', block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content() # Ensure calling it a second time is safe

    def test_from_invalid(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes,
                          'this is not a valid header')

    def test_from_bytes(self):
        content = ('a tiny bit of content\n')
        z_content = zlib.compress(content)
        z_bytes = (
            'gcb1z\n' # group compress block v1 plain
            '%d\n' # Length of compressed content
            '%d\n' # Length of uncompressed content
            '%s'   # Compressed content
            ) % (len(z_content), len(content), z_content)
        block = groupcompress.GroupCompressBlock.from_bytes(
            z_bytes)
        self.assertEqual(z_content, block._z_content)
        self.assertIs(None, block._content)
        self.assertEqual(len(z_content), block._z_content_length)
        self.assertEqual(len(content), block._content_length)
        block._ensure_content()
        self.assertEqual(z_content, block._z_content)
        self.assertEqual(content, block._content)

    def test_to_bytes(self):
        content = ('this is some content\n'
                   'this content will be compressed\n')
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_content(content)
        bytes = gcb.to_bytes()
        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
        self.assertEqual(gcb._content_length, len(content))
        expected_header =('gcb1z\n' # group compress block v1 zlib
                          '%d\n' # Length of compressed content
                          '%d\n' # Length of uncompressed content
                         ) % (gcb._z_content_length, gcb._content_length)
        self.assertStartsWith(bytes, expected_header)
        remaining_bytes = bytes[len(expected_header):]
        raw_bytes = zlib.decompress(remaining_bytes)
        self.assertEqual(content, raw_bytes)

        # we should get the same results if using the chunked version
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_chunked_content(['this is some content\n'
                                 'this content will be compressed\n'],
                                 len(content))
        old_bytes = bytes
        bytes = gcb.to_bytes()
        self.assertEqual(old_bytes, bytes)

    def test_partial_decomp(self):
        content_chunks = []
        # We need a sufficient amount of data so that zlib.decompress has
        # partial decompression to work with. Most auto-generated data
        # compresses a bit too well; we want a combination, so we combine a sha
        # hash with compressible data.
        for i in xrange(2048):
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
            content_chunks.append(next_content)
            next_sha1 = osutils.sha_string(next_content)
            content_chunks.append(next_sha1 + '\n')
        content = ''.join(content_chunks)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content = z_content
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = 158634
        self.assertIs(None, block._content)
        block._ensure_content(100)
        self.assertIsNot(None, block._content)
        # We have decompressed at least 100 bytes
        self.assertTrue(len(block._content) >= 100)
        # We have not decompressed the whole content
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # ensuring content that we already have shouldn't cause any more data
        # to be extracted
        cur_len = len(block._content)
        block._ensure_content(cur_len - 10)
        self.assertEqual(cur_len, len(block._content))
        # Now we want a bit more content
        cur_len += 10
        block._ensure_content(cur_len)
        self.assertTrue(len(block._content) >= cur_len)
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # And now let's finish
        block._ensure_content(158634)
        self.assertEqualDiff(content, block._content)
        # And the decompressor is finalized
        self.assertIs(None, block._z_content_decompressor)

    def test_partial_decomp_no_known_length(self):
        content_chunks = []
        for i in xrange(2048):
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
            content_chunks.append(next_content)
            next_sha1 = osutils.sha_string(next_content)
            content_chunks.append(next_sha1 + '\n')
        content = ''.join(content_chunks)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content = z_content
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = None # Don't tell the decompressed length
        self.assertIs(None, block._content)
        block._ensure_content(100)
        self.assertIsNot(None, block._content)
        # We have decompressed at least 100 bytes
        self.assertTrue(len(block._content) >= 100)
        # We have not decompressed the whole content
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # ensuring content that we already have shouldn't cause any more data
        # to be extracted
        cur_len = len(block._content)
        block._ensure_content(cur_len - 10)
        self.assertEqual(cur_len, len(block._content))
        # Now we want a bit more content
        cur_len += 10
        block._ensure_content(cur_len)
        self.assertTrue(len(block._content) >= cur_len)
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # And now let's finish
        block._ensure_content()
        self.assertEqualDiff(content, block._content)
        # And the decompressor is finalized
        self.assertIs(None, block._z_content_decompressor)

    def test__dump(self):
        dup_content = 'some duplicate content\nwhich is sufficiently long\n'
        key_to_text = {('1',): dup_content + '1 unique\n',
                       ('2',): dup_content + '2 extra special\n'}
        locs, block = self.make_block(key_to_text)
        self.assertEqual([('f', len(key_to_text[('1',)])),
                          ('d', 21, len(key_to_text[('2',)]),
                           [('c', 2, len(dup_content)),
                            ('i', len('2 extra special\n'), '')
                           ]),
                         ], block._dump())


class TestCaseWithGroupCompressVersionedFiles(tests.TestCaseWithTransport):

    def make_test_vf(self, create_graph, keylength=1, do_cleanup=True,
                     dir='.', inconsistency_fatal=True):
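        """Build a groupcompress VersionedFiles on the transport at dir."""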
        t = self.get_transport(dir)
        t.ensure_base()
        vf = groupcompress.make_pack_factory(graph=create_graph,
            delta=False, keylength=keylength,
            inconsistency_fatal=inconsistency_fatal)(t)
        if do_cleanup:
            self.addCleanup(groupcompress.cleanup_pack_group, vf)
        return vf


class TestGroupCompressVersionedFiles(TestCaseWithGroupCompressVersionedFiles):

    def make_g_index(self, name, ref_lists=0, nodes=[]):
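        """Build a BTreeGraphIndex with the given nodes, stored as name."""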
        builder = btree_index.BTreeBuilder(ref_lists)
        for node, references, value in nodes:
            builder.add_node(node, references, value)
        stream = builder.finish()
        trans = self.get_transport()
        size = trans.put_file(name, stream)
        return btree_index.BTreeGraphIndex(trans, name, size)

    def make_g_index_missing_parent(self):
        graph_index = self.make_g_index('missing_parent', 1,
            [(('parent', ), '2 78 2 10', ([],)),
             (('tip', ), '2 78 2 10',
              ([('parent', ), ('missing-parent', )],)),
              ])
        return graph_index

    def test_get_record_stream_as_requested(self):
        # Consider promoting 'as-requested' to general availability, and
        # make this a VF interface test
        vf = self.make_test_vf(False, dir='source')
        vf.add_lines(('a',), (), ['lines\n'])
        vf.add_lines(('b',), (), ['lines\n'])
        vf.add_lines(('c',), (), ['lines\n'])
        vf.add_lines(('d',), (), ['lines\n'])
        vf.writer.end()
        keys = [record.key for record in vf.get_record_stream(
                    [('a',), ('b',), ('c',), ('d',)],
                    'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)],
                    'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

        # It should work even after being repacked into another VF
        vf2 = self.make_test_vf(False, dir='target')
        vf2.insert_record_stream(vf.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)], 'as-requested', False))
        vf2.writer.end()

        keys = [record.key for record in vf2.get_record_stream(
                    [('a',), ('b',), ('c',), ('d',)],
                    'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf2.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)],
                    'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

    def test_insert_record_stream_reuses_blocks(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        block_bytes = {}
        stream = vf.get_record_stream([(r,) for r in 'abcdefgh'],
                                      'unordered', False)
        num_records = 0
        for record in stream:
            if record.key in [('a',), ('e',)]:
                self.assertEqual('groupcompress-block', record.storage_kind)
            else:
                self.assertEqual('groupcompress-block-ref',
                                 record.storage_kind)
            block_bytes[record.key] = record._manager._block._z_content
            num_records += 1
        self.assertEqual(8, num_records)
        for r in 'abcd':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('a',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('e',)])
        for r in 'efgh':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('e',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('a',)])
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # Ordering in 'groupcompress' order should actually swap the groups in
        # the target vf, but the groups themselves should not be disturbed.
        def small_size_stream():
            for record in vf.get_record_stream([(r,) for r in 'abcdefgh'],
                                               'groupcompress', False):
                record._manager._full_enough_block_size = \
                    record._manager._block._content_length
                yield record

        vf2.insert_record_stream(small_size_stream())
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        vf2.writer.end()
        num_records = 0
        for record in stream:
            num_records += 1
            self.assertEqual(block_bytes[record.key],
                             record._manager._block._z_content)
        self.assertEqual(8, num_records)

    def test_insert_record_stream_packs_on_the_fly(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        # Now copy the blocks into another vf, and see that the
        # insert_record_stream rebuilt a new block on-the-fly because of
        # under-utilization
        vf2 = self.make_test_vf(True, dir='target')
        vf2.insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False))
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        vf2.writer.end()
        num_records = 0
        # All of the records should be recombined into a single block
        block = None
        for record in stream:
            num_records += 1
            if block is None:
                block = record._manager._block
            else:
                self.assertIs(block, record._manager._block)
        self.assertEqual(8, num_records)

    def test__insert_record_stream_no_reuse_block(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        vf.writer.end()
        self.assertEqual(8, len(list(vf.get_record_stream(
                                        [(r,) for r in 'abcdefgh'],
                                        'unordered', False))))
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # Ordering in 'groupcompress' order should actually swap the groups in
        # the target vf, but the groups themselves should not be disturbed.
        list(vf2._insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False),
            reuse_blocks=False))
        vf2.writer.end()
        # After inserting with reuse_blocks=False, we should have everything in
        # a single new block.
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        block = None
        for record in stream:
            if block is None:
                block = record._manager._block
            else:
                self.assertIs(block, record._manager._block)

    def test_add_missing_noncompression_parent_unvalidated_index(self):
        unvalidated = self.make_g_index_missing_parent()
        combined = _mod_index.CombinedGraphIndex([unvalidated])
        index = groupcompress._GCGraphIndex(combined,
            is_locked=lambda: True, parents=True,
            track_external_parent_refs=True)
        index.scan_unvalidated_index(unvalidated)
        self.assertEqual(
            frozenset([('missing-parent',)]), index.get_missing_parents())

    def test_track_external_parent_refs(self):
        g_index = self.make_g_index('empty', 1, [])
        mod_index = btree_index.BTreeBuilder(1, 1)
        combined = _mod_index.CombinedGraphIndex([g_index, mod_index])
        index = groupcompress._GCGraphIndex(combined,
            is_locked=lambda: True, parents=True,
            add_callback=mod_index.add_nodes,
            track_external_parent_refs=True)
        index.add_records([
            (('new-key',), '2 10 2 10', [(('parent-1',), ('parent-2',))])])
        self.assertEqual(
            frozenset([('parent-1',), ('parent-2',)]),
            index.get_missing_parents())

    def make_source_with_b(self, a_parent, path):
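        """Make a VF at path with 'a' and 'b'; 'b' has 'a' as parent iff a_parent."""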
        source = self.make_test_vf(True, dir=path)
        source.add_lines(('a',), (), ['lines\n'])
        if a_parent:
            b_parents = (('a',),)
        else:
            b_parents = ()
        source.add_lines(('b',), b_parents, ['lines\n'])
        return source

    def do_inconsistent_inserts(self, inconsistency_fatal):
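        """Insert 'b' twice from sources that disagree about its parents."""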
        target = self.make_test_vf(True, dir='target',
                                   inconsistency_fatal=inconsistency_fatal)
        for x in range(2):
            source = self.make_source_with_b(x==1, 'source%s' % x)
            target.insert_record_stream(source.get_record_stream(
                [('b',)], 'unordered', False))

    def test_inconsistent_redundant_inserts_warn(self):
        """Should not insert a record that is already present."""
        warnings = []
        def warning(template, args):
            warnings.append(template % args)
        _trace_warning = trace.warning
        trace.warning = warning
        try:
            self.do_inconsistent_inserts(inconsistency_fatal=False)
        finally:
            trace.warning = _trace_warning
        self.assertEqual(["inconsistent details in skipped record: ('b',)"
                          " ('42 32 0 8', ((),)) ('74 32 0 8', ((('a',),),))"],
                         warnings)

    def test_inconsistent_redundant_inserts_raises(self):
        e = self.assertRaises(errors.KnitCorrupt, self.do_inconsistent_inserts,
                              inconsistency_fatal=True)
        self.assertContainsRe(str(e), "Knit.* corrupt: inconsistent details"
                              " in add_records:"
                              " \('b',\) \('42 32 0 8', \(\(\),\)\) \('74 32"
                              " 0 8', \(\(\('a',\),\),\)\)")


class StubGCVF(object):
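    """Minimal stand-in for a GCVF: just the group cache and _get_blocks."""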
    def __init__(self, canned_get_blocks=None):
        self._group_cache = {}
        self._canned_get_blocks = canned_get_blocks or []
    def _get_blocks(self, read_memos):
        return iter(self._canned_get_blocks)


class Test_BatchingBlockFetcher(TestCaseWithGroupCompressVersionedFiles):
    """Simple whitebox unit tests for _BatchingBlockFetcher."""

    def test_add_key_new_read_memo(self):
        """Adding a key with an uncached read_memo new to this batch adds that
        read_memo to the list of memos to fetch.
        """
        # locations are: index_memo, ignored, parents, ignored
        # where index_memo is: (idx, offset, len, factory_start, factory_end)
        # and (idx, offset, size) is known as the 'read_memo', identifying the
        # raw bytes needed.
        read_memo = ('fake index', 100, 50)
        locations = {
            ('key',): (read_memo + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), locations)
        total_size = batcher.add_key(('key',))
        self.assertEqual(50, total_size)
        self.assertEqual([('key',)], batcher.keys)
        self.assertEqual([read_memo], batcher.memos_to_get)

    def test_add_key_duplicate_read_memo(self):
        """read_memos that occur multiple times in a batch will only be fetched
        once.
        """
        read_memo = ('fake index', 100, 50)
        # Two keys, both sharing the same read memo (but different overall
        # index_memos).
        locations = {
            ('key1',): (read_memo + (0, 1), None, None, None),
            ('key2',): (read_memo + (1, 2), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), locations)
        total_size = batcher.add_key(('key1',))
        total_size = batcher.add_key(('key2',))
        self.assertEqual(50, total_size)
        self.assertEqual([('key1',), ('key2',)], batcher.keys)
        self.assertEqual([read_memo], batcher.memos_to_get)

    def test_add_key_cached_read_memo(self):
        """Adding a key with a cached read_memo will not cause that read_memo
        to be added to the list to fetch.
        """
        read_memo = ('fake index', 100, 50)
        gcvf = StubGCVF()
        gcvf._group_cache[read_memo] = 'fake block'
        locations = {
            ('key',): (read_memo + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
        total_size = batcher.add_key(('key',))
        self.assertEqual(0, total_size)
        self.assertEqual([('key',)], batcher.keys)
        self.assertEqual([], batcher.memos_to_get)

    def test_yield_factories_empty(self):
        """An empty batch yields no factories."""
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), {})
        self.assertEqual([], list(batcher.yield_factories()))

    def test_yield_factories_calls_get_blocks(self):
        """Uncached memos are retrieved via get_blocks."""
        read_memo1 = ('fake index', 100, 50)
        read_memo2 = ('fake index', 150, 40)
        gcvf = StubGCVF(
            canned_get_blocks=[
                (read_memo1, groupcompress.GroupCompressBlock()),
                (read_memo2, groupcompress.GroupCompressBlock())])
        locations = {
            ('key1',): (read_memo1 + (None, None), None, None, None),
            ('key2',): (read_memo2 + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
        batcher.add_key(('key1',))
        batcher.add_key(('key2',))
        factories = list(batcher.yield_factories(full_flush=True))
        self.assertLength(2, factories)
        keys = [f.key for f in factories]
        kinds = [f.storage_kind for f in factories]
        self.assertEqual([('key1',), ('key2',)], keys)
        self.assertEqual(['groupcompress-block', 'groupcompress-block'], kinds)

    def test_yield_factories_flushing(self):
        """yield_factories holds back on yielding results from the final block
        unless passed full_flush=True.
        """
        fake_block = groupcompress.GroupCompressBlock()
        read_memo = ('fake index', 100, 50)
        gcvf = StubGCVF()
        gcvf._group_cache[read_memo] = fake_block
        locations = {
            ('key',): (read_memo + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
        batcher.add_key(('key',))
        self.assertEqual([], list(batcher.yield_factories()))
        factories = list(batcher.yield_factories(full_flush=True))
        self.assertLength(1, factories)
        self.assertEqual(('key',), factories[0].key)
        self.assertEqual('groupcompress-block', factories[0].storage_kind)


class TestLazyGroupCompress(tests.TestCaseWithTransport):

    _texts = {
        ('key1',): "this is a text\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
        ('key2',): "another text\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
        ('key3',): "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
        ('key4',): "this will be extracted\n"
                   "but references most of its bytes from\n"
                   "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
    }
    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts."""
        compressor = groupcompress.GroupCompressor()
        start = 0
        for key in sorted(key_to_text):
            compressor.compress(key, key_to_text[key], None)
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.iteritems())
        block = compressor.flush()
        raw_bytes = block.to_bytes()
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)

    def add_key_to_manager(self, key, locations, block, manager):
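        """Add key to manager using the (start, end) offsets in locations."""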
        start, end = locations[key]
        manager.add_factory(key, (), start, end)

    def make_block_and_full_manager(self, texts):
        locations, block = self.make_block(texts)
        manager = groupcompress._LazyGroupContentManager(block)
        for key in sorted(texts):
            self.add_key_to_manager(key, locations, block, manager)
        return block, manager

    def test_get_fulltexts(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key2',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key1',), ('key2',)], result_order)

        # If we build the manager in the opposite order, we should get them
        # back in the opposite order
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key2',), locations, block, manager)
        self.add_key_to_manager(('key1',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key2',), ('key1',)], result_order)

    def test__wire_bytes_no_keys(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        wire_bytes = manager._wire_bytes()
        block_length = len(block.to_bytes())
        # We should have triggered a strip, since we aren't using any content
        stripped_block = manager._block.to_bytes()
        self.assertTrue(block_length > len(stripped_block))
        empty_z_header = zlib.compress('')
        self.assertEqual('groupcompress-block\n'
                         '8\n' # len(compress(''))
                         '0\n' # len('')
                         '%d\n'# compressed block len
                         '%s'  # zheader
                         '%s'  # block
                         % (len(stripped_block), empty_z_header,
                            stripped_block),
                         wire_bytes)

    def test__wire_bytes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        block_bytes = block.to_bytes()
        wire_bytes = manager._wire_bytes()
        (storage_kind, z_header_len, header_len,
         block_len, rest) = wire_bytes.split('\n', 4)
        z_header_len = int(z_header_len)
        header_len = int(header_len)
        block_len = int(block_len)
        self.assertEqual('groupcompress-block', storage_kind)
        self.assertEqual(34, z_header_len)
        self.assertEqual(26, header_len)
        self.assertEqual(len(block_bytes), block_len)
        z_header = rest[:z_header_len]
        header = zlib.decompress(z_header)
        self.assertEqual(header_len, len(header))
        entry1 = locations[('key1',)]
        entry4 = locations[('key4',)]
        self.assertEqualDiff('key1\n'
                             '\n'  # no parents
                             '%d\n' # start offset
                             '%d\n' # end offset
                             'key4\n'
                             '\n'
                             '%d\n'
                             '%d\n'
                             % (entry1[0], entry1[1],
                                entry4[0], entry4[1]),
                            header)
        z_block = rest[z_header_len:]
        self.assertEqual(block_bytes, z_block)

    def test_from_bytes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        wire_bytes = manager._wire_bytes()
        self.assertStartsWith(wire_bytes, 'groupcompress-block\n')
        manager = groupcompress._LazyGroupContentManager.from_bytes(wire_bytes)
        self.assertIsInstance(manager, groupcompress._LazyGroupContentManager)
        self.assertEqual(2, len(manager._factories))
        self.assertEqual(block._z_content, manager._block._z_content)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key1',), ('key4',)], result_order)

    def test__check_rebuild_no_changes(self):
        block, manager = self.make_block_and_full_manager(self._texts)
        manager._check_rebuild_block()
        self.assertIs(block, manager._block)

    def test__check_rebuild_only_one(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Request just the first key, which should trigger a 'strip' action
        self.add_key_to_manager(('key1',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        # We should still be able to get the content out of this block, though
        # it should only have 1 entry
        for record in manager.get_record_stream():
            self.assertEqual(('key1',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))

    def test__check_rebuild_middle(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Requesting a small key in the middle should trigger a 'rebuild'
        self.add_key_to_manager(('key4',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        for record in manager.get_record_stream():
            self.assertEqual(('key4',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))

    def test_check_is_well_utilized_all_keys(self):
        block, manager = self.make_block_and_full_manager(self._texts)
        self.assertFalse(manager.check_is_well_utilized())
        # Though we can fake it by changing the recommended minimum size
        manager._full_enough_block_size = block._content_length
        self.assertTrue(manager.check_is_well_utilized())
        # Setting it just above causes it to fail
        manager._full_enough_block_size = block._content_length + 1
        self.assertFalse(manager.check_is_well_utilized())
        # Setting the mixed-block size doesn't do anything, because the content
        # is considered to not be 'mixed'
        manager._full_enough_mixed_block_size = block._content_length
        self.assertFalse(manager.check_is_well_utilized())

    def test_check_is_well_utilized_mixed_keys(self):
        texts = {}
        f1k1 = ('f1', 'k1')
        f1k2 = ('f1', 'k2')
        f2k1 = ('f2', 'k1')
        f2k2 = ('f2', 'k2')
        texts[f1k1] = self._texts[('key1',)]
        texts[f1k2] = self._texts[('key2',)]
        texts[f2k1] = self._texts[('key3',)]
        texts[f2k2] = self._texts[('key4',)]
        block, manager = self.make_block_and_full_manager(texts)
        self.assertFalse(manager.check_is_well_utilized())
        manager._full_enough_block_size = block._content_length
        self.assertTrue(manager.check_is_well_utilized())
        manager._full_enough_block_size = block._content_length + 1
        self.assertFalse(manager.check_is_well_utilized())
        manager._full_enough_mixed_block_size = block._content_length
        self.assertTrue(manager.check_is_well_utilized())

    def test_check_is_well_utilized_partial_use(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        manager._full_enough_block_size = block._content_length
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key2',), locations, block, manager)
        # Just using the content from key1 and 2 is not enough to be considered
        # 'complete'
        self.assertFalse(manager.check_is_well_utilized())
        # However if we add key4, then we have enough, as we only require 75%
        # consumption
        self.add_key_to_manager(('key4',), locations, block, manager)
        self.assertTrue(manager.check_is_well_utilized())