~bzr-pqm/bzr/bzr.dev

Viewing changes to bzrlib/tests/test_groupcompress.py

  • Committer: Canonical.com Patch Queue Manager
  • Date: 2011-05-04 12:10:51 UTC
  • mfrom: (5819.1.4 777007-developer-doc)
  • Revision ID: pqm@pqm.ubuntu.com-20110504121051-aovlsmqiivjmc4fc
(jelmer) Small fixes to developer documentation. (Jonathan Riddell)

# Copyright (C) 2008-2011 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Tests for group compression."""

import zlib

from bzrlib import (
    btree_index,
    groupcompress,
    errors,
    index as _mod_index,
    osutils,
    tests,
    trace,
    versionedfile,
    )
from bzrlib.osutils import sha_string
from bzrlib.tests.test__groupcompress import compiled_groupcompress_feature
from bzrlib.tests.scenarios import load_tests_apply_scenarios


def group_compress_implementation_scenarios():
    scenarios = [
        ('python', {'compressor': groupcompress.PythonGroupCompressor}),
        ]
    if compiled_groupcompress_feature.available():
        scenarios.append(('C',
            {'compressor': groupcompress.PyrexGroupCompressor}))
    return scenarios


load_tests = load_tests_apply_scenarios
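# load_tests_apply_scenarios parametrises any test class that defines a
# 'scenarios' attribute: each test in TestAllGroupCompressors below runs once
# per entry returned by group_compress_implementation_scenarios(), with that
# scenario's 'compressor' set on the generated test.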
 


class TestGroupCompressor(tests.TestCase):

    def _chunks_to_repr_lines(self, chunks):
        return '\n'.join(map(repr, ''.join(chunks).split('\n')))

    def assertEqualDiffEncoded(self, expected, actual):
        """Compare the actual content to the expected content.

        :param expected: A group of chunks that we expect to see
        :param actual: The measured 'chunks'

        We will transform the chunks back into lines, and then run 'repr()'
        over them to handle non-ascii characters.
        """
        self.assertEqualDiff(self._chunks_to_repr_lines(expected),
                             self._chunks_to_repr_lines(actual))


class TestAllGroupCompressors(TestGroupCompressor):
    """Tests for GroupCompressor"""

    scenarios = group_compress_implementation_scenarios()
    compressor = None # Set by scenario

    def test_empty_delta(self):
        compressor = self.compressor()
        self.assertEqual([], compressor.chunks)

    def test_one_nosha_delta(self):
        # diff against NULL
        compressor = self.compressor()
        sha1, start_point, end_point, _ = compressor.compress(('label',),
            'strange\ncommon\n', None)
        self.assertEqual(sha_string('strange\ncommon\n'), sha1)
        expected_lines = 'f' '\x0f' 'strange\ncommon\n'
        self.assertEqual(expected_lines, ''.join(compressor.chunks))
        self.assertEqual(0, start_point)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_empty_content(self):
        compressor = self.compressor()
        # Adding empty bytes should return the 'null' record
        sha1, start_point, end_point, kind = compressor.compress(('empty',),
                                                                 '', None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)
        self.assertEqual(0, compressor.endpoint)
        self.assertEqual([], compressor.chunks)
        # Even after adding some content
        compressor.compress(('content',), 'some\nbytes\n', None)
        self.assertTrue(compressor.endpoint > 0)
        sha1, start_point, end_point, kind = compressor.compress(('empty2',),
                                                                 '', None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)

    def test_extract_from_compressor(self):
        # Knit fetching will try to reconstruct texts locally which results in
        # reading something that is in the compressor stream already.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, _, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        # get the first out
        self.assertEqual(('strange\ncommon long line\n'
                          'that needs a 16 byte match\n', sha1_1),
                         compressor.extract(('label',)))
        # and the second
        self.assertEqual(('common long line\nthat needs a 16 byte match\n'
                          'different\n', sha1_2),
                         compressor.extract(('newlabel',)))

    def test_pop_last(self):
        compressor = self.compressor()
        _, _, _, _ = compressor.compress(('key1',),
            'some text\nfor the first entry\n', None)
        expected_lines = list(compressor.chunks)
        _, _, _, _ = compressor.compress(('key2',),
            'some text\nfor the second entry\n', None)
        compressor.pop_last()
        self.assertEqual(expected_lines, compressor.chunks)

class TestPyrexGroupCompressor(TestGroupCompressor):

    _test_needs_features = [compiled_groupcompress_feature]
    compressor = groupcompress.PyrexGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            'strange\n'
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'new\n'
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0f',
            # source and target length
            '\x36',
            # copy the line common
            '\x91\x0a\x2c', #copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0b',
            # source and target length
            '\x5f'
            # insert new
            '\x03new',
            # Copy of first parent 'common' range
            '\x91\x09\x31' # copy, offset 0x09, 0x31 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

class TestPythonGroupCompressor(TestGroupCompressor):

    compressor = groupcompress.PythonGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            'strange\n'
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'new\n'
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0f',
            # target length
            '\x36',
            # copy the line common
            '\x91\x0a\x2c', #copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0c',
            # target length
            '\x5f'
            # insert new
            '\x04new\n',
            # Copy of first parent 'common' range
            '\x91\x0a\x30' # copy, offset 0x0a, 0x30 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

class TestGroupCompressBlock(tests.TestCase):

    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts."""
        compressor = groupcompress.GroupCompressor()
        start = 0
        for key in sorted(key_to_text):
            compressor.compress(key, key_to_text[key], None)
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.iteritems())
        block = compressor.flush()
        raw_bytes = block.to_bytes()
        # Go through from_bytes(to_bytes()) so that we start with a compressed
        # content object
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)
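
    # The block serialisation exercised below is a 'gcb1z\n' marker, the
    # compressed length and the uncompressed length (one decimal per line),
    # then the zlib-compressed content; from_bytes() accepts exactly what
    # to_bytes()/to_chunks() emit.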
 

    def test_from_empty_bytes(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes, '')

    def test_from_minimal_bytes(self):
        block = groupcompress.GroupCompressBlock.from_bytes(
            'gcb1z\n0\n0\n')
        self.assertIsInstance(block, groupcompress.GroupCompressBlock)
        self.assertIs(None, block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content()
        self.assertEqual('', block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content() # Ensure content is safe to call 2x

    def test_from_invalid(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes,
                          'this is not a valid header')

    def test_from_bytes(self):
        content = ('a tiny bit of content\n')
        z_content = zlib.compress(content)
        z_bytes = (
            'gcb1z\n' # group compress block v1 zlib
            '%d\n' # Length of compressed content
            '%d\n' # Length of uncompressed content
            '%s'   # Compressed content
            ) % (len(z_content), len(content), z_content)
        block = groupcompress.GroupCompressBlock.from_bytes(
            z_bytes)
        self.assertEqual(z_content, block._z_content)
        self.assertIs(None, block._content)
        self.assertEqual(len(z_content), block._z_content_length)
        self.assertEqual(len(content), block._content_length)
        block._ensure_content()
        self.assertEqual(z_content, block._z_content)
        self.assertEqual(content, block._content)

    def test_to_chunks(self):
        content_chunks = ['this is some content\n',
                          'this content will be compressed\n']
        content_len = sum(map(len, content_chunks))
        content = ''.join(content_chunks)
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_chunked_content(content_chunks, content_len)
        total_len, block_chunks = gcb.to_chunks()
        block_bytes = ''.join(block_chunks)
        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
        self.assertEqual(total_len, len(block_bytes))
        self.assertEqual(gcb._content_length, content_len)
        expected_header =('gcb1z\n' # group compress block v1 zlib
                          '%d\n' # Length of compressed content
                          '%d\n' # Length of uncompressed content
                         ) % (gcb._z_content_length, gcb._content_length)
        # The first chunk should be the header chunk. It is small, fixed size,
        # and there is no compelling reason to split it up
        self.assertEqual(expected_header, block_chunks[0])
        self.assertStartsWith(block_bytes, expected_header)
        remaining_bytes = block_bytes[len(expected_header):]
        raw_bytes = zlib.decompress(remaining_bytes)
        self.assertEqual(content, raw_bytes)

    def test_to_bytes(self):
        content = ('this is some content\n'
                   'this content will be compressed\n')
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_content(content)
        bytes = gcb.to_bytes()
        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
        self.assertEqual(gcb._content_length, len(content))
        expected_header =('gcb1z\n' # group compress block v1 zlib
                          '%d\n' # Length of compressed content
                          '%d\n' # Length of uncompressed content
                         ) % (gcb._z_content_length, gcb._content_length)
        self.assertStartsWith(bytes, expected_header)
        remaining_bytes = bytes[len(expected_header):]
        raw_bytes = zlib.decompress(remaining_bytes)
        self.assertEqual(content, raw_bytes)

        # we should get the same results if using the chunked version
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_chunked_content(['this is some content\n'
                                 'this content will be compressed\n'],
                                 len(content))
        old_bytes = bytes
        bytes = gcb.to_bytes()
        self.assertEqual(old_bytes, bytes)

    def test_partial_decomp(self):
        content_chunks = []
        # We need a sufficient amount of data so that zlib.decompress has
        # partial decompression to work with. Most auto-generated data
        # compresses a bit too well, we want a combination, so we combine a sha
        # hash with compressible data.
        for i in xrange(2048):
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
            content_chunks.append(next_content)
            next_sha1 = osutils.sha_string(next_content)
            content_chunks.append(next_sha1 + '\n')
        content = ''.join(content_chunks)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content_chunks = (z_content,)
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = 158634
        self.assertIs(None, block._content)
        block._ensure_content(100)
        self.assertIsNot(None, block._content)
        # We have decompressed at least 100 bytes
        self.assertTrue(len(block._content) >= 100)
        # We have not decompressed the whole content
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # ensuring content that we already have shouldn't cause any more data
        # to be extracted
        cur_len = len(block._content)
        block._ensure_content(cur_len - 10)
        self.assertEqual(cur_len, len(block._content))
        # Now we want a bit more content
        cur_len += 10
        block._ensure_content(cur_len)
        self.assertTrue(len(block._content) >= cur_len)
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # And now lets finish
        block._ensure_content(158634)
        self.assertEqualDiff(content, block._content)
        # And the decompressor is finalized
        self.assertIs(None, block._z_content_decompressor)

    def test__ensure_all_content(self):
        content_chunks = []
        # We need a sufficient amount of data so that zlib.decompress has
        # partial decompression to work with. Most auto-generated data
        # compresses a bit too well, we want a combination, so we combine a sha
        # hash with compressible data.
        for i in xrange(2048):
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
            content_chunks.append(next_content)
            next_sha1 = osutils.sha_string(next_content)
            content_chunks.append(next_sha1 + '\n')
        content = ''.join(content_chunks)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content_chunks = (z_content,)
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = 158634
        self.assertIs(None, block._content)
        # The first _ensure_content got all of the required data
        block._ensure_content(158634)
        self.assertEqualDiff(content, block._content)
        # And we should have released the _z_content_decompressor since it was
        # fully consumed
        self.assertIs(None, block._z_content_decompressor)

    def test__dump(self):
        dup_content = 'some duplicate content\nwhich is sufficiently long\n'
        key_to_text = {('1',): dup_content + '1 unique\n',
                       ('2',): dup_content + '2 extra special\n'}
        locs, block = self.make_block(key_to_text)
        self.assertEqual([('f', len(key_to_text[('1',)])),
                          ('d', 21, len(key_to_text[('2',)]),
                           [('c', 2, len(dup_content)),
                            ('i', len('2 extra special\n'), '')
                           ]),
                         ], block._dump())

class TestCaseWithGroupCompressVersionedFiles(
        tests.TestCaseWithMemoryTransport):

    def make_test_vf(self, create_graph, keylength=1, do_cleanup=True,
                     dir='.', inconsistency_fatal=True):
        t = self.get_transport(dir)
        t.ensure_base()
        vf = groupcompress.make_pack_factory(graph=create_graph,
            delta=False, keylength=keylength,
            inconsistency_fatal=inconsistency_fatal)(t)
        if do_cleanup:
            self.addCleanup(groupcompress.cleanup_pack_group, vf)
        return vf


class TestGroupCompressVersionedFiles(TestCaseWithGroupCompressVersionedFiles):

    def make_g_index(self, name, ref_lists=0, nodes=[]):
        builder = btree_index.BTreeBuilder(ref_lists)
        for node, references, value in nodes:
            builder.add_node(node, references, value)
        stream = builder.finish()
        trans = self.get_transport()
        size = trans.put_file(name, stream)
        return btree_index.BTreeGraphIndex(trans, name, size)

    def make_g_index_missing_parent(self):
        graph_index = self.make_g_index('missing_parent', 1,
            [(('parent', ), '2 78 2 10', ([],)),
             (('tip', ), '2 78 2 10',
              ([('parent', ), ('missing-parent', )],)),
              ])
        return graph_index

    def test_get_record_stream_as_requested(self):
        # Consider promoting 'as-requested' to general availability, and
        # make this a VF interface test
        vf = self.make_test_vf(False, dir='source')
        vf.add_lines(('a',), (), ['lines\n'])
        vf.add_lines(('b',), (), ['lines\n'])
        vf.add_lines(('c',), (), ['lines\n'])
        vf.add_lines(('d',), (), ['lines\n'])
        vf.writer.end()
        keys = [record.key for record in vf.get_record_stream(
                    [('a',), ('b',), ('c',), ('d',)],
                    'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)],
                    'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

        # It should work even after being repacked into another VF
        vf2 = self.make_test_vf(False, dir='target')
        vf2.insert_record_stream(vf.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)], 'as-requested', False))
        vf2.writer.end()

        keys = [record.key for record in vf2.get_record_stream(
                    [('a',), ('b',), ('c',), ('d',)],
                    'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf2.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)],
                    'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

    def test_insert_record_stream_reuses_blocks(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        block_bytes = {}
        stream = vf.get_record_stream([(r,) for r in 'abcdefgh'],
                                      'unordered', False)
        num_records = 0
        for record in stream:
            if record.key in [('a',), ('e',)]:
                self.assertEqual('groupcompress-block', record.storage_kind)
            else:
                self.assertEqual('groupcompress-block-ref',
                                 record.storage_kind)
            block_bytes[record.key] = record._manager._block._z_content
            num_records += 1
        self.assertEqual(8, num_records)
        for r in 'abcd':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('a',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('e',)])
        for r in 'efgh':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('e',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('a',)])
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # ordering in 'groupcompress' order, should actually swap the groups in
        # the target vf, but the groups themselves should not be disturbed.
        def small_size_stream():
            for record in vf.get_record_stream([(r,) for r in 'abcdefgh'],
                                               'groupcompress', False):
                record._manager._full_enough_block_size = \
                    record._manager._block._content_length
                yield record

        vf2.insert_record_stream(small_size_stream())
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        vf2.writer.end()
        num_records = 0
        for record in stream:
            num_records += 1
            self.assertEqual(block_bytes[record.key],
                             record._manager._block._z_content)
        self.assertEqual(8, num_records)

    def test_insert_record_stream_packs_on_the_fly(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        # Now copy the blocks into another vf, and see that the
        # insert_record_stream rebuilt a new block on-the-fly because of
        # under-utilization
        vf2 = self.make_test_vf(True, dir='target')
        vf2.insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False))
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        vf2.writer.end()
        num_records = 0
        # All of the records should be recombined into a single block
        block = None
        for record in stream:
            num_records += 1
            if block is None:
                block = record._manager._block
            else:
                self.assertIs(block, record._manager._block)
        self.assertEqual(8, num_records)

    def test__insert_record_stream_no_reuse_block(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        vf.writer.end()
        self.assertEqual(8, len(list(vf.get_record_stream(
                                        [(r,) for r in 'abcdefgh'],
                                        'unordered', False))))
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # ordering in 'groupcompress' order, should actually swap the groups in
        # the target vf, but the groups themselves should not be disturbed.
        list(vf2._insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False),
            reuse_blocks=False))
        vf2.writer.end()
        # After inserting with reuse_blocks=False, we should have everything in
        # a single new block.
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        block = None
        for record in stream:
            if block is None:
                block = record._manager._block
            else:
                self.assertIs(block, record._manager._block)

    def test_add_missing_noncompression_parent_unvalidated_index(self):
        unvalidated = self.make_g_index_missing_parent()
        combined = _mod_index.CombinedGraphIndex([unvalidated])
        index = groupcompress._GCGraphIndex(combined,
            is_locked=lambda: True, parents=True,
            track_external_parent_refs=True)
        index.scan_unvalidated_index(unvalidated)
        self.assertEqual(
            frozenset([('missing-parent',)]), index.get_missing_parents())

    def test_track_external_parent_refs(self):
        g_index = self.make_g_index('empty', 1, [])
        mod_index = btree_index.BTreeBuilder(1, 1)
        combined = _mod_index.CombinedGraphIndex([g_index, mod_index])
        index = groupcompress._GCGraphIndex(combined,
            is_locked=lambda: True, parents=True,
            add_callback=mod_index.add_nodes,
            track_external_parent_refs=True)
        index.add_records([
            (('new-key',), '2 10 2 10', [(('parent-1',), ('parent-2',))])])
        self.assertEqual(
            frozenset([('parent-1',), ('parent-2',)]),
            index.get_missing_parents())

    def make_source_with_b(self, a_parent, path):
        source = self.make_test_vf(True, dir=path)
        source.add_lines(('a',), (), ['lines\n'])
        if a_parent:
            b_parents = (('a',),)
        else:
            b_parents = ()
        source.add_lines(('b',), b_parents, ['lines\n'])
        return source

    def do_inconsistent_inserts(self, inconsistency_fatal):
        target = self.make_test_vf(True, dir='target',
                                   inconsistency_fatal=inconsistency_fatal)
        for x in range(2):
            source = self.make_source_with_b(x==1, 'source%s' % x)
            target.insert_record_stream(source.get_record_stream(
                [('b',)], 'unordered', False))

    def test_inconsistent_redundant_inserts_warn(self):
        """Should not insert a record that is already present."""
        warnings = []
        def warning(template, args):
            warnings.append(template % args)
        _trace_warning = trace.warning
        trace.warning = warning
        try:
            self.do_inconsistent_inserts(inconsistency_fatal=False)
        finally:
            trace.warning = _trace_warning
        self.assertEqual(["inconsistent details in skipped record: ('b',)"
                          " ('42 32 0 8', ((),)) ('74 32 0 8', ((('a',),),))"],
                         warnings)

    def test_inconsistent_redundant_inserts_raises(self):
        e = self.assertRaises(errors.KnitCorrupt, self.do_inconsistent_inserts,
                              inconsistency_fatal=True)
        self.assertContainsRe(str(e), "Knit.* corrupt: inconsistent details"
                              " in add_records:"
                              " \('b',\) \('42 32 0 8', \(\(\),\)\) \('74 32"
                              " 0 8', \(\(\('a',\),\),\)\)")

    def test_clear_cache(self):
        vf = self.make_source_with_b(True, 'source')
        vf.writer.end()
        for record in vf.get_record_stream([('a',), ('b',)], 'unordered',
                                           True):
            pass
        self.assertTrue(len(vf._group_cache) > 0)
        vf.clear_cache()
        self.assertEqual(0, len(vf._group_cache))



class StubGCVF(object):
    def __init__(self, canned_get_blocks=None):
        self._group_cache = {}
        self._canned_get_blocks = canned_get_blocks or []
    def _get_blocks(self, read_memos):
        return iter(self._canned_get_blocks)
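
    # StubGCVF stands in for a full GroupCompressVersionedFiles: the
    # _BatchingBlockFetcher tests below only touch its _group_cache dict and
    # _get_blocks(), so that is all the stub provides.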
 


class Test_BatchingBlockFetcher(TestCaseWithGroupCompressVersionedFiles):
    """Simple whitebox unit tests for _BatchingBlockFetcher."""

    def test_add_key_new_read_memo(self):
        """Adding a key with an uncached read_memo new to this batch adds that
        read_memo to the list of memos to fetch.
        """
        # locations are: index_memo, ignored, parents, ignored
        # where index_memo is: (idx, offset, len, factory_start, factory_end)
        # and (idx, offset, size) is known as the 'read_memo', identifying the
        # raw bytes needed.
        read_memo = ('fake index', 100, 50)
        locations = {
            ('key',): (read_memo + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), locations)
        total_size = batcher.add_key(('key',))
        self.assertEqual(50, total_size)
        self.assertEqual([('key',)], batcher.keys)
        self.assertEqual([read_memo], batcher.memos_to_get)

    def test_add_key_duplicate_read_memo(self):
        """read_memos that occur multiple times in a batch will only be fetched
        once.
        """
        read_memo = ('fake index', 100, 50)
        # Two keys, both sharing the same read memo (but different overall
        # index_memos).
        locations = {
            ('key1',): (read_memo + (0, 1), None, None, None),
            ('key2',): (read_memo + (1, 2), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), locations)
        total_size = batcher.add_key(('key1',))
        total_size = batcher.add_key(('key2',))
        self.assertEqual(50, total_size)
        self.assertEqual([('key1',), ('key2',)], batcher.keys)
        self.assertEqual([read_memo], batcher.memos_to_get)

    def test_add_key_cached_read_memo(self):
        """Adding a key with a cached read_memo will not cause that read_memo
        to be added to the list to fetch.
        """
        read_memo = ('fake index', 100, 50)
        gcvf = StubGCVF()
        gcvf._group_cache[read_memo] = 'fake block'
        locations = {
            ('key',): (read_memo + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
        total_size = batcher.add_key(('key',))
        self.assertEqual(0, total_size)
        self.assertEqual([('key',)], batcher.keys)
        self.assertEqual([], batcher.memos_to_get)

    def test_yield_factories_empty(self):
        """An empty batch yields no factories."""
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), {})
        self.assertEqual([], list(batcher.yield_factories()))

    def test_yield_factories_calls_get_blocks(self):
        """Uncached memos are retrieved via get_blocks."""
        read_memo1 = ('fake index', 100, 50)
        read_memo2 = ('fake index', 150, 40)
        gcvf = StubGCVF(
            canned_get_blocks=[
                (read_memo1, groupcompress.GroupCompressBlock()),
                (read_memo2, groupcompress.GroupCompressBlock())])
        locations = {
            ('key1',): (read_memo1 + (None, None), None, None, None),
            ('key2',): (read_memo2 + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
        batcher.add_key(('key1',))
        batcher.add_key(('key2',))
        factories = list(batcher.yield_factories(full_flush=True))
        self.assertLength(2, factories)
        keys = [f.key for f in factories]
        kinds = [f.storage_kind for f in factories]
        self.assertEqual([('key1',), ('key2',)], keys)
        self.assertEqual(['groupcompress-block', 'groupcompress-block'], kinds)

    def test_yield_factories_flushing(self):
        """yield_factories holds back on yielding results from the final block
        unless passed full_flush=True.
        """
        fake_block = groupcompress.GroupCompressBlock()
        read_memo = ('fake index', 100, 50)
        gcvf = StubGCVF()
        gcvf._group_cache[read_memo] = fake_block
        locations = {
            ('key',): (read_memo + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
        batcher.add_key(('key',))
        self.assertEqual([], list(batcher.yield_factories()))
        factories = list(batcher.yield_factories(full_flush=True))
        self.assertLength(1, factories)
        self.assertEqual(('key',), factories[0].key)
        self.assertEqual('groupcompress-block', factories[0].storage_kind)

class TestLazyGroupCompress(tests.TestCaseWithTransport):

    _texts = {
        ('key1',): "this is a text\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
        ('key2',): "another text\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
        ('key3',): "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
        ('key4',): "this will be extracted\n"
                   "but references most of its bytes from\n"
                   "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
    }
    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts."""
        compressor = groupcompress.GroupCompressor()
        start = 0
        for key in sorted(key_to_text):
            compressor.compress(key, key_to_text[key], None)
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.iteritems())
        block = compressor.flush()
        raw_bytes = block.to_bytes()
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)

    def add_key_to_manager(self, key, locations, block, manager):
        start, end = locations[key]
        manager.add_factory(key, (), start, end)

    def make_block_and_full_manager(self, texts):
        locations, block = self.make_block(texts)
        manager = groupcompress._LazyGroupContentManager(block)
        for key in sorted(texts):
            self.add_key_to_manager(key, locations, block, manager)
        return block, manager

    def test_get_fulltexts(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key2',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key1',), ('key2',)], result_order)

        # If we build the manager in the opposite order, we should get them
        # back in the opposite order
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key2',), locations, block, manager)
        self.add_key_to_manager(('key1',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key2',), ('key1',)], result_order)

    def test__wire_bytes_no_keys(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        wire_bytes = manager._wire_bytes()
        block_length = len(block.to_bytes())
        # We should have triggered a strip, since we aren't using any content
        stripped_block = manager._block.to_bytes()
        self.assertTrue(block_length > len(stripped_block))
        empty_z_header = zlib.compress('')
        self.assertEqual('groupcompress-block\n'
                         '8\n' # len(compress(''))
                         '0\n' # len('')
                         '%d\n'# compressed block len
                         '%s'  # zheader
                         '%s'  # block
                         % (len(stripped_block), empty_z_header,
                            stripped_block),
                         wire_bytes)

    def test__wire_bytes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        block_bytes = block.to_bytes()
        wire_bytes = manager._wire_bytes()
        (storage_kind, z_header_len, header_len,
         block_len, rest) = wire_bytes.split('\n', 4)
        z_header_len = int(z_header_len)
        header_len = int(header_len)
        block_len = int(block_len)
        self.assertEqual('groupcompress-block', storage_kind)
        self.assertEqual(34, z_header_len)
        self.assertEqual(26, header_len)
        self.assertEqual(len(block_bytes), block_len)
        z_header = rest[:z_header_len]
        header = zlib.decompress(z_header)
        self.assertEqual(header_len, len(header))
        entry1 = locations[('key1',)]
        entry4 = locations[('key4',)]
        self.assertEqualDiff('key1\n'
                             '\n'  # no parents
                             '%d\n' # start offset
                             '%d\n' # end offset
                             'key4\n'
                             '\n'
                             '%d\n'
                             '%d\n'
                             % (entry1[0], entry1[1],
                                entry4[0], entry4[1]),
                            header)
        z_block = rest[z_header_len:]
        self.assertEqual(block_bytes, z_block)

    def test_from_bytes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        wire_bytes = manager._wire_bytes()
        self.assertStartsWith(wire_bytes, 'groupcompress-block\n')
        manager = groupcompress._LazyGroupContentManager.from_bytes(wire_bytes)
        self.assertIsInstance(manager, groupcompress._LazyGroupContentManager)
        self.assertEqual(2, len(manager._factories))
        self.assertEqual(block._z_content, manager._block._z_content)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key1',), ('key4',)], result_order)

    def test__check_rebuild_no_changes(self):
        block, manager = self.make_block_and_full_manager(self._texts)
        manager._check_rebuild_block()
        self.assertIs(block, manager._block)

    def test__check_rebuild_only_one(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Request just the first key, which should trigger a 'strip' action
        self.add_key_to_manager(('key1',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        # We should be able to still get the content out of this block, though
        # it should only have 1 entry
        for record in manager.get_record_stream():
            self.assertEqual(('key1',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))

    def test__check_rebuild_middle(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Request a small key in the middle should trigger a 'rebuild'
        self.add_key_to_manager(('key4',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        for record in manager.get_record_stream():
            self.assertEqual(('key4',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))

    def test_check_is_well_utilized_all_keys(self):
        block, manager = self.make_block_and_full_manager(self._texts)
        self.assertFalse(manager.check_is_well_utilized())
        # Though we can fake it by changing the recommended minimum size
        manager._full_enough_block_size = block._content_length
        self.assertTrue(manager.check_is_well_utilized())
        # Setting it just above causes it to fail
        manager._full_enough_block_size = block._content_length + 1
        self.assertFalse(manager.check_is_well_utilized())
        # Setting the mixed-block size doesn't do anything, because the content
        # is considered to not be 'mixed'
        manager._full_enough_mixed_block_size = block._content_length
        self.assertFalse(manager.check_is_well_utilized())

    def test_check_is_well_utilized_mixed_keys(self):
        texts = {}
        f1k1 = ('f1', 'k1')
        f1k2 = ('f1', 'k2')
        f2k1 = ('f2', 'k1')
        f2k2 = ('f2', 'k2')
        texts[f1k1] = self._texts[('key1',)]
        texts[f1k2] = self._texts[('key2',)]
        texts[f2k1] = self._texts[('key3',)]
        texts[f2k2] = self._texts[('key4',)]
        block, manager = self.make_block_and_full_manager(texts)
        self.assertFalse(manager.check_is_well_utilized())
        manager._full_enough_block_size = block._content_length
        self.assertTrue(manager.check_is_well_utilized())
        manager._full_enough_block_size = block._content_length + 1
        self.assertFalse(manager.check_is_well_utilized())
        manager._full_enough_mixed_block_size = block._content_length
        self.assertTrue(manager.check_is_well_utilized())

    def test_check_is_well_utilized_partial_use(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        manager._full_enough_block_size = block._content_length
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key2',), locations, block, manager)
        # Just using the content from key1 and 2 is not enough to be considered
        # 'complete'
        self.assertFalse(manager.check_is_well_utilized())
        # However if we add key3, then we have enough, as we only require 75%
        # consumption
        self.add_key_to_manager(('key4',), locations, block, manager)
        self.assertTrue(manager.check_is_well_utilized())

class Test_GCBuildDetails(tests.TestCase):

    def test_acts_like_tuple(self):
        # _GCBuildDetails inlines some of the data that used to be spread out
        # across a bunch of tuples
        bd = groupcompress._GCBuildDetails((('parent1',), ('parent2',)),
            ('INDEX', 10, 20, 0, 5))
        self.assertEqual(4, len(bd))
        self.assertEqual(('INDEX', 10, 20, 0, 5), bd[0])
        self.assertEqual(None, bd[1]) # Compression Parent is always None
        self.assertEqual((('parent1',), ('parent2',)), bd[2])
        self.assertEqual(('group', None), bd[3]) # Record details

    def test__repr__(self):
        bd = groupcompress._GCBuildDetails((('parent1',), ('parent2',)),
            ('INDEX', 10, 20, 0, 5))
        self.assertEqual("_GCBuildDetails(('INDEX', 10, 20, 0, 5),"
                         " (('parent1',), ('parent2',)))",
                         repr(bd))