Viewing changes to bzrlib/tests/test_groupcompress.py

  • Committer: mbp at sourcefrog
  • Date: 2005-03-25 01:16:46 UTC
  • Revision ID: mbp@sourcefrog.net-20050325011646-e3f0af5d6bd1190c
- update version string
- put it in bzrlib

# Copyright (C) 2008-2011 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Tests for group compression."""

import zlib

from bzrlib import (
    btree_index,
    groupcompress,
    errors,
    index as _mod_index,
    osutils,
    tests,
    trace,
    versionedfile,
    )
from bzrlib.osutils import sha_string
from bzrlib.tests.test__groupcompress import compiled_groupcompress_feature
from bzrlib.tests.scenarios import load_tests_apply_scenarios


def group_compress_implementation_scenarios():
    scenarios = [
        ('python', {'compressor': groupcompress.PythonGroupCompressor}),
        ]
    if compiled_groupcompress_feature.available():
        scenarios.append(('C',
            {'compressor': groupcompress.PyrexGroupCompressor}))
    return scenarios


load_tests = load_tests_apply_scenarios
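# load_tests_apply_scenarios multiplies each test class below that defines a
# 'scenarios' attribute (e.g. TestAllGroupCompressors) by those scenarios, so
# the same tests run once per compressor implementation.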


class TestGroupCompressor(tests.TestCase):

    def _chunks_to_repr_lines(self, chunks):
        return '\n'.join(map(repr, ''.join(chunks).split('\n')))

    def assertEqualDiffEncoded(self, expected, actual):
        """Compare the actual content to the expected content.

        :param expected: A group of chunks that we expect to see
        :param actual: The measured 'chunks'

        We will transform the chunks back into lines, and then run 'repr()'
        over them to handle non-ascii characters.
        """
        self.assertEqualDiff(self._chunks_to_repr_lines(expected),
                             self._chunks_to_repr_lines(actual))


class TestAllGroupCompressors(TestGroupCompressor):
    """Tests for GroupCompressor"""

    scenarios = group_compress_implementation_scenarios()
    compressor = None # Set by scenario

    def test_empty_delta(self):
        compressor = self.compressor()
        self.assertEqual([], compressor.chunks)

    def test_one_nosha_delta(self):
        # diff against NULL
        compressor = self.compressor()
        sha1, start_point, end_point, _ = compressor.compress(('label',),
            'strange\ncommon\n', None)
        self.assertEqual(sha_string('strange\ncommon\n'), sha1)
        expected_lines = 'f' '\x0f' 'strange\ncommon\n'
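        # 'f' marks a fulltext record, '\x0f' is its length (15 bytes), and
        # the raw text follows verbatim.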
        self.assertEqual(expected_lines, ''.join(compressor.chunks))
        self.assertEqual(0, start_point)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_empty_content(self):
        compressor = self.compressor()
        # Adding empty bytes should return the 'null' record
        sha1, start_point, end_point, kind = compressor.compress(('empty',),
                                                                 '', None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)
        self.assertEqual(0, compressor.endpoint)
        self.assertEqual([], compressor.chunks)
        # Even after adding some content
        compressor.compress(('content',), 'some\nbytes\n', None)
        self.assertTrue(compressor.endpoint > 0)
        sha1, start_point, end_point, kind = compressor.compress(('empty2',),
                                                                 '', None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)

    def test_extract_from_compressor(self):
        # Knit fetching will try to reconstruct texts locally which results in
        # reading something that is in the compressor stream already.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, _, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        # get the first out
        self.assertEqual(('strange\ncommon long line\n'
                          'that needs a 16 byte match\n', sha1_1),
                         compressor.extract(('label',)))
        # and the second
        self.assertEqual(('common long line\nthat needs a 16 byte match\n'
                          'different\n', sha1_2),
                         compressor.extract(('newlabel',)))

    def test_pop_last(self):
        compressor = self.compressor()
        _, _, _, _ = compressor.compress(('key1',),
            'some text\nfor the first entry\n', None)
        expected_lines = list(compressor.chunks)
        _, _, _, _ = compressor.compress(('key2',),
            'some text\nfor the second entry\n', None)
        compressor.pop_last()
        self.assertEqual(expected_lines, compressor.chunks)


class TestPyrexGroupCompressor(TestGroupCompressor):

    _test_needs_features = [compiled_groupcompress_feature]
    compressor = groupcompress.PyrexGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            'strange\n'
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'new\n'
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0f',
            # source and target length
            '\x36',
            # copy the line common
            '\x91\x0a\x2c', #copy, offset 0x0a, len 0x2c
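            # (a copy command sets the high bit: 0x91 = 0x80|0x01|0x10, i.e.
            # one offset byte and one length byte follow)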
            # add the line different, and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0b',
            # source and target length
            '\x5f'
            # insert new
            '\x03new',
            # Copy of first parent 'common' range
            '\x91\x09\x31' # copy, offset 0x09, 0x31 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)


class TestPythonGroupCompressor(TestGroupCompressor):

    compressor = groupcompress.PythonGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            'strange\n'
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'new\n'
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0f',
            # target length
            '\x36',
            # copy the line common
            '\x91\x0a\x2c', #copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0c',
            # target length
            '\x5f'
            # insert new
            '\x04new\n',
            # Copy of first parent 'common' range
            '\x91\x0a\x30' # copy, offset 0x0a, 0x30 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)


class TestGroupCompressBlock(tests.TestCase):

    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts."""
        compressor = groupcompress.GroupCompressor()
        start = 0
        for key in sorted(key_to_text):
            compressor.compress(key, key_to_text[key], None)
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.iteritems())
        block = compressor.flush()
        raw_bytes = block.to_bytes()
        # Go through from_bytes(to_bytes()) so that we start with a compressed
        # content object
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)

    def test_from_empty_bytes(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes, '')

    def test_from_minimal_bytes(self):
        block = groupcompress.GroupCompressBlock.from_bytes(
            'gcb1z\n0\n0\n')
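        # 'gcb1z' marker followed by a zero compressed length and a zero
        # uncompressed length, i.e. a header with no content at all.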
        self.assertIsInstance(block, groupcompress.GroupCompressBlock)
        self.assertIs(None, block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content()
        self.assertEqual('', block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content() # Ensure content is safe to call 2x

    def test_from_invalid(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes,
                          'this is not a valid header')

    def test_from_bytes(self):
        content = ('a tiny bit of content\n')
        z_content = zlib.compress(content)
        z_bytes = (
            'gcb1z\n' # group compress block v1 plain
            '%d\n' # Length of compressed content
            '%d\n' # Length of uncompressed content
            '%s'   # Compressed content
            ) % (len(z_content), len(content), z_content)
        block = groupcompress.GroupCompressBlock.from_bytes(
            z_bytes)
        self.assertEqual(z_content, block._z_content)
        self.assertIs(None, block._content)
        self.assertEqual(len(z_content), block._z_content_length)
        self.assertEqual(len(content), block._content_length)
        block._ensure_content()
        self.assertEqual(z_content, block._z_content)
        self.assertEqual(content, block._content)

    def test_to_chunks(self):
        content_chunks = ['this is some content\n',
                          'this content will be compressed\n']
        content_len = sum(map(len, content_chunks))
        content = ''.join(content_chunks)
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_chunked_content(content_chunks, content_len)
        total_len, block_chunks = gcb.to_chunks()
        block_bytes = ''.join(block_chunks)
        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
        self.assertEqual(total_len, len(block_bytes))
        self.assertEqual(gcb._content_length, content_len)
        expected_header =('gcb1z\n' # group compress block v1 zlib
                          '%d\n' # Length of compressed content
                          '%d\n' # Length of uncompressed content
                         ) % (gcb._z_content_length, gcb._content_length)
        # The first chunk should be the header chunk. It is small, fixed size,
        # and there is no compelling reason to split it up
        self.assertEqual(expected_header, block_chunks[0])
        self.assertStartsWith(block_bytes, expected_header)
        remaining_bytes = block_bytes[len(expected_header):]
        raw_bytes = zlib.decompress(remaining_bytes)
        self.assertEqual(content, raw_bytes)

    def test_to_bytes(self):
        content = ('this is some content\n'
                   'this content will be compressed\n')
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_content(content)
        bytes = gcb.to_bytes()
        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
        self.assertEqual(gcb._content_length, len(content))
        expected_header =('gcb1z\n' # group compress block v1 zlib
                          '%d\n' # Length of compressed content
                          '%d\n' # Length of uncompressed content
                         ) % (gcb._z_content_length, gcb._content_length)
        self.assertStartsWith(bytes, expected_header)
        remaining_bytes = bytes[len(expected_header):]
        raw_bytes = zlib.decompress(remaining_bytes)
        self.assertEqual(content, raw_bytes)

        # we should get the same results if using the chunked version
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_chunked_content(['this is some content\n'
                                 'this content will be compressed\n'],
                                 len(content))
        old_bytes = bytes
        bytes = gcb.to_bytes()
        self.assertEqual(old_bytes, bytes)

    def test_partial_decomp(self):
        content_chunks = []
        # We need a sufficient amount of data so that zlib.decompress has
        # partial decompression to work with. Most auto-generated data
        # compresses a bit too well, we want a combination, so we combine a sha
        # hash with compressible data.
        for i in xrange(2048):
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
            content_chunks.append(next_content)
            next_sha1 = osutils.sha_string(next_content)
            content_chunks.append(next_sha1 + '\n')
        content = ''.join(content_chunks)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
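        # Build the block by hand (rather than via from_bytes) so the test
        # controls exactly what the compressed stream contains.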
        block = groupcompress.GroupCompressBlock()
        block._z_content_chunks = (z_content,)
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = 158634
        self.assertIs(None, block._content)
        block._ensure_content(100)
        self.assertIsNot(None, block._content)
        # We have decompressed at least 100 bytes
        self.assertTrue(len(block._content) >= 100)
        # We have not decompressed the whole content
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # ensuring content that we already have shouldn't cause any more data
        # to be extracted
        cur_len = len(block._content)
        block._ensure_content(cur_len - 10)
        self.assertEqual(cur_len, len(block._content))
        # Now we want a bit more content
        cur_len += 10
        block._ensure_content(cur_len)
        self.assertTrue(len(block._content) >= cur_len)
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # And now lets finish
        block._ensure_content(158634)
        self.assertEqualDiff(content, block._content)
        # And the decompressor is finalized
        self.assertIs(None, block._z_content_decompressor)

    def test__ensure_all_content(self):
        content_chunks = []
        # We need a sufficient amount of data so that zlib.decompress has
        # partial decompression to work with. Most auto-generated data
        # compresses a bit too well, we want a combination, so we combine a sha
        # hash with compressible data.
        for i in xrange(2048):
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
            content_chunks.append(next_content)
            next_sha1 = osutils.sha_string(next_content)
            content_chunks.append(next_sha1 + '\n')
        content = ''.join(content_chunks)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content_chunks = (z_content,)
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = 158634
        self.assertIs(None, block._content)
        # The first _ensure_content got all of the required data
        block._ensure_content(158634)
        self.assertEqualDiff(content, block._content)
        # And we should have released the _z_content_decompressor since it was
        # fully consumed
        self.assertIs(None, block._z_content_decompressor)

    def test__dump(self):
        dup_content = 'some duplicate content\nwhich is sufficiently long\n'
        key_to_text = {('1',): dup_content + '1 unique\n',
                       ('2',): dup_content + '2 extra special\n'}
        locs, block = self.make_block(key_to_text)
        self.assertEqual([('f', len(key_to_text[('1',)])),
                          ('d', 21, len(key_to_text[('2',)]),
                           [('c', 2, len(dup_content)),
                            ('i', len('2 extra special\n'), '')
                           ]),
                         ], block._dump())


class TestCaseWithGroupCompressVersionedFiles(
        tests.TestCaseWithMemoryTransport):

    def make_test_vf(self, create_graph, keylength=1, do_cleanup=True,
                     dir='.', inconsistency_fatal=True):
        t = self.get_transport(dir)
        t.ensure_base()
        vf = groupcompress.make_pack_factory(graph=create_graph,
            delta=False, keylength=keylength,
            inconsistency_fatal=inconsistency_fatal)(t)
        if do_cleanup:
            self.addCleanup(groupcompress.cleanup_pack_group, vf)
        return vf


class TestGroupCompressVersionedFiles(TestCaseWithGroupCompressVersionedFiles):

    def make_g_index(self, name, ref_lists=0, nodes=[]):
        builder = btree_index.BTreeBuilder(ref_lists)
        for node, references, value in nodes:
            builder.add_node(node, references, value)
        stream = builder.finish()
        trans = self.get_transport()
        size = trans.put_file(name, stream)
        return btree_index.BTreeGraphIndex(trans, name, size)

    def make_g_index_missing_parent(self):
        graph_index = self.make_g_index('missing_parent', 1,
            [(('parent', ), '2 78 2 10', ([],)),
             (('tip', ), '2 78 2 10',
              ([('parent', ), ('missing-parent', )],)),
              ])
        return graph_index

    def test_get_record_stream_as_requested(self):
        # Consider promoting 'as-requested' to general availability, and
        # make this a VF interface test
        vf = self.make_test_vf(False, dir='source')
        vf.add_lines(('a',), (), ['lines\n'])
        vf.add_lines(('b',), (), ['lines\n'])
        vf.add_lines(('c',), (), ['lines\n'])
        vf.add_lines(('d',), (), ['lines\n'])
        vf.writer.end()
        keys = [record.key for record in vf.get_record_stream(
                    [('a',), ('b',), ('c',), ('d',)],
                    'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)],
                    'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

        # It should work even after being repacked into another VF
        vf2 = self.make_test_vf(False, dir='target')
        vf2.insert_record_stream(vf.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)], 'as-requested', False))
        vf2.writer.end()

        keys = [record.key for record in vf2.get_record_stream(
                    [('a',), ('b',), ('c',), ('d',)],
                    'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf2.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)],
                    'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

    def test_insert_record_stream_reuses_blocks(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        block_bytes = {}
        stream = vf.get_record_stream([(r,) for r in 'abcdefgh'],
                                      'unordered', False)
        num_records = 0
        for record in stream:
            if record.key in [('a',), ('e',)]:
                self.assertEqual('groupcompress-block', record.storage_kind)
            else:
                self.assertEqual('groupcompress-block-ref',
                                 record.storage_kind)
            block_bytes[record.key] = record._manager._block._z_content
            num_records += 1
        self.assertEqual(8, num_records)
        for r in 'abcd':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('a',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('e',)])
        for r in 'efgh':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('e',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('a',)])
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # ordering in 'groupcompress' order, should actually swap the groups in
        # the target vf, but the groups themselves should not be disturbed.
        def small_size_stream():
            for record in vf.get_record_stream([(r,) for r in 'abcdefgh'],
                                               'groupcompress', False):
                record._manager._full_enough_block_size = \
                    record._manager._block._content_length
                yield record

        vf2.insert_record_stream(small_size_stream())
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        vf2.writer.end()
        num_records = 0
        for record in stream:
            num_records += 1
            self.assertEqual(block_bytes[record.key],
                             record._manager._block._z_content)
        self.assertEqual(8, num_records)

    def test_insert_record_stream_packs_on_the_fly(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        # Now copy the blocks into another vf, and see that the
        # insert_record_stream rebuilt a new block on-the-fly because of
        # under-utilization
        vf2 = self.make_test_vf(True, dir='target')
        vf2.insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False))
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        vf2.writer.end()
        num_records = 0
        # All of the records should be recombined into a single block
        block = None
        for record in stream:
            num_records += 1
            if block is None:
                block = record._manager._block
            else:
                self.assertIs(block, record._manager._block)
        self.assertEqual(8, num_records)

    def test__insert_record_stream_no_reuse_block(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        vf.writer.end()
        self.assertEqual(8, len(list(vf.get_record_stream(
                                        [(r,) for r in 'abcdefgh'],
                                        'unordered', False))))
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # ordering in 'groupcompress' order, should actually swap the groups in
        # the target vf, but the groups themselves should not be disturbed.
        list(vf2._insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False),
            reuse_blocks=False))
        vf2.writer.end()
        # After inserting with reuse_blocks=False, we should have everything in
        # a single new block.
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        block = None
        for record in stream:
            if block is None:
                block = record._manager._block
            else:
                self.assertIs(block, record._manager._block)

    def test_add_missing_noncompression_parent_unvalidated_index(self):
        unvalidated = self.make_g_index_missing_parent()
        combined = _mod_index.CombinedGraphIndex([unvalidated])
        index = groupcompress._GCGraphIndex(combined,
            is_locked=lambda: True, parents=True,
            track_external_parent_refs=True)
        index.scan_unvalidated_index(unvalidated)
        self.assertEqual(
            frozenset([('missing-parent',)]), index.get_missing_parents())

    def test_track_external_parent_refs(self):
        g_index = self.make_g_index('empty', 1, [])
        mod_index = btree_index.BTreeBuilder(1, 1)
        combined = _mod_index.CombinedGraphIndex([g_index, mod_index])
        index = groupcompress._GCGraphIndex(combined,
            is_locked=lambda: True, parents=True,
            add_callback=mod_index.add_nodes,
            track_external_parent_refs=True)
        index.add_records([
            (('new-key',), '2 10 2 10', [(('parent-1',), ('parent-2',))])])
        self.assertEqual(
            frozenset([('parent-1',), ('parent-2',)]),
            index.get_missing_parents())

    def make_source_with_b(self, a_parent, path):
        source = self.make_test_vf(True, dir=path)
        source.add_lines(('a',), (), ['lines\n'])
        if a_parent:
            b_parents = (('a',),)
        else:
            b_parents = ()
        source.add_lines(('b',), b_parents, ['lines\n'])
        return source

    def do_inconsistent_inserts(self, inconsistency_fatal):
        target = self.make_test_vf(True, dir='target',
                                   inconsistency_fatal=inconsistency_fatal)
        for x in range(2):
            source = self.make_source_with_b(x==1, 'source%s' % x)
            target.insert_record_stream(source.get_record_stream(
                [('b',)], 'unordered', False))

    def test_inconsistent_redundant_inserts_warn(self):
        """Should not insert a record that is already present."""
        warnings = []
        def warning(template, args):
            warnings.append(template % args)
        _trace_warning = trace.warning
        trace.warning = warning
        try:
            self.do_inconsistent_inserts(inconsistency_fatal=False)
        finally:
            trace.warning = _trace_warning
        self.assertEqual(["inconsistent details in skipped record: ('b',)"
                          " ('42 32 0 8', ((),)) ('74 32 0 8', ((('a',),),))"],
                         warnings)

    def test_inconsistent_redundant_inserts_raises(self):
        e = self.assertRaises(errors.KnitCorrupt, self.do_inconsistent_inserts,
                              inconsistency_fatal=True)
        self.assertContainsRe(str(e), "Knit.* corrupt: inconsistent details"
                              " in add_records:"
                              " \('b',\) \('42 32 0 8', \(\(\),\)\) \('74 32"
                              " 0 8', \(\(\('a',\),\),\)\)")

    def test_clear_cache(self):
        vf = self.make_source_with_b(True, 'source')
        vf.writer.end()
        for record in vf.get_record_stream([('a',), ('b',)], 'unordered',
                                           True):
            pass
        self.assertTrue(len(vf._group_cache) > 0)
        vf.clear_cache()
        self.assertEqual(0, len(vf._group_cache))


class StubGCVF(object):
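    """A stand-in GroupCompressVersionedFiles with just the pieces that the
    _BatchingBlockFetcher tests need: a _group_cache dict and _get_blocks().
    """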
    def __init__(self, canned_get_blocks=None):
        self._group_cache = {}
        self._canned_get_blocks = canned_get_blocks or []
    def _get_blocks(self, read_memos):
        return iter(self._canned_get_blocks)


class Test_BatchingBlockFetcher(TestCaseWithGroupCompressVersionedFiles):
    """Simple whitebox unit tests for _BatchingBlockFetcher."""

    def test_add_key_new_read_memo(self):
        """Adding a key with an uncached read_memo new to this batch adds that
        read_memo to the list of memos to fetch.
        """
        # locations are: index_memo, ignored, parents, ignored
        # where index_memo is: (idx, offset, len, factory_start, factory_end)
        # and (idx, offset, size) is known as the 'read_memo', identifying the
        # raw bytes needed.
        read_memo = ('fake index', 100, 50)
        locations = {
            ('key',): (read_memo + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), locations)
        total_size = batcher.add_key(('key',))
        self.assertEqual(50, total_size)
        self.assertEqual([('key',)], batcher.keys)
        self.assertEqual([read_memo], batcher.memos_to_get)

    def test_add_key_duplicate_read_memo(self):
        """read_memos that occur multiple times in a batch will only be fetched
        once.
        """
        read_memo = ('fake index', 100, 50)
        # Two keys, both sharing the same read memo (but different overall
        # index_memos).
        locations = {
            ('key1',): (read_memo + (0, 1), None, None, None),
            ('key2',): (read_memo + (1, 2), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), locations)
        total_size = batcher.add_key(('key1',))
        total_size = batcher.add_key(('key2',))
        self.assertEqual(50, total_size)
        self.assertEqual([('key1',), ('key2',)], batcher.keys)
        self.assertEqual([read_memo], batcher.memos_to_get)

    def test_add_key_cached_read_memo(self):
        """Adding a key with a cached read_memo will not cause that read_memo
        to be added to the list to fetch.
        """
        read_memo = ('fake index', 100, 50)
        gcvf = StubGCVF()
        gcvf._group_cache[read_memo] = 'fake block'
        locations = {
            ('key',): (read_memo + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
        total_size = batcher.add_key(('key',))
        self.assertEqual(0, total_size)
        self.assertEqual([('key',)], batcher.keys)
        self.assertEqual([], batcher.memos_to_get)

    def test_yield_factories_empty(self):
        """An empty batch yields no factories."""
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), {})
        self.assertEqual([], list(batcher.yield_factories()))

    def test_yield_factories_calls_get_blocks(self):
        """Uncached memos are retrieved via get_blocks."""
        read_memo1 = ('fake index', 100, 50)
        read_memo2 = ('fake index', 150, 40)
        gcvf = StubGCVF(
            canned_get_blocks=[
                (read_memo1, groupcompress.GroupCompressBlock()),
                (read_memo2, groupcompress.GroupCompressBlock())])
        locations = {
            ('key1',): (read_memo1 + (None, None), None, None, None),
            ('key2',): (read_memo2 + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
        batcher.add_key(('key1',))
        batcher.add_key(('key2',))
        factories = list(batcher.yield_factories(full_flush=True))
        self.assertLength(2, factories)
        keys = [f.key for f in factories]
        kinds = [f.storage_kind for f in factories]
        self.assertEqual([('key1',), ('key2',)], keys)
        self.assertEqual(['groupcompress-block', 'groupcompress-block'], kinds)

    def test_yield_factories_flushing(self):
        """yield_factories holds back on yielding results from the final block
        unless passed full_flush=True.
        """
        fake_block = groupcompress.GroupCompressBlock()
        read_memo = ('fake index', 100, 50)
        gcvf = StubGCVF()
        gcvf._group_cache[read_memo] = fake_block
        locations = {
            ('key',): (read_memo + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
        batcher.add_key(('key',))
        self.assertEqual([], list(batcher.yield_factories()))
        factories = list(batcher.yield_factories(full_flush=True))
        self.assertLength(1, factories)
        self.assertEqual(('key',), factories[0].key)
        self.assertEqual('groupcompress-block', factories[0].storage_kind)


class TestLazyGroupCompress(tests.TestCaseWithTransport):

    _texts = {
        ('key1',): "this is a text\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
        ('key2',): "another text\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
        ('key3',): "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
        ('key4',): "this will be extracted\n"
                   "but references most of its bytes from\n"
                   "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
    }
    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts."""
        compressor = groupcompress.GroupCompressor()
        start = 0
        for key in sorted(key_to_text):
            compressor.compress(key, key_to_text[key], None)
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.iteritems())
        block = compressor.flush()
        raw_bytes = block.to_bytes()
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)

    def add_key_to_manager(self, key, locations, block, manager):
        start, end = locations[key]
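        # These tests never look at parents, so an empty parents tuple is
        # enough for add_factory.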
        manager.add_factory(key, (), start, end)

    def make_block_and_full_manager(self, texts):
        locations, block = self.make_block(texts)
        manager = groupcompress._LazyGroupContentManager(block)
        for key in sorted(texts):
            self.add_key_to_manager(key, locations, block, manager)
        return block, manager

    def test_get_fulltexts(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key2',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key1',), ('key2',)], result_order)

        # If we build the manager in the opposite order, we should get them
        # back in the opposite order
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key2',), locations, block, manager)
        self.add_key_to_manager(('key1',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key2',), ('key1',)], result_order)

    def test__wire_bytes_no_keys(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        wire_bytes = manager._wire_bytes()
        block_length = len(block.to_bytes())
        # We should have triggered a strip, since we aren't using any content
        stripped_block = manager._block.to_bytes()
        self.assertTrue(block_length > len(stripped_block))
        empty_z_header = zlib.compress('')
        self.assertEqual('groupcompress-block\n'
                         '8\n' # len(compress(''))
                         '0\n' # len('')
                         '%d\n'# compressed block len
                         '%s'  # zheader
                         '%s'  # block
                         % (len(stripped_block), empty_z_header,
                            stripped_block),
                         wire_bytes)

    def test__wire_bytes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        block_bytes = block.to_bytes()
        wire_bytes = manager._wire_bytes()
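        # The wire form starts with four newline-terminated fields (storage
        # kind, compressed header length, header length, block length),
        # followed by the zlib-compressed header and the raw block bytes.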
        (storage_kind, z_header_len, header_len,
         block_len, rest) = wire_bytes.split('\n', 4)
        z_header_len = int(z_header_len)
        header_len = int(header_len)
        block_len = int(block_len)
        self.assertEqual('groupcompress-block', storage_kind)
        self.assertEqual(34, z_header_len)
        self.assertEqual(26, header_len)
        self.assertEqual(len(block_bytes), block_len)
        z_header = rest[:z_header_len]
        header = zlib.decompress(z_header)
        self.assertEqual(header_len, len(header))
        entry1 = locations[('key1',)]
        entry4 = locations[('key4',)]
        self.assertEqualDiff('key1\n'
                             '\n'  # no parents
                             '%d\n' # start offset
                             '%d\n' # end offset
                             'key4\n'
                             '\n'
                             '%d\n'
                             '%d\n'
                             % (entry1[0], entry1[1],
                                entry4[0], entry4[1]),
                            header)
        z_block = rest[z_header_len:]
        self.assertEqual(block_bytes, z_block)

    def test_from_bytes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        wire_bytes = manager._wire_bytes()
        self.assertStartsWith(wire_bytes, 'groupcompress-block\n')
        manager = groupcompress._LazyGroupContentManager.from_bytes(wire_bytes)
        self.assertIsInstance(manager, groupcompress._LazyGroupContentManager)
        self.assertEqual(2, len(manager._factories))
        self.assertEqual(block._z_content, manager._block._z_content)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key1',), ('key4',)], result_order)

    def test__check_rebuild_no_changes(self):
        block, manager = self.make_block_and_full_manager(self._texts)
        manager._check_rebuild_block()
        self.assertIs(block, manager._block)

    def test__check_rebuild_only_one(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Request just the first key, which should trigger a 'strip' action
        self.add_key_to_manager(('key1',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        # We should be able to still get the content out of this block, though
        # it should only have 1 entry
        for record in manager.get_record_stream():
            self.assertEqual(('key1',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))

    def test__check_rebuild_middle(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Requesting a small key in the middle should trigger a 'rebuild'
        self.add_key_to_manager(('key4',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        for record in manager.get_record_stream():
            self.assertEqual(('key4',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))

    def test_check_is_well_utilized_all_keys(self):
        block, manager = self.make_block_and_full_manager(self._texts)
        self.assertFalse(manager.check_is_well_utilized())
        # Though we can fake it by changing the recommended minimum size
        manager._full_enough_block_size = block._content_length
        self.assertTrue(manager.check_is_well_utilized())
        # Setting it just above causes it to fail
        manager._full_enough_block_size = block._content_length + 1
        self.assertFalse(manager.check_is_well_utilized())
        # Setting the mixed-block size doesn't do anything, because the content
        # is considered to not be 'mixed'
        manager._full_enough_mixed_block_size = block._content_length
        self.assertFalse(manager.check_is_well_utilized())

    def test_check_is_well_utilized_mixed_keys(self):
        texts = {}
        f1k1 = ('f1', 'k1')
        f1k2 = ('f1', 'k2')
        f2k1 = ('f2', 'k1')
        f2k2 = ('f2', 'k2')
        texts[f1k1] = self._texts[('key1',)]
        texts[f1k2] = self._texts[('key2',)]
        texts[f2k1] = self._texts[('key3',)]
        texts[f2k2] = self._texts[('key4',)]
        block, manager = self.make_block_and_full_manager(texts)
        self.assertFalse(manager.check_is_well_utilized())
        manager._full_enough_block_size = block._content_length
        self.assertTrue(manager.check_is_well_utilized())
        manager._full_enough_block_size = block._content_length + 1
        self.assertFalse(manager.check_is_well_utilized())
        manager._full_enough_mixed_block_size = block._content_length
        self.assertTrue(manager.check_is_well_utilized())

    def test_check_is_well_utilized_partial_use(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        manager._full_enough_block_size = block._content_length
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key2',), locations, block, manager)
        # Just using the content from key1 and 2 is not enough to be considered
        # 'complete'
        self.assertFalse(manager.check_is_well_utilized())
        # However if we add key4, then we have enough, as we only require 75%
        # consumption
        self.add_key_to_manager(('key4',), locations, block, manager)
        self.assertTrue(manager.check_is_well_utilized())


class Test_GCBuildDetails(tests.TestCase):

    def test_acts_like_tuple(self):
        # _GCBuildDetails inlines some of the data that used to be spread out
        # across a bunch of tuples
        bd = groupcompress._GCBuildDetails((('parent1',), ('parent2',)),
            ('INDEX', 10, 20, 0, 5))
        self.assertEqual(4, len(bd))
        self.assertEqual(('INDEX', 10, 20, 0, 5), bd[0])
        self.assertEqual(None, bd[1]) # Compression Parent is always None
        self.assertEqual((('parent1',), ('parent2',)), bd[2])
        self.assertEqual(('group', None), bd[3]) # Record details

    def test__repr__(self):
        bd = groupcompress._GCBuildDetails((('parent1',), ('parent2',)),
            ('INDEX', 10, 20, 0, 5))
        self.assertEqual("_GCBuildDetails(('INDEX', 10, 20, 0, 5),"
                         " (('parent1',), ('parent2',)))",
                         repr(bd))