~bzr-pqm/bzr/bzr.dev

Viewing changes to bzrlib/tests/test_groupcompress.py

  • Committer: Naoki INADA
  • Date: 2009-10-29 10:01:19 UTC
  • mto: (4634.97.3 2.0)
  • mto: This revision was merged to the branch mainline in revision 4798.
  • Revision ID: inada-n@klab.jp-20091029100119-uckv9t7ej2qrghw3
import doc-ja rev90

# Copyright (C) 2008, 2009 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Tests for group compression."""

import zlib

from bzrlib import (
    btree_index,
    groupcompress,
    errors,
    index as _mod_index,
    osutils,
    tests,
    trace,
    versionedfile,
    )
from bzrlib.osutils import sha_string
from bzrlib.tests.test__groupcompress import CompiledGroupCompressFeature


def load_tests(standard_tests, module, loader):
    """Parameterize tests for all versions of groupcompress."""
    to_adapt, result = tests.split_suite_by_condition(
        standard_tests, tests.condition_isinstance(TestAllGroupCompressors))
    scenarios = [
        ('python', {'compressor': groupcompress.PythonGroupCompressor}),
        ]
    if CompiledGroupCompressFeature.available():
        scenarios.append(('C',
            {'compressor': groupcompress.PyrexGroupCompressor}))
    return tests.multiply_tests(to_adapt, scenarios, result)


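# TestGroupCompressor only supplies the repr-based chunk comparison helpers;
# the concrete compression behaviour is exercised by the subclasses below.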
class TestGroupCompressor(tests.TestCase):

    def _chunks_to_repr_lines(self, chunks):
        return '\n'.join(map(repr, ''.join(chunks).split('\n')))

    def assertEqualDiffEncoded(self, expected, actual):
        """Compare the actual content to the expected content.

        :param expected: A group of chunks that we expect to see
        :param actual: The measured 'chunks'

        We will transform the chunks back into lines, and then run 'repr()'
        over them to handle non-ascii characters.
        """
        self.assertEqualDiff(self._chunks_to_repr_lines(expected),
                             self._chunks_to_repr_lines(actual))


class TestAllGroupCompressors(TestGroupCompressor):
    """Tests for GroupCompressor"""

    compressor = None # Set by multiply_tests

    def test_empty_delta(self):
        compressor = self.compressor()
        self.assertEqual([], compressor.chunks)

    def test_one_nosha_delta(self):
        # diff against NULL
        compressor = self.compressor()
        sha1, start_point, end_point, _ = compressor.compress(('label',),
            'strange\ncommon\n', None)
        self.assertEqual(sha_string('strange\ncommon\n'), sha1)
        expected_lines = 'f' '\x0f' 'strange\ncommon\n'
        self.assertEqual(expected_lines, ''.join(compressor.chunks))
        self.assertEqual(0, start_point)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_empty_content(self):
        compressor = self.compressor()
        # Adding empty bytes should return the 'null' record
        sha1, start_point, end_point, kind = compressor.compress(('empty',),
                                                                 '', None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)
        self.assertEqual(0, compressor.endpoint)
        self.assertEqual([], compressor.chunks)
        # Even after adding some content
        compressor.compress(('content',), 'some\nbytes\n', None)
        self.assertTrue(compressor.endpoint > 0)
        sha1, start_point, end_point, kind = compressor.compress(('empty2',),
                                                                 '', None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)

    def test_extract_from_compressor(self):
        # Knit fetching will try to reconstruct texts locally which results in
        # reading something that is in the compressor stream already.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, _, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        # get the first out
        self.assertEqual(('strange\ncommon long line\n'
                          'that needs a 16 byte match\n', sha1_1),
                         compressor.extract(('label',)))
        # and the second
        self.assertEqual(('common long line\nthat needs a 16 byte match\n'
                          'different\n', sha1_2),
                         compressor.extract(('newlabel',)))

    def test_pop_last(self):
        compressor = self.compressor()
        _, _, _, _ = compressor.compress(('key1',),
            'some text\nfor the first entry\n', None)
        expected_lines = list(compressor.chunks)
        _, _, _, _ = compressor.compress(('key2',),
            'some text\nfor the second entry\n', None)
        compressor.pop_last()
        self.assertEqual(expected_lines, compressor.chunks)


class TestPyrexGroupCompressor(TestGroupCompressor):

    _test_needs_features = [CompiledGroupCompressFeature]
    compressor = groupcompress.PyrexGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            'strange\n'
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'new\n'
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0f',
            # source and target length
            '\x36',
            # copy the line common
            '\x91\x0a\x2c', #copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0b',
            # source and target length
            '\x5f'
            # insert new
            '\x03new',
            # Copy of first parent 'common' range
            '\x91\x09\x31' # copy, offset 0x09, 0x31 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)


class TestPythonGroupCompressor(TestGroupCompressor):

    compressor = groupcompress.PythonGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            'strange\n'
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'new\n'
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0f',
            # target length
            '\x36',
            # copy the line common
            '\x91\x0a\x2c', #copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0c',
            # target length
            '\x5f'
            # insert new
            '\x04new\n',
            # Copy of first parent 'common' range
            '\x91\x0a\x30' # copy, offset 0x0a, 0x30 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)


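# These tests cover the serialized block format: a 'gcb1z' header, the
# compressed and uncompressed lengths, followed by the zlib-compressed content.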
class TestGroupCompressBlock(tests.TestCase):

    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts."""
        compressor = groupcompress.GroupCompressor()
        start = 0
        for key in sorted(key_to_text):
            compressor.compress(key, key_to_text[key], None)
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.iteritems())
        block = compressor.flush()
        raw_bytes = block.to_bytes()
        # Go through from_bytes(to_bytes()) so that we start with a compressed
        # content object
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)

    def test_from_empty_bytes(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes, '')

    def test_from_minimal_bytes(self):
        block = groupcompress.GroupCompressBlock.from_bytes(
            'gcb1z\n0\n0\n')
        self.assertIsInstance(block, groupcompress.GroupCompressBlock)
        self.assertIs(None, block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content()
        self.assertEqual('', block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content() # Ensure content is safe to call 2x

    def test_from_invalid(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes,
                          'this is not a valid header')

    def test_from_bytes(self):
        content = ('a tiny bit of content\n')
        z_content = zlib.compress(content)
        z_bytes = (
            'gcb1z\n' # group compress block v1 plain
            '%d\n' # Length of compressed content
            '%d\n' # Length of uncompressed content
            '%s'   # Compressed content
            ) % (len(z_content), len(content), z_content)
        block = groupcompress.GroupCompressBlock.from_bytes(
            z_bytes)
        self.assertEqual(z_content, block._z_content)
        self.assertIs(None, block._content)
        self.assertEqual(len(z_content), block._z_content_length)
        self.assertEqual(len(content), block._content_length)
        block._ensure_content()
        self.assertEqual(z_content, block._z_content)
        self.assertEqual(content, block._content)

    def test_to_bytes(self):
        content = ('this is some content\n'
                   'this content will be compressed\n')
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_content(content)
        bytes = gcb.to_bytes()
        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
        self.assertEqual(gcb._content_length, len(content))
        expected_header =('gcb1z\n' # group compress block v1 zlib
                          '%d\n' # Length of compressed content
                          '%d\n' # Length of uncompressed content
                         ) % (gcb._z_content_length, gcb._content_length)
        self.assertStartsWith(bytes, expected_header)
        remaining_bytes = bytes[len(expected_header):]
        raw_bytes = zlib.decompress(remaining_bytes)
        self.assertEqual(content, raw_bytes)

        # we should get the same results if using the chunked version
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_chunked_content(['this is some content\n'
                                 'this content will be compressed\n'],
                                 len(content))
        old_bytes = bytes
        bytes = gcb.to_bytes()
        self.assertEqual(old_bytes, bytes)

    def test_partial_decomp(self):
        content_chunks = []
        # We need a sufficient amount of data so that zlib.decompress has
        # partial decompression to work with. Most auto-generated data
        # compresses a bit too well, we want a combination, so we combine a sha
        # hash with compressible data.
        for i in xrange(2048):
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
            content_chunks.append(next_content)
            next_sha1 = osutils.sha_string(next_content)
            content_chunks.append(next_sha1 + '\n')
        content = ''.join(content_chunks)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content = z_content
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = 158634
        self.assertIs(None, block._content)
        block._ensure_content(100)
        self.assertIsNot(None, block._content)
        # We have decompressed at least 100 bytes
        self.assertTrue(len(block._content) >= 100)
        # We have not decompressed the whole content
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # ensuring content that we already have shouldn't cause any more data
        # to be extracted
        cur_len = len(block._content)
        block._ensure_content(cur_len - 10)
        self.assertEqual(cur_len, len(block._content))
        # Now we want a bit more content
        cur_len += 10
        block._ensure_content(cur_len)
        self.assertTrue(len(block._content) >= cur_len)
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # And now lets finish
        block._ensure_content(158634)
        self.assertEqualDiff(content, block._content)
        # And the decompressor is finalized
        self.assertIs(None, block._z_content_decompressor)

    def test_partial_decomp_no_known_length(self):
        content_chunks = []
        for i in xrange(2048):
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
            content_chunks.append(next_content)
            next_sha1 = osutils.sha_string(next_content)
            content_chunks.append(next_sha1 + '\n')
        content = ''.join(content_chunks)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content = z_content
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = None # Don't tell the decompressed length
        self.assertIs(None, block._content)
        block._ensure_content(100)
        self.assertIsNot(None, block._content)
        # We have decompressed at least 100 bytes
        self.assertTrue(len(block._content) >= 100)
        # We have not decompressed the whole content
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # ensuring content that we already have shouldn't cause any more data
        # to be extracted
        cur_len = len(block._content)
        block._ensure_content(cur_len - 10)
        self.assertEqual(cur_len, len(block._content))
        # Now we want a bit more content
        cur_len += 10
        block._ensure_content(cur_len)
        self.assertTrue(len(block._content) >= cur_len)
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # And now lets finish
        block._ensure_content()
        self.assertEqualDiff(content, block._content)
        # And the decompressor is finalized
        self.assertIs(None, block._z_content_decompressor)

    def test__dump(self):
        dup_content = 'some duplicate content\nwhich is sufficiently long\n'
        key_to_text = {('1',): dup_content + '1 unique\n',
                       ('2',): dup_content + '2 extra special\n'}
        locs, block = self.make_block(key_to_text)
        self.assertEqual([('f', len(key_to_text[('1',)])),
                          ('d', 21, len(key_to_text[('2',)]),
                           [('c', 2, len(dup_content)),
                            ('i', len('2 extra special\n'), '')
                           ]),
                         ], block._dump())


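# Base class providing make_test_vf(), which builds a pack-backed groupcompress
# versioned files object on a transport and optionally cleans it up afterwards.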
class TestCaseWithGroupCompressVersionedFiles(tests.TestCaseWithTransport):

    def make_test_vf(self, create_graph, keylength=1, do_cleanup=True,
                     dir='.', inconsistency_fatal=True):
        t = self.get_transport(dir)
        t.ensure_base()
        vf = groupcompress.make_pack_factory(graph=create_graph,
            delta=False, keylength=keylength,
            inconsistency_fatal=inconsistency_fatal)(t)
        if do_cleanup:
            self.addCleanup(groupcompress.cleanup_pack_group, vf)
        return vf


class TestGroupCompressVersionedFiles(TestCaseWithGroupCompressVersionedFiles):

    def make_g_index(self, name, ref_lists=0, nodes=[]):
        builder = btree_index.BTreeBuilder(ref_lists)
        for node, references, value in nodes:
            builder.add_node(node, references, value)
        stream = builder.finish()
        trans = self.get_transport()
        size = trans.put_file(name, stream)
        return btree_index.BTreeGraphIndex(trans, name, size)

    def make_g_index_missing_parent(self):
        graph_index = self.make_g_index('missing_parent', 1,
            [(('parent', ), '2 78 2 10', ([],)),
             (('tip', ), '2 78 2 10',
              ([('parent', ), ('missing-parent', )],)),
              ])
        return graph_index

    def test_get_record_stream_as_requested(self):
        # Consider promoting 'as-requested' to general availability, and
        # make this a VF interface test
        vf = self.make_test_vf(False, dir='source')
        vf.add_lines(('a',), (), ['lines\n'])
        vf.add_lines(('b',), (), ['lines\n'])
        vf.add_lines(('c',), (), ['lines\n'])
        vf.add_lines(('d',), (), ['lines\n'])
        vf.writer.end()
        keys = [record.key for record in vf.get_record_stream(
                    [('a',), ('b',), ('c',), ('d',)],
                    'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)],
                    'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

        # It should work even after being repacked into another VF
        vf2 = self.make_test_vf(False, dir='target')
        vf2.insert_record_stream(vf.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)], 'as-requested', False))
        vf2.writer.end()

        keys = [record.key for record in vf2.get_record_stream(
                    [('a',), ('b',), ('c',), ('d',)],
                    'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf2.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)],
                    'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

    def test_insert_record_stream_reuses_blocks(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        block_bytes = {}
        stream = vf.get_record_stream([(r,) for r in 'abcdefgh'],
                                      'unordered', False)
        num_records = 0
        for record in stream:
            if record.key in [('a',), ('e',)]:
                self.assertEqual('groupcompress-block', record.storage_kind)
            else:
                self.assertEqual('groupcompress-block-ref',
                                 record.storage_kind)
            block_bytes[record.key] = record._manager._block._z_content
            num_records += 1
        self.assertEqual(8, num_records)
        for r in 'abcd':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('a',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('e',)])
        for r in 'efgh':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('e',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('a',)])
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # ordering in 'groupcompress' order, should actually swap the groups in
        # the target vf, but the groups themselves should not be disturbed.
        def small_size_stream():
            for record in vf.get_record_stream([(r,) for r in 'abcdefgh'],
                                               'groupcompress', False):
                record._manager._full_enough_block_size = \
                    record._manager._block._content_length
                yield record

        vf2.insert_record_stream(small_size_stream())
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        vf2.writer.end()
        num_records = 0
        for record in stream:
            num_records += 1
            self.assertEqual(block_bytes[record.key],
                             record._manager._block._z_content)
        self.assertEqual(8, num_records)

    def test_insert_record_stream_packs_on_the_fly(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        # Now copy the blocks into another vf, and see that the
        # insert_record_stream rebuilt a new block on-the-fly because of
        # under-utilization
        vf2 = self.make_test_vf(True, dir='target')
        vf2.insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False))
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        vf2.writer.end()
        num_records = 0
        # All of the records should be recombined into a single block
        block = None
        for record in stream:
            num_records += 1
            if block is None:
                block = record._manager._block
            else:
                self.assertIs(block, record._manager._block)
        self.assertEqual(8, num_records)

    def test__insert_record_stream_no_reuse_block(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        vf.writer.end()
        self.assertEqual(8, len(list(vf.get_record_stream(
                                        [(r,) for r in 'abcdefgh'],
                                        'unordered', False))))
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # ordering in 'groupcompress' order, should actually swap the groups in
        # the target vf, but the groups themselves should not be disturbed.
        list(vf2._insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False),
            reuse_blocks=False))
        vf2.writer.end()
        # After inserting with reuse_blocks=False, we should have everything in
        # a single new block.
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        block = None
        for record in stream:
            if block is None:
                block = record._manager._block
            else:
                self.assertIs(block, record._manager._block)

    def test_add_missing_noncompression_parent_unvalidated_index(self):
        unvalidated = self.make_g_index_missing_parent()
        combined = _mod_index.CombinedGraphIndex([unvalidated])
        index = groupcompress._GCGraphIndex(combined,
            is_locked=lambda: True, parents=True,
            track_external_parent_refs=True)
        index.scan_unvalidated_index(unvalidated)
        self.assertEqual(
            frozenset([('missing-parent',)]), index.get_missing_parents())

    def test_track_external_parent_refs(self):
        g_index = self.make_g_index('empty', 1, [])
        mod_index = btree_index.BTreeBuilder(1, 1)
        combined = _mod_index.CombinedGraphIndex([g_index, mod_index])
        index = groupcompress._GCGraphIndex(combined,
            is_locked=lambda: True, parents=True,
            add_callback=mod_index.add_nodes,
            track_external_parent_refs=True)
        index.add_records([
            (('new-key',), '2 10 2 10', [(('parent-1',), ('parent-2',))])])
        self.assertEqual(
            frozenset([('parent-1',), ('parent-2',)]),
            index.get_missing_parents())

    def make_source_with_b(self, a_parent, path):
        source = self.make_test_vf(True, dir=path)
        source.add_lines(('a',), (), ['lines\n'])
        if a_parent:
            b_parents = (('a',),)
        else:
            b_parents = ()
        source.add_lines(('b',), b_parents, ['lines\n'])
        return source

    def do_inconsistent_inserts(self, inconsistency_fatal):
        target = self.make_test_vf(True, dir='target',
                                   inconsistency_fatal=inconsistency_fatal)
        for x in range(2):
            source = self.make_source_with_b(x==1, 'source%s' % x)
            target.insert_record_stream(source.get_record_stream(
                [('b',)], 'unordered', False))

    def test_inconsistent_redundant_inserts_warn(self):
        """Should not insert a record that is already present."""
        warnings = []
        def warning(template, args):
            warnings.append(template % args)
        _trace_warning = trace.warning
        trace.warning = warning
        try:
            self.do_inconsistent_inserts(inconsistency_fatal=False)
        finally:
            trace.warning = _trace_warning
        self.assertEqual(["inconsistent details in skipped record: ('b',)"
                          " ('42 32 0 8', ((),)) ('74 32 0 8', ((('a',),),))"],
                         warnings)

    def test_inconsistent_redundant_inserts_raises(self):
        e = self.assertRaises(errors.KnitCorrupt, self.do_inconsistent_inserts,
                              inconsistency_fatal=True)
        self.assertContainsRe(str(e), "Knit.* corrupt: inconsistent details"
                              " in add_records:"
                              " \('b',\) \('42 32 0 8', \(\(\),\)\) \('74 32"
                              " 0 8', \(\(\('a',\),\),\)\)")


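# Tests for _LazyGroupContentManager: extracting fulltexts on demand, the
# 'groupcompress-block' wire format, and the strip/rebuild heuristics used
# for partially consumed blocks.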
class TestLazyGroupCompress(tests.TestCaseWithTransport):

    _texts = {
        ('key1',): "this is a text\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
        ('key2',): "another text\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
        ('key3',): "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
        ('key4',): "this will be extracted\n"
                   "but references most of its bytes from\n"
                   "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
    }
    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts."""
        compressor = groupcompress.GroupCompressor()
        start = 0
        for key in sorted(key_to_text):
            compressor.compress(key, key_to_text[key], None)
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.iteritems())
        block = compressor.flush()
        raw_bytes = block.to_bytes()
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)

    def add_key_to_manager(self, key, locations, block, manager):
        start, end = locations[key]
        manager.add_factory(key, (), start, end)

    def make_block_and_full_manager(self, texts):
        locations, block = self.make_block(texts)
        manager = groupcompress._LazyGroupContentManager(block)
        for key in sorted(texts):
            self.add_key_to_manager(key, locations, block, manager)
        return block, manager

    def test_get_fulltexts(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key2',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key1',), ('key2',)], result_order)

        # If we build the manager in the opposite order, we should get them
        # back in the opposite order
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key2',), locations, block, manager)
        self.add_key_to_manager(('key1',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key2',), ('key1',)], result_order)

    def test__wire_bytes_no_keys(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        wire_bytes = manager._wire_bytes()
        block_length = len(block.to_bytes())
        # We should have triggered a strip, since we aren't using any content
        stripped_block = manager._block.to_bytes()
        self.assertTrue(block_length > len(stripped_block))
        empty_z_header = zlib.compress('')
        self.assertEqual('groupcompress-block\n'
                         '8\n' # len(compress(''))
                         '0\n' # len('')
                         '%d\n'# compressed block len
                         '%s'  # zheader
                         '%s'  # block
                         % (len(stripped_block), empty_z_header,
                            stripped_block),
                         wire_bytes)

    def test__wire_bytes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        block_bytes = block.to_bytes()
        wire_bytes = manager._wire_bytes()
        (storage_kind, z_header_len, header_len,
         block_len, rest) = wire_bytes.split('\n', 4)
        z_header_len = int(z_header_len)
        header_len = int(header_len)
        block_len = int(block_len)
        self.assertEqual('groupcompress-block', storage_kind)
        self.assertEqual(34, z_header_len)
        self.assertEqual(26, header_len)
        self.assertEqual(len(block_bytes), block_len)
        z_header = rest[:z_header_len]
        header = zlib.decompress(z_header)
        self.assertEqual(header_len, len(header))
        entry1 = locations[('key1',)]
        entry4 = locations[('key4',)]
        self.assertEqualDiff('key1\n'
                             '\n'  # no parents
                             '%d\n' # start offset
                             '%d\n' # end offset
                             'key4\n'
                             '\n'
                             '%d\n'
                             '%d\n'
                             % (entry1[0], entry1[1],
                                entry4[0], entry4[1]),
                            header)
        z_block = rest[z_header_len:]
        self.assertEqual(block_bytes, z_block)

    def test_from_bytes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        wire_bytes = manager._wire_bytes()
        self.assertStartsWith(wire_bytes, 'groupcompress-block\n')
        manager = groupcompress._LazyGroupContentManager.from_bytes(wire_bytes)
        self.assertIsInstance(manager, groupcompress._LazyGroupContentManager)
        self.assertEqual(2, len(manager._factories))
        self.assertEqual(block._z_content, manager._block._z_content)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key1',), ('key4',)], result_order)

    def test__check_rebuild_no_changes(self):
        block, manager = self.make_block_and_full_manager(self._texts)
        manager._check_rebuild_block()
        self.assertIs(block, manager._block)

    def test__check_rebuild_only_one(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Request just the first key, which should trigger a 'strip' action
        self.add_key_to_manager(('key1',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        # We should be able to still get the content out of this block, though
        # it should only have 1 entry
        for record in manager.get_record_stream():
            self.assertEqual(('key1',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))

    def test__check_rebuild_middle(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Request a small key in the middle should trigger a 'rebuild'
        self.add_key_to_manager(('key4',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        for record in manager.get_record_stream():
            self.assertEqual(('key4',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))

    def test_check_is_well_utilized_all_keys(self):
        block, manager = self.make_block_and_full_manager(self._texts)
        self.assertFalse(manager.check_is_well_utilized())
        # Though we can fake it by changing the recommended minimum size
        manager._full_enough_block_size = block._content_length
        self.assertTrue(manager.check_is_well_utilized())
        # Setting it just above causes it to fail
        manager._full_enough_block_size = block._content_length + 1
        self.assertFalse(manager.check_is_well_utilized())
        # Setting the mixed-block size doesn't do anything, because the content
        # is considered to not be 'mixed'
        manager._full_enough_mixed_block_size = block._content_length
        self.assertFalse(manager.check_is_well_utilized())

    def test_check_is_well_utilized_mixed_keys(self):
        texts = {}
        f1k1 = ('f1', 'k1')
        f1k2 = ('f1', 'k2')
        f2k1 = ('f2', 'k1')
        f2k2 = ('f2', 'k2')
        texts[f1k1] = self._texts[('key1',)]
        texts[f1k2] = self._texts[('key2',)]
        texts[f2k1] = self._texts[('key3',)]
        texts[f2k2] = self._texts[('key4',)]
        block, manager = self.make_block_and_full_manager(texts)
        self.assertFalse(manager.check_is_well_utilized())
        manager._full_enough_block_size = block._content_length
        self.assertTrue(manager.check_is_well_utilized())
        manager._full_enough_block_size = block._content_length + 1
        self.assertFalse(manager.check_is_well_utilized())
        manager._full_enough_mixed_block_size = block._content_length
        self.assertTrue(manager.check_is_well_utilized())

    def test_check_is_well_utilized_partial_use(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        manager._full_enough_block_size = block._content_length
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key2',), locations, block, manager)
        # Just using the content from key1 and 2 is not enough to be considered
        # 'complete'
        self.assertFalse(manager.check_is_well_utilized())
        # However if we add key3, then we have enough, as we only require 75%
        # consumption
        self.add_key_to_manager(('key4',), locations, block, manager)
        self.assertTrue(manager.check_is_well_utilized())