~bzr-pqm/bzr/bzr.dev

Viewing changes to bzrlib/tests/test_groupcompress.py

  • Committer: Canonical.com Patch Queue Manager
  • Date: 2009-07-30 14:24:06 UTC
  • mfrom: (4576.1.1 export-to-dir)
  • Revision ID: pqm@pqm.ubuntu.com-20090730142406-wg8gmxpcjz4c1z00
(bialix) Allow 'bzr export' to export into an existing (but empty) directory

# Copyright (C) 2008, 2009 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Tests for group compression."""

import zlib

from bzrlib import (
    btree_index,
    groupcompress,
    errors,
    index as _mod_index,
    osutils,
    tests,
    trace,
    versionedfile,
    )
from bzrlib.osutils import sha_string
from bzrlib.tests.test__groupcompress import CompiledGroupCompressFeature


def load_tests(standard_tests, module, loader):
    """Parameterize tests for all versions of groupcompress."""
    to_adapt, result = tests.split_suite_by_condition(
        standard_tests, tests.condition_isinstance(TestAllGroupCompressors))
    scenarios = [
        ('python', {'compressor': groupcompress.PythonGroupCompressor}),
        ]
    if CompiledGroupCompressFeature.available():
        scenarios.append(('C',
            {'compressor': groupcompress.PyrexGroupCompressor}))
    return tests.multiply_tests(to_adapt, scenarios, result)


class TestGroupCompressor(tests.TestCase):

    def _chunks_to_repr_lines(self, chunks):
        return '\n'.join(map(repr, ''.join(chunks).split('\n')))

    def assertEqualDiffEncoded(self, expected, actual):
        """Compare the actual content to the expected content.

        :param expected: A group of chunks that we expect to see
        :param actual: The measured 'chunks'

        We will transform the chunks back into lines, and then run 'repr()'
        over them to handle non-ascii characters.
        """
        self.assertEqualDiff(self._chunks_to_repr_lines(expected),
                             self._chunks_to_repr_lines(actual))


class TestAllGroupCompressors(TestGroupCompressor):
    """Tests for GroupCompressor"""

    compressor = None # Set by multiply_tests

    def test_empty_delta(self):
        compressor = self.compressor()
        self.assertEqual([], compressor.chunks)

    def test_one_nosha_delta(self):
        # diff against NULL
        compressor = self.compressor()
        sha1, start_point, end_point, _ = compressor.compress(('label',),
            'strange\ncommon\n', None)
        self.assertEqual(sha_string('strange\ncommon\n'), sha1)
        expected_lines = 'f' '\x0f' 'strange\ncommon\n'
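        # ('f' marks a fulltext record; '\x0f' is 15, the length of
        # 'strange\ncommon\n', which fits in a single length byte here.)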
        self.assertEqual(expected_lines, ''.join(compressor.chunks))
        self.assertEqual(0, start_point)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_empty_content(self):
        compressor = self.compressor()
        # Adding empty bytes should return the 'null' record
        sha1, start_point, end_point, kind = compressor.compress(('empty',),
                                                                 '', None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)
        self.assertEqual(0, compressor.endpoint)
        self.assertEqual([], compressor.chunks)
        # Even after adding some content
        compressor.compress(('content',), 'some\nbytes\n', None)
        self.assertTrue(compressor.endpoint > 0)
        sha1, start_point, end_point, kind = compressor.compress(('empty2',),
                                                                 '', None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)

    def test_extract_from_compressor(self):
        # Knit fetching will try to reconstruct texts locally which results in
        # reading something that is in the compressor stream already.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, _, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        # get the first out
        self.assertEqual(('strange\ncommon long line\n'
                          'that needs a 16 byte match\n', sha1_1),
                         compressor.extract(('label',)))
        # and the second
        self.assertEqual(('common long line\nthat needs a 16 byte match\n'
                          'different\n', sha1_2),
                         compressor.extract(('newlabel',)))

    def test_pop_last(self):
        compressor = self.compressor()
        _, _, _, _ = compressor.compress(('key1',),
            'some text\nfor the first entry\n', None)
        expected_lines = list(compressor.chunks)
        _, _, _, _ = compressor.compress(('key2',),
            'some text\nfor the second entry\n', None)
        compressor.pop_last()
        self.assertEqual(expected_lines, compressor.chunks)


class TestPyrexGroupCompressor(TestGroupCompressor):

    _test_needs_features = [CompiledGroupCompressFeature]
    compressor = groupcompress.PyrexGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            'strange\n'
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'new\n'
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0f',
            # source and target length
            '\x36',
            # copy the line common
            '\x91\x0a\x2c', # copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
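        # (The copy instruction appears to follow a git-style delta encoding:
        # the high bit of 0x91 marks a copy, and its low bits indicate that
        # one offset byte and one length byte follow.)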
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0b',
            # source and target length
            '\x5f'
            # insert new
            '\x03new',
            # Copy of first parent 'common' range
            '\x91\x09\x31' # copy, offset 0x09, 0x31 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)


class TestPythonGroupCompressor(TestGroupCompressor):

    compressor = groupcompress.PythonGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            'strange\n'
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'new\n'
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0f',
            # target length
            '\x36',
            # copy the line common
            '\x91\x0a\x2c', # copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0c',
            # target length
            '\x5f'
            # insert new
            '\x04new\n',
            # Copy of first parent 'common' range
            '\x91\x0a\x30' # copy, offset 0x0a, 0x30 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)


class TestGroupCompressBlock(tests.TestCase):

    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts."""
        compressor = groupcompress.GroupCompressor()
        start = 0
        for key in sorted(key_to_text):
            compressor.compress(key, key_to_text[key], None)
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.iteritems())
        block = compressor.flush()
        raw_bytes = block.to_bytes()
        # Go through from_bytes(to_bytes()) so that we start with a compressed
        # content object
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)

    def test_from_empty_bytes(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes, '')

    def test_from_minimal_bytes(self):
        block = groupcompress.GroupCompressBlock.from_bytes(
            'gcb1z\n0\n0\n')
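        # Header: 'gcb1z' is "group compress block v1, zlib", followed by the
        # compressed length and the uncompressed length, both zero here.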
        self.assertIsInstance(block, groupcompress.GroupCompressBlock)
        self.assertIs(None, block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content()
        self.assertEqual('', block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content() # Ensure content is safe to call 2x

    def test_from_invalid(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes,
                          'this is not a valid header')

    def test_from_bytes(self):
        content = ('a tiny bit of content\n')
        z_content = zlib.compress(content)
        z_bytes = (
            'gcb1z\n' # group compress block v1 zlib
            '%d\n' # Length of compressed content
            '%d\n' # Length of uncompressed content
            '%s'   # Compressed content
            ) % (len(z_content), len(content), z_content)
        block = groupcompress.GroupCompressBlock.from_bytes(
            z_bytes)
        self.assertEqual(z_content, block._z_content)
        self.assertIs(None, block._content)
        self.assertEqual(len(z_content), block._z_content_length)
        self.assertEqual(len(content), block._content_length)
        block._ensure_content()
        self.assertEqual(z_content, block._z_content)
        self.assertEqual(content, block._content)

    def test_to_bytes(self):
        content = ('this is some content\n'
                   'this content will be compressed\n')
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_content(content)
        bytes = gcb.to_bytes()
        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
        self.assertEqual(gcb._content_length, len(content))
        expected_header = ('gcb1z\n' # group compress block v1 zlib
                          '%d\n' # Length of compressed content
                          '%d\n' # Length of uncompressed content
                         ) % (gcb._z_content_length, gcb._content_length)
        self.assertStartsWith(bytes, expected_header)
        remaining_bytes = bytes[len(expected_header):]
        raw_bytes = zlib.decompress(remaining_bytes)
        self.assertEqual(content, raw_bytes)

        # we should get the same results if using the chunked version
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_chunked_content(['this is some content\n'
                                 'this content will be compressed\n'],
                                 len(content))
        old_bytes = bytes
        bytes = gcb.to_bytes()
        self.assertEqual(old_bytes, bytes)

    def test_partial_decomp(self):
        content_chunks = []
        # We need a sufficient amount of data so that zlib.decompress has
        # partial decompression to work with. Most auto-generated data
        # compresses a bit too well, so we combine a sha hash with
        # compressible data.
        for i in xrange(2048):
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
            content_chunks.append(next_content)
            next_sha1 = osutils.sha_string(next_content)
            content_chunks.append(next_sha1 + '\n')
        content = ''.join(content_chunks)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content = z_content
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = 158634
        self.assertIs(None, block._content)
        block._ensure_content(100)
        self.assertIsNot(None, block._content)
        # We have decompressed at least 100 bytes
        self.assertTrue(len(block._content) >= 100)
        # We have not decompressed the whole content
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # ensuring content that we already have shouldn't cause any more data
        # to be extracted
        cur_len = len(block._content)
        block._ensure_content(cur_len - 10)
        self.assertEqual(cur_len, len(block._content))
        # Now we want a bit more content
        cur_len += 10
        block._ensure_content(cur_len)
        self.assertTrue(len(block._content) >= cur_len)
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # And now let's finish
        block._ensure_content(158634)
        self.assertEqualDiff(content, block._content)
        # And the decompressor is finalized
        self.assertIs(None, block._z_content_decompressor)

    def test_partial_decomp_no_known_length(self):
        content_chunks = []
        for i in xrange(2048):
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
            content_chunks.append(next_content)
            next_sha1 = osutils.sha_string(next_content)
            content_chunks.append(next_sha1 + '\n')
        content = ''.join(content_chunks)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content = z_content
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = None # Don't tell the decompressed length
        self.assertIs(None, block._content)
        block._ensure_content(100)
        self.assertIsNot(None, block._content)
        # We have decompressed at least 100 bytes
        self.assertTrue(len(block._content) >= 100)
        # We have not decompressed the whole content
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # ensuring content that we already have shouldn't cause any more data
        # to be extracted
        cur_len = len(block._content)
        block._ensure_content(cur_len - 10)
        self.assertEqual(cur_len, len(block._content))
        # Now we want a bit more content
        cur_len += 10
        block._ensure_content(cur_len)
        self.assertTrue(len(block._content) >= cur_len)
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # And now let's finish
        block._ensure_content()
        self.assertEqualDiff(content, block._content)
        # And the decompressor is finalized
        self.assertIs(None, block._z_content_decompressor)

    def test__dump(self):
        dup_content = 'some duplicate content\nwhich is sufficiently long\n'
        key_to_text = {('1',): dup_content + '1 unique\n',
                       ('2',): dup_content + '2 extra special\n'}
        locs, block = self.make_block(key_to_text)
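        # _dump() describes each record: ('f', length) for a fulltext, and
        # ('d', delta_length, total_length, [instructions]) for a delta,
        # where instructions are ('c', offset, length) copies and
        # ('i', length, text) inserts (the text itself is omitted here).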
        self.assertEqual([('f', len(key_to_text[('1',)])),
                          ('d', 21, len(key_to_text[('2',)]),
                           [('c', 2, len(dup_content)),
                            ('i', len('2 extra special\n'), '')
                           ]),
                         ], block._dump())


class TestCaseWithGroupCompressVersionedFiles(tests.TestCaseWithTransport):

    def make_test_vf(self, create_graph, keylength=1, do_cleanup=True,
                     dir='.', inconsistency_fatal=True):
        t = self.get_transport(dir)
        t.ensure_base()
        vf = groupcompress.make_pack_factory(graph=create_graph,
            delta=False, keylength=keylength,
            inconsistency_fatal=inconsistency_fatal)(t)
        if do_cleanup:
            self.addCleanup(groupcompress.cleanup_pack_group, vf)
        return vf


class TestGroupCompressVersionedFiles(TestCaseWithGroupCompressVersionedFiles):

    def make_g_index(self, name, ref_lists=0, nodes=[]):
        builder = btree_index.BTreeBuilder(ref_lists)
        for node, references, value in nodes:
            builder.add_node(node, references, value)
        stream = builder.finish()
        trans = self.get_transport()
        size = trans.put_file(name, stream)
        return btree_index.BTreeGraphIndex(trans, name, size)

    def make_g_index_missing_parent(self):
        graph_index = self.make_g_index('missing_parent', 1,
            [(('parent', ), '2 78 2 10', ([],)),
             (('tip', ), '2 78 2 10',
              ([('parent', ), ('missing-parent', )],)),
              ])
        return graph_index

    def test_get_record_stream_as_requested(self):
        # Consider promoting 'as-requested' to general availability, and
        # make this a VF interface test
        vf = self.make_test_vf(False, dir='source')
        vf.add_lines(('a',), (), ['lines\n'])
        vf.add_lines(('b',), (), ['lines\n'])
        vf.add_lines(('c',), (), ['lines\n'])
        vf.add_lines(('d',), (), ['lines\n'])
        vf.writer.end()
        keys = [record.key for record in vf.get_record_stream(
                    [('a',), ('b',), ('c',), ('d',)],
                    'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)],
                    'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

        # It should work even after being repacked into another VF
        vf2 = self.make_test_vf(False, dir='target')
        vf2.insert_record_stream(vf.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)], 'as-requested', False))
        vf2.writer.end()

        keys = [record.key for record in vf2.get_record_stream(
                    [('a',), ('b',), ('c',), ('d',)],
                    'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf2.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)],
                    'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

    def test_insert_record_stream_re_uses_blocks(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
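        # Each yielded record becomes the parent of the next, so every stream
        # forms one linear chain of near-identical texts.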
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        block_bytes = {}
        stream = vf.get_record_stream([(r,) for r in 'abcdefgh'],
                                      'unordered', False)
        num_records = 0
        for record in stream:
            if record.key in [('a',), ('e',)]:
                self.assertEqual('groupcompress-block', record.storage_kind)
            else:
                self.assertEqual('groupcompress-block-ref',
                                 record.storage_kind)
            block_bytes[record.key] = record._manager._block._z_content
            num_records += 1
        self.assertEqual(8, num_records)
        for r in 'abcd':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('a',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('e',)])
        for r in 'efgh':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('e',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('a',)])
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # Ordering in 'groupcompress' order should actually swap the groups in
        # the target vf, but the groups themselves should not be disturbed.
        vf2.insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False))
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        vf2.writer.end()
        num_records = 0
        for record in stream:
            num_records += 1
            self.assertEqual(block_bytes[record.key],
                             record._manager._block._z_content)
        self.assertEqual(8, num_records)

    def test__insert_record_stream_no_reuse_block(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        vf.writer.end()
        self.assertEqual(8, len(list(vf.get_record_stream(
                                        [(r,) for r in 'abcdefgh'],
                                        'unordered', False))))
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # Ordering in 'groupcompress' order should actually swap the groups in
        # the target vf, but the groups themselves should not be disturbed.
        list(vf2._insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False),
            reuse_blocks=False))
        vf2.writer.end()
        # After inserting with reuse_blocks=False, we should have everything in
        # a single new block.
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        block = None
        for record in stream:
            if block is None:
                block = record._manager._block
            else:
                self.assertIs(block, record._manager._block)

    def test_add_missing_noncompression_parent_unvalidated_index(self):
        unvalidated = self.make_g_index_missing_parent()
        combined = _mod_index.CombinedGraphIndex([unvalidated])
        index = groupcompress._GCGraphIndex(combined,
            is_locked=lambda: True, parents=True,
            track_external_parent_refs=True)
        index.scan_unvalidated_index(unvalidated)
        self.assertEqual(
            frozenset([('missing-parent',)]), index.get_missing_parents())

    def test_track_external_parent_refs(self):
        g_index = self.make_g_index('empty', 1, [])
        mod_index = btree_index.BTreeBuilder(1, 1)
        combined = _mod_index.CombinedGraphIndex([g_index, mod_index])
        index = groupcompress._GCGraphIndex(combined,
            is_locked=lambda: True, parents=True,
            add_callback=mod_index.add_nodes,
            track_external_parent_refs=True)
        index.add_records([
            (('new-key',), '2 10 2 10', [(('parent-1',), ('parent-2',))])])
        self.assertEqual(
            frozenset([('parent-1',), ('parent-2',)]),
            index.get_missing_parents())

    def make_source_with_b(self, a_parent, path):
        source = self.make_test_vf(True, dir=path)
        source.add_lines(('a',), (), ['lines\n'])
        if a_parent:
            b_parents = (('a',),)
        else:
            b_parents = ()
        source.add_lines(('b',), b_parents, ['lines\n'])
        return source

    def do_inconsistent_inserts(self, inconsistency_fatal):
        target = self.make_test_vf(True, dir='target',
                                   inconsistency_fatal=inconsistency_fatal)
        for x in range(2):
            source = self.make_source_with_b(x==1, 'source%s' % x)
            target.insert_record_stream(source.get_record_stream(
                [('b',)], 'unordered', False))

    def test_inconsistent_redundant_inserts_warn(self):
        """Should not insert a record that is already present."""
        warnings = []
        def warning(template, args):
            warnings.append(template % args)
        _trace_warning = trace.warning
        trace.warning = warning
        try:
            self.do_inconsistent_inserts(inconsistency_fatal=False)
        finally:
            trace.warning = _trace_warning
        self.assertEqual(["inconsistent details in skipped record: ('b',)"
                          " ('42 32 0 8', ((),)) ('74 32 0 8', ((('a',),),))"],
                         warnings)

    def test_inconsistent_redundant_inserts_raises(self):
        e = self.assertRaises(errors.KnitCorrupt, self.do_inconsistent_inserts,
                              inconsistency_fatal=True)
        self.assertContainsRe(str(e), "Knit.* corrupt: inconsistent details"
                              " in add_records:"
                              " \('b',\) \('42 32 0 8', \(\(\),\)\) \('74 32"
                              " 0 8', \(\(\('a',\),\),\)\)")


class TestLazyGroupCompress(tests.TestCaseWithTransport):

    _texts = {
        ('key1',): "this is a text\n"
                   "with a reasonable amount of compressible bytes\n",
        ('key2',): "another text\n"
                   "with a reasonable amount of compressible bytes\n",
        ('key3',): "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n",
        ('key4',): "this will be extracted\n"
                   "but references most of its bytes from\n"
                   "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n",
    }

    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts."""
        compressor = groupcompress.GroupCompressor()
        start = 0
        for key in sorted(key_to_text):
            compressor.compress(key, key_to_text[key], None)
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.iteritems())
        block = compressor.flush()
        raw_bytes = block.to_bytes()
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)

    def add_key_to_manager(self, key, locations, block, manager):
        start, end = locations[key]
        manager.add_factory(key, (), start, end)

    def test_get_fulltexts(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key2',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key1',), ('key2',)], result_order)

        # If we build the manager in the opposite order, we should get them
        # back in the opposite order
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key2',), locations, block, manager)
        self.add_key_to_manager(('key1',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key2',), ('key1',)], result_order)

    def test__wire_bytes_no_keys(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        wire_bytes = manager._wire_bytes()
        block_length = len(block.to_bytes())
        # We should have triggered a strip, since we aren't using any content
        stripped_block = manager._block.to_bytes()
        self.assertTrue(block_length > len(stripped_block))
        empty_z_header = zlib.compress('')
        self.assertEqual('groupcompress-block\n'
                         '8\n'  # len(compress(''))
                         '0\n'  # len('')
                         '%d\n' # compressed block len
                         '%s'   # zheader
                         '%s'   # block
                         % (len(stripped_block), empty_z_header,
                            stripped_block),
                         wire_bytes)

    def test__wire_bytes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        block_bytes = block.to_bytes()
        wire_bytes = manager._wire_bytes()
        (storage_kind, z_header_len, header_len,
         block_len, rest) = wire_bytes.split('\n', 4)
        z_header_len = int(z_header_len)
        header_len = int(header_len)
        block_len = int(block_len)
        self.assertEqual('groupcompress-block', storage_kind)
        self.assertEqual(33, z_header_len)
        self.assertEqual(25, header_len)
        self.assertEqual(len(block_bytes), block_len)
        z_header = rest[:z_header_len]
        header = zlib.decompress(z_header)
        self.assertEqual(header_len, len(header))
        entry1 = locations[('key1',)]
        entry4 = locations[('key4',)]
        self.assertEqualDiff('key1\n'
                             '\n'   # no parents
                             '%d\n' # start offset
                             '%d\n' # end offset
                             'key4\n'
                             '\n'
                             '%d\n'
                             '%d\n'
                             % (entry1[0], entry1[1],
                                entry4[0], entry4[1]),
                             header)
        z_block = rest[z_header_len:]
        self.assertEqual(block_bytes, z_block)

    def test_from_bytes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        wire_bytes = manager._wire_bytes()
        self.assertStartsWith(wire_bytes, 'groupcompress-block\n')
        manager = groupcompress._LazyGroupContentManager.from_bytes(wire_bytes)
        self.assertIsInstance(manager, groupcompress._LazyGroupContentManager)
        self.assertEqual(2, len(manager._factories))
        self.assertEqual(block._z_content, manager._block._z_content)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key1',), ('key4',)], result_order)

    def test__check_rebuild_no_changes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Request all the keys, which ensures that we won't rebuild
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key2',), locations, block, manager)
        self.add_key_to_manager(('key3',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIs(block, manager._block)

    def test__check_rebuild_only_one(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Request just the first key, which should trigger a 'strip' action
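        # (A 'strip' presumably just truncates the block to the prefix that
        # is actually needed, while a full 'rebuild', exercised in
        # test__check_rebuild_middle below, recompresses only the requested
        # content.)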
        self.add_key_to_manager(('key1',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        # We should still be able to get the content out of this block, though
        # it should only have 1 entry
        for record in manager.get_record_stream():
            self.assertEqual(('key1',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))

    def test__check_rebuild_middle(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Requesting a small key in the middle should trigger a 'rebuild'
        self.add_key_to_manager(('key4',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        for record in manager.get_record_stream():
            self.assertEqual(('key4',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))