~bzr-pqm/bzr/bzr.dev

Viewing changes to bzrlib/tests/test_groupcompress.py

  • Committer: Robert Collins
  • Date: 2005-08-23 06:52:09 UTC
  • mto: (974.1.50) (1185.1.10) (1092.3.1)
  • mto: This revision was merged to the branch mainline in revision 1139.
  • Revision ID: robertc@robertcollins.net-20050823065209-81cd5962c401751b
move io redirection into each test case from the global runner

# Copyright (C) 2008, 2009 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Tests for group compression."""

import zlib

from bzrlib import (
    btree_index,
    groupcompress,
    errors,
    index as _mod_index,
    osutils,
    tests,
    trace,
    versionedfile,
    )
from bzrlib.osutils import sha_string
from bzrlib.tests.test__groupcompress import CompiledGroupCompressFeature


def load_tests(standard_tests, module, loader):
    """Parameterize tests for all versions of groupcompress."""
    to_adapt, result = tests.split_suite_by_condition(
        standard_tests, tests.condition_isinstance(TestAllGroupCompressors))
    scenarios = [
        ('python', {'compressor': groupcompress.PythonGroupCompressor}),
        ]
    if CompiledGroupCompressFeature.available():
        scenarios.append(('C',
            {'compressor': groupcompress.PyrexGroupCompressor}))
    return tests.multiply_tests(to_adapt, scenarios, result)
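
# (A hedged note on the parameterization above: tests.multiply_tests clones
# each TestAllGroupCompressors test once per scenario and applies the scenario
# dict as attributes, so every test runs with
# compressor=PythonGroupCompressor and, when the compiled extension is
# available, again with compressor=PyrexGroupCompressor.)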


class TestGroupCompressor(tests.TestCase):

    def _chunks_to_repr_lines(self, chunks):
        return '\n'.join(map(repr, ''.join(chunks).split('\n')))

    def assertEqualDiffEncoded(self, expected, actual):
        """Compare the actual content to the expected content.

        :param expected: A group of chunks that we expect to see
        :param actual: The measured 'chunks'

        We will transform the chunks back into lines, and then run 'repr()'
        over them to handle non-ascii characters.
        """
        self.assertEqualDiff(self._chunks_to_repr_lines(expected),
                             self._chunks_to_repr_lines(actual))


class TestAllGroupCompressors(TestGroupCompressor):
    """Tests for GroupCompressor"""

    compressor = None # Set by multiply_tests

    def test_empty_delta(self):
        compressor = self.compressor()
        self.assertEqual([], compressor.chunks)

    def test_one_nosha_delta(self):
        # diff against NULL
        compressor = self.compressor()
        sha1, start_point, end_point, _ = compressor.compress(('label',),
            'strange\ncommon\n', None)
        self.assertEqual(sha_string('strange\ncommon\n'), sha1)
        expected_lines = 'f' '\x0f' 'strange\ncommon\n'
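        # (Assumed from the byte layout asserted below: 'f' marks a fulltext
        # record and '\x0f' is its length, 15, as a single base-128 varint
        # byte; texts of 128 bytes or more would need continuation bytes.)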
        self.assertEqual(expected_lines, ''.join(compressor.chunks))
        self.assertEqual(0, start_point)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_empty_content(self):
        compressor = self.compressor()
        # Adding empty bytes should return the 'null' record
        sha1, start_point, end_point, kind = compressor.compress(('empty',),
                                                                 '', None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)
        self.assertEqual(0, compressor.endpoint)
        self.assertEqual([], compressor.chunks)
        # Even after adding some content
        compressor.compress(('content',), 'some\nbytes\n', None)
        self.assertTrue(compressor.endpoint > 0)
        sha1, start_point, end_point, kind = compressor.compress(('empty2',),
                                                                 '', None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)

    def test_extract_from_compressor(self):
        # Knit fetching will try to reconstruct texts locally, which results in
        # reading something that is already in the compressor stream.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, _, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        # get the first out
        self.assertEqual(('strange\ncommon long line\n'
                          'that needs a 16 byte match\n', sha1_1),
                         compressor.extract(('label',)))
        # and the second
        self.assertEqual(('common long line\nthat needs a 16 byte match\n'
                          'different\n', sha1_2),
                         compressor.extract(('newlabel',)))

    def test_pop_last(self):
        compressor = self.compressor()
        _, _, _, _ = compressor.compress(('key1',),
            'some text\nfor the first entry\n', None)
        expected_lines = list(compressor.chunks)
        _, _, _, _ = compressor.compress(('key2',),
            'some text\nfor the second entry\n', None)
        compressor.pop_last()
        self.assertEqual(expected_lines, compressor.chunks)
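        # (A hedged note: pop_last() appears to exist so a caller can retract
        # the most recently compressed text, e.g. when it would compress
        # better at the start of a new group; only the last entry can be
        # popped, which is why expected_lines is snapshotted just before the
        # second compress() call.)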


class TestPyrexGroupCompressor(TestGroupCompressor):

    _test_needs_features = [CompiledGroupCompressFeature]
    compressor = groupcompress.PyrexGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            'strange\n'
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'new\n'
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0f',
            # target length
            '\x36',
            # copy the line common
            '\x91\x0a\x2c', # copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
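        # (Assumed encoding, inferred from the bytes above: an instruction
        # byte with the high bit set is a copy; 0x91 = 0x80|0x10|0x01 asks for
        # one offset byte (0x0a) and one length byte (0x2c). A byte below 0x80
        # is an insert of that many literal bytes, here 0x0a == 10.)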
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0b',
            # target length
            '\x5f'
            # insert new
            '\x03new',
            # Copy of first parent 'common' range
            '\x91\x09\x31' # copy, offset 0x09, 0x31 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)
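

# A minimal sketch, assuming the git-style copy-instruction encoding that the
# expected delta bytes above follow (high bit set marks a copy; bits 0-3
# select offset bytes, bits 4-6 select length bytes). The helper name is
# illustrative, not a bzrlib API.
def _example_decode_copy(delta, pos):
    """Return (offset, length, new_pos) for the copy instruction at pos."""
    cmd = ord(delta[pos])
    pos += 1
    assert cmd & 0x80, 'copy instructions have the high bit set'
    offset = length = 0
    for i in range(4):              # bits 0-3: up to four offset bytes
        if cmd & (1 << i):
            offset |= ord(delta[pos]) << (i * 8)
            pos += 1
    for i in range(3):              # bits 4-6: up to three length bytes
        if cmd & (1 << (4 + i)):
            length |= ord(delta[pos]) << (i * 8)
            pos += 1
    return offset, length, pos
# e.g. _example_decode_copy('\x91\x0a\x2c', 0) == (0x0a, 0x2c, 3), matching
# the '# copy, offset 0x0a, len 0x2c' comments in the tests above.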


class TestPythonGroupCompressor(TestGroupCompressor):

    compressor = groupcompress.PythonGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            'strange\n'
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'new\n'
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0f',
            # target length
            '\x36',
            # copy the line common
            '\x91\x0a\x2c', # copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0c',
            # target length
            '\x5f'
            # insert new
            '\x04new\n',
            # Copy of first parent 'common' range
            '\x91\x0a\x30' # copy, offset 0x0a, 0x30 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)


class TestGroupCompressBlock(tests.TestCase):

    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts."""
        compressor = groupcompress.GroupCompressor()
        start = 0
        for key in sorted(key_to_text):
            compressor.compress(key, key_to_text[key], None)
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.iteritems())
        block = compressor.flush()
        raw_bytes = block.to_bytes()
        # Go through from_bytes(to_bytes()) so that we start with a compressed
        # content object
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)
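        # (Note, assumed from labels_deltas: locs maps each key to its
        # (start, end) byte range within the uncompressed block. The generator
        # expression binds its own 'start', so the start = 0 above appears
        # vestigial.)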

    def test_from_empty_bytes(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes, '')

    def test_from_minimal_bytes(self):
        block = groupcompress.GroupCompressBlock.from_bytes(
            'gcb1z\n0\n0\n')
        self.assertIsInstance(block, groupcompress.GroupCompressBlock)
        self.assertIs(None, block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content()
        self.assertEqual('', block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content() # Ensure content is safe to call 2x

    def test_from_invalid(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes,
                          'this is not a valid header')

    def test_from_bytes(self):
        content = ('a tiny bit of content\n')
        z_content = zlib.compress(content)
        z_bytes = (
            'gcb1z\n' # group compress block v1 zlib
            '%d\n' # Length of compressed content
            '%d\n' # Length of uncompressed content
            '%s'   # Compressed content
            ) % (len(z_content), len(content), z_content)
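        # (Header layout exercised here: the 'gcb1z\n' magic, then the
        # compressed and uncompressed lengths as decimal ASCII lines, then
        # the raw zlib stream.)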
        block = groupcompress.GroupCompressBlock.from_bytes(
            z_bytes)
        self.assertEqual(z_content, block._z_content)
        self.assertIs(None, block._content)
        self.assertEqual(len(z_content), block._z_content_length)
        self.assertEqual(len(content), block._content_length)
        block._ensure_content()
        self.assertEqual(z_content, block._z_content)
        self.assertEqual(content, block._content)

    def test_to_bytes(self):
        content = ('this is some content\n'
                   'this content will be compressed\n')
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_content(content)
        bytes = gcb.to_bytes()
        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
        self.assertEqual(gcb._content_length, len(content))
        expected_header =('gcb1z\n' # group compress block v1 zlib
                          '%d\n' # Length of compressed content
                          '%d\n' # Length of uncompressed content
                         ) % (gcb._z_content_length, gcb._content_length)
        self.assertStartsWith(bytes, expected_header)
        remaining_bytes = bytes[len(expected_header):]
        raw_bytes = zlib.decompress(remaining_bytes)
        self.assertEqual(content, raw_bytes)

        # we should get the same results if using the chunked version
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_chunked_content(['this is some content\n'
                                 'this content will be compressed\n'],
                                 len(content))
        old_bytes = bytes
        bytes = gcb.to_bytes()
        self.assertEqual(old_bytes, bytes)

    def test_partial_decomp(self):
        content_chunks = []
        # We need a sufficient amount of data so that zlib.decompress has
        # something to do partial decompression with. Most auto-generated data
        # compresses a bit too well, so we mix sha hashes in with the
        # compressible text.
        for i in xrange(2048):
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
            content_chunks.append(next_content)
            next_sha1 = osutils.sha_string(next_content)
            content_chunks.append(next_sha1 + '\n')
        content = ''.join(content_chunks)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content = z_content
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = 158634
        self.assertIs(None, block._content)
        block._ensure_content(100)
        self.assertIsNot(None, block._content)
        # We have decompressed at least 100 bytes
        self.assertTrue(len(block._content) >= 100)
        # We have not decompressed the whole content
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # ensuring content that we already have shouldn't cause any more data
        # to be extracted
        cur_len = len(block._content)
        block._ensure_content(cur_len - 10)
        self.assertEqual(cur_len, len(block._content))
        # Now we want a bit more content
        cur_len += 10
        block._ensure_content(cur_len)
        self.assertTrue(len(block._content) >= cur_len)
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # And now let's finish
        block._ensure_content(158634)
        self.assertEqualDiff(content, block._content)
        # And the decompressor is finalized
        self.assertIs(None, block._z_content_decompressor)

    def test_partial_decomp_no_known_length(self):
        content_chunks = []
        for i in xrange(2048):
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
            content_chunks.append(next_content)
            next_sha1 = osutils.sha_string(next_content)
            content_chunks.append(next_sha1 + '\n')
        content = ''.join(content_chunks)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content = z_content
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = None # Don't tell the decompressed length
        self.assertIs(None, block._content)
        block._ensure_content(100)
        self.assertIsNot(None, block._content)
        # We have decompressed at least 100 bytes
        self.assertTrue(len(block._content) >= 100)
        # We have not decompressed the whole content
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # ensuring content that we already have shouldn't cause any more data
        # to be extracted
        cur_len = len(block._content)
        block._ensure_content(cur_len - 10)
        self.assertEqual(cur_len, len(block._content))
        # Now we want a bit more content
        cur_len += 10
        block._ensure_content(cur_len)
        self.assertTrue(len(block._content) >= cur_len)
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # And now let's finish
        block._ensure_content()
        self.assertEqualDiff(content, block._content)
        # And the decompressor is finalized
        self.assertIs(None, block._z_content_decompressor)

    def test__dump(self):
        dup_content = 'some duplicate content\nwhich is sufficiently long\n'
        key_to_text = {('1',): dup_content + '1 unique\n',
                       ('2',): dup_content + '2 extra special\n'}
        locs, block = self.make_block(key_to_text)
        self.assertEqual([('f', len(key_to_text[('1',)])),
                          ('d', 21, len(key_to_text[('2',)]),
                           [('c', 2, len(dup_content)),
                            ('i', len('2 extra special\n'), '')
                           ]),
                         ], block._dump())


class TestCaseWithGroupCompressVersionedFiles(tests.TestCaseWithTransport):

    def make_test_vf(self, create_graph, keylength=1, do_cleanup=True,
                     dir='.', inconsistency_fatal=True):
        t = self.get_transport(dir)
        t.ensure_base()
        vf = groupcompress.make_pack_factory(graph=create_graph,
            delta=False, keylength=keylength,
            inconsistency_fatal=inconsistency_fatal)(t)
        if do_cleanup:
            self.addCleanup(groupcompress.cleanup_pack_group, vf)
        return vf


class TestGroupCompressVersionedFiles(TestCaseWithGroupCompressVersionedFiles):

    def make_g_index(self, name, ref_lists=0, nodes=[]):
        builder = btree_index.BTreeBuilder(ref_lists)
        for node, references, value in nodes:
            builder.add_node(node, references, value)
        stream = builder.finish()
        trans = self.get_transport()
        size = trans.put_file(name, stream)
        return btree_index.BTreeGraphIndex(trans, name, size)

    def make_g_index_missing_parent(self):
        graph_index = self.make_g_index('missing_parent', 1,
            [(('parent', ), '2 78 2 10', ([],)),
             (('tip', ), '2 78 2 10',
              ([('parent', ), ('missing-parent', )],)),
              ])
        return graph_index

    def test_get_record_stream_as_requested(self):
        # Consider promoting 'as-requested' to general availability, and
        # make this a VF interface test
        vf = self.make_test_vf(False, dir='source')
        vf.add_lines(('a',), (), ['lines\n'])
        vf.add_lines(('b',), (), ['lines\n'])
        vf.add_lines(('c',), (), ['lines\n'])
        vf.add_lines(('d',), (), ['lines\n'])
        vf.writer.end()
        keys = [record.key for record in vf.get_record_stream(
                    [('a',), ('b',), ('c',), ('d',)],
                    'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)],
                    'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

        # It should work even after being repacked into another VF
        vf2 = self.make_test_vf(False, dir='target')
        vf2.insert_record_stream(vf.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)], 'as-requested', False))
        vf2.writer.end()

        keys = [record.key for record in vf2.get_record_stream(
                    [('a',), ('b',), ('c',), ('d',)],
                    'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf2.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)],
                    'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

    def test_insert_record_stream_re_uses_blocks(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        block_bytes = {}
        stream = vf.get_record_stream([(r,) for r in 'abcdefgh'],
                                      'unordered', False)
        num_records = 0
        for record in stream:
            if record.key in [('a',), ('e',)]:
                self.assertEqual('groupcompress-block', record.storage_kind)
            else:
                self.assertEqual('groupcompress-block-ref',
                                 record.storage_kind)
            block_bytes[record.key] = record._manager._block._z_content
            num_records += 1
        self.assertEqual(8, num_records)
        for r in 'abcd':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('a',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('e',)])
        for r in 'efgh':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('e',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('a',)])
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # Ordering in 'groupcompress' order should actually swap the groups in
        # the target vf, but the groups themselves should not be disturbed.
        vf2.insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False))
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        vf2.writer.end()
        num_records = 0
        for record in stream:
            num_records += 1
            self.assertEqual(block_bytes[record.key],
                             record._manager._block._z_content)
        self.assertEqual(8, num_records)

    def test__insert_record_stream_no_reuse_block(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        vf.writer.end()
        self.assertEqual(8, len(list(vf.get_record_stream(
                                        [(r,) for r in 'abcdefgh'],
                                        'unordered', False))))
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # Ordering in 'groupcompress' order should actually swap the groups in
        # the target vf, but the groups themselves should not be disturbed.
        list(vf2._insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False),
            reuse_blocks=False))
        vf2.writer.end()
        # After inserting with reuse_blocks=False, we should have everything in
        # a single new block.
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        block = None
        for record in stream:
            if block is None:
                block = record._manager._block
            else:
                self.assertIs(block, record._manager._block)

    def test_add_missing_noncompression_parent_unvalidated_index(self):
        unvalidated = self.make_g_index_missing_parent()
        combined = _mod_index.CombinedGraphIndex([unvalidated])
        index = groupcompress._GCGraphIndex(combined,
            is_locked=lambda: True, parents=True,
            track_external_parent_refs=True)
        index.scan_unvalidated_index(unvalidated)
        self.assertEqual(
            frozenset([('missing-parent',)]), index.get_missing_parents())

    def test_track_external_parent_refs(self):
        g_index = self.make_g_index('empty', 1, [])
        mod_index = btree_index.BTreeBuilder(1, 1)
        combined = _mod_index.CombinedGraphIndex([g_index, mod_index])
        index = groupcompress._GCGraphIndex(combined,
            is_locked=lambda: True, parents=True,
            add_callback=mod_index.add_nodes,
            track_external_parent_refs=True)
        index.add_records([
            (('new-key',), '2 10 2 10', [(('parent-1',), ('parent-2',))])])
        self.assertEqual(
            frozenset([('parent-1',), ('parent-2',)]),
            index.get_missing_parents())

    def make_source_with_b(self, a_parent, path):
        source = self.make_test_vf(True, dir=path)
        source.add_lines(('a',), (), ['lines\n'])
        if a_parent:
            b_parents = (('a',),)
        else:
            b_parents = ()
        source.add_lines(('b',), b_parents, ['lines\n'])
        return source

    def do_inconsistent_inserts(self, inconsistency_fatal):
        target = self.make_test_vf(True, dir='target',
                                   inconsistency_fatal=inconsistency_fatal)
        for x in range(2):
            source = self.make_source_with_b(x==1, 'source%s' % x)
            target.insert_record_stream(source.get_record_stream(
                [('b',)], 'unordered', False))

    def test_inconsistent_redundant_inserts_warn(self):
        """Should not insert a record that is already present."""
        warnings = []
        def warning(template, args):
            warnings.append(template % args)
        _trace_warning = trace.warning
        trace.warning = warning
        try:
            self.do_inconsistent_inserts(inconsistency_fatal=False)
        finally:
            trace.warning = _trace_warning
        self.assertEqual(["inconsistent details in skipped record: ('b',)"
                          " ('42 32 0 8', ((),)) ('74 32 0 8', ((('a',),),))"],
                         warnings)

    def test_inconsistent_redundant_inserts_raises(self):
        e = self.assertRaises(errors.KnitCorrupt, self.do_inconsistent_inserts,
                              inconsistency_fatal=True)
        self.assertContainsRe(str(e), "Knit.* corrupt: inconsistent details"
                              " in add_records:"
                              " \('b',\) \('42 32 0 8', \(\(\),\)\) \('74 32"
                              " 0 8', \(\(\('a',\),\),\)\)")


class TestLazyGroupCompress(tests.TestCaseWithTransport):

    _texts = {
        ('key1',): "this is a text\n"
                   "with a reasonable amount of compressible bytes\n",
        ('key2',): "another text\n"
                   "with a reasonable amount of compressible bytes\n",
        ('key3',): "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n",
        ('key4',): "this will be extracted\n"
                   "but references most of its bytes from\n"
                   "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n",
    }
    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts."""
        compressor = groupcompress.GroupCompressor()
        start = 0
        for key in sorted(key_to_text):
            compressor.compress(key, key_to_text[key], None)
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.iteritems())
        block = compressor.flush()
        raw_bytes = block.to_bytes()
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)

    def add_key_to_manager(self, key, locations, block, manager):
        start, end = locations[key]
        manager.add_factory(key, (), start, end)

    def test_get_fulltexts(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key2',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key1',), ('key2',)], result_order)

        # If we build the manager in the opposite order, we should get them
        # back in the opposite order
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key2',), locations, block, manager)
        self.add_key_to_manager(('key1',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key2',), ('key1',)], result_order)

    def test__wire_bytes_no_keys(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        wire_bytes = manager._wire_bytes()
        block_length = len(block.to_bytes())
        # We should have triggered a strip, since we aren't using any content
        stripped_block = manager._block.to_bytes()
        self.assertTrue(block_length > len(stripped_block))
        empty_z_header = zlib.compress('')
        self.assertEqual('groupcompress-block\n'
                         '8\n' # len(compress(''))
                         '0\n' # len('')
                         '%d\n'# compressed block len
                         '%s'  # zheader
                         '%s'  # block
                         % (len(stripped_block), empty_z_header,
                            stripped_block),
                         wire_bytes)
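        # (Wire layout asserted above: a 'groupcompress-block\n' kind line,
        # then the compressed header length, uncompressed header length, and
        # block length as decimal ASCII lines, then the zlib-compressed
        # per-key header followed by the block bytes verbatim.)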

    def test__wire_bytes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        block_bytes = block.to_bytes()
        wire_bytes = manager._wire_bytes()
        (storage_kind, z_header_len, header_len,
         block_len, rest) = wire_bytes.split('\n', 4)
        z_header_len = int(z_header_len)
        header_len = int(header_len)
        block_len = int(block_len)
        self.assertEqual('groupcompress-block', storage_kind)
        self.assertEqual(33, z_header_len)
        self.assertEqual(25, header_len)
        self.assertEqual(len(block_bytes), block_len)
        z_header = rest[:z_header_len]
        header = zlib.decompress(z_header)
        self.assertEqual(header_len, len(header))
        entry1 = locations[('key1',)]
        entry4 = locations[('key4',)]
        self.assertEqualDiff('key1\n'
                             '\n'  # no parents
                             '%d\n' # start offset
                             '%d\n' # end offset
                             'key4\n'
                             '\n'
                             '%d\n'
                             '%d\n'
                             % (entry1[0], entry1[1],
                                entry4[0], entry4[1]),
                            header)
        z_block = rest[z_header_len:]
        self.assertEqual(block_bytes, z_block)

    def test_from_bytes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        wire_bytes = manager._wire_bytes()
        self.assertStartsWith(wire_bytes, 'groupcompress-block\n')
        manager = groupcompress._LazyGroupContentManager.from_bytes(wire_bytes)
        self.assertIsInstance(manager, groupcompress._LazyGroupContentManager)
        self.assertEqual(2, len(manager._factories))
        self.assertEqual(block._z_content, manager._block._z_content)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key1',), ('key4',)], result_order)

    def test__check_rebuild_no_changes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Request all the keys, which ensures that we won't rebuild
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key2',), locations, block, manager)
        self.add_key_to_manager(('key3',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIs(block, manager._block)

    def test__check_rebuild_only_one(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Request just the first key, which should trigger a 'strip' action
        self.add_key_to_manager(('key1',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        # We should still be able to get the content out of this block, though
        # it should only have 1 entry
        for record in manager.get_record_stream():
            self.assertEqual(('key1',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))

    def test__check_rebuild_middle(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Requesting a small key in the middle should trigger a 'rebuild'
        self.add_key_to_manager(('key4',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        for record in manager.get_record_stream():
            self.assertEqual(('key4',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))