~bzr-pqm/bzr/bzr.dev

Viewing changes to bzrlib/tests/test_groupcompress.py

Exclude more files from dumb-rsync upload

# Copyright (C) 2008, 2009 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Tests for group compression."""

import zlib

from bzrlib import (
    btree_index,
    groupcompress,
    errors,
    index as _mod_index,
    osutils,
    tests,
    versionedfile,
    )
from bzrlib.osutils import sha_string
from bzrlib.tests.test__groupcompress import CompiledGroupCompressFeature


def load_tests(standard_tests, module, loader):
    """Parameterize tests for all versions of groupcompress."""
    to_adapt, result = tests.split_suite_by_condition(
        standard_tests, tests.condition_isinstance(TestAllGroupCompressors))
    scenarios = [
        ('python', {'compressor': groupcompress.PythonGroupCompressor}),
        ]
    if CompiledGroupCompressFeature.available():
        scenarios.append(('C',
            {'compressor': groupcompress.PyrexGroupCompressor}))
    return tests.multiply_tests(to_adapt, scenarios, result)


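# An editorial note (not part of the original module): load_tests above
# multiplies every test in TestAllGroupCompressors across the listed
# scenarios, so each test runs with PythonGroupCompressor and, when the
# compiled extension is available, again with PyrexGroupCompressor, the
# chosen class being bound to the 'compressor' attribute used below.

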
class TestGroupCompressor(tests.TestCase):

    def _chunks_to_repr_lines(self, chunks):
        return '\n'.join(map(repr, ''.join(chunks).split('\n')))

    def assertEqualDiffEncoded(self, expected, actual):
        """Compare the actual content to the expected content.

        :param expected: A group of chunks that we expect to see
        :param actual: The measured 'chunks'

        We will transform the chunks back into lines, and then run 'repr()'
        over them to handle non-ascii characters.
        """
        self.assertEqualDiff(self._chunks_to_repr_lines(expected),
                             self._chunks_to_repr_lines(actual))


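# A tiny illustration (editorial, not original code) of the transform used
# by assertEqualDiffEncoded above: the chunks are joined, re-split on
# newlines, and repr()'d so that non-ascii bytes stay readable in a test
# failure diff. For example:
#
#   '\n'.join(map(repr, ''.join(['a\nb', '\xc2\xb5\n']).split('\n')))
#   == "'a'\n'b\\xc2\\xb5'\n''"

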
class TestAllGroupCompressors(TestGroupCompressor):
    """Tests for GroupCompressor"""

    compressor = None # Set by multiply_tests

    def test_empty_delta(self):
        compressor = self.compressor()
        self.assertEqual([], compressor.chunks)

    def test_one_nosha_delta(self):
        # diff against NULL
        compressor = self.compressor()
        sha1, start_point, end_point, _ = compressor.compress(('label',),
            'strange\ncommon\n', None)
        self.assertEqual(sha_string('strange\ncommon\n'), sha1)
        expected_lines = 'f' '\x0f' 'strange\ncommon\n'
        self.assertEqual(expected_lines, ''.join(compressor.chunks))
        self.assertEqual(0, start_point)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_empty_content(self):
        compressor = self.compressor()
        # Adding empty bytes should return the 'null' record
        sha1, start_point, end_point, kind = compressor.compress(('empty',),
                                                                 '', None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)
        self.assertEqual(0, compressor.endpoint)
        self.assertEqual([], compressor.chunks)
        # Even after adding some content
        compressor.compress(('content',), 'some\nbytes\n', None)
        self.assertTrue(compressor.endpoint > 0)
        sha1, start_point, end_point, kind = compressor.compress(('empty2',),
                                                                 '', None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)

    def test_extract_from_compressor(self):
        # Knit fetching will try to reconstruct texts locally, which results
        # in reading something that is already in the compressor stream.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, _, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        # get the first out
        self.assertEqual(('strange\ncommon long line\n'
                          'that needs a 16 byte match\n', sha1_1),
                         compressor.extract(('label',)))
        # and the second
        self.assertEqual(('common long line\nthat needs a 16 byte match\n'
                          'different\n', sha1_2),
                         compressor.extract(('newlabel',)))

    def test_pop_last(self):
        compressor = self.compressor()
        _, _, _, _ = compressor.compress(('key1',),
            'some text\nfor the first entry\n', None)
        expected_lines = list(compressor.chunks)
        _, _, _, _ = compressor.compress(('key2',),
            'some text\nfor the second entry\n', None)
        compressor.pop_last()
        self.assertEqual(expected_lines, compressor.chunks)


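# A minimal sketch of the fulltext framing asserted in test_one_nosha_delta
# above (inferred from the expected bytes; this is an illustration, not
# bzrlib's implementation): an 'f' marker, the content length as a base128
# varint ('\x0f' == 15 there), then the raw bytes.

def _sketch_encode_fulltext(text):
    length = len(text)
    varint = ''
    while length >= 0x80:
        varint += chr((length & 0x7f) | 0x80) # low 7 bits, high bit = more
        length >>= 7
    varint += chr(length)
    return 'f' + varint + text

# _sketch_encode_fulltext('strange\ncommon\n') == 'f\x0fstrange\ncommon\n'

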
class TestPyrexGroupCompressor(TestGroupCompressor):

    _test_needs_features = [CompiledGroupCompressFeature]
    compressor = groupcompress.PyrexGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            'strange\n'
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'new\n'
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0f',
            # target length
            '\x36',
            # copy the line common
            '\x91\x0a\x2c', # copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0b',
            # target length
            '\x5f'
            # insert new
            '\x03new',
            # Copy of first parent 'common' range
            '\x91\x09\x31' # copy, offset 0x09, 0x31 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)


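# A sketch of how the delta bytes asserted above decode (inferred from the
# byte-by-byte comments in these tests; an illustration, not bzrlib's
# implementation). The delta body opens with the target length as a base128
# varint; each following command byte either copies from the bytes already
# written to the group (high bit set: the low four bits say which offset
# bytes follow, the next three bits which length bytes) or inserts the next
# 'cmd' bytes literally.

def _sketch_apply_delta(group_bytes, delta):
    pos = 0
    target_len = 0
    shift = 0
    while True: # base128 varint, e.g. '\x36' == 54
        c = ord(delta[pos])
        pos += 1
        target_len |= (c & 0x7f) << shift
        shift += 7
        if not c & 0x80:
            break
    out = []
    while pos < len(delta):
        cmd = ord(delta[pos])
        pos += 1
        if cmd & 0x80:
            # copy command, e.g. '\x91\x0a\x2c': offset 0x0a, length 0x2c
            offset = 0
            length = 0
            for i in range(4):
                if cmd & (1 << i):
                    offset |= ord(delta[pos]) << (i * 8)
                    pos += 1
            for i in range(3):
                if cmd & (1 << (4 + i)):
                    length |= ord(delta[pos]) << (i * 8)
                    pos += 1
            out.append(group_bytes[offset:offset + length])
        else:
            # insert command, e.g. '\x0adifferent\n': 10 literal bytes
            out.append(delta[pos:pos + cmd])
            pos += cmd
    result = ''.join(out)
    assert len(result) == target_len
    return result

# For test_two_nosha_delta above, group_bytes is everything written so far
# ('f', a length varint, then the first text), which is why offset 0x0a
# lands exactly at the start of 'common long line\n'.

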
class TestPythonGroupCompressor(TestGroupCompressor):

    compressor = groupcompress.PythonGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            'strange\n'
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'new\n'
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0f',
            # target length
            '\x36',
            # copy the line common
            '\x91\x0a\x2c', # copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0c',
            # target length
            '\x5f'
            # insert new
            '\x04new\n',
            # Copy of first parent 'common' range
            '\x91\x0a\x30' # copy, offset 0x0a, 0x30 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)


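# Worth noting (an editorial comment, not original code): the two
# implementations may emit different instruction streams for the same input
# ('\x03new' plus a copy at offset 0x09 in the Pyrex test, '\x04new\n' plus
# a copy at offset 0x0a here), but both decode to identical target bytes.
# That is why each class carries its own expected_lines instead of sharing
# one set.

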
class TestGroupCompressBlock(tests.TestCase):

    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts."""
        compressor = groupcompress.GroupCompressor()
        start = 0
        for key in sorted(key_to_text):
            compressor.compress(key, key_to_text[key], None)
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.iteritems())
        block = compressor.flush()
        raw_bytes = block.to_bytes()
        # Go through from_bytes(to_bytes()) so that we start with a compressed
        # content object
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)

    def test_from_empty_bytes(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes, '')

    def test_from_minimal_bytes(self):
        block = groupcompress.GroupCompressBlock.from_bytes(
            'gcb1z\n0\n0\n')
        self.assertIsInstance(block, groupcompress.GroupCompressBlock)
        self.assertIs(None, block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content()
        self.assertEqual('', block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content() # Calling _ensure_content() twice is safe

    def test_from_invalid(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes,
                          'this is not a valid header')

    def test_from_bytes(self):
        content = ('a tiny bit of content\n')
        z_content = zlib.compress(content)
        z_bytes = (
            'gcb1z\n' # group compress block v1 zlib
            '%d\n' # Length of compressed content
            '%d\n' # Length of uncompressed content
            '%s'   # Compressed content
            ) % (len(z_content), len(content), z_content)
        block = groupcompress.GroupCompressBlock.from_bytes(
            z_bytes)
        self.assertEqual(z_content, block._z_content)
        self.assertIs(None, block._content)
        self.assertEqual(len(z_content), block._z_content_length)
        self.assertEqual(len(content), block._content_length)
        block._ensure_content()
        self.assertEqual(z_content, block._z_content)
        self.assertEqual(content, block._content)

    def test_to_bytes(self):
        content = ('this is some content\n'
                   'this content will be compressed\n')
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_content(content)
        bytes = gcb.to_bytes()
        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
        self.assertEqual(gcb._content_length, len(content))
        expected_header = ('gcb1z\n' # group compress block v1 zlib
                           '%d\n' # Length of compressed content
                           '%d\n' # Length of uncompressed content
                           ) % (gcb._z_content_length, gcb._content_length)
        self.assertStartsWith(bytes, expected_header)
        remaining_bytes = bytes[len(expected_header):]
        raw_bytes = zlib.decompress(remaining_bytes)
        self.assertEqual(content, raw_bytes)

    def test_partial_decomp(self):
        content_chunks = []
        # We need enough data that partial decompression has something to
        # work with. Most auto-generated data compresses a bit too well, so
        # we mix sha hashes in with the compressible text.
        for i in xrange(2048):
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
            content_chunks.append(next_content)
            next_sha1 = osutils.sha_string(next_content)
            content_chunks.append(next_sha1 + '\n')
        content = ''.join(content_chunks)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content = z_content
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = 158634
        self.assertIs(None, block._content)
        block._ensure_content(100)
        self.assertIsNot(None, block._content)
        # We have decompressed at least 100 bytes
        self.assertTrue(len(block._content) >= 100)
        # We have not decompressed the whole content
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # Ensuring content that we already have shouldn't cause any more data
        # to be extracted
        cur_len = len(block._content)
        block._ensure_content(cur_len - 10)
        self.assertEqual(cur_len, len(block._content))
        # Now we want a bit more content
        cur_len += 10
        block._ensure_content(cur_len)
        self.assertTrue(len(block._content) >= cur_len)
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # And now let's finish
        block._ensure_content(158634)
        self.assertEqualDiff(content, block._content)
        # And the decompressor is finalized
        self.assertIs(None, block._z_content_decompressor)

    def test_partial_decomp_no_known_length(self):
        content_chunks = []
        for i in xrange(2048):
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
            content_chunks.append(next_content)
            next_sha1 = osutils.sha_string(next_content)
            content_chunks.append(next_sha1 + '\n')
        content = ''.join(content_chunks)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content = z_content
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = None # Don't tell the decompressed length
        self.assertIs(None, block._content)
        block._ensure_content(100)
        self.assertIsNot(None, block._content)
        # We have decompressed at least 100 bytes
        self.assertTrue(len(block._content) >= 100)
        # We have not decompressed the whole content
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # Ensuring content that we already have shouldn't cause any more data
        # to be extracted
        cur_len = len(block._content)
        block._ensure_content(cur_len - 10)
        self.assertEqual(cur_len, len(block._content))
        # Now we want a bit more content
        cur_len += 10
        block._ensure_content(cur_len)
        self.assertTrue(len(block._content) >= cur_len)
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # And now let's finish
        block._ensure_content()
        self.assertEqualDiff(content, block._content)
        # And the decompressor is finalized
        self.assertIs(None, block._z_content_decompressor)

    def test__dump(self):
        dup_content = 'some duplicate content\nwhich is sufficiently long\n'
        key_to_text = {('1',): dup_content + '1 unique\n',
                       ('2',): dup_content + '2 extra special\n'}
        locs, block = self.make_block(key_to_text)
        self.assertEqual([('f', len(key_to_text[('1',)])),
                          ('d', 21, len(key_to_text[('2',)]),
                           [('c', 2, len(dup_content)),
                            ('i', len('2 extra special\n'), '')
                           ]),
                         ], block._dump())


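# Two sketches tied to the class above (illustrations only, not bzrlib
# code). The first shows the 'gcb1z' container layout that
# test_from_bytes/test_to_bytes exercise: a 'gcb1z' marker line, two decimal
# length lines, then the zlib data. The second shows the partial-read trick
# behind test_partial_decomp, using plain zlib.decompressobj() with a
# max_length cap instead of inflating the whole block at once.

def _sketch_parse_gcb1z(data):
    marker, z_len, c_len, z_content = data.split('\n', 3)
    if marker != 'gcb1z':
        raise ValueError('not a group compress block: %r' % (marker,))
    if len(z_content) != int(z_len):
        raise ValueError('compressed length mismatch')
    content = zlib.decompress(z_content)
    if len(content) != int(c_len):
        raise ValueError('uncompressed length mismatch')
    return content

def _sketch_partial_decompress(z_content, min_bytes):
    decompressor = zlib.decompressobj()
    # At most min_bytes come back; input that was not consumed is kept in
    # decompressor.unconsumed_tail and can be fed to a later decompress()
    # call, which is how a block can be inflated a little at a time.
    content = decompressor.decompress(z_content, min_bytes)
    return content, decompressor

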
class TestCaseWithGroupCompressVersionedFiles(tests.TestCaseWithTransport):

    def make_test_vf(self, create_graph, keylength=1, do_cleanup=True,
                     dir='.'):
        t = self.get_transport(dir)
        t.ensure_base()
        vf = groupcompress.make_pack_factory(graph=create_graph,
            delta=False, keylength=keylength)(t)
        if do_cleanup:
            self.addCleanup(groupcompress.cleanup_pack_group, vf)
        return vf


class TestGroupCompressVersionedFiles(TestCaseWithGroupCompressVersionedFiles):

    def make_g_index(self, name, ref_lists=0, nodes=[]):
        builder = btree_index.BTreeBuilder(ref_lists)
        for node, references, value in nodes:
            builder.add_node(node, references, value)
        stream = builder.finish()
        trans = self.get_transport()
        size = trans.put_file(name, stream)
        return btree_index.BTreeGraphIndex(trans, name, size)

    def make_g_index_missing_parent(self):
        graph_index = self.make_g_index('missing_parent', 1,
            [(('parent', ), '2 78 2 10', ([],)),
             (('tip', ), '2 78 2 10',
              ([('parent', ), ('missing-parent', )],)),
              ])
        return graph_index

    def test_get_record_stream_as_requested(self):
        # Consider promoting 'as-requested' to general availability, and
        # making this a VF interface test
        vf = self.make_test_vf(False, dir='source')
        vf.add_lines(('a',), (), ['lines\n'])
        vf.add_lines(('b',), (), ['lines\n'])
        vf.add_lines(('c',), (), ['lines\n'])
        vf.add_lines(('d',), (), ['lines\n'])
        vf.writer.end()
        keys = [record.key for record in vf.get_record_stream(
                    [('a',), ('b',), ('c',), ('d',)],
                    'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)],
                    'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

        # It should work even after being repacked into another VF
        vf2 = self.make_test_vf(False, dir='target')
        vf2.insert_record_stream(vf.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)], 'as-requested', False))
        vf2.writer.end()

        keys = [record.key for record in vf2.get_record_stream(
                    [('a',), ('b',), ('c',), ('d',)],
                    'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf2.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)],
                    'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

    def test_insert_record_stream_re_uses_blocks(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        block_bytes = {}
        stream = vf.get_record_stream([(r,) for r in 'abcdefgh'],
                                      'unordered', False)
        num_records = 0
        for record in stream:
            if record.key in [('a',), ('e',)]:
                self.assertEqual('groupcompress-block', record.storage_kind)
            else:
                self.assertEqual('groupcompress-block-ref',
                                 record.storage_kind)
            block_bytes[record.key] = record._manager._block._z_content
            num_records += 1
        self.assertEqual(8, num_records)
        for r in 'abcd':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('a',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('e',)])
        for r in 'efgh':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('e',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('a',)])
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # Ordering in 'groupcompress' order should actually swap the groups
        # in the target vf, but the groups themselves should not be
        # disturbed.
        vf2.insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False))
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        vf2.writer.end()
        num_records = 0
        for record in stream:
            num_records += 1
            self.assertEqual(block_bytes[record.key],
                             record._manager._block._z_content)
        self.assertEqual(8, num_records)

    def test__insert_record_stream_no_reuse_block(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        vf.writer.end()
        self.assertEqual(8, len(list(vf.get_record_stream(
                                        [(r,) for r in 'abcdefgh'],
                                        'unordered', False))))
        # Now copy the records into another vf, asking for the texts to be
        # recompressed rather than the existing blocks being reused
        vf2 = self.make_test_vf(True, dir='target')
        list(vf2._insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False),
            reuse_blocks=False))
        vf2.writer.end()
        # After inserting with reuse_blocks=False, we should have everything
        # in a single new block.
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        block = None
        for record in stream:
            if block is None:
                block = record._manager._block
            else:
                self.assertIs(block, record._manager._block)

    def test_add_missing_noncompression_parent_unvalidated_index(self):
        unvalidated = self.make_g_index_missing_parent()
        combined = _mod_index.CombinedGraphIndex([unvalidated])
        index = groupcompress._GCGraphIndex(combined,
            is_locked=lambda: True, parents=True,
            track_external_parent_refs=True)
        index.scan_unvalidated_index(unvalidated)
        self.assertEqual(
            frozenset([('missing-parent',)]), index.get_missing_parents())

    def test_track_external_parent_refs(self):
        g_index = self.make_g_index('empty', 1, [])
        mod_index = btree_index.BTreeBuilder(1, 1)
        combined = _mod_index.CombinedGraphIndex([g_index, mod_index])
        index = groupcompress._GCGraphIndex(combined,
            is_locked=lambda: True, parents=True,
            add_callback=mod_index.add_nodes,
            track_external_parent_refs=True)
        index.add_records([
            (('new-key',), '2 10 2 10', [(('parent-1',), ('parent-2',))])])
        self.assertEqual(
            frozenset([('parent-1',), ('parent-2',)]),
            index.get_missing_parents())


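# A note on the storage kinds asserted above (editorial, not original code):
# when several requested records live in one compressed group, the first
# record out carries the whole block ('groupcompress-block') and the rest
# only reference it ('groupcompress-block-ref'); that sharing is what lets
# insert_record_stream reuse a block instead of recompressing every text.

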
class TestLazyGroupCompress(tests.TestCaseWithTransport):

    _texts = {
        ('key1',): "this is a text\n"
                   "with a reasonable amount of compressible bytes\n",
        ('key2',): "another text\n"
                   "with a reasonable amount of compressible bytes\n",
        ('key3',): "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n",
        ('key4',): "this will be extracted\n"
                   "but references most of its bytes from\n"
                   "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n",
    }

    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts."""
        compressor = groupcompress.GroupCompressor()
        start = 0
        for key in sorted(key_to_text):
            compressor.compress(key, key_to_text[key], None)
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.iteritems())
        block = compressor.flush()
        raw_bytes = block.to_bytes()
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)

    def add_key_to_manager(self, key, locations, block, manager):
        start, end = locations[key]
        manager.add_factory(key, (), start, end)

    def test_get_fulltexts(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key2',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key1',), ('key2',)], result_order)

        # If we build the manager in the opposite order, we should get them
        # back in the opposite order
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key2',), locations, block, manager)
        self.add_key_to_manager(('key1',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key2',), ('key1',)], result_order)

    def test__wire_bytes_no_keys(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        wire_bytes = manager._wire_bytes()
        block_length = len(block.to_bytes())
        # We should have triggered a strip, since we aren't using any content
        stripped_block = manager._block.to_bytes()
        self.assertTrue(block_length > len(stripped_block))
        empty_z_header = zlib.compress('')
        self.assertEqual('groupcompress-block\n'
                         '8\n' # len(compress(''))
                         '0\n' # len('')
                         '%d\n' # compressed block len
                         '%s'  # zheader
                         '%s'  # block
                         % (len(stripped_block), empty_z_header,
                            stripped_block),
                         wire_bytes)

    def test__wire_bytes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        block_bytes = block.to_bytes()
        wire_bytes = manager._wire_bytes()
        (storage_kind, z_header_len, header_len,
         block_len, rest) = wire_bytes.split('\n', 4)
        z_header_len = int(z_header_len)
        header_len = int(header_len)
        block_len = int(block_len)
        self.assertEqual('groupcompress-block', storage_kind)
        self.assertEqual(33, z_header_len)
        self.assertEqual(25, header_len)
        self.assertEqual(len(block_bytes), block_len)
        z_header = rest[:z_header_len]
        header = zlib.decompress(z_header)
        self.assertEqual(header_len, len(header))
        entry1 = locations[('key1',)]
        entry4 = locations[('key4',)]
        self.assertEqualDiff('key1\n'
                             '\n'  # no parents
                             '%d\n' # start offset
                             '%d\n' # end offset
                             'key4\n'
                             '\n'
                             '%d\n'
                             '%d\n'
                             % (entry1[0], entry1[1],
                                entry4[0], entry4[1]),
                             header)
        z_block = rest[z_header_len:]
        self.assertEqual(block_bytes, z_block)

    def test_from_bytes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        wire_bytes = manager._wire_bytes()
        self.assertStartsWith(wire_bytes, 'groupcompress-block\n')
        manager = groupcompress._LazyGroupContentManager.from_bytes(wire_bytes)
        self.assertIsInstance(manager, groupcompress._LazyGroupContentManager)
        self.assertEqual(2, len(manager._factories))
        self.assertEqual(block._z_content, manager._block._z_content)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key1',), ('key4',)], result_order)

    def test__check_rebuild_no_changes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Request all the keys, which ensures that we won't rebuild
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key2',), locations, block, manager)
        self.add_key_to_manager(('key3',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIs(block, manager._block)

    def test__check_rebuild_only_one(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Request just the first key, which should trigger a 'strip' action
        self.add_key_to_manager(('key1',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        # We should still be able to get the content out of this block,
        # though it should only have 1 entry
        for record in manager.get_record_stream():
            self.assertEqual(('key1',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))

    def test__check_rebuild_middle(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Requesting a small key in the middle should trigger a 'rebuild'
        self.add_key_to_manager(('key4',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        for record in manager.get_record_stream():
            self.assertEqual(('key4',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))
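

# A sketch of the lazy-manager wire format checked by test__wire_bytes above
# (an illustration based on those assertions, not bzrlib's parser): a
# 'groupcompress-block' line, three decimal length lines, a zlib-compressed
# header giving key, parents, and start/end offsets for each factory, then
# the block bytes themselves.

def _sketch_parse_wire_bytes(wire_bytes):
    (storage_kind, z_header_len, header_len,
     block_len, rest) = wire_bytes.split('\n', 4)
    if storage_kind != 'groupcompress-block':
        raise ValueError('unexpected storage kind: %r' % (storage_kind,))
    z_header_len = int(z_header_len)
    header = zlib.decompress(rest[:z_header_len])
    if len(header) != int(header_len):
        raise ValueError('header length mismatch')
    block_bytes = rest[z_header_len:]
    if len(block_bytes) != int(block_len):
        raise ValueError('block length mismatch')
    return header, block_bytes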