~bzr-pqm/bzr/bzr.dev

Viewing changes to bzrlib/tests/test_groupcompress.py

  • Committer: Canonical.com Patch Queue Manager
  • Date: 2009-06-10 01:02:49 UTC
  • mfrom: (4420.2.2 1.16-bencode-compat-385212)
  • Revision ID: pqm@pqm.ubuntu.com-20090610010249-5iyq9oics6tysru4
(jam) restore a compatibility module at bzrlib.util.bencode (bug #385212)

# Copyright (C) 2008, 2009 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Tests for group compression."""

import zlib

from bzrlib import (
    btree_index,
    groupcompress,
    errors,
    index as _mod_index,
    osutils,
    tests,
    versionedfile,
    )
from bzrlib.osutils import sha_string
from bzrlib.tests.test__groupcompress import CompiledGroupCompressFeature


def load_tests(standard_tests, module, loader):
    """Parameterize tests for all versions of groupcompress."""
    to_adapt, result = tests.split_suite_by_condition(
        standard_tests, tests.condition_isinstance(TestAllGroupCompressors))
    scenarios = [
        ('python', {'compressor': groupcompress.PythonGroupCompressor}),
        ]
    if CompiledGroupCompressFeature.available():
        scenarios.append(('C',
            {'compressor': groupcompress.PyrexGroupCompressor}))
    return tests.multiply_tests(to_adapt, scenarios, result)


class TestGroupCompressor(tests.TestCase):

    def _chunks_to_repr_lines(self, chunks):
        return '\n'.join(map(repr, ''.join(chunks).split('\n')))

    def assertEqualDiffEncoded(self, expected, actual):
        """Compare the actual content to the expected content.

        :param expected: A group of chunks that we expect to see
        :param actual: The measured 'chunks'

        We will transform the chunks back into lines, and then run 'repr()'
        over them to handle non-ascii characters.
        """
        self.assertEqualDiff(self._chunks_to_repr_lines(expected),
                             self._chunks_to_repr_lines(actual))


class TestAllGroupCompressors(TestGroupCompressor):
    """Tests for GroupCompressor"""

    compressor = None # Set by multiply_tests

    def test_empty_delta(self):
        compressor = self.compressor()
        self.assertEqual([], compressor.chunks)

    def test_one_nosha_delta(self):
        # diff against NULL
        compressor = self.compressor()
        sha1, start_point, end_point, _ = compressor.compress(('label',),
            'strange\ncommon\n', None)
        self.assertEqual(sha_string('strange\ncommon\n'), sha1)
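        # The record should be a fulltext: 'f' is the fulltext marker and
        # '\x0f' (15) is the length of the content bytes that follow.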
        expected_lines = 'f' '\x0f' 'strange\ncommon\n'
        self.assertEqual(expected_lines, ''.join(compressor.chunks))
        self.assertEqual(0, start_point)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_empty_content(self):
        compressor = self.compressor()
        # Adding empty bytes should return the 'null' record
        sha1, start_point, end_point, kind = compressor.compress(('empty',),
                                                                 '', None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)
        self.assertEqual(0, compressor.endpoint)
        self.assertEqual([], compressor.chunks)
        # Even after adding some content
        compressor.compress(('content',), 'some\nbytes\n', None)
        self.assertTrue(compressor.endpoint > 0)
        sha1, start_point, end_point, kind = compressor.compress(('empty2',),
                                                                 '', None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)

    def test_extract_from_compressor(self):
        # Knit fetching will try to reconstruct texts locally which results in
        # reading something that is in the compressor stream already.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, _, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        # get the first out
        self.assertEqual(('strange\ncommon long line\n'
                          'that needs a 16 byte match\n', sha1_1),
                         compressor.extract(('label',)))
        # and the second
        self.assertEqual(('common long line\nthat needs a 16 byte match\n'
                          'different\n', sha1_2),
                         compressor.extract(('newlabel',)))

    def test_pop_last(self):
        compressor = self.compressor()
        _, _, _, _ = compressor.compress(('key1',),
            'some text\nfor the first entry\n', None)
        expected_lines = list(compressor.chunks)
        _, _, _, _ = compressor.compress(('key2',),
            'some text\nfor the second entry\n', None)
        compressor.pop_last()
        self.assertEqual(expected_lines, compressor.chunks)


class TestPyrexGroupCompressor(TestGroupCompressor):

    _test_needs_features = [CompiledGroupCompressFeature]
    compressor = groupcompress.PyrexGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            'strange\n'
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'new\n'
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
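        # The three texts share their long common lines, so the block should
        # compress to roughly half the raw input (a ratio of about 1.9).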
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
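        # The second text should be stored as a delta against the bytes
        # already in the group: one copy instruction for the common region,
        # one insert for the new tail.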
        expected_lines.extend([
            # 'delta', delta length
            'd\x0f',
            # target length
            '\x36',
            # copy the line common
            '\x91\x0a\x2c', # copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0b',
            # target length
            '\x5f'
            # insert new
            '\x03new',
            # Copy of first parent 'common' range
            '\x91\x09\x31' # copy, offset 0x09, 0x31 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)


class TestPythonGroupCompressor(TestGroupCompressor):

    compressor = groupcompress.PythonGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            'strange\n'
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'new\n'
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0f',
            # target length
            '\x36',
            # copy the line common
            '\x91\x0a\x2c', # copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0c',
            # target length
            '\x5f'
            # insert new
            '\x04new\n',
            # Copy of first parent 'common' range
            '\x91\x0a\x30' # copy, offset 0x0a, 0x30 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)


class TestGroupCompressBlock(tests.TestCase):

    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts."""
        compressor = groupcompress.GroupCompressor()
        start = 0
        for key in sorted(key_to_text):
            compressor.compress(key, key_to_text[key], None)
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.iteritems())
        block = compressor.flush()
        raw_bytes = block.to_bytes()
        # Go through from_bytes(to_bytes()) so that we start with a compressed
        # content object
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)

    def test_from_empty_bytes(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes, '')

    def test_from_minimal_bytes(self):
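        # A minimal block: the 'gcb1z' format marker, then the compressed
        # length and the uncompressed length (both 0 here).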
        block = groupcompress.GroupCompressBlock.from_bytes(
            'gcb1z\n0\n0\n')
        self.assertIsInstance(block, groupcompress.GroupCompressBlock)
        self.assertIs(None, block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content()
        self.assertEqual('', block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content() # Ensure it is safe to call a second time

    def test_from_invalid(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes,
                          'this is not a valid header')

    def test_from_bytes(self):
        content = ('a tiny bit of content\n')
        z_content = zlib.compress(content)
        z_bytes = (
            'gcb1z\n' # group compress block v1 zlib
            '%d\n' # Length of compressed content
            '%d\n' # Length of uncompressed content
            '%s'   # Compressed content
            ) % (len(z_content), len(content), z_content)
        block = groupcompress.GroupCompressBlock.from_bytes(
            z_bytes)
        self.assertEqual(z_content, block._z_content)
        self.assertIs(None, block._content)
        self.assertEqual(len(z_content), block._z_content_length)
        self.assertEqual(len(content), block._content_length)
        block._ensure_content()
        self.assertEqual(z_content, block._z_content)
        self.assertEqual(content, block._content)

    def test_to_bytes(self):
        content = ('this is some content\n'
                   'this content will be compressed\n')
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_content(content)
        bytes = gcb.to_bytes()
        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
        self.assertEqual(gcb._content_length, len(content))
        expected_header = ('gcb1z\n' # group compress block v1 zlib
                           '%d\n' # Length of compressed content
                           '%d\n' # Length of uncompressed content
                          ) % (gcb._z_content_length, gcb._content_length)
        self.assertStartsWith(bytes, expected_header)
        remaining_bytes = bytes[len(expected_header):]
        raw_bytes = zlib.decompress(remaining_bytes)
        self.assertEqual(content, raw_bytes)

    def test_partial_decomp(self):
        content_chunks = []
        # We need a sufficient amount of data so that zlib.decompress has
        # partial decompression to work with. Most auto-generated data
        # compresses a bit too well; we want a mix, so we combine a sha
        # hash with compressible data.
        for i in xrange(2048):
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
            content_chunks.append(next_content)
            next_sha1 = osutils.sha_string(next_content)
            content_chunks.append(next_sha1 + '\n')
        content = ''.join(content_chunks)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content = z_content
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = 158634
        self.assertIs(None, block._content)
        block._ensure_content(100)
        self.assertIsNot(None, block._content)
        # We have decompressed at least 100 bytes
        self.assertTrue(len(block._content) >= 100)
        # We have not decompressed the whole content
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # ensuring content that we already have shouldn't cause any more data
        # to be extracted
        cur_len = len(block._content)
        block._ensure_content(cur_len - 10)
        self.assertEqual(cur_len, len(block._content))
        # Now we want a bit more content
        cur_len += 10
        block._ensure_content(cur_len)
        self.assertTrue(len(block._content) >= cur_len)
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # And now let's finish
        block._ensure_content(158634)
        self.assertEqualDiff(content, block._content)
        # And the decompressor is finalized
        self.assertIs(None, block._z_content_decompressor)

    def test_partial_decomp_no_known_length(self):
        content_chunks = []
        for i in xrange(2048):
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
            content_chunks.append(next_content)
            next_sha1 = osutils.sha_string(next_content)
            content_chunks.append(next_sha1 + '\n')
        content = ''.join(content_chunks)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content = z_content
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = None # Don't tell the decompressed length
        self.assertIs(None, block._content)
        block._ensure_content(100)
        self.assertIsNot(None, block._content)
        # We have decompressed at least 100 bytes
        self.assertTrue(len(block._content) >= 100)
        # We have not decompressed the whole content
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # ensuring content that we already have shouldn't cause any more data
        # to be extracted
        cur_len = len(block._content)
        block._ensure_content(cur_len - 10)
        self.assertEqual(cur_len, len(block._content))
        # Now we want a bit more content
        cur_len += 10
        block._ensure_content(cur_len)
        self.assertTrue(len(block._content) >= cur_len)
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # And now let's finish
        block._ensure_content()
        self.assertEqualDiff(content, block._content)
        # And the decompressor is finalized
        self.assertIs(None, block._z_content_decompressor)

    def test__dump(self):
        dup_content = 'some duplicate content\nwhich is sufficiently long\n'
        key_to_text = {('1',): dup_content + '1 unique\n',
                       ('2',): dup_content + '2 extra special\n'}
        locs, block = self.make_block(key_to_text)
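        # _dump() describes each record: 'f' is a fulltext, 'd' a delta;
        # within a delta, 'c' is a copy instruction and 'i' an insert.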
        self.assertEqual([('f', len(key_to_text[('1',)])),
                          ('d', 21, len(key_to_text[('2',)]),
                           [('c', 2, len(dup_content)),
                            ('i', len('2 extra special\n'), '')
                           ]),
                         ], block._dump())


class TestCaseWithGroupCompressVersionedFiles(tests.TestCaseWithTransport):

    def make_test_vf(self, create_graph, keylength=1, do_cleanup=True,
                     dir='.'):
        t = self.get_transport(dir)
        t.ensure_base()
        vf = groupcompress.make_pack_factory(graph=create_graph,
            delta=False, keylength=keylength)(t)
        if do_cleanup:
            self.addCleanup(groupcompress.cleanup_pack_group, vf)
        return vf


class TestGroupCompressVersionedFiles(TestCaseWithGroupCompressVersionedFiles):

    def make_g_index(self, name, ref_lists=0, nodes=[]):
        builder = btree_index.BTreeBuilder(ref_lists)
        for node, references, value in nodes:
            builder.add_node(node, references, value)
        stream = builder.finish()
        trans = self.get_transport()
        size = trans.put_file(name, stream)
        return btree_index.BTreeGraphIndex(trans, name, size)

    def make_g_index_missing_parent(self):
        graph_index = self.make_g_index('missing_parent', 1,
            [(('parent', ), '2 78 2 10', ([],)),
             (('tip', ), '2 78 2 10',
              ([('parent', ), ('missing-parent', )],)),
              ])
        return graph_index

    def test_get_record_stream_as_requested(self):
        # Consider promoting 'as-requested' to general availability, and
        # make this a VF interface test
        vf = self.make_test_vf(False, dir='source')
        vf.add_lines(('a',), (), ['lines\n'])
        vf.add_lines(('b',), (), ['lines\n'])
        vf.add_lines(('c',), (), ['lines\n'])
        vf.add_lines(('d',), (), ['lines\n'])
        vf.writer.end()
        keys = [record.key for record in vf.get_record_stream(
                    [('a',), ('b',), ('c',), ('d',)],
                    'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)],
                    'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

        # It should work even after being repacked into another VF
        vf2 = self.make_test_vf(False, dir='target')
        vf2.insert_record_stream(vf.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)], 'as-requested', False))
        vf2.writer.end()

        keys = [record.key for record in vf2.get_record_stream(
                    [('a',), ('b',), ('c',), ('d',)],
                    'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf2.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)],
                    'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

    def test_insert_record_stream_re_uses_blocks(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
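            # Yield fulltext records that form a linear chain: each record's
            # parent is the key yielded just before it.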
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        block_bytes = {}
        stream = vf.get_record_stream([(r,) for r in 'abcdefgh'],
                                      'unordered', False)
        num_records = 0
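        # The first record out of each block carries the block itself
        # ('groupcompress-block'); later records from the same block only
        # reference it ('groupcompress-block-ref').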
        for record in stream:
            if record.key in [('a',), ('e',)]:
                self.assertEqual('groupcompress-block', record.storage_kind)
            else:
                self.assertEqual('groupcompress-block-ref',
                                 record.storage_kind)
            block_bytes[record.key] = record._manager._block._z_content
            num_records += 1
        self.assertEqual(8, num_records)
        for r in 'abcd':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('a',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('e',)])
        for r in 'efgh':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('e',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('a',)])
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # Ordering in 'groupcompress' order should actually swap the groups in
        # the target vf, but the groups themselves should not be disturbed.
        vf2.insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False))
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        vf2.writer.end()
        num_records = 0
        for record in stream:
            num_records += 1
            self.assertEqual(block_bytes[record.key],
                             record._manager._block._z_content)
        self.assertEqual(8, num_records)

    def test__insert_record_stream_no_reuse_block(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        vf.writer.end()
        self.assertEqual(8, len(list(vf.get_record_stream(
                                        [(r,) for r in 'abcdefgh'],
                                        'unordered', False))))
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # Ordering in 'groupcompress' order should actually swap the groups in
        # the target vf, but the groups themselves should not be disturbed.
        list(vf2._insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False),
            reuse_blocks=False))
        vf2.writer.end()
        # After inserting with reuse_blocks=False, we should have everything in
        # a single new block.
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        block = None
        for record in stream:
            if block is None:
                block = record._manager._block
            else:
                self.assertIs(block, record._manager._block)

    def test_add_missing_noncompression_parent_unvalidated_index(self):
        unvalidated = self.make_g_index_missing_parent()
        combined = _mod_index.CombinedGraphIndex([unvalidated])
        index = groupcompress._GCGraphIndex(combined,
            is_locked=lambda: True, parents=True,
            track_external_parent_refs=True)
        index.scan_unvalidated_index(unvalidated)
        self.assertEqual(
            frozenset([('missing-parent',)]), index.get_missing_parents())

    def test_track_external_parent_refs(self):
        g_index = self.make_g_index('empty', 1, [])
        mod_index = btree_index.BTreeBuilder(1, 1)
        combined = _mod_index.CombinedGraphIndex([g_index, mod_index])
        index = groupcompress._GCGraphIndex(combined,
            is_locked=lambda: True, parents=True,
            add_callback=mod_index.add_nodes,
            track_external_parent_refs=True)
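        # Adding a record whose parents appear nowhere in the combined index
        # should report those parents as missing.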
        index.add_records([
            (('new-key',), '2 10 2 10', [(('parent-1',), ('parent-2',))])])
        self.assertEqual(
            frozenset([('parent-1',), ('parent-2',)]),
            index.get_missing_parents())


class TestLazyGroupCompress(tests.TestCaseWithTransport):

    _texts = {
        ('key1',): "this is a text\n"
                   "with a reasonable amount of compressible bytes\n",
        ('key2',): "another text\n"
                   "with a reasonable amount of compressible bytes\n",
        ('key3',): "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n",
        ('key4',): "this will be extracted\n"
                   "but references most of its bytes from\n"
                   "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n",
    }

    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts."""
        compressor = groupcompress.GroupCompressor()
        start = 0
        for key in sorted(key_to_text):
            compressor.compress(key, key_to_text[key], None)
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.iteritems())
        block = compressor.flush()
        raw_bytes = block.to_bytes()
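        # Go through from_bytes(to_bytes()) so that we start with a
        # compressed content object.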
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)

    def add_key_to_manager(self, key, locations, block, manager):
        start, end = locations[key]
        manager.add_factory(key, (), start, end)

    def test_get_fulltexts(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key2',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key1',), ('key2',)], result_order)

        # If we build the manager in the opposite order, we should get them
        # back in the opposite order
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key2',), locations, block, manager)
        self.add_key_to_manager(('key1',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key2',), ('key1',)], result_order)

    def test__wire_bytes_no_keys(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        wire_bytes = manager._wire_bytes()
        block_length = len(block.to_bytes())
        # We should have triggered a strip, since we aren't using any content
        stripped_block = manager._block.to_bytes()
        self.assertTrue(block_length > len(stripped_block))
        empty_z_header = zlib.compress('')
        self.assertEqual('groupcompress-block\n'
                         '8\n' # len(compress(''))
                         '0\n' # len('')
                         '%d\n' # compressed block len
                         '%s'  # zheader
                         '%s'  # block
                         % (len(stripped_block), empty_z_header,
                            stripped_block),
                         wire_bytes)

    def test__wire_bytes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        block_bytes = block.to_bytes()
        wire_bytes = manager._wire_bytes()
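        # The wire format appears to be four newline-terminated fields
        # (storage kind, compressed header length, header length, block
        # length) followed by the zlib-compressed header and the block.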
        (storage_kind, z_header_len, header_len,
         block_len, rest) = wire_bytes.split('\n', 4)
        z_header_len = int(z_header_len)
        header_len = int(header_len)
        block_len = int(block_len)
        self.assertEqual('groupcompress-block', storage_kind)
        self.assertEqual(33, z_header_len)
        self.assertEqual(25, header_len)
        self.assertEqual(len(block_bytes), block_len)
        z_header = rest[:z_header_len]
        header = zlib.decompress(z_header)
        self.assertEqual(header_len, len(header))
        entry1 = locations[('key1',)]
        entry4 = locations[('key4',)]
        self.assertEqualDiff('key1\n'
                             '\n'  # no parents
                             '%d\n' # start offset
                             '%d\n' # end offset
                             'key4\n'
                             '\n'
                             '%d\n'
                             '%d\n'
                             % (entry1[0], entry1[1],
                                entry4[0], entry4[1]),
                             header)
        z_block = rest[z_header_len:]
        self.assertEqual(block_bytes, z_block)

    def test_from_bytes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        wire_bytes = manager._wire_bytes()
        self.assertStartsWith(wire_bytes, 'groupcompress-block\n')
        manager = groupcompress._LazyGroupContentManager.from_bytes(wire_bytes)
        self.assertIsInstance(manager, groupcompress._LazyGroupContentManager)
        self.assertEqual(2, len(manager._factories))
        self.assertEqual(block._z_content, manager._block._z_content)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key1',), ('key4',)], result_order)

    def test__check_rebuild_no_changes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Request all the keys, which ensures that we won't rebuild
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key2',), locations, block, manager)
        self.add_key_to_manager(('key3',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIs(block, manager._block)

    def test__check_rebuild_only_one(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Request just the first key, which should trigger a 'strip' action
        self.add_key_to_manager(('key1',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        # We should still be able to get the content out of this block, though
        # it should only have 1 entry
        for record in manager.get_record_stream():
            self.assertEqual(('key1',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))

    def test__check_rebuild_middle(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Requesting a small key in the middle should trigger a 'rebuild'
        self.add_key_to_manager(('key4',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        for record in manager.get_record_stream():
            self.assertEqual(('key4',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))