~bzr-pqm/bzr/bzr.dev

Viewing changes to bzrlib/tests/test_groupcompress.py

  • Committer: Robert Collins
  • Author(s): Robert Collins, John Arbash Meinel, Ian Clatworthy, Vincent Ladeuil
  • Date: 2009-04-07 05:42:28 UTC
  • mto: This revision was merged to the branch mainline in revision 4261.
  • Revision ID: robertc@robertcollins.net-20090407054228-zslrfatxy9nw231i
Groupcompress from brisbane-core.

# Copyright (C) 2008, 2009 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Tests for group compression."""

import zlib

from bzrlib import (
    errors,
    groupcompress,
    osutils,
    tests,
    versionedfile,
    )
from bzrlib.osutils import sha_string
from bzrlib.tests.test__groupcompress import CompiledGroupCompressFeature


def load_tests(standard_tests, module, loader):
    """Parameterize tests for all versions of groupcompress."""
    to_adapt, result = tests.split_suite_by_condition(
        standard_tests, tests.condition_isinstance(TestAllGroupCompressors))
    scenarios = [
        ('python', {'compressor': groupcompress.PythonGroupCompressor}),
        ]
    if CompiledGroupCompressFeature.available():
        scenarios.append(('C',
            {'compressor': groupcompress.PyrexGroupCompressor}))
    return tests.multiply_tests(to_adapt, scenarios, result)
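

# A note on the parameterization above: tests.multiply_tests clones each
# TestAllGroupCompressors test once per scenario and applies that
# scenario's attribute dict to the clone, so every test sees
# self.compressor bound to either the pure-Python or the Pyrex
# implementation. Roughly, as an illustrative sketch of the mechanism only
# (clone_test is a stand-in name, not necessarily the bzrlib helper):
#
#   for name, attrs in scenarios:
#       for test in to_adapt:
#           clone = clone_test(test, '%s(%s)' % (test.id(), name))
#           for attr_name, value in attrs.items():
#               setattr(clone, attr_name, value)
#           result.addTest(clone)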


class TestGroupCompressor(tests.TestCase):

    def _chunks_to_repr_lines(self, chunks):
        return '\n'.join(map(repr, ''.join(chunks).split('\n')))

    def assertEqualDiffEncoded(self, expected, actual):
        """Compare the actual content to the expected content.

        :param expected: A group of chunks that we expect to see
        :param actual: The measured 'chunks'

        We will transform the chunks back into lines, and then run 'repr()'
        over them to handle non-ascii characters.
        """
        self.assertEqualDiff(self._chunks_to_repr_lines(expected),
                             self._chunks_to_repr_lines(actual))


class TestAllGroupCompressors(TestGroupCompressor):
    """Tests for GroupCompressor"""

    compressor = None # Set by multiply_tests

    def test_empty_delta(self):
        compressor = self.compressor()
        self.assertEqual([], compressor.chunks)

    def test_one_nosha_delta(self):
        # diff against NULL
        compressor = self.compressor()
        sha1, start_point, end_point, _ = compressor.compress(('label',),
            'strange\ncommon\n', None)
        self.assertEqual(sha_string('strange\ncommon\n'), sha1)
        expected_lines = 'f' '\x0f' 'strange\ncommon\n'
        self.assertEqual(expected_lines, ''.join(compressor.chunks))
        self.assertEqual(0, start_point)
        self.assertEqual(len(expected_lines), end_point)

    def test_empty_content(self):
        compressor = self.compressor()
        # Adding empty bytes should return the 'null' record
        sha1, start_point, end_point, kind = compressor.compress(('empty',),
                                                                 '', None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)
        self.assertEqual(0, compressor.endpoint)
        self.assertEqual([], compressor.chunks)
        # Even after adding some content
        compressor.compress(('content',), 'some\nbytes\n', None)
        self.assertTrue(compressor.endpoint > 0)
        sha1, start_point, end_point, kind = compressor.compress(('empty2',),
                                                                 '', None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)

    def test_extract_from_compressor(self):
        # Knit fetching will try to reconstruct texts locally which results in
        # reading something that is in the compressor stream already.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, _, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        # get the first out
        self.assertEqual(('strange\ncommon long line\n'
                          'that needs a 16 byte match\n', sha1_1),
                         compressor.extract(('label',)))
        # and the second
        self.assertEqual(('common long line\nthat needs a 16 byte match\n'
                          'different\n', sha1_2),
                         compressor.extract(('newlabel',)))


class TestPyrexGroupCompressor(TestGroupCompressor):

    _test_needs_features = [CompiledGroupCompressFeature]
    compressor = groupcompress.PyrexGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            'strange\n'
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'new\n'
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0f',
            # target length
            '\x36',
            # copy the common lines
            '\x91\x0a\x2c', # copy, offset 0x0a, len 0x2c
            # add the line 'different', and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0b',
            # target length
            '\x5f'
            # insert 'new'
            '\x03new',
            # Copy of first parent 'common' range
            '\x91\x09\x31' # copy, offset 0x09, 0x31 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)
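

# The expected byte strings above spell out the groupcompress delta
# encoding: 'd' plus the length of the delta body, then the target text
# length (a single byte here; longer texts use a base-128 varint), then a
# stream of instructions. An instruction byte with the high bit set copies
# a range out of the group, with its low bits flagging which offset and
# length bytes follow; a byte in the range 1-127 inserts that many literal
# bytes. The decoder below is an illustrative sketch of that instruction
# stream for these tests only; it is not bzrlib's implementation and skips
# corner cases such as zero-length copies.

def _describe_delta_instructions(delta_bytes):
    """Yield a description of each copy/insert instruction in a delta body."""
    pos = 0
    while pos < len(delta_bytes):
        cmd = ord(delta_bytes[pos])
        pos += 1
        if cmd & 0x80:
            # Copy instruction: bits 0-3 flag offset bytes, bits 4-6 flag
            # length bytes, least significant byte first.
            offset = 0
            length = 0
            for i in range(4):
                if cmd & (1 << i):
                    offset |= ord(delta_bytes[pos]) << (i * 8)
                    pos += 1
            for i in range(3):
                if cmd & (1 << (4 + i)):
                    length |= ord(delta_bytes[pos]) << (i * 8)
                    pos += 1
            yield 'copy %d bytes from offset %d' % (length, offset)
        else:
            # Insert instruction: cmd itself is the literal byte count.
            yield 'insert %r' % (delta_bytes[pos:pos + cmd],)
            pos += cmd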


class TestPythonGroupCompressor(TestGroupCompressor):

    compressor = groupcompress.PythonGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            'strange\n'
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'new\n'
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0f',
            # target length
            '\x36',
            # copy the common lines
            '\x91\x0a\x2c', # copy, offset 0x0a, len 0x2c
            # add the line 'different', and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0c',
            # target length
            '\x5f'
            # insert 'new\n'
            '\x04new\n',
            # Copy of first parent 'common' range
            '\x91\x0a\x30' # copy, offset 0x0a, 0x30 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)


class TestGroupCompressBlock(tests.TestCase):

    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts."""
        compressor = groupcompress.GroupCompressor()
        for key in sorted(key_to_text):
            compressor.compress(key, key_to_text[key], None)
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.iteritems())
        block = compressor.flush()
        raw_bytes = block.to_bytes()
        # Go through from_bytes(to_bytes()) so that we start with a compressed
        # content object
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)

    def test_from_empty_bytes(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes, '')

    def test_from_minimal_bytes(self):
        block = groupcompress.GroupCompressBlock.from_bytes(
            'gcb1z\n0\n0\n')
        self.assertIsInstance(block, groupcompress.GroupCompressBlock)
        self.assertIs(None, block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content()
        self.assertEqual('', block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content() # Ensure content is safe to call 2x

    def test_from_invalid(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes,
                          'this is not a valid header')

    def test_from_bytes(self):
        content = 'a tiny bit of content\n'
        z_content = zlib.compress(content)
        z_bytes = (
            'gcb1z\n' # group compress block v1 zlib
            '%d\n' # Length of compressed content
            '%d\n' # Length of uncompressed content
            '%s'   # Compressed content
            ) % (len(z_content), len(content), z_content)
        block = groupcompress.GroupCompressBlock.from_bytes(
            z_bytes)
        self.assertEqual(z_content, block._z_content)
        self.assertIs(None, block._content)
        self.assertEqual(len(z_content), block._z_content_length)
        self.assertEqual(len(content), block._content_length)
        block._ensure_content()
        self.assertEqual(z_content, block._z_content)
        self.assertEqual(content, block._content)

    def test_to_bytes(self):
        content = ('this is some content\n'
                   'this content will be compressed\n')
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_content(content)
        bytes = gcb.to_bytes()
        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
        self.assertEqual(gcb._content_length, len(content))
        expected_header = ('gcb1z\n' # group compress block v1 zlib
                           '%d\n' # Length of compressed content
                           '%d\n' # Length of uncompressed content
                           ) % (gcb._z_content_length, gcb._content_length)
        self.assertStartsWith(bytes, expected_header)
        remaining_bytes = bytes[len(expected_header):]
        raw_bytes = zlib.decompress(remaining_bytes)
        self.assertEqual(content, raw_bytes)

    def test_partial_decomp(self):
        content_chunks = []
        # We need a sufficient amount of data so that zlib.decompress has
        # partial decompression to work with. Most auto-generated data
        # compresses a bit too well, so we mix incompressible sha hashes
        # with compressible duplicate text.
        for i in xrange(2048):
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
            content_chunks.append(next_content)
            next_sha1 = osutils.sha_string(next_content)
            content_chunks.append(next_sha1 + '\n')
        content = ''.join(content_chunks)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content = z_content
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = 158634
        self.assertIs(None, block._content)
        block._ensure_content(100)
        self.assertIsNot(None, block._content)
        # We have decompressed at least 100 bytes
        self.assertTrue(len(block._content) >= 100)
        # We have not decompressed the whole content
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # Ensuring content that we already have shouldn't cause any more data
        # to be extracted
        cur_len = len(block._content)
        block._ensure_content(cur_len - 10)
        self.assertEqual(cur_len, len(block._content))
        # Now we want a bit more content
        cur_len += 10
        block._ensure_content(cur_len)
        self.assertTrue(len(block._content) >= cur_len)
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # And now let's finish
        block._ensure_content(158634)
        self.assertEqualDiff(content, block._content)
        # And the decompressor is finalized
        self.assertIs(None, block._z_content_decompressor)

    def test_partial_decomp_no_known_length(self):
        content_chunks = []
        for i in xrange(2048):
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
            content_chunks.append(next_content)
            next_sha1 = osutils.sha_string(next_content)
            content_chunks.append(next_sha1 + '\n')
        content = ''.join(content_chunks)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content = z_content
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = None # Don't tell the decompressed length
        self.assertIs(None, block._content)
        block._ensure_content(100)
        self.assertIsNot(None, block._content)
        # We have decompressed at least 100 bytes
        self.assertTrue(len(block._content) >= 100)
        # We have not decompressed the whole content
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # Ensuring content that we already have shouldn't cause any more data
        # to be extracted
        cur_len = len(block._content)
        block._ensure_content(cur_len - 10)
        self.assertEqual(cur_len, len(block._content))
        # Now we want a bit more content
        cur_len += 10
        block._ensure_content(cur_len)
        self.assertTrue(len(block._content) >= cur_len)
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # And now let's finish
        block._ensure_content()
        self.assertEqualDiff(content, block._content)
        # And the decompressor is finalized
        self.assertIs(None, block._z_content_decompressor)
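

# The 'gcb1z' bytes exercised above form the group compress block header:
# a marker line naming the format ('gcb1z' is a v1, zlib-compressed block),
# then the compressed length and the uncompressed length, each on its own
# line, then the compressed payload. The helper below is a minimal sketch
# of that framing for illustration only; the real parsing lives in
# GroupCompressBlock.from_bytes. Partial extraction, as exercised by
# test_partial_decomp, then amounts to feeding that payload to a
# zlib.decompressobj() piece by piece instead of calling zlib.decompress
# on all of it at once.

def _split_gcb_header(bytes):
    """Split a zlib group compress block into (z_length, length, payload)."""
    if not bytes.startswith('gcb1z\n'):
        raise ValueError('not a zlib group compress block: %r' % (bytes[:6],))
    z_length, length, payload = bytes[6:].split('\n', 2)
    return int(z_length), int(length), payload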


class TestCaseWithGroupCompressVersionedFiles(tests.TestCaseWithTransport):

    def make_test_vf(self, create_graph, keylength=1, do_cleanup=True,
                     dir='.'):
        t = self.get_transport(dir)
        t.ensure_base()
        vf = groupcompress.make_pack_factory(graph=create_graph,
            delta=False, keylength=keylength)(t)
        if do_cleanup:
            self.addCleanup(groupcompress.cleanup_pack_group, vf)
        return vf


class TestGroupCompressVersionedFiles(TestCaseWithGroupCompressVersionedFiles):

    def test_get_record_stream_as_requested(self):
        # Consider promoting 'as-requested' to general availability, and
        # make this a VF interface test
        vf = self.make_test_vf(False, dir='source')
        vf.add_lines(('a',), (), ['lines\n'])
        vf.add_lines(('b',), (), ['lines\n'])
        vf.add_lines(('c',), (), ['lines\n'])
        vf.add_lines(('d',), (), ['lines\n'])
        vf.writer.end()
        keys = [record.key for record in vf.get_record_stream(
                    [('a',), ('b',), ('c',), ('d',)],
                    'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)],
                    'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

        # It should work even after being repacked into another VF
        vf2 = self.make_test_vf(False, dir='target')
        vf2.insert_record_stream(vf.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)], 'as-requested', False))
        vf2.writer.end()

        keys = [record.key for record in vf2.get_record_stream(
                    [('a',), ('b',), ('c',), ('d',)],
                    'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf2.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)],
                    'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

    def test_insert_record_stream_re_uses_blocks(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        block_bytes = {}
        stream = vf.get_record_stream([(r,) for r in 'abcdefgh'],
                                      'unordered', False)
        num_records = 0
        for record in stream:
            if record.key in [('a',), ('e',)]:
                self.assertEqual('groupcompress-block', record.storage_kind)
            else:
                self.assertEqual('groupcompress-block-ref',
                                 record.storage_kind)
            block_bytes[record.key] = record._manager._block._z_content
            num_records += 1
        self.assertEqual(8, num_records)
        for r in 'abcd':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('a',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('e',)])
        for r in 'efgh':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('e',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('a',)])
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # Ordering in 'groupcompress' order should actually swap the groups
        # in the target vf, but the groups themselves should not be disturbed.
        vf2.insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False))
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        vf2.writer.end()
        num_records = 0
        for record in stream:
            num_records += 1
            self.assertEqual(block_bytes[record.key],
                             record._manager._block._z_content)
        self.assertEqual(8, num_records)

    def test__insert_record_stream_no_reuse_block(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        vf.writer.end()
        self.assertEqual(8, len(list(vf.get_record_stream(
                                        [(r,) for r in 'abcdefgh'],
                                        'unordered', False))))
        # Now copy the texts into another vf, this time forcing a
        # recompression rather than reusing the existing blocks
        vf2 = self.make_test_vf(True, dir='target')
        list(vf2._insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False),
            reuse_blocks=False))
        vf2.writer.end()
        # After inserting with reuse_blocks=False, we should have everything in
        # a single new block.
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        block = None
        for record in stream:
            if block is None:
                block = record._manager._block
            else:
                self.assertIs(block, record._manager._block)
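

# A note on the storage kinds asserted above: when a stream serves several
# texts out of one existing group, the first record carries the whole
# compressed block ('groupcompress-block') and the remaining records from
# that group are thin references into it ('groupcompress-block-ref'). That
# is what lets insert_record_stream reuse blocks wholesale, while
# reuse_blocks=False forces every text to be recompressed into a new group.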


class TestLazyGroupCompress(tests.TestCaseWithTransport):

    _texts = {
        ('key1',): "this is a text\n"
                   "with a reasonable amount of compressible bytes\n",
        ('key2',): "another text\n"
                   "with a reasonable amount of compressible bytes\n",
        ('key3',): "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n",
        ('key4',): "this will be extracted\n"
                   "but references most of its bytes from\n"
                   "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n",
    }

    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts."""
        compressor = groupcompress.GroupCompressor()
        for key in sorted(key_to_text):
            compressor.compress(key, key_to_text[key], None)
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.iteritems())
        block = compressor.flush()
        raw_bytes = block.to_bytes()
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)

    def add_key_to_manager(self, key, locations, block, manager):
        start, end = locations[key]
        manager.add_factory(key, (), start, end)

    def test_get_fulltexts(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key2',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key1',), ('key2',)], result_order)

        # If we build the manager in the opposite order, we should get them
        # back in the opposite order
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key2',), locations, block, manager)
        self.add_key_to_manager(('key1',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key2',), ('key1',)], result_order)

    def test__wire_bytes_no_keys(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        wire_bytes = manager._wire_bytes()
        block_length = len(block.to_bytes())
        # We should have triggered a strip, since we aren't using any content
        stripped_block = manager._block.to_bytes()
        self.assertTrue(block_length > len(stripped_block))
        empty_z_header = zlib.compress('')
        self.assertEqual('groupcompress-block\n'
                         '8\n'  # len(zlib.compress(''))
                         '0\n'  # len('')
                         '%d\n' # compressed block len
                         '%s'   # zheader
                         '%s'   # block
                         % (len(stripped_block), empty_z_header,
                            stripped_block),
                         wire_bytes)

    def test__wire_bytes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        block_bytes = block.to_bytes()
        wire_bytes = manager._wire_bytes()
        (storage_kind, z_header_len, header_len,
         block_len, rest) = wire_bytes.split('\n', 4)
        z_header_len = int(z_header_len)
        header_len = int(header_len)
        block_len = int(block_len)
        self.assertEqual('groupcompress-block', storage_kind)
        self.assertEqual(33, z_header_len)
        self.assertEqual(25, header_len)
        self.assertEqual(len(block_bytes), block_len)
        z_header = rest[:z_header_len]
        header = zlib.decompress(z_header)
        self.assertEqual(header_len, len(header))
        entry1 = locations[('key1',)]
        entry4 = locations[('key4',)]
        self.assertEqualDiff('key1\n'
                             '\n'   # no parents
                             '%d\n' # start offset
                             '%d\n' # end offset
                             'key4\n'
                             '\n'
                             '%d\n'
                             '%d\n'
                             % (entry1[0], entry1[1],
                                entry4[0], entry4[1]),
                             header)
        z_block = rest[z_header_len:]
        self.assertEqual(block_bytes, z_block)

    def test_from_bytes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        wire_bytes = manager._wire_bytes()
        self.assertStartsWith(wire_bytes, 'groupcompress-block\n')
        manager = groupcompress._LazyGroupContentManager.from_bytes(wire_bytes)
        self.assertIsInstance(manager, groupcompress._LazyGroupContentManager)
        self.assertEqual(2, len(manager._factories))
        self.assertEqual(block._z_content, manager._block._z_content)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key1',), ('key4',)], result_order)

    def test__check_rebuild_no_changes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Request all the keys, which ensures that we won't rebuild
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key2',), locations, block, manager)
        self.add_key_to_manager(('key3',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIs(block, manager._block)

    def test__check_rebuild_only_one(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Requesting just the first key should trigger a 'strip' action
        self.add_key_to_manager(('key1',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        # We should still be able to get the content out of this block, though
        # it should only have 1 entry
        for record in manager.get_record_stream():
            self.assertEqual(('key1',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))

    def test__check_rebuild_middle(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Requesting a small key in the middle should trigger a 'rebuild'
        self.add_key_to_manager(('key4',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        for record in manager.get_record_stream():
            self.assertEqual(('key4',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))
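

# The wire format checked in test__wire_bytes above frames a lazy group
# block for transmission: a 'groupcompress-block\n' marker, then the
# compressed header length, the uncompressed header length, and the block
# length, each on its own line, then the zlib-compressed header (key,
# parents, start and end offsets for each factory) followed by the raw
# block bytes. A sketch of splitting such a payload apart, mirroring only
# what the assertions above establish:

def _split_wire_bytes(wire_bytes):
    """Return (header_text, block_bytes) from a groupcompress-block payload."""
    (storage_kind, z_header_len, header_len,
     block_len, rest) = wire_bytes.split('\n', 4)
    if storage_kind != 'groupcompress-block':
        raise ValueError('unknown storage kind: %r' % (storage_kind,))
    z_header_len = int(z_header_len)
    header = zlib.decompress(rest[:z_header_len])
    if len(header) != int(header_len):
        raise ValueError('header length mismatch')
    block_bytes = rest[z_header_len:]
    if len(block_bytes) != int(block_len):
        raise ValueError('block length mismatch')
    return header, block_bytes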