~bzr-pqm/bzr/bzr.dev

Viewing changes to bzrlib/tests/test_groupcompress.py

  • Committer: Robert Collins
  • Date: 2009-05-14 09:42:07 UTC
  • mto: This revision was merged to the branch mainline in revision 4362.
  • Revision ID: robertc@robertcollins.net-20090514094207-a0pqfjtw4oujd86m
Add bug info.

# Copyright (C) 2008, 2009 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Tests for group compression."""

import zlib

from bzrlib import (
    groupcompress,
    errors,
    osutils,
    tests,
    versionedfile,
    )
from bzrlib.osutils import sha_string
from bzrlib.tests.test__groupcompress import CompiledGroupCompressFeature


def load_tests(standard_tests, module, loader):
    """Parameterize tests for all versions of groupcompress."""
    to_adapt, result = tests.split_suite_by_condition(
        standard_tests, tests.condition_isinstance(TestAllGroupCompressors))
    scenarios = [
        ('python', {'compressor': groupcompress.PythonGroupCompressor}),
        ]
    if CompiledGroupCompressFeature.available():
        scenarios.append(('C',
            {'compressor': groupcompress.PyrexGroupCompressor}))
    return tests.multiply_tests(to_adapt, scenarios, result)
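
# Note (added commentary, not part of the upstream file): multiply_tests
# clones each test in to_adapt once per scenario and applies the scenario
# dict as instance attributes, which is how the 'compressor' attribute on
# TestAllGroupCompressors below gets filled in.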
 

class TestGroupCompressor(tests.TestCase):

    def _chunks_to_repr_lines(self, chunks):
        return '\n'.join(map(repr, ''.join(chunks).split('\n')))

    def assertEqualDiffEncoded(self, expected, actual):
        """Compare the actual content to the expected content.

        :param expected: A group of chunks that we expect to see
        :param actual: The measured 'chunks'

        We will transform the chunks back into lines, and then run 'repr()'
        over them to handle non-ascii characters.
        """
        self.assertEqualDiff(self._chunks_to_repr_lines(expected),
                             self._chunks_to_repr_lines(actual))
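
    # Illustration (added commentary, not in the upstream file):
    #   >>> self._chunks_to_repr_lines(['a\nb', '\xff\n'])
    #   "'a'\n'b\\xff'\n''"
    # The joined chunks are re-split on '\n' and each physical line is
    # repr()'d, so non-ascii delta bytes show up readably in assertion
    # diffs.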
 

class TestAllGroupCompressors(TestGroupCompressor):
    """Tests for GroupCompressor"""

    compressor = None # Set by multiply_tests

    def test_empty_delta(self):
        compressor = self.compressor()
        self.assertEqual([], compressor.chunks)

    def test_one_nosha_delta(self):
        # diff against NULL
        compressor = self.compressor()
        sha1, start_point, end_point, _ = compressor.compress(('label',),
            'strange\ncommon\n', None)
        self.assertEqual(sha_string('strange\ncommon\n'), sha1)
        expected_lines = 'f' '\x0f' 'strange\ncommon\n'
        self.assertEqual(expected_lines, ''.join(compressor.chunks))
        self.assertEqual(0, start_point)
        self.assertEqual(sum(map(len, expected_lines)), end_point)
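
    # Commentary (added; not in the upstream file): 'f' marks a fulltext
    # record and '\x0f' is the content length (15 bytes), which appears to
    # be a base-128 varint; a single byte suffices for lengths below 0x80.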
 
    def test_empty_content(self):
        compressor = self.compressor()
        # Adding empty bytes should return the 'null' record
        sha1, start_point, end_point, kind = compressor.compress(('empty',),
                                                                 '', None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)
        self.assertEqual(0, compressor.endpoint)
        self.assertEqual([], compressor.chunks)
        # Even after adding some content
        compressor.compress(('content',), 'some\nbytes\n', None)
        self.assertTrue(compressor.endpoint > 0)
        sha1, start_point, end_point, kind = compressor.compress(('empty2',),
                                                                 '', None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)

    def test_extract_from_compressor(self):
        # Knit fetching will try to reconstruct texts locally, which results
        # in reading something that is already in the compressor stream.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, _, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        # get the first out
        self.assertEqual(('strange\ncommon long line\n'
                          'that needs a 16 byte match\n', sha1_1),
                         compressor.extract(('label',)))
        # and the second
        self.assertEqual(('common long line\nthat needs a 16 byte match\n'
                          'different\n', sha1_2),
                         compressor.extract(('newlabel',)))

    def test_pop_last(self):
        compressor = self.compressor()
        _, _, _, _ = compressor.compress(('key1',),
            'some text\nfor the first entry\n', None)
        expected_lines = list(compressor.chunks)
        _, _, _, _ = compressor.compress(('key2',),
            'some text\nfor the second entry\n', None)
        compressor.pop_last()
        self.assertEqual(expected_lines, compressor.chunks)


class TestPyrexGroupCompressor(TestGroupCompressor):

    _test_needs_features = [CompiledGroupCompressFeature]
    compressor = groupcompress.PyrexGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            'strange\n'
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'new\n'
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0f',
            # source and target length
            '\x36',
            # copy the line common
            '\x91\x0a\x2c', # copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)
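
    # Commentary (added; not in the upstream file): the delta bytes above
    # follow the git-style copy/insert encoding that groupcompress uses.
    # A command byte with the high bit set is a copy: in '\x91'
    # (0b10010001), bit 0 says one offset byte follows (0x0a) and bit 4
    # says one length byte follows (0x2c), i.e. "copy 0x2c bytes from
    # offset 0x0a". A command byte below 0x80 is a literal insert of that
    # many bytes, so '\x0a' inserts the ten bytes of 'different\n'.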
 
    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0b',
            # source and target length
            '\x5f'
            # insert new
            '\x03new',
            # Copy of first parent 'common' range
            '\x91\x09\x31' # copy, offset 0x09, 0x31 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)
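
    # Commentary (added; not in the upstream file): the copy offsets
    # (0x09, 0x3c) index into the compressor's whole output so far,
    # record headers included, not into any single source text; that is
    # how one delta can pull ranges out of both earlier texts.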
 

class TestPythonGroupCompressor(TestGroupCompressor):

    compressor = groupcompress.PythonGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            'strange\n'
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'new\n'
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0f',
            # target length
            '\x36',
            # copy the line common
            '\x91\x0a\x2c', # copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0c',
            # target length
            '\x5f'
            # insert new
            '\x04new\n',
            # Copy of first parent 'common' range
            '\x91\x0a\x30' # copy, offset 0x0a, 0x30 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)
 

class TestGroupCompressBlock(tests.TestCase):

    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts."""
        compressor = groupcompress.GroupCompressor()
        start = 0
        for key in sorted(key_to_text):
            compressor.compress(key, key_to_text[key], None)
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.iteritems())
        block = compressor.flush()
        raw_bytes = block.to_bytes()
        # Go through from_bytes(to_bytes()) so that we start with a compressed
        # content object
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)

    def test_from_empty_bytes(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes, '')

    def test_from_minimal_bytes(self):
        block = groupcompress.GroupCompressBlock.from_bytes(
            'gcb1z\n0\n0\n')
        self.assertIsInstance(block, groupcompress.GroupCompressBlock)
        self.assertIs(None, block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content()
        self.assertEqual('', block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content() # Ensure it is safe to call twice

    def test_from_invalid(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes,
                          'this is not a valid header')

    def test_from_bytes(self):
        content = ('a tiny bit of content\n')
        z_content = zlib.compress(content)
        z_bytes = (
            'gcb1z\n' # group compress block v1 zlib
            '%d\n' # Length of compressed content
            '%d\n' # Length of uncompressed content
            '%s'   # Compressed content
            ) % (len(z_content), len(content), z_content)
        block = groupcompress.GroupCompressBlock.from_bytes(
            z_bytes)
        self.assertEqual(z_content, block._z_content)
        self.assertIs(None, block._content)
        self.assertEqual(len(z_content), block._z_content_length)
        self.assertEqual(len(content), block._content_length)
        block._ensure_content()
        self.assertEqual(z_content, block._z_content)
        self.assertEqual(content, block._content)
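
    # Commentary (added; not in the upstream file): as exercised above, a
    # serialised block is laid out as
    #   'gcb1z\n' <compressed length> '\n' <uncompressed length> '\n' <zlib data>
    # so the minimal empty block in test_from_minimal_bytes is
    # 'gcb1z\n0\n0\n'.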
 
    def test_to_bytes(self):
        content = ('this is some content\n'
                   'this content will be compressed\n')
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_content(content)
        bytes = gcb.to_bytes()
        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
        self.assertEqual(gcb._content_length, len(content))
        expected_header = ('gcb1z\n' # group compress block v1 zlib
                           '%d\n' # Length of compressed content
                           '%d\n' # Length of uncompressed content
                           ) % (gcb._z_content_length, gcb._content_length)
        self.assertStartsWith(bytes, expected_header)
        remaining_bytes = bytes[len(expected_header):]
        raw_bytes = zlib.decompress(remaining_bytes)
        self.assertEqual(content, raw_bytes)
 
    def test_partial_decomp(self):
        content_chunks = []
        # We need enough data that zlib.decompress has something to work
        # with for partial decompression. Most auto-generated data
        # compresses a bit too well, so we interleave sha hashes with the
        # compressible text to get a useful mix.
        for i in xrange(2048):
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
            content_chunks.append(next_content)
            next_sha1 = osutils.sha_string(next_content)
            content_chunks.append(next_sha1 + '\n')
        content = ''.join(content_chunks)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content = z_content
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = 158634
        self.assertIs(None, block._content)
        block._ensure_content(100)
        self.assertIsNot(None, block._content)
        # We have decompressed at least 100 bytes
        self.assertTrue(len(block._content) >= 100)
        # We have not decompressed the whole content
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # Ensuring content that we already have shouldn't cause any more
        # data to be extracted
        cur_len = len(block._content)
        block._ensure_content(cur_len - 10)
        self.assertEqual(cur_len, len(block._content))
        # Now we want a bit more content
        cur_len += 10
        block._ensure_content(cur_len)
        self.assertTrue(len(block._content) >= cur_len)
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # And now let's finish
        block._ensure_content(158634)
        self.assertEqualDiff(content, block._content)
        # And the decompressor is finalized
        self.assertIs(None, block._z_content_decompressor)
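
    # Commentary (added; not in the upstream file): partial extraction
    # like this maps naturally onto zlib.decompressobj(), whose
    # decompress(data, max_length) call returns at most max_length bytes
    # and keeps the remainder pending; the final assertIs(None, ...) shows
    # the block drops its decompressor once everything is materialised.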
 
    def test_partial_decomp_no_known_length(self):
        content_chunks = []
        for i in xrange(2048):
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
            content_chunks.append(next_content)
            next_sha1 = osutils.sha_string(next_content)
            content_chunks.append(next_sha1 + '\n')
        content = ''.join(content_chunks)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content = z_content
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = None # Don't reveal the decompressed length
        self.assertIs(None, block._content)
        block._ensure_content(100)
        self.assertIsNot(None, block._content)
        # We have decompressed at least 100 bytes
        self.assertTrue(len(block._content) >= 100)
        # We have not decompressed the whole content
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # Ensuring content that we already have shouldn't cause any more
        # data to be extracted
        cur_len = len(block._content)
        block._ensure_content(cur_len - 10)
        self.assertEqual(cur_len, len(block._content))
        # Now we want a bit more content
        cur_len += 10
        block._ensure_content(cur_len)
        self.assertTrue(len(block._content) >= cur_len)
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # And now let's finish
        block._ensure_content()
        self.assertEqualDiff(content, block._content)
        # And the decompressor is finalized
        self.assertIs(None, block._z_content_decompressor)

    def test__dump(self):
        dup_content = 'some duplicate content\nwhich is sufficiently long\n'
        key_to_text = {('1',): dup_content + '1 unique\n',
                       ('2',): dup_content + '2 extra special\n'}
        locs, block = self.make_block(key_to_text)
        self.assertEqual([('f', len(key_to_text[('1',)])),
                          ('d', 21, len(key_to_text[('2',)]),
                           [('c', 2, len(dup_content)),
                            ('i', len('2 extra special\n'), '')
                           ]),
                         ], block._dump())
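
    # Commentary (added; not in the upstream file): reading _dump()'s
    # structure from the assertion above: 'f' is a fulltext record
    # (length), 'd' a delta record (delta length, expanded length, decoded
    # instructions), 'c' a copy instruction (offset, length) and 'i' an
    # insert (length, with the inserted text elided as '').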
 

class TestCaseWithGroupCompressVersionedFiles(tests.TestCaseWithTransport):

    def make_test_vf(self, create_graph, keylength=1, do_cleanup=True,
                     dir='.'):
        t = self.get_transport(dir)
        t.ensure_base()
        vf = groupcompress.make_pack_factory(graph=create_graph,
            delta=False, keylength=keylength)(t)
        if do_cleanup:
            self.addCleanup(groupcompress.cleanup_pack_group, vf)
        return vf


class TestGroupCompressVersionedFiles(TestCaseWithGroupCompressVersionedFiles):

    def test_get_record_stream_as_requested(self):
        # Consider promoting 'as-requested' to general availability, and
        # making this a VF interface test
        vf = self.make_test_vf(False, dir='source')
        vf.add_lines(('a',), (), ['lines\n'])
        vf.add_lines(('b',), (), ['lines\n'])
        vf.add_lines(('c',), (), ['lines\n'])
        vf.add_lines(('d',), (), ['lines\n'])
        vf.writer.end()
        keys = [record.key for record in vf.get_record_stream(
                    [('a',), ('b',), ('c',), ('d',)],
                    'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)],
                    'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

        # It should work even after being repacked into another VF
        vf2 = self.make_test_vf(False, dir='target')
        vf2.insert_record_stream(vf.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)], 'as-requested', False))
        vf2.writer.end()

        keys = [record.key for record in vf2.get_record_stream(
                    [('a',), ('b',), ('c',), ('d',)],
                    'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf2.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)],
                    'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

    def test_insert_record_stream_re_uses_blocks(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        block_bytes = {}
        stream = vf.get_record_stream([(r,) for r in 'abcdefgh'],
                                      'unordered', False)
        num_records = 0
        for record in stream:
            if record.key in [('a',), ('e',)]:
                self.assertEqual('groupcompress-block', record.storage_kind)
            else:
                self.assertEqual('groupcompress-block-ref',
                                 record.storage_kind)
            block_bytes[record.key] = record._manager._block._z_content
            num_records += 1
        self.assertEqual(8, num_records)
        for r in 'abcd':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('a',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('e',)])
        for r in 'efgh':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('e',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('a',)])
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # Ordering in 'groupcompress' order should actually swap the groups
        # in the target vf, but the groups themselves should not be
        # disturbed.
        vf2.insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False))
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        vf2.writer.end()
        num_records = 0
        for record in stream:
            num_records += 1
            self.assertEqual(block_bytes[record.key],
                             record._manager._block._z_content)
        self.assertEqual(8, num_records)
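
    # Commentary (added; not in the upstream file): the first record
    # served from each group carries the whole block
    # ('groupcompress-block') and the rest only reference it
    # ('groupcompress-block-ref'). Reuse is checked via object identity on
    # _z_content, and the second loop verifies the target vf serves back
    # byte-identical blocks after the copy.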
 
    def test__insert_record_stream_no_reuse_block(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        vf.writer.end()
        self.assertEqual(8, len(list(vf.get_record_stream(
                                        [(r,) for r in 'abcdefgh'],
                                        'unordered', False))))
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # Ordering in 'groupcompress' order should actually swap the groups
        # in the target vf, but the groups themselves should not be
        # disturbed.
        list(vf2._insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False),
            reuse_blocks=False))
        vf2.writer.end()
        # After inserting with reuse_blocks=False, we should have everything in
        # a single new block.
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        block = None
        for record in stream:
            if block is None:
                block = record._manager._block
            else:
                self.assertIs(block, record._manager._block)


class TestLazyGroupCompress(tests.TestCaseWithTransport):

    _texts = {
        ('key1',): "this is a text\n"
                   "with a reasonable amount of compressible bytes\n",
        ('key2',): "another text\n"
                   "with a reasonable amount of compressible bytes\n",
        ('key3',): "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n",
        ('key4',): "this will be extracted\n"
                   "but references most of its bytes from\n"
                   "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n",
    }

    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts."""
        compressor = groupcompress.GroupCompressor()
        start = 0
        for key in sorted(key_to_text):
            compressor.compress(key, key_to_text[key], None)
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.iteritems())
        block = compressor.flush()
        raw_bytes = block.to_bytes()
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)

    def add_key_to_manager(self, key, locations, block, manager):
        start, end = locations[key]
        manager.add_factory(key, (), start, end)

    def test_get_fulltexts(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key2',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key1',), ('key2',)], result_order)

        # If we build the manager in the opposite order, we should get them
        # back in the opposite order
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key2',), locations, block, manager)
        self.add_key_to_manager(('key1',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key2',), ('key1',)], result_order)

    def test__wire_bytes_no_keys(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        wire_bytes = manager._wire_bytes()
        block_length = len(block.to_bytes())
        # We should have triggered a strip, since we aren't using any content
        stripped_block = manager._block.to_bytes()
        self.assertTrue(block_length > len(stripped_block))
        empty_z_header = zlib.compress('')
        self.assertEqual('groupcompress-block\n'
                         '8\n'  # len(compress(''))
                         '0\n'  # len('')
                         '%d\n' # compressed block len
                         '%s'   # zheader
                         '%s'   # block
                         % (len(stripped_block), empty_z_header,
                            stripped_block),
                         wire_bytes)

    def test__wire_bytes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        block_bytes = block.to_bytes()
        wire_bytes = manager._wire_bytes()
        (storage_kind, z_header_len, header_len,
         block_len, rest) = wire_bytes.split('\n', 4)
        z_header_len = int(z_header_len)
        header_len = int(header_len)
        block_len = int(block_len)
        self.assertEqual('groupcompress-block', storage_kind)
        self.assertEqual(33, z_header_len)
        self.assertEqual(25, header_len)
        self.assertEqual(len(block_bytes), block_len)
        z_header = rest[:z_header_len]
        header = zlib.decompress(z_header)
        self.assertEqual(header_len, len(header))
        entry1 = locations[('key1',)]
        entry4 = locations[('key4',)]
        self.assertEqualDiff('key1\n'
                             '\n'   # no parents
                             '%d\n' # start offset
                             '%d\n' # end offset
                             'key4\n'
                             '\n'
                             '%d\n'
                             '%d\n'
                             % (entry1[0], entry1[1],
                                entry4[0], entry4[1]),
                             header)
        z_block = rest[z_header_len:]
        self.assertEqual(block_bytes, z_block)
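
    # Commentary (added; not in the upstream file): combining the two
    # tests above, the manager's wire format is
    #   'groupcompress-block\n' <z-header-len> '\n' <header-len> '\n'
    #   <block-len> '\n' <zlib(header)> <serialised block>
    # where the header holds one key / parents / start / end stanza per
    # requested factory, with offsets into the uncompressed block.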
 
    def test_from_bytes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        wire_bytes = manager._wire_bytes()
        self.assertStartsWith(wire_bytes, 'groupcompress-block\n')
        manager = groupcompress._LazyGroupContentManager.from_bytes(wire_bytes)
        self.assertIsInstance(manager, groupcompress._LazyGroupContentManager)
        self.assertEqual(2, len(manager._factories))
        self.assertEqual(block._z_content, manager._block._z_content)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key1',), ('key4',)], result_order)

    def test__check_rebuild_no_changes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Request all the keys, which ensures that we won't rebuild
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key2',), locations, block, manager)
        self.add_key_to_manager(('key3',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIs(block, manager._block)

    def test__check_rebuild_only_one(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Request just the first key, which should trigger a 'strip' action
        self.add_key_to_manager(('key1',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        # We should still be able to get the content out of this block,
        # though it should only have 1 entry
        for record in manager.get_record_stream():
            self.assertEqual(('key1',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))

    def test__check_rebuild_middle(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Requesting a small key in the middle should trigger a 'rebuild'
        self.add_key_to_manager(('key4',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        for record in manager.get_record_stream():
            self.assertEqual(('key4',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))