~bzr-pqm/bzr/bzr.dev


Viewing changes to bzrlib/tests/test_groupcompress.py

  • Committer: Canonical.com Patch Queue Manager
  • Date: 2009-03-06 06:48:25 UTC
  • mfrom: (4070.8.6 debug-config)
  • Revision ID: pqm@pqm.ubuntu.com-20090306064825-kbpwggw21dygeix6
(mbp) debug_flags configuration option

# Copyright (C) 2008, 2009 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Tests for group compression."""

import zlib

from bzrlib import (
    errors,
    groupcompress,
    osutils,
    tests,
    versionedfile,
    )
from bzrlib.osutils import sha_string
from bzrlib.tests.test__groupcompress import CompiledGroupCompressFeature


def load_tests(standard_tests, module, loader):
    """Parameterize tests for all versions of groupcompress."""
    to_adapt, result = tests.split_suite_by_condition(
        standard_tests, tests.condition_isinstance(TestAllGroupCompressors))
    scenarios = [
        ('python', {'compressor': groupcompress.PythonGroupCompressor}),
        ]
    if CompiledGroupCompressFeature.available():
        scenarios.append(('C',
            {'compressor': groupcompress.PyrexGroupCompressor}))
    return tests.multiply_tests(to_adapt, scenarios, result)
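
# For reference: multiply_tests clones every test in TestAllGroupCompressors
# once per scenario above, setting the scenario dict's attributes (here,
# 'compressor') on each clone. The exact test-id decoration is an assumption,
# but ids conventionally gain the scenario name as a suffix, e.g.
#   test_empty_delta(python) -> compressor = groupcompress.PythonGroupCompressor
#   test_empty_delta(C)      -> compressor = groupcompress.PyrexGroupCompressor
# and the 'C' scenarios are only added when the compiled extension is present.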


class TestGroupCompressor(tests.TestCase):

    def _chunks_to_repr_lines(self, chunks):
        return '\n'.join(map(repr, ''.join(chunks).split('\n')))

    def assertEqualDiffEncoded(self, expected, actual):
        """Compare the actual content to the expected content.

        :param expected: A group of chunks that we expect to see
        :param actual: The measured 'chunks'

        We will transform the chunks back into lines, and then run 'repr()'
        over them to handle non-ascii characters.
        """
        self.assertEqualDiff(self._chunks_to_repr_lines(expected),
                             self._chunks_to_repr_lines(actual))


class TestAllGroupCompressors(TestGroupCompressor):
    """Tests for GroupCompressor"""

    compressor = None  # Set by multiply_tests

    def test_empty_delta(self):
        compressor = self.compressor()
        self.assertEqual([], compressor.chunks)

    def test_one_nosha_delta(self):
        # diff against NULL
        compressor = self.compressor()
        sha1, start_point, end_point, _ = compressor.compress(('label',),
            'strange\ncommon\n', None)
        self.assertEqual(sha_string('strange\ncommon\n'), sha1)
        expected_lines = 'f' '\x0f' 'strange\ncommon\n'
        self.assertEqual(expected_lines, ''.join(compressor.chunks))
        self.assertEqual(0, start_point)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_empty_content(self):
        compressor = self.compressor()
        # Adding empty bytes should return the 'null' record
        sha1, start_point, end_point, kind = compressor.compress(('empty',),
                                                                 '', None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)
        self.assertEqual(0, compressor.endpoint)
        self.assertEqual([], compressor.chunks)
        # Even after adding some content
        compressor.compress(('content',), 'some\nbytes\n', None)
        self.assertTrue(compressor.endpoint > 0)
        sha1, start_point, end_point, kind = compressor.compress(('empty2',),
                                                                 '', None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)

    def test_extract_from_compressor(self):
        # Knit fetching will try to reconstruct texts locally, which results
        # in reading something that is already in the compressor stream.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, _, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        # get the first out
        self.assertEqual(('strange\ncommon long line\n'
                          'that needs a 16 byte match\n', sha1_1),
                         compressor.extract(('label',)))
        # and the second
        self.assertEqual(('common long line\nthat needs a 16 byte match\n'
                          'different\n', sha1_2),
                         compressor.extract(('newlabel',)))
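
# A hedged sketch of the fulltext record layout asserted in
# test_one_nosha_delta above: an 'f' marker, a length prefix, then the raw
# bytes. The prefix is assumed to be a base-128 varint (low seven bits
# first), which collapses to a single byte for these short texts; this
# helper is illustrative only, not part of the groupcompress API.
def _encode_length_prefix(value):
    """Encode value as a 7-bits-per-byte varint, least significant first."""
    chunks = []
    while value >= 0x80:
        chunks.append(chr((value & 0x7F) | 0x80))
        value >>= 7
    chunks.append(chr(value))
    return ''.join(chunks)

# 'f' + _encode_length_prefix(15) + 'strange\ncommon\n' reproduces the
# expected_lines value ('f' '\x0f' 'strange\ncommon\n') checked above.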


class TestPyrexGroupCompressor(TestGroupCompressor):

    _test_needs_features = [CompiledGroupCompressFeature]
    compressor = groupcompress.PyrexGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            'strange\n'
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'new\n'
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0f',
            # target length
            '\x36',
            # copy the common lines
            '\x91\x0a\x2c', # copy, offset 0x0a, len 0x2c
            # add the line 'different', and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0b',
            # target length
            '\x5f'
            # insert 'new'
            '\x03new',
            # Copy of first parent 'common' range
            '\x91\x09\x31' # copy, offset 0x09, 0x31 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)
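
# A hedged sketch of how the copy instructions asserted above decode,
# assuming the git-style binary delta layout: a command byte with the high
# bit set means 'copy'; its low four bits flag which offset bytes follow,
# and the next three bits flag which length bytes follow, least significant
# byte first. Illustrative only, not the bzrlib implementation.
def _decode_copy_instruction(data, pos):
    """Decode one copy command at data[pos]; return (offset, length, new_pos)."""
    cmd = ord(data[pos])
    pos += 1
    if not cmd & 0x80:
        raise ValueError('not a copy instruction')
    offset = 0
    length = 0
    for shift in range(4):  # up to four offset bytes
        if cmd & (1 << shift):
            offset |= ord(data[pos]) << (shift * 8)
            pos += 1
    for shift in range(3):  # up to three length bytes
        if cmd & (1 << (4 + shift)):
            length |= ord(data[pos]) << (shift * 8)
            pos += 1
    return offset, length, pos

# _decode_copy_instruction('\x91\x0a\x2c', 0) returns (0x0a, 0x2c, 3), which
# matches the '# copy, offset 0x0a, len 0x2c' comments in the tests above.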


class TestPythonGroupCompressor(TestGroupCompressor):

    compressor = groupcompress.PythonGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            'strange\n'
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'new\n'
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0f',
            # target length
            '\x36',
            # copy the common lines
            '\x91\x0a\x2c', # copy, offset 0x0a, len 0x2c
            # add the line 'different', and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0c',
            # target length
            '\x5f'
            # insert 'new\n'
            '\x04new\n',
            # Copy of first parent 'common' range
            '\x91\x0a\x30' # copy, offset 0x0a, 0x30 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)
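
# Note: the Pyrex and Python compressors may legitimately emit slightly
# different deltas for the same texts, as the two test_three_nosha_delta
# variants above show. The C version inserts 'new' (3 bytes) and copies
# 0x31 bytes from offset 0x09, picking up the preceding newline; the Python
# version inserts 'new\n' (4 bytes) and copies 0x30 bytes from offset 0x0a.
# Either way the instructions rebuild the same 0x5f (95) byte target; only
# the copy/insert boundaries differ.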


class TestGroupCompressBlock(tests.TestCase):

    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts."""
        compressor = groupcompress.GroupCompressor()
        start = 0
        for key in sorted(key_to_text):
            compressor.compress(key, key_to_text[key], None)
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.iteritems())
        block = compressor.flush()
        raw_bytes = block.to_bytes()
        # Go through from_bytes(to_bytes()) so that we start with a compressed
        # content object
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)

    def test_from_empty_bytes(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes, '')

    def test_from_minimal_bytes(self):
        block = groupcompress.GroupCompressBlock.from_bytes(
            'gcb1z\n0\n0\n')
        self.assertIsInstance(block, groupcompress.GroupCompressBlock)
        self.assertIs(None, block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content()
        self.assertEqual('', block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content() # _ensure_content() must be safe to call twice

    def test_from_invalid(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes,
                          'this is not a valid header')

    def test_from_bytes(self):
        content = 'a tiny bit of content\n'
        z_content = zlib.compress(content)
        z_bytes = (
            'gcb1z\n' # group compress block v1 zlib
            '%d\n' # Length of compressed content
            '%d\n' # Length of uncompressed content
            '%s'   # Compressed content
            ) % (len(z_content), len(content), z_content)
        block = groupcompress.GroupCompressBlock.from_bytes(
            z_bytes)
        self.assertEqual(z_content, block._z_content)
        self.assertIs(None, block._content)
        self.assertEqual(len(z_content), block._z_content_length)
        self.assertEqual(len(content), block._content_length)
        block._ensure_content()
        self.assertEqual(z_content, block._z_content)
        self.assertEqual(content, block._content)

    def test_to_bytes(self):
        content = ('this is some content\n'
                   'this content will be compressed\n')
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_content(content)
        bytes = gcb.to_bytes()
        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
        self.assertEqual(gcb._content_length, len(content))
        expected_header = ('gcb1z\n' # group compress block v1 zlib
                           '%d\n' # Length of compressed content
                           '%d\n' # Length of uncompressed content
                           ) % (gcb._z_content_length, gcb._content_length)
        self.assertStartsWith(bytes, expected_header)
        remaining_bytes = bytes[len(expected_header):]
        raw_bytes = zlib.decompress(remaining_bytes)
        self.assertEqual(content, raw_bytes)

    def test_partial_decomp(self):
        content_chunks = []
        # We need a sufficient amount of data so that zlib.decompress has
        # partial decompression to work with. Most auto-generated data
        # compresses a bit too well, so we mix a sha hash in with the
        # compressible text.
        for i in xrange(2048):
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
            content_chunks.append(next_content)
            next_sha1 = osutils.sha_string(next_content)
            content_chunks.append(next_sha1 + '\n')
        content = ''.join(content_chunks)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content = z_content
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = 158634
        self.assertIs(None, block._content)
        block._ensure_content(100)
        self.assertIsNot(None, block._content)
        # We have decompressed at least 100 bytes
        self.assertTrue(len(block._content) >= 100)
        # We have not decompressed the whole content
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # Ensuring content that we already have shouldn't cause any more data
        # to be extracted
        cur_len = len(block._content)
        block._ensure_content(cur_len - 10)
        self.assertEqual(cur_len, len(block._content))
        # Now we want a bit more content
        cur_len += 10
        block._ensure_content(cur_len)
        self.assertTrue(len(block._content) >= cur_len)
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # And now let's finish
        block._ensure_content(158634)
        self.assertEqualDiff(content, block._content)
        # And the decompressor is finalized
        self.assertIs(None, block._z_content_decompressor)

    def test_partial_decomp_no_known_length(self):
        content_chunks = []
        for i in xrange(2048):
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
            content_chunks.append(next_content)
            next_sha1 = osutils.sha_string(next_content)
            content_chunks.append(next_sha1 + '\n')
        content = ''.join(content_chunks)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content = z_content
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = None # Don't tell the decompressed length
        self.assertIs(None, block._content)
        block._ensure_content(100)
        self.assertIsNot(None, block._content)
        # We have decompressed at least 100 bytes
        self.assertTrue(len(block._content) >= 100)
        # We have not decompressed the whole content
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # Ensuring content that we already have shouldn't cause any more data
        # to be extracted
        cur_len = len(block._content)
        block._ensure_content(cur_len - 10)
        self.assertEqual(cur_len, len(block._content))
        # Now we want a bit more content
        cur_len += 10
        block._ensure_content(cur_len)
        self.assertTrue(len(block._content) >= cur_len)
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # And now let's finish
        block._ensure_content()
        self.assertEqualDiff(content, block._content)
        # And the decompressor is finalized
        self.assertIs(None, block._z_content_decompressor)
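
# A minimal sketch of the 'gcb1z' layout the class above verifies, assuming
# only what the tests themselves assert: a 'gcb1z' marker line, the
# compressed length, the uncompressed length, then the zlib-compressed
# content. These helpers are illustrative, not the bzrlib implementation.
def _build_gcb1z(content):
    """Serialize content in the layout test_to_bytes checks."""
    z_content = zlib.compress(content)
    return 'gcb1z\n%d\n%d\n%s' % (len(z_content), len(content), z_content)


def _parse_gcb1z(raw_bytes):
    """Invert _build_gcb1z, validating both recorded lengths."""
    marker, z_len, c_len, z_content = raw_bytes.split('\n', 3)
    if marker != 'gcb1z':
        raise ValueError('not a gcb1z block')
    if len(z_content) != int(z_len):
        raise ValueError('compressed length mismatch')
    content = zlib.decompress(z_content)
    if len(content) != int(c_len):
        raise ValueError('uncompressed length mismatch')
    return content

# Incremental reads like test_partial_decomp's can use zlib.decompressobj(),
# whose decompress(data, max_length) call stops once it has produced
# max_length bytes of output, leaving the remainder for a later call.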


class TestCaseWithGroupCompressVersionedFiles(tests.TestCaseWithTransport):

    def make_test_vf(self, create_graph, keylength=1, do_cleanup=True,
                     dir='.'):
        t = self.get_transport(dir)
        t.ensure_base()
        vf = groupcompress.make_pack_factory(graph=create_graph,
            delta=False, keylength=keylength)(t)
        if do_cleanup:
            self.addCleanup(groupcompress.cleanup_pack_group, vf)
        return vf


class TestGroupCompressVersionedFiles(TestCaseWithGroupCompressVersionedFiles):

    def test_get_record_stream_as_requested(self):
        # Consider promoting 'as-requested' to general availability, and
        # making this a VF interface test
        vf = self.make_test_vf(False, dir='source')
        vf.add_lines(('a',), (), ['lines\n'])
        vf.add_lines(('b',), (), ['lines\n'])
        vf.add_lines(('c',), (), ['lines\n'])
        vf.add_lines(('d',), (), ['lines\n'])
        vf.writer.end()
        keys = [record.key for record in vf.get_record_stream(
                    [('a',), ('b',), ('c',), ('d',)],
                    'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)],
                    'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

        # It should work even after being repacked into another VF
        vf2 = self.make_test_vf(False, dir='target')
        vf2.insert_record_stream(vf.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)], 'as-requested', False))
        vf2.writer.end()

        keys = [record.key for record in vf2.get_record_stream(
                    [('a',), ('b',), ('c',), ('d',)],
                    'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf2.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)],
                    'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

    def test_insert_record_stream_re_uses_blocks(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        block_bytes = {}
        stream = vf.get_record_stream([(r,) for r in 'abcdefgh'],
                                      'unordered', False)
        num_records = 0
        for record in stream:
            if record.key in [('a',), ('e',)]:
                self.assertEqual('groupcompress-block', record.storage_kind)
            else:
                self.assertEqual('groupcompress-block-ref',
                                 record.storage_kind)
            block_bytes[record.key] = record._manager._block._z_content
            num_records += 1
        self.assertEqual(8, num_records)
        for r in 'abcd':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('a',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('e',)])
        for r in 'efgh':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('e',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('a',)])
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # Ordering in 'groupcompress' order should actually swap the groups in
        # the target vf, but the groups themselves should not be disturbed.
        vf2.insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False))
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        vf2.writer.end()
        num_records = 0
        for record in stream:
            num_records += 1
            self.assertEqual(block_bytes[record.key],
                             record._manager._block._z_content)
        self.assertEqual(8, num_records)

    def test__insert_record_stream_no_reuse_block(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        vf.writer.end()
        self.assertEqual(8, len(list(vf.get_record_stream(
                                        [(r,) for r in 'abcdefgh'],
                                        'unordered', False))))
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # Ordering in 'groupcompress' order should actually swap the groups in
        # the target vf, but the groups themselves should not be disturbed.
        list(vf2._insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False),
            reuse_blocks=False))
        vf2.writer.end()
        # After inserting with reuse_blocks=False, we should have everything in
        # a single new block.
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        block = None
        for record in stream:
            if block is None:
                block = record._manager._block
            else:
                self.assertIs(block, record._manager._block)


class TestLazyGroupCompress(tests.TestCaseWithTransport):

    _texts = {
        ('key1',): "this is a text\n"
                   "with a reasonable amount of compressible bytes\n",
        ('key2',): "another text\n"
                   "with a reasonable amount of compressible bytes\n",
        ('key3',): "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n",
        ('key4',): "this will be extracted\n"
                   "but references most of its bytes from\n"
                   "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n",
    }

    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts."""
        compressor = groupcompress.GroupCompressor()
        start = 0
        for key in sorted(key_to_text):
            compressor.compress(key, key_to_text[key], None)
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.iteritems())
        block = compressor.flush()
        raw_bytes = block.to_bytes()
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)

    def add_key_to_manager(self, key, locations, block, manager):
        start, end = locations[key]
        manager.add_factory(key, (), start, end)

    def test_get_fulltexts(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key2',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key1',), ('key2',)], result_order)

        # If we build the manager in the opposite order, we should get them
        # back in the opposite order
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key2',), locations, block, manager)
        self.add_key_to_manager(('key1',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key2',), ('key1',)], result_order)

    def test__wire_bytes_no_keys(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        wire_bytes = manager._wire_bytes()
        block_length = len(block.to_bytes())
        # We should have triggered a strip, since we aren't using any content
        stripped_block = manager._block.to_bytes()
        self.assertTrue(block_length > len(stripped_block))
        empty_z_header = zlib.compress('')
        self.assertEqual('groupcompress-block\n'
                         '8\n'  # len(zlib.compress(''))
                         '0\n'  # len('')
                         '%d\n' # compressed block len
                         '%s'   # z_header
                         '%s'   # block
                         % (len(stripped_block), empty_z_header,
                            stripped_block),
                         wire_bytes)

    def test__wire_bytes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        block_bytes = block.to_bytes()
        wire_bytes = manager._wire_bytes()
        (storage_kind, z_header_len, header_len,
         block_len, rest) = wire_bytes.split('\n', 4)
        z_header_len = int(z_header_len)
        header_len = int(header_len)
        block_len = int(block_len)
        self.assertEqual('groupcompress-block', storage_kind)
        self.assertEqual(33, z_header_len)
        self.assertEqual(25, header_len)
        self.assertEqual(len(block_bytes), block_len)
        z_header = rest[:z_header_len]
        header = zlib.decompress(z_header)
        self.assertEqual(header_len, len(header))
        entry1 = locations[('key1',)]
        entry4 = locations[('key4',)]
        self.assertEqualDiff('key1\n'
                             '\n'   # no parents
                             '%d\n' # start offset
                             '%d\n' # end offset
                             'key4\n'
                             '\n'
                             '%d\n'
                             '%d\n'
                             % (entry1[0], entry1[1],
                                entry4[0], entry4[1]),
                             header)
        z_block = rest[z_header_len:]
        self.assertEqual(block_bytes, z_block)

    def test_from_bytes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        wire_bytes = manager._wire_bytes()
        self.assertStartsWith(wire_bytes, 'groupcompress-block\n')
        manager = groupcompress._LazyGroupContentManager.from_bytes(wire_bytes)
        self.assertIsInstance(manager, groupcompress._LazyGroupContentManager)
        self.assertEqual(2, len(manager._factories))
        self.assertEqual(block._z_content, manager._block._z_content)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key1',), ('key4',)], result_order)

    def test__check_rebuild_no_changes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Request all the keys, which ensures that we won't rebuild
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key2',), locations, block, manager)
        self.add_key_to_manager(('key3',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIs(block, manager._block)

    def test__check_rebuild_only_one(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Request just the first key, which should trigger a 'strip' action
        self.add_key_to_manager(('key1',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        # We should still be able to get the content out of this block, though
        # it should only have one entry
        for record in manager.get_record_stream():
            self.assertEqual(('key1',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))

    def test__check_rebuild_middle(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Requesting a small key in the middle should trigger a 'rebuild'
        self.add_key_to_manager(('key4',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        for record in manager.get_record_stream():
            self.assertEqual(('key4',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))
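
# A hedged sketch of consuming the 'groupcompress-block' wire format that
# test__wire_bytes above picks apart by hand: four newline-terminated fields
# (storage kind, compressed-header length, header length, block length),
# followed by the zlib-compressed header and then the block bytes. The
# helper is illustrative only; the decompressed header holds four lines per
# key (key name, parents, start offset, end offset), as asserted above.
def _parse_gc_block_wire_bytes(wire_bytes):
    """Split a _wire_bytes() string into (header, block)."""
    (storage_kind, z_header_len, header_len,
     block_len, rest) = wire_bytes.split('\n', 4)
    if storage_kind != 'groupcompress-block':
        raise ValueError('unexpected storage kind: %r' % (storage_kind,))
    z_header_len = int(z_header_len)
    header = zlib.decompress(rest[:z_header_len])
    if len(header) != int(header_len):
        raise ValueError('header length mismatch')
    block = rest[z_header_len:]
    if len(block) != int(block_len):
        raise ValueError('block length mismatch')
    return header, block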