~bzr-pqm/bzr/bzr.dev

Viewing changes to bzrlib/tests/test_groupcompress.py

  • Committer: Martin Pool
  • Date: 2007-04-04 06:17:31 UTC
  • mto: This revision was merged to the branch mainline in revision 2397.
  • Revision ID: mbp@sourcefrog.net-20070404061731-tt2xrzllqhbodn83
Contents of TODO file moved into bug tracker

# Copyright (C) 2008, 2009 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Tests for group compression."""

import zlib

from bzrlib import (
    groupcompress,
    errors,
    osutils,
    tests,
    versionedfile,
    )
from bzrlib.osutils import sha_string
from bzrlib.tests.test__groupcompress import CompiledGroupCompressFeature


def load_tests(standard_tests, module, loader):
    """Parameterize tests for all versions of groupcompress."""
    to_adapt, result = tests.split_suite_by_condition(
        standard_tests, tests.condition_isinstance(TestAllGroupCompressors))
    scenarios = [
        ('python', {'compressor': groupcompress.PythonGroupCompressor}),
        ]
    if CompiledGroupCompressFeature.available():
        scenarios.append(('C',
            {'compressor': groupcompress.PyrexGroupCompressor}))
    return tests.multiply_tests(to_adapt, scenarios, result)


class TestGroupCompressor(tests.TestCase):

    def _chunks_to_repr_lines(self, chunks):
        return '\n'.join(map(repr, ''.join(chunks).split('\n')))

    def assertEqualDiffEncoded(self, expected, actual):
        """Compare the actual content to the expected content.

        :param expected: A group of chunks that we expect to see
        :param actual: The measured 'chunks'

        We will transform the chunks back into lines, and then run 'repr()'
        over them to handle non-ascii characters.
        """
        self.assertEqualDiff(self._chunks_to_repr_lines(expected),
                             self._chunks_to_repr_lines(actual))


class TestAllGroupCompressors(TestGroupCompressor):
    """Tests for GroupCompressor"""

    compressor = None # Set by multiply_tests

    def test_empty_delta(self):
        compressor = self.compressor()
        self.assertEqual([], compressor.chunks)

    def test_one_nosha_delta(self):
        # diff against NULL
        compressor = self.compressor()
        sha1, start_point, end_point, _ = compressor.compress(('label',),
            'strange\ncommon\n', None)
        self.assertEqual(sha_string('strange\ncommon\n'), sha1)
        expected_lines = 'f' '\x0f' 'strange\ncommon\n'
        self.assertEqual(expected_lines, ''.join(compressor.chunks))
        self.assertEqual(0, start_point)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_empty_content(self):
        compressor = self.compressor()
        # Adding empty bytes should return the 'null' record
        sha1, start_point, end_point, kind = compressor.compress(('empty',),
                                                                 '', None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)
        self.assertEqual(0, compressor.endpoint)
        self.assertEqual([], compressor.chunks)
        # Even after adding some content
        compressor.compress(('content',), 'some\nbytes\n', None)
        self.assertTrue(compressor.endpoint > 0)
        sha1, start_point, end_point, kind = compressor.compress(('empty2',),
                                                                 '', None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)

    def test_extract_from_compressor(self):
        # Knit fetching will try to reconstruct texts locally, which results
        # in reading something that is already in the compressor stream.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, _, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        # get the first out
        self.assertEqual(('strange\ncommon long line\n'
                          'that needs a 16 byte match\n', sha1_1),
                         compressor.extract(('label',)))
        # and the second
        self.assertEqual(('common long line\nthat needs a 16 byte match\n'
                          'different\n', sha1_2),
                         compressor.extract(('newlabel',)))

    def test_pop_last(self):
        compressor = self.compressor()
        _, _, _, _ = compressor.compress(('key1',),
            'some text\nfor the first entry\n', None)
        expected_lines = list(compressor.chunks)
        _, _, _, _ = compressor.compress(('key2',),
            'some text\nfor the second entry\n', None)
        compressor.pop_last()
        self.assertEqual(expected_lines, compressor.chunks)


class TestPyrexGroupCompressor(TestGroupCompressor):

    _test_needs_features = [CompiledGroupCompressFeature]
    compressor = groupcompress.PyrexGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            'strange\n'
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'new\n'
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0f',
            # target length
            '\x36',
            # copy the line common
            '\x91\x0a\x2c', # copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0b',
            # target length
            '\x5f'
            # insert new
            '\x03new',
            # Copy of first parent 'common' range
            '\x91\x09\x31' # copy, offset 0x09, 0x31 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)


class TestPythonGroupCompressor(TestGroupCompressor):

    compressor = groupcompress.PythonGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            'strange\n'
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'new\n'
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0f',
            # target length
            '\x36',
            # copy the line common
            '\x91\x0a\x2c', # copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0c',
            # target length
            '\x5f'
            # insert new
            '\x04new\n',
            # Copy of first parent 'common' range
            '\x91\x0a\x30' # copy, offset 0x0a, 0x30 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

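# The expected_lines in the delta tests above spell out the encoding byte
# by byte: 'd' marks a delta record and is followed by a delta-length byte,
# a target-length byte, and then copy/insert instructions. The helper below
# is an illustrative sketch, not groupcompress's real decoder, and it only
# handles the narrow forms exercised above (one offset byte and one length
# byte per copy); real copy commands can carry wider offsets and lengths.
# It expects just the instruction bytes, after the 'd'/length prefix.
def _example_walk_instructions(instructions):
    """Yield ('copy', offset, length) or ('insert', bytes) tuples."""
    pos = 0
    while pos < len(instructions):
        cmd = ord(instructions[pos])
        pos += 1
        if cmd & 0x80:
            # high bit set: a copy, e.g. '\x91\x0a\x2c' copies 0x2c bytes
            # starting at offset 0x0a of the source
            offset, length = ord(instructions[pos]), ord(instructions[pos + 1])
            pos += 2
            yield 'copy', offset, length
        else:
            # otherwise: insert the next `cmd` literal bytes, e.g.
            # '\x0adifferent\n' inserts the 10 bytes 'different\n'
            yield 'insert', instructions[pos:pos + cmd]
            pos += cmd
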
 
class TestGroupCompressBlock(tests.TestCase):

    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts."""
        compressor = groupcompress.GroupCompressor()
        for key in sorted(key_to_text):
            compressor.compress(key, key_to_text[key], None)
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.iteritems())
        block = compressor.flush()
        raw_bytes = block.to_bytes()
        # Go through from_bytes(to_bytes()) so that we start with a compressed
        # content object
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)

    def test_from_empty_bytes(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes, '')

    def test_from_minimal_bytes(self):
        block = groupcompress.GroupCompressBlock.from_bytes(
            'gcb1z\n0\n0\n')
        self.assertIsInstance(block, groupcompress.GroupCompressBlock)
        self.assertIs(None, block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content()
        self.assertEqual('', block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content() # Ensure content is safe to call 2x

    def test_from_invalid(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes,
                          'this is not a valid header')

    def test_from_bytes(self):
        content = 'a tiny bit of content\n'
        z_content = zlib.compress(content)
        z_bytes = (
            'gcb1z\n' # group compress block v1 zlib
            '%d\n' # Length of compressed content
            '%d\n' # Length of uncompressed content
            '%s'   # Compressed content
            ) % (len(z_content), len(content), z_content)
        block = groupcompress.GroupCompressBlock.from_bytes(
            z_bytes)
        self.assertEqual(z_content, block._z_content)
        self.assertIs(None, block._content)
        self.assertEqual(len(z_content), block._z_content_length)
        self.assertEqual(len(content), block._content_length)
        block._ensure_content()
        self.assertEqual(z_content, block._z_content)
        self.assertEqual(content, block._content)

    def test_to_bytes(self):
        content = ('this is some content\n'
                   'this content will be compressed\n')
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_content(content)
        bytes = gcb.to_bytes()
        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
        self.assertEqual(gcb._content_length, len(content))
        expected_header = ('gcb1z\n' # group compress block v1 zlib
                           '%d\n' # Length of compressed content
                           '%d\n' # Length of uncompressed content
                          ) % (gcb._z_content_length, gcb._content_length)
        self.assertStartsWith(bytes, expected_header)
        remaining_bytes = bytes[len(expected_header):]
        raw_bytes = zlib.decompress(remaining_bytes)
        self.assertEqual(content, raw_bytes)

    def test_partial_decomp(self):
        content_chunks = []
        # We need enough data that zlib still has something left after a
        # partial decompression. Most auto-generated data compresses a bit
        # too well, so we mix a sha hash in with the compressible text.
        for i in xrange(2048):
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
            content_chunks.append(next_content)
            next_sha1 = osutils.sha_string(next_content)
            content_chunks.append(next_sha1 + '\n')
        content = ''.join(content_chunks)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content = z_content
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = 158634
        self.assertIs(None, block._content)
        block._ensure_content(100)
        self.assertIsNot(None, block._content)
        # We have decompressed at least 100 bytes
        self.assertTrue(len(block._content) >= 100)
        # We have not decompressed the whole content
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # ensuring content that we already have shouldn't cause any more data
        # to be extracted
        cur_len = len(block._content)
        block._ensure_content(cur_len - 10)
        self.assertEqual(cur_len, len(block._content))
        # Now we want a bit more content
        cur_len += 10
        block._ensure_content(cur_len)
        self.assertTrue(len(block._content) >= cur_len)
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # And now let's finish
        block._ensure_content(158634)
        self.assertEqualDiff(content, block._content)
        # And the decompressor is finalized
        self.assertIs(None, block._z_content_decompressor)

    def test_partial_decomp_no_known_length(self):
        content_chunks = []
        for i in xrange(2048):
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
            content_chunks.append(next_content)
            next_sha1 = osutils.sha_string(next_content)
            content_chunks.append(next_sha1 + '\n')
        content = ''.join(content_chunks)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content = z_content
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = None # Don't tell the decompressed length
        self.assertIs(None, block._content)
        block._ensure_content(100)
        self.assertIsNot(None, block._content)
        # We have decompressed at least 100 bytes
        self.assertTrue(len(block._content) >= 100)
        # We have not decompressed the whole content
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # ensuring content that we already have shouldn't cause any more data
        # to be extracted
        cur_len = len(block._content)
        block._ensure_content(cur_len - 10)
        self.assertEqual(cur_len, len(block._content))
        # Now we want a bit more content
        cur_len += 10
        block._ensure_content(cur_len)
        self.assertTrue(len(block._content) >= cur_len)
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # And now let's finish
        block._ensure_content()
        self.assertEqualDiff(content, block._content)
        # And the decompressor is finalized
        self.assertIs(None, block._z_content_decompressor)

    def test__dump(self):
        dup_content = 'some duplicate content\nwhich is sufficiently long\n'
        key_to_text = {('1',): dup_content + '1 unique\n',
                       ('2',): dup_content + '2 extra special\n'}
        locs, block = self.make_block(key_to_text)
        self.assertEqual([('f', len(key_to_text[('1',)])),
                          ('d', 21, len(key_to_text[('2',)]),
                           [('c', 2, len(dup_content)),
                            ('i', len('2 extra special\n'), '')
                           ]),
                         ], block._dump())

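# test_partial_decomp above depends on the block handing back only part of
# the content on request. A minimal standalone sketch of that behaviour
# using just the stdlib zlib.decompressobj API (the block's internal
# decompressor is only assumed to behave along these lines; _ensure_content
# itself decompresses in steps until it has at least the requested amount):
def _example_partial_decompress(z_bytes, max_length):
    """Decompress at most max_length bytes, leaving the stream resumable."""
    decompressor = zlib.decompressobj()
    # max_length caps how much decompressed data is returned
    out = decompressor.decompress(z_bytes, max_length)
    # decompressor.unconsumed_tail holds the compressed bytes not yet used;
    # feeding it back in continues where this call stopped
    return out, decompressor
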
 
class TestCaseWithGroupCompressVersionedFiles(tests.TestCaseWithTransport):

    def make_test_vf(self, create_graph, keylength=1, do_cleanup=True,
                     dir='.'):
        t = self.get_transport(dir)
        t.ensure_base()
        vf = groupcompress.make_pack_factory(graph=create_graph,
            delta=False, keylength=keylength)(t)
        if do_cleanup:
            self.addCleanup(groupcompress.cleanup_pack_group, vf)
        return vf


class TestGroupCompressVersionedFiles(TestCaseWithGroupCompressVersionedFiles):

    def test_get_record_stream_as_requested(self):
        # Consider promoting 'as-requested' to general availability, and
        # make this a VF interface test
        vf = self.make_test_vf(False, dir='source')
        vf.add_lines(('a',), (), ['lines\n'])
        vf.add_lines(('b',), (), ['lines\n'])
        vf.add_lines(('c',), (), ['lines\n'])
        vf.add_lines(('d',), (), ['lines\n'])
        vf.writer.end()
        keys = [record.key for record in vf.get_record_stream(
                    [('a',), ('b',), ('c',), ('d',)],
                    'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)],
                    'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

        # It should work even after being repacked into another VF
        vf2 = self.make_test_vf(False, dir='target')
        vf2.insert_record_stream(vf.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)], 'as-requested', False))
        vf2.writer.end()

        keys = [record.key for record in vf2.get_record_stream(
                    [('a',), ('b',), ('c',), ('d',)],
                    'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf2.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)],
                    'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

    def test_insert_record_stream_re_uses_blocks(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        block_bytes = {}
        stream = vf.get_record_stream([(r,) for r in 'abcdefgh'],
                                      'unordered', False)
        num_records = 0
        for record in stream:
            if record.key in [('a',), ('e',)]:
                self.assertEqual('groupcompress-block', record.storage_kind)
            else:
                self.assertEqual('groupcompress-block-ref',
                                 record.storage_kind)
            block_bytes[record.key] = record._manager._block._z_content
            num_records += 1
        self.assertEqual(8, num_records)
        for r in 'abcd':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('a',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('e',)])
        for r in 'efgh':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('e',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('a',)])
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # Ordering in 'groupcompress' order should actually swap the groups
        # in the target vf, but the groups themselves should not be disturbed.
        vf2.insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False))
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        vf2.writer.end()
        num_records = 0
        for record in stream:
            num_records += 1
            self.assertEqual(block_bytes[record.key],
                             record._manager._block._z_content)
        self.assertEqual(8, num_records)

    def test__insert_record_stream_no_reuse_block(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        vf.writer.end()
        self.assertEqual(8, len(list(vf.get_record_stream(
                                        [(r,) for r in 'abcdefgh'],
                                        'unordered', False))))
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # Ordering in 'groupcompress' order should actually swap the groups
        # in the target vf, but the groups themselves should not be disturbed.
        list(vf2._insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False),
            reuse_blocks=False))
        vf2.writer.end()
        # After inserting with reuse_blocks=False, we should have everything in
        # a single new block.
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        block = None
        for record in stream:
            if block is None:
                block = record._manager._block
            else:
                self.assertIs(block, record._manager._block)


class TestLazyGroupCompress(tests.TestCaseWithTransport):

    _texts = {
        ('key1',): "this is a text\n"
                   "with a reasonable amount of compressible bytes\n",
        ('key2',): "another text\n"
                   "with a reasonable amount of compressible bytes\n",
        ('key3',): "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n",
        ('key4',): "this will be extracted\n"
                   "but references most of its bytes from\n"
                   "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n",
    }

    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts."""
        compressor = groupcompress.GroupCompressor()
        for key in sorted(key_to_text):
            compressor.compress(key, key_to_text[key], None)
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.iteritems())
        block = compressor.flush()
        raw_bytes = block.to_bytes()
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)

    def add_key_to_manager(self, key, locations, block, manager):
        start, end = locations[key]
        manager.add_factory(key, (), start, end)

    def test_get_fulltexts(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key2',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key1',), ('key2',)], result_order)

        # If we build the manager in the opposite order, we should get them
        # back in the opposite order
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key2',), locations, block, manager)
        self.add_key_to_manager(('key1',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key2',), ('key1',)], result_order)

    def test__wire_bytes_no_keys(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        wire_bytes = manager._wire_bytes()
        block_length = len(block.to_bytes())
        # We should have triggered a strip, since we aren't using any content
        stripped_block = manager._block.to_bytes()
        self.assertTrue(block_length > len(stripped_block))
        empty_z_header = zlib.compress('')
        self.assertEqual('groupcompress-block\n'
                         '8\n' # len(compress(''))
                         '0\n' # len('')
                         '%d\n' # compressed block len
                         '%s'  # zheader
                         '%s'  # block
                         % (len(stripped_block), empty_z_header,
                            stripped_block),
                         wire_bytes)

    def test__wire_bytes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        block_bytes = block.to_bytes()
        wire_bytes = manager._wire_bytes()
        (storage_kind, z_header_len, header_len,
         block_len, rest) = wire_bytes.split('\n', 4)
        z_header_len = int(z_header_len)
        header_len = int(header_len)
        block_len = int(block_len)
        self.assertEqual('groupcompress-block', storage_kind)
        self.assertEqual(33, z_header_len)
        self.assertEqual(25, header_len)
        self.assertEqual(len(block_bytes), block_len)
        z_header = rest[:z_header_len]
        header = zlib.decompress(z_header)
        self.assertEqual(header_len, len(header))
        entry1 = locations[('key1',)]
        entry4 = locations[('key4',)]
        self.assertEqualDiff('key1\n'
                             '\n'  # no parents
                             '%d\n' # start offset
                             '%d\n' # end offset
                             'key4\n'
                             '\n'
                             '%d\n'
                             '%d\n'
                             % (entry1[0], entry1[1],
                                entry4[0], entry4[1]),
                             header)
        z_block = rest[z_header_len:]
        self.assertEqual(block_bytes, z_block)

    def test_from_bytes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        wire_bytes = manager._wire_bytes()
        self.assertStartsWith(wire_bytes, 'groupcompress-block\n')
        manager = groupcompress._LazyGroupContentManager.from_bytes(wire_bytes)
        self.assertIsInstance(manager, groupcompress._LazyGroupContentManager)
        self.assertEqual(2, len(manager._factories))
        self.assertEqual(block._z_content, manager._block._z_content)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key1',), ('key4',)], result_order)

    def test__check_rebuild_no_changes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Request all the keys, which ensures that we won't rebuild
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key2',), locations, block, manager)
        self.add_key_to_manager(('key3',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIs(block, manager._block)

    def test__check_rebuild_only_one(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Request just the first key, which should trigger a 'strip' action
        self.add_key_to_manager(('key1',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        # We should still be able to get the content out of this block, though
        # it should only have 1 entry
        for record in manager.get_record_stream():
            self.assertEqual(('key1',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))

    def test__check_rebuild_middle(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Requesting a small key in the middle should trigger a 'rebuild'
        self.add_key_to_manager(('key4',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        for record in manager.get_record_stream():
            self.assertEqual(('key4',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))
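

# For reference, test__wire_bytes above pins down the lazy-manager wire
# format: a 'groupcompress-block\n' tag, then three length lines
# (compressed header, uncompressed header, block), then the zlib-compressed
# header entries and the raw block bytes. Below is a hedged sketch of
# assembling those bytes by hand for parentless keys; the real producer is
# _LazyGroupContentManager._wire_bytes.
def _example_wire_bytes(entries, block_bytes):
    """Build wire bytes from (name, start, end) tuples and a block."""
    # each header entry is: key name, a blank parents line, start, end
    header = ''.join('%s\n\n%d\n%d\n' % (name, start, end)
                     for name, start, end in entries)
    z_header = zlib.compress(header)
    return ('groupcompress-block\n'
            '%d\n%d\n%d\n%s%s' % (len(z_header), len(header),
                                  len(block_bytes), z_header, block_bytes))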