~bzr-pqm/bzr/bzr.dev

Viewing changes to bzrlib/tests/test_groupcompress.py

  • Committer: Martin Pool
  • Date: 2005-07-22 22:37:53 UTC
  • Revision ID: mbp@sourcefrog.net-20050722223753-7dced4e32d3ce21d
- add the start of a test for inventory file-id matching

# Copyright (C) 2008, 2009, 2010 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Tests for group compression."""

import zlib

from bzrlib import (
    btree_index,
    groupcompress,
    errors,
    index as _mod_index,
    osutils,
    tests,
    trace,
    versionedfile,
    )
from bzrlib.osutils import sha_string
from bzrlib.tests.test__groupcompress import compiled_groupcompress_feature


def load_tests(standard_tests, module, loader):
    """Parameterize tests for all versions of groupcompress."""
    to_adapt, result = tests.split_suite_by_condition(
        standard_tests, tests.condition_isinstance(TestAllGroupCompressors))
    scenarios = [
        ('python', {'compressor': groupcompress.PythonGroupCompressor}),
        ]
    if compiled_groupcompress_feature.available():
        scenarios.append(('C',
            {'compressor': groupcompress.PyrexGroupCompressor}))
    return tests.multiply_tests(to_adapt, scenarios, result)


class TestGroupCompressor(tests.TestCase):

    def _chunks_to_repr_lines(self, chunks):
        return '\n'.join(map(repr, ''.join(chunks).split('\n')))

    def assertEqualDiffEncoded(self, expected, actual):
        """Compare the actual content to the expected content.

        :param expected: A group of chunks that we expect to see
        :param actual: The measured 'chunks'

        We will transform the chunks back into lines, and then run 'repr()'
        over them to handle non-ascii characters.
        """
        self.assertEqualDiff(self._chunks_to_repr_lines(expected),
                             self._chunks_to_repr_lines(actual))


class TestAllGroupCompressors(TestGroupCompressor):
    """Tests for GroupCompressor"""

    compressor = None # Set by multiply_tests

    def test_empty_delta(self):
        compressor = self.compressor()
        self.assertEqual([], compressor.chunks)

    def test_one_nosha_delta(self):
        # diff against NULL
        compressor = self.compressor()
        sha1, start_point, end_point, _ = compressor.compress(('label',),
            'strange\ncommon\n', None)
        self.assertEqual(sha_string('strange\ncommon\n'), sha1)
        expected_lines = 'f' '\x0f' 'strange\ncommon\n'
        self.assertEqual(expected_lines, ''.join(compressor.chunks))
        self.assertEqual(0, start_point)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_empty_content(self):
        compressor = self.compressor()
        # Adding empty bytes should return the 'null' record
        sha1, start_point, end_point, kind = compressor.compress(('empty',),
                                                                 '', None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)
        self.assertEqual(0, compressor.endpoint)
        self.assertEqual([], compressor.chunks)
        # Even after adding some content
        compressor.compress(('content',), 'some\nbytes\n', None)
        self.assertTrue(compressor.endpoint > 0)
        sha1, start_point, end_point, kind = compressor.compress(('empty2',),
                                                                 '', None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)

    def test_extract_from_compressor(self):
        # Knit fetching will try to reconstruct texts locally which results in
        # reading something that is in the compressor stream already.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, _, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        # get the first out
        self.assertEqual(('strange\ncommon long line\n'
                          'that needs a 16 byte match\n', sha1_1),
                         compressor.extract(('label',)))
        # and the second
        self.assertEqual(('common long line\nthat needs a 16 byte match\n'
                          'different\n', sha1_2),
                         compressor.extract(('newlabel',)))

    def test_pop_last(self):
        compressor = self.compressor()
        _, _, _, _ = compressor.compress(('key1',),
            'some text\nfor the first entry\n', None)
        expected_lines = list(compressor.chunks)
        _, _, _, _ = compressor.compress(('key2',),
            'some text\nfor the second entry\n', None)
        compressor.pop_last()
        self.assertEqual(expected_lines, compressor.chunks)


class TestPyrexGroupCompressor(TestGroupCompressor):

    _test_needs_features = [compiled_groupcompress_feature]
    compressor = groupcompress.PyrexGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            'strange\n'
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'new\n'
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0f',
            # source and target length
            '\x36',
            # copy the line common
            '\x91\x0a\x2c', #copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0b',
            # source and target length
            '\x5f'
            # insert new
            '\x03new',
            # Copy of first parent 'common' range
            '\x91\x09\x31' # copy, offset 0x09, 0x31 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)
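

# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original test module): the
# expected_lines in the delta tests above spell out the encoding being
# checked -- a 'd' marker plus delta length, a target length, then a stream
# of copy and insert instructions.  A minimal decoder for just the
# instruction forms that appear in these tests (single-byte offset and
# length, as in '\x91\x0a\x2c') might look like the following; the real
# format also allows multi-byte offsets/lengths selected by the low bits of
# the command byte.
def _decode_delta_instructions_sketch(instruction_bytes):
    """Yield ('copy', offset, length) and ('insert', literal) tuples."""
    pos = 0
    while pos < len(instruction_bytes):
        cmd = ord(instruction_bytes[pos])
        pos += 1
        if cmd & 0x80:
            # Copy command; assume one offset byte and one length byte
            # follow, which is what the 0x91 commands above use.
            offset = ord(instruction_bytes[pos])
            length = ord(instruction_bytes[pos + 1])
            pos += 2
            yield ('copy', offset, length)
        else:
            # Insert command: cmd is the number of literal bytes that follow.
            yield ('insert', instruction_bytes[pos:pos + cmd])
            pos += cmd
# For the instructions in test_two_nosha_delta this yields
# ('copy', 0x0a, 0x2c) followed by ('insert', 'different\n').
# ---------------------------------------------------------------------------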


class TestPythonGroupCompressor(TestGroupCompressor):

    compressor = groupcompress.PythonGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            'strange\n'
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'new\n'
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0f',
            # target length
            '\x36',
            # copy the line common
            '\x91\x0a\x2c', #copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0c',
            # target length
            '\x5f'
            # insert new
            '\x04new\n',
            # Copy of first parent 'common' range
            '\x91\x0a\x30' # copy, offset 0x0a, 0x30 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)


class TestGroupCompressBlock(tests.TestCase):

    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts."""
        compressor = groupcompress.GroupCompressor()
        start = 0
        for key in sorted(key_to_text):
            compressor.compress(key, key_to_text[key], None)
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.iteritems())
        block = compressor.flush()
        raw_bytes = block.to_bytes()
        # Go through from_bytes(to_bytes()) so that we start with a compressed
        # content object
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)

    def test_from_empty_bytes(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes, '')

    def test_from_minimal_bytes(self):
        block = groupcompress.GroupCompressBlock.from_bytes(
            'gcb1z\n0\n0\n')
        self.assertIsInstance(block, groupcompress.GroupCompressBlock)
        self.assertIs(None, block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content()
        self.assertEqual('', block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content() # Ensure content is safe to call 2x

    def test_from_invalid(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes,
                          'this is not a valid header')

    def test_from_bytes(self):
        content = ('a tiny bit of content\n')
        z_content = zlib.compress(content)
        z_bytes = (
            'gcb1z\n' # group compress block v1 zlib
            '%d\n' # Length of compressed content
            '%d\n' # Length of uncompressed content
            '%s'   # Compressed content
            ) % (len(z_content), len(content), z_content)
        block = groupcompress.GroupCompressBlock.from_bytes(
            z_bytes)
        self.assertEqual(z_content, block._z_content)
        self.assertIs(None, block._content)
        self.assertEqual(len(z_content), block._z_content_length)
        self.assertEqual(len(content), block._content_length)
        block._ensure_content()
        self.assertEqual(z_content, block._z_content)
        self.assertEqual(content, block._content)

    def test_to_chunks(self):
        content_chunks = ['this is some content\n',
                          'this content will be compressed\n']
        content_len = sum(map(len, content_chunks))
        content = ''.join(content_chunks)
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_chunked_content(content_chunks, content_len)
        total_len, block_chunks = gcb.to_chunks()
        block_bytes = ''.join(block_chunks)
        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
        self.assertEqual(total_len, len(block_bytes))
        self.assertEqual(gcb._content_length, content_len)
        expected_header =('gcb1z\n' # group compress block v1 zlib
                          '%d\n' # Length of compressed content
                          '%d\n' # Length of uncompressed content
                         ) % (gcb._z_content_length, gcb._content_length)
        # The first chunk should be the header chunk. It is small, fixed size,
        # and there is no compelling reason to split it up
        self.assertEqual(expected_header, block_chunks[0])
        self.assertStartsWith(block_bytes, expected_header)
        remaining_bytes = block_bytes[len(expected_header):]
        raw_bytes = zlib.decompress(remaining_bytes)
        self.assertEqual(content, raw_bytes)

    def test_to_bytes(self):
        content = ('this is some content\n'
                   'this content will be compressed\n')
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_content(content)
        bytes = gcb.to_bytes()
        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
        self.assertEqual(gcb._content_length, len(content))
        expected_header =('gcb1z\n' # group compress block v1 zlib
                          '%d\n' # Length of compressed content
                          '%d\n' # Length of uncompressed content
                         ) % (gcb._z_content_length, gcb._content_length)
        self.assertStartsWith(bytes, expected_header)
        remaining_bytes = bytes[len(expected_header):]
        raw_bytes = zlib.decompress(remaining_bytes)
        self.assertEqual(content, raw_bytes)

        # we should get the same results if using the chunked version
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_chunked_content(['this is some content\n'
                                 'this content will be compressed\n'],
                                 len(content))
        old_bytes = bytes
        bytes = gcb.to_bytes()
        self.assertEqual(old_bytes, bytes)

    def test_partial_decomp(self):
        content_chunks = []
        # We need a sufficient amount of data so that zlib.decompress has
        # partial decompression to work with. Most auto-generated data
        # compresses a bit too well; we want a combination, so we combine a sha
        # hash with compressible data.
        for i in xrange(2048):
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
            content_chunks.append(next_content)
            next_sha1 = osutils.sha_string(next_content)
            content_chunks.append(next_sha1 + '\n')
        content = ''.join(content_chunks)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content_chunks = (z_content,)
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = 158634
        self.assertIs(None, block._content)
        block._ensure_content(100)
        self.assertIsNot(None, block._content)
        # We have decompressed at least 100 bytes
        self.assertTrue(len(block._content) >= 100)
        # We have not decompressed the whole content
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # ensuring content that we already have shouldn't cause any more data
        # to be extracted
        cur_len = len(block._content)
        block._ensure_content(cur_len - 10)
        self.assertEqual(cur_len, len(block._content))
        # Now we want a bit more content
        cur_len += 10
        block._ensure_content(cur_len)
        self.assertTrue(len(block._content) >= cur_len)
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # And now let's finish
        block._ensure_content(158634)
        self.assertEqualDiff(content, block._content)
        # And the decompressor is finalized
        self.assertIs(None, block._z_content_decompressor)

    def test__ensure_all_content(self):
        content_chunks = []
        # We need a sufficient amount of data so that zlib.decompress has
        # partial decompression to work with. Most auto-generated data
        # compresses a bit too well; we want a combination, so we combine a sha
        # hash with compressible data.
        for i in xrange(2048):
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
            content_chunks.append(next_content)
            next_sha1 = osutils.sha_string(next_content)
            content_chunks.append(next_sha1 + '\n')
        content = ''.join(content_chunks)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content_chunks = (z_content,)
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = 158634
        self.assertIs(None, block._content)
        # The first _ensure_content got all of the required data
        block._ensure_content(158634)
        self.assertEqualDiff(content, block._content)
        # And we should have released the _z_content_decompressor since it was
        # fully consumed
        self.assertIs(None, block._z_content_decompressor)

    def test__dump(self):
        dup_content = 'some duplicate content\nwhich is sufficiently long\n'
        key_to_text = {('1',): dup_content + '1 unique\n',
                       ('2',): dup_content + '2 extra special\n'}
        locs, block = self.make_block(key_to_text)
        self.assertEqual([('f', len(key_to_text[('1',)])),
                          ('d', 21, len(key_to_text[('2',)]),
                           [('c', 2, len(dup_content)),
                            ('i', len('2 extra special\n'), '')
                           ]),
                         ], block._dump())
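

# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original test module): the tests above
# pin down the serialized block layout -- a 'gcb1z' marker line, the
# compressed length, the uncompressed length, then the zlib-compressed
# content.  A minimal parser for that framing, assuming well-formed input,
# could look like this (GroupCompressBlock.from_bytes is the real thing):
def _parse_gcb1z_block_sketch(raw_bytes):
    """Return the uncompressed content of a zlib group compress block."""
    marker, z_len, c_len, body = raw_bytes.split('\n', 3)
    if marker != 'gcb1z':
        raise ValueError('not a zlib-compressed group compress block')
    z_len, c_len = int(z_len), int(c_len)
    if z_len == 0:
        content = ''
    else:
        content = zlib.decompress(body[:z_len])
    if len(content) != c_len:
        raise ValueError('uncompressed length does not match header')
    return content
# e.g. _parse_gcb1z_block_sketch('gcb1z\n0\n0\n') returns ''.
# ---------------------------------------------------------------------------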


class TestCaseWithGroupCompressVersionedFiles(
        tests.TestCaseWithMemoryTransport):

    def make_test_vf(self, create_graph, keylength=1, do_cleanup=True,
                     dir='.', inconsistency_fatal=True):
        t = self.get_transport(dir)
        t.ensure_base()
        vf = groupcompress.make_pack_factory(graph=create_graph,
            delta=False, keylength=keylength,
            inconsistency_fatal=inconsistency_fatal)(t)
        if do_cleanup:
            self.addCleanup(groupcompress.cleanup_pack_group, vf)
        return vf


class TestGroupCompressVersionedFiles(TestCaseWithGroupCompressVersionedFiles):

    def make_g_index(self, name, ref_lists=0, nodes=[]):
        builder = btree_index.BTreeBuilder(ref_lists)
        for node, references, value in nodes:
            builder.add_node(node, references, value)
        stream = builder.finish()
        trans = self.get_transport()
        size = trans.put_file(name, stream)
        return btree_index.BTreeGraphIndex(trans, name, size)

    def make_g_index_missing_parent(self):
        graph_index = self.make_g_index('missing_parent', 1,
            [(('parent', ), '2 78 2 10', ([],)),
             (('tip', ), '2 78 2 10',
              ([('parent', ), ('missing-parent', )],)),
              ])
        return graph_index

    def test_get_record_stream_as_requested(self):
        # Consider promoting 'as-requested' to general availability, and
        # make this a VF interface test
        vf = self.make_test_vf(False, dir='source')
        vf.add_lines(('a',), (), ['lines\n'])
        vf.add_lines(('b',), (), ['lines\n'])
        vf.add_lines(('c',), (), ['lines\n'])
        vf.add_lines(('d',), (), ['lines\n'])
        vf.writer.end()
        keys = [record.key for record in vf.get_record_stream(
                    [('a',), ('b',), ('c',), ('d',)],
                    'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)],
                    'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

        # It should work even after being repacked into another VF
        vf2 = self.make_test_vf(False, dir='target')
        vf2.insert_record_stream(vf.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)], 'as-requested', False))
        vf2.writer.end()

        keys = [record.key for record in vf2.get_record_stream(
                    [('a',), ('b',), ('c',), ('d',)],
                    'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf2.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)],
                    'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

    def test_insert_record_stream_reuses_blocks(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        block_bytes = {}
        stream = vf.get_record_stream([(r,) for r in 'abcdefgh'],
                                      'unordered', False)
        num_records = 0
        for record in stream:
            if record.key in [('a',), ('e',)]:
                self.assertEqual('groupcompress-block', record.storage_kind)
            else:
                self.assertEqual('groupcompress-block-ref',
                                 record.storage_kind)
            block_bytes[record.key] = record._manager._block._z_content
            num_records += 1
        self.assertEqual(8, num_records)
        for r in 'abcd':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('a',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('e',)])
        for r in 'efgh':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('e',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('a',)])
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # Ordering in 'groupcompress' order should actually swap the groups in
        # the target vf, but the groups themselves should not be disturbed.
        def small_size_stream():
            for record in vf.get_record_stream([(r,) for r in 'abcdefgh'],
                                               'groupcompress', False):
                record._manager._full_enough_block_size = \
                    record._manager._block._content_length
                yield record

        vf2.insert_record_stream(small_size_stream())
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        vf2.writer.end()
        num_records = 0
        for record in stream:
            num_records += 1
            self.assertEqual(block_bytes[record.key],
                             record._manager._block._z_content)
        self.assertEqual(8, num_records)

    def test_insert_record_stream_packs_on_the_fly(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        # Now copy the blocks into another vf, and see that the
        # insert_record_stream rebuilt a new block on-the-fly because of
        # under-utilization
        vf2 = self.make_test_vf(True, dir='target')
        vf2.insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False))
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        vf2.writer.end()
        num_records = 0
        # All of the records should be recombined into a single block
        block = None
        for record in stream:
            num_records += 1
            if block is None:
                block = record._manager._block
            else:
                self.assertIs(block, record._manager._block)
        self.assertEqual(8, num_records)

    def test__insert_record_stream_no_reuse_block(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        vf.writer.end()
        self.assertEqual(8, len(list(vf.get_record_stream(
                                        [(r,) for r in 'abcdefgh'],
                                        'unordered', False))))
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # Ordering in 'groupcompress' order should actually swap the groups in
        # the target vf, but the groups themselves should not be disturbed.
        list(vf2._insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False),
            reuse_blocks=False))
        vf2.writer.end()
        # After inserting with reuse_blocks=False, we should have everything in
        # a single new block.
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        block = None
        for record in stream:
            if block is None:
                block = record._manager._block
            else:
                self.assertIs(block, record._manager._block)

    def test_add_missing_noncompression_parent_unvalidated_index(self):
        unvalidated = self.make_g_index_missing_parent()
        combined = _mod_index.CombinedGraphIndex([unvalidated])
        index = groupcompress._GCGraphIndex(combined,
            is_locked=lambda: True, parents=True,
            track_external_parent_refs=True)
        index.scan_unvalidated_index(unvalidated)
        self.assertEqual(
            frozenset([('missing-parent',)]), index.get_missing_parents())

    def test_track_external_parent_refs(self):
        g_index = self.make_g_index('empty', 1, [])
        mod_index = btree_index.BTreeBuilder(1, 1)
        combined = _mod_index.CombinedGraphIndex([g_index, mod_index])
        index = groupcompress._GCGraphIndex(combined,
            is_locked=lambda: True, parents=True,
            add_callback=mod_index.add_nodes,
            track_external_parent_refs=True)
        index.add_records([
            (('new-key',), '2 10 2 10', [(('parent-1',), ('parent-2',))])])
        self.assertEqual(
            frozenset([('parent-1',), ('parent-2',)]),
            index.get_missing_parents())

    def make_source_with_b(self, a_parent, path):
        source = self.make_test_vf(True, dir=path)
        source.add_lines(('a',), (), ['lines\n'])
        if a_parent:
            b_parents = (('a',),)
        else:
            b_parents = ()
        source.add_lines(('b',), b_parents, ['lines\n'])
        return source

    def do_inconsistent_inserts(self, inconsistency_fatal):
        target = self.make_test_vf(True, dir='target',
                                   inconsistency_fatal=inconsistency_fatal)
        for x in range(2):
            source = self.make_source_with_b(x==1, 'source%s' % x)
            target.insert_record_stream(source.get_record_stream(
                [('b',)], 'unordered', False))

    def test_inconsistent_redundant_inserts_warn(self):
        """Should not insert a record that is already present."""
        warnings = []
        def warning(template, args):
            warnings.append(template % args)
        _trace_warning = trace.warning
        trace.warning = warning
        try:
            self.do_inconsistent_inserts(inconsistency_fatal=False)
        finally:
            trace.warning = _trace_warning
        self.assertEqual(["inconsistent details in skipped record: ('b',)"
                          " ('42 32 0 8', ((),)) ('74 32 0 8', ((('a',),),))"],
                         warnings)

    def test_inconsistent_redundant_inserts_raises(self):
        e = self.assertRaises(errors.KnitCorrupt, self.do_inconsistent_inserts,
                              inconsistency_fatal=True)
        self.assertContainsRe(str(e), "Knit.* corrupt: inconsistent details"
                              " in add_records:"
                              " \('b',\) \('42 32 0 8', \(\(\),\)\) \('74 32"
                              " 0 8', \(\(\('a',\),\),\)\)")

    def test_clear_cache(self):
        vf = self.make_source_with_b(True, 'source')
        vf.writer.end()
        for record in vf.get_record_stream([('a',), ('b',)], 'unordered',
                                           True):
            pass
        self.assertTrue(len(vf._group_cache) > 0)
        vf.clear_cache()
        self.assertEqual(0, len(vf._group_cache))


class StubGCVF(object):
    def __init__(self, canned_get_blocks=None):
        self._group_cache = {}
        self._canned_get_blocks = canned_get_blocks or []
    def _get_blocks(self, read_memos):
        return iter(self._canned_get_blocks)


class Test_BatchingBlockFetcher(TestCaseWithGroupCompressVersionedFiles):
    """Simple whitebox unit tests for _BatchingBlockFetcher."""

    def test_add_key_new_read_memo(self):
        """Adding a key with an uncached read_memo new to this batch adds that
        read_memo to the list of memos to fetch.
        """
        # locations are: index_memo, ignored, parents, ignored
        # where index_memo is: (idx, offset, len, factory_start, factory_end)
        # and (idx, offset, size) is known as the 'read_memo', identifying the
        # raw bytes needed.
        read_memo = ('fake index', 100, 50)
        locations = {
            ('key',): (read_memo + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), locations)
        total_size = batcher.add_key(('key',))
        self.assertEqual(50, total_size)
        self.assertEqual([('key',)], batcher.keys)
        self.assertEqual([read_memo], batcher.memos_to_get)

    def test_add_key_duplicate_read_memo(self):
        """read_memos that occur multiple times in a batch will only be fetched
        once.
        """
        read_memo = ('fake index', 100, 50)
        # Two keys, both sharing the same read memo (but different overall
        # index_memos).
        locations = {
            ('key1',): (read_memo + (0, 1), None, None, None),
            ('key2',): (read_memo + (1, 2), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), locations)
        total_size = batcher.add_key(('key1',))
        total_size = batcher.add_key(('key2',))
        self.assertEqual(50, total_size)
        self.assertEqual([('key1',), ('key2',)], batcher.keys)
        self.assertEqual([read_memo], batcher.memos_to_get)

    def test_add_key_cached_read_memo(self):
        """Adding a key with a cached read_memo will not cause that read_memo
        to be added to the list to fetch.
        """
        read_memo = ('fake index', 100, 50)
        gcvf = StubGCVF()
        gcvf._group_cache[read_memo] = 'fake block'
        locations = {
            ('key',): (read_memo + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
        total_size = batcher.add_key(('key',))
        self.assertEqual(0, total_size)
        self.assertEqual([('key',)], batcher.keys)
        self.assertEqual([], batcher.memos_to_get)

    def test_yield_factories_empty(self):
        """An empty batch yields no factories."""
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), {})
        self.assertEqual([], list(batcher.yield_factories()))

    def test_yield_factories_calls_get_blocks(self):
        """Uncached memos are retrieved via get_blocks."""
        read_memo1 = ('fake index', 100, 50)
        read_memo2 = ('fake index', 150, 40)
        gcvf = StubGCVF(
            canned_get_blocks=[
                (read_memo1, groupcompress.GroupCompressBlock()),
                (read_memo2, groupcompress.GroupCompressBlock())])
        locations = {
            ('key1',): (read_memo1 + (None, None), None, None, None),
            ('key2',): (read_memo2 + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
        batcher.add_key(('key1',))
        batcher.add_key(('key2',))
        factories = list(batcher.yield_factories(full_flush=True))
        self.assertLength(2, factories)
        keys = [f.key for f in factories]
        kinds = [f.storage_kind for f in factories]
        self.assertEqual([('key1',), ('key2',)], keys)
        self.assertEqual(['groupcompress-block', 'groupcompress-block'], kinds)

    def test_yield_factories_flushing(self):
        """yield_factories holds back on yielding results from the final block
        unless passed full_flush=True.
        """
        fake_block = groupcompress.GroupCompressBlock()
        read_memo = ('fake index', 100, 50)
        gcvf = StubGCVF()
        gcvf._group_cache[read_memo] = fake_block
        locations = {
            ('key',): (read_memo + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
        batcher.add_key(('key',))
        self.assertEqual([], list(batcher.yield_factories()))
        factories = list(batcher.yield_factories(full_flush=True))
        self.assertLength(1, factories)
        self.assertEqual(('key',), factories[0].key)
        self.assertEqual('groupcompress-block', factories[0].storage_kind)
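

# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original test module): the comments in
# test_add_key_new_read_memo above describe how a locations entry is shaped.
# Under that assumption, batching keys amounts to collecting the unique
# read_memos (the first three elements of each index_memo) and summing their
# sizes, roughly what add_key does for uncached memos:
def _batch_read_memos_sketch(locations, keys):
    """Return (unique_read_memos, total_size_in_bytes) for the given keys."""
    memos = []
    total_size = 0
    for key in keys:
        index_memo = locations[key][0]
        read_memo = index_memo[:3]  # (idx, offset, size)
        if read_memo not in memos:
            memos.append(read_memo)
            total_size += read_memo[2]
    return memos, total_size
# For the two keys in test_add_key_duplicate_read_memo this returns a single
# memo and a total size of 50, matching the assertions above.
# ---------------------------------------------------------------------------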


class TestLazyGroupCompress(tests.TestCaseWithTransport):

    _texts = {
        ('key1',): "this is a text\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
        ('key2',): "another text\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
        ('key3',): "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
        ('key4',): "this will be extracted\n"
                   "but references most of its bytes from\n"
                   "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
    }
    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts."""
        compressor = groupcompress.GroupCompressor()
        start = 0
        for key in sorted(key_to_text):
            compressor.compress(key, key_to_text[key], None)
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.iteritems())
        block = compressor.flush()
        raw_bytes = block.to_bytes()
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)

    def add_key_to_manager(self, key, locations, block, manager):
        start, end = locations[key]
        manager.add_factory(key, (), start, end)

    def make_block_and_full_manager(self, texts):
        locations, block = self.make_block(texts)
        manager = groupcompress._LazyGroupContentManager(block)
        for key in sorted(texts):
            self.add_key_to_manager(key, locations, block, manager)
        return block, manager

    def test_get_fulltexts(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key2',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key1',), ('key2',)], result_order)

        # If we build the manager in the opposite order, we should get them
        # back in the opposite order
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key2',), locations, block, manager)
        self.add_key_to_manager(('key1',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key2',), ('key1',)], result_order)

    def test__wire_bytes_no_keys(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        wire_bytes = manager._wire_bytes()
        block_length = len(block.to_bytes())
        # We should have triggered a strip, since we aren't using any content
        stripped_block = manager._block.to_bytes()
        self.assertTrue(block_length > len(stripped_block))
        empty_z_header = zlib.compress('')
        self.assertEqual('groupcompress-block\n'
                         '8\n' # len(compress(''))
                         '0\n' # len('')
                         '%d\n'# compressed block len
                         '%s'  # zheader
                         '%s'  # block
                         % (len(stripped_block), empty_z_header,
                            stripped_block),
                         wire_bytes)

    def test__wire_bytes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        block_bytes = block.to_bytes()
        wire_bytes = manager._wire_bytes()
        (storage_kind, z_header_len, header_len,
         block_len, rest) = wire_bytes.split('\n', 4)
        z_header_len = int(z_header_len)
        header_len = int(header_len)
        block_len = int(block_len)
        self.assertEqual('groupcompress-block', storage_kind)
        self.assertEqual(34, z_header_len)
        self.assertEqual(26, header_len)
        self.assertEqual(len(block_bytes), block_len)
        z_header = rest[:z_header_len]
        header = zlib.decompress(z_header)
        self.assertEqual(header_len, len(header))
        entry1 = locations[('key1',)]
        entry4 = locations[('key4',)]
        self.assertEqualDiff('key1\n'
                             '\n'  # no parents
                             '%d\n' # start offset
                             '%d\n' # end offset
                             'key4\n'
                             '\n'
                             '%d\n'
                             '%d\n'
                             % (entry1[0], entry1[1],
                                entry4[0], entry4[1]),
                            header)
        z_block = rest[z_header_len:]
        self.assertEqual(block_bytes, z_block)

    def test_from_bytes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        wire_bytes = manager._wire_bytes()
        self.assertStartsWith(wire_bytes, 'groupcompress-block\n')
        manager = groupcompress._LazyGroupContentManager.from_bytes(wire_bytes)
        self.assertIsInstance(manager, groupcompress._LazyGroupContentManager)
        self.assertEqual(2, len(manager._factories))
        self.assertEqual(block._z_content, manager._block._z_content)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key1',), ('key4',)], result_order)

    def test__check_rebuild_no_changes(self):
        block, manager = self.make_block_and_full_manager(self._texts)
        manager._check_rebuild_block()
        self.assertIs(block, manager._block)

    def test__check_rebuild_only_one(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Request just the first key, which should trigger a 'strip' action
        self.add_key_to_manager(('key1',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        # We should be able to still get the content out of this block, though
        # it should only have 1 entry
        for record in manager.get_record_stream():
            self.assertEqual(('key1',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))

    def test__check_rebuild_middle(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Requesting a small key in the middle should trigger a 'rebuild'
        self.add_key_to_manager(('key4',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        for record in manager.get_record_stream():
            self.assertEqual(('key4',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))

    def test_check_is_well_utilized_all_keys(self):
        block, manager = self.make_block_and_full_manager(self._texts)
        self.assertFalse(manager.check_is_well_utilized())
        # Though we can fake it by changing the recommended minimum size
        manager._full_enough_block_size = block._content_length
        self.assertTrue(manager.check_is_well_utilized())
        # Setting it just above causes it to fail
        manager._full_enough_block_size = block._content_length + 1
        self.assertFalse(manager.check_is_well_utilized())
        # Setting the mixed-block size doesn't do anything, because the content
        # is considered to not be 'mixed'
        manager._full_enough_mixed_block_size = block._content_length
        self.assertFalse(manager.check_is_well_utilized())

    def test_check_is_well_utilized_mixed_keys(self):
        texts = {}
        f1k1 = ('f1', 'k1')
        f1k2 = ('f1', 'k2')
        f2k1 = ('f2', 'k1')
        f2k2 = ('f2', 'k2')
        texts[f1k1] = self._texts[('key1',)]
        texts[f1k2] = self._texts[('key2',)]
        texts[f2k1] = self._texts[('key3',)]
        texts[f2k2] = self._texts[('key4',)]
        block, manager = self.make_block_and_full_manager(texts)
        self.assertFalse(manager.check_is_well_utilized())
        manager._full_enough_block_size = block._content_length
        self.assertTrue(manager.check_is_well_utilized())
        manager._full_enough_block_size = block._content_length + 1
        self.assertFalse(manager.check_is_well_utilized())
        manager._full_enough_mixed_block_size = block._content_length
        self.assertTrue(manager.check_is_well_utilized())

    def test_check_is_well_utilized_partial_use(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        manager._full_enough_block_size = block._content_length
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key2',), locations, block, manager)
        # Just using the content from key1 and 2 is not enough to be considered
        # 'complete'
        self.assertFalse(manager.check_is_well_utilized())
        # However if we add key4, then we have enough, as we only require 75%
        # consumption
        self.add_key_to_manager(('key4',), locations, block, manager)
        self.assertTrue(manager.check_is_well_utilized())
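

# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original test module): the _wire_bytes
# tests above fix the 'groupcompress-block' wire framing -- the storage kind,
# the zlib'd key-header length, the plain header length, the block length,
# then the zlib'd header (key, parents, start, end per entry) followed by the
# block bytes.  A minimal reader for that framing, assuming well-formed input
# and mirroring what test__wire_bytes does by hand:
def _split_gc_wire_bytes_sketch(wire_bytes):
    """Return (header_lines, block_bytes) from groupcompress wire bytes."""
    (kind, z_header_len, header_len,
     block_len, rest) = wire_bytes.split('\n', 4)
    if kind != 'groupcompress-block':
        raise ValueError('unexpected storage kind: %r' % (kind,))
    z_header_len = int(z_header_len)
    header = zlib.decompress(rest[:z_header_len])
    if len(header) != int(header_len):
        raise ValueError('header length mismatch')
    block_bytes = rest[z_header_len:]
    if len(block_bytes) != int(block_len):
        raise ValueError('block length mismatch')
    # header holds one four-line entry per key: name, parents, start, end.
    return header.splitlines(), block_bytes
# ---------------------------------------------------------------------------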


class Test_GCBuildDetails(tests.TestCase):

    def test_acts_like_tuple(self):
        # _GCBuildDetails inlines some of the data that used to be spread out
        # across a bunch of tuples
        bd = groupcompress._GCBuildDetails((('parent1',), ('parent2',)),
            ('INDEX', 10, 20, 0, 5))
        self.assertEqual(4, len(bd))
        self.assertEqual(('INDEX', 10, 20, 0, 5), bd[0])
        self.assertEqual(None, bd[1]) # Compression Parent is always None
        self.assertEqual((('parent1',), ('parent2',)), bd[2])
        self.assertEqual(('group', None), bd[3]) # Record details

    def test__repr__(self):
        bd = groupcompress._GCBuildDetails((('parent1',), ('parent2',)),
            ('INDEX', 10, 20, 0, 5))
        self.assertEqual("_GCBuildDetails(('INDEX', 10, 20, 0, 5),"
                         " (('parent1',), ('parent2',)))",
                         repr(bd))