~bzr-pqm/bzr/bzr.dev


Viewing changes to bzrlib/tests/test_groupcompress.py

  • Committer: Vincent Ladeuil
  • Date: 2010-10-26 08:08:23 UTC
  • Merged from: (5514.1.1 665100-content-type)
  • Merged to: the branch mainline, in revision 5516
  • Revision ID: v.ladeuil+lp@free.fr-20101026080823-3wggo03b7cpn9908
Correctly set the Content-Type header when POSTing http requests

# Copyright (C) 2008, 2009, 2010 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Tests for group compression."""

import zlib

from bzrlib import (
    btree_index,
    groupcompress,
    errors,
    index as _mod_index,
    osutils,
    tests,
    trace,
    versionedfile,
    )
from bzrlib.osutils import sha_string
from bzrlib.tests.test__groupcompress import compiled_groupcompress_feature


def load_tests(standard_tests, module, loader):
    """Parameterize tests for all versions of groupcompress."""
    to_adapt, result = tests.split_suite_by_condition(
        standard_tests, tests.condition_isinstance(TestAllGroupCompressors))
    scenarios = [
        ('python', {'compressor': groupcompress.PythonGroupCompressor}),
        ]
    if compiled_groupcompress_feature.available():
        scenarios.append(('C',
            {'compressor': groupcompress.PyrexGroupCompressor}))
    return tests.multiply_tests(to_adapt, scenarios, result)


class TestGroupCompressor(tests.TestCase):

    def _chunks_to_repr_lines(self, chunks):
        return '\n'.join(map(repr, ''.join(chunks).split('\n')))

    def assertEqualDiffEncoded(self, expected, actual):
        """Compare the actual content to the expected content.

        :param expected: A group of chunks that we expect to see
        :param actual: The measured 'chunks'

        We will transform the chunks back into lines, and then run 'repr()'
        over them to handle non-ascii characters.
        """
        self.assertEqualDiff(self._chunks_to_repr_lines(expected),
                             self._chunks_to_repr_lines(actual))


class TestAllGroupCompressors(TestGroupCompressor):
    """Tests for GroupCompressor"""

    compressor = None # Set by multiply_tests

    def test_empty_delta(self):
        compressor = self.compressor()
        self.assertEqual([], compressor.chunks)

    def test_one_nosha_delta(self):
        # diff against NULL
        compressor = self.compressor()
        sha1, start_point, end_point, _ = compressor.compress(('label',),
            'strange\ncommon\n', None)
        self.assertEqual(sha_string('strange\ncommon\n'), sha1)
        expected_lines = 'f' '\x0f' 'strange\ncommon\n'
        self.assertEqual(expected_lines, ''.join(compressor.chunks))
        self.assertEqual(0, start_point)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_empty_content(self):
        compressor = self.compressor()
        # Adding empty bytes should return the 'null' record
        sha1, start_point, end_point, kind = compressor.compress(('empty',),
                                                                 '', None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)
        self.assertEqual(0, compressor.endpoint)
        self.assertEqual([], compressor.chunks)
        # Even after adding some content
        compressor.compress(('content',), 'some\nbytes\n', None)
        self.assertTrue(compressor.endpoint > 0)
        sha1, start_point, end_point, kind = compressor.compress(('empty2',),
                                                                 '', None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)

    def test_extract_from_compressor(self):
        # Knit fetching will try to reconstruct texts locally which results in
        # reading something that is in the compressor stream already.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, _, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        # get the first out
        self.assertEqual(('strange\ncommon long line\n'
                          'that needs a 16 byte match\n', sha1_1),
                         compressor.extract(('label',)))
        # and the second
        self.assertEqual(('common long line\nthat needs a 16 byte match\n'
                          'different\n', sha1_2),
                         compressor.extract(('newlabel',)))

    def test_pop_last(self):
        compressor = self.compressor()
        _, _, _, _ = compressor.compress(('key1',),
            'some text\nfor the first entry\n', None)
        expected_lines = list(compressor.chunks)
        _, _, _, _ = compressor.compress(('key2',),
            'some text\nfor the second entry\n', None)
        compressor.pop_last()
        self.assertEqual(expected_lines, compressor.chunks)


class TestPyrexGroupCompressor(TestGroupCompressor):

    _test_needs_features = [compiled_groupcompress_feature]
    compressor = groupcompress.PyrexGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            'strange\n'
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'new\n'
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0f',
            # source and target length
            '\x36',
            # copy the line common
            '\x91\x0a\x2c', #copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0b',
            # source and target length
            '\x5f'
            # insert new
            '\x03new',
            # Copy of first parent 'common' range
            '\x91\x09\x31' # copy, offset 0x09, 0x31 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)


class TestPythonGroupCompressor(TestGroupCompressor):

    compressor = groupcompress.PythonGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            'strange\n'
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'new\n'
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0f',
            # target length
            '\x36',
            # copy the line common
            '\x91\x0a\x2c', #copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0c',
            # target length
            '\x5f'
            # insert new
            '\x04new\n',
            # Copy of first parent 'common' range
            '\x91\x0a\x30' # copy, offset 0x0a, 0x30 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)


class TestGroupCompressBlock(tests.TestCase):

    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts."""
        compressor = groupcompress.GroupCompressor()
        start = 0
        for key in sorted(key_to_text):
            compressor.compress(key, key_to_text[key], None)
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.iteritems())
        block = compressor.flush()
        raw_bytes = block.to_bytes()
        # Go through from_bytes(to_bytes()) so that we start with a compressed
        # content object
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)

    def test_from_empty_bytes(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes, '')

    def test_from_minimal_bytes(self):
        block = groupcompress.GroupCompressBlock.from_bytes(
            'gcb1z\n0\n0\n')
        self.assertIsInstance(block, groupcompress.GroupCompressBlock)
        self.assertIs(None, block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content()
        self.assertEqual('', block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content() # Ensure content is safe to call 2x

    def test_from_invalid(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes,
                          'this is not a valid header')

    def test_from_bytes(self):
        content = ('a tiny bit of content\n')
        z_content = zlib.compress(content)
        z_bytes = (
            'gcb1z\n' # group compress block v1 plain
            '%d\n' # Length of compressed content
            '%d\n' # Length of uncompressed content
            '%s'   # Compressed content
            ) % (len(z_content), len(content), z_content)
        block = groupcompress.GroupCompressBlock.from_bytes(
            z_bytes)
        self.assertEqual(z_content, block._z_content)
        self.assertIs(None, block._content)
        self.assertEqual(len(z_content), block._z_content_length)
        self.assertEqual(len(content), block._content_length)
        block._ensure_content()
        self.assertEqual(z_content, block._z_content)
        self.assertEqual(content, block._content)

    def test_to_chunks(self):
        content_chunks = ['this is some content\n',
                          'this content will be compressed\n']
        content_len = sum(map(len, content_chunks))
        content = ''.join(content_chunks)
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_chunked_content(content_chunks, content_len)
        total_len, block_chunks = gcb.to_chunks()
        block_bytes = ''.join(block_chunks)
        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
        self.assertEqual(total_len, len(block_bytes))
        self.assertEqual(gcb._content_length, content_len)
        expected_header =('gcb1z\n' # group compress block v1 zlib
                          '%d\n' # Length of compressed content
                          '%d\n' # Length of uncompressed content
                         ) % (gcb._z_content_length, gcb._content_length)
        # The first chunk should be the header chunk. It is small, fixed size,
        # and there is no compelling reason to split it up
        self.assertEqual(expected_header, block_chunks[0])
        self.assertStartsWith(block_bytes, expected_header)
        remaining_bytes = block_bytes[len(expected_header):]
        raw_bytes = zlib.decompress(remaining_bytes)
        self.assertEqual(content, raw_bytes)

    def test_to_bytes(self):
        content = ('this is some content\n'
                   'this content will be compressed\n')
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_content(content)
        bytes = gcb.to_bytes()
        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
        self.assertEqual(gcb._content_length, len(content))
        expected_header =('gcb1z\n' # group compress block v1 zlib
                          '%d\n' # Length of compressed content
                          '%d\n' # Length of uncompressed content
                         ) % (gcb._z_content_length, gcb._content_length)
        self.assertStartsWith(bytes, expected_header)
        remaining_bytes = bytes[len(expected_header):]
        raw_bytes = zlib.decompress(remaining_bytes)
        self.assertEqual(content, raw_bytes)

        # we should get the same results if using the chunked version
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_chunked_content(['this is some content\n'
                                 'this content will be compressed\n'],
                                 len(content))
        old_bytes = bytes
        bytes = gcb.to_bytes()
        self.assertEqual(old_bytes, bytes)

    def test_partial_decomp(self):
        content_chunks = []
        # We need a sufficient amount of data so that zlib.decompress has
        # partial decompression to work with. Most auto-generated data
        # compresses a bit too well, we want a combination, so we combine a sha
        # hash with compressible data.
        for i in xrange(2048):
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
            content_chunks.append(next_content)
            next_sha1 = osutils.sha_string(next_content)
            content_chunks.append(next_sha1 + '\n')
        content = ''.join(content_chunks)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content_chunks = (z_content,)
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = 158634
        self.assertIs(None, block._content)
        block._ensure_content(100)
        self.assertIsNot(None, block._content)
        # We have decompressed at least 100 bytes
        self.assertTrue(len(block._content) >= 100)
        # We have not decompressed the whole content
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # ensuring content that we already have shouldn't cause any more data
        # to be extracted
        cur_len = len(block._content)
        block._ensure_content(cur_len - 10)
        self.assertEqual(cur_len, len(block._content))
        # Now we want a bit more content
        cur_len += 10
        block._ensure_content(cur_len)
        self.assertTrue(len(block._content) >= cur_len)
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # And now lets finish
        block._ensure_content(158634)
        self.assertEqualDiff(content, block._content)
        # And the decompressor is finalized
        self.assertIs(None, block._z_content_decompressor)

    def test__ensure_all_content(self):
        content_chunks = []
        # We need a sufficient amount of data so that zlib.decompress has
        # partial decompression to work with. Most auto-generated data
        # compresses a bit too well, we want a combination, so we combine a sha
        # hash with compressible data.
        for i in xrange(2048):
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
            content_chunks.append(next_content)
            next_sha1 = osutils.sha_string(next_content)
            content_chunks.append(next_sha1 + '\n')
        content = ''.join(content_chunks)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content_chunks = (z_content,)
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = 158634
        self.assertIs(None, block._content)
        # The first _ensure_content got all of the required data
        block._ensure_content(158634)
        self.assertEqualDiff(content, block._content)
        # And we should have released the _z_content_decompressor since it was
        # fully consumed
        self.assertIs(None, block._z_content_decompressor)

    def test__dump(self):
        dup_content = 'some duplicate content\nwhich is sufficiently long\n'
        key_to_text = {('1',): dup_content + '1 unique\n',
                       ('2',): dup_content + '2 extra special\n'}
        locs, block = self.make_block(key_to_text)
        self.assertEqual([('f', len(key_to_text[('1',)])),
                          ('d', 21, len(key_to_text[('2',)]),
                           [('c', 2, len(dup_content)),
                            ('i', len('2 extra special\n'), '')
                           ]),
                         ], block._dump())


class TestCaseWithGroupCompressVersionedFiles(
        tests.TestCaseWithMemoryTransport):

    def make_test_vf(self, create_graph, keylength=1, do_cleanup=True,
                     dir='.', inconsistency_fatal=True):
        t = self.get_transport(dir)
        t.ensure_base()
        vf = groupcompress.make_pack_factory(graph=create_graph,
            delta=False, keylength=keylength,
            inconsistency_fatal=inconsistency_fatal)(t)
        if do_cleanup:
            self.addCleanup(groupcompress.cleanup_pack_group, vf)
        return vf


class TestGroupCompressVersionedFiles(TestCaseWithGroupCompressVersionedFiles):

    def make_g_index(self, name, ref_lists=0, nodes=[]):
        builder = btree_index.BTreeBuilder(ref_lists)
        for node, references, value in nodes:
            builder.add_node(node, references, value)
        stream = builder.finish()
        trans = self.get_transport()
        size = trans.put_file(name, stream)
        return btree_index.BTreeGraphIndex(trans, name, size)

    def make_g_index_missing_parent(self):
        graph_index = self.make_g_index('missing_parent', 1,
            [(('parent', ), '2 78 2 10', ([],)),
             (('tip', ), '2 78 2 10',
              ([('parent', ), ('missing-parent', )],)),
              ])
        return graph_index

    def test_get_record_stream_as_requested(self):
        # Consider promoting 'as-requested' to general availability, and
        # make this a VF interface test
        vf = self.make_test_vf(False, dir='source')
        vf.add_lines(('a',), (), ['lines\n'])
        vf.add_lines(('b',), (), ['lines\n'])
        vf.add_lines(('c',), (), ['lines\n'])
        vf.add_lines(('d',), (), ['lines\n'])
        vf.writer.end()
        keys = [record.key for record in vf.get_record_stream(
                    [('a',), ('b',), ('c',), ('d',)],
                    'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)],
                    'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

        # It should work even after being repacked into another VF
        vf2 = self.make_test_vf(False, dir='target')
        vf2.insert_record_stream(vf.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)], 'as-requested', False))
        vf2.writer.end()

        keys = [record.key for record in vf2.get_record_stream(
                    [('a',), ('b',), ('c',), ('d',)],
                    'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf2.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)],
                    'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

    def test_insert_record_stream_reuses_blocks(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        block_bytes = {}
        stream = vf.get_record_stream([(r,) for r in 'abcdefgh'],
                                      'unordered', False)
        num_records = 0
        for record in stream:
            if record.key in [('a',), ('e',)]:
                self.assertEqual('groupcompress-block', record.storage_kind)
            else:
                self.assertEqual('groupcompress-block-ref',
                                 record.storage_kind)
            block_bytes[record.key] = record._manager._block._z_content
            num_records += 1
        self.assertEqual(8, num_records)
        for r in 'abcd':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('a',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('e',)])
        for r in 'efgh':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('e',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('a',)])
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # ordering in 'groupcompress' order, should actually swap the groups in
        # the target vf, but the groups themselves should not be disturbed.
        def small_size_stream():
            for record in vf.get_record_stream([(r,) for r in 'abcdefgh'],
                                               'groupcompress', False):
                record._manager._full_enough_block_size = \
                    record._manager._block._content_length
                yield record

        vf2.insert_record_stream(small_size_stream())
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        vf2.writer.end()
        num_records = 0
        for record in stream:
            num_records += 1
            self.assertEqual(block_bytes[record.key],
                             record._manager._block._z_content)
        self.assertEqual(8, num_records)

    def test_insert_record_stream_packs_on_the_fly(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        # Now copy the blocks into another vf, and see that the
        # insert_record_stream rebuilt a new block on-the-fly because of
        # under-utilization
        vf2 = self.make_test_vf(True, dir='target')
        vf2.insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False))
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        vf2.writer.end()
        num_records = 0
        # All of the records should be recombined into a single block
        block = None
        for record in stream:
            num_records += 1
            if block is None:
                block = record._manager._block
            else:
                self.assertIs(block, record._manager._block)
        self.assertEqual(8, num_records)

    def test__insert_record_stream_no_reuse_block(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        vf.writer.end()
        self.assertEqual(8, len(list(vf.get_record_stream(
                                        [(r,) for r in 'abcdefgh'],
                                        'unordered', False))))
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # ordering in 'groupcompress' order, should actually swap the groups in
        # the target vf, but the groups themselves should not be disturbed.
        list(vf2._insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False),
            reuse_blocks=False))
        vf2.writer.end()
        # After inserting with reuse_blocks=False, we should have everything in
        # a single new block.
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        block = None
        for record in stream:
            if block is None:
                block = record._manager._block
            else:
                self.assertIs(block, record._manager._block)

    def test_add_missing_noncompression_parent_unvalidated_index(self):
        unvalidated = self.make_g_index_missing_parent()
        combined = _mod_index.CombinedGraphIndex([unvalidated])
        index = groupcompress._GCGraphIndex(combined,
            is_locked=lambda: True, parents=True,
            track_external_parent_refs=True)
        index.scan_unvalidated_index(unvalidated)
        self.assertEqual(
            frozenset([('missing-parent',)]), index.get_missing_parents())

    def test_track_external_parent_refs(self):
        g_index = self.make_g_index('empty', 1, [])
        mod_index = btree_index.BTreeBuilder(1, 1)
        combined = _mod_index.CombinedGraphIndex([g_index, mod_index])
        index = groupcompress._GCGraphIndex(combined,
            is_locked=lambda: True, parents=True,
            add_callback=mod_index.add_nodes,
            track_external_parent_refs=True)
        index.add_records([
            (('new-key',), '2 10 2 10', [(('parent-1',), ('parent-2',))])])
        self.assertEqual(
            frozenset([('parent-1',), ('parent-2',)]),
            index.get_missing_parents())

    def make_source_with_b(self, a_parent, path):
        source = self.make_test_vf(True, dir=path)
        source.add_lines(('a',), (), ['lines\n'])
        if a_parent:
            b_parents = (('a',),)
        else:
            b_parents = ()
        source.add_lines(('b',), b_parents, ['lines\n'])
        return source

    def do_inconsistent_inserts(self, inconsistency_fatal):
        target = self.make_test_vf(True, dir='target',
                                   inconsistency_fatal=inconsistency_fatal)
        for x in range(2):
            source = self.make_source_with_b(x==1, 'source%s' % x)
            target.insert_record_stream(source.get_record_stream(
                [('b',)], 'unordered', False))

    def test_inconsistent_redundant_inserts_warn(self):
        """Should not insert a record that is already present."""
        warnings = []
        def warning(template, args):
            warnings.append(template % args)
        _trace_warning = trace.warning
        trace.warning = warning
        try:
            self.do_inconsistent_inserts(inconsistency_fatal=False)
        finally:
            trace.warning = _trace_warning
        self.assertEqual(["inconsistent details in skipped record: ('b',)"
                          " ('42 32 0 8', ((),)) ('74 32 0 8', ((('a',),),))"],
                         warnings)

    def test_inconsistent_redundant_inserts_raises(self):
        e = self.assertRaises(errors.KnitCorrupt, self.do_inconsistent_inserts,
                              inconsistency_fatal=True)
        self.assertContainsRe(str(e), "Knit.* corrupt: inconsistent details"
                              " in add_records:"
                              " \('b',\) \('42 32 0 8', \(\(\),\)\) \('74 32"
                              " 0 8', \(\(\('a',\),\),\)\)")

    def test_clear_cache(self):
        vf = self.make_source_with_b(True, 'source')
        vf.writer.end()
        for record in vf.get_record_stream([('a',), ('b',)], 'unordered',
                                           True):
            pass
        self.assertTrue(len(vf._group_cache) > 0)
        vf.clear_cache()
        self.assertEqual(0, len(vf._group_cache))


class StubGCVF(object):
    def __init__(self, canned_get_blocks=None):
        self._group_cache = {}
        self._canned_get_blocks = canned_get_blocks or []
    def _get_blocks(self, read_memos):
        return iter(self._canned_get_blocks)


class Test_BatchingBlockFetcher(TestCaseWithGroupCompressVersionedFiles):
    """Simple whitebox unit tests for _BatchingBlockFetcher."""

    def test_add_key_new_read_memo(self):
        """Adding a key with an uncached read_memo new to this batch adds that
        read_memo to the list of memos to fetch.
        """
        # locations are: index_memo, ignored, parents, ignored
        # where index_memo is: (idx, offset, len, factory_start, factory_end)
        # and (idx, offset, size) is known as the 'read_memo', identifying the
        # raw bytes needed.
        read_memo = ('fake index', 100, 50)
        locations = {
            ('key',): (read_memo + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), locations)
        total_size = batcher.add_key(('key',))
        self.assertEqual(50, total_size)
        self.assertEqual([('key',)], batcher.keys)
        self.assertEqual([read_memo], batcher.memos_to_get)

    def test_add_key_duplicate_read_memo(self):
        """read_memos that occur multiple times in a batch will only be fetched
        once.
        """
        read_memo = ('fake index', 100, 50)
        # Two keys, both sharing the same read memo (but different overall
        # index_memos).
        locations = {
            ('key1',): (read_memo + (0, 1), None, None, None),
            ('key2',): (read_memo + (1, 2), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), locations)
        total_size = batcher.add_key(('key1',))
        total_size = batcher.add_key(('key2',))
        self.assertEqual(50, total_size)
        self.assertEqual([('key1',), ('key2',)], batcher.keys)
        self.assertEqual([read_memo], batcher.memos_to_get)

    def test_add_key_cached_read_memo(self):
        """Adding a key with a cached read_memo will not cause that read_memo
        to be added to the list to fetch.
        """
        read_memo = ('fake index', 100, 50)
        gcvf = StubGCVF()
        gcvf._group_cache[read_memo] = 'fake block'
        locations = {
            ('key',): (read_memo + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
        total_size = batcher.add_key(('key',))
        self.assertEqual(0, total_size)
        self.assertEqual([('key',)], batcher.keys)
        self.assertEqual([], batcher.memos_to_get)

    def test_yield_factories_empty(self):
        """An empty batch yields no factories."""
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), {})
        self.assertEqual([], list(batcher.yield_factories()))

    def test_yield_factories_calls_get_blocks(self):
        """Uncached memos are retrieved via get_blocks."""
        read_memo1 = ('fake index', 100, 50)
        read_memo2 = ('fake index', 150, 40)
        gcvf = StubGCVF(
            canned_get_blocks=[
                (read_memo1, groupcompress.GroupCompressBlock()),
                (read_memo2, groupcompress.GroupCompressBlock())])
        locations = {
            ('key1',): (read_memo1 + (None, None), None, None, None),
            ('key2',): (read_memo2 + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
        batcher.add_key(('key1',))
        batcher.add_key(('key2',))
        factories = list(batcher.yield_factories(full_flush=True))
        self.assertLength(2, factories)
        keys = [f.key for f in factories]
        kinds = [f.storage_kind for f in factories]
        self.assertEqual([('key1',), ('key2',)], keys)
        self.assertEqual(['groupcompress-block', 'groupcompress-block'], kinds)

    def test_yield_factories_flushing(self):
        """yield_factories holds back on yielding results from the final block
        unless passed full_flush=True.
        """
        fake_block = groupcompress.GroupCompressBlock()
        read_memo = ('fake index', 100, 50)
        gcvf = StubGCVF()
        gcvf._group_cache[read_memo] = fake_block
        locations = {
            ('key',): (read_memo + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
        batcher.add_key(('key',))
        self.assertEqual([], list(batcher.yield_factories()))
        factories = list(batcher.yield_factories(full_flush=True))
        self.assertLength(1, factories)
        self.assertEqual(('key',), factories[0].key)
        self.assertEqual('groupcompress-block', factories[0].storage_kind)


class TestLazyGroupCompress(tests.TestCaseWithTransport):

    _texts = {
        ('key1',): "this is a text\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
        ('key2',): "another text\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
        ('key3',): "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
        ('key4',): "this will be extracted\n"
                   "but references most of its bytes from\n"
                   "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
    }
    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts."""
        compressor = groupcompress.GroupCompressor()
        start = 0
        for key in sorted(key_to_text):
            compressor.compress(key, key_to_text[key], None)
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.iteritems())
        block = compressor.flush()
        raw_bytes = block.to_bytes()
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)

    def add_key_to_manager(self, key, locations, block, manager):
        start, end = locations[key]
        manager.add_factory(key, (), start, end)

    def make_block_and_full_manager(self, texts):
        locations, block = self.make_block(texts)
        manager = groupcompress._LazyGroupContentManager(block)
        for key in sorted(texts):
            self.add_key_to_manager(key, locations, block, manager)
        return block, manager

    def test_get_fulltexts(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key2',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key1',), ('key2',)], result_order)

        # If we build the manager in the opposite order, we should get them
        # back in the opposite order
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key2',), locations, block, manager)
        self.add_key_to_manager(('key1',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key2',), ('key1',)], result_order)

    def test__wire_bytes_no_keys(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        wire_bytes = manager._wire_bytes()
        block_length = len(block.to_bytes())
        # We should have triggered a strip, since we aren't using any content
        stripped_block = manager._block.to_bytes()
        self.assertTrue(block_length > len(stripped_block))
        empty_z_header = zlib.compress('')
        self.assertEqual('groupcompress-block\n'
                         '8\n' # len(compress(''))
                         '0\n' # len('')
                         '%d\n'# compressed block len
                         '%s'  # zheader
                         '%s'  # block
                         % (len(stripped_block), empty_z_header,
                            stripped_block),
                         wire_bytes)

    def test__wire_bytes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        block_bytes = block.to_bytes()
        wire_bytes = manager._wire_bytes()
        (storage_kind, z_header_len, header_len,
         block_len, rest) = wire_bytes.split('\n', 4)
        z_header_len = int(z_header_len)
        header_len = int(header_len)
        block_len = int(block_len)
        self.assertEqual('groupcompress-block', storage_kind)
        self.assertEqual(34, z_header_len)
        self.assertEqual(26, header_len)
        self.assertEqual(len(block_bytes), block_len)
        z_header = rest[:z_header_len]
        header = zlib.decompress(z_header)
        self.assertEqual(header_len, len(header))
        entry1 = locations[('key1',)]
        entry4 = locations[('key4',)]
        self.assertEqualDiff('key1\n'
                             '\n'  # no parents
                             '%d\n' # start offset
                             '%d\n' # end offset
                             'key4\n'
                             '\n'
                             '%d\n'
                             '%d\n'
                             % (entry1[0], entry1[1],
                                entry4[0], entry4[1]),
                            header)
        z_block = rest[z_header_len:]
        self.assertEqual(block_bytes, z_block)

    def test_from_bytes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        wire_bytes = manager._wire_bytes()
        self.assertStartsWith(wire_bytes, 'groupcompress-block\n')
        manager = groupcompress._LazyGroupContentManager.from_bytes(wire_bytes)
        self.assertIsInstance(manager, groupcompress._LazyGroupContentManager)
        self.assertEqual(2, len(manager._factories))
        self.assertEqual(block._z_content, manager._block._z_content)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key1',), ('key4',)], result_order)

    def test__check_rebuild_no_changes(self):
        block, manager = self.make_block_and_full_manager(self._texts)
        manager._check_rebuild_block()
        self.assertIs(block, manager._block)

    def test__check_rebuild_only_one(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Request just the first key, which should trigger a 'strip' action
        self.add_key_to_manager(('key1',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        # We should be able to still get the content out of this block, though
        # it should only have 1 entry
        for record in manager.get_record_stream():
            self.assertEqual(('key1',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))

    def test__check_rebuild_middle(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Request a small key in the middle should trigger a 'rebuild'
        self.add_key_to_manager(('key4',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        for record in manager.get_record_stream():
            self.assertEqual(('key4',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))

    def test_check_is_well_utilized_all_keys(self):
        block, manager = self.make_block_and_full_manager(self._texts)
        self.assertFalse(manager.check_is_well_utilized())
        # Though we can fake it by changing the recommended minimum size
        manager._full_enough_block_size = block._content_length
        self.assertTrue(manager.check_is_well_utilized())
        # Setting it just above causes it to fail
        manager._full_enough_block_size = block._content_length + 1
        self.assertFalse(manager.check_is_well_utilized())
        # Setting the mixed-block size doesn't do anything, because the content
        # is considered to not be 'mixed'
        manager._full_enough_mixed_block_size = block._content_length
        self.assertFalse(manager.check_is_well_utilized())

    def test_check_is_well_utilized_mixed_keys(self):
        texts = {}
        f1k1 = ('f1', 'k1')
        f1k2 = ('f1', 'k2')
        f2k1 = ('f2', 'k1')
        f2k2 = ('f2', 'k2')
        texts[f1k1] = self._texts[('key1',)]
        texts[f1k2] = self._texts[('key2',)]
        texts[f2k1] = self._texts[('key3',)]
        texts[f2k2] = self._texts[('key4',)]
        block, manager = self.make_block_and_full_manager(texts)
        self.assertFalse(manager.check_is_well_utilized())
        manager._full_enough_block_size = block._content_length
        self.assertTrue(manager.check_is_well_utilized())
        manager._full_enough_block_size = block._content_length + 1
        self.assertFalse(manager.check_is_well_utilized())
        manager._full_enough_mixed_block_size = block._content_length
        self.assertTrue(manager.check_is_well_utilized())

    def test_check_is_well_utilized_partial_use(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        manager._full_enough_block_size = block._content_length
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key2',), locations, block, manager)
        # Just using the content from key1 and 2 is not enough to be considered
        # 'complete'
        self.assertFalse(manager.check_is_well_utilized())
        # However if we add key3, then we have enough, as we only require 75%
        # consumption
        self.add_key_to_manager(('key4',), locations, block, manager)
        self.assertTrue(manager.check_is_well_utilized())


class Test_GCBuildDetails(tests.TestCase):

    def test_acts_like_tuple(self):
        # _GCBuildDetails inlines some of the data that used to be spread out
        # across a bunch of tuples
        bd = groupcompress._GCBuildDetails((('parent1',), ('parent2',)),
            ('INDEX', 10, 20, 0, 5))
        self.assertEqual(4, len(bd))
        self.assertEqual(('INDEX', 10, 20, 0, 5), bd[0])
        self.assertEqual(None, bd[1]) # Compression Parent is always None
        self.assertEqual((('parent1',), ('parent2',)), bd[2])
        self.assertEqual(('group', None), bd[3]) # Record details

    def test__repr__(self):
        bd = groupcompress._GCBuildDetails((('parent1',), ('parent2',)),
            ('INDEX', 10, 20, 0, 5))
        self.assertEqual("_GCBuildDetails(('INDEX', 10, 20, 0, 5),"
                         " (('parent1',), ('parent2',)))",
                         repr(bd))