~bzr-pqm/bzr/bzr.dev

Viewing changes to bzrlib/tests/test_groupcompress.py

  • Committer: John Arbash Meinel
  • Date: 2006-07-21 04:02:14 UTC
  • mto: This revision was merged to the branch mainline in revision 1877.
  • Revision ID: john@arbash-meinel.com-20060721040214-9db86450785a5c3e
Make set_user_ignores a private function, and update the doc string to recommend it isn't used.

# Copyright (C) 2008, 2009 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Tests for group compression."""

import zlib

from bzrlib import (
    btree_index,
    groupcompress,
    errors,
    index as _mod_index,
    osutils,
    tests,
    trace,
    versionedfile,
    )
from bzrlib.osutils import sha_string
from bzrlib.tests.test__groupcompress import CompiledGroupCompressFeature


def load_tests(standard_tests, module, loader):
    """Parameterize tests for all versions of groupcompress."""
    to_adapt, result = tests.split_suite_by_condition(
        standard_tests, tests.condition_isinstance(TestAllGroupCompressors))
    scenarios = [
        ('python', {'compressor': groupcompress.PythonGroupCompressor}),
        ]
    if CompiledGroupCompressFeature.available():
        scenarios.append(('C',
            {'compressor': groupcompress.PyrexGroupCompressor}))
    return tests.multiply_tests(to_adapt, scenarios, result)


class TestGroupCompressor(tests.TestCase):

    def _chunks_to_repr_lines(self, chunks):
        return '\n'.join(map(repr, ''.join(chunks).split('\n')))

    def assertEqualDiffEncoded(self, expected, actual):
        """Compare the actual content to the expected content.

        :param expected: A group of chunks that we expect to see
        :param actual: The measured 'chunks'

        We will transform the chunks back into lines, and then run 'repr()'
        over them to handle non-ascii characters.
        """
        self.assertEqualDiff(self._chunks_to_repr_lines(expected),
                             self._chunks_to_repr_lines(actual))


class TestAllGroupCompressors(TestGroupCompressor):
    """Tests for GroupCompressor"""

    compressor = None # Set by multiply_tests

    def test_empty_delta(self):
        compressor = self.compressor()
        self.assertEqual([], compressor.chunks)

    def test_one_nosha_delta(self):
        # diff against NULL
        compressor = self.compressor()
        sha1, start_point, end_point, _ = compressor.compress(('label',),
            'strange\ncommon\n', None)
        self.assertEqual(sha_string('strange\ncommon\n'), sha1)
        expected_lines = 'f' '\x0f' 'strange\ncommon\n'
        self.assertEqual(expected_lines, ''.join(compressor.chunks))
        self.assertEqual(0, start_point)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_empty_content(self):
        compressor = self.compressor()
        # Adding empty bytes should return the 'null' record
        sha1, start_point, end_point, kind = compressor.compress(('empty',),
                                                                 '', None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)
        self.assertEqual(0, compressor.endpoint)
        self.assertEqual([], compressor.chunks)
        # Even after adding some content
        compressor.compress(('content',), 'some\nbytes\n', None)
        self.assertTrue(compressor.endpoint > 0)
        sha1, start_point, end_point, kind = compressor.compress(('empty2',),
                                                                 '', None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)

    def test_extract_from_compressor(self):
        # Knit fetching will try to reconstruct texts locally which results in
        # reading something that is in the compressor stream already.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, _, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        # get the first out
        self.assertEqual(('strange\ncommon long line\n'
                          'that needs a 16 byte match\n', sha1_1),
                         compressor.extract(('label',)))
        # and the second
        self.assertEqual(('common long line\nthat needs a 16 byte match\n'
                          'different\n', sha1_2),
                         compressor.extract(('newlabel',)))

    def test_pop_last(self):
        compressor = self.compressor()
        _, _, _, _ = compressor.compress(('key1',),
            'some text\nfor the first entry\n', None)
        expected_lines = list(compressor.chunks)
        _, _, _, _ = compressor.compress(('key2',),
            'some text\nfor the second entry\n', None)
        compressor.pop_last()
        self.assertEqual(expected_lines, compressor.chunks)


class TestPyrexGroupCompressor(TestGroupCompressor):

    _test_needs_features = [CompiledGroupCompressFeature]
    compressor = groupcompress.PyrexGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            'strange\n'
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'new\n'
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0f',
            # source and target length
            '\x36',
            # copy the line common
            '\x91\x0a\x2c', #copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0b',
            # source and target length
            '\x5f'
            # insert new
            '\x03new',
            # Copy of first parent 'common' range
            '\x91\x09\x31' # copy, offset 0x09, 0x31 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)


class TestPythonGroupCompressor(TestGroupCompressor):

    compressor = groupcompress.PythonGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            'strange\n'
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'new\n'
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0f',
            # target length
            '\x36',
            # copy the line common
            '\x91\x0a\x2c', #copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0c',
            # target length
            '\x5f'
            # insert new
            '\x04new\n',
            # Copy of first parent 'common' range
            '\x91\x0a\x30' # copy, offset 0x0a, 0x30 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)
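
# A rough sketch of the delta byte encoding asserted in the *_nosha_delta
# tests above (illustrative only, not part of the original test suite; the
# authoritative implementations live in bzrlib's _groupcompress_py and
# _groupcompress_pyx modules).  A delta record is 'd', the encoded length of
# the delta, then the delta itself: the encoded target length followed by a
# sequence of instructions.  An instruction byte with the high bit set is a
# copy; its low four bits say which offset bytes follow and the next three
# bits say which length bytes follow.  A byte below 0x80 inserts that many
# literal bytes.
def _decode_one_instruction(delta, pos):
    """Decode a single copy/insert instruction starting at delta[pos].

    Returns (kind, offset, length, next_pos).  This is a simplified sketch;
    it does not handle the zero-length copy special case.
    """
    cmd = ord(delta[pos])
    pos += 1
    if cmd & 0x80:
        # Copy command, e.g. '\x91\x0a\x2c': 0x91 = 0x80 | 0x01 | 0x10, so
        # one offset byte (0x0a) and one length byte (0x2c) follow, meaning
        # "copy 0x2c bytes from offset 0x0a of the group so far".
        offset = 0
        length = 0
        for shift in range(4):
            if cmd & (1 << shift):
                offset |= ord(delta[pos]) << (shift * 8)
                pos += 1
        for shift in range(3):
            if cmd & (1 << (4 + shift)):
                length |= ord(delta[pos]) << (shift * 8)
                pos += 1
        return 'copy', offset, length, pos
    # Insert command, e.g. '\x0adifferent\n': insert the next 10 bytes.
    return 'insert', None, cmd, pos + cmd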


class TestGroupCompressBlock(tests.TestCase):

    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts."""
        compressor = groupcompress.GroupCompressor()
        start = 0
        for key in sorted(key_to_text):
            compressor.compress(key, key_to_text[key], None)
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.iteritems())
        block = compressor.flush()
        raw_bytes = block.to_bytes()
        # Go through from_bytes(to_bytes()) so that we start with a compressed
        # content object
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)

    def test_from_empty_bytes(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes, '')

    def test_from_minimal_bytes(self):
        block = groupcompress.GroupCompressBlock.from_bytes(
            'gcb1z\n0\n0\n')
        self.assertIsInstance(block, groupcompress.GroupCompressBlock)
        self.assertIs(None, block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content()
        self.assertEqual('', block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content() # Ensure content is safe to call 2x

    def test_from_invalid(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes,
                          'this is not a valid header')

    def test_from_bytes(self):
        content = ('a tiny bit of content\n')
        z_content = zlib.compress(content)
        z_bytes = (
            'gcb1z\n' # group compress block v1 plain
            '%d\n' # Length of compressed content
            '%d\n' # Length of uncompressed content
            '%s'   # Compressed content
            ) % (len(z_content), len(content), z_content)
        block = groupcompress.GroupCompressBlock.from_bytes(
            z_bytes)
        self.assertEqual(z_content, block._z_content)
        self.assertIs(None, block._content)
        self.assertEqual(len(z_content), block._z_content_length)
        self.assertEqual(len(content), block._content_length)
        block._ensure_content()
        self.assertEqual(z_content, block._z_content)
        self.assertEqual(content, block._content)

    def test_to_bytes(self):
        content = ('this is some content\n'
                   'this content will be compressed\n')
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_content(content)
        bytes = gcb.to_bytes()
        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
        self.assertEqual(gcb._content_length, len(content))
        expected_header =('gcb1z\n' # group compress block v1 zlib
                          '%d\n' # Length of compressed content
                          '%d\n' # Length of uncompressed content
                         ) % (gcb._z_content_length, gcb._content_length)
        self.assertStartsWith(bytes, expected_header)
        remaining_bytes = bytes[len(expected_header):]
        raw_bytes = zlib.decompress(remaining_bytes)
        self.assertEqual(content, raw_bytes)

        # we should get the same results if using the chunked version
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_chunked_content(['this is some content\n'
                                 'this content will be compressed\n'],
                                 len(content))
        old_bytes = bytes
        bytes = gcb.to_bytes()
        self.assertEqual(old_bytes, bytes)
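
    # Serialised form exercised by the two tests above: a block is the magic
    # 'gcb1z\n', then the compressed length and the uncompressed length (each
    # as a decimal string ending in '\n'), then the zlib-compressed content.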

    def test_partial_decomp(self):
        content_chunks = []
        # We need a sufficient amount of data so that zlib.decompress has
        # partial decompression to work with. Most auto-generated data
        # compresses a bit too well, we want a combination, so we combine a sha
        # hash with compressible data.
        for i in xrange(2048):
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
            content_chunks.append(next_content)
            next_sha1 = osutils.sha_string(next_content)
            content_chunks.append(next_sha1 + '\n')
        content = ''.join(content_chunks)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content = z_content
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = 158634
        self.assertIs(None, block._content)
        block._ensure_content(100)
        self.assertIsNot(None, block._content)
        # We have decompressed at least 100 bytes
        self.assertTrue(len(block._content) >= 100)
        # We have not decompressed the whole content
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # ensuring content that we already have shouldn't cause any more data
        # to be extracted
        cur_len = len(block._content)
        block._ensure_content(cur_len - 10)
        self.assertEqual(cur_len, len(block._content))
        # Now we want a bit more content
        cur_len += 10
        block._ensure_content(cur_len)
        self.assertTrue(len(block._content) >= cur_len)
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # And now lets finish
        block._ensure_content(158634)
        self.assertEqualDiff(content, block._content)
        # And the decompressor is finalized
        self.assertIs(None, block._z_content_decompressor)

    def test_partial_decomp_no_known_length(self):
        content_chunks = []
        for i in xrange(2048):
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
            content_chunks.append(next_content)
            next_sha1 = osutils.sha_string(next_content)
            content_chunks.append(next_sha1 + '\n')
        content = ''.join(content_chunks)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content = z_content
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = None # Don't tell the decompressed length
        self.assertIs(None, block._content)
        block._ensure_content(100)
        self.assertIsNot(None, block._content)
        # We have decompressed at least 100 bytes
        self.assertTrue(len(block._content) >= 100)
        # We have not decompressed the whole content
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # ensuring content that we already have shouldn't cause any more data
        # to be extracted
        cur_len = len(block._content)
        block._ensure_content(cur_len - 10)
        self.assertEqual(cur_len, len(block._content))
        # Now we want a bit more content
        cur_len += 10
        block._ensure_content(cur_len)
        self.assertTrue(len(block._content) >= cur_len)
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # And now lets finish
        block._ensure_content()
        self.assertEqualDiff(content, block._content)
        # And the decompressor is finalized
        self.assertIs(None, block._z_content_decompressor)

    def test__dump(self):
        dup_content = 'some duplicate content\nwhich is sufficiently long\n'
        key_to_text = {('1',): dup_content + '1 unique\n',
                       ('2',): dup_content + '2 extra special\n'}
        locs, block = self.make_block(key_to_text)
        self.assertEqual([('f', len(key_to_text[('1',)])),
                          ('d', 21, len(key_to_text[('2',)]),
                           [('c', 2, len(dup_content)),
                            ('i', len('2 extra special\n'), '')
                           ]),
                         ], block._dump())


class TestCaseWithGroupCompressVersionedFiles(tests.TestCaseWithTransport):

    def make_test_vf(self, create_graph, keylength=1, do_cleanup=True,
                     dir='.', inconsistency_fatal=True):
        t = self.get_transport(dir)
        t.ensure_base()
        vf = groupcompress.make_pack_factory(graph=create_graph,
            delta=False, keylength=keylength,
            inconsistency_fatal=inconsistency_fatal)(t)
        if do_cleanup:
            self.addCleanup(groupcompress.cleanup_pack_group, vf)
        return vf


class TestGroupCompressVersionedFiles(TestCaseWithGroupCompressVersionedFiles):

    def make_g_index(self, name, ref_lists=0, nodes=[]):
        builder = btree_index.BTreeBuilder(ref_lists)
        for node, references, value in nodes:
            builder.add_node(node, references, value)
        stream = builder.finish()
        trans = self.get_transport()
        size = trans.put_file(name, stream)
        return btree_index.BTreeGraphIndex(trans, name, size)

    def make_g_index_missing_parent(self):
        graph_index = self.make_g_index('missing_parent', 1,
            [(('parent', ), '2 78 2 10', ([],)),
             (('tip', ), '2 78 2 10',
              ([('parent', ), ('missing-parent', )],)),
              ])
        return graph_index

    def test_get_record_stream_as_requested(self):
        # Consider promoting 'as-requested' to general availability, and
        # make this a VF interface test
        vf = self.make_test_vf(False, dir='source')
        vf.add_lines(('a',), (), ['lines\n'])
        vf.add_lines(('b',), (), ['lines\n'])
        vf.add_lines(('c',), (), ['lines\n'])
        vf.add_lines(('d',), (), ['lines\n'])
        vf.writer.end()
        keys = [record.key for record in vf.get_record_stream(
                    [('a',), ('b',), ('c',), ('d',)],
                    'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)],
                    'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

        # It should work even after being repacked into another VF
        vf2 = self.make_test_vf(False, dir='target')
        vf2.insert_record_stream(vf.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)], 'as-requested', False))
        vf2.writer.end()

        keys = [record.key for record in vf2.get_record_stream(
                    [('a',), ('b',), ('c',), ('d',)],
                    'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf2.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)],
                    'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

    def test_insert_record_stream_reuses_blocks(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        block_bytes = {}
        stream = vf.get_record_stream([(r,) for r in 'abcdefgh'],
                                      'unordered', False)
        num_records = 0
        for record in stream:
            if record.key in [('a',), ('e',)]:
                self.assertEqual('groupcompress-block', record.storage_kind)
            else:
                self.assertEqual('groupcompress-block-ref',
                                 record.storage_kind)
            block_bytes[record.key] = record._manager._block._z_content
            num_records += 1
        self.assertEqual(8, num_records)
        for r in 'abcd':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('a',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('e',)])
        for r in 'efgh':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('e',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('a',)])
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # ordering in 'groupcompress' order, should actually swap the groups in
        # the target vf, but the groups themselves should not be disturbed.
        def small_size_stream():
            for record in vf.get_record_stream([(r,) for r in 'abcdefgh'],
                                               'groupcompress', False):
                record._manager._full_enough_block_size = \
                    record._manager._block._content_length
                yield record

        vf2.insert_record_stream(small_size_stream())
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        vf2.writer.end()
        num_records = 0
        for record in stream:
            num_records += 1
            self.assertEqual(block_bytes[record.key],
                             record._manager._block._z_content)
        self.assertEqual(8, num_records)

    def test_insert_record_stream_packs_on_the_fly(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        # Now copy the blocks into another vf, and see that the
        # insert_record_stream rebuilt a new block on-the-fly because of
        # under-utilization
        vf2 = self.make_test_vf(True, dir='target')
        vf2.insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False))
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        vf2.writer.end()
        num_records = 0
        # All of the records should be recombined into a single block
        block = None
        for record in stream:
            num_records += 1
            if block is None:
                block = record._manager._block
            else:
                self.assertIs(block, record._manager._block)
        self.assertEqual(8, num_records)

    def test__insert_record_stream_no_reuse_block(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        vf.writer.end()
        self.assertEqual(8, len(list(vf.get_record_stream(
                                        [(r,) for r in 'abcdefgh'],
                                        'unordered', False))))
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # ordering in 'groupcompress' order, should actually swap the groups in
        # the target vf, but the groups themselves should not be disturbed.
        list(vf2._insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False),
            reuse_blocks=False))
        vf2.writer.end()
        # After inserting with reuse_blocks=False, we should have everything in
        # a single new block.
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        block = None
        for record in stream:
            if block is None:
                block = record._manager._block
            else:
                self.assertIs(block, record._manager._block)

    def test_add_missing_noncompression_parent_unvalidated_index(self):
        unvalidated = self.make_g_index_missing_parent()
        combined = _mod_index.CombinedGraphIndex([unvalidated])
        index = groupcompress._GCGraphIndex(combined,
            is_locked=lambda: True, parents=True,
            track_external_parent_refs=True)
        index.scan_unvalidated_index(unvalidated)
        self.assertEqual(
            frozenset([('missing-parent',)]), index.get_missing_parents())

    def test_track_external_parent_refs(self):
        g_index = self.make_g_index('empty', 1, [])
        mod_index = btree_index.BTreeBuilder(1, 1)
        combined = _mod_index.CombinedGraphIndex([g_index, mod_index])
        index = groupcompress._GCGraphIndex(combined,
            is_locked=lambda: True, parents=True,
            add_callback=mod_index.add_nodes,
            track_external_parent_refs=True)
        index.add_records([
            (('new-key',), '2 10 2 10', [(('parent-1',), ('parent-2',))])])
        self.assertEqual(
            frozenset([('parent-1',), ('parent-2',)]),
            index.get_missing_parents())

    def make_source_with_b(self, a_parent, path):
        source = self.make_test_vf(True, dir=path)
        source.add_lines(('a',), (), ['lines\n'])
        if a_parent:
            b_parents = (('a',),)
        else:
            b_parents = ()
        source.add_lines(('b',), b_parents, ['lines\n'])
        return source

    def do_inconsistent_inserts(self, inconsistency_fatal):
        target = self.make_test_vf(True, dir='target',
                                   inconsistency_fatal=inconsistency_fatal)
        for x in range(2):
            source = self.make_source_with_b(x==1, 'source%s' % x)
            target.insert_record_stream(source.get_record_stream(
                [('b',)], 'unordered', False))

    def test_inconsistent_redundant_inserts_warn(self):
        """Should not insert a record that is already present."""
        warnings = []
        def warning(template, args):
            warnings.append(template % args)
        _trace_warning = trace.warning
        trace.warning = warning
        try:
            self.do_inconsistent_inserts(inconsistency_fatal=False)
        finally:
            trace.warning = _trace_warning
        self.assertEqual(["inconsistent details in skipped record: ('b',)"
                          " ('42 32 0 8', ((),)) ('74 32 0 8', ((('a',),),))"],
                         warnings)

    def test_inconsistent_redundant_inserts_raises(self):
        e = self.assertRaises(errors.KnitCorrupt, self.do_inconsistent_inserts,
                              inconsistency_fatal=True)
        self.assertContainsRe(str(e), "Knit.* corrupt: inconsistent details"
                              " in add_records:"
                              " \('b',\) \('42 32 0 8', \(\(\),\)\) \('74 32"
                              " 0 8', \(\(\('a',\),\),\)\)")


class StubGCVF(object):
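    # A minimal stand-in for a GroupCompressVersionedFiles object: it exposes
    # only the two attributes the batching tests below rely on, a _group_cache
    # dict and a _get_blocks() iterator.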
    def __init__(self, canned_get_blocks=None):
        self._group_cache = {}
        self._canned_get_blocks = canned_get_blocks or []
    def _get_blocks(self, read_memos):
        return iter(self._canned_get_blocks)


class Test_BatchingBlockFetcher(TestCaseWithGroupCompressVersionedFiles):
    """Simple whitebox unit tests for _BatchingBlockFetcher."""

    def test_add_key_new_read_memo(self):
        """Adding a key with an uncached read_memo new to this batch adds that
        read_memo to the list of memos to fetch.
        """
        # locations are: index_memo, ignored, parents, ignored
        # where index_memo is: (idx, offset, len, factory_start, factory_end)
        # and (idx, offset, size) is known as the 'read_memo', identifying the
        # raw bytes needed.
        read_memo = ('fake index', 100, 50)
        locations = {
            ('key',): (read_memo + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), locations)
        total_size = batcher.add_key(('key',))
        self.assertEqual(50, total_size)
        self.assertEqual([('key',)], batcher.keys)
        self.assertEqual([read_memo], batcher.memos_to_get)

    def test_add_key_duplicate_read_memo(self):
        """read_memos that occur multiple times in a batch will only be fetched
        once.
        """
        read_memo = ('fake index', 100, 50)
        # Two keys, both sharing the same read memo (but different overall
        # index_memos).
        locations = {
            ('key1',): (read_memo + (0, 1), None, None, None),
            ('key2',): (read_memo + (1, 2), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), locations)
        total_size = batcher.add_key(('key1',))
        total_size = batcher.add_key(('key2',))
        self.assertEqual(50, total_size)
        self.assertEqual([('key1',), ('key2',)], batcher.keys)
        self.assertEqual([read_memo], batcher.memos_to_get)

    def test_add_key_cached_read_memo(self):
        """Adding a key with a cached read_memo will not cause that read_memo
        to be added to the list to fetch.
        """
        read_memo = ('fake index', 100, 50)
        gcvf = StubGCVF()
        gcvf._group_cache[read_memo] = 'fake block'
        locations = {
            ('key',): (read_memo + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
        total_size = batcher.add_key(('key',))
        self.assertEqual(0, total_size)
        self.assertEqual([('key',)], batcher.keys)
        self.assertEqual([], batcher.memos_to_get)

    def test_yield_factories_empty(self):
        """An empty batch yields no factories."""
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), {})
        self.assertEqual([], list(batcher.yield_factories()))

    def test_yield_factories_calls_get_blocks(self):
        """Uncached memos are retrieved via get_blocks."""
        read_memo1 = ('fake index', 100, 50)
        read_memo2 = ('fake index', 150, 40)
        gcvf = StubGCVF(
            canned_get_blocks=[
                (read_memo1, groupcompress.GroupCompressBlock()),
                (read_memo2, groupcompress.GroupCompressBlock())])
        locations = {
            ('key1',): (read_memo1 + (None, None), None, None, None),
            ('key2',): (read_memo2 + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
        batcher.add_key(('key1',))
        batcher.add_key(('key2',))
        factories = list(batcher.yield_factories(full_flush=True))
        self.assertLength(2, factories)
        keys = [f.key for f in factories]
        kinds = [f.storage_kind for f in factories]
        self.assertEqual([('key1',), ('key2',)], keys)
        self.assertEqual(['groupcompress-block', 'groupcompress-block'], kinds)

    def test_yield_factories_flushing(self):
        """yield_factories holds back on yielding results from the final block
        unless passed full_flush=True.
        """
        fake_block = groupcompress.GroupCompressBlock()
        read_memo = ('fake index', 100, 50)
        gcvf = StubGCVF()
        gcvf._group_cache[read_memo] = fake_block
        locations = {
            ('key',): (read_memo + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
        batcher.add_key(('key',))
        self.assertEqual([], list(batcher.yield_factories()))
        factories = list(batcher.yield_factories(full_flush=True))
        self.assertLength(1, factories)
        self.assertEqual(('key',), factories[0].key)
        self.assertEqual('groupcompress-block', factories[0].storage_kind)


class TestLazyGroupCompress(tests.TestCaseWithTransport):

    _texts = {
        ('key1',): "this is a text\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
        ('key2',): "another text\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
        ('key3',): "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
        ('key4',): "this will be extracted\n"
                   "but references most of its bytes from\n"
                   "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
    }
    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts."""
        compressor = groupcompress.GroupCompressor()
        start = 0
        for key in sorted(key_to_text):
            compressor.compress(key, key_to_text[key], None)
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.iteritems())
        block = compressor.flush()
        raw_bytes = block.to_bytes()
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)

    def add_key_to_manager(self, key, locations, block, manager):
        start, end = locations[key]
        manager.add_factory(key, (), start, end)

    def make_block_and_full_manager(self, texts):
        locations, block = self.make_block(texts)
        manager = groupcompress._LazyGroupContentManager(block)
        for key in sorted(texts):
            self.add_key_to_manager(key, locations, block, manager)
        return block, manager

    def test_get_fulltexts(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key2',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key1',), ('key2',)], result_order)

        # If we build the manager in the opposite order, we should get them
        # back in the opposite order
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key2',), locations, block, manager)
        self.add_key_to_manager(('key1',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key2',), ('key1',)], result_order)

    def test__wire_bytes_no_keys(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        wire_bytes = manager._wire_bytes()
        block_length = len(block.to_bytes())
        # We should have triggered a strip, since we aren't using any content
        stripped_block = manager._block.to_bytes()
        self.assertTrue(block_length > len(stripped_block))
        empty_z_header = zlib.compress('')
        self.assertEqual('groupcompress-block\n'
                         '8\n' # len(compress(''))
                         '0\n' # len('')
                         '%d\n'# compressed block len
                         '%s'  # zheader
                         '%s'  # block
                         % (len(stripped_block), empty_z_header,
                            stripped_block),
                         wire_bytes)

    def test__wire_bytes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        block_bytes = block.to_bytes()
        wire_bytes = manager._wire_bytes()
        (storage_kind, z_header_len, header_len,
         block_len, rest) = wire_bytes.split('\n', 4)
        z_header_len = int(z_header_len)
        header_len = int(header_len)
        block_len = int(block_len)
        self.assertEqual('groupcompress-block', storage_kind)
        self.assertEqual(34, z_header_len)
        self.assertEqual(26, header_len)
        self.assertEqual(len(block_bytes), block_len)
        z_header = rest[:z_header_len]
        header = zlib.decompress(z_header)
        self.assertEqual(header_len, len(header))
        entry1 = locations[('key1',)]
        entry4 = locations[('key4',)]
        self.assertEqualDiff('key1\n'
                             '\n'  # no parents
                             '%d\n' # start offset
                             '%d\n' # end offset
                             'key4\n'
                             '\n'
                             '%d\n'
                             '%d\n'
                             % (entry1[0], entry1[1],
                                entry4[0], entry4[1]),
                            header)
        z_block = rest[z_header_len:]
        self.assertEqual(block_bytes, z_block)
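
    # The framing checked above is the 'groupcompress-block' wire format: the
    # storage-kind line, then three decimal lengths (compressed header,
    # uncompressed header, block), each on its own line, then the
    # zlib-compressed header naming each key with its parents and start/end
    # offsets, then the block bytes themselves.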

    def test_from_bytes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        wire_bytes = manager._wire_bytes()
        self.assertStartsWith(wire_bytes, 'groupcompress-block\n')
        manager = groupcompress._LazyGroupContentManager.from_bytes(wire_bytes)
        self.assertIsInstance(manager, groupcompress._LazyGroupContentManager)
        self.assertEqual(2, len(manager._factories))
        self.assertEqual(block._z_content, manager._block._z_content)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key1',), ('key4',)], result_order)

    def test__check_rebuild_no_changes(self):
        block, manager = self.make_block_and_full_manager(self._texts)
        manager._check_rebuild_block()
        self.assertIs(block, manager._block)

    def test__check_rebuild_only_one(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Request just the first key, which should trigger a 'strip' action
        self.add_key_to_manager(('key1',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        # We should be able to still get the content out of this block, though
        # it should only have 1 entry
        for record in manager.get_record_stream():
            self.assertEqual(('key1',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))

    def test__check_rebuild_middle(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Requesting a small key in the middle should trigger a 'rebuild'
        self.add_key_to_manager(('key4',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        for record in manager.get_record_stream():
            self.assertEqual(('key4',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))

    def test_check_is_well_utilized_all_keys(self):
        block, manager = self.make_block_and_full_manager(self._texts)
        self.assertFalse(manager.check_is_well_utilized())
        # Though we can fake it by changing the recommended minimum size
        manager._full_enough_block_size = block._content_length
        self.assertTrue(manager.check_is_well_utilized())
        # Setting it just above causes it to fail
        manager._full_enough_block_size = block._content_length + 1
        self.assertFalse(manager.check_is_well_utilized())
        # Setting the mixed-block size doesn't do anything, because the content
        # is considered to not be 'mixed'
        manager._full_enough_mixed_block_size = block._content_length
        self.assertFalse(manager.check_is_well_utilized())

    def test_check_is_well_utilized_mixed_keys(self):
        texts = {}
        f1k1 = ('f1', 'k1')
        f1k2 = ('f1', 'k2')
        f2k1 = ('f2', 'k1')
        f2k2 = ('f2', 'k2')
        texts[f1k1] = self._texts[('key1',)]
        texts[f1k2] = self._texts[('key2',)]
        texts[f2k1] = self._texts[('key3',)]
        texts[f2k2] = self._texts[('key4',)]
        block, manager = self.make_block_and_full_manager(texts)
        self.assertFalse(manager.check_is_well_utilized())
        manager._full_enough_block_size = block._content_length
        self.assertTrue(manager.check_is_well_utilized())
        manager._full_enough_block_size = block._content_length + 1
        self.assertFalse(manager.check_is_well_utilized())
        manager._full_enough_mixed_block_size = block._content_length
        self.assertTrue(manager.check_is_well_utilized())

    def test_check_is_well_utilized_partial_use(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        manager._full_enough_block_size = block._content_length
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key2',), locations, block, manager)
        # Just using the content from key1 and 2 is not enough to be considered
        # 'complete'
        self.assertFalse(manager.check_is_well_utilized())
        # However if we add key4, then we have enough, as we only require 75%
        # consumption
        self.add_key_to_manager(('key4',), locations, block, manager)
        self.assertTrue(manager.check_is_well_utilized())