~bzr-pqm/bzr/bzr.dev

Viewing changes to bzrlib/tests/test_groupcompress.py

Merge the _LazyGroupContentManager, et al.

This allows us to stream GroupCompressBlocks in their compressed form and unpack them
during insert(), rather than during get().
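
To make the idea concrete, here is a minimal sketch of the lazy-unpacking pattern using
simplified stand-ins (plain zlib and hypothetical LazyBlock/LazyContentManager classes,
not the real bzrlib API): the block crosses the wire still compressed, and is only
decompressed when a record's bytes are actually requested.

    import zlib

    class LazyBlock(object):
        """Stand-in for a GroupCompressBlock: holds zlib bytes, unpacks lazily."""
        def __init__(self, z_content):
            self._z_content = z_content
            self._content = None

        def content(self):
            # Pay the decompression cost only on first access.
            if self._content is None:
                self._content = zlib.decompress(self._z_content)
            return self._content

    class LazyContentManager(object):
        """Stand-in for _LazyGroupContentManager: keys map to offsets in one block."""
        def __init__(self, block):
            self._block = block
            self._offsets = {}

        def add_factory(self, key, start, end):
            self._offsets[key] = (start, end)

        def get_bytes(self, key):
            start, end = self._offsets[key]
            return self._block.content()[start:end]

    texts = [b'text for key1\n', b'text for key2\n']
    block = LazyBlock(zlib.compress(b''.join(texts)))
    manager = LazyContentManager(block)
    manager.add_factory(('key1',), 0, len(texts[0]))
    manager.add_factory(('key2',), len(texts[0]), len(texts[0]) + len(texts[1]))
    # Nothing has been decompressed yet; an insert elsewhere could forward
    # block._z_content as-is.  Bytes are unpacked only on an actual read:
    assert block._content is None
    assert manager.get_bytes(('key2',)) == b'text for key2\n'

The real manager additionally records per-key (start, end) offsets in a small wire
header and can strip or rebuild the block when only part of it is wanted; the new tests
below exercise exactly those paths.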

=== modified file 'bzrlib/tests/test_groupcompress.py'
--- bzrlib/tests/test_groupcompress.py
+++ bzrlib/tests/test_groupcompress.py
@@ -20,7 +20,10 @@
 
 from bzrlib import (
     groupcompress,
+    errors,
+    osutils,
     tests,
+    versionedfile,
     )
 from bzrlib.osutils import sha_string
 from bzrlib.tests import (
@@ -29,18 +32,16 @@
     )
 
 
-
-
 class TestGroupCompressor(tests.TestCase):
     """Tests for GroupCompressor"""
 
     def test_empty_delta(self):
-        compressor = groupcompress.GroupCompressor(True)
+        compressor = groupcompress.GroupCompressor()
         self.assertEqual([], compressor.lines)
 
     def test_one_nosha_delta(self):
         # diff against NUKK
-        compressor = groupcompress.GroupCompressor(True)
+        compressor = groupcompress.GroupCompressor()
         sha1, end_point, _, _ = compressor.compress(('label',),
             'strange\ncommon\n', None)
         self.assertEqual(sha_string('strange\ncommon\n'), sha1)
@@ -66,7 +67,7 @@
                              self._chunks_to_repr_lines(actual))
 
     def test_two_nosha_delta(self):
-        compressor = groupcompress.GroupCompressor(True)
+        compressor = groupcompress.GroupCompressor()
         sha1_1, _, _, _ = compressor.compress(('label',),
             'strange\ncommon long line\nthat needs a 16 byte match\n', None)
         expected_lines = list(compressor.lines)
@@ -91,7 +92,7 @@
     def test_three_nosha_delta(self):
         # The first interesting test: make a change that should use lines from
         # both parents.
-        compressor = groupcompress.GroupCompressor(True)
+        compressor = groupcompress.GroupCompressor()
         sha1_1, end_point, _, _ = compressor.compress(('label',),
             'strange\ncommon very very long line\nwith some extra text\n', None)
         sha1_2, _, _, _ = compressor.compress(('newlabel',),
@@ -121,7 +122,7 @@
         self.assertEqual(sum(map(len, expected_lines)), end_point)
 
     def test_stats(self):
-        compressor = groupcompress.GroupCompressor(True)
+        compressor = groupcompress.GroupCompressor()
         compressor.compress(('label',), 'strange\ncommon long line\n'
                                         'plus more text\n', None)
         compressor.compress(('newlabel',),
@@ -135,7 +136,7 @@
     def test_extract_from_compressor(self):
         # Knit fetching will try to reconstruct texts locally which results in
         # reading something that is in the compressor stream already.
-        compressor = groupcompress.GroupCompressor(True)
+        compressor = groupcompress.GroupCompressor()
         sha1_1, _, _, _ = compressor.compress(('label',),
             'strange\ncommon long line\nthat needs a 16 byte match\n', None)
         expected_lines = list(compressor.lines)
@@ -187,22 +188,37 @@
 
 class TestGroupCompressBlock(tests.TestCase):
 
+    def make_block(self, key_to_text):
+        """Create a GroupCompressBlock, filling it with the given texts."""
+        compressor = groupcompress.GroupCompressor()
+        start = 0
+        for key in sorted(key_to_text):
+            compressor.compress(key, key_to_text[key], None)
+        block = compressor.flush()
+        entries = block._entries
+        # Go through from_bytes(to_bytes()) so that we start with a compressed
+        # content object
+        return entries, groupcompress.GroupCompressBlock.from_bytes(
+            block.to_bytes())
+
     def test_from_empty_bytes(self):
         self.assertRaises(ValueError,
                           groupcompress.GroupCompressBlock.from_bytes, '')
 
     def test_from_minimal_bytes(self):
-        block = groupcompress.GroupCompressBlock.from_bytes('gcb1z\n0\n0\n')
+        block = groupcompress.GroupCompressBlock.from_bytes(
+            'gcb1z\n0\n0\n0\n0\n')
         self.assertIsInstance(block, groupcompress.GroupCompressBlock)
         self.assertEqual({}, block._entries)
+        self.assertIs(None, block._content)
+        self.assertEqual('', block._z_content)
+        block._ensure_content()
+        self.assertEqual('', block._content)
+        self.assertEqual('', block._z_content)
+        block._ensure_content() # Ensure content is safe to call 2x
 
-    def test_from_bytes(self):
-        z_header_bytes = (
-            'gcb1z\n' # group compress block v1 plain
-            '76\n' # Length of zlib bytes
-            '183\n' # Length of all meta-info
-            + zlib.compress(
-            'key:bing\n'
+    def test_from_bytes_with_labels(self):
+        header = ('key:bing\n'
            'sha1:abcdabcdabcdabcdabcdabcdabcdabcdabcdabcd\n'
            'type:fulltext\n'
            'start:100\n'
@@ -213,10 +229,24 @@
             'type:fulltext\n'
             'start:0\n'
             'length:100\n'
-            '\n'))
+            '\n')
+        z_header = zlib.compress(header)
+        content = ('a tiny bit of content\n')
+        z_content = zlib.compress(content)
+        z_bytes = (
+            'gcb1z\n' # group compress block v1 plain
+            '%d\n' # Length of zlib bytes
+            '%d\n' # Length of all meta-info
+            '%d\n' # Length of compressed content
+            '%d\n' # Length of uncompressed content
+            '%s'   # Compressed header
+            '%s'   # Compressed content
+            ) % (len(z_header), len(header),
+                 len(z_content), len(content),
+                 z_header, z_content)
         block = groupcompress.GroupCompressBlock.from_bytes(
-            z_header_bytes)
-        self.assertIs(None, block._content)
+            z_bytes)
+        block._parse_header()
         self.assertIsInstance(block, groupcompress.GroupCompressBlock)
         self.assertEqual([('bing',), ('foo', 'bar')], sorted(block._entries))
         bing = block._entries[('bing',)]
@@ -231,6 +261,29 @@
         self.assertEqual('abcd'*10, foobar.sha1)
         self.assertEqual(0, foobar.start)
         self.assertEqual(100, foobar.length)
+        self.assertEqual(z_content, block._z_content)
+        self.assertIs(None, block._content)
+        block._ensure_content()
+        self.assertEqual(z_content, block._z_content)
+        self.assertEqual(content, block._content)
+
+    def test_from_old_bytes(self):
+        # Backwards compatibility, with groups that didn't define content length
+        content = ('a tiny bit of content\n')
+        z_content = zlib.compress(content)
+        z_bytes = (
+            'gcb1z\n' # group compress block v1 plain
+            '0\n' # Length of zlib bytes
+            '0\n' # Length of all meta-info
+            ''    # Compressed header
+            '%s'   # Compressed content
+            ) % (z_content)
+        block = groupcompress.GroupCompressBlock.from_bytes(
+            z_bytes)
+        self.assertIsInstance(block, groupcompress.GroupCompressBlock)
+        block._ensure_content()
+        self.assertEqual(z_content, block._z_content)
+        self.assertEqual(content, block._content)
 
     def test_add_entry(self):
         gcb = groupcompress.GroupCompressBlock()
@@ -243,16 +296,25 @@
         self.assertEqual(100, e.length)
 
     def test_to_bytes(self):
+        no_labels = groupcompress._NO_LABELS
+        def reset():
+            groupcompress._NO_LABELS = no_labels
+        self.addCleanup(reset)
+        groupcompress._NO_LABELS = False
         gcb = groupcompress.GroupCompressBlock()
         gcb.add_entry(('foo', 'bar'), 'fulltext', 'abcd'*10, 0, 100)
         gcb.add_entry(('bing',), 'fulltext', 'abcd'*10, 100, 100)
+        gcb.set_content('this is some content\n'
+                        'this content will be compressed\n')
         bytes = gcb.to_bytes()
-        self.assertStartsWith(bytes,
-                              'gcb1z\n' # group compress block v1 zlib
-                              '76\n' # Length of compressed bytes
-                              '183\n' # Length of all meta-info
-                             )
-        remaining_bytes = bytes[13:]
+        expected_header =('gcb1z\n' # group compress block v1 zlib
+                          '76\n' # Length of compressed bytes
+                          '183\n' # Length of uncompressed meta-info
+                          '50\n' # Length of compressed content
+                          '53\n' # Length of uncompressed content
+                         )
+        self.assertStartsWith(bytes, expected_header)
+        remaining_bytes = bytes[len(expected_header):]
         raw_bytes = zlib.decompress(remaining_bytes)
         self.assertEqualDiff('key:bing\n'
                              'sha1:abcdabcdabcdabcdabcdabcdabcdabcdabcdabcd\n'
@@ -266,3 +328,416 @@
                              'start:0\n'
                              'length:100\n'
                              '\n', raw_bytes)
+
+    def test_extract_no_end(self):
+        # We should be able to extract a record, even if we only know the start
+        # of the bytes.
+        texts = {
+            ('key1',): 'text for key1\nhas bytes that are common\n',
+            ('key2',): 'text for key2\nhas bytes that are common\n',
+        }
+        entries, block = self.make_block(texts)
+        self.assertEqualDiff('text for key1\nhas bytes that are common\n',
+                             block.extract(('key1',), entries[('key1',)].start,
+                                           end=None)[1])
+        self.assertEqualDiff('text for key2\nhas bytes that are common\n',
+                             block.extract(('key2',), entries[('key2',)].start,
+                                           end=None)[1])
+
+    def test_partial_decomp(self):
+        content_chunks = []
+        # We need a sufficient amount of data so that zlib.decompress has
+        # partial decompression to work with. Most auto-generated data
+        # compresses a bit too well, we want a combination, so we combine a sha
+        # hash with compressible data.
+        for i in xrange(2048):
+            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
+            content_chunks.append(next_content)
+            next_sha1 = osutils.sha_string(next_content)
+            content_chunks.append(next_sha1 + '\n')
+        content = ''.join(content_chunks)
+        self.assertEqual(158634, len(content))
+        z_content = zlib.compress(content)
+        self.assertEqual(57182, len(z_content))
+        block = groupcompress.GroupCompressBlock()
+        block._z_content = z_content
+        block._z_content_length = len(z_content)
+        block._compressor_name = 'zlib'
+        block._content_length = 158634
+        self.assertIs(None, block._content)
+        block._ensure_content(100)
+        self.assertIsNot(None, block._content)
+        # We have decompressed at least 100 bytes
+        self.assertTrue(len(block._content) >= 100)
+        # We have not decompressed the whole content
+        self.assertTrue(len(block._content) < 158634)
+        self.assertEqualDiff(content[:len(block._content)], block._content)
+        # ensuring content that we already have shouldn't cause any more data
+        # to be extracted
+        cur_len = len(block._content)
+        block._ensure_content(cur_len - 10)
+        self.assertEqual(cur_len, len(block._content))
+        # Now we want a bit more content
+        cur_len += 10
+        block._ensure_content(cur_len)
+        self.assertTrue(len(block._content) >= cur_len)
+        self.assertTrue(len(block._content) < 158634)
+        self.assertEqualDiff(content[:len(block._content)], block._content)
+        # And now lets finish
+        block._ensure_content(158634)
+        self.assertEqualDiff(content, block._content)
+        # And the decompressor is finalized
+        self.assertIs(None, block._z_content_decompressor)
+
+    def test_partial_decomp_no_known_length(self):
+        content_chunks = []
+        for i in xrange(2048):
+            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
+            content_chunks.append(next_content)
+            next_sha1 = osutils.sha_string(next_content)
+            content_chunks.append(next_sha1 + '\n')
+        content = ''.join(content_chunks)
+        self.assertEqual(158634, len(content))
+        z_content = zlib.compress(content)
+        self.assertEqual(57182, len(z_content))
+        block = groupcompress.GroupCompressBlock()
+        block._z_content = z_content
+        block._z_content_length = len(z_content)
+        block._compressor_name = 'zlib'
+        block._content_length = None # Don't tell the decompressed length
+        self.assertIs(None, block._content)
+        block._ensure_content(100)
+        self.assertIsNot(None, block._content)
+        # We have decompressed at least 100 bytes
+        self.assertTrue(len(block._content) >= 100)
+        # We have not decompressed the whole content
+        self.assertTrue(len(block._content) < 158634)
+        self.assertEqualDiff(content[:len(block._content)], block._content)
+        # ensuring content that we already have shouldn't cause any more data
+        # to be extracted
+        cur_len = len(block._content)
+        block._ensure_content(cur_len - 10)
+        self.assertEqual(cur_len, len(block._content))
+        # Now we want a bit more content
+        cur_len += 10
+        block._ensure_content(cur_len)
+        self.assertTrue(len(block._content) >= cur_len)
+        self.assertTrue(len(block._content) < 158634)
+        self.assertEqualDiff(content[:len(block._content)], block._content)
+        # And now lets finish
+        block._ensure_content()
+        self.assertEqualDiff(content, block._content)
+        # And the decompressor is finalized
+        self.assertIs(None, block._z_content_decompressor)
+
+
+class TestCaseWithGroupCompressVersionedFiles(tests.TestCaseWithTransport):
+
+    def make_test_vf(self, create_graph, keylength=1, do_cleanup=True,
+                     dir='.'):
+        t = self.get_transport(dir)
+        t.ensure_base()
+        vf = groupcompress.make_pack_factory(graph=create_graph,
+            delta=False, keylength=keylength)(t)
+        if do_cleanup:
+            self.addCleanup(groupcompress.cleanup_pack_group, vf)
+        return vf
+
+
+class TestGroupCompressVersionedFiles(TestCaseWithGroupCompressVersionedFiles):
+
+    def test_get_record_stream_as_requested(self):
+        # Consider promoting 'as-requested' to general availability, and
+        # make this a VF interface test
+        vf = self.make_test_vf(False, dir='source')
+        vf.add_lines(('a',), (), ['lines\n'])
+        vf.add_lines(('b',), (), ['lines\n'])
+        vf.add_lines(('c',), (), ['lines\n'])
+        vf.add_lines(('d',), (), ['lines\n'])
+        vf.writer.end()
+        keys = [record.key for record in vf.get_record_stream(
+                    [('a',), ('b',), ('c',), ('d',)],
+                    'as-requested', False)]
+        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
+        keys = [record.key for record in vf.get_record_stream(
+                    [('b',), ('a',), ('d',), ('c',)],
+                    'as-requested', False)]
+        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)
+
+        # It should work even after being repacked into another VF
+        vf2 = self.make_test_vf(False, dir='target')
+        vf2.insert_record_stream(vf.get_record_stream(
+                    [('b',), ('a',), ('d',), ('c',)], 'as-requested', False))
+        vf2.writer.end()
+
+        keys = [record.key for record in vf2.get_record_stream(
+                    [('a',), ('b',), ('c',), ('d',)],
+                    'as-requested', False)]
+        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
+        keys = [record.key for record in vf2.get_record_stream(
+                    [('b',), ('a',), ('d',), ('c',)],
+                    'as-requested', False)]
+        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)
+
+    def test_insert_record_stream_re_uses_blocks(self):
+        vf = self.make_test_vf(True, dir='source')
+        def grouped_stream(revision_ids, first_parents=()):
+            parents = first_parents
+            for revision_id in revision_ids:
+                key = (revision_id,)
+                record = versionedfile.FulltextContentFactory(
+                    key, parents, None,
+                    'some content that is\n'
+                    'identical except for\n'
+                    'revision_id:%s\n' % (revision_id,))
+                yield record
+                parents = (key,)
+        # One group, a-d
+        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
+        # Second group, e-h
+        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
+                                               first_parents=(('d',),)))
+        block_bytes = {}
+        stream = vf.get_record_stream([(r,) for r in 'abcdefgh'],
+                                      'unordered', False)
+        num_records = 0
+        for record in stream:
+            if record.key in [('a',), ('e',)]:
+                self.assertEqual('groupcompress-block', record.storage_kind)
+            else:
+                self.assertEqual('groupcompress-block-ref',
+                                 record.storage_kind)
+            block_bytes[record.key] = record._manager._block._z_content
+            num_records += 1
+        self.assertEqual(8, num_records)
+        for r in 'abcd':
+            key = (r,)
+            self.assertIs(block_bytes[key], block_bytes[('a',)])
+            self.assertNotEqual(block_bytes[key], block_bytes[('e',)])
+        for r in 'efgh':
+            key = (r,)
+            self.assertIs(block_bytes[key], block_bytes[('e',)])
+            self.assertNotEqual(block_bytes[key], block_bytes[('a',)])
+        # Now copy the blocks into another vf, and ensure that the blocks are
+        # preserved without creating new entries
+        vf2 = self.make_test_vf(True, dir='target')
+        # ordering in 'groupcompress' order, should actually swap the groups in
+        # the target vf, but the groups themselves should not be disturbed.
+        vf2.insert_record_stream(vf.get_record_stream(
+            [(r,) for r in 'abcdefgh'], 'groupcompress', False))
+        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
+                                       'groupcompress', False)
+        vf2.writer.end()
+        num_records = 0
+        for record in stream:
+            num_records += 1
+            self.assertEqual(block_bytes[record.key],
+                             record._manager._block._z_content)
+        self.assertEqual(8, num_records)
+
+    def test__insert_record_stream_no_reuse_block(self):
+        vf = self.make_test_vf(True, dir='source')
+        def grouped_stream(revision_ids, first_parents=()):
+            parents = first_parents
+            for revision_id in revision_ids:
+                key = (revision_id,)
+                record = versionedfile.FulltextContentFactory(
+                    key, parents, None,
+                    'some content that is\n'
+                    'identical except for\n'
+                    'revision_id:%s\n' % (revision_id,))
+                yield record
+                parents = (key,)
+        # One group, a-d
+        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
+        # Second group, e-h
+        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
+                                               first_parents=(('d',),)))
+        vf.writer.end()
+        self.assertEqual(8, len(list(vf.get_record_stream(
+                                        [(r,) for r in 'abcdefgh'],
+                                        'unordered', False))))
+        # Now copy the blocks into another vf, and ensure that the blocks are
+        # preserved without creating new entries
+        vf2 = self.make_test_vf(True, dir='target')
+        # ordering in 'groupcompress' order, should actually swap the groups in
+        # the target vf, but the groups themselves should not be disturbed.
+        list(vf2._insert_record_stream(vf.get_record_stream(
+            [(r,) for r in 'abcdefgh'], 'groupcompress', False),
+            reuse_blocks=False))
+        vf2.writer.end()
+        # After inserting with reuse_blocks=False, we should have everything in
+        # a single new block.
+        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
+                                       'groupcompress', False)
+        block = None
+        for record in stream:
+            if block is None:
+                block = record._manager._block
+            else:
+                self.assertIs(block, record._manager._block)
+
+
+class TestLazyGroupCompress(tests.TestCaseWithTransport):
+
+    _texts = {
+        ('key1',): "this is a text\n"
+                   "with a reasonable amount of compressible bytes\n",
+        ('key2',): "another text\n"
+                   "with a reasonable amount of compressible bytes\n",
+        ('key3',): "yet another text which won't be extracted\n"
+                   "with a reasonable amount of compressible bytes\n",
+        ('key4',): "this will be extracted\n"
+                   "but references bytes from\n"
+                   "yet another text which won't be extracted\n"
+                   "with a reasonable amount of compressible bytes\n",
+    }
+    def make_block(self, key_to_text):
+        """Create a GroupCompressBlock, filling it with the given texts."""
+        compressor = groupcompress.GroupCompressor()
+        start = 0
+        for key in sorted(key_to_text):
+            compressor.compress(key, key_to_text[key], None)
+        block = compressor.flush()
+        entries = block._entries
+        raw_bytes = block.to_bytes()
+        return entries, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)
+
+    def add_key_to_manager(self, key, entries, block, manager):
+        entry = entries[key]
+        manager.add_factory(entry.key, (), entry.start, entry.end)
+
+    def test_get_fulltexts(self):
+        entries, block = self.make_block(self._texts)
+        manager = groupcompress._LazyGroupContentManager(block)
+        self.add_key_to_manager(('key1',), entries, block, manager)
+        self.add_key_to_manager(('key2',), entries, block, manager)
+        result_order = []
+        for record in manager.get_record_stream():
+            result_order.append(record.key)
+            text = self._texts[record.key]
+            self.assertEqual(text, record.get_bytes_as('fulltext'))
+        self.assertEqual([('key1',), ('key2',)], result_order)
+
+        # If we build the manager in the opposite order, we should get them
+        # back in the opposite order
+        manager = groupcompress._LazyGroupContentManager(block)
+        self.add_key_to_manager(('key2',), entries, block, manager)
+        self.add_key_to_manager(('key1',), entries, block, manager)
+        result_order = []
+        for record in manager.get_record_stream():
+            result_order.append(record.key)
+            text = self._texts[record.key]
+            self.assertEqual(text, record.get_bytes_as('fulltext'))
+        self.assertEqual([('key2',), ('key1',)], result_order)
+
+    def test__wire_bytes_no_keys(self):
+        entries, block = self.make_block(self._texts)
+        manager = groupcompress._LazyGroupContentManager(block)
+        wire_bytes = manager._wire_bytes()
+        block_length = len(block.to_bytes())
+        # We should have triggered a strip, since we aren't using any content
+        stripped_block = manager._block.to_bytes()
+        self.assertTrue(block_length > len(stripped_block))
+        empty_z_header = zlib.compress('')
+        self.assertEqual('groupcompress-block\n'
+                         '8\n' # len(compress(''))
+                         '0\n' # len('')
+                         '%d\n'# compressed block len
+                         '%s'  # zheader
+                         '%s'  # block
+                         % (len(stripped_block), empty_z_header,
+                            stripped_block),
+                         wire_bytes)
+
+    def test__wire_bytes(self):
+        entries, block = self.make_block(self._texts)
+        manager = groupcompress._LazyGroupContentManager(block)
+        self.add_key_to_manager(('key1',), entries, block, manager)
+        self.add_key_to_manager(('key4',), entries, block, manager)
+        block_bytes = block.to_bytes()
+        wire_bytes = manager._wire_bytes()
+        (storage_kind, z_header_len, header_len,
+         block_len, rest) = wire_bytes.split('\n', 4)
+        z_header_len = int(z_header_len)
+        header_len = int(header_len)
+        block_len = int(block_len)
+        self.assertEqual('groupcompress-block', storage_kind)
+        self.assertEqual(33, z_header_len)
+        self.assertEqual(25, header_len)
+        self.assertEqual(len(block_bytes), block_len)
+        z_header = rest[:z_header_len]
+        header = zlib.decompress(z_header)
+        self.assertEqual(header_len, len(header))
+        entry1 = entries[('key1',)]
+        entry4 = entries[('key4',)]
+        self.assertEqualDiff('key1\n'
+                             '\n'  # no parents
+                             '%d\n' # start offset
+                             '%d\n' # end byte
+                             'key4\n'
+                             '\n'
+                             '%d\n'
+                             '%d\n'
+                             % (entry1.start, entry1.end,
+                                entry4.start, entry4.end),
+                            header)
+        z_block = rest[z_header_len:]
+        self.assertEqual(block_bytes, z_block)
+
+    def test_from_bytes(self):
+        entries, block = self.make_block(self._texts)
+        manager = groupcompress._LazyGroupContentManager(block)
+        self.add_key_to_manager(('key1',), entries, block, manager)
+        self.add_key_to_manager(('key4',), entries, block, manager)
+        wire_bytes = manager._wire_bytes()
+        self.assertStartsWith(wire_bytes, 'groupcompress-block\n')
+        manager = groupcompress._LazyGroupContentManager.from_bytes(wire_bytes)
+        self.assertIsInstance(manager, groupcompress._LazyGroupContentManager)
+        self.assertEqual(2, len(manager._factories))
+        self.assertEqual(block._z_content, manager._block._z_content)
+        result_order = []
+        for record in manager.get_record_stream():
+            result_order.append(record.key)
+            text = self._texts[record.key]
+            self.assertEqual(text, record.get_bytes_as('fulltext'))
+        self.assertEqual([('key1',), ('key4',)], result_order)
+
+    def test__check_rebuild_no_changes(self):
+        entries, block = self.make_block(self._texts)
+        manager = groupcompress._LazyGroupContentManager(block)
+        # Request all the keys, which ensures that we won't rebuild
+        self.add_key_to_manager(('key1',), entries, block, manager)
+        self.add_key_to_manager(('key2',), entries, block, manager)
+        self.add_key_to_manager(('key3',), entries, block, manager)
+        self.add_key_to_manager(('key4',), entries, block, manager)
+        manager._check_rebuild_block()
+        self.assertIs(block, manager._block)
+
+    def test__check_rebuild_only_one(self):
+        entries, block = self.make_block(self._texts)
+        manager = groupcompress._LazyGroupContentManager(block)
+        # Request just the first key, which should trigger a 'strip' action
+        self.add_key_to_manager(('key1',), entries, block, manager)
+        manager._check_rebuild_block()
+        self.assertIsNot(block, manager._block)
+        self.assertTrue(block._content_length > manager._block._content_length)
+        # We should be able to still get the content out of this block, though
+        # it should only have 1 entry
+        for record in manager.get_record_stream():
+            self.assertEqual(('key1',), record.key)
+            self.assertEqual(self._texts[record.key],
+                             record.get_bytes_as('fulltext'))
+
+    def test__check_rebuild_middle(self):
+        entries, block = self.make_block(self._texts)
+        manager = groupcompress._LazyGroupContentManager(block)
+        # Request a small key in the middle should trigger a 'rebuild'
+        self.add_key_to_manager(('key4',), entries, block, manager)
+        manager._check_rebuild_block()
+        self.assertIsNot(block, manager._block)
+        self.assertTrue(block._content_length > manager._block._content_length)
+        for record in manager.get_record_stream():
+            self.assertEqual(('key4',), record.key)
+            self.assertEqual(self._texts[record.key],
+                             record.get_bytes_as('fulltext'))