    def test_extract_no_end(self):
        # We should be able to extract a record, even if we only know the start
        # of its bytes.
        texts = {
            ('key1',): 'text for key1\nhas bytes that are common\n',
            ('key2',): 'text for key2\nhas bytes that are common\n',
        }
        entries, block = self.make_block(texts)
        self.assertEqualDiff('text for key1\nhas bytes that are common\n',
                             block.extract(('key1',), entries[('key1',)].start,
                                           end=None))
        self.assertEqualDiff('text for key2\nhas bytes that are common\n',
                             block.extract(('key2',), entries[('key2',)].start,
                                           end=None))

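    # Note: the partial decompression tests below depend on the block using a
    # zlib decompressobj internally, so _ensure_content(n) only has to inflate
    # at least n bytes rather than the whole block.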
    def test_partial_decomp(self):
        content_chunks = []
        # We need a sufficient amount of data so that partial decompression
        # has something to work with. Most auto-generated data compresses a
        # bit too well, so we mix sha hashes in with the compressible text.
        for i in xrange(2048):
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
            content_chunks.append(next_content)
            next_sha1 = osutils.sha_string(next_content)
            content_chunks.append(next_sha1 + '\n')
        content = ''.join(content_chunks)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content = z_content
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = 158634
        self.assertIs(None, block._content)
        block._ensure_content(100)
        self.assertIsNot(None, block._content)
        # We have decompressed at least 100 bytes
        self.assertTrue(len(block._content) >= 100)
        # We have not decompressed the whole content
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # Ensuring content that we already have shouldn't cause any more data
        # to be extracted.
        cur_len = len(block._content)
        block._ensure_content(cur_len - 10)
        self.assertEqual(cur_len, len(block._content))
        # Now we want a bit more content
        cur_len += 10
        block._ensure_content(cur_len)
        self.assertTrue(len(block._content) >= cur_len)
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # And now let's finish
        block._ensure_content(158634)
        self.assertEqualDiff(content, block._content)
        # And the decompressor is finalized
        self.assertIs(None, block._z_content_decompressor)

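    # Same as above, except _content_length is withheld, so _ensure_content
    # has to keep feeding the decompressor until it signals end-of-stream.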
    def test_partial_decomp_no_known_length(self):
        content_chunks = []
        for i in xrange(2048):
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
            content_chunks.append(next_content)
            next_sha1 = osutils.sha_string(next_content)
            content_chunks.append(next_sha1 + '\n')
        content = ''.join(content_chunks)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content = z_content
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = None  # Don't tell it the decompressed length
        self.assertIs(None, block._content)
        block._ensure_content(100)
        self.assertIsNot(None, block._content)
        # We have decompressed at least 100 bytes
        self.assertTrue(len(block._content) >= 100)
        # We have not decompressed the whole content
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # Ensuring content that we already have shouldn't cause any more data
        # to be extracted.
        cur_len = len(block._content)
        block._ensure_content(cur_len - 10)
        self.assertEqual(cur_len, len(block._content))
        # Now we want a bit more content
        cur_len += 10
        block._ensure_content(cur_len)
        self.assertTrue(len(block._content) >= cur_len)
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # And now let's finish
        block._ensure_content()
        self.assertEqualDiff(content, block._content)
        # And the decompressor is finalized
        self.assertIs(None, block._z_content_decompressor)


class TestCaseWithGroupCompressVersionedFiles(tests.TestCaseWithTransport):

    def make_test_vf(self, create_graph, keylength=1, do_cleanup=True,
                     dir='.'):
        t = self.get_transport(dir)
        t.ensure_base()
        vf = groupcompress.make_pack_factory(graph=create_graph,
            delta=False, keylength=keylength)(t)
        if do_cleanup:
            self.addCleanup(groupcompress.cleanup_pack_group, vf)
        return vf


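# Tests for the VersionedFiles implementation backed by groupcompress packs,
# built with the factory above.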
class TestGroupCompressVersionedFiles(TestCaseWithGroupCompressVersionedFiles):

    def test_get_record_stream_as_requested(self):
        # Consider promoting 'as-requested' to general availability, and
        # making this a VF interface test
        vf = self.make_test_vf(False, dir='source')
        vf.add_lines(('a',), (), ['lines\n'])
        vf.add_lines(('b',), (), ['lines\n'])
        vf.add_lines(('c',), (), ['lines\n'])
        vf.add_lines(('d',), (), ['lines\n'])
        vf.writer.end()
        keys = [record.key for record in vf.get_record_stream(
                    [('a',), ('b',), ('c',), ('d',)],
                    'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)],
                    'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

        # It should work even after being repacked into another VF
        vf2 = self.make_test_vf(False, dir='target')
        vf2.insert_record_stream(vf.get_record_stream(
            [('b',), ('a',), ('d',), ('c',)], 'as-requested', False))
        vf2.writer.end()

        keys = [record.key for record in vf2.get_record_stream(
                    [('a',), ('b',), ('c',), ('d',)],
                    'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf2.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)],
                    'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

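    # The next two tests exercise block reuse: records 'a'-'d' are inserted as
    # one compression group and 'e'-'h' as a second, so each record should
    # share a block only with its own group.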
    def test_insert_record_stream_re_uses_blocks(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        block_bytes = {}
        stream = vf.get_record_stream([(r,) for r in 'abcdefgh'],
                                      'unordered', False)
        num_records = 0
        for record in stream:
            if record.key in [('a',), ('e',)]:
                self.assertEqual('groupcompress-block', record.storage_kind)
            else:
                self.assertEqual('groupcompress-block-ref',
                                 record.storage_kind)
            block_bytes[record.key] = record._manager._block._z_content
            num_records += 1
        self.assertEqual(8, num_records)
        for r in 'abcd':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('a',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('e',)])
        for r in 'efgh':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('e',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('a',)])
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # Ordering in 'groupcompress' order should actually swap the groups
        # in the target vf, but the groups themselves should not be disturbed.
        vf2.insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False))
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        vf2.writer.end()
        num_records = 0
        for record in stream:
            num_records += 1
            self.assertEqual(block_bytes[record.key],
                             record._manager._block._z_content)
        self.assertEqual(8, num_records)

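    # Same setup, but calling _insert_record_stream directly with
    # reuse_blocks=False, so the target vf recompresses everything instead of
    # copying the source blocks.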
    def test__insert_record_stream_no_reuse_block(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        vf.writer.end()
        self.assertEqual(8, len(list(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'],
            'unordered', False))))
        # Now copy the records into another vf, this time forcing the blocks
        # to be recompressed rather than reused.
        vf2 = self.make_test_vf(True, dir='target')
        list(vf2._insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False),
            reuse_blocks=False))
        vf2.writer.end()
        # After inserting with reuse_blocks=False, we should have everything
        # in a single new block.
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        block = None
        for record in stream:
            if block is None:
                block = record._manager._block
            else:
                self.assertIs(block, record._manager._block)


class TestLazyGroupCompress(tests.TestCaseWithTransport):

    _texts = {
        ('key1',): "this is a text\n"
                   "with a reasonable amount of compressible bytes\n",
        ('key2',): "another text\n"
                   "with a reasonable amount of compressible bytes\n",
        ('key3',): "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n",
        ('key4',): "this will be extracted\n"
                   "but references bytes from\n"
                   "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n",
    }

    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts."""
        compressor = groupcompress.GroupCompressor()
        for key in sorted(key_to_text):
            compressor.compress(key, key_to_text[key], None)
        block = compressor.flush()
        entries = block._entries
        raw_bytes = block.to_bytes()
        return entries, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)

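    # Helper: register a factory for the given key with the lazy manager,
    # using the entry's recorded start/end offsets within the block.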
    def add_key_to_manager(self, key, entries, block, manager):
        entry = entries[key]
        manager.add_factory(entry.key, (), entry.start, entry.end)

    def test_get_fulltexts(self):
        entries, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), entries, block, manager)
        self.add_key_to_manager(('key2',), entries, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key1',), ('key2',)], result_order)

        # If we build the manager in the opposite order, we should get them
        # back in the opposite order
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key2',), entries, block, manager)
        self.add_key_to_manager(('key1',), entries, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key2',), ('key1',)], result_order)

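    # Wire format, as exercised below: 'groupcompress-block\n', then three
    # length lines (compressed header, uncompressed header, block), then the
    # zlib-compressed header followed by the raw block bytes.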
    def test__wire_bytes_no_keys(self):
        entries, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        wire_bytes = manager._wire_bytes()
        block_length = len(block.to_bytes())
        # We should have triggered a strip, since we aren't using any content
        stripped_block = manager._block.to_bytes()
        self.assertTrue(block_length > len(stripped_block))
        empty_z_header = zlib.compress('')
        self.assertEqual('groupcompress-block\n'
                         '8\n'  # len(compress(''))
                         '0\n'  # len('')
                         '%d\n' # compressed block len
                         '%s'   # compressed empty header
                         '%s'   # stripped block
                         % (len(stripped_block), empty_z_header,
                            stripped_block),
                         wire_bytes)

    def test__wire_bytes(self):
        entries, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), entries, block, manager)
        self.add_key_to_manager(('key4',), entries, block, manager)
        block_bytes = block.to_bytes()
        wire_bytes = manager._wire_bytes()
        (storage_kind, z_header_len, header_len,
         block_len, rest) = wire_bytes.split('\n', 4)
        z_header_len = int(z_header_len)
        header_len = int(header_len)
        block_len = int(block_len)
        self.assertEqual('groupcompress-block', storage_kind)
        self.assertEqual(33, z_header_len)
        self.assertEqual(25, header_len)
        self.assertEqual(len(block_bytes), block_len)
        z_header = rest[:z_header_len]
        header = zlib.decompress(z_header)
        self.assertEqual(header_len, len(header))
        entry1 = entries[('key1',)]
        entry4 = entries[('key4',)]
        self.assertEqualDiff('key1\n'
                             '\n'   # no parents
                             '%d\n' # start offset
                             '%d\n' # end offset
                             'key4\n'
                             '\n'
                             '%d\n'
                             '%d\n'
                             % (entry1.start, entry1.end,
                                entry4.start, entry4.end),
                             header)
        z_block = rest[z_header_len:]
        self.assertEqual(block_bytes, z_block)

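    # Round-trip: a manager rebuilt from its wire bytes should preserve the
    # factories and the block's compressed content.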
    def test_from_bytes(self):
        entries, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), entries, block, manager)
        self.add_key_to_manager(('key4',), entries, block, manager)
        wire_bytes = manager._wire_bytes()
        self.assertStartsWith(wire_bytes, 'groupcompress-block\n')
        manager = groupcompress._LazyGroupContentManager.from_bytes(wire_bytes)
        self.assertIsInstance(manager, groupcompress._LazyGroupContentManager)
        self.assertEqual(2, len(manager._factories))
        self.assertEqual(block._z_content, manager._block._z_content)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key1',), ('key4',)], result_order)

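    # _check_rebuild_block has three possible outcomes, checked below: keep
    # the block as-is (everything is used), 'strip' unused content, or fully
    # rebuild the block around the texts that are actually wanted.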
    def test__check_rebuild_no_changes(self):
        entries, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Request all the keys, which ensures that we won't rebuild
        self.add_key_to_manager(('key1',), entries, block, manager)
        self.add_key_to_manager(('key2',), entries, block, manager)
        self.add_key_to_manager(('key3',), entries, block, manager)
        self.add_key_to_manager(('key4',), entries, block, manager)
        manager._check_rebuild_block()
        self.assertIs(block, manager._block)

    def test__check_rebuild_only_one(self):
        entries, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Request just the first key, which should trigger a 'strip' action
        self.add_key_to_manager(('key1',), entries, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        # We should still be able to get the content out of this block, though
        # it should only have one entry
        for record in manager.get_record_stream():
            self.assertEqual(('key1',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))

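    # Requesting only key4 leaves unused texts ahead of it in the block, so
    # trimming the tail isn't enough and the manager must rebuild the block.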
    def test__check_rebuild_middle(self):
        entries, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Requesting a small key in the middle should trigger a 'rebuild'
        self.add_key_to_manager(('key4',), entries, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        for record in manager.get_record_stream():
            self.assertEqual(('key4',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))