~bzr-pqm/bzr/bzr.dev

Viewing changes to bzrlib/tests/test_groupcompress.py

  • Committer: Martin Pool
  • Date: 2009-09-14 01:48:28 UTC
  • mfrom: (4685 +trunk)
  • mto: This revision was merged to the branch mainline in revision 4688.
  • Revision ID: mbp@sourcefrog.net-20090914014828-ydr9rlkdfq2sv57z
Merge news

@@ -538,7 +538,7 @@
                      'as-requested', False)]
          self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)
 
-    def test_insert_record_stream_re_uses_blocks(self):
+    def test_insert_record_stream_reuses_blocks(self):
         vf = self.make_test_vf(True, dir='source')
         def grouped_stream(revision_ids, first_parents=()):
             parents = first_parents
@@ -582,8 +582,14 @@
         vf2 = self.make_test_vf(True, dir='target')
         # ordering in 'groupcompress' order, should actually swap the groups in
         # the target vf, but the groups themselves should not be disturbed.
-        vf2.insert_record_stream(vf.get_record_stream(
-            [(r,) for r in 'abcdefgh'], 'groupcompress', False))
+        def small_size_stream():
+            for record in vf.get_record_stream([(r,) for r in 'abcdefgh'],
+                                               'groupcompress', False):
+                record._manager._full_enough_block_size = \
+                    record._manager._block._content_length
+                yield record
+
+        vf2.insert_record_stream(small_size_stream())
         stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                        'groupcompress', False)
         vf2.writer.end()
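
The small_size_stream() wrapper above steers the block-reuse heuristic: by
pinning each manager's _full_enough_block_size to the block's own content
length, every incoming block qualifies as "full enough" and is copied into
the target unchanged. A minimal model of the threshold being steered (an
inference from the assertions in this diff, not the real bzrlib code):

    def block_is_full_enough(manager):
        # A block counts as "full enough" when its content reaches the
        # manager's recommended minimum size; small_size_stream() makes
        # this trivially true by setting the threshold to the block's
        # own length.
        return (manager._block._content_length
                >= manager._full_enough_block_size)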
@@ -594,6 +600,44 @@
                              record._manager._block._z_content)
         self.assertEqual(8, num_records)
 
+    def test_insert_record_stream_packs_on_the_fly(self):
+        vf = self.make_test_vf(True, dir='source')
+        def grouped_stream(revision_ids, first_parents=()):
+            parents = first_parents
+            for revision_id in revision_ids:
+                key = (revision_id,)
+                record = versionedfile.FulltextContentFactory(
+                    key, parents, None,
+                    'some content that is\n'
+                    'identical except for\n'
+                    'revision_id:%s\n' % (revision_id,))
+                yield record
+                parents = (key,)
+        # One group, a-d
+        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
+        # Second group, e-h
+        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
+                                               first_parents=(('d',),)))
+        # Now copy the blocks into another vf, and see that the
+        # insert_record_stream rebuilt a new block on-the-fly because of
+        # under-utilization
+        vf2 = self.make_test_vf(True, dir='target')
+        vf2.insert_record_stream(vf.get_record_stream(
+            [(r,) for r in 'abcdefgh'], 'groupcompress', False))
+        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
+                                       'groupcompress', False)
+        vf2.writer.end()
+        num_records = 0
+        # All of the records should be recombined into a single block
+        block = None
+        for record in stream:
+            num_records += 1
+            if block is None:
+                block = record._manager._block
+            else:
+                self.assertIs(block, record._manager._block)
+        self.assertEqual(8, num_records)
+
     def test__insert_record_stream_no_reuse_block(self):
         vf = self.make_test_vf(True, dir='source')
         def grouped_stream(revision_ids, first_parents=()):
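
Together with test_insert_record_stream_reuses_blocks above, this test pins
down both sides of the pack-on-the-fly decision. A sketch of that decision
as these tests imply it (the helpers are hypothetical; the real logic lives
inside insert_record_stream):

    for record in stream:
        if (record.storage_kind == 'groupcompress-block'
                and record._manager.check_is_well_utilized()):
            # Keep the incoming block intact and just index its keys.
            reuse_block(record)                  # hypothetical helper
        else:
            # Under-utilized: recompress the text into the block
            # currently being built, combining small groups into one.
            repack_into_current_block(record)    # hypothetical helper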
@@ -702,19 +746,128 @@
                               " 0 8', \(\(\('a',\),\),\)\)")
 
 
+class StubGCVF(object):
+    def __init__(self, canned_get_blocks=None):
+        self._group_cache = {}
+        self._canned_get_blocks = canned_get_blocks or []
+    def _get_blocks(self, read_memos):
+        return iter(self._canned_get_blocks)
+
+
+class Test_BatchingBlockFetcher(TestCaseWithGroupCompressVersionedFiles):
+    """Simple whitebox unit tests for _BatchingBlockFetcher."""
+
+    def test_add_key_new_read_memo(self):
+        """Adding a key with an uncached read_memo new to this batch adds that
+        read_memo to the list of memos to fetch.
+        """
+        # locations are: index_memo, ignored, parents, ignored
+        # where index_memo is: (idx, offset, len, factory_start, factory_end)
+        # and (idx, offset, size) is known as the 'read_memo', identifying the
+        # raw bytes needed.
+        read_memo = ('fake index', 100, 50)
+        locations = {
+            ('key',): (read_memo + (None, None), None, None, None)}
+        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), locations)
+        total_size = batcher.add_key(('key',))
+        self.assertEqual(50, total_size)
+        self.assertEqual([('key',)], batcher.keys)
+        self.assertEqual([read_memo], batcher.memos_to_get)
+
+    def test_add_key_duplicate_read_memo(self):
+        """read_memos that occur multiple times in a batch will only be fetched
+        once.
+        """
+        read_memo = ('fake index', 100, 50)
+        # Two keys, both sharing the same read memo (but different overall
+        # index_memos).
+        locations = {
+            ('key1',): (read_memo + (0, 1), None, None, None),
+            ('key2',): (read_memo + (1, 2), None, None, None)}
+        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), locations)
+        total_size = batcher.add_key(('key1',))
+        total_size = batcher.add_key(('key2',))
+        self.assertEqual(50, total_size)
+        self.assertEqual([('key1',), ('key2',)], batcher.keys)
+        self.assertEqual([read_memo], batcher.memos_to_get)
+
+    def test_add_key_cached_read_memo(self):
+        """Adding a key with a cached read_memo will not cause that read_memo
+        to be added to the list to fetch.
+        """
+        read_memo = ('fake index', 100, 50)
+        gcvf = StubGCVF()
+        gcvf._group_cache[read_memo] = 'fake block'
+        locations = {
+            ('key',): (read_memo + (None, None), None, None, None)}
+        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
+        total_size = batcher.add_key(('key',))
+        self.assertEqual(0, total_size)
+        self.assertEqual([('key',)], batcher.keys)
+        self.assertEqual([], batcher.memos_to_get)
+
+    def test_yield_factories_empty(self):
+        """An empty batch yields no factories."""
+        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), {})
+        self.assertEqual([], list(batcher.yield_factories()))
+
+    def test_yield_factories_calls_get_blocks(self):
+        """Uncached memos are retrieved via get_blocks."""
+        read_memo1 = ('fake index', 100, 50)
+        read_memo2 = ('fake index', 150, 40)
+        gcvf = StubGCVF(
+            canned_get_blocks=[
+                (read_memo1, groupcompress.GroupCompressBlock()),
+                (read_memo2, groupcompress.GroupCompressBlock())])
+        locations = {
+            ('key1',): (read_memo1 + (None, None), None, None, None),
+            ('key2',): (read_memo2 + (None, None), None, None, None)}
+        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
+        batcher.add_key(('key1',))
+        batcher.add_key(('key2',))
+        factories = list(batcher.yield_factories(full_flush=True))
+        self.assertLength(2, factories)
+        keys = [f.key for f in factories]
+        kinds = [f.storage_kind for f in factories]
+        self.assertEqual([('key1',), ('key2',)], keys)
+        self.assertEqual(['groupcompress-block', 'groupcompress-block'], kinds)
+
+    def test_yield_factories_flushing(self):
+        """yield_factories holds back on yielding results from the final block
+        unless passed full_flush=True.
+        """
+        fake_block = groupcompress.GroupCompressBlock()
+        read_memo = ('fake index', 100, 50)
+        gcvf = StubGCVF()
+        gcvf._group_cache[read_memo] = fake_block
+        locations = {
+            ('key',): (read_memo + (None, None), None, None, None)}
+        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
+        batcher.add_key(('key',))
+        self.assertEqual([], list(batcher.yield_factories()))
+        factories = list(batcher.yield_factories(full_flush=True))
+        self.assertLength(1, factories)
+        self.assertEqual(('key',), factories[0].key)
+        self.assertEqual('groupcompress-block', factories[0].storage_kind)
+
+
 class TestLazyGroupCompress(tests.TestCaseWithTransport):
 
     _texts = {
         ('key1',): "this is a text\n"
-                   "with a reasonable amount of compressible bytes\n",
+                   "with a reasonable amount of compressible bytes\n"
+                   "which can be shared between various other texts\n",
         ('key2',): "another text\n"
-                   "with a reasonable amount of compressible bytes\n",
+                   "with a reasonable amount of compressible bytes\n"
+                   "which can be shared between various other texts\n",
         ('key3',): "yet another text which won't be extracted\n"
-                   "with a reasonable amount of compressible bytes\n",
+                   "with a reasonable amount of compressible bytes\n"
+                   "which can be shared between various other texts\n",
         ('key4',): "this will be extracted\n"
                    "but references most of its bytes from\n"
                    "yet another text which won't be extracted\n"
-                   "with a reasonable amount of compressible bytes\n",
+                   "with a reasonable amount of compressible bytes\n"
+                   "which can be shared between various other texts\n",
     }
     def make_block(self, key_to_text):
         """Create a GroupCompressBlock, filling it with the given texts."""
@@ -732,6 +885,13 @@
         start, end = locations[key]
         manager.add_factory(key, (), start, end)
 
+    def make_block_and_full_manager(self, texts):
+        locations, block = self.make_block(texts)
+        manager = groupcompress._LazyGroupContentManager(block)
+        for key in sorted(texts):
+            self.add_key_to_manager(key, locations, block, manager)
+        return block, manager
+
     def test_get_fulltexts(self):
         locations, block = self.make_block(self._texts)
         manager = groupcompress._LazyGroupContentManager(block)
@@ -788,8 +948,8 @@
         header_len = int(header_len)
         block_len = int(block_len)
         self.assertEqual('groupcompress-block', storage_kind)
-        self.assertEqual(33, z_header_len)
-        self.assertEqual(25, header_len)
+        self.assertEqual(34, z_header_len)
+        self.assertEqual(26, header_len)
         self.assertEqual(len(block_bytes), block_len)
         z_header = rest[:z_header_len]
         header = zlib.decompress(z_header)
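
Both expected header lengths grow by exactly one byte here. A plausible
explanation (an inference from the longer sample texts above, not verified
against the wire-format code): the header records start/end offsets as
decimal text, so a longer block can push an offset into an extra digit:

    # One extra digit in a recorded offset costs one uncompressed header
    # byte, and here also one byte after zlib compression.
    assert len('0 100\n') == len('0 99\n') + 1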
@@ -829,13 +989,7 @@
         self.assertEqual([('key1',), ('key4',)], result_order)
 
     def test__check_rebuild_no_changes(self):
-        locations, block = self.make_block(self._texts)
-        manager = groupcompress._LazyGroupContentManager(block)
-        # Request all the keys, which ensures that we won't rebuild
-        self.add_key_to_manager(('key1',), locations, block, manager)
-        self.add_key_to_manager(('key2',), locations, block, manager)
-        self.add_key_to_manager(('key3',), locations, block, manager)
-        self.add_key_to_manager(('key4',), locations, block, manager)
+        block, manager = self.make_block_and_full_manager(self._texts)
         manager._check_rebuild_block()
         self.assertIs(block, manager._block)
 
@@ -866,3 +1020,50 @@
             self.assertEqual(('key4',), record.key)
             self.assertEqual(self._texts[record.key],
                              record.get_bytes_as('fulltext'))
+
+    def test_check_is_well_utilized_all_keys(self):
+        block, manager = self.make_block_and_full_manager(self._texts)
+        self.assertFalse(manager.check_is_well_utilized())
+        # Though we can fake it by changing the recommended minimum size
+        manager._full_enough_block_size = block._content_length
+        self.assertTrue(manager.check_is_well_utilized())
+        # Setting it just above causes it to fail
+        manager._full_enough_block_size = block._content_length + 1
+        self.assertFalse(manager.check_is_well_utilized())
+        # Setting the mixed-block size doesn't do anything, because the content
+        # is considered to not be 'mixed'
+        manager._full_enough_mixed_block_size = block._content_length
+        self.assertFalse(manager.check_is_well_utilized())
+
+    def test_check_is_well_utilized_mixed_keys(self):
+        texts = {}
+        f1k1 = ('f1', 'k1')
+        f1k2 = ('f1', 'k2')
+        f2k1 = ('f2', 'k1')
+        f2k2 = ('f2', 'k2')
+        texts[f1k1] = self._texts[('key1',)]
+        texts[f1k2] = self._texts[('key2',)]
+        texts[f2k1] = self._texts[('key3',)]
+        texts[f2k2] = self._texts[('key4',)]
+        block, manager = self.make_block_and_full_manager(texts)
+        self.assertFalse(manager.check_is_well_utilized())
+        manager._full_enough_block_size = block._content_length
+        self.assertTrue(manager.check_is_well_utilized())
+        manager._full_enough_block_size = block._content_length + 1
+        self.assertFalse(manager.check_is_well_utilized())
+        manager._full_enough_mixed_block_size = block._content_length
+        self.assertTrue(manager.check_is_well_utilized())
+
+    def test_check_is_well_utilized_partial_use(self):
+        locations, block = self.make_block(self._texts)
+        manager = groupcompress._LazyGroupContentManager(block)
+        manager._full_enough_block_size = block._content_length
+        self.add_key_to_manager(('key1',), locations, block, manager)
+        self.add_key_to_manager(('key2',), locations, block, manager)
+        # Just using the content from key1 and 2 is not enough to be considered
+        # 'complete'
+        self.assertFalse(manager.check_is_well_utilized())
+        # However if we add key4, then we have enough, as we only require 75%
+        # consumption
+        self.add_key_to_manager(('key4',), locations, block, manager)
+        self.assertTrue(manager.check_is_well_utilized())
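
These three tests pin down the check_is_well_utilized() policy from the
outside. A model consistent with all of the assertions above (an inference
from this diff, not the real groupcompress implementation; the _factories,
_start and _end attribute names are assumptions):

    def is_well_utilized(manager):
        # Less than 75% of the block's bytes requested: under-utilized
        # regardless of any size threshold (the partial_use test).
        size = manager._block._content_length
        used = sum(f._end - f._start for f in manager._factories)
        if used < 0.75 * size:
            return False
        # At or above the recommended minimum size: well utilized.
        if size >= manager._full_enough_block_size:
            return True
        # Blocks whose keys span several prefixes ('mixed' content) are
        # granted a second, separate threshold (the mixed_keys test).
        prefixes = set(f.key[:-1] for f in manager._factories)
        return (len(prefixes) > 1
                and size >= manager._full_enough_mixed_block_size)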