~bzr-pqm/bzr/bzr.dev

Viewing changes to bzrlib/repofmt/groupcompress_repo.py

  • Committer: Robert J. Tanner
  • Date: 2009-06-10 03:56:49 UTC
  • mfrom: (4423 +trunk)
  • mto: This revision was merged to the branch mainline in revision 4425.
  • Revision ID: tanner@real-time.com-20090610035649-7rfx4cls4550zc3c
Merge 1.15.1 back to trunk

@@ -51,6 +51,7 @@
     PackRootCommitBuilder,
     RepositoryPackCollection,
     RepositoryFormatPack,
+    ResumedPack,
     Packer,
     )
 
@@ -87,8 +88,8 @@
             # have a regular 2-list index giving parents and compression
             # source.
             index_builder_class(reference_lists=1),
-            # Texts: compression and per file graph, for all fileids - so two
-            # reference lists and two elements in the key tuple.
+            # Texts: per file graph, for all fileids - so one reference list
+            # and two elements in the key tuple.
             index_builder_class(reference_lists=1, key_elements=2),
             # Signatures: Just blobs to store, no compression, no parents
             # listing.
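
The reworded comment matches the index actually built: the texts index now carries one reference list (the per-file graph; the separate compression list is gone) over two-element (file_id, revision_id) keys. A minimal sketch of that index shape, using a plain dict rather than bzrlib's real index_builder_class (names here are illustrative, not bzrlib's API):

    class SketchIndexBuilder(object):
        # Fixed-width tuple keys plus a fixed number of reference lists
        # per node, mirroring the shape of a bzrlib graph index.

        def __init__(self, reference_lists=0, key_elements=1):
            self.reference_lists = reference_lists
            self.key_elements = key_elements
            self._nodes = {}

        def add_node(self, key, value, references=()):
            if len(key) != self.key_elements:
                raise ValueError('key %r needs %d elements'
                                 % (key, self.key_elements))
            if len(references) != self.reference_lists:
                raise ValueError('expected %d reference lists'
                                 % self.reference_lists)
            self._nodes[key] = (value, tuple(references))

    # Texts: one reference list (per-file graph), two-element keys.
    builder = SketchIndexBuilder(reference_lists=1, key_elements=2)
    builder.add_node(('file-id', 'rev-2'), 'pack offset data',
                     references=[[('file-id', 'rev-1')]])
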
@@ -163,7 +164,21 @@
         have deltas based on a fallback repository.
         (See <https://bugs.launchpad.net/bzr/+bug/288751>)
         """
-        # Groupcompress packs don't have any external references
+        # Groupcompress packs don't have any external references, arguably CHK
+        # pages have external references, but we cannot 'cheaply' determine
+        # them without actually walking all of the chk pages.
+
+
+class ResumedGCPack(ResumedPack):
+
+    def _check_references(self):
+        """Make sure our external compression parents are present."""
+        # See GCPack._check_references for why this is empty
+
+    def _get_external_refs(self, index):
+        # GC repositories don't have compression parents external to a given
+        # pack file
+        return set()
 
 
 class GCCHKPacker(Packer):
@@ -540,6 +555,7 @@
 class GCRepositoryPackCollection(RepositoryPackCollection):
 
     pack_factory = GCPack
+    resumed_pack_factory = ResumedGCPack
 
     def _already_packed(self):
         """Is the collection already packed?"""
@@ -609,7 +625,8 @@
         self.revisions = GroupCompressVersionedFiles(
             _GCGraphIndex(self._pack_collection.revision_index.combined_index,
                 add_callback=self._pack_collection.revision_index.add_callback,
-                parents=True, is_locked=self.is_locked),
+                parents=True, is_locked=self.is_locked,
+                track_external_parent_refs=True),
             access=self._pack_collection.revision_index.data_access,
             delta=False)
         self.signatures = GroupCompressVersionedFiles(
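
With track_external_parent_refs=True the revision index is asked to remember parent keys that are referenced but never inserted, so the pack collection can later report what a partial stream still lacks. A rough sketch of that bookkeeping (hypothetical names, not the _GCGraphIndex implementation):

    class ExternalRefTracker(object):

        def __init__(self):
            self._inserted = set()
            self._parent_refs = set()

        def add(self, key, parent_keys):
            self._inserted.add(key)
            self._parent_refs.update(parent_keys)

        def external_refs(self):
            # Parents referenced by inserted revisions but not themselves
            # inserted.
            return self._parent_refs - self._inserted

    tracker = ExternalRefTracker()
    tracker.add(('rev-2',), [('rev-1',)])
    tracker.add(('rev-3',), [('rev-2',), ('ghost',)])
    assert tracker.external_refs() == set([('rev-1',), ('ghost',)])
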
@@ -719,52 +736,21 @@
         # make it raise to trap naughty direct users.
         raise NotImplementedError(self._iter_inventory_xmls)
 
-    def _find_revision_outside_set(self, revision_ids):
-        revision_set = frozenset(revision_ids)
-        for revid in revision_ids:
-            parent_ids = self.get_parent_map([revid]).get(revid, ())
-            for parent in parent_ids:
-                if parent in revision_set:
-                    # Parent is not outside the set
-                    continue
-                if parent not in self.get_parent_map([parent]):
-                    # Parent is a ghost
-                    continue
-                return parent
-        return _mod_revision.NULL_REVISION
+    def _find_parent_ids_of_revisions(self, revision_ids):
+        # TODO: we probably want to make this a helper that other code can get
+        #       at
+        parent_map = self.get_parent_map(revision_ids)
+        parents = set()
+        map(parents.update, parent_map.itervalues())
+        parents.difference_update(revision_ids)
+        parents.discard(_mod_revision.NULL_REVISION)
+        return parents
 
-    def _find_file_keys_to_fetch(self, revision_ids, pb):
-        rich_root = self.supports_rich_root()
-        revision_outside_set = self._find_revision_outside_set(revision_ids)
-        if revision_outside_set == _mod_revision.NULL_REVISION:
-            uninteresting_root_keys = set()
-        else:
-            uninteresting_inv = self.get_inventory(revision_outside_set)
-            uninteresting_root_keys = set([uninteresting_inv.id_to_entry.key()])
-        interesting_root_keys = set()
-        for idx, inv in enumerate(self.iter_inventories(revision_ids)):
-            interesting_root_keys.add(inv.id_to_entry.key())
-        revision_ids = frozenset(revision_ids)
-        file_id_revisions = {}
-        bytes_to_info = inventory.CHKInventory._bytes_to_utf8name_key
-        for record, items in chk_map.iter_interesting_nodes(self.chk_bytes,
-                    interesting_root_keys, uninteresting_root_keys,
-                    pb=pb):
-            # This is cheating a bit to use the last grabbed 'inv', but it
-            # works
-            for name, bytes in items:
-                (name_utf8, file_id, revision_id) = bytes_to_info(bytes)
-                if not rich_root and name_utf8 == '':
-                    continue
-                if revision_id in revision_ids:
-                    # Would we rather build this up into file_id => revision
-                    # maps?
-                    try:
-                        file_id_revisions[file_id].add(revision_id)
-                    except KeyError:
-                        file_id_revisions[file_id] = set([revision_id])
-        for file_id, revisions in file_id_revisions.iteritems():
-            yield ('file', file_id, revisions)
+    def _find_present_inventory_ids(self, revision_ids):
+        keys = [(r,) for r in revision_ids]
+        parent_map = self.inventories.get_parent_map(keys)
+        present_inventory_ids = set(k[-1] for k in parent_map)
+        return present_inventory_ids
 
     def fileids_altered_by_revision_ids(self, revision_ids, _inv_weave=None):
         """Find the file ids and versions affected by revisions.
@@ -776,23 +762,39 @@
             revision_ids. Each altered file-ids has the exact revision_ids that
             altered it listed explicitly.
         """
-        rich_roots = self.supports_rich_root()
-        result = {}
+        rich_root = self.supports_rich_root()
+        bytes_to_info = inventory.CHKInventory._bytes_to_utf8name_key
+        file_id_revisions = {}
         pb = ui.ui_factory.nested_progress_bar()
         try:
-            total = len(revision_ids)
-            for pos, inv in enumerate(self.iter_inventories(revision_ids)):
-                pb.update("Finding text references", pos, total)
-                for entry in inv.iter_just_entries():
-                    if entry.revision != inv.revision_id:
-                        continue
-                    if not rich_roots and entry.file_id == inv.root_id:
-                        continue
-                    alterations = result.setdefault(entry.file_id, set([]))
-                    alterations.add(entry.revision)
-            return result
+            parent_ids = self._find_parent_ids_of_revisions(revision_ids)
+            present_parent_inv_ids = self._find_present_inventory_ids(parent_ids)
+            uninteresting_root_keys = set()
+            interesting_root_keys = set()
+            inventories_to_read = set(present_parent_inv_ids)
+            inventories_to_read.update(revision_ids)
+            for inv in self.iter_inventories(inventories_to_read):
+                entry_chk_root_key = inv.id_to_entry.key()
+                if inv.revision_id in present_parent_inv_ids:
+                    uninteresting_root_keys.add(entry_chk_root_key)
+                else:
+                    interesting_root_keys.add(entry_chk_root_key)
+
+            chk_bytes = self.chk_bytes
+            for record, items in chk_map.iter_interesting_nodes(chk_bytes,
+                        interesting_root_keys, uninteresting_root_keys,
+                        pb=pb):
+                for name, bytes in items:
+                    (name_utf8, file_id, revision_id) = bytes_to_info(bytes)
+                    if not rich_root and name_utf8 == '':
+                        continue
+                    try:
+                        file_id_revisions[file_id].add(revision_id)
+                    except KeyError:
+                        file_id_revisions[file_id] = set([revision_id])
         finally:
             pb.finished()
+        return file_id_revisions
 
     def find_text_key_references(self):
         """Find the text key references within the repository.
@@ -843,12 +845,6 @@
             return GroupCHKStreamSource(self, to_format)
         return super(CHKInventoryRepository, self)._get_source(to_format)
 
-    def suspend_write_group(self):
-        raise errors.UnsuspendableWriteGroup(self)
-
-    def _resume_write_group(self, tokens):
-        raise errors.UnsuspendableWriteGroup(self)
-
 
 class GroupCHKStreamSource(repository.StreamSource):
     """Used when both the source and target repo are GroupCHK repos."""
@@ -861,7 +857,7 @@
         self._chk_id_roots = None
         self._chk_p_id_roots = None
 
-    def _get_filtered_inv_stream(self):
+    def _get_inventory_stream(self, inventory_keys, allow_absent=False):
        """Get a stream of inventory texts.
 
         When this function returns, self._chk_id_roots and self._chk_p_id_roots
@@ -873,9 +869,14 @@
             id_roots_set = set()
             p_id_roots_set = set()
             source_vf = self.from_repository.inventories
-            stream = source_vf.get_record_stream(self._revision_keys,
+            stream = source_vf.get_record_stream(inventory_keys,
                                                  'groupcompress', True)
             for record in stream:
+                if record.storage_kind == 'absent':
+                    if allow_absent:
+                        continue
+                    else:
+                        raise errors.NoSuchRevision(self, record.key)
                 bytes = record.get_bytes_as('fulltext')
                 chk_inv = inventory.CHKInventory.deserialise(None, bytes,
                                                              record.key)
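
The allow_absent flag separates the two callers: get_stream() must fail loudly on a missing inventory, while get_stream_for_missing_keys() (added further down) expects ghosts and skips them. The control flow, reduced to a generator over (key, storage_kind) pairs:

    class NoSuchRevisionError(Exception):
        pass

    def filtered_records(records, allow_absent=False):
        # Skip absent records when the caller tolerates ghosts; otherwise
        # a missing text is an error.
        for key, storage_kind in records:
            if storage_kind == 'absent':
                if allow_absent:
                    continue
                raise NoSuchRevisionError(key)
            yield key, storage_kind

    records = [(('rev-1',), 'fulltext'), (('ghost',), 'absent')]
    assert list(filtered_records(records, allow_absent=True)) == \
        [(('rev-1',), 'fulltext')]
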
@@ -897,16 +898,29 @@
             p_id_roots_set.clear()
         return ('inventories', _filtered_inv_stream())
 
-    def _get_filtered_chk_streams(self, excluded_keys):
+    def _find_present_inventories(self, revision_ids):
+        revision_keys = [(r,) for r in revision_ids]
+        inventories = self.from_repository.inventories
+        present_inventories = inventories.get_parent_map(revision_keys)
+        return [p[-1] for p in present_inventories]
+
+    def _get_filtered_chk_streams(self, excluded_revision_ids):
         self._text_keys = set()
-        excluded_keys.discard(_mod_revision.NULL_REVISION)
-        if not excluded_keys:
+        excluded_revision_ids.discard(_mod_revision.NULL_REVISION)
+        if not excluded_revision_ids:
             uninteresting_root_keys = set()
             uninteresting_pid_root_keys = set()
         else:
+            # filter out any excluded revisions whose inventories are not
+            # actually present
+            # TODO: Update Repository.iter_inventories() to add
+            #       ignore_missing=True
+            present_ids = self.from_repository._find_present_inventory_ids(
+                            excluded_revision_ids)
+            present_ids = self._find_present_inventories(excluded_revision_ids)
             uninteresting_root_keys = set()
             uninteresting_pid_root_keys = set()
-            for inv in self.from_repository.iter_inventories(excluded_keys):
+            for inv in self.from_repository.iter_inventories(present_ids):
                 uninteresting_root_keys.add(inv.id_to_entry.key())
                 uninteresting_pid_root_keys.add(
                     inv.parent_id_basename_to_file_id.key())
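
Both the repository's _find_present_inventory_ids and the stream source's new _find_present_inventories rely on the same trick: get_parent_map only returns entries for keys that exist, so it doubles as a cheap presence filter. Sketched against a plain dict standing in for the inventories index:

    def find_present(inventory_index, revision_ids):
        # get_parent_map-style lookup: absent keys are silently dropped,
        # so the surviving keys are exactly the present inventories.
        keys = [(r,) for r in revision_ids]
        return [k[-1] for k in keys if k in inventory_index]

    index = {('rev-1',): (), ('rev-2',): (('rev-1',),)}
    assert find_present(index, ['rev-2', 'ghost']) == ['rev-2']
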
@@ -922,12 +936,16 @@
                     self._text_keys.add((file_id, revision_id))
                 if record is not None:
                     yield record
+            # Consumed
+            self._chk_id_roots = None
         yield 'chk_bytes', _filter_id_to_entry()
         def _get_parent_id_basename_to_file_id_pages():
             for record, items in chk_map.iter_interesting_nodes(chk_bytes,
                         self._chk_p_id_roots, uninteresting_pid_root_keys):
                 if record is not None:
                     yield record
+            # Consumed
+            self._chk_p_id_roots = None
         yield 'chk_bytes', _get_parent_id_basename_to_file_id_pages()
 
     def _get_text_stream(self):
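
The added '# Consumed' assignments matter because these are generators: the root-key lists are cleared only once the stream has actually been iterated, which is exactly the state that get_stream_for_missing_keys() (below) asserts on. The pattern in miniature:

    def make_stream(state):
        # The generator clears the shared state only when fully consumed.
        def stream():
            for item in state['roots']:
                yield item
            state['roots'] = None  # Consumed
        return stream()

    state = {'roots': ['root-a', 'root-b']}
    s = make_stream(state)
    assert state['roots'] is not None   # nothing consumed yet
    assert list(s) == ['root-a', 'root-b']
    assert state['roots'] is None       # fully consumed
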
@@ -943,18 +961,47 @@
         for stream_info in self._fetch_revision_texts(revision_ids):
             yield stream_info
         self._revision_keys = [(rev_id,) for rev_id in revision_ids]
-        yield self._get_filtered_inv_stream()
-        # The keys to exclude are part of the search recipe
-        _, _, exclude_keys, _ = search.get_recipe()
-        for stream_info in self._get_filtered_chk_streams(exclude_keys):
+        yield self._get_inventory_stream(self._revision_keys)
+        # TODO: The keys to exclude might be part of the search recipe
+        # For now, exclude all parents that are at the edge of ancestry, for
+        # which we have inventories
+        from_repo = self.from_repository
+        parent_ids = from_repo._find_parent_ids_of_revisions(revision_ids)
+        for stream_info in self._get_filtered_chk_streams(parent_ids):
             yield stream_info
         yield self._get_text_stream()
 
+    def get_stream_for_missing_keys(self, missing_keys):
+        # missing keys can only occur when we are byte copying and not
+        # translating (because translation means we don't send
+        # unreconstructable deltas ever).
+        missing_inventory_keys = set()
+        for key in missing_keys:
+            if key[0] != 'inventories':
+                raise AssertionError('The only missing keys we should'
+                    ' be filling in are inventory keys, not %s'
+                    % (key[0],))
+            missing_inventory_keys.add(key[1:])
+        if self._chk_id_roots or self._chk_p_id_roots:
+            raise AssertionError('Cannot call get_stream_for_missing_keys'
+                ' untill all of get_stream() has been consumed.')
+        # Yield the inventory stream, so we can find the chk stream
+        # Some of the missing_keys will be missing because they are ghosts.
+        # As such, we can ignore them. The Sink is required to verify there are
+        # no unavailable texts when the ghost inventories are not filled in.
+        yield self._get_inventory_stream(missing_inventory_keys,
+                                         allow_absent=True)
+        # We use the empty set for excluded_revision_ids, to make it clear that
+        # we want to transmit all referenced chk pages.
+        for stream_info in self._get_filtered_chk_streams(set()):
+            yield stream_info
+
 
 class RepositoryFormatCHK1(RepositoryFormatPack):
     """A hashed CHK+group compress pack repository."""
 
     repository_class = CHKInventoryRepository
+    supports_external_lookups = True
     supports_chks = True
     # For right now, setting this to True gives us InterModel1And2 rather
     # than InterDifferingSerializer
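
get_stream_for_missing_keys only accepts ('inventories', revision_id) keys and strips the prefix before re-streaming. A sketch of just that key validation step (helper name hypothetical):

    def inventory_keys_from_missing(missing_keys):
        # Only inventory keys may be refetched here; anything else is a
        # caller bug.
        missing_inventory_keys = set()
        for key in missing_keys:
            if key[0] != 'inventories':
                raise AssertionError(
                    'only inventory keys expected, not %s' % (key[0],))
            missing_inventory_keys.add(key[1:])
        return missing_inventory_keys

    missing = set([('inventories', 'rev-2'), ('inventories', 'ghost')])
    assert inventory_keys_from_missing(missing) == \
        set([('rev-2',), ('ghost',)])
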
@@ -1002,3 +1049,23 @@
                 'Does not support nested trees', target_format)
 
 
+
+class RepositoryFormatCHK2(RepositoryFormatCHK1):
+    """A CHK repository that uses the bencode revision serializer."""
+
+    _serializer = chk_serializer.chk_bencode_serializer
+
+    def _get_matching_bzrdir(self):
+        return bzrdir.format_registry.make_bzrdir('development7-rich-root')
+
+    def _ignore_setting_bzrdir(self, format):
+        pass
+
+    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)
+
+    def get_format_string(self):
+        """See RepositoryFormat.get_format_string()."""
+        return ('Bazaar development format - chk repository with bencode '
+                'revision serialization (needs bzr.dev from 1.15)\n')
+
+
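
The string returned by get_format_string() is the on-disk marker written to .bzr/repository/format, and it is what later maps the repository back to its format class. A toy registry showing that round trip (not bzrlib's actual registry code):

    _formats = {}

    def register_format(format_class):
        # The on-disk format string is the lookup key.
        _formats[format_class().get_format_string()] = format_class

    class SketchFormatCHK2(object):
        def get_format_string(self):
            return ('Bazaar development format - chk repository with bencode '
                    'revision serialization (needs bzr.dev from 1.15)\n')

    register_format(SketchFormatCHK2)
    disk_marker = SketchFormatCHK2().get_format_string()
    assert _formats[disk_marker] is SketchFormatCHK2
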