~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/repository.py

Committer: Robert Collins
Date: 2007-08-24 06:56:06 UTC
mto: (2592.5.3 pack-repository)
mto: This revision was merged to the branch mainline in revision 2933.
Revision ID: robertc@robertcollins.net-20070824065606-9mfmnd3rync2r6i6

Filter out texts and signatures not referenced by the revisions being copied during pack to pack fetching.

files modified:
bzrlib/repofmt/pack_repo.py

bzrlib/repository.py

Show diffs side-by-side

added added

removed removed

bzrlib/repository.py

663

signature,

664

self.get_transaction())

665

666

def fileids_altered_by_revision_ids(self, revision_ids):

667

"""Find the file ids and versions affected by revisions.

668

669

:param revision_ids: an iterable containing revision ids.

666

def _find_file_ids_from_xml_inventory_lines(self, line_iterator,

667

revision_ids):

668

"""Helper routine for fileids_altered_by_revision_ids.

669

670

This performs the translation of xml lines to revision ids.

671

672

:param line_iterator: An iterator of lines

673

:param revision_ids: The revision ids to filter for.

670

674

:return: a dictionary mapping altered file-ids to an iterable of

671

675

revision_ids. Each altered file-id has the exact revision_ids that

672

676

altered it listed explicitly.

673

677

"""

674

assert self._serializer.support_altered_by_hack, \

675

("fileids_altered_by_revision_ids only supported for branches "

676

"which store inventory as unnested xml, not on %r" % self)

677

selected_revision_ids = set(osutils.safe_revision_id(r)

678

for r in revision_ids)

679

w = self.get_inventory_weave()

680

678

result = {}

681

679

682

680

# this code needs to read every new line in every inventory for the

698

696

search = self._file_ids_altered_regex.search

699

697

unescape = _unescape_xml

700

698

setdefault = result.setdefault

699

for line in line_iterator:

700

match = search(line)

701

if match is None:

702

continue

703

# One call to match.group() returning multiple items is quite a

704

# bit faster than 2 calls to match.group() each returning 1

705

file_id, revision_id = match.group('file_id', 'revision_id')

706

707

# Inlining the cache lookups helps a lot when you make 170,000

708

# lines and 350k ids, versus 8.4 unique ids.

709

# Using a cache helps in 2 ways:

710

# 1) Avoids unnecessary decoding calls

711

# 2) Re-uses cached strings, which helps in future set and

712

# equality checks.

713

# (2) is enough that removing encoding entirely along with

714

# the cache (so we are using plain strings) results in no

715

# performance improvement.

716

try:

717

revision_id = unescape_revid_cache[revision_id]

718

except KeyError:

719

unescaped = unescape(revision_id)

720

unescape_revid_cache[revision_id] = unescaped

721

revision_id = unescaped

722

723

if revision_id in revision_ids:

724

try:

725

file_id = unescape_fileid_cache[file_id]

726

except KeyError:

727

unescaped = unescape(file_id)

728

unescape_fileid_cache[file_id] = unescaped

729

file_id = unescaped

730

setdefault(file_id, set()).add(revision_id)

731

return result

732

733

def fileids_altered_by_revision_ids(self, revision_ids):

734

"""Find the file ids and versions affected by revisions.

735

736

:param revision_ids: an iterable containing revision ids.

737

:return: a dictionary mapping altered file-ids to an iterable of

738

revision_ids. Each altered file-id has the exact revision_ids that

739

altered it listed explicitly.

740

"""

741

assert self._serializer.support_altered_by_hack, \

742

("fileids_altered_by_revision_ids only supported for branches "

743

"which store inventory as unnested xml, not on %r" % self)

744

selected_revision_ids = set(osutils.safe_revision_id(r)

745

for r in revision_ids)

746

w = self.get_inventory_weave()

701

747

pb = ui.ui_factory.nested_progress_bar()

702

748

try:

703

for line in w.iter_lines_added_or_present_in_versions(

704

selected_revision_ids, pb=pb):

705

match = search(line)

706

if match is None:

707

continue

708

# One call to match.group() returning multiple items is quite a

709

# bit faster than 2 calls to match.group() each returning 1

710

file_id, revision_id = match.group('file_id', 'revision_id')

711

712

# Inlining the cache lookups helps a lot when you make 170,000

713

# lines and 350k ids, versus 8.4 unique ids.

714

# Using a cache helps in 2 ways:

715

# 1) Avoids unnecessary decoding calls

716

# 2) Re-uses cached strings, which helps in future set and

717

# equality checks.

718

# (2) is enough that removing encoding entirely along with

719

# the cache (so we are using plain strings) results in no

720

# performance improvement.

721

try:

722

revision_id = unescape_revid_cache[revision_id]

723

except KeyError:

724

unescaped = unescape(revision_id)

725

unescape_revid_cache[revision_id] = unescaped

726

revision_id = unescaped

727

728

if revision_id in selected_revision_ids:

729

try:

730

file_id = unescape_fileid_cache[file_id]

731

except KeyError:

732

unescaped = unescape(file_id)

733

unescape_fileid_cache[file_id] = unescaped

734

file_id = unescaped

735

setdefault(file_id, set()).add(revision_id)

749

return self._find_file_ids_from_xml_inventory_lines(

750

w.iter_lines_added_or_present_in_versions(

751

selected_revision_ids, pb=pb),

752

selected_revision_ids)

736

753

finally:

737

754

pb.finished()

738

return result

739

755

740

756

def iter_files_bytes(self, desired_files):

741

757

"""Iterate through file versions.

Older »