664
664
self.get_transaction())
666
def fileids_altered_by_revision_ids(self, revision_ids):
667
"""Find the file ids and versions affected by revisions.
669
:param revisions: an iterable containing revision ids.
666
def _find_file_ids_from_xml_inventory_lines(self, line_iterator,
668
"""Helper routine for fileids_altered_by_revision_ids.
670
This performs the translation of xml lines to revision ids.
672
:param line_iterator: An iterator of lines
673
:param revision_ids: The revision ids to filter for.
670
674
:return: a dictionary mapping altered file-ids to an iterable of
671
675
revision_ids. Each altered file-ids has the exact revision_ids that
672
676
altered it listed explicitly.
674
assert self._serializer.support_altered_by_hack, \
675
("fileids_altered_by_revision_ids only supported for branches "
676
"which store inventory as unnested xml, not on %r" % self)
677
selected_revision_ids = set(osutils.safe_revision_id(r)
678
for r in revision_ids)
679
w = self.get_inventory_weave()
682
680
# this code needs to read every new line in every inventory for the
698
696
search = self._file_ids_altered_regex.search
699
697
unescape = _unescape_xml
700
698
setdefault = result.setdefault
699
for line in line_iterator:
703
# One call to match.group() returning multiple items is quite a
704
# bit faster than 2 calls to match.group() each returning 1
705
file_id, revision_id = match.group('file_id', 'revision_id')
707
# Inlining the cache lookups helps a lot when you make 170,000
708
# lines and 350k ids, versus 8.4 unique ids.
709
# Using a cache helps in 2 ways:
710
# 1) Avoids unnecessary decoding calls
711
# 2) Re-uses cached strings, which helps in future set and
713
# (2) is enough that removing encoding entirely along with
714
# the cache (so we are using plain strings) results in no
715
# performance improvement.
717
revision_id = unescape_revid_cache[revision_id]
719
unescaped = unescape(revision_id)
720
unescape_revid_cache[revision_id] = unescaped
721
revision_id = unescaped
723
if revision_id in revision_ids:
725
file_id = unescape_fileid_cache[file_id]
727
unescaped = unescape(file_id)
728
unescape_fileid_cache[file_id] = unescaped
730
setdefault(file_id, set()).add(revision_id)
733
def fileids_altered_by_revision_ids(self, revision_ids):
734
"""Find the file ids and versions affected by revisions.
736
:param revisions: an iterable containing revision ids.
737
:return: a dictionary mapping altered file-ids to an iterable of
738
revision_ids. Each altered file-ids has the exact revision_ids that
739
altered it listed explicitly.
741
assert self._serializer.support_altered_by_hack, \
742
("fileids_altered_by_revision_ids only supported for branches "
743
"which store inventory as unnested xml, not on %r" % self)
744
selected_revision_ids = set(osutils.safe_revision_id(r)
745
for r in revision_ids)
746
w = self.get_inventory_weave()
701
747
pb = ui.ui_factory.nested_progress_bar()
703
for line in w.iter_lines_added_or_present_in_versions(
704
selected_revision_ids, pb=pb):
708
# One call to match.group() returning multiple items is quite a
709
# bit faster than 2 calls to match.group() each returning 1
710
file_id, revision_id = match.group('file_id', 'revision_id')
712
# Inlining the cache lookups helps a lot when you make 170,000
713
# lines and 350k ids, versus 8.4 unique ids.
714
# Using a cache helps in 2 ways:
715
# 1) Avoids unnecessary decoding calls
716
# 2) Re-uses cached strings, which helps in future set and
718
# (2) is enough that removing encoding entirely along with
719
# the cache (so we are using plain strings) results in no
720
# performance improvement.
722
revision_id = unescape_revid_cache[revision_id]
724
unescaped = unescape(revision_id)
725
unescape_revid_cache[revision_id] = unescaped
726
revision_id = unescaped
728
if revision_id in selected_revision_ids:
730
file_id = unescape_fileid_cache[file_id]
732
unescaped = unescape(file_id)
733
unescape_fileid_cache[file_id] = unescaped
735
setdefault(file_id, set()).add(revision_id)
749
return self._find_file_ids_from_xml_inventory_lines(
750
w.iter_lines_added_or_present_in_versions(
751
selected_revision_ids, pb=pb),
752
selected_revision_ids)
740
756
def iter_files_bytes(self, desired_files):
741
757
"""Iterate through file versions.