2812
2812
return _unescape_re.sub(_unescaper, data)
2815
class _RevisionTextVersionCache(object):
    """A cache of the versionedfile versions for revision and file-id.

    The cache keeps two dictionaries:

    * ``revision_versions`` maps a revision id to a dict of
      ``{file_id: entry.revision}`` built from that revision's tree.
    * ``revision_parents`` maps a revision id to the parents reported by
      ``repository.get_parents``.
    """

    # NOTE(review): the short control-flow lines of this class (try/except/
    # else, initialisers, the batching loop and the trailing returns) were
    # absent from the extracted text and have been restored from the
    # statements that surround them; confirm against the canonical revision.

    def __init__(self, repository):
        """Create an empty cache that reads from ``repository``."""
        self.repository = repository
        self.revision_versions = {}
        self.revision_parents = {}
        self.repo_graph = self.repository.get_graph()
        # XXX: RBC: I haven't tracked down what uses this, but it would be
        # better to use the headscache directly I think.
        self.heads = graph.HeadsCache(self.repo_graph).heads

    def add_revision_text_versions(self, tree):
        """Cache text version data from the supplied revision tree.

        :param tree: an object providing ``iter_entries_by_dir()`` and
            ``get_revision_id()``.
        :returns: the ``{file_id: entry.revision}`` dict that was stored
            under the tree's revision id.
        """
        inv_revisions = {}
        for path, entry in tree.iter_entries_by_dir():
            inv_revisions[entry.file_id] = entry.revision
        self.revision_versions[tree.get_revision_id()] = inv_revisions
        return inv_revisions

    def get_text_version(self, file_id, revision_id):
        """Determine the text version for a given file-id and revision-id.

        The revision's tree is loaded from the repository on a cache miss.
        A revision the repository does not have is cached as an empty
        mapping, so the lookup is not repeated.

        :returns: the cached text revision for ``file_id``, or None when the
            file id is not recorded for ``revision_id``.
        """
        try:
            inv_revisions = self.revision_versions[revision_id]
        except KeyError:
            try:
                tree = self.repository.revision_tree(revision_id)
            except errors.RevisionNotPresent:
                self.revision_versions[revision_id] = inv_revisions = {}
            else:
                inv_revisions = self.add_revision_text_versions(tree)
        return inv_revisions.get(file_id)

    def prepopulate_revs(self, revision_ids):
        """Load the text versions of ``revision_ids`` into the cache.

        Revisions are fetched from the repository in batches of 100.
        """
        # Filter out versions that we don't have an inventory for, so that the
        # revision_trees() call won't fail.
        inv_weave = self.repository.get_inventory_weave()
        revs = [r for r in revision_ids if inv_weave.has_version(r)]
        # XXX: this loop is very similar to
        # bzrlib.fetch.Inter1and2Helper.iter_rev_trees.
        while revs:
            mutter('%d revisions left to prepopulate', len(revs))
            for tree in self.repository.revision_trees(revs[:100]):
                if tree.inventory.revision_id is None:
                    tree.inventory.revision_id = tree.get_revision_id()
                self.add_revision_text_versions(tree)
            revs = revs[100:]

    def get_parents(self, revision_id):
        """Return the parents of ``revision_id``, caching the answer.

        On a cache miss the repository is asked via
        ``get_parents([revision_id])`` and the first element of its reply is
        stored and returned.
        """
        try:
            return self.revision_parents[revision_id]
        except KeyError:
            parents = self.repository.get_parents([revision_id])[0]
            self.revision_parents[revision_id] = parents
            return parents

    def used_file_versions(self):
        """Return a set of (revision_id, file_id) pairs for each file version
        referenced by any inventory cached by this _RevisionTextVersionCache.

        If the entire repository has been cached, this can be used to find all
        file versions that are actually referenced by inventories.  Thus any
        other file version is completely unused and can be removed safely.
        """
        # NOTE(review): each cached summary maps file_id -> revision, so
        # items() yields (file_id, revision) tuples; the ordering named in
        # the docstring above should be checked against callers.
        result = set()
        for inventory_summary in self.revision_versions.itervalues():
            result.update(inventory_summary.items())
        return result
2885
2815
class VersionedFileChecker(object):
    """Compare the parents stored in a versioned file with a text key index.

    The index is obtained once, at construction time, from
    ``repository._generate_text_key_index()`` and is looked up with
    ``(file_id, revision_id)`` keys.
    """

    # NOTE(review): the extracted text interleaved two revisions of this
    # class (one driven by a revision_versions cache, one by text_index) and
    # had lost its short control-flow lines.  This body keeps the text_index
    # revision, which is the one matching __init__; confirm that no caller
    # still passes a fourth ``revision_versions`` argument.

    def __init__(self, repository):
        """Remember ``repository`` and build its text key index."""
        self.repository = repository
        self.text_index = self.repository._generate_text_key_index()

    def calculate_file_version_parents(self, revision_id, file_id):
        """Calculate the correct parents for a file version according to
        the inventories.

        :returns: a tuple of parent revision ids taken from the text index
            entry for ``(file_id, revision_id)``.
        :raises KeyError: when the text index has no such entry.
        """
        parent_keys = self.text_index[(file_id, revision_id)]
        if parent_keys == [_mod_revision.NULL_REVISION]:
            # NOTE(review): restored line; an index entry holding only the
            # null revision is taken to mean "no parents" - confirm.
            return ()
        # strip the file_id, for the weave api
        return tuple([parent_revision
                      for _parent_file_id, parent_revision in parent_keys])

    def check_file_version_parents(self, weave, file_id, planned_revisions):
        """Check the parents stored in a versioned file are correct.

        :param weave: object whose ``get_parents(revision_id)`` gives the
            parents currently stored for a version.
        :param file_id: the file id whose versions are being checked.
        :param planned_revisions: iterable of revision ids to examine.
        :returns: A tuple of (wrong_parents, unused_versions).
            wrong_parents is a dict mapping {revision_id: (stored_parents,
            correct_parents)} for each revision_id where the stored parents
            differ from the calculated ones.  unused_versions is a set of
            the revision ids for which the text index has no entry.
        """
        wrong_parents = {}
        unused_versions = set()
        for revision_id in planned_revisions:
            try:
                correct_parents = self.calculate_file_version_parents(
                    revision_id, file_id)
            except KeyError:
                # we were asked to investigate a non-existent version.
                unused_versions.add(revision_id)
            else:
                try:
                    knit_parents = tuple(weave.get_parents(revision_id))
                except errors.RevisionNotPresent:
                    # NOTE(review): restored line; None marks a version the
                    # weave does not hold, so it never equals a parent tuple.
                    knit_parents = None
                if correct_parents != knit_parents:
                    wrong_parents[revision_id] = (knit_parents, correct_parents)
        return wrong_parents, unused_versions