1034
1107
def _get_record_map(self, version_ids):
    """Produce a dictionary of knit records.

    :param version_ids: The versions to look up; passed straight to
        self._get_components_positions.
    :return: {version_id: (record, record_details, digest, next)}
        record
            data returned from read_records
        record_details
            opaque information to pass to parse_record
        digest
            SHA1 digest of the full text after all steps are done
        next
            build-parent of the version, i.e. the leftmost ancestor.
            Will be None if the record is not a delta.
    """
    position_map = self._get_components_positions(version_ids)
    # Each position_map value is (record_details, index_memo, next); only the
    # index_memo is needed to ask the data layer for the raw record.
    records = [(component_id, index_memo)
               for component_id, (_details, index_memo, _parent)
               in position_map.items()]
    record_map = {}
    for component_id, record, digest in \
            self._data.read_records_iter(records):
        # 'build_parent' rather than 'next' so the builtin is not shadowed.
        record_details, _index_memo, build_parent = position_map[component_id]
        record_map[component_id] = (record, record_details, digest,
                                    build_parent)
    return record_map
1056
1133
def get_text(self, version_id):
2735
2882
It will work for knits with cached annotations, but this is not
2738
ancestry = knit.get_ancestry(revision_id)
2739
fulltext = dict(zip(ancestry, knit.get_line_list(ancestry)))
2741
for candidate in ancestry:
2742
if candidate in annotations:
2744
parents = knit.get_parents(candidate)
2745
if len(parents) == 0:
2747
elif knit._index.get_method(candidate) != 'line-delta':
2885
annotator = _KnitAnnotator(knit)
2886
return iter(annotator.annotate(revision_id))
2889
class _KnitAnnotator(object):
2890
"""Build up the annotations for a text."""
2892
def __init__(self, knit):
    """Set up the empty caches used while annotating texts.

    :param knit: The knit to annotate from. It is kept as self._knit,
        which the other methods use to reach the index, the data and the
        content factory.
    """
    self._knit = knit

    # Content objects, keyed by revision id. These differ from plain
    # fulltexts because of how final newlines are treated by knits.
    self._fulltext_contents = {}

    # Annotated lines of specific revisions
    self._annotated_lines = {}

    # Track the raw data for nodes that we could not process yet.
    # This maps the revision_id of the base to a list of children that will
    # be annotated from it.
    self._pending_children = {}

    # Nodes which cannot be extracted
    self._ghosts = set()

    # Track the children of each node (as annotation parent and as
    # compression parent respectively), so we know when the data cached
    # for a node is no longer needed.
    self._annotate_children = {}
    self._compression_children = {}

    # Build details from the knit index, keyed by revision id
    self._all_build_details = {}
    # The children => parent revision_id graph
    self._revision_id_graph = {}

    # Created on first use by _get_heads_provider
    self._heads_provider = None

    # Nodes whose annotations stay cached even once every child is done
    self._nodes_to_keep_annotations = set()
    self._generations_until_keep = 100
2925
def set_generations_until_keep(self, value):
    """Set the number of generations before caching a node.

    Setting this to -1 will cache every merge node, setting this higher
    will cache fewer nodes.

    :param value: The new number of generations.
    """
    self._generations_until_keep = value
2933
def _add_fulltext_content(self, revision_id, content_obj):
    """Cache a content object and hand back its text.

    :param revision_id: Key under which the content object is stored in
        self._fulltext_contents.
    :param content_obj: The content object to cache; must provide text().
    :return: The result of content_obj.text().
    """
    self._fulltext_contents[revision_id] = content_obj
    # TODO: jam 20080305 It might be good to check the sha1digest here
    return content_obj.text()
2938
def _check_parents(self, child, nodes_to_annotate):
    """Check if all parents have been processed.

    :param child: A tuple of (rev_id, parents, raw_content)
    :param nodes_to_annotate: If child is ready, add it to
        nodes_to_annotate, otherwise put it back in self._pending_children
    """
    for parent_id in child[1]:
        if parent_id not in self._annotated_lines:
            # A parent is still missing: park the child under that parent
            # so it is re-checked once the parent has been annotated.
            # Stop at the first missing parent so the child is only queued
            # once.
            self._pending_children.setdefault(parent_id, []).append(child)
            break
    else:
        # This one is ready to be processed
        nodes_to_annotate.append(child)
2955
def _add_annotation(self, revision_id, fulltext, parent_ids,
                    left_matching_blocks=None):
    """Add an annotation entry.

    All parents should already have been annotated.

    :param revision_id: The revision being annotated.
    :param fulltext: The text of revision_id, passed to
        annotate.reannotate.
    :param parent_ids: The parents whose annotated lines are used as the
        basis; each must be a key of self._annotated_lines.
    :param left_matching_blocks: Passed through to annotate.reannotate.
    :return: A list of children that now have their parents satisfied.
    """
    annotated_parent_lines = [self._annotated_lines[p] for p in parent_ids]
    annotated_lines = list(annotate.reannotate(
        annotated_parent_lines, fulltext, revision_id,
        left_matching_blocks,
        heads_provider=self._get_heads_provider()))
    self._annotated_lines[revision_id] = annotated_lines
    for parent_id in parent_ids:
        ann_children = self._annotate_children[parent_id]
        ann_children.remove(revision_id)
        if (not ann_children
                and parent_id not in self._nodes_to_keep_annotations):
            # Nothing else will be annotated from this parent, and it is
            # not marked to be kept, so drop everything cached for it.
            del self._annotated_lines[parent_id]
            del self._all_build_details[parent_id]
            if parent_id in self._fulltext_contents:
                del self._fulltext_contents[parent_id]
    # Now that we've added this one, see if there are any pending
    # deltas to be done, certainly this parent is finished
    nodes_to_annotate = []
    for child in self._pending_children.pop(revision_id, []):
        self._check_parents(child, nodes_to_annotate)
    return nodes_to_annotate
2984
def _get_build_graph(self, revision_id):
2985
"""Get the graphs for building texts and annotations.
2987
The data you need for creating a full text may be different than the
2988
data you need to annotate that text. (At a minimum, you need both
2989
parents to create an annotation, but only need 1 parent to generate the
2992
:return: A list of (revision_id, index_memo) records, suitable for
2993
passing to read_records_iter to start reading in the raw data fro/
2996
if revision_id in self._annotated_lines:
2999
pending = set([revision_id])
3004
# get all pending nodes
3006
this_iteration = pending
3007
build_details = self._knit._index.get_build_details(this_iteration)
3008
self._all_build_details.update(build_details)
3009
# new_nodes = self._knit._index._get_entries(this_iteration)
3011
for rev_id, details in build_details.iteritems():
3012
(index_memo, compression_parent, parents,
3013
record_details) = details
3014
self._revision_id_graph[rev_id] = parents
3015
records.append((rev_id, index_memo))
3016
# Do we actually need to check _annotated_lines?
3017
pending.update(p for p in parents
3018
if p not in self._all_build_details)
3019
if compression_parent:
3020
self._compression_children.setdefault(compression_parent,
3023
for parent in parents:
3024
self._annotate_children.setdefault(parent,
3026
num_gens = generation - kept_generation
3027
if ((num_gens >= self._generations_until_keep)
3028
and len(parents) > 1):
3029
kept_generation = generation
3030
self._nodes_to_keep_annotations.add(rev_id)
3032
missing_versions = this_iteration.difference(build_details.keys())
3033
self._ghosts.update(missing_versions)
3034
for missing_version in missing_versions:
3035
# add a key, no parents
3036
self._revision_id_graph[missing_version] = ()
3037
pending.discard(missing_version) # don't look for it
3038
# XXX: This should probably be a real exception, as it is a data
3040
assert not self._ghosts.intersection(self._compression_children), \
3041
"We cannot have nodes which have a compression parent of a ghost."
3042
# Cleanout anything that depends on a ghost so that we don't wait for
3043
# the ghost to show up
3044
for node in self._ghosts:
3045
if node in self._annotate_children:
3046
# We won't be building this node
3047
del self._annotate_children[node]
3048
# Generally we will want to read the records in reverse order, because
3049
# we find the parent nodes after the children
3053
def _annotate_records(self, records):
3054
"""Build the annotations for the listed records."""
3055
# We iterate in the order read, rather than a strict order requested
3056
# However, process what we can, and put off to the side things that
3057
# still need parents, cleaning them up when those parents are
3059
for (rev_id, record,
3060
digest) in self._knit._data.read_records_iter(records):
3061
if rev_id in self._annotated_lines:
3063
parent_ids = self._revision_id_graph[rev_id]
3064
parent_ids = [p for p in parent_ids if p not in self._ghosts]
3065
details = self._all_build_details[rev_id]
3066
(index_memo, compression_parent, parents,
3067
record_details) = details
3068
nodes_to_annotate = []
3069
# TODO: Remove the punning between compression parents, and
3070
# parent_ids, we should be able to do this without assuming
3072
if len(parent_ids) == 0:
3073
# There are no parents for this node, so just add it
3074
# TODO: This probably needs to be decoupled
3075
assert compression_parent is None
3076
fulltext_content, delta = self._knit.factory.parse_record(
3077
rev_id, record, record_details, None)
3078
fulltext = self._add_fulltext_content(rev_id, fulltext_content)
3079
nodes_to_annotate.extend(self._add_annotation(rev_id, fulltext,
3080
parent_ids, left_matching_blocks=None))
3082
child = (rev_id, parent_ids, record)
3083
# Check if all the parents are present
3084
self._check_parents(child, nodes_to_annotate)
3085
while nodes_to_annotate:
3086
# Should we use a queue here instead of a stack?
3087
(rev_id, parent_ids, record) = nodes_to_annotate.pop()
3088
(index_memo, compression_parent, parents,
3089
record_details) = self._all_build_details[rev_id]
3090
if compression_parent is not None:
3091
comp_children = self._compression_children[compression_parent]
3092
assert rev_id in comp_children
3093
# If there is only 1 child, it is safe to reuse this
3095
reuse_content = (len(comp_children) == 1
3096
and compression_parent not in
3097
self._nodes_to_keep_annotations)
3099
# Remove it from the cache since it will be changing
3100
parent_fulltext_content = self._fulltext_contents.pop(compression_parent)
3101
# Make sure to copy the fulltext since it might be
3103
parent_fulltext = list(parent_fulltext_content.text())
3105
parent_fulltext_content = self._fulltext_contents[compression_parent]
3106
parent_fulltext = parent_fulltext_content.text()
3107
comp_children.remove(rev_id)
3108
fulltext_content, delta = self._knit.factory.parse_record(
3109
rev_id, record, record_details,
3110
parent_fulltext_content,
3111
copy_base_content=(not reuse_content))
3112
fulltext = self._add_fulltext_content(rev_id,
3114
blocks = KnitContent.get_line_delta_blocks(delta,
3115
parent_fulltext, fulltext)
3117
fulltext_content = self._knit.factory.parse_fulltext(
3119
fulltext = self._add_fulltext_content(rev_id,
3122
nodes_to_annotate.extend(
3123
self._add_annotation(rev_id, fulltext, parent_ids,
3124
left_matching_blocks=blocks))
3126
def _get_heads_provider(self):
    """Create a heads provider for resolving ancestry issues.

    The provider is built from self._revision_id_graph on first use and
    cached in self._heads_provider; later calls return the cached object.

    :return: The heads provider.
    """
    if self._heads_provider is not None:
        return self._heads_provider
    parent_provider = _mod_graph.DictParentsProvider(
        self._revision_id_graph)
    graph_obj = _mod_graph.Graph(parent_provider)
    head_cache = _mod_graph.FrozenHeadsCache(graph_obj)
    self._heads_provider = head_cache
    # Return the new provider so the first call behaves like later calls.
    return head_cache
3137
def annotate(self, revision_id):
    """Return the annotated fulltext at the given revision.

    :param revision_id: The revision id for this file
    :return: The entry of self._annotated_lines for revision_id.
    :raises errors.RevisionNotPresent: If revision_id is found to be a
        ghost while the build graph is collected.
    """
    records = self._get_build_graph(revision_id)
    # Building the graph is what populates self._ghosts, so this check has
    # to come after it.
    if revision_id in self._ghosts:
        raise errors.RevisionNotPresent(revision_id, self._knit)
    self._annotate_records(records)
    return self._annotated_lines[revision_id]