1034
1101
def _get_record_map(self, version_ids):
    """Produce a dictionary of knit records.

    :param version_ids: The version ids to look up; handed unchanged to
        self._get_components_positions.
    :return: {version_id:(record, record_details, digest, next)}
        record
            data returned from read_records
        record_details
            opaque information to pass to parse_record
        digest
            SHA1 digest of the full text after all steps are done
        next
            build-parent of the version, i.e. the leftmost ancestor.
            Will be None if the record is not a delta.
    """
    position_map = self._get_components_positions(version_ids)
    # Reading the raw data only needs each component id and its index memo;
    # the other two fields are re-joined with the read results below.
    records = [(component_id, index_memo)
               for component_id, (record_details, index_memo, build_parent)
               in position_map.iteritems()]
    record_map = {}
    for component_id, record, digest in \
            self._data.read_records_iter(records):
        (record_details, index_memo,
         build_parent) = position_map[component_id]
        record_map[component_id] = (record, record_details, digest,
                                    build_parent)
    return record_map
1056
1127
def get_text(self, version_id):
2735
2863
It will work for knits with cached annotations, but this is not
2738
ancestry = knit.get_ancestry(revision_id)
2739
fulltext = dict(zip(ancestry, knit.get_line_list(ancestry)))
2741
for candidate in ancestry:
2742
if candidate in annotations:
2744
parents = knit.get_parents(candidate)
2745
if len(parents) == 0:
2747
elif knit._index.get_method(candidate) != 'line-delta':
2866
annotator = _KnitAnnotator(knit)
2867
return iter(annotator.annotate(revision_id))
2870
class _KnitAnnotator(object):
    """Build up the annotations for a text.

    An instance wraps one knit and accumulates state across calls: build
    details and parent graphs read from the knit index, fulltext content
    objects, and the annotated lines computed so far.
    """

    def __init__(self, knit):
        """Create an annotator with empty caches.

        :param knit: The knit to annotate; its _index, _data and factory
            attributes are used by the other methods.
        """
        # NOTE(review): restored - the other methods read self._knit.
        self._knit = knit

        # Content objects, differs from fulltexts because of how final newlines
        # are treated by knits.
        self._fulltext_contents = {}

        # Annotated lines of specific revisions
        self._annotated_lines = {}

        # Track the raw data for nodes that we could not process yet.
        # This maps the revision_id of the base to a list of children that will
        # be annotated from it.
        self._pending_children = {}

        # Nodes which cannot be extracted
        self._ghosts = set()

        # Track which children each node still has, so we know if we need to
        # keep its data around: rev_id => list of child rev_ids.
        self._annotate_children = {}
        self._compression_children = {}

        self._all_build_details = {}
        # The children => parent revision_id graph
        self._revision_id_graph = {}

        self._heads_provider = None

        self._nodes_to_keep_annotations = set()
        self._generations_until_keep = 100

    def set_generations_until_keep(self, value):
        """Set the number of generations before caching a node.

        Setting this to -1 will cache every merge node, setting this higher
        will cache fewer nodes.
        """
        self._generations_until_keep = value

    def _add_fulltext_content(self, revision_id, content_obj):
        """Cache a content object and return its text.

        :param revision_id: Key to store the content object under.
        :param content_obj: An object with a text() method.
        :return: The result of content_obj.text().
        """
        self._fulltext_contents[revision_id] = content_obj
        # TODO: jam 20080305 It might be good to check the sha1digest here
        return content_obj.text()

    def _check_parents(self, child, nodes_to_annotate):
        """Check if all parents have been processed.

        :param child: A tuple of (rev_id, parents, raw_content)
        :param nodes_to_annotate: If child is ready, add it to
            nodes_to_annotate, otherwise put it back in self._pending_children
        """
        for parent_id in child[1]:
            if parent_id not in self._annotated_lines:
                # This parent has not been annotated yet, so park the child
                # under it; _add_annotation re-checks the child once that
                # parent is done.
                self._pending_children.setdefault(parent_id,
                                                  []).append(child)
                break
        else:
            # This one is ready to be processed
            nodes_to_annotate.append(child)

    def _add_annotation(self, revision_id, fulltext, parent_ids,
                        left_matching_blocks=None):
        """Add an annotation entry.

        All parents should already have been annotated.

        :param revision_id: The revision being annotated.
        :param fulltext: The text of revision_id, passed to
            annotate.reannotate.
        :param parent_ids: Parents whose annotated lines are looked up in
            self._annotated_lines.
        :param left_matching_blocks: Passed through to annotate.reannotate.
        :return: A list of children that now have their parents satisfied.
        """
        a = self._annotated_lines
        annotated_parent_lines = [a[p] for p in parent_ids]
        annotated_lines = list(annotate.reannotate(annotated_parent_lines,
            fulltext, revision_id, left_matching_blocks,
            heads_provider=self._get_heads_provider()))
        self._annotated_lines[revision_id] = annotated_lines
        for p in parent_ids:
            ann_children = self._annotate_children[p]
            ann_children.remove(revision_id)
            if (not ann_children
                    and p not in self._nodes_to_keep_annotations):
                # No remaining child needs this parent and it was not
                # marked to keep, so drop everything cached for it.
                del self._annotated_lines[p]
                del self._all_build_details[p]
                if p in self._fulltext_contents:
                    del self._fulltext_contents[p]
        # Now that we've added this one, see if there are any pending
        # deltas to be done, certainly this parent is finished
        nodes_to_annotate = []
        for child in self._pending_children.pop(revision_id, []):
            self._check_parents(child, nodes_to_annotate)
        return nodes_to_annotate

    def _get_build_graph(self, revision_id):
        """Get the graphs for building texts and annotations.

        The data you need for creating a full text may be different than the
        data you need to annotate that text. (At a minimum, you need both
        parents to create an annotation, but only need 1 parent to generate
        the fulltext.)

        :return: A list of (revision_id, index_memo) records, suitable for
            passing to read_records_iter to start reading in the raw data.
        """
        if revision_id in self._annotated_lines:
            # NOTE(review): restored early exit - already annotated, so
            # there is nothing to read.
            return []
        pending = set([revision_id])
        # NOTE(review): the initialisers and the loop header below were
        # missing from the damaged source; restored from how the names are
        # used further down.
        records = []
        generation = 0
        kept_generation = 0
        while pending:
            # get all pending nodes
            generation += 1
            this_iteration = pending
            build_details = self._knit._index.get_build_details(this_iteration)
            self._all_build_details.update(build_details)
            # new_nodes = self._knit._index._get_entries(this_iteration)
            pending = set()
            for rev_id, details in build_details.iteritems():
                (index_memo, compression_parent, parents,
                 record_details) = details
                self._revision_id_graph[rev_id] = parents
                records.append((rev_id, index_memo))
                # Do we actually need to check _annotated_lines?
                pending.update(p for p in parents
                               if p not in self._all_build_details)
                if compression_parent:
                    self._compression_children.setdefault(compression_parent,
                        []).append(rev_id)
                if parents:
                    for parent in parents:
                        self._annotate_children.setdefault(parent,
                            []).append(rev_id)
                    # Mark a merge node to keep once enough generations have
                    # passed since the last kept node.
                    num_gens = generation - kept_generation
                    if ((num_gens >= self._generations_until_keep)
                            and len(parents) > 1):
                        kept_generation = generation
                        self._nodes_to_keep_annotations.add(rev_id)

            missing_versions = this_iteration.difference(build_details.keys())
            self._ghosts.update(missing_versions)
            for missing_version in missing_versions:
                # add a key, no parents
                self._revision_id_graph[missing_version] = ()
                pending.discard(missing_version) # don't look for it
        # XXX: This should probably be a real exception, as it is a data
        #      inconsistency
        assert not self._ghosts.intersection(self._compression_children), \
            "We cannot have nodes which have a compression parent of a ghost."
        # Cleanout anything that depends on a ghost so that we don't wait for
        # the ghost to show up
        for node in self._ghosts:
            if node in self._annotate_children:
                # We won't be building this node
                del self._annotate_children[node]
        # Generally we will want to read the records in reverse order, because
        # we find the parent nodes after the children
        # NOTE(review): restored reverse + return, following the comment above
        records.reverse()
        return records

    def _annotate_records(self, records):
        """Build the annotations for the listed records.

        :param records: A list of (revision_id, index_memo) as returned by
            _get_build_graph; passed to self._knit._data.read_records_iter.
        """
        # We iterate in the order read, rather than a strict order requested
        # However, process what we can, and put off to the side things that
        # still need parents, cleaning them up when those parents are
        # processed.
        for (rev_id, record,
             digest) in self._knit._data.read_records_iter(records):
            if rev_id in self._annotated_lines:
                continue
            parent_ids = self._revision_id_graph[rev_id]
            parent_ids = [p for p in parent_ids if p not in self._ghosts]
            details = self._all_build_details[rev_id]
            (index_memo, compression_parent, parents,
             record_details) = details
            nodes_to_annotate = []
            # TODO: Remove the punning between compression parents, and
            #       parent_ids, we should be able to do this without assuming
            #       the build order
            if len(parent_ids) == 0:
                # There are no parents for this node, so just add it
                # TODO: This probably needs to be decoupled
                assert compression_parent is None
                fulltext_content, delta = self._knit.factory.parse_record(
                    rev_id, record, record_details, None)
                fulltext = self._add_fulltext_content(rev_id, fulltext_content)
                nodes_to_annotate.extend(self._add_annotation(rev_id, fulltext,
                    parent_ids, left_matching_blocks=None))
            else:
                child = (rev_id, parent_ids, record)
                # Check if all the parents are present
                self._check_parents(child, nodes_to_annotate)
            while nodes_to_annotate:
                # Should we use a queue here instead of a stack?
                (rev_id, parent_ids, record) = nodes_to_annotate.pop()
                (index_memo, compression_parent, parents,
                 record_details) = self._all_build_details[rev_id]
                if compression_parent is not None:
                    comp_children = self._compression_children[
                        compression_parent]
                    assert rev_id in comp_children
                    # If there is only 1 child, it is safe to reuse this
                    # content
                    reuse_content = (len(comp_children) == 1
                        and compression_parent not in
                            self._nodes_to_keep_annotations)
                    if reuse_content:
                        # Remove it from the cache since it will be changing
                        parent_fulltext_content = \
                            self._fulltext_contents.pop(compression_parent)
                        # Make sure to copy the fulltext since it might be
                        # modified
                        parent_fulltext = list(parent_fulltext_content.text())
                    else:
                        parent_fulltext_content = \
                            self._fulltext_contents[compression_parent]
                        parent_fulltext = parent_fulltext_content.text()
                    comp_children.remove(rev_id)
                    fulltext_content, delta = self._knit.factory.parse_record(
                        rev_id, record, record_details,
                        parent_fulltext_content,
                        copy_base_content=(not reuse_content))
                    fulltext = self._add_fulltext_content(rev_id,
                                                          fulltext_content)
                    blocks = KnitContent.get_line_delta_blocks(delta,
                            parent_fulltext, fulltext)
                else:
                    # NOTE(review): the argument line of this call and the
                    # `blocks = None` below were missing from the damaged
                    # source; confirm the parse_fulltext argument order
                    # against the factory.
                    fulltext_content = self._knit.factory.parse_fulltext(
                        record, rev_id)
                    fulltext = self._add_fulltext_content(rev_id,
                        fulltext_content)
                    blocks = None
                nodes_to_annotate.extend(
                    self._add_annotation(rev_id, fulltext, parent_ids,
                                         left_matching_blocks=blocks))

    def _get_heads_provider(self):
        """Create a heads provider for resolving ancestry issues.

        The provider is built once from self._revision_id_graph and cached in
        self._heads_provider; later calls return the cached object.
        """
        if self._heads_provider is not None:
            return self._heads_provider
        parent_provider = _mod_graph.DictParentsProvider(
            self._revision_id_graph)
        graph_obj = _mod_graph.Graph(parent_provider)
        head_cache = _mod_graph.FrozenHeadsCache(graph_obj)
        self._heads_provider = head_cache
        # Return the freshly built provider so the first call hands back the
        # same object as the cached path above.
        return head_cache

    def annotate(self, revision_id):
        """Return the annotated fulltext at the given revision.

        :param revision_id: The revision id for this file
        :raises errors.RevisionNotPresent: If revision_id turns out to be a
            ghost (the index returned no build details for it).
        """
        records = self._get_build_graph(revision_id)
        if revision_id in self._ghosts:
            raise errors.RevisionNotPresent(revision_id, self._knit)
        self._annotate_records(records)
        return self._annotated_lines[revision_id]