1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
|
# Copyright (C) 2009 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
"""Functionality for doing annotations in the 'optimal' way"""
from bzrlib import (
annotate,
errors,
graph as _mod_graph,
osutils,
patiencediff,
)
class AnnotatorPolicy(object):
    """Holder for the variables that define how annotations are produced.

    The class currently declares no attributes or methods of its own.
    """
class Annotator(object):
"""Class that drives performing annotations."""
def __init__(self, vf):
"""Create a new Annotator from a VersionedFile."""
self._vf = vf
self._parent_map = {}
self._text_cache = {}
self._annotations_cache = {}
self._heads_provider = None
def _get_needed_texts(self, key):
"""Get the texts we need to properly annotate key.
:param key: A Key that is present in self._vf
:return: Yield (this_key, text, num_lines)
'text' is an opaque object that just has to work with whatever
matcher object we are using. Currently it is always 'lines' but
future improvements may change this to a simple text string.
"""
graph = _mod_graph.Graph(self._vf)
parent_map = dict((k, v) for k, v in graph.iter_ancestry([key])
if v is not None)
self._parent_map.update(parent_map)
keys = parent_map.keys()
for record in self._vf.get_record_stream(keys, 'topological', True):
this_key = record.key
lines = osutils.chunks_to_lines(record.get_bytes_as('chunked'))
num_lines = len(lines)
yield this_key, lines, num_lines
def _get_parent_annotations_and_matches(self, text, parent_key):
"""Get the list of annotations for the parent, and the matching lines.
:param text: The opaque value given by _get_needed_texts
:param parent_key: The key for the parent text
:return: (parent_annotations, matching_blocks)
parent_annotations is a list as long as the number of lines in
parent
matching_blocks is a list of (parent_idx, text_idx, len) tuples
indicating which lines match between the two texts
"""
parent_lines = self._text_cache[parent_key]
parent_annotations = self._annotations_cache[parent_key]
# PatienceSequenceMatcher should probably be part of Policy
matcher = patiencediff.PatienceSequenceMatcher(None,
parent_lines, text)
matching_blocks = matcher.get_matching_blocks()
return parent_annotations, matching_blocks
def _update_from_one_parent(self, annotations, lines, parent_key):
"""Reannotate this text relative to its first parent."""
parent_annotations, matching_blocks = self._get_parent_annotations_and_matches(
lines, parent_key)
for parent_idx, lines_idx, match_len in matching_blocks:
# For all matching regions we copy across the parent annotations
annotations[lines_idx:lines_idx + match_len] = \
parent_annotations[parent_idx:parent_idx + match_len]
def _update_from_other_parents(self, annotations, lines, this_annotation,
parent_key):
"""Reannotate this text relative to a second (or more) parent."""
parent_annotations, matching_blocks = self._get_parent_annotations_and_matches(
lines, parent_key)
last_ann = None
last_parent = None
last_res = None
# TODO: consider making all annotations unique and then using 'is'
# everywhere. Current results claim that isn't any faster,
# because of the time spent deduping
for parent_idx, lines_idx, match_len in matching_blocks:
# For lines which match this parent, we will now resolve whether
# this parent wins over the current annotation
for idx in xrange(match_len):
ann_idx = lines_idx + idx
ann = annotations[ann_idx]
par_ann = parent_annotations[parent_idx + idx]
if ann == par_ann:
# Nothing to change
continue
if ann == this_annotation:
# Originally claimed 'this', but it was really in this
# parent
annotations[ann_idx] = par_ann
continue
# Resolve the fact that both sides have a different value for
# last modified
if ann == last_ann and par_ann == last_parent:
annotations[ann_idx] = last_res
else:
new_ann = set(ann)
new_ann.update(par_ann)
new_ann = tuple(sorted(new_ann))
annotations[ann_idx] = new_ann
last_ann = ann
last_parent = par_ann
last_res = new_ann
def annotate(self, key):
"""Return annotated fulltext for the given key."""
keys = self._get_needed_texts(key)
for text_key, text, num_lines in self._get_needed_texts(key):
self._text_cache[text_key] = text
this_annotation = (text_key,)
# Note: annotations will be mutated by calls to _update_from*
annotations = [this_annotation] * num_lines
self._annotations_cache[text_key] = annotations
parents = self._parent_map[text_key]
if not parents:
continue
self._update_from_one_parent(annotations, text, parents[0])
for parent in parents[1:]:
self._update_from_other_parents(annotations, text,
this_annotation, parent)
try:
annotations = self._annotations_cache[key]
except KeyError:
raise errors.RevisionNotPresent(key, self._vf)
return annotations, self._text_cache[key]
def annotate_flat(self, key):
"""Determine the single-best-revision to source for each line.
This is meant as a compatibility thunk to how annotate() used to work.
"""
annotations, lines = self.annotate(key)
assert len(annotations) == len(lines)
out = []
graph = _mod_graph.KnownGraph(self._parent_map)
heads = graph.heads
append = out.append
for annotation, line in zip(annotations, lines):
if len(annotation) == 1:
append((annotation[0], line))
else:
the_heads = heads(annotation)
if len(the_heads) == 1:
for head in the_heads:
break
else:
# We need to resolve the ambiguity, for now just pick the
# sorted smallest
head = sorted(the_heads)[0]
append((head, line))
last_ann = annotation
last_head = head
return out
|