4454.3.1
by John Arbash Meinel
Initial api for Annotator. |
1 |
# Copyright (C) 2009 Canonical Ltd
|
2 |
#
|
|
3 |
# This program is free software; you can redistribute it and/or modify
|
|
4 |
# it under the terms of the GNU General Public License as published by
|
|
5 |
# the Free Software Foundation; either version 2 of the License, or
|
|
6 |
# (at your option) any later version.
|
|
7 |
#
|
|
8 |
# This program is distributed in the hope that it will be useful,
|
|
9 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
10 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
11 |
# GNU General Public License for more details.
|
|
12 |
#
|
|
13 |
# You should have received a copy of the GNU General Public License
|
|
14 |
# along with this program; if not, write to the Free Software
|
|
15 |
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
16 |
||
17 |
"""Functionality for doing annotations in the 'optimal' way"""
|
|
18 |
||
4454.3.77
by John Arbash Meinel
Add support for compatibility with old '_break_annotation_tie' function. |
19 |
from bzrlib.lazy_import import lazy_import |
20 |
lazy_import(globals(), """ |
|
21 |
from bzrlib import annotate # Must be lazy to avoid circular importing
|
|
22 |
""") |
|
4454.3.1
by John Arbash Meinel
Initial api for Annotator. |
23 |
from bzrlib import ( |
24 |
errors, |
|
25 |
graph as _mod_graph, |
|
26 |
osutils, |
|
4454.3.3
by John Arbash Meinel
Start implementing the reannotation functionality directly. |
27 |
patiencediff, |
4454.3.21
by John Arbash Meinel
Assert that entries in the annotation cache also get cleaned up. |
28 |
ui, |
4454.3.1
by John Arbash Meinel
Initial api for Annotator. |
29 |
)
|
30 |
||
31 |
||
32 |
class Annotator(object):
    """Class that drives performing annotations.

    An Annotator wraps a VersionedFile and computes, for every line of a
    requested text, the tuple of keys that may have introduced that line.
    Texts, annotations and the parent graph are cached on the instance, and
    texts/annotations are dropped again once every child that needs them
    has been annotated.
    """

    def __init__(self, vf):
        """Create a new Annotator from a VersionedFile.

        :param vf: The versioned file to read texts and parent maps from.
        """
        self._vf = vf
        # Map from key => tuple of parent keys (None is normalised to ()).
        self._parent_map = {}
        # Map from key => list of lines for that text.
        self._text_cache = {}
        # Map from key => number of texts that will be built from this key
        self._num_needed_children = {}
        # Map from key => list of annotation tuples, one per line.
        self._annotations_cache = {}
        # Cached graph object; reset whenever self._parent_map changes.
        self._heads_provider = None
        # Not read by any method of this class.
        # NOTE(review): presumably kept for a subclass that shares this
        # constructor -- confirm before removing.
        self._ann_tuple_cache = {}

    def _update_needed_children(self, key, parent_keys):
        """Record that one more child text will be built from each parent.

        :param key: The child key (unused here, kept for the interface).
        :param parent_keys: Iterable of parent keys whose use count is
            incremented by one each.
        """
        counts = self._num_needed_children
        for parent_key in parent_keys:
            counts[parent_key] = counts.get(parent_key, 0) + 1

    def _get_needed_keys(self, key):
        """Determine the texts we need to get from the backing vf.

        Walks the ancestry of ``key``, filling in self._parent_map and
        self._num_needed_children along the way.

        :param key: The key whose ancestry should be walked.
        :return: (vf_keys_needed, ann_keys_needed)
            vf_keys_needed  These are keys that we need to get from the vf
            ann_keys_needed Texts which we have in self._text_cache but we
                            don't have annotations for. We need to yield
                            these in the proper order so that we can get
                            proper annotations.
        """
        parent_map = self._parent_map
        # We need 1 extra copy of the node we will be looking at when we are
        # done
        self._num_needed_children[key] = 1
        vf_keys_needed = set()
        ann_keys_needed = set()
        needed_keys = set([key])
        while needed_keys:
            parent_lookup = []
            next_parent_map = {}
            for needed_key in needed_keys:
                if needed_key in parent_map:
                    # We don't need to lookup this key in the vf
                    if needed_key not in self._text_cache:
                        # Extract this text from the vf
                        vf_keys_needed.add(needed_key)
                    elif needed_key not in self._annotations_cache:
                        # We do need to annotate
                        ann_keys_needed.add(needed_key)
                        next_parent_map[needed_key] = parent_map[needed_key]
                else:
                    parent_lookup.append(needed_key)
                    vf_keys_needed.add(needed_key)
            needed_keys = set()
            next_parent_map.update(self._vf.get_parent_map(parent_lookup))
            # Only values of existing keys are replaced inside this loop, so
            # the dict never changes size while it is being iterated.
            for child_key, parent_keys in next_parent_map.items():
                if parent_keys is None:  # No graph versionedfile
                    parent_keys = ()
                    next_parent_map[child_key] = ()
                self._update_needed_children(child_key, parent_keys)
                needed_keys.update([parent_key for parent_key in parent_keys
                                    if parent_key not in parent_map])
            parent_map.update(next_parent_map)
            # _heads_provider does some graph caching, so it is only valid
            # while self._parent_map hasn't changed
            self._heads_provider = None
        return vf_keys_needed, ann_keys_needed

    def _get_needed_texts(self, key, pb=None):
        """Get the texts we need to properly annotate key.

        :param key: A Key that is present in self._vf
        :param pb: Optional progress bar; when given, its ``update`` method
            is called with (message, current, total) as records arrive.
        :return: Yield (this_key, text, num_lines)
            'text' is an opaque object that just has to work with whatever
            matcher object we are using. Currently it is always 'lines' but
            future improvements may change this to a simple text string.
        :raises errors.RevisionNotPresent: if the stream reports a record
            whose storage_kind is 'absent'.
        """
        keys, ann_keys = self._get_needed_keys(key)
        total = len(keys)
        if pb is not None:
            pb.update('getting stream', 0, total)
        stream = self._vf.get_record_stream(keys, 'topological', True)
        for idx, record in enumerate(stream):
            if pb is not None:
                # Report the real position; this used to always report 0.
                pb.update('extracting', idx, total)
            if record.storage_kind == 'absent':
                raise errors.RevisionNotPresent(record.key, self._vf)
            this_key = record.key
            lines = osutils.chunks_to_lines(record.get_bytes_as('chunked'))
            self._text_cache[this_key] = lines
            yield this_key, lines, len(lines)
        # Texts that were already cached but still lack annotations.
        for ann_key in ann_keys:
            lines = self._text_cache[ann_key]
            yield ann_key, lines, len(lines)

    def _get_parent_annotations_and_matches(self, key, text, parent_key):
        """Get the list of annotations for the parent, and the matching lines.

        :param key: The key of the text being annotated (unused here).
        :param text: The opaque value given by _get_needed_texts
        :param parent_key: The key for the parent text
        :return: (parent_annotations, matching_blocks)
            parent_annotations is a list as long as the number of lines in
                parent
            matching_blocks is a list of (parent_idx, text_idx, len) tuples
                indicating which lines match between the two texts
        """
        parent_lines = self._text_cache[parent_key]
        parent_annotations = self._annotations_cache[parent_key]
        # PatienceSequenceMatcher should probably be part of Policy
        matcher = patiencediff.PatienceSequenceMatcher(None, parent_lines,
                                                       text)
        return parent_annotations, matcher.get_matching_blocks()

    def _update_from_first_parent(self, key, annotations, lines, parent_key):
        """Reannotate this text relative to its first parent.

        :param key: The key of the text being annotated.
        :param annotations: List of per-line annotations, mutated in place.
        :param lines: The text being annotated.
        :param parent_key: The key of the first parent.
        """
        (parent_annotations,
         matching_blocks) = self._get_parent_annotations_and_matches(
            key, lines, parent_key)

        for parent_idx, lines_idx, match_len in matching_blocks:
            # For all matching regions we copy across the parent annotations
            annotations[lines_idx:lines_idx + match_len] = \
                parent_annotations[parent_idx:parent_idx + match_len]

    def _update_from_other_parents(self, key, annotations, lines,
                                   this_annotation, parent_key):
        """Reannotate this text relative to a second (or more) parent.

        :param key: The key of the text being annotated.
        :param annotations: List of per-line annotations, mutated in place.
        :param lines: The text being annotated.
        :param this_annotation: The annotation tuple that means "introduced
            by the text being annotated".
        :param parent_key: The key of the parent to merge in.
        """
        (parent_annotations,
         matching_blocks) = self._get_parent_annotations_and_matches(
            key, lines, parent_key)

        # One-entry cache of the last (ann, par_ann) => merged result.
        last_ann = None
        last_parent = None
        last_res = None
        # TODO: consider making all annotations unique and then using 'is'
        #       everywhere. Current results claim that isn't any faster,
        #       because of the time spent deduping
        #       deduping also saves a bit of memory. For NEWS it saves ~1MB,
        #       but that is out of 200-300MB for extracting everything, so a
        #       fairly trivial amount
        for parent_idx, lines_idx, match_len in matching_blocks:
            # For lines which match this parent, we will now resolve whether
            # this parent wins over the current annotation
            ann_sub = annotations[lines_idx:lines_idx + match_len]
            par_sub = parent_annotations[parent_idx:parent_idx + match_len]
            if ann_sub == par_sub:
                # The whole region already agrees, nothing to resolve.
                continue
            # ann_sub is a slice of at most match_len items; a short slice
            # makes par_sub[idx] fail exactly where ann_sub[idx] used to.
            for idx, ann in enumerate(ann_sub):
                par_ann = par_sub[idx]
                ann_idx = lines_idx + idx
                if ann == par_ann:
                    # Nothing to change
                    continue
                if ann == this_annotation:
                    # Originally claimed 'this', but it was really in this
                    # parent
                    annotations[ann_idx] = par_ann
                    continue
                # Resolve the fact that both sides have a different value for
                # last modified
                if ann == last_ann and par_ann == last_parent:
                    annotations[ann_idx] = last_res
                else:
                    new_ann = set(ann)
                    new_ann.update(par_ann)
                    new_ann = tuple(sorted(new_ann))
                    annotations[ann_idx] = new_ann
                    last_ann = ann
                    last_parent = par_ann
                    last_res = new_ann

    def _record_annotation(self, key, parent_keys, annotations):
        """Cache the annotations for key and release parents no longer needed.

        :param key: The key that has just been annotated.
        :param parent_keys: The parents of key; each one has its remaining
            child count decremented.
        :param annotations: The per-line annotations computed for key.
        """
        self._annotations_cache[key] = annotations
        for parent_key in parent_keys:
            num = self._num_needed_children[parent_key] - 1
            if num == 0:
                # No remaining child needs this parent, drop its caches.
                del self._text_cache[parent_key]
                del self._annotations_cache[parent_key]
                # Do we want to clean up _num_needed_children at this point
                # as well?
            self._num_needed_children[parent_key] = num

    def _annotate_one(self, key, text, num_lines):
        """Compute and record the annotations for a single text.

        :param key: The key of the text to annotate; its parents must
            already be present in the text and annotation caches.
        :param text: The opaque text value given by _get_needed_texts.
        :param num_lines: The number of lines in text.
        """
        this_annotation = (key,)
        # Note: annotations will be mutated by calls to _update_from*
        annotations = [this_annotation] * num_lines
        parent_keys = self._parent_map[key]
        if parent_keys:
            self._update_from_first_parent(key, annotations, text,
                                           parent_keys[0])
            for parent in parent_keys[1:]:
                self._update_from_other_parents(key, annotations, text,
                                                this_annotation, parent)
        self._record_annotation(key, parent_keys, annotations)

    def add_special_text(self, key, parent_keys, text):
        """Add a specific text to the graph.

        This is used to add a text which is not otherwise present in the
        versioned file. (eg. a WorkingTree injecting 'current:' into the
        graph to annotate the edited content.)

        :param key: The key to use to request this text be annotated
        :param parent_keys: The parents of this text
        :param text: A string containing the content of the text
        """
        self._parent_map[key] = parent_keys
        self._text_cache[key] = osutils.split_lines(text)
        # The graph changed, so any cached heads provider is stale.
        self._heads_provider = None

    def annotate(self, key):
        """Return annotated fulltext for the given key.

        :param key: A tuple defining the text to annotate
        :return: ([annotations], [lines])
            annotations is a list of tuples of keys, one for each line in
                        lines each key is a possible source for the given
                        line.
            lines the text of "key" as a list of lines
        :raises errors.RevisionNotPresent: if no annotations were produced
            for key.
        """
        pb = ui.ui_factory.nested_progress_bar()
        try:
            for text_key, text, num_lines in self._get_needed_texts(key,
                                                                    pb=pb):
                self._annotate_one(text_key, text, num_lines)
        finally:
            pb.finished()
        try:
            annotations = self._annotations_cache[key]
        except KeyError:
            raise errors.RevisionNotPresent(key, self._vf)
        return annotations, self._text_cache[key]

    def _get_heads_provider(self):
        """Return a KnownGraph over self._parent_map, building it on demand.

        The result is cached in self._heads_provider until something resets
        that attribute to None.
        """
        if self._heads_provider is None:
            self._heads_provider = _mod_graph.KnownGraph(self._parent_map)
        return self._heads_provider

    def _resolve_annotation_tie(self, the_heads, line, tiebreaker):
        """Pick a single key out of several candidate heads for one line.

        :param the_heads: Iterable of candidate keys.
        :param line: The line of text the candidates compete for.
        :param tiebreaker: None to pick the smallest key, or a callable
            taking ((head, line), (other_head, line)) and returning a
            sequence whose first item is the winning head.
        :return: The chosen key.
        """
        if tiebreaker is None:
            return sorted(the_heads)[0]
        # Backwards compatibility, break up the heads into pairs and
        # resolve the result
        candidates = list(the_heads)
        head = candidates[0]
        for possible_head in candidates[1:]:
            annotated_lines = ((head, line), (possible_head, line))
            head = tiebreaker(annotated_lines)[0]
        return head

    def annotate_flat(self, key):
        """Determine the single-best-revision to source for each line.

        This is meant as a compatibility thunk to how annotate() used to work.

        :param key: The key of the text to annotate.
        :return: [(ann_key, line)]
            A list of tuples with a single annotation key for each line.
        """
        custom_tiebreaker = annotate._break_annotation_tie
        annotations, lines = self.annotate(key)
        out = []
        heads = self._get_heads_provider().heads
        append = out.append
        for annotation, line in zip(annotations, lines):
            if len(annotation) == 1:
                head = annotation[0]
            else:
                the_heads = heads(annotation)
                if len(the_heads) == 1:
                    # get the item out of the set
                    (head,) = the_heads
                else:
                    head = self._resolve_annotation_tie(the_heads, line,
                                                        custom_tiebreaker)
            append((head, line))
        return out