4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
1 |
# Copyright (C) 2009 Canonical Ltd
|
2 |
#
|
|
3 |
# This program is free software; you can redistribute it and/or modify
|
|
4 |
# it under the terms of the GNU General Public License as published by
|
|
5 |
# the Free Software Foundation; either version 2 of the License, or
|
|
6 |
# (at your option) any later version.
|
|
7 |
#
|
|
8 |
# This program is distributed in the hope that it will be useful,
|
|
9 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
10 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
11 |
# GNU General Public License for more details.
|
|
12 |
#
|
|
13 |
# You should have received a copy of the GNU General Public License
|
|
14 |
# along with this program; if not, write to the Free Software
|
|
15 |
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
16 |
||
17 |
"""Functionality for doing annotations in the 'optimal' way"""
|
|
18 |
||
4454.3.44
by John Arbash Meinel
Initial results show no improvement for pyrexifing _update_from_one_parent. |
19 |
cdef extern from "python-compat.h": |
20 |
pass
|
|
21 |
||
22 |
cdef extern from "Python.h": |
|
23 |
ctypedef int Py_ssize_t |
|
24 |
ctypedef struct PyObject: |
|
25 |
pass
|
|
4454.3.54
by John Arbash Meinel
Access the list object directly, and copy the pointers. |
26 |
ctypedef struct PyListObject: |
27 |
PyObject **ob_item |
|
4454.3.44
by John Arbash Meinel
Initial results show no improvement for pyrexifing _update_from_one_parent. |
28 |
int PyList_CheckExact(object) |
29 |
PyObject *PyList_GET_ITEM(object, Py_ssize_t o) |
|
30 |
Py_ssize_t PyList_GET_SIZE(object) |
|
4454.3.46
by John Arbash Meinel
Combine left and right knowing that we already have sorted tuples. |
31 |
int PyList_Append(object, object) except -1 |
4454.3.44
by John Arbash Meinel
Initial results show no improvement for pyrexifing _update_from_one_parent. |
32 |
int PyList_SetItem(object, Py_ssize_t o, object) except -1 |
4454.3.46
by John Arbash Meinel
Combine left and right knowing that we already have sorted tuples. |
33 |
int PyList_Sort(object) except -1 |
34 |
||
35 |
int PyTuple_CheckExact(object) |
|
36 |
object PyTuple_New(Py_ssize_t len) |
|
37 |
void PyTuple_SET_ITEM(object, Py_ssize_t pos, object) |
|
4454.3.50
by John Arbash Meinel
Some internal cleanup, a few more counters. |
38 |
void PyTuple_SET_ITEM_ptr "PyTuple_SET_ITEM" (object, Py_ssize_t, |
39 |
PyObject *) |
|
4454.3.46
by John Arbash Meinel
Combine left and right knowing that we already have sorted tuples. |
40 |
int PyTuple_Resize(PyObject **, Py_ssize_t newlen) |
41 |
PyObject *PyTuple_GET_ITEM(object, Py_ssize_t o) |
|
42 |
Py_ssize_t PyTuple_GET_SIZE(object) |
|
43 |
||
44 |
PyObject *PyDict_GetItem(object d, object k) |
|
45 |
int PyDict_SetItem(object d, object k, object v) except -1 |
|
46 |
||
4454.3.44
by John Arbash Meinel
Initial results show no improvement for pyrexifing _update_from_one_parent. |
47 |
void Py_INCREF(object) |
4454.3.50
by John Arbash Meinel
Some internal cleanup, a few more counters. |
48 |
void Py_INCREF_ptr "Py_INCREF" (PyObject *) |
4454.3.54
by John Arbash Meinel
Access the list object directly, and copy the pointers. |
49 |
void Py_DECREF_ptr "Py_DECREF" (PyObject *) |
4454.3.44
by John Arbash Meinel
Initial results show no improvement for pyrexifing _update_from_one_parent. |
50 |
|
4454.3.45
by John Arbash Meinel
A bit of tweaking to the _update_from_other_parents drops us to 7.67s |
51 |
int Py_EQ |
4454.3.50
by John Arbash Meinel
Some internal cleanup, a few more counters. |
52 |
int Py_LT |
4454.3.45
by John Arbash Meinel
A bit of tweaking to the _update_from_other_parents drops us to 7.67s |
53 |
int PyObject_RichCompareBool(object, object, int opid) except -1 |
4454.3.50
by John Arbash Meinel
Some internal cleanup, a few more counters. |
54 |
int PyObject_RichCompareBool_ptr "PyObject_RichCompareBool" ( |
55 |
PyObject *, PyObject *, int opid) |
|
4454.3.44
by John Arbash Meinel
Initial results show no improvement for pyrexifing _update_from_one_parent. |
56 |
|
4454.3.55
by John Arbash Meinel
Remove some debugging counters. |
57 |
|
4454.3.73
by John Arbash Meinel
inherit from _annotator_py.Annotator in _annotator_pyx.Annotator. |
58 |
from bzrlib import _annotator_py |
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
59 |
|
60 |
||
4454.3.45
by John Arbash Meinel
A bit of tweaking to the _update_from_other_parents drops us to 7.67s |
61 |
cdef int _check_annotations_are_lists(annotations, |
62 |
parent_annotations) except -1: |
|
63 |
if not PyList_CheckExact(annotations): |
|
64 |
raise TypeError('annotations must be a list') |
|
65 |
if not PyList_CheckExact(parent_annotations): |
|
66 |
raise TypeError('parent_annotations must be a list') |
|
67 |
return 0 |
|
68 |
||
69 |
||
70 |
cdef int _check_match_ranges(parent_annotations, annotations, |
|
4454.3.54
by John Arbash Meinel
Access the list object directly, and copy the pointers. |
71 |
Py_ssize_t parent_idx, Py_ssize_t lines_idx, |
72 |
Py_ssize_t match_len) except -1: |
|
4454.3.45
by John Arbash Meinel
A bit of tweaking to the _update_from_other_parents drops us to 7.67s |
73 |
if parent_idx + match_len > PyList_GET_SIZE(parent_annotations): |
74 |
raise ValueError('Match length exceeds len of' |
|
75 |
' parent_annotations %s > %s' |
|
76 |
% (parent_idx + match_len, |
|
77 |
PyList_GET_SIZE(parent_annotations))) |
|
78 |
if lines_idx + match_len > PyList_GET_SIZE(annotations): |
|
79 |
raise ValueError('Match length exceeds len of' |
|
80 |
' annotations %s > %s' |
|
81 |
% (lines_idx + match_len, |
|
82 |
PyList_GET_SIZE(annotations))) |
|
83 |
return 0 |
|
84 |
||
85 |
||
4454.3.50
by John Arbash Meinel
Some internal cleanup, a few more counters. |
86 |
cdef PyObject *_next_tuple_entry(object tpl, Py_ssize_t *pos): |
87 |
pos[0] = pos[0] + 1 |
|
88 |
if pos[0] >= PyTuple_GET_SIZE(tpl): |
|
89 |
return NULL |
|
90 |
return PyTuple_GET_ITEM(tpl, pos[0]) |
|
91 |
||
92 |
||
4454.3.46
by John Arbash Meinel
Combine left and right knowing that we already have sorted tuples. |
93 |
cdef object _combine_annotations(ann_one, ann_two, cache): |
94 |
"""Combine the annotations from both sides."""
|
|
95 |
cdef Py_ssize_t pos_one, pos_two, len_one, len_two |
|
96 |
cdef Py_ssize_t out_pos |
|
4454.3.50
by John Arbash Meinel
Some internal cleanup, a few more counters. |
97 |
cdef PyObject *temp, *left, *right |
4454.3.46
by John Arbash Meinel
Combine left and right knowing that we already have sorted tuples. |
98 |
|
4454.3.51
by John Arbash Meinel
Using a global cache gives us key uniqueness. |
99 |
if (PyObject_RichCompareBool(ann_one, ann_two, Py_LT)): |
100 |
cache_key = (ann_one, ann_two) |
|
101 |
else: |
|
102 |
cache_key = (ann_two, ann_one) |
|
4454.3.46
by John Arbash Meinel
Combine left and right knowing that we already have sorted tuples. |
103 |
temp = PyDict_GetItem(cache, cache_key) |
104 |
if temp != NULL: |
|
105 |
return <object>temp |
|
106 |
||
107 |
if not PyTuple_CheckExact(ann_one) or not PyTuple_CheckExact(ann_two): |
|
108 |
raise TypeError('annotations must be tuples') |
|
109 |
# We know that annotations are tuples, and that both sides are already
|
|
110 |
# sorted, so we can just walk and update a new list.
|
|
4454.3.50
by John Arbash Meinel
Some internal cleanup, a few more counters. |
111 |
pos_one = -1 |
112 |
pos_two = -1 |
|
4454.3.46
by John Arbash Meinel
Combine left and right knowing that we already have sorted tuples. |
113 |
out_pos = 0 |
4454.3.50
by John Arbash Meinel
Some internal cleanup, a few more counters. |
114 |
left = _next_tuple_entry(ann_one, &pos_one) |
115 |
right = _next_tuple_entry(ann_two, &pos_two) |
|
116 |
new_ann = PyTuple_New(PyTuple_GET_SIZE(ann_one) |
|
117 |
+ PyTuple_GET_SIZE(ann_two)) |
|
118 |
while left != NULL and right != NULL: |
|
4454.3.74
by John Arbash Meinel
Some small tweaks, add more documentation for 'add_special_text'. |
119 |
# left == right is done by PyObject_RichCompareBool_ptr, however it
|
120 |
# avoids a function call for a very common case. Drops 'time bzr
|
|
121 |
# annotate NEWS' from 7.25s to 7.16s, so it *is* a visible impact.
|
|
122 |
if (left == right |
|
123 |
or PyObject_RichCompareBool_ptr(left, right, Py_EQ)): |
|
4454.3.50
by John Arbash Meinel
Some internal cleanup, a few more counters. |
124 |
# Identical values, step both
|
125 |
Py_INCREF_ptr(left) |
|
126 |
PyTuple_SET_ITEM_ptr(new_ann, out_pos, left) |
|
127 |
left = _next_tuple_entry(ann_one, &pos_one) |
|
128 |
right = _next_tuple_entry(ann_two, &pos_two) |
|
129 |
elif (PyObject_RichCompareBool_ptr(left, right, Py_LT)): |
|
130 |
# left < right or right == NULL
|
|
131 |
Py_INCREF_ptr(left) |
|
132 |
PyTuple_SET_ITEM_ptr(new_ann, out_pos, left) |
|
133 |
left = _next_tuple_entry(ann_one, &pos_one) |
|
134 |
else: # right < left or left == NULL |
|
135 |
Py_INCREF_ptr(right) |
|
136 |
PyTuple_SET_ITEM_ptr(new_ann, out_pos, right) |
|
137 |
right = _next_tuple_entry(ann_two, &pos_two) |
|
138 |
out_pos = out_pos + 1 |
|
139 |
while left != NULL: |
|
140 |
Py_INCREF_ptr(left) |
|
141 |
PyTuple_SET_ITEM_ptr(new_ann, out_pos, left) |
|
142 |
left = _next_tuple_entry(ann_one, &pos_one) |
|
143 |
out_pos = out_pos + 1 |
|
144 |
while right != NULL: |
|
145 |
Py_INCREF_ptr(right) |
|
146 |
PyTuple_SET_ITEM_ptr(new_ann, out_pos, right) |
|
147 |
right = _next_tuple_entry(ann_two, &pos_two) |
|
148 |
out_pos = out_pos + 1 |
|
149 |
if out_pos != PyTuple_GET_SIZE(new_ann): |
|
150 |
# Timing _PyTuple_Resize was not significantly faster that slicing
|
|
4454.3.46
by John Arbash Meinel
Combine left and right knowing that we already have sorted tuples. |
151 |
# PyTuple_Resize((<PyObject **>new_ann), out_pos)
|
152 |
new_ann = new_ann[0:out_pos] |
|
153 |
PyDict_SetItem(cache, cache_key, new_ann) |
|
154 |
return new_ann |
|
155 |
||
156 |
||
4454.3.75
by John Arbash Meinel
Move the core loops into module-level helpers. |
157 |
cdef int _apply_parent_annotations(annotations, parent_annotations, |
158 |
matching_blocks) except -1: |
|
4454.3.54
by John Arbash Meinel
Access the list object directly, and copy the pointers. |
159 |
"""Apply the annotations from parent_annotations into annotations.
|
160 |
||
161 |
matching_blocks defines the ranges that match.
|
|
162 |
"""
|
|
163 |
cdef Py_ssize_t parent_idx, lines_idx, match_len, idx |
|
164 |
cdef PyListObject *par_list, *ann_list |
|
165 |
cdef PyObject **par_temp, **ann_temp |
|
166 |
||
167 |
_check_annotations_are_lists(annotations, parent_annotations) |
|
168 |
par_list = <PyListObject *>parent_annotations |
|
169 |
ann_list = <PyListObject *>annotations |
|
4454.3.55
by John Arbash Meinel
Remove some debugging counters. |
170 |
# For NEWS and bzrlib/builtins.py, over 99% of the lines are simply copied
|
171 |
# across from the parent entry. So this routine is heavily optimized for
|
|
172 |
# that. Would be interesting if we could use memcpy() but we have to incref
|
|
173 |
# and decref
|
|
4454.3.54
by John Arbash Meinel
Access the list object directly, and copy the pointers. |
174 |
for parent_idx, lines_idx, match_len in matching_blocks: |
175 |
_check_match_ranges(parent_annotations, annotations, |
|
176 |
parent_idx, lines_idx, match_len) |
|
177 |
par_temp = par_list.ob_item + parent_idx |
|
178 |
ann_temp = ann_list.ob_item + lines_idx |
|
179 |
for idx from 0 <= idx < match_len: |
|
180 |
Py_INCREF_ptr(par_temp[idx]) |
|
181 |
Py_DECREF_ptr(ann_temp[idx]) |
|
182 |
ann_temp[idx] = par_temp[idx] |
|
4454.3.75
by John Arbash Meinel
Move the core loops into module-level helpers. |
183 |
return 0 |
184 |
||
185 |
||
186 |
cdef int _merge_annotations(this_annotation, annotations, parent_annotations, |
|
187 |
matching_blocks, ann_cache) except -1: |
|
188 |
cdef Py_ssize_t parent_idx, ann_idx, lines_idx, match_len, idx |
|
189 |
cdef Py_ssize_t pos |
|
190 |
cdef PyObject *ann_temp, *par_temp |
|
191 |
||
192 |
_check_annotations_are_lists(annotations, parent_annotations) |
|
193 |
last_ann = None |
|
194 |
last_parent = None |
|
195 |
last_res = None |
|
196 |
for parent_idx, lines_idx, match_len in matching_blocks: |
|
197 |
_check_match_ranges(parent_annotations, annotations, |
|
198 |
parent_idx, lines_idx, match_len) |
|
199 |
# For lines which match this parent, we will now resolve whether
|
|
200 |
# this parent wins over the current annotation
|
|
201 |
for idx from 0 <= idx < match_len: |
|
202 |
ann_idx = lines_idx + idx |
|
203 |
ann_temp = PyList_GET_ITEM(annotations, ann_idx) |
|
204 |
par_temp = PyList_GET_ITEM(parent_annotations, parent_idx + idx) |
|
205 |
if (ann_temp == par_temp): |
|
206 |
# This is parent, do nothing
|
|
207 |
# Pointer comparison is fine here. Value comparison would
|
|
208 |
# be ok, but it will be handled in the final if clause by
|
|
209 |
# merging the two tuples into the same tuple
|
|
210 |
# Avoiding the Py_INCREF and function call to
|
|
211 |
# PyObject_RichCompareBool using pointer comparison drops
|
|
212 |
# timing from 215ms => 125ms
|
|
213 |
continue
|
|
214 |
par_ann = <object>par_temp |
|
215 |
ann = <object>ann_temp |
|
216 |
if (ann is this_annotation): |
|
217 |
# Originally claimed 'this', but it was really in this
|
|
218 |
# parent
|
|
219 |
Py_INCREF(par_ann) |
|
220 |
PyList_SetItem(annotations, ann_idx, par_ann) |
|
221 |
continue
|
|
222 |
# Resolve the fact that both sides have a different value for
|
|
223 |
# last modified
|
|
224 |
if (ann is last_ann and par_ann is last_parent): |
|
225 |
Py_INCREF(last_res) |
|
226 |
PyList_SetItem(annotations, ann_idx, last_res) |
|
227 |
else: |
|
228 |
new_ann = _combine_annotations(ann, par_ann, ann_cache) |
|
229 |
Py_INCREF(new_ann) |
|
230 |
PyList_SetItem(annotations, ann_idx, new_ann) |
|
231 |
last_ann = ann |
|
232 |
last_parent = par_ann |
|
233 |
last_res = new_ann |
|
4454.3.76
by John Arbash Meinel
Always return a value, although Pyrex takes care of that for you if you let it. |
234 |
return 0 |
4454.3.54
by John Arbash Meinel
Access the list object directly, and copy the pointers. |
235 |
|
236 |
||
4454.3.73
by John Arbash Meinel
inherit from _annotator_py.Annotator in _annotator_pyx.Annotator. |
237 |
class Annotator(_annotator_py.Annotator): |
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
238 |
"""Class that drives performing annotations."""
|
239 |
||
4454.3.73
by John Arbash Meinel
inherit from _annotator_py.Annotator in _annotator_pyx.Annotator. |
240 |
def _update_from_first_parent(self, key, annotations, lines, parent_key): |
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
241 |
"""Reannotate this text relative to its first parent."""
|
4454.3.75
by John Arbash Meinel
Move the core loops into module-level helpers. |
242 |
(parent_annotations, |
243 |
matching_blocks) = self._get_parent_annotations_and_matches( |
|
244 |
key, lines, parent_key) |
|
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
245 |
|
4454.3.54
by John Arbash Meinel
Access the list object directly, and copy the pointers. |
246 |
_apply_parent_annotations(annotations, parent_annotations, |
247 |
matching_blocks) |
|
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
248 |
|
249 |
def _update_from_other_parents(self, key, annotations, lines, |
|
250 |
this_annotation, parent_key): |
|
251 |
"""Reannotate this text relative to a second (or more) parent."""
|
|
4454.3.75
by John Arbash Meinel
Move the core loops into module-level helpers. |
252 |
(parent_annotations, |
253 |
matching_blocks) = self._get_parent_annotations_and_matches( |
|
254 |
key, lines, parent_key) |
|
255 |
_merge_annotations(this_annotation, annotations, parent_annotations, |
|
256 |
matching_blocks, self._ann_tuple_cache) |
|
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
257 |
|
258 |
def annotate_flat(self, key): |
|
259 |
"""Determine the single-best-revision to source for each line.
|
|
260 |
||
261 |
This is meant as a compatibility thunk to how annotate() used to work.
|
|
262 |
"""
|
|
4454.3.55
by John Arbash Meinel
Remove some debugging counters. |
263 |
cdef Py_ssize_t pos, num_lines |
4454.3.77
by John Arbash Meinel
Add support for compatibility with old '_break_annotation_tie' function. |
264 |
|
265 |
from bzrlib import annotate |
|
266 |
||
267 |
custom_tiebreaker = annotate._break_annotation_tie |
|
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
268 |
annotations, lines = self.annotate(key) |
4454.3.55
by John Arbash Meinel
Remove some debugging counters. |
269 |
num_lines = len(lines) |
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
270 |
out = [] |
271 |
heads = self._get_heads_provider().heads |
|
4454.3.55
by John Arbash Meinel
Remove some debugging counters. |
272 |
for pos from 0 <= pos < num_lines: |
273 |
annotation = annotations[pos] |
|
274 |
line = lines[pos] |
|
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
275 |
if len(annotation) == 1: |
4454.3.55
by John Arbash Meinel
Remove some debugging counters. |
276 |
head = annotation[0] |
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
277 |
else: |
278 |
the_heads = heads(annotation) |
|
279 |
if len(the_heads) == 1: |
|
4454.3.73
by John Arbash Meinel
inherit from _annotator_py.Annotator in _annotator_pyx.Annotator. |
280 |
for head in the_heads: break # get the item out of the set |
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
281 |
else: |
282 |
# We need to resolve the ambiguity, for now just pick the
|
|
283 |
# sorted smallest
|
|
4454.3.77
by John Arbash Meinel
Add support for compatibility with old '_break_annotation_tie' function. |
284 |
head = self._resolve_annotation_tie(the_heads, line, |
285 |
custom_tiebreaker) |
|
4454.3.55
by John Arbash Meinel
Remove some debugging counters. |
286 |
PyList_Append(out, (head, line)) |
4454.3.43
by John Arbash Meinel
Initial implementation of a Pyrex annotator. |
287 |
return out |