~bzr-pqm/bzr/bzr.dev

4763.2.4 by John Arbash Meinel
merge bzr.2.1 in preparation for NEWS entry.
1
# Copyright (C) 2009, 2010 Canonical Ltd
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
17
"""Functionality for doing annotations in the 'optimal' way"""
18
4454.3.44 by John Arbash Meinel
Initial results show no improvement for pyrexifing _update_from_one_parent.
19
cdef extern from "python-compat.h":
20
    pass
21
22
cdef extern from "Python.h":
23
    ctypedef int Py_ssize_t
24
    ctypedef struct PyObject:
25
        pass
4454.3.54 by John Arbash Meinel
Access the list object directly, and copy the pointers.
26
    ctypedef struct PyListObject:
27
        PyObject **ob_item
4454.3.44 by John Arbash Meinel
Initial results show no improvement for pyrexifing _update_from_one_parent.
28
    int PyList_CheckExact(object)
29
    PyObject *PyList_GET_ITEM(object, Py_ssize_t o)
30
    Py_ssize_t PyList_GET_SIZE(object)
4454.3.46 by John Arbash Meinel
Combine left and right knowing that we already have sorted tuples.
31
    int PyList_Append(object, object) except -1
4454.3.44 by John Arbash Meinel
Initial results show no improvement for pyrexifing _update_from_one_parent.
32
    int PyList_SetItem(object, Py_ssize_t o, object) except -1
4454.3.46 by John Arbash Meinel
Combine left and right knowing that we already have sorted tuples.
33
    int PyList_Sort(object) except -1
34
35
    int PyTuple_CheckExact(object)
36
    object PyTuple_New(Py_ssize_t len)
37
    void PyTuple_SET_ITEM(object, Py_ssize_t pos, object)
4454.3.50 by John Arbash Meinel
Some internal cleanup, a few more counters.
38
    void PyTuple_SET_ITEM_ptr "PyTuple_SET_ITEM" (object, Py_ssize_t,
39
                                                  PyObject *)
4454.3.46 by John Arbash Meinel
Combine left and right knowing that we already have sorted tuples.
40
    int PyTuple_Resize(PyObject **, Py_ssize_t newlen)
41
    PyObject *PyTuple_GET_ITEM(object, Py_ssize_t o)
42
    Py_ssize_t PyTuple_GET_SIZE(object)
43
44
    PyObject *PyDict_GetItem(object d, object k)
45
    int PyDict_SetItem(object d, object k, object v) except -1
46
4454.3.44 by John Arbash Meinel
Initial results show no improvement for pyrexifing _update_from_one_parent.
47
    void Py_INCREF(object)
4454.3.50 by John Arbash Meinel
Some internal cleanup, a few more counters.
48
    void Py_INCREF_ptr "Py_INCREF" (PyObject *)
4454.3.54 by John Arbash Meinel
Access the list object directly, and copy the pointers.
49
    void Py_DECREF_ptr "Py_DECREF" (PyObject *)
4454.3.44 by John Arbash Meinel
Initial results show no improvement for pyrexifing _update_from_one_parent.
50
4454.3.45 by John Arbash Meinel
A bit of tweaking to the _update_from_other_parents drops us to 7.67s
51
    int Py_EQ
4454.3.50 by John Arbash Meinel
Some internal cleanup, a few more counters.
52
    int Py_LT
4454.3.45 by John Arbash Meinel
A bit of tweaking to the _update_from_other_parents drops us to 7.67s
53
    int PyObject_RichCompareBool(object, object, int opid) except -1
4454.3.50 by John Arbash Meinel
Some internal cleanup, a few more counters.
54
    int PyObject_RichCompareBool_ptr "PyObject_RichCompareBool" (
55
        PyObject *, PyObject *, int opid)
4454.3.44 by John Arbash Meinel
Initial results show no improvement for pyrexifing _update_from_one_parent.
56
4454.3.55 by John Arbash Meinel
Remove some debugging counters.
57
4454.3.73 by John Arbash Meinel
inherit from _annotator_py.Annotator in _annotator_pyx.Annotator.
58
from bzrlib import _annotator_py
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
59
60
4454.3.45 by John Arbash Meinel
A bit of tweaking to the _update_from_other_parents drops us to 7.67s
61
cdef int _check_annotations_are_lists(annotations,
62
                                      parent_annotations) except -1:
63
    if not PyList_CheckExact(annotations):
64
        raise TypeError('annotations must be a list')
65
    if not PyList_CheckExact(parent_annotations):
66
        raise TypeError('parent_annotations must be a list')
67
    return 0
68
69
70
cdef int _check_match_ranges(parent_annotations, annotations,
4454.3.54 by John Arbash Meinel
Access the list object directly, and copy the pointers.
71
                             Py_ssize_t parent_idx, Py_ssize_t lines_idx,
72
                             Py_ssize_t match_len) except -1:
4454.3.45 by John Arbash Meinel
A bit of tweaking to the _update_from_other_parents drops us to 7.67s
73
    if parent_idx + match_len > PyList_GET_SIZE(parent_annotations):
74
        raise ValueError('Match length exceeds len of'
75
                         ' parent_annotations %s > %s'
76
                         % (parent_idx + match_len,
77
                            PyList_GET_SIZE(parent_annotations)))
78
    if lines_idx + match_len > PyList_GET_SIZE(annotations):
79
        raise ValueError('Match length exceeds len of'
80
                         ' annotations %s > %s'
81
                         % (lines_idx + match_len,
82
                            PyList_GET_SIZE(annotations)))
83
    return 0
84
85
4634.117.10 by John Arbash Meinel
Change 'no except' to 'cannot_raise'
86
cdef PyObject *_next_tuple_entry(object tpl, Py_ssize_t *pos): # cannot_raise
4634.117.4 by John Arbash Meinel
Check _annotator_pyx for missing except clauses.
87
    """Return the next entry from this tuple.
88
89
    :param tpl: The tuple we are investigating, *must* be a PyTuple
90
    :param pos: The last item we found. Will be updated to the new position.
91
    
92
    This cannot raise an exception, as it does no error checking.
93
    """
4454.3.50 by John Arbash Meinel
Some internal cleanup, a few more counters.
94
    pos[0] = pos[0] + 1
95
    if pos[0] >= PyTuple_GET_SIZE(tpl):
96
        return NULL
97
    return PyTuple_GET_ITEM(tpl, pos[0])
98
99
4454.3.46 by John Arbash Meinel
Combine left and right knowing that we already have sorted tuples.
100
cdef object _combine_annotations(ann_one, ann_two, cache):
101
    """Combine the annotations from both sides."""
102
    cdef Py_ssize_t pos_one, pos_two, len_one, len_two
103
    cdef Py_ssize_t out_pos
4454.3.50 by John Arbash Meinel
Some internal cleanup, a few more counters.
104
    cdef PyObject *temp, *left, *right
4454.3.46 by John Arbash Meinel
Combine left and right knowing that we already have sorted tuples.
105
4454.3.51 by John Arbash Meinel
Using a global cache gives us key uniqueness.
106
    if (PyObject_RichCompareBool(ann_one, ann_two, Py_LT)):
107
        cache_key = (ann_one, ann_two)
108
    else:
109
        cache_key = (ann_two, ann_one)
4454.3.46 by John Arbash Meinel
Combine left and right knowing that we already have sorted tuples.
110
    temp = PyDict_GetItem(cache, cache_key)
111
    if temp != NULL:
112
        return <object>temp
113
114
    if not PyTuple_CheckExact(ann_one) or not PyTuple_CheckExact(ann_two):
115
        raise TypeError('annotations must be tuples')
116
    # We know that annotations are tuples, and that both sides are already
117
    # sorted, so we can just walk and update a new list.
4454.3.50 by John Arbash Meinel
Some internal cleanup, a few more counters.
118
    pos_one = -1
119
    pos_two = -1
4454.3.46 by John Arbash Meinel
Combine left and right knowing that we already have sorted tuples.
120
    out_pos = 0
4454.3.50 by John Arbash Meinel
Some internal cleanup, a few more counters.
121
    left = _next_tuple_entry(ann_one, &pos_one)
122
    right = _next_tuple_entry(ann_two, &pos_two)
123
    new_ann = PyTuple_New(PyTuple_GET_SIZE(ann_one)
124
                          + PyTuple_GET_SIZE(ann_two))
125
    while left != NULL and right != NULL:
4454.3.74 by John Arbash Meinel
Some small tweaks, add more documentation for 'add_special_text'.
126
        # left == right is done by PyObject_RichCompareBool_ptr, however it
127
        # avoids a function call for a very common case. Drops 'time bzr
128
        # annotate NEWS' from 7.25s to 7.16s, so it *is* a visible impact.
129
        if (left == right
130
            or PyObject_RichCompareBool_ptr(left, right, Py_EQ)):
4454.3.50 by John Arbash Meinel
Some internal cleanup, a few more counters.
131
            # Identical values, step both
132
            Py_INCREF_ptr(left)
133
            PyTuple_SET_ITEM_ptr(new_ann, out_pos, left)
134
            left = _next_tuple_entry(ann_one, &pos_one)
135
            right = _next_tuple_entry(ann_two, &pos_two)
136
        elif (PyObject_RichCompareBool_ptr(left, right, Py_LT)):
137
            # left < right or right == NULL
138
            Py_INCREF_ptr(left)
139
            PyTuple_SET_ITEM_ptr(new_ann, out_pos, left)
140
            left = _next_tuple_entry(ann_one, &pos_one)
141
        else: # right < left or left == NULL
142
            Py_INCREF_ptr(right)
143
            PyTuple_SET_ITEM_ptr(new_ann, out_pos, right)
144
            right = _next_tuple_entry(ann_two, &pos_two)
145
        out_pos = out_pos + 1
146
    while left != NULL:
147
        Py_INCREF_ptr(left)
148
        PyTuple_SET_ITEM_ptr(new_ann, out_pos, left)
149
        left = _next_tuple_entry(ann_one, &pos_one)
150
        out_pos = out_pos + 1
151
    while right != NULL:
152
        Py_INCREF_ptr(right)
153
        PyTuple_SET_ITEM_ptr(new_ann, out_pos, right)
154
        right = _next_tuple_entry(ann_two, &pos_two)
155
        out_pos = out_pos + 1
156
    if out_pos != PyTuple_GET_SIZE(new_ann):
157
        # Timing _PyTuple_Resize was not significantly faster that slicing
4454.3.46 by John Arbash Meinel
Combine left and right knowing that we already have sorted tuples.
158
        # PyTuple_Resize((<PyObject **>new_ann), out_pos)
159
        new_ann = new_ann[0:out_pos]
160
    PyDict_SetItem(cache, cache_key, new_ann)
161
    return new_ann
162
163
4454.3.75 by John Arbash Meinel
Move the core loops into module-level helpers.
164
cdef int _apply_parent_annotations(annotations, parent_annotations,
165
                                   matching_blocks) except -1:
4454.3.54 by John Arbash Meinel
Access the list object directly, and copy the pointers.
166
    """Apply the annotations from parent_annotations into annotations.
167
168
    matching_blocks defines the ranges that match.
169
    """
170
    cdef Py_ssize_t parent_idx, lines_idx, match_len, idx
171
    cdef PyListObject *par_list, *ann_list
172
    cdef PyObject **par_temp, **ann_temp
173
174
    _check_annotations_are_lists(annotations, parent_annotations)
175
    par_list = <PyListObject *>parent_annotations
176
    ann_list = <PyListObject *>annotations
4454.3.55 by John Arbash Meinel
Remove some debugging counters.
177
    # For NEWS and bzrlib/builtins.py, over 99% of the lines are simply copied
178
    # across from the parent entry. So this routine is heavily optimized for
179
    # that. Would be interesting if we could use memcpy() but we have to incref
180
    # and decref
4454.3.54 by John Arbash Meinel
Access the list object directly, and copy the pointers.
181
    for parent_idx, lines_idx, match_len in matching_blocks:
182
        _check_match_ranges(parent_annotations, annotations,
183
                            parent_idx, lines_idx, match_len)
184
        par_temp = par_list.ob_item + parent_idx
185
        ann_temp = ann_list.ob_item + lines_idx
186
        for idx from 0 <= idx < match_len:
187
            Py_INCREF_ptr(par_temp[idx])
188
            Py_DECREF_ptr(ann_temp[idx])
189
            ann_temp[idx] = par_temp[idx]
4454.3.75 by John Arbash Meinel
Move the core loops into module-level helpers.
190
    return 0
191
192
193
cdef int _merge_annotations(this_annotation, annotations, parent_annotations,
194
                            matching_blocks, ann_cache) except -1:
195
    cdef Py_ssize_t parent_idx, ann_idx, lines_idx, match_len, idx
196
    cdef Py_ssize_t pos
197
    cdef PyObject *ann_temp, *par_temp
198
199
    _check_annotations_are_lists(annotations, parent_annotations)
200
    last_ann = None
201
    last_parent = None
202
    last_res = None
203
    for parent_idx, lines_idx, match_len in matching_blocks:
204
        _check_match_ranges(parent_annotations, annotations,
205
                            parent_idx, lines_idx, match_len)
206
        # For lines which match this parent, we will now resolve whether
207
        # this parent wins over the current annotation
208
        for idx from 0 <= idx < match_len:
209
            ann_idx = lines_idx + idx
210
            ann_temp = PyList_GET_ITEM(annotations, ann_idx)
211
            par_temp = PyList_GET_ITEM(parent_annotations, parent_idx + idx)
212
            if (ann_temp == par_temp):
213
                # This is parent, do nothing
214
                # Pointer comparison is fine here. Value comparison would
215
                # be ok, but it will be handled in the final if clause by
216
                # merging the two tuples into the same tuple
217
                # Avoiding the Py_INCREF and function call to
218
                # PyObject_RichCompareBool using pointer comparison drops
219
                # timing from 215ms => 125ms
220
                continue
221
            par_ann = <object>par_temp
222
            ann = <object>ann_temp
223
            if (ann is this_annotation):
224
                # Originally claimed 'this', but it was really in this
225
                # parent
226
                Py_INCREF(par_ann)
227
                PyList_SetItem(annotations, ann_idx, par_ann)
228
                continue
229
            # Resolve the fact that both sides have a different value for
230
            # last modified
231
            if (ann is last_ann and par_ann is last_parent):
232
                Py_INCREF(last_res)
233
                PyList_SetItem(annotations, ann_idx, last_res)
234
            else:
235
                new_ann = _combine_annotations(ann, par_ann, ann_cache)
236
                Py_INCREF(new_ann)
237
                PyList_SetItem(annotations, ann_idx, new_ann)
238
                last_ann = ann
239
                last_parent = par_ann
240
                last_res = new_ann
4454.3.76 by John Arbash Meinel
Always return a value, although Pyrex takes care of that for you if you let it.
241
    return 0
4454.3.54 by John Arbash Meinel
Access the list object directly, and copy the pointers.
242
243
4454.3.73 by John Arbash Meinel
inherit from _annotator_py.Annotator in _annotator_pyx.Annotator.
244
class Annotator(_annotator_py.Annotator):
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
245
    """Class that drives performing annotations."""
246
4454.3.73 by John Arbash Meinel
inherit from _annotator_py.Annotator in _annotator_pyx.Annotator.
247
    def _update_from_first_parent(self, key, annotations, lines, parent_key):
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
248
        """Reannotate this text relative to its first parent."""
4454.3.75 by John Arbash Meinel
Move the core loops into module-level helpers.
249
        (parent_annotations,
250
         matching_blocks) = self._get_parent_annotations_and_matches(
251
                                key, lines, parent_key)
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
252
4454.3.54 by John Arbash Meinel
Access the list object directly, and copy the pointers.
253
        _apply_parent_annotations(annotations, parent_annotations,
254
                                  matching_blocks)
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
255
256
    def _update_from_other_parents(self, key, annotations, lines,
257
                                   this_annotation, parent_key):
258
        """Reannotate this text relative to a second (or more) parent."""
4454.3.75 by John Arbash Meinel
Move the core loops into module-level helpers.
259
        (parent_annotations,
260
         matching_blocks) = self._get_parent_annotations_and_matches(
261
                                key, lines, parent_key)
262
        _merge_annotations(this_annotation, annotations, parent_annotations,
263
                           matching_blocks, self._ann_tuple_cache)
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
264
265
    def annotate_flat(self, key):
266
        """Determine the single-best-revision to source for each line.
267
268
        This is meant as a compatibility thunk to how annotate() used to work.
269
        """
4454.3.55 by John Arbash Meinel
Remove some debugging counters.
270
        cdef Py_ssize_t pos, num_lines
4454.3.77 by John Arbash Meinel
Add support for compatibility with old '_break_annotation_tie' function.
271
272
        from bzrlib import annotate
273
274
        custom_tiebreaker = annotate._break_annotation_tie
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
275
        annotations, lines = self.annotate(key)
4454.3.55 by John Arbash Meinel
Remove some debugging counters.
276
        num_lines = len(lines)
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
277
        out = []
278
        heads = self._get_heads_provider().heads
4454.3.55 by John Arbash Meinel
Remove some debugging counters.
279
        for pos from 0 <= pos < num_lines:
280
            annotation = annotations[pos]
281
            line = lines[pos]
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
282
            if len(annotation) == 1:
4454.3.55 by John Arbash Meinel
Remove some debugging counters.
283
                head = annotation[0]
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
284
            else:
285
                the_heads = heads(annotation)
286
                if len(the_heads) == 1:
4454.3.73 by John Arbash Meinel
inherit from _annotator_py.Annotator in _annotator_pyx.Annotator.
287
                    for head in the_heads: break # get the item out of the set
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
288
                else:
289
                    # We need to resolve the ambiguity, for now just pick the
290
                    # sorted smallest
4454.3.77 by John Arbash Meinel
Add support for compatibility with old '_break_annotation_tie' function.
291
                    head = self._resolve_annotation_tie(the_heads, line,
292
                                                        custom_tiebreaker)
4454.3.55 by John Arbash Meinel
Remove some debugging counters.
293
            PyList_Append(out, (head, line))
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
294
        return out