~bzr-pqm/bzr/bzr.dev

4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
1
# Copyright (C) 2009 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
17
"""Functionality for doing annotations in the 'optimal' way"""
18
4454.3.44 by John Arbash Meinel
Initial results show no improvement for pyrexifing _update_from_one_parent.
19
cdef extern from "python-compat.h":
20
    pass
21
22
cdef extern from "Python.h":
23
    ctypedef int Py_ssize_t
24
    ctypedef struct PyObject:
25
        pass
4454.3.54 by John Arbash Meinel
Access the list object directly, and copy the pointers.
26
    ctypedef struct PyListObject:
27
        PyObject **ob_item
4454.3.44 by John Arbash Meinel
Initial results show no improvement for pyrexifing _update_from_one_parent.
28
    int PyList_CheckExact(object)
29
    PyObject *PyList_GET_ITEM(object, Py_ssize_t o)
30
    Py_ssize_t PyList_GET_SIZE(object)
4454.3.46 by John Arbash Meinel
Combine left and right knowing that we already have sorted tuples.
31
    int PyList_Append(object, object) except -1
4454.3.44 by John Arbash Meinel
Initial results show no improvement for pyrexifing _update_from_one_parent.
32
    int PyList_SetItem(object, Py_ssize_t o, object) except -1
4454.3.46 by John Arbash Meinel
Combine left and right knowing that we already have sorted tuples.
33
    int PyList_Sort(object) except -1
34
35
    int PyTuple_CheckExact(object)
36
    object PyTuple_New(Py_ssize_t len)
37
    void PyTuple_SET_ITEM(object, Py_ssize_t pos, object)
4454.3.50 by John Arbash Meinel
Some internal cleanup, a few more counters.
38
    void PyTuple_SET_ITEM_ptr "PyTuple_SET_ITEM" (object, Py_ssize_t,
39
                                                  PyObject *)
4454.3.46 by John Arbash Meinel
Combine left and right knowing that we already have sorted tuples.
40
    int PyTuple_Resize(PyObject **, Py_ssize_t newlen)
41
    PyObject *PyTuple_GET_ITEM(object, Py_ssize_t o)
42
    Py_ssize_t PyTuple_GET_SIZE(object)
43
44
    PyObject *PyDict_GetItem(object d, object k)
45
    int PyDict_SetItem(object d, object k, object v) except -1
46
4454.3.44 by John Arbash Meinel
Initial results show no improvement for pyrexifing _update_from_one_parent.
47
    void Py_INCREF(object)
4454.3.50 by John Arbash Meinel
Some internal cleanup, a few more counters.
48
    void Py_INCREF_ptr "Py_INCREF" (PyObject *)
4454.3.54 by John Arbash Meinel
Access the list object directly, and copy the pointers.
49
    void Py_DECREF_ptr "Py_DECREF" (PyObject *)
4454.3.44 by John Arbash Meinel
Initial results show no improvement for pyrexifing _update_from_one_parent.
50
4454.3.45 by John Arbash Meinel
A bit of tweaking to the _update_from_other_parents drops us to 7.67s
51
    int Py_EQ
4454.3.50 by John Arbash Meinel
Some internal cleanup, a few more counters.
52
    int Py_LT
4454.3.45 by John Arbash Meinel
A bit of tweaking to the _update_from_other_parents drops us to 7.67s
53
    int PyObject_RichCompareBool(object, object, int opid) except -1
4454.3.50 by John Arbash Meinel
Some internal cleanup, a few more counters.
54
    int PyObject_RichCompareBool_ptr "PyObject_RichCompareBool" (
55
        PyObject *, PyObject *, int opid)
4454.3.44 by John Arbash Meinel
Initial results show no improvement for pyrexifing _update_from_one_parent.
56
4454.3.55 by John Arbash Meinel
Remove some debugging counters.
57
4454.3.73 by John Arbash Meinel
inherit from _annotator_py.Annotator in _annotator_pyx.Annotator.
58
from bzrlib import _annotator_py
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
59
60
4454.3.45 by John Arbash Meinel
A bit of tweaking to the _update_from_other_parents drops us to 7.67s
61
cdef int _check_annotations_are_lists(annotations,
62
                                      parent_annotations) except -1:
63
    if not PyList_CheckExact(annotations):
64
        raise TypeError('annotations must be a list')
65
    if not PyList_CheckExact(parent_annotations):
66
        raise TypeError('parent_annotations must be a list')
67
    return 0
68
69
70
cdef int _check_match_ranges(parent_annotations, annotations,
4454.3.54 by John Arbash Meinel
Access the list object directly, and copy the pointers.
71
                             Py_ssize_t parent_idx, Py_ssize_t lines_idx,
72
                             Py_ssize_t match_len) except -1:
4454.3.45 by John Arbash Meinel
A bit of tweaking to the _update_from_other_parents drops us to 7.67s
73
    if parent_idx + match_len > PyList_GET_SIZE(parent_annotations):
74
        raise ValueError('Match length exceeds len of'
75
                         ' parent_annotations %s > %s'
76
                         % (parent_idx + match_len,
77
                            PyList_GET_SIZE(parent_annotations)))
78
    if lines_idx + match_len > PyList_GET_SIZE(annotations):
79
        raise ValueError('Match length exceeds len of'
80
                         ' annotations %s > %s'
81
                         % (lines_idx + match_len,
82
                            PyList_GET_SIZE(annotations)))
83
    return 0
84
85
4454.3.50 by John Arbash Meinel
Some internal cleanup, a few more counters.
86
cdef PyObject *_next_tuple_entry(object tpl, Py_ssize_t *pos):
87
    pos[0] = pos[0] + 1
88
    if pos[0] >= PyTuple_GET_SIZE(tpl):
89
        return NULL
90
    return PyTuple_GET_ITEM(tpl, pos[0])
91
92
4454.3.46 by John Arbash Meinel
Combine left and right knowing that we already have sorted tuples.
93
cdef object _combine_annotations(ann_one, ann_two, cache):
94
    """Combine the annotations from both sides."""
95
    cdef Py_ssize_t pos_one, pos_two, len_one, len_two
96
    cdef Py_ssize_t out_pos
4454.3.50 by John Arbash Meinel
Some internal cleanup, a few more counters.
97
    cdef PyObject *temp, *left, *right
4454.3.46 by John Arbash Meinel
Combine left and right knowing that we already have sorted tuples.
98
4454.3.51 by John Arbash Meinel
Using a global cache gives us key uniqueness.
99
    if (PyObject_RichCompareBool(ann_one, ann_two, Py_LT)):
100
        cache_key = (ann_one, ann_two)
101
    else:
102
        cache_key = (ann_two, ann_one)
4454.3.46 by John Arbash Meinel
Combine left and right knowing that we already have sorted tuples.
103
    temp = PyDict_GetItem(cache, cache_key)
104
    if temp != NULL:
105
        return <object>temp
106
107
    if not PyTuple_CheckExact(ann_one) or not PyTuple_CheckExact(ann_two):
108
        raise TypeError('annotations must be tuples')
109
    # We know that annotations are tuples, and that both sides are already
110
    # sorted, so we can just walk and update a new list.
4454.3.50 by John Arbash Meinel
Some internal cleanup, a few more counters.
111
    pos_one = -1
112
    pos_two = -1
4454.3.46 by John Arbash Meinel
Combine left and right knowing that we already have sorted tuples.
113
    out_pos = 0
4454.3.50 by John Arbash Meinel
Some internal cleanup, a few more counters.
114
    left = _next_tuple_entry(ann_one, &pos_one)
115
    right = _next_tuple_entry(ann_two, &pos_two)
116
    new_ann = PyTuple_New(PyTuple_GET_SIZE(ann_one)
117
                          + PyTuple_GET_SIZE(ann_two))
118
    while left != NULL and right != NULL:
4454.3.74 by John Arbash Meinel
Some small tweaks, add more documentation for 'add_special_text'.
119
        # left == right is done by PyObject_RichCompareBool_ptr, however it
120
        # avoids a function call for a very common case. Drops 'time bzr
121
        # annotate NEWS' from 7.25s to 7.16s, so it *is* a visible impact.
122
        if (left == right
123
            or PyObject_RichCompareBool_ptr(left, right, Py_EQ)):
4454.3.50 by John Arbash Meinel
Some internal cleanup, a few more counters.
124
            # Identical values, step both
125
            Py_INCREF_ptr(left)
126
            PyTuple_SET_ITEM_ptr(new_ann, out_pos, left)
127
            left = _next_tuple_entry(ann_one, &pos_one)
128
            right = _next_tuple_entry(ann_two, &pos_two)
129
        elif (PyObject_RichCompareBool_ptr(left, right, Py_LT)):
130
            # left < right or right == NULL
131
            Py_INCREF_ptr(left)
132
            PyTuple_SET_ITEM_ptr(new_ann, out_pos, left)
133
            left = _next_tuple_entry(ann_one, &pos_one)
134
        else: # right < left or left == NULL
135
            Py_INCREF_ptr(right)
136
            PyTuple_SET_ITEM_ptr(new_ann, out_pos, right)
137
            right = _next_tuple_entry(ann_two, &pos_two)
138
        out_pos = out_pos + 1
139
    while left != NULL:
140
        Py_INCREF_ptr(left)
141
        PyTuple_SET_ITEM_ptr(new_ann, out_pos, left)
142
        left = _next_tuple_entry(ann_one, &pos_one)
143
        out_pos = out_pos + 1
144
    while right != NULL:
145
        Py_INCREF_ptr(right)
146
        PyTuple_SET_ITEM_ptr(new_ann, out_pos, right)
147
        right = _next_tuple_entry(ann_two, &pos_two)
148
        out_pos = out_pos + 1
149
    if out_pos != PyTuple_GET_SIZE(new_ann):
150
        # Timing _PyTuple_Resize was not significantly faster that slicing
4454.3.46 by John Arbash Meinel
Combine left and right knowing that we already have sorted tuples.
151
        # PyTuple_Resize((<PyObject **>new_ann), out_pos)
152
        new_ann = new_ann[0:out_pos]
153
    PyDict_SetItem(cache, cache_key, new_ann)
154
    return new_ann
155
156
4454.3.75 by John Arbash Meinel
Move the core loops into module-level helpers.
157
cdef int _apply_parent_annotations(annotations, parent_annotations,
158
                                   matching_blocks) except -1:
4454.3.54 by John Arbash Meinel
Access the list object directly, and copy the pointers.
159
    """Apply the annotations from parent_annotations into annotations.
160
161
    matching_blocks defines the ranges that match.
162
    """
163
    cdef Py_ssize_t parent_idx, lines_idx, match_len, idx
164
    cdef PyListObject *par_list, *ann_list
165
    cdef PyObject **par_temp, **ann_temp
166
167
    _check_annotations_are_lists(annotations, parent_annotations)
168
    par_list = <PyListObject *>parent_annotations
169
    ann_list = <PyListObject *>annotations
4454.3.55 by John Arbash Meinel
Remove some debugging counters.
170
    # For NEWS and bzrlib/builtins.py, over 99% of the lines are simply copied
171
    # across from the parent entry. So this routine is heavily optimized for
172
    # that. Would be interesting if we could use memcpy() but we have to incref
173
    # and decref
4454.3.54 by John Arbash Meinel
Access the list object directly, and copy the pointers.
174
    for parent_idx, lines_idx, match_len in matching_blocks:
175
        _check_match_ranges(parent_annotations, annotations,
176
                            parent_idx, lines_idx, match_len)
177
        par_temp = par_list.ob_item + parent_idx
178
        ann_temp = ann_list.ob_item + lines_idx
179
        for idx from 0 <= idx < match_len:
180
            Py_INCREF_ptr(par_temp[idx])
181
            Py_DECREF_ptr(ann_temp[idx])
182
            ann_temp[idx] = par_temp[idx]
4454.3.75 by John Arbash Meinel
Move the core loops into module-level helpers.
183
    return 0
184
185
186
cdef int _merge_annotations(this_annotation, annotations, parent_annotations,
187
                            matching_blocks, ann_cache) except -1:
188
    cdef Py_ssize_t parent_idx, ann_idx, lines_idx, match_len, idx
189
    cdef Py_ssize_t pos
190
    cdef PyObject *ann_temp, *par_temp
191
192
    _check_annotations_are_lists(annotations, parent_annotations)
193
    last_ann = None
194
    last_parent = None
195
    last_res = None
196
    for parent_idx, lines_idx, match_len in matching_blocks:
197
        _check_match_ranges(parent_annotations, annotations,
198
                            parent_idx, lines_idx, match_len)
199
        # For lines which match this parent, we will now resolve whether
200
        # this parent wins over the current annotation
201
        for idx from 0 <= idx < match_len:
202
            ann_idx = lines_idx + idx
203
            ann_temp = PyList_GET_ITEM(annotations, ann_idx)
204
            par_temp = PyList_GET_ITEM(parent_annotations, parent_idx + idx)
205
            if (ann_temp == par_temp):
206
                # This is parent, do nothing
207
                # Pointer comparison is fine here. Value comparison would
208
                # be ok, but it will be handled in the final if clause by
209
                # merging the two tuples into the same tuple
210
                # Avoiding the Py_INCREF and function call to
211
                # PyObject_RichCompareBool using pointer comparison drops
212
                # timing from 215ms => 125ms
213
                continue
214
            par_ann = <object>par_temp
215
            ann = <object>ann_temp
216
            if (ann is this_annotation):
217
                # Originally claimed 'this', but it was really in this
218
                # parent
219
                Py_INCREF(par_ann)
220
                PyList_SetItem(annotations, ann_idx, par_ann)
221
                continue
222
            # Resolve the fact that both sides have a different value for
223
            # last modified
224
            if (ann is last_ann and par_ann is last_parent):
225
                Py_INCREF(last_res)
226
                PyList_SetItem(annotations, ann_idx, last_res)
227
            else:
228
                new_ann = _combine_annotations(ann, par_ann, ann_cache)
229
                Py_INCREF(new_ann)
230
                PyList_SetItem(annotations, ann_idx, new_ann)
231
                last_ann = ann
232
                last_parent = par_ann
233
                last_res = new_ann
4454.3.76 by John Arbash Meinel
Always return a value, although Pyrex takes care of that for you if you let it.
234
    return 0
4454.3.54 by John Arbash Meinel
Access the list object directly, and copy the pointers.
235
236
4454.3.73 by John Arbash Meinel
inherit from _annotator_py.Annotator in _annotator_pyx.Annotator.
237
class Annotator(_annotator_py.Annotator):
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
238
    """Class that drives performing annotations."""
239
4454.3.73 by John Arbash Meinel
inherit from _annotator_py.Annotator in _annotator_pyx.Annotator.
240
    def _update_from_first_parent(self, key, annotations, lines, parent_key):
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
241
        """Reannotate this text relative to its first parent."""
4454.3.75 by John Arbash Meinel
Move the core loops into module-level helpers.
242
        (parent_annotations,
243
         matching_blocks) = self._get_parent_annotations_and_matches(
244
                                key, lines, parent_key)
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
245
4454.3.54 by John Arbash Meinel
Access the list object directly, and copy the pointers.
246
        _apply_parent_annotations(annotations, parent_annotations,
247
                                  matching_blocks)
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
248
249
    def _update_from_other_parents(self, key, annotations, lines,
250
                                   this_annotation, parent_key):
251
        """Reannotate this text relative to a second (or more) parent."""
4454.3.75 by John Arbash Meinel
Move the core loops into module-level helpers.
252
        (parent_annotations,
253
         matching_blocks) = self._get_parent_annotations_and_matches(
254
                                key, lines, parent_key)
255
        _merge_annotations(this_annotation, annotations, parent_annotations,
256
                           matching_blocks, self._ann_tuple_cache)
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
257
258
    def annotate_flat(self, key):
259
        """Determine the single-best-revision to source for each line.
260
261
        This is meant as a compatibility thunk to how annotate() used to work.
262
        """
4454.3.55 by John Arbash Meinel
Remove some debugging counters.
263
        cdef Py_ssize_t pos, num_lines
4454.3.77 by John Arbash Meinel
Add support for compatibility with old '_break_annotation_tie' function.
264
265
        from bzrlib import annotate
266
267
        custom_tiebreaker = annotate._break_annotation_tie
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
268
        annotations, lines = self.annotate(key)
4454.3.55 by John Arbash Meinel
Remove some debugging counters.
269
        num_lines = len(lines)
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
270
        out = []
271
        heads = self._get_heads_provider().heads
4454.3.55 by John Arbash Meinel
Remove some debugging counters.
272
        for pos from 0 <= pos < num_lines:
273
            annotation = annotations[pos]
274
            line = lines[pos]
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
275
            if len(annotation) == 1:
4454.3.55 by John Arbash Meinel
Remove some debugging counters.
276
                head = annotation[0]
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
277
            else:
278
                the_heads = heads(annotation)
279
                if len(the_heads) == 1:
4454.3.73 by John Arbash Meinel
inherit from _annotator_py.Annotator in _annotator_pyx.Annotator.
280
                    for head in the_heads: break # get the item out of the set
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
281
                else:
282
                    # We need to resolve the ambiguity, for now just pick the
283
                    # sorted smallest
4454.3.77 by John Arbash Meinel
Add support for compatibility with old '_break_annotation_tie' function.
284
                    head = self._resolve_annotation_tie(the_heads, line,
285
                                                        custom_tiebreaker)
4454.3.55 by John Arbash Meinel
Remove some debugging counters.
286
            PyList_Append(out, (head, line))
4454.3.43 by John Arbash Meinel
Initial implementation of a Pyrex annotator.
287
        return out