~bzr-pqm/bzr/bzr.dev

4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
1
# Copyright (C) 2009 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
17
"""Implementation of Graph algorithms when we have already loaded everything.
18
"""
19
20
cdef extern from "python-compat.h":
21
    pass
22
23
cdef extern from "Python.h":
24
    ctypedef int Py_ssize_t
25
    ctypedef struct PyObject:
26
        pass
27
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
28
    int PyString_CheckExact(object)
4641.5.12 by John Arbash Meinel
small cleanup.
29
30
    int PyObject_RichCompareBool(object, object, int)
4641.5.11 by John Arbash Meinel
Using PyObject_RichCompareBool is a significant benefit. 8.8ms down to 7.7ms.
31
    int Py_LT
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
32
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
33
    int PyTuple_CheckExact(object)
4371.3.31 by John Arbash Meinel
Change _KnownGraph.parents to be a tuple rather than a list.
34
    object PyTuple_New(Py_ssize_t n)
4371.4.23 by John Arbash Meinel
Clean up the imports at the top.
35
    Py_ssize_t PyTuple_GET_SIZE(object t)
36
    PyObject * PyTuple_GET_ITEM(object t, Py_ssize_t o)
4371.3.31 by John Arbash Meinel
Change _KnownGraph.parents to be a tuple rather than a list.
37
    void PyTuple_SET_ITEM(object t, Py_ssize_t o, object v)
4371.4.23 by John Arbash Meinel
Clean up the imports at the top.
38
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
39
    int PyList_CheckExact(object)
4371.4.23 by John Arbash Meinel
Clean up the imports at the top.
40
    Py_ssize_t PyList_GET_SIZE(object l)
41
    PyObject * PyList_GET_ITEM(object l, Py_ssize_t o)
42
    int PyList_SetItem(object l, Py_ssize_t o, object l) except -1
43
    int PyList_Append(object l, object v) except -1
44
45
    int PyDict_CheckExact(object d)
46
    Py_ssize_t PyDict_Size(object d) except -1
4371.3.28 by John Arbash Meinel
Finish converting away from 'cdef list' syntax.
47
    PyObject * PyDict_GetItem(object d, object k)
4371.4.23 by John Arbash Meinel
Clean up the imports at the top.
48
    int PyDict_SetItem(object d, object k, object v) except -1
4371.4.18 by John Arbash Meinel
Big performance win, back to 650ms.
49
    int PyDict_DelItem(object d, object k) except -1
4371.3.28 by John Arbash Meinel
Finish converting away from 'cdef list' syntax.
50
    int PyDict_Next(object d, Py_ssize_t *pos, PyObject **k, PyObject **v)
4371.4.21 by John Arbash Meinel
Spend some time optimizing the inner heads() loop, approx 2x faster.
51
4371.3.20 by John Arbash Meinel
A few tweaks to the pyrex version.
52
    void Py_INCREF(object)
53
4593.5.15 by John Arbash Meinel
Use the same implementation of stack handling to avoid append/pop.
54
import gc
4371.3.21 by John Arbash Meinel
Attempt to make things better by splitting out left_parent and right_parent into
55
4593.5.3 by John Arbash Meinel
Bring in the optimized tsort code.
56
from bzrlib import errors, revision
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
57
4371.4.21 by John Arbash Meinel
Spend some time optimizing the inner heads() loop, approx 2x faster.
58
cdef object NULL_REVISION
59
NULL_REVISION = revision.NULL_REVISION
60
61
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
62
cdef class _KnownGraphNode:
63
    """Represents a single object in the known graph."""
64
4371.3.35 by John Arbash Meinel
Changing the heapq algorithm actually ends up with better overall performance.
65
    cdef object key
4371.3.28 by John Arbash Meinel
Finish converting away from 'cdef list' syntax.
66
    cdef object parents
67
    cdef object children
4371.4.13 by Vincent Ladeuil
Fixed as per John's review.
68
    cdef public long gdfo
4371.4.22 by John Arbash Meinel
Changing from a set of seen to a list to cleanup and a seen attribute.
69
    cdef int seen
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
70
    cdef object extra
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
71
4371.3.31 by John Arbash Meinel
Change _KnownGraph.parents to be a tuple rather than a list.
72
    def __init__(self, key):
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
73
        self.key = key
4371.3.31 by John Arbash Meinel
Change _KnownGraph.parents to be a tuple rather than a list.
74
        self.parents = None
4371.3.21 by John Arbash Meinel
Attempt to make things better by splitting out left_parent and right_parent into
75
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
76
        self.children = []
77
        # Greatest distance from origin
78
        self.gdfo = -1
4371.4.22 by John Arbash Meinel
Changing from a set of seen to a list to cleanup and a seen attribute.
79
        self.seen = 0
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
80
        self.extra = None
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
81
82
    property child_keys:
83
        def __get__(self):
84
            cdef _KnownGraphNode child
85
86
            keys = []
87
            for child in self.children:
4371.3.28 by John Arbash Meinel
Finish converting away from 'cdef list' syntax.
88
                PyList_Append(keys, child.key)
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
89
            return keys
90
91
    cdef clear_references(self):
92
        self.parents = None
93
        self.children = None
94
95
    def __repr__(self):
4371.3.35 by John Arbash Meinel
Changing the heapq algorithm actually ends up with better overall performance.
96
        cdef _KnownGraphNode node
97
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
98
        parent_keys = []
4371.3.35 by John Arbash Meinel
Changing the heapq algorithm actually ends up with better overall performance.
99
        if self.parents is not None:
100
            for node in self.parents:
101
                parent_keys.append(node.key)
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
102
        child_keys = []
4371.3.35 by John Arbash Meinel
Changing the heapq algorithm actually ends up with better overall performance.
103
        if self.children is not None:
104
            for node in self.children:
105
                child_keys.append(node.key)
4371.4.10 by Vincent Ladeuil
Cleanup.
106
        return '%s(%s  gdfo:%s par:%s child:%s)' % (
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
107
            self.__class__.__name__, self.key, self.gdfo,
4371.4.12 by Vincent Ladeuil
Fix typos.
108
            parent_keys, child_keys)
4371.4.10 by Vincent Ladeuil
Cleanup.
109
4371.3.37 by John Arbash Meinel
cleanup passes.
110
4634.117.8 by John Arbash Meinel
Use a class regex to find other signatures to exclude.
111
cdef _KnownGraphNode _get_list_node(lst, Py_ssize_t pos):
4371.4.14 by John Arbash Meinel
Optimize _find_gdfo and make known_parent_gdfos a member of KnownGraphNode
112
    cdef PyObject *temp_node
113
114
    temp_node = PyList_GET_ITEM(lst, pos)
115
    return <_KnownGraphNode>temp_node
116
117
4634.117.8 by John Arbash Meinel
Use a class regex to find other signatures to exclude.
118
cdef _KnownGraphNode _get_tuple_node(tpl, Py_ssize_t pos):
4371.4.14 by John Arbash Meinel
Optimize _find_gdfo and make known_parent_gdfos a member of KnownGraphNode
119
    cdef PyObject *temp_node
120
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
121
    temp_node = PyTuple_GET_ITEM(tpl, pos)
4371.4.14 by John Arbash Meinel
Optimize _find_gdfo and make known_parent_gdfos a member of KnownGraphNode
122
    return <_KnownGraphNode>temp_node
123
124
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
125
def get_key(node):
126
    cdef _KnownGraphNode real_node
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
127
    real_node = node
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
128
    return real_node.key
129
130
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
131
cdef object _sort_list_nodes(object lst_or_tpl, int reverse):
132
    """Sort a list of _KnownGraphNode objects.
133
134
    If lst_or_tpl is a list, it is allowed to mutate in place. It may also
135
    just return the input list if everything is already sorted.
136
    """
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
137
    cdef _KnownGraphNode node1, node2
4641.5.9 by John Arbash Meinel
Explore just walking the parents directly, without sorting.
138
    cdef int do_swap, is_tuple
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
139
    cdef Py_ssize_t length
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
140
4641.5.9 by John Arbash Meinel
Explore just walking the parents directly, without sorting.
141
    is_tuple = PyTuple_CheckExact(lst_or_tpl)
142
    if not (is_tuple or PyList_CheckExact(lst_or_tpl)):
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
143
        raise TypeError('lst_or_tpl must be a list or tuple.')
144
    length = len(lst_or_tpl)
145
    if length == 0 or length == 1:
146
        return lst_or_tpl
147
    if length == 2:
4641.5.9 by John Arbash Meinel
Explore just walking the parents directly, without sorting.
148
        if is_tuple:
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
149
            node1 = _get_tuple_node(lst_or_tpl, 0)
150
            node2 = _get_tuple_node(lst_or_tpl, 1)
151
        else:
152
            node1 = _get_list_node(lst_or_tpl, 0)
153
            node2 = _get_list_node(lst_or_tpl, 1)
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
154
        if reverse:
4641.5.11 by John Arbash Meinel
Using PyObject_RichCompareBool is a significant benefit. 8.8ms down to 7.7ms.
155
            do_swap = PyObject_RichCompareBool(node1.key, node2.key, Py_LT)
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
156
        else:
4641.5.11 by John Arbash Meinel
Using PyObject_RichCompareBool is a significant benefit. 8.8ms down to 7.7ms.
157
            do_swap = PyObject_RichCompareBool(node2.key, node1.key, Py_LT)
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
158
        if not do_swap:
159
            return lst_or_tpl
4641.5.9 by John Arbash Meinel
Explore just walking the parents directly, without sorting.
160
        if is_tuple:
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
161
            return (node2, node1)
162
        else:
4641.5.9 by John Arbash Meinel
Explore just walking the parents directly, without sorting.
163
            # Swap 'in-place', since lists are mutable
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
164
            Py_INCREF(node1)
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
165
            PyList_SetItem(lst_or_tpl, 1, node1)
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
166
            Py_INCREF(node2)
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
167
            PyList_SetItem(lst_or_tpl, 0, node2)
168
            return lst_or_tpl
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
169
    # For all other sizes, we just use 'sorted()'
4641.5.9 by John Arbash Meinel
Explore just walking the parents directly, without sorting.
170
    if is_tuple:
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
171
        # Note that sorted() is just list(iterable).sort()
172
        lst_or_tpl = list(lst_or_tpl)
173
    lst_or_tpl.sort(key=get_key, reverse=reverse)
174
    return lst_or_tpl
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
175
176
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
177
cdef class _MergeSorter
178
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
179
cdef class KnownGraph:
180
    """This is a class which assumes we already know the full graph."""
181
182
    cdef public object _nodes
4371.3.26 by John Arbash Meinel
A few more cleanups. Start moving away from pyrex 0.9.8isms
183
    cdef object _known_heads
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
184
    cdef public int do_cache
185
186
    def __init__(self, parent_map, do_cache=True):
187
        """Create a new KnownGraph instance.
188
189
        :param parent_map: A dictionary mapping key => parent_keys
190
        """
4371.4.29 by John Arbash Meinel
Pre-allocating self._nodes turned out to be slower.
191
        # tests at pre-allocating the node dict actually slowed things down
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
192
        self._nodes = {}
193
        # Maps {sorted(revision_id, revision_id): heads}
194
        self._known_heads = {}
195
        self.do_cache = int(do_cache)
4593.5.29 by John Arbash Meinel
Remove the gc enable/disable code.
196
        # TODO: consider disabling gc since we are allocating a lot of nodes
197
        #       that won't be collectable anyway. real world testing has not
198
        #       shown a specific impact, yet.
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
199
        self._initialize_nodes(parent_map)
200
        self._find_gdfo()
201
202
    def __dealloc__(self):
203
        cdef _KnownGraphNode child
4371.3.28 by John Arbash Meinel
Finish converting away from 'cdef list' syntax.
204
        cdef Py_ssize_t pos
205
        cdef PyObject *temp_node
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
206
4371.3.28 by John Arbash Meinel
Finish converting away from 'cdef list' syntax.
207
        while PyDict_Next(self._nodes, &pos, NULL, &temp_node):
208
            child = <_KnownGraphNode>temp_node
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
209
            child.clear_references()
210
4634.117.8 by John Arbash Meinel
Use a class regex to find other signatures to exclude.
211
    cdef _KnownGraphNode _get_or_create_node(self, key):
4371.3.31 by John Arbash Meinel
Change _KnownGraph.parents to be a tuple rather than a list.
212
        cdef PyObject *temp_node
213
        cdef _KnownGraphNode node
214
215
        temp_node = PyDict_GetItem(self._nodes, key)
216
        if temp_node == NULL:
217
            node = _KnownGraphNode(key)
218
            PyDict_SetItem(self._nodes, key, node)
219
        else:
220
            node = <_KnownGraphNode>temp_node
221
        return node
222
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
223
    def _initialize_nodes(self, parent_map):
224
        """Populate self._nodes.
225
4371.4.9 by Vincent Ladeuil
Simplify gdfo computing by finding tails when at graph build time.
226
        After this has finished:
227
        - self._nodes will have an entry for every entry in parent_map.
228
        - ghosts will have a parent_keys = None,
4371.4.12 by Vincent Ladeuil
Fix typos.
229
        - all nodes found will also have child_keys populated with all known
230
          child keys,
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
231
        """
4371.3.28 by John Arbash Meinel
Finish converting away from 'cdef list' syntax.
232
        cdef PyObject *temp_key, *temp_parent_keys, *temp_node
4371.3.31 by John Arbash Meinel
Change _KnownGraph.parents to be a tuple rather than a list.
233
        cdef Py_ssize_t pos, pos2, num_parent_keys
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
234
        cdef _KnownGraphNode node
235
        cdef _KnownGraphNode parent_node
236
4371.3.28 by John Arbash Meinel
Finish converting away from 'cdef list' syntax.
237
        if not PyDict_CheckExact(parent_map):
238
            raise TypeError('parent_map should be a dict of {key:parent_keys}')
239
        # for key, parent_keys in parent_map.iteritems():
240
        pos = 0
241
        while PyDict_Next(parent_map, &pos, &temp_key, &temp_parent_keys):
242
            key = <object>temp_key
243
            parent_keys = <object>temp_parent_keys
4371.4.9 by Vincent Ladeuil
Simplify gdfo computing by finding tails when at graph build time.
244
            num_parent_keys = len(parent_keys)
4371.4.28 by John Arbash Meinel
We spent a lot of time adding and removing keys from tails.
245
            node = self._get_or_create_node(key)
4593.5.26 by John Arbash Meinel
small cleanup, a bit of comment tweaking.
246
            # We know how many parents, so we pre allocate the tuple
4371.3.31 by John Arbash Meinel
Change _KnownGraph.parents to be a tuple rather than a list.
247
            parent_nodes = PyTuple_New(num_parent_keys)
248
            for pos2 from 0 <= pos2 < num_parent_keys:
4593.5.26 by John Arbash Meinel
small cleanup, a bit of comment tweaking.
249
                # Note: it costs us 10ms out of 40ms to lookup all of these
250
                #       parents, it doesn't seem to be an allocation overhead,
251
                #       but rather a lookup overhead. There doesn't seem to be
252
                #       a way around it, and that is one reason why
253
                #       KnownGraphNode maintains a direct pointer to the parent
254
                #       node.
255
                # We use [] because parent_keys may be a tuple or list
4371.4.28 by John Arbash Meinel
We spent a lot of time adding and removing keys from tails.
256
                parent_node = self._get_or_create_node(parent_keys[pos2])
4371.3.31 by John Arbash Meinel
Change _KnownGraph.parents to be a tuple rather than a list.
257
                # PyTuple_SET_ITEM will steal a reference, so INCREF first
258
                Py_INCREF(parent_node)
259
                PyTuple_SET_ITEM(parent_nodes, pos2, parent_node)
4371.3.28 by John Arbash Meinel
Finish converting away from 'cdef list' syntax.
260
                PyList_Append(parent_node.children, node)
261
            node.parents = parent_nodes
4371.4.30 by John Arbash Meinel
We don't need self._tails anymore, nor does it need to be a set.
262
263
    def _find_tails(self):
264
        cdef PyObject *temp_node
265
        cdef _KnownGraphNode node
266
        cdef Py_ssize_t pos
267
268
        tails = []
4371.4.28 by John Arbash Meinel
We spent a lot of time adding and removing keys from tails.
269
        pos = 0
270
        while PyDict_Next(self._nodes, &pos, NULL, &temp_node):
271
            node = <_KnownGraphNode>temp_node
272
            if node.parents is None or PyTuple_GET_SIZE(node.parents) == 0:
4371.4.30 by John Arbash Meinel
We don't need self._tails anymore, nor does it need to be a set.
273
                node.gdfo = 1
274
                PyList_Append(tails, node)
275
        return tails
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
276
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
277
    def _find_tips(self):
278
        cdef PyObject *temp_node
279
        cdef _KnownGraphNode node
280
        cdef Py_ssize_t pos
281
282
        tips = []
283
        pos = 0
284
        while PyDict_Next(self._nodes, &pos, NULL, &temp_node):
285
            node = <_KnownGraphNode>temp_node
286
            if PyList_GET_SIZE(node.children) == 0:
287
                PyList_Append(tips, node)
288
        return tips
289
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
290
    def _find_gdfo(self):
4371.4.8 by Vincent Ladeuil
Replace the existing KnownGraph._find_gdfo.
291
        cdef _KnownGraphNode node
292
        cdef _KnownGraphNode child
4371.4.14 by John Arbash Meinel
Optimize _find_gdfo and make known_parent_gdfos a member of KnownGraphNode
293
        cdef PyObject *temp
4371.4.30 by John Arbash Meinel
We don't need self._tails anymore, nor does it need to be a set.
294
        cdef Py_ssize_t pos
4371.4.14 by John Arbash Meinel
Optimize _find_gdfo and make known_parent_gdfos a member of KnownGraphNode
295
        cdef int replace
4371.4.26 by John Arbash Meinel
Don't shrink the pending list.
296
        cdef Py_ssize_t last_item
4371.4.25 by John Arbash Meinel
We had an 'child_node.gdfo is None' check in the inner loop.
297
        cdef long next_gdfo
4371.4.8 by Vincent Ladeuil
Replace the existing KnownGraph._find_gdfo.
298
4371.4.30 by John Arbash Meinel
We don't need self._tails anymore, nor does it need to be a set.
299
        pending = self._find_tails()
4371.4.8 by Vincent Ladeuil
Replace the existing KnownGraph._find_gdfo.
300
4371.4.26 by John Arbash Meinel
Don't shrink the pending list.
301
        last_item = PyList_GET_SIZE(pending) - 1
302
        while last_item >= 0:
4371.4.14 by John Arbash Meinel
Optimize _find_gdfo and make known_parent_gdfos a member of KnownGraphNode
303
            # Avoid pop followed by push, instead, peek, and replace
4371.4.16 by John Arbash Meinel
Document some perf issues with using known_parent_gdfos, but leave it as a dict.
304
            # timing shows this is 930ms => 770ms for OOo
4371.4.26 by John Arbash Meinel
Don't shrink the pending list.
305
            node = _get_list_node(pending, last_item)
306
            last_item = last_item - 1
4371.4.25 by John Arbash Meinel
We had an 'child_node.gdfo is None' check in the inner loop.
307
            next_gdfo = node.gdfo + 1
4371.4.30 by John Arbash Meinel
We don't need self._tails anymore, nor does it need to be a set.
308
            for pos from 0 <= pos < PyList_GET_SIZE(node.children):
309
                child = _get_list_node(node.children, pos)
4371.4.25 by John Arbash Meinel
We had an 'child_node.gdfo is None' check in the inner loop.
310
                if next_gdfo > child.gdfo:
311
                    child.gdfo = next_gdfo
4371.4.27 by John Arbash Meinel
Re-use the child.seen attribute for tracking num parents.
312
                child.seen = child.seen + 1
313
                if child.seen == PyTuple_GET_SIZE(child.parents):
4371.4.17 by John Arbash Meinel
tried a different tack, to use a new list and append.
314
                    # This child is populated, queue it to be walked
4371.4.26 by John Arbash Meinel
Don't shrink the pending list.
315
                    last_item = last_item + 1
316
                    if last_item < PyList_GET_SIZE(pending):
4371.4.14 by John Arbash Meinel
Optimize _find_gdfo and make known_parent_gdfos a member of KnownGraphNode
317
                        Py_INCREF(child) # SetItem steals a ref
4371.4.26 by John Arbash Meinel
Don't shrink the pending list.
318
                        PyList_SetItem(pending, last_item, child)
4371.4.14 by John Arbash Meinel
Optimize _find_gdfo and make known_parent_gdfos a member of KnownGraphNode
319
                    else:
320
                        PyList_Append(pending, child)
4371.4.27 by John Arbash Meinel
Re-use the child.seen attribute for tracking num parents.
321
                    # We have queued this node, we don't need to track it
322
                    # anymore
323
                    child.seen = 0
4371.4.8 by Vincent Ladeuil
Replace the existing KnownGraph._find_gdfo.
324
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
325
    def heads(self, keys):
326
        """Return the heads from amongst keys.
327
328
        This is done by searching the ancestries of each key.  Any key that is
329
        reachable from another key is not returned; all the others are.
330
4371.4.7 by Vincent Ladeuil
Simple Pyrex version for the new heads() method.
331
        This operation scales with the relative depth between any two keys. It
332
        uses gdfo to avoid walking all ancestry.
333
334
        :param keys: An iterable of keys.
335
        :return: A set of the heads. Note that as a set there is no ordering
336
            information. Callers will need to filter their input to create
337
            order if they need it.
338
        """
339
        cdef PyObject *maybe_node
340
        cdef PyObject *maybe_heads
341
        cdef PyObject *temp_node
342
        cdef _KnownGraphNode node
4371.4.31 by John Arbash Meinel
Apply the same 'never pop' logic to the heads() code.
343
        cdef Py_ssize_t pos, last_item
4371.4.21 by John Arbash Meinel
Spend some time optimizing the inner heads() loop, approx 2x faster.
344
        cdef long min_gdfo
4371.4.7 by Vincent Ladeuil
Simple Pyrex version for the new heads() method.
345
4526.10.1 by John Arbash Meinel
Avoid using PyFrozenSet_New
346
        heads_key = frozenset(keys)
4371.4.7 by Vincent Ladeuil
Simple Pyrex version for the new heads() method.
347
        maybe_heads = PyDict_GetItem(self._known_heads, heads_key)
348
        if maybe_heads != NULL:
349
            return <object>maybe_heads
350
        # Not cached, compute it ourselves
351
        candidate_nodes = {}
352
        for key in keys:
4371.4.28 by John Arbash Meinel
We spent a lot of time adding and removing keys from tails.
353
            maybe_node = PyDict_GetItem(self._nodes, key)
4371.4.7 by Vincent Ladeuil
Simple Pyrex version for the new heads() method.
354
            if maybe_node == NULL:
355
                raise KeyError('key %s not in nodes' % (key,))
356
            PyDict_SetItem(candidate_nodes, key, <object>maybe_node)
4371.4.21 by John Arbash Meinel
Spend some time optimizing the inner heads() loop, approx 2x faster.
357
        maybe_node = PyDict_GetItem(candidate_nodes, NULL_REVISION)
358
        if maybe_node != NULL:
4371.4.7 by Vincent Ladeuil
Simple Pyrex version for the new heads() method.
359
            # NULL_REVISION is only a head if it is the only entry
4371.4.21 by John Arbash Meinel
Spend some time optimizing the inner heads() loop, approx 2x faster.
360
            candidate_nodes.pop(NULL_REVISION)
4371.4.7 by Vincent Ladeuil
Simple Pyrex version for the new heads() method.
361
            if not candidate_nodes:
4371.4.21 by John Arbash Meinel
Spend some time optimizing the inner heads() loop, approx 2x faster.
362
                return frozenset([NULL_REVISION])
4371.4.7 by Vincent Ladeuil
Simple Pyrex version for the new heads() method.
363
            # The keys changed, so recalculate heads_key
4526.10.1 by John Arbash Meinel
Avoid using PyFrozenSet_New
364
            heads_key = frozenset(candidate_nodes)
4371.4.21 by John Arbash Meinel
Spend some time optimizing the inner heads() loop, approx 2x faster.
365
        if PyDict_Size(candidate_nodes) < 2:
4371.4.7 by Vincent Ladeuil
Simple Pyrex version for the new heads() method.
366
            return heads_key
367
4371.4.22 by John Arbash Meinel
Changing from a set of seen to a list to cleanup and a seen attribute.
368
        cleanup = []
4371.4.7 by Vincent Ladeuil
Simple Pyrex version for the new heads() method.
369
        pending = []
4371.4.21 by John Arbash Meinel
Spend some time optimizing the inner heads() loop, approx 2x faster.
370
        # we know a gdfo cannot be longer than a linear chain of all nodes
371
        min_gdfo = PyDict_Size(self._nodes) + 1
372
        # Build up nodes that need to be walked, note that starting nodes are
373
        # not added to seen()
4371.4.7 by Vincent Ladeuil
Simple Pyrex version for the new heads() method.
374
        pos = 0
375
        while PyDict_Next(candidate_nodes, &pos, NULL, &temp_node):
376
            node = <_KnownGraphNode>temp_node
377
            if node.parents is not None:
378
                pending.extend(node.parents)
4371.4.21 by John Arbash Meinel
Spend some time optimizing the inner heads() loop, approx 2x faster.
379
            if node.gdfo < min_gdfo:
4371.4.7 by Vincent Ladeuil
Simple Pyrex version for the new heads() method.
380
                min_gdfo = node.gdfo
4371.4.21 by John Arbash Meinel
Spend some time optimizing the inner heads() loop, approx 2x faster.
381
382
        # Now do all the real work
4371.4.31 by John Arbash Meinel
Apply the same 'never pop' logic to the heads() code.
383
        last_item = PyList_GET_SIZE(pending) - 1
384
        while last_item >= 0:
385
            node = _get_list_node(pending, last_item)
386
            last_item = last_item - 1
4371.4.22 by John Arbash Meinel
Changing from a set of seen to a list to cleanup and a seen attribute.
387
            if node.seen:
4371.4.7 by Vincent Ladeuil
Simple Pyrex version for the new heads() method.
388
                # node already appears in some ancestry
389
                continue
4371.4.22 by John Arbash Meinel
Changing from a set of seen to a list to cleanup and a seen attribute.
390
            PyList_Append(cleanup, node)
391
            node.seen = 1
4371.4.7 by Vincent Ladeuil
Simple Pyrex version for the new heads() method.
392
            if node.gdfo <= min_gdfo:
393
                continue
4371.4.21 by John Arbash Meinel
Spend some time optimizing the inner heads() loop, approx 2x faster.
394
            if node.parents is not None and PyTuple_GET_SIZE(node.parents) > 0:
395
                for pos from 0 <= pos < PyTuple_GET_SIZE(node.parents):
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
396
                    parent_node = _get_tuple_node(node.parents, pos)
4371.4.31 by John Arbash Meinel
Apply the same 'never pop' logic to the heads() code.
397
                    last_item = last_item + 1
398
                    if last_item < PyList_GET_SIZE(pending):
399
                        Py_INCREF(parent_node) # SetItem steals a ref
400
                        PyList_SetItem(pending, last_item, parent_node)
4371.4.21 by John Arbash Meinel
Spend some time optimizing the inner heads() loop, approx 2x faster.
401
                    else:
402
                        PyList_Append(pending, parent_node)
4371.4.22 by John Arbash Meinel
Changing from a set of seen to a list to cleanup and a seen attribute.
403
        heads = []
404
        pos = 0
405
        while PyDict_Next(candidate_nodes, &pos, NULL, &temp_node):
406
            node = <_KnownGraphNode>temp_node
407
            if not node.seen:
408
                PyList_Append(heads, node.key)
4526.10.1 by John Arbash Meinel
Avoid using PyFrozenSet_New
409
        heads = frozenset(heads)
4371.4.22 by John Arbash Meinel
Changing from a set of seen to a list to cleanup and a seen attribute.
410
        for pos from 0 <= pos < PyList_GET_SIZE(cleanup):
411
            node = _get_list_node(cleanup, pos)
412
            node.seen = 0
4371.4.7 by Vincent Ladeuil
Simple Pyrex version for the new heads() method.
413
        if self.do_cache:
4371.4.21 by John Arbash Meinel
Spend some time optimizing the inner heads() loop, approx 2x faster.
414
            PyDict_SetItem(self._known_heads, heads_key, heads)
4371.4.7 by Vincent Ladeuil
Simple Pyrex version for the new heads() method.
415
        return heads
4577.3.2 by John Arbash Meinel
Implement KnownGraph.topo_sort.
416
417
    def topo_sort(self):
418
        """Return the nodes in topological order.
419
420
        All parents must occur before all children.
421
        """
422
        # This is, for the most part, the same iteration order that we used for
423
        # _find_gdfo, consider finding a way to remove the duplication
424
        # In general, we find the 'tails' (nodes with no parents), and then
425
        # walk to the children. For children that have all of their parents
426
        # yielded, we queue up the child to be yielded as well.
427
        cdef _KnownGraphNode node
428
        cdef _KnownGraphNode child
429
        cdef PyObject *temp
430
        cdef Py_ssize_t pos
431
        cdef int replace
432
        cdef Py_ssize_t last_item
433
434
        pending = self._find_tails()
4593.5.3 by John Arbash Meinel
Bring in the optimized tsort code.
435
        if PyList_GET_SIZE(pending) == 0 and len(self._nodes) > 0:
436
            raise errors.GraphCycleError(self._nodes)
4577.3.2 by John Arbash Meinel
Implement KnownGraph.topo_sort.
437
438
        topo_order = []
439
440
        last_item = PyList_GET_SIZE(pending) - 1
441
        while last_item >= 0:
442
            # Avoid pop followed by push, instead, peek, and replace
443
            # timing shows this is 930ms => 770ms for OOo
444
            node = _get_list_node(pending, last_item)
445
            last_item = last_item - 1
4593.5.3 by John Arbash Meinel
Bring in the optimized tsort code.
446
            if node.parents is not None:
447
                # We don't include ghost parents
448
                PyList_Append(topo_order, node.key)
4577.3.2 by John Arbash Meinel
Implement KnownGraph.topo_sort.
449
            for pos from 0 <= pos < PyList_GET_SIZE(node.children):
450
                child = _get_list_node(node.children, pos)
4593.5.3 by John Arbash Meinel
Bring in the optimized tsort code.
451
                if child.gdfo == -1:
452
                    # We know we have a graph cycle because a node has a parent
453
                    # which we couldn't find
454
                    raise errors.GraphCycleError(self._nodes)
4577.3.2 by John Arbash Meinel
Implement KnownGraph.topo_sort.
455
                child.seen = child.seen + 1
456
                if child.seen == PyTuple_GET_SIZE(child.parents):
457
                    # All parents of this child have been yielded, queue this
458
                    # one to be yielded as well
459
                    last_item = last_item + 1
460
                    if last_item < PyList_GET_SIZE(pending):
461
                        Py_INCREF(child) # SetItem steals a ref
462
                        PyList_SetItem(pending, last_item, child)
463
                    else:
464
                        PyList_Append(pending, child)
465
                    # We have queued this node, we don't need to track it
466
                    # anymore
467
                    child.seen = 0
468
        # We started from the parents, so we don't need to do anymore work
469
        return topo_order
4593.5.6 by John Arbash Meinel
Create the crude merge_sort implementation that just thunks over to the old implementation.
470
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
471
    def gc_sort(self):
472
        """Return a reverse topological ordering which is 'stable'.
473
474
        There are a few constraints:
475
          1) Reverse topological (all children before all parents)
476
          2) Grouped by prefix
477
          3) 'stable' sorting, so that we get the same result, independent of
478
             machine, or extra data.
479
        To do this, we use the same basic algorithm as topo_sort, but when we
480
        aren't sure what node to access next, we sort them lexicographically.
481
        """
482
        cdef PyObject *temp
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
483
        cdef Py_ssize_t pos, last_item
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
484
        cdef _KnownGraphNode node, node2, parent_node
485
486
        tips = self._find_tips()
487
        # Split the tips based on prefix
488
        prefix_tips = {}
489
        for pos from 0 <= pos < PyList_GET_SIZE(tips):
490
            node = _get_list_node(tips, pos)
491
            if PyString_CheckExact(node.key) or len(node.key) == 1:
492
                prefix = ''
493
            else:
494
                prefix = node.key[0]
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
495
            temp = PyDict_GetItem(prefix_tips, prefix)
496
            if temp == NULL:
497
                prefix_tips[prefix] = [node]
498
            else:
499
                tip_nodes = <object>temp
500
                PyList_Append(tip_nodes, node)
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
501
502
        result = []
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
503
        for prefix in sorted(prefix_tips):
504
            temp = PyDict_GetItem(prefix_tips, prefix)
505
            assert temp != NULL
506
            tip_nodes = <object>temp
507
            pending = _sort_list_nodes(tip_nodes, 1)
508
            last_item = PyList_GET_SIZE(pending) - 1
509
            while last_item >= 0:
510
                node = _get_list_node(pending, last_item)
511
                last_item = last_item - 1
4641.5.6 by John Arbash Meinel
Add support for skipping ghost nodes.
512
                if node.parents is None:
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
513
                    # Ghost
4641.5.6 by John Arbash Meinel
Add support for skipping ghost nodes.
514
                    continue
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
515
                PyList_Append(result, node.key)
4641.5.11 by John Arbash Meinel
Using PyObject_RichCompareBool is a significant benefit. 8.8ms down to 7.7ms.
516
                # Sorting the parent keys isn't strictly necessary for stable
517
                # sorting of a given graph. But it does help minimize the
518
                # differences between graphs
519
                # For bzr.dev ancestry:
520
                #   4.73ms  no sort
521
                #   7.73ms  RichCompareBool sort
4641.5.10 by John Arbash Meinel
Go back to sorted parents mode.
522
                parents = _sort_list_nodes(node.parents, 1)
523
                for pos from 0 <= pos < len(parents):
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
524
                    if PyTuple_CheckExact(parents):
525
                        parent_node = _get_tuple_node(parents, pos)
526
                    else:
527
                        parent_node = _get_list_node(parents, pos)
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
528
                    # TODO: GraphCycle detection
529
                    parent_node.seen = parent_node.seen + 1
530
                    if (parent_node.seen
531
                        == PyList_GET_SIZE(parent_node.children)):
532
                        # All children have been processed, queue up this
533
                        # parent
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
534
                        last_item = last_item + 1
535
                        if last_item < PyList_GET_SIZE(pending):
536
                            Py_INCREF(parent_node) # SetItem steals a ref
537
                            PyList_SetItem(pending, last_item, parent_node)
538
                        else:
539
                            PyList_Append(pending, parent_node)
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
540
                        parent_node.seen = 0
541
        return result
4593.5.6 by John Arbash Meinel
Create the crude merge_sort implementation that just thunks over to the old implementation.
542
543
    def merge_sort(self, tip_key):
544
        """Compute the merge sorted graph output."""
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
545
        cdef _MergeSorter sorter
546
4593.5.29 by John Arbash Meinel
Remove the gc enable/disable code.
547
        # TODO: consider disabling gc since we are allocating a lot of nodes
548
        #       that won't be collectable anyway. real world testing has not
549
        #       shown a specific impact, yet.
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
550
        sorter = _MergeSorter(self, tip_key)
551
        return sorter.topo_order()
4593.5.7 by John Arbash Meinel
start working on a pyrex implementation of MergeSorter
552
553
554
cdef class _MergeSortNode:
555
    """Tracks information about a node during the merge_sort operation."""
556
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
557
    # Public api
558
    cdef public object key
559
    cdef public long merge_depth
560
    cdef public object end_of_merge # True/False Is this the end of the current merge
561
562
    # Private api, used while computing the information
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
563
    cdef _KnownGraphNode left_parent
564
    cdef _KnownGraphNode left_pending_parent
565
    cdef object pending_parents # list of _KnownGraphNode for non-left parents
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
566
    cdef long _revno_first
567
    cdef long _revno_second
568
    cdef long _revno_last
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
569
    # TODO: turn these into flag/bit fields rather than individual members
4593.5.7 by John Arbash Meinel
start working on a pyrex implementation of MergeSorter
570
    cdef int is_first_child # Is this the first child?
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
571
    cdef int seen_by_child # A child node has seen this parent
572
    cdef int completed # Fully Processed
573
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
574
    def __init__(self, key):
575
        self.key = key
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
576
        self.merge_depth = -1
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
577
        self.left_parent = None
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
578
        self.left_pending_parent = None
579
        self.pending_parents = None
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
580
        self._revno_first = -1
581
        self._revno_second = -1
582
        self._revno_last = -1
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
583
        self.is_first_child = 0
584
        self.seen_by_child = 0
585
        self.completed = 0
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
586
587
    def __repr__(self):
4634.11.1 by John Arbash Meinel
Fix bug #419241. If a graph had a mainline ghost
588
        return '%s(%s depth:%s rev:%s,%s,%s first:%s seen:%s)' % (
589
            self.__class__.__name__, self.key,
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
590
            self.merge_depth,
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
591
            self._revno_first, self._revno_second, self._revno_last,
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
592
            self.is_first_child, self.seen_by_child)
593
4634.117.10 by John Arbash Meinel
Change 'no except' to 'cannot_raise'
594
    cdef int has_pending_parents(self): # cannot_raise
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
595
        if self.left_pending_parent is not None or self.pending_parents:
596
            return 1
597
        return 0
4593.5.7 by John Arbash Meinel
start working on a pyrex implementation of MergeSorter
598
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
599
    cdef object _revno(self):
600
        if self._revno_first == -1:
601
            if self._revno_second != -1:
602
                raise RuntimeError('Something wrong with: %s' % (self,))
603
            return (self._revno_last,)
604
        else:
605
            return (self._revno_first, self._revno_second, self._revno_last)
606
607
    property revno:
608
        def __get__(self):
609
            return self._revno()
610
4593.5.7 by John Arbash Meinel
start working on a pyrex implementation of MergeSorter
611
612
cdef class _MergeSorter:
613
    """This class does the work of computing the merge_sort ordering.
614
615
    We have some small advantages, in that we get all the extra information
616
    that KnownGraph knows, like knowing the child lists, etc.
617
    """
618
4593.5.11 by John Arbash Meinel
Without doing any real tuning yet, we see a decent speedup for merge_sort:
619
    # Current performance numbers for merge_sort(bzr_dev_parent_map):
4593.5.25 by John Arbash Meinel
Add tests that we detect GraphCycleError
620
    #  302ms tsort.merge_sort()
621
    #   91ms graph.KnownGraph().merge_sort()
4593.5.28 by John Arbash Meinel
Cleanup pass
622
    #   40ms kg.merge_sort()
4593.5.11 by John Arbash Meinel
Without doing any real tuning yet, we see a decent speedup for merge_sort:
623
4593.5.7 by John Arbash Meinel
start working on a pyrex implementation of MergeSorter
624
    cdef KnownGraph graph
4593.5.15 by John Arbash Meinel
Use the same implementation of stack handling to avoid append/pop.
625
    cdef object _depth_first_stack  # list
626
    cdef Py_ssize_t _last_stack_item # offset to last item on stack
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
627
    # cdef object _ms_nodes # dict of key => _MergeSortNode
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
628
    cdef object _revno_to_branch_count # {revno => num child branches}
629
    cdef object _scheduled_nodes # List of nodes ready to be yielded
4593.5.7 by John Arbash Meinel
start working on a pyrex implementation of MergeSorter
630
631
    def __init__(self, known_graph, tip_key):
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
632
        cdef _KnownGraphNode node
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
633
4593.5.7 by John Arbash Meinel
start working on a pyrex implementation of MergeSorter
634
        self.graph = known_graph
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
635
        # self._ms_nodes = {}
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
636
        self._revno_to_branch_count = {}
4593.5.15 by John Arbash Meinel
Use the same implementation of stack handling to avoid append/pop.
637
        self._depth_first_stack = []
638
        self._last_stack_item = -1
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
639
        self._scheduled_nodes = []
4593.5.42 by John Arbash Meinel
Turns out that some code assumed passing NULL_REVISION to merge_sort
640
        if (tip_key is not None and tip_key != NULL_REVISION
641
            and tip_key != (NULL_REVISION,)):
4593.5.7 by John Arbash Meinel
start working on a pyrex implementation of MergeSorter
642
            node = self.graph._nodes[tip_key]
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
643
            self._push_node(node, 0)
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
644
4634.117.8 by John Arbash Meinel
Use a class regex to find other signatures to exclude.
645
    cdef _MergeSortNode _get_ms_node(self, _KnownGraphNode node):
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
646
        cdef PyObject *temp_node
647
        cdef _MergeSortNode ms_node
648
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
649
        if node.extra is None:
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
650
            ms_node = _MergeSortNode(node.key)
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
651
            node.extra = ms_node
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
652
        else:
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
653
            ms_node = <_MergeSortNode>node.extra
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
654
        return ms_node
655
4593.5.24 by John Arbash Meinel
Switch some variables from Py_ssize_t to long
656
    cdef _push_node(self, _KnownGraphNode node, long merge_depth):
4593.5.7 by John Arbash Meinel
start working on a pyrex implementation of MergeSorter
657
        cdef _KnownGraphNode parent_node
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
658
        cdef _MergeSortNode ms_node, ms_parent_node
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
659
        cdef Py_ssize_t pos
4593.5.7 by John Arbash Meinel
start working on a pyrex implementation of MergeSorter
660
4593.5.27 by John Arbash Meinel
bit of cleanup passes
661
        ms_node = self._get_ms_node(node)
4593.5.7 by John Arbash Meinel
start working on a pyrex implementation of MergeSorter
662
        ms_node.merge_depth = merge_depth
4634.11.1 by John Arbash Meinel
Fix bug #419241. If a graph had a mainline ghost
663
        if node.parents is None:
664
            raise RuntimeError('ghost nodes should not be pushed'
665
                               ' onto the stack: %s' % (node,))
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
666
        if PyTuple_GET_SIZE(node.parents) > 0:
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
667
            parent_node = _get_tuple_node(node.parents, 0)
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
668
            ms_node.left_parent = parent_node
4634.11.1 by John Arbash Meinel
Fix bug #419241. If a graph had a mainline ghost
669
            if parent_node.parents is None: # left-hand ghost
670
                ms_node.left_pending_parent = None
671
                ms_node.left_parent = None
672
            else:
673
                ms_node.left_pending_parent = parent_node
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
674
        if PyTuple_GET_SIZE(node.parents) > 1:
4593.5.20 by John Arbash Meinel
Expose KnownGraph off of VersionedFiles
675
            ms_node.pending_parents = []
676
            for pos from 1 <= pos < PyTuple_GET_SIZE(node.parents):
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
677
                parent_node = _get_tuple_node(node.parents, pos)
4593.5.20 by John Arbash Meinel
Expose KnownGraph off of VersionedFiles
678
                if parent_node.parents is None: # ghost
679
                    continue
680
                PyList_Append(ms_node.pending_parents, parent_node)
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
681
4593.5.7 by John Arbash Meinel
start working on a pyrex implementation of MergeSorter
682
        ms_node.is_first_child = 1
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
683
        if ms_node.left_parent is not None:
4593.5.27 by John Arbash Meinel
bit of cleanup passes
684
            ms_parent_node = self._get_ms_node(ms_node.left_parent)
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
685
            if ms_parent_node.seen_by_child:
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
686
                ms_node.is_first_child = 0
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
687
            ms_parent_node.seen_by_child = 1
4593.5.15 by John Arbash Meinel
Use the same implementation of stack handling to avoid append/pop.
688
        self._last_stack_item = self._last_stack_item + 1
689
        if self._last_stack_item < PyList_GET_SIZE(self._depth_first_stack):
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
690
            Py_INCREF(node) # SetItem steals a ref
4593.5.15 by John Arbash Meinel
Use the same implementation of stack handling to avoid append/pop.
691
            PyList_SetItem(self._depth_first_stack, self._last_stack_item,
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
692
                           node)
4593.5.15 by John Arbash Meinel
Use the same implementation of stack handling to avoid append/pop.
693
        else:
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
694
            PyList_Append(self._depth_first_stack, node)
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
695
696
    cdef _pop_node(self):
4593.5.16 by John Arbash Meinel
Change the revno handling code.
697
        cdef PyObject *temp
4593.5.23 by John Arbash Meinel
Move the end-of-merge computation into _pop_node
698
        cdef _MergeSortNode ms_node, ms_parent_node, ms_prev_node
699
        cdef _KnownGraphNode node, parent_node, prev_node
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
700
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
701
        node = _get_list_node(self._depth_first_stack, self._last_stack_item)
702
        ms_node = <_MergeSortNode>node.extra
4593.5.15 by John Arbash Meinel
Use the same implementation of stack handling to avoid append/pop.
703
        self._last_stack_item = self._last_stack_item - 1
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
704
        if ms_node.left_parent is not None:
4593.5.27 by John Arbash Meinel
bit of cleanup passes
705
            # Assign the revision number from the left-hand parent
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
706
            ms_parent_node = <_MergeSortNode>ms_node.left_parent.extra
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
707
            if ms_node.is_first_child:
708
                # First child just increments the final digit
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
709
                ms_node._revno_first = ms_parent_node._revno_first
710
                ms_node._revno_second = ms_parent_node._revno_second
711
                ms_node._revno_last = ms_parent_node._revno_last + 1
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
712
            else:
4593.5.10 by John Arbash Meinel
finish getting the revno numbers working correctly.
713
                # Not the first child, make a new branch
4593.5.27 by John Arbash Meinel
bit of cleanup passes
714
                #  (mainline_revno, branch_count, 1)
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
715
                if ms_parent_node._revno_first == -1:
4593.5.16 by John Arbash Meinel
Change the revno handling code.
716
                    # Mainline ancestor, the increment is on the last digit
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
717
                    base_revno = ms_parent_node._revno_last
4593.5.16 by John Arbash Meinel
Change the revno handling code.
718
                else:
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
719
                    base_revno = ms_parent_node._revno_first
4593.5.16 by John Arbash Meinel
Change the revno handling code.
720
                temp = PyDict_GetItem(self._revno_to_branch_count,
4593.5.17 by John Arbash Meinel
No need to pop one item off the stack after another.
721
                                      base_revno)
4593.5.16 by John Arbash Meinel
Change the revno handling code.
722
                if temp == NULL:
723
                    branch_count = 1
724
                else:
725
                    branch_count = (<object>temp) + 1
726
                PyDict_SetItem(self._revno_to_branch_count, base_revno,
727
                               branch_count)
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
728
                ms_node._revno_first = base_revno
729
                ms_node._revno_second = branch_count
730
                ms_node._revno_last = 1
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
731
        else:
4593.5.27 by John Arbash Meinel
bit of cleanup passes
732
            temp = PyDict_GetItem(self._revno_to_branch_count, 0)
733
            if temp == NULL:
4593.5.16 by John Arbash Meinel
Change the revno handling code.
734
                # The first root node doesn't have a 3-digit revno
4593.5.27 by John Arbash Meinel
bit of cleanup passes
735
                root_count = 0
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
736
                ms_node._revno_first = -1
737
                ms_node._revno_second = -1
738
                ms_node._revno_last = 1
4593.5.27 by John Arbash Meinel
bit of cleanup passes
739
            else:
740
                root_count = (<object>temp) + 1
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
741
                ms_node._revno_first = 0
742
                ms_node._revno_second = root_count
743
                ms_node._revno_last = 1
4593.5.27 by John Arbash Meinel
bit of cleanup passes
744
            PyDict_SetItem(self._revno_to_branch_count, 0, root_count)
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
745
        ms_node.completed = 1
4593.5.23 by John Arbash Meinel
Move the end-of-merge computation into _pop_node
746
        if PyList_GET_SIZE(self._scheduled_nodes) == 0:
4593.5.27 by John Arbash Meinel
bit of cleanup passes
747
            # The first scheduled node is always the end of merge
4593.5.23 by John Arbash Meinel
Move the end-of-merge computation into _pop_node
748
            ms_node.end_of_merge = True
749
        else:
750
            prev_node = _get_list_node(self._scheduled_nodes,
751
                                    PyList_GET_SIZE(self._scheduled_nodes) - 1)
752
            ms_prev_node = <_MergeSortNode>prev_node.extra
753
            if ms_prev_node.merge_depth < ms_node.merge_depth:
754
                # The previously pushed node is to our left, so this is the end
755
                # of this right-hand chain
756
                ms_node.end_of_merge = True
757
            elif (ms_prev_node.merge_depth == ms_node.merge_depth
758
                  and prev_node not in node.parents):
759
                # The next node is not a direct parent of this node
760
                ms_node.end_of_merge = True
761
            else:
762
                ms_node.end_of_merge = False
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
763
        PyList_Append(self._scheduled_nodes, node)
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
764
765
    cdef _schedule_stack(self):
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
766
        cdef _KnownGraphNode last_node, next_node
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
767
        cdef _MergeSortNode ms_node, ms_last_node, ms_next_node
4593.5.24 by John Arbash Meinel
Switch some variables from Py_ssize_t to long
768
        cdef long next_merge_depth
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
769
        ordered = []
4593.5.15 by John Arbash Meinel
Use the same implementation of stack handling to avoid append/pop.
770
        while self._last_stack_item >= 0:
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
771
            # Peek at the last item on the stack
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
772
            last_node = _get_list_node(self._depth_first_stack,
773
                                       self._last_stack_item)
4593.5.25 by John Arbash Meinel
Add tests that we detect GraphCycleError
774
            if last_node.gdfo == -1:
4593.5.28 by John Arbash Meinel
Cleanup pass
775
                # if _find_gdfo skipped a node, that means there is a graph
776
                # cycle, error out now
4593.5.25 by John Arbash Meinel
Add tests that we detect GraphCycleError
777
                raise errors.GraphCycleError(self.graph._nodes)
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
778
            ms_last_node = <_MergeSortNode>last_node.extra
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
779
            if not ms_last_node.has_pending_parents():
780
                # Processed all parents, pop this node
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
781
                self._pop_node()
782
                continue
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
783
            while ms_last_node.has_pending_parents():
784
                if ms_last_node.left_pending_parent is not None:
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
785
                    # recurse depth first into the primary parent
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
786
                    next_node = ms_last_node.left_pending_parent
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
787
                    ms_last_node.left_pending_parent = None
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
788
                else:
789
                    # place any merges in right-to-left order for scheduling
790
                    # which gives us left-to-right order after we reverse
4593.5.28 by John Arbash Meinel
Cleanup pass
791
                    # the scheduled queue.
792
                    # Note: This has the effect of allocating common-new
793
                    #       revisions to the right-most subtree rather than the
794
                    #       left most, which will display nicely (you get
795
                    #       smaller trees at the top of the combined merge).
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
796
                    next_node = ms_last_node.pending_parents.pop()
4593.5.27 by John Arbash Meinel
bit of cleanup passes
797
                ms_next_node = self._get_ms_node(next_node)
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
798
                if ms_next_node.completed:
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
799
                    # this parent was completed by a child on the
800
                    # call stack. skip it.
801
                    continue
802
                # otherwise transfer it from the source graph into the
803
                # top of the current depth first search stack.
804
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
805
                if next_node is ms_last_node.left_parent:
4593.5.24 by John Arbash Meinel
Switch some variables from Py_ssize_t to long
806
                    next_merge_depth = ms_last_node.merge_depth
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
807
                else:
4593.5.24 by John Arbash Meinel
Switch some variables from Py_ssize_t to long
808
                    next_merge_depth = ms_last_node.merge_depth + 1
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
809
                self._push_node(next_node, next_merge_depth)
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
810
                # and do not continue processing parents until this 'call'
811
                # has recursed.
812
                break
813
814
    cdef topo_order(self):
4593.5.23 by John Arbash Meinel
Move the end-of-merge computation into _pop_node
815
        cdef _MergeSortNode ms_node
816
        cdef _KnownGraphNode node
4593.5.17 by John Arbash Meinel
No need to pop one item off the stack after another.
817
        cdef Py_ssize_t pos
4593.5.28 by John Arbash Meinel
Cleanup pass
818
        cdef PyObject *temp_key, *temp_node
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
819
4593.5.28 by John Arbash Meinel
Cleanup pass
820
        # Note: allocating a _MergeSortNode and deallocating it for all nodes
821
        #       costs approx 8.52ms (21%) of the total runtime
822
        #       We might consider moving the attributes into the base
823
        #       KnownGraph object.
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
824
        self._schedule_stack()
825
826
        # We've set up the basic schedule, now we can continue processing the
827
        # output.
4593.5.29 by John Arbash Meinel
Remove the gc enable/disable code.
828
        # Note: This final loop costs us 40.0ms => 28.8ms (11ms, 25%) on
4593.5.24 by John Arbash Meinel
Switch some variables from Py_ssize_t to long
829
        #       bzr.dev, to convert the internal Object representation into a
830
        #       Tuple representation...
831
        #       2ms is walking the data and computing revno tuples
832
        #       7ms is computing the return tuple
833
        #       4ms is PyList_Append()
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
834
        ordered = []
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
835
        # output the result in reverse order, and separate the generated info
4593.5.23 by John Arbash Meinel
Move the end-of-merge computation into _pop_node
836
        for pos from PyList_GET_SIZE(self._scheduled_nodes) > pos >= 0:
837
            node = _get_list_node(self._scheduled_nodes, pos)
838
            ms_node = <_MergeSortNode>node.extra
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
839
            PyList_Append(ordered, ms_node)
4593.5.23 by John Arbash Meinel
Move the end-of-merge computation into _pop_node
840
            node.extra = None
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
841
        # Clear out the scheduled nodes now that we're done
4593.5.17 by John Arbash Meinel
No need to pop one item off the stack after another.
842
        self._scheduled_nodes = []
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
843
        return ordered