~bzr-pqm/bzr/bzr.dev

4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
1
# Copyright (C) 2009 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
17
"""Implementation of Graph algorithms when we have already loaded everything.
18
"""
19
20
cdef extern from "python-compat.h":
21
    pass
22
23
cdef extern from "Python.h":
24
    ctypedef int Py_ssize_t
25
    ctypedef struct PyObject:
26
        pass
27
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
28
    int PyString_CheckExact(object)
4641.5.12 by John Arbash Meinel
small cleanup.
29
30
    int PyObject_RichCompareBool(object, object, int)
4641.5.11 by John Arbash Meinel
Using PyObject_RichCompareBool is a significant benefit. 8.8ms down to 7.7ms.
31
    int Py_LT
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
32
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
33
    int PyTuple_CheckExact(object)
4371.3.31 by John Arbash Meinel
Change _KnownGraph.parents to be a tuple rather than a list.
34
    object PyTuple_New(Py_ssize_t n)
4371.4.23 by John Arbash Meinel
Clean up the imports at the top.
35
    Py_ssize_t PyTuple_GET_SIZE(object t)
36
    PyObject * PyTuple_GET_ITEM(object t, Py_ssize_t o)
4371.3.31 by John Arbash Meinel
Change _KnownGraph.parents to be a tuple rather than a list.
37
    void PyTuple_SET_ITEM(object t, Py_ssize_t o, object v)
4371.4.23 by John Arbash Meinel
Clean up the imports at the top.
38
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
39
    int PyList_CheckExact(object)
4371.4.23 by John Arbash Meinel
Clean up the imports at the top.
40
    Py_ssize_t PyList_GET_SIZE(object l)
41
    PyObject * PyList_GET_ITEM(object l, Py_ssize_t o)
42
    int PyList_SetItem(object l, Py_ssize_t o, object l) except -1
43
    int PyList_Append(object l, object v) except -1
44
45
    int PyDict_CheckExact(object d)
46
    Py_ssize_t PyDict_Size(object d) except -1
4371.3.28 by John Arbash Meinel
Finish converting away from 'cdef list' syntax.
47
    PyObject * PyDict_GetItem(object d, object k)
4371.4.23 by John Arbash Meinel
Clean up the imports at the top.
48
    int PyDict_SetItem(object d, object k, object v) except -1
4371.4.18 by John Arbash Meinel
Big performance win, back to 650ms.
49
    int PyDict_DelItem(object d, object k) except -1
4371.3.28 by John Arbash Meinel
Finish converting away from 'cdef list' syntax.
50
    int PyDict_Next(object d, Py_ssize_t *pos, PyObject **k, PyObject **v)
4371.4.21 by John Arbash Meinel
Spend some time optimizing the inner heads() loop, approx 2x faster.
51
4371.3.20 by John Arbash Meinel
A few tweaks to the pyrex version.
52
    void Py_INCREF(object)
53
4593.5.15 by John Arbash Meinel
Use the same implementation of stack handling to avoid append/pop.
54
import gc
4371.3.21 by John Arbash Meinel
Attempt to make things better by splitting out left_parent and right_parent into
55
4593.5.3 by John Arbash Meinel
Bring in the optimized tsort code.
56
from bzrlib import errors, revision
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
57
4371.4.21 by John Arbash Meinel
Spend some time optimizing the inner heads() loop, approx 2x faster.
58
cdef object NULL_REVISION
59
NULL_REVISION = revision.NULL_REVISION
60
61
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
62
cdef class _KnownGraphNode:
63
    """Represents a single object in the known graph."""
64
4371.3.35 by John Arbash Meinel
Changing the heapq algorithm actually ends up with better overall performance.
65
    cdef object key
4371.3.28 by John Arbash Meinel
Finish converting away from 'cdef list' syntax.
66
    cdef object parents
67
    cdef object children
4371.4.13 by Vincent Ladeuil
Fixed as per John's review.
68
    cdef public long gdfo
4371.4.22 by John Arbash Meinel
Changing from a set of seen to a list to cleanup and a seen attribute.
69
    cdef int seen
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
70
    cdef object extra
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
71
4371.3.31 by John Arbash Meinel
Change _KnownGraph.parents to be a tuple rather than a list.
72
    def __init__(self, key):
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
73
        self.key = key
4371.3.31 by John Arbash Meinel
Change _KnownGraph.parents to be a tuple rather than a list.
74
        self.parents = None
4371.3.21 by John Arbash Meinel
Attempt to make things better by splitting out left_parent and right_parent into
75
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
76
        self.children = []
77
        # Greatest distance from origin
78
        self.gdfo = -1
4371.4.22 by John Arbash Meinel
Changing from a set of seen to a list to cleanup and a seen attribute.
79
        self.seen = 0
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
80
        self.extra = None
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
81
82
    property child_keys:
83
        def __get__(self):
84
            cdef _KnownGraphNode child
85
86
            keys = []
87
            for child in self.children:
4371.3.28 by John Arbash Meinel
Finish converting away from 'cdef list' syntax.
88
                PyList_Append(keys, child.key)
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
89
            return keys
90
4648.2.1 by Gary van der Merwe
Add get_parent_keys, and get_child_keys to KnownGraph.
91
    property parent_keys:
92
        def __get__(self):
93
            if self.parents is None:
94
                return None
95
            
96
            cdef _KnownGraphNode parent
97
98
            keys = []
99
            for parent in self.parents:
100
                PyList_Append(keys, parent.key)
101
            return keys
102
    
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
103
    cdef clear_references(self):
104
        self.parents = None
105
        self.children = None
106
107
    def __repr__(self):
4371.3.35 by John Arbash Meinel
Changing the heapq algorithm actually ends up with better overall performance.
108
        cdef _KnownGraphNode node
109
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
110
        parent_keys = []
4371.3.35 by John Arbash Meinel
Changing the heapq algorithm actually ends up with better overall performance.
111
        if self.parents is not None:
112
            for node in self.parents:
113
                parent_keys.append(node.key)
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
114
        child_keys = []
4371.3.35 by John Arbash Meinel
Changing the heapq algorithm actually ends up with better overall performance.
115
        if self.children is not None:
116
            for node in self.children:
117
                child_keys.append(node.key)
4371.4.10 by Vincent Ladeuil
Cleanup.
118
        return '%s(%s  gdfo:%s par:%s child:%s)' % (
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
119
            self.__class__.__name__, self.key, self.gdfo,
4371.4.12 by Vincent Ladeuil
Fix typos.
120
            parent_keys, child_keys)
4371.4.10 by Vincent Ladeuil
Cleanup.
121
4371.3.37 by John Arbash Meinel
cleanup passes.
122
4371.4.14 by John Arbash Meinel
Optimize _find_gdfo and make known_parent_gdfos a member of KnownGraphNode
123
cdef _KnownGraphNode _get_list_node(lst, Py_ssize_t pos):
124
    cdef PyObject *temp_node
125
126
    temp_node = PyList_GET_ITEM(lst, pos)
127
    return <_KnownGraphNode>temp_node
128
129
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
130
cdef _KnownGraphNode _get_tuple_node(tpl, Py_ssize_t pos):
4371.4.14 by John Arbash Meinel
Optimize _find_gdfo and make known_parent_gdfos a member of KnownGraphNode
131
    cdef PyObject *temp_node
132
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
133
    temp_node = PyTuple_GET_ITEM(tpl, pos)
4371.4.14 by John Arbash Meinel
Optimize _find_gdfo and make known_parent_gdfos a member of KnownGraphNode
134
    return <_KnownGraphNode>temp_node
135
136
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
137
def get_key(node):
138
    cdef _KnownGraphNode real_node
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
139
    real_node = node
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
140
    return real_node.key
141
142
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
143
cdef object _sort_list_nodes(object lst_or_tpl, int reverse):
144
    """Sort a list of _KnownGraphNode objects.
145
146
    If lst_or_tpl is a list, it is allowed to mutate in place. It may also
147
    just return the input list if everything is already sorted.
148
    """
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
149
    cdef _KnownGraphNode node1, node2
4641.5.9 by John Arbash Meinel
Explore just walking the parents directly, without sorting.
150
    cdef int do_swap, is_tuple
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
151
    cdef Py_ssize_t length
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
152
4641.5.9 by John Arbash Meinel
Explore just walking the parents directly, without sorting.
153
    is_tuple = PyTuple_CheckExact(lst_or_tpl)
154
    if not (is_tuple or PyList_CheckExact(lst_or_tpl)):
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
155
        raise TypeError('lst_or_tpl must be a list or tuple.')
156
    length = len(lst_or_tpl)
157
    if length == 0 or length == 1:
158
        return lst_or_tpl
159
    if length == 2:
4641.5.9 by John Arbash Meinel
Explore just walking the parents directly, without sorting.
160
        if is_tuple:
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
161
            node1 = _get_tuple_node(lst_or_tpl, 0)
162
            node2 = _get_tuple_node(lst_or_tpl, 1)
163
        else:
164
            node1 = _get_list_node(lst_or_tpl, 0)
165
            node2 = _get_list_node(lst_or_tpl, 1)
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
166
        if reverse:
4641.5.11 by John Arbash Meinel
Using PyObject_RichCompareBool is a significant benefit. 8.8ms down to 7.7ms.
167
            do_swap = PyObject_RichCompareBool(node1.key, node2.key, Py_LT)
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
168
        else:
4641.5.11 by John Arbash Meinel
Using PyObject_RichCompareBool is a significant benefit. 8.8ms down to 7.7ms.
169
            do_swap = PyObject_RichCompareBool(node2.key, node1.key, Py_LT)
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
170
        if not do_swap:
171
            return lst_or_tpl
4641.5.9 by John Arbash Meinel
Explore just walking the parents directly, without sorting.
172
        if is_tuple:
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
173
            return (node2, node1)
174
        else:
4641.5.9 by John Arbash Meinel
Explore just walking the parents directly, without sorting.
175
            # Swap 'in-place', since lists are mutable
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
176
            Py_INCREF(node1)
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
177
            PyList_SetItem(lst_or_tpl, 1, node1)
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
178
            Py_INCREF(node2)
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
179
            PyList_SetItem(lst_or_tpl, 0, node2)
180
            return lst_or_tpl
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
181
    # For all other sizes, we just use 'sorted()'
4641.5.9 by John Arbash Meinel
Explore just walking the parents directly, without sorting.
182
    if is_tuple:
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
183
        # Note that sorted() is just list(iterable).sort()
184
        lst_or_tpl = list(lst_or_tpl)
185
    lst_or_tpl.sort(key=get_key, reverse=reverse)
186
    return lst_or_tpl
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
187
188
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
189
cdef class _MergeSorter
190
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
191
cdef class KnownGraph:
192
    """This is a class which assumes we already know the full graph."""
193
194
    cdef public object _nodes
4371.3.26 by John Arbash Meinel
A few more cleanups. Start moving away from pyrex 0.9.8isms
195
    cdef object _known_heads
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
196
    cdef public int do_cache
197
198
    def __init__(self, parent_map, do_cache=True):
199
        """Create a new KnownGraph instance.
200
201
        :param parent_map: A dictionary mapping key => parent_keys
202
        """
4371.4.29 by John Arbash Meinel
Pre-allocating self._nodes turned out to be slower.
203
        # tests at pre-allocating the node dict actually slowed things down
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
204
        self._nodes = {}
205
        # Maps {sorted(revision_id, revision_id): heads}
206
        self._known_heads = {}
207
        self.do_cache = int(do_cache)
4593.5.29 by John Arbash Meinel
Remove the gc enable/disable code.
208
        # TODO: consider disabling gc since we are allocating a lot of nodes
209
        #       that won't be collectable anyway. real world testing has not
210
        #       shown a specific impact, yet.
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
211
        self._initialize_nodes(parent_map)
212
        self._find_gdfo()
213
214
    def __dealloc__(self):
215
        cdef _KnownGraphNode child
4371.3.28 by John Arbash Meinel
Finish converting away from 'cdef list' syntax.
216
        cdef Py_ssize_t pos
217
        cdef PyObject *temp_node
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
218
4371.3.28 by John Arbash Meinel
Finish converting away from 'cdef list' syntax.
219
        while PyDict_Next(self._nodes, &pos, NULL, &temp_node):
220
            child = <_KnownGraphNode>temp_node
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
221
            child.clear_references()
222
4371.4.28 by John Arbash Meinel
We spent a lot of time adding and removing keys from tails.
223
    cdef _KnownGraphNode _get_or_create_node(self, key):
4371.3.31 by John Arbash Meinel
Change _KnownGraph.parents to be a tuple rather than a list.
224
        cdef PyObject *temp_node
225
        cdef _KnownGraphNode node
226
227
        temp_node = PyDict_GetItem(self._nodes, key)
228
        if temp_node == NULL:
229
            node = _KnownGraphNode(key)
230
            PyDict_SetItem(self._nodes, key, node)
231
        else:
232
            node = <_KnownGraphNode>temp_node
233
        return node
234
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
235
    def _initialize_nodes(self, parent_map):
236
        """Populate self._nodes.
237
4371.4.9 by Vincent Ladeuil
Simplify gdfo computing by finding tails when at graph build time.
238
        After this has finished:
239
        - self._nodes will have an entry for every entry in parent_map.
240
        - ghosts will have a parent_keys = None,
4371.4.12 by Vincent Ladeuil
Fix typos.
241
        - all nodes found will also have child_keys populated with all known
242
          child keys,
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
243
        """
4371.3.28 by John Arbash Meinel
Finish converting away from 'cdef list' syntax.
244
        cdef PyObject *temp_key, *temp_parent_keys, *temp_node
4371.3.31 by John Arbash Meinel
Change _KnownGraph.parents to be a tuple rather than a list.
245
        cdef Py_ssize_t pos, pos2, num_parent_keys
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
246
        cdef _KnownGraphNode node
247
        cdef _KnownGraphNode parent_node
248
4371.3.28 by John Arbash Meinel
Finish converting away from 'cdef list' syntax.
249
        if not PyDict_CheckExact(parent_map):
250
            raise TypeError('parent_map should be a dict of {key:parent_keys}')
251
        # for key, parent_keys in parent_map.iteritems():
252
        pos = 0
253
        while PyDict_Next(parent_map, &pos, &temp_key, &temp_parent_keys):
254
            key = <object>temp_key
255
            parent_keys = <object>temp_parent_keys
4371.4.9 by Vincent Ladeuil
Simplify gdfo computing by finding tails when at graph build time.
256
            num_parent_keys = len(parent_keys)
4371.4.28 by John Arbash Meinel
We spent a lot of time adding and removing keys from tails.
257
            node = self._get_or_create_node(key)
4593.5.26 by John Arbash Meinel
small cleanup, a bit of comment tweaking.
258
            # We know how many parents, so we pre allocate the tuple
4371.3.31 by John Arbash Meinel
Change _KnownGraph.parents to be a tuple rather than a list.
259
            parent_nodes = PyTuple_New(num_parent_keys)
260
            for pos2 from 0 <= pos2 < num_parent_keys:
4593.5.26 by John Arbash Meinel
small cleanup, a bit of comment tweaking.
261
                # Note: it costs us 10ms out of 40ms to lookup all of these
262
                #       parents, it doesn't seem to be an allocation overhead,
263
                #       but rather a lookup overhead. There doesn't seem to be
264
                #       a way around it, and that is one reason why
265
                #       KnownGraphNode maintains a direct pointer to the parent
266
                #       node.
267
                # We use [] because parent_keys may be a tuple or list
4371.4.28 by John Arbash Meinel
We spent a lot of time adding and removing keys from tails.
268
                parent_node = self._get_or_create_node(parent_keys[pos2])
4371.3.31 by John Arbash Meinel
Change _KnownGraph.parents to be a tuple rather than a list.
269
                # PyTuple_SET_ITEM will steal a reference, so INCREF first
270
                Py_INCREF(parent_node)
271
                PyTuple_SET_ITEM(parent_nodes, pos2, parent_node)
4371.3.28 by John Arbash Meinel
Finish converting away from 'cdef list' syntax.
272
                PyList_Append(parent_node.children, node)
273
            node.parents = parent_nodes
4371.4.30 by John Arbash Meinel
We don't need self._tails anymore, nor does it need to be a set.
274
275
    def _find_tails(self):
276
        cdef PyObject *temp_node
277
        cdef _KnownGraphNode node
278
        cdef Py_ssize_t pos
279
280
        tails = []
4371.4.28 by John Arbash Meinel
We spent a lot of time adding and removing keys from tails.
281
        pos = 0
282
        while PyDict_Next(self._nodes, &pos, NULL, &temp_node):
283
            node = <_KnownGraphNode>temp_node
284
            if node.parents is None or PyTuple_GET_SIZE(node.parents) == 0:
4371.4.30 by John Arbash Meinel
We don't need self._tails anymore, nor does it need to be a set.
285
                node.gdfo = 1
286
                PyList_Append(tails, node)
287
        return tails
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
288
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
289
    def _find_tips(self):
290
        cdef PyObject *temp_node
291
        cdef _KnownGraphNode node
292
        cdef Py_ssize_t pos
293
294
        tips = []
295
        pos = 0
296
        while PyDict_Next(self._nodes, &pos, NULL, &temp_node):
297
            node = <_KnownGraphNode>temp_node
298
            if PyList_GET_SIZE(node.children) == 0:
299
                PyList_Append(tips, node)
300
        return tips
301
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
302
    def _find_gdfo(self):
4371.4.8 by Vincent Ladeuil
Replace the existing KnownGraph._find_gdfo.
303
        cdef _KnownGraphNode node
304
        cdef _KnownGraphNode child
4371.4.14 by John Arbash Meinel
Optimize _find_gdfo and make known_parent_gdfos a member of KnownGraphNode
305
        cdef PyObject *temp
4371.4.30 by John Arbash Meinel
We don't need self._tails anymore, nor does it need to be a set.
306
        cdef Py_ssize_t pos
4371.4.14 by John Arbash Meinel
Optimize _find_gdfo and make known_parent_gdfos a member of KnownGraphNode
307
        cdef int replace
4371.4.26 by John Arbash Meinel
Don't shrink the pending list.
308
        cdef Py_ssize_t last_item
4371.4.25 by John Arbash Meinel
We had an 'child_node.gdfo is None' check in the inner loop.
309
        cdef long next_gdfo
4371.4.8 by Vincent Ladeuil
Replace the existing KnownGraph._find_gdfo.
310
4371.4.30 by John Arbash Meinel
We don't need self._tails anymore, nor does it need to be a set.
311
        pending = self._find_tails()
4371.4.8 by Vincent Ladeuil
Replace the existing KnownGraph._find_gdfo.
312
4371.4.26 by John Arbash Meinel
Don't shrink the pending list.
313
        last_item = PyList_GET_SIZE(pending) - 1
314
        while last_item >= 0:
4371.4.14 by John Arbash Meinel
Optimize _find_gdfo and make known_parent_gdfos a member of KnownGraphNode
315
            # Avoid pop followed by push, instead, peek, and replace
4371.4.16 by John Arbash Meinel
Document some perf issues with using known_parent_gdfos, but leave it as a dict.
316
            # timing shows this is 930ms => 770ms for OOo
4371.4.26 by John Arbash Meinel
Don't shrink the pending list.
317
            node = _get_list_node(pending, last_item)
318
            last_item = last_item - 1
4371.4.25 by John Arbash Meinel
We had an 'child_node.gdfo is None' check in the inner loop.
319
            next_gdfo = node.gdfo + 1
4371.4.30 by John Arbash Meinel
We don't need self._tails anymore, nor does it need to be a set.
320
            for pos from 0 <= pos < PyList_GET_SIZE(node.children):
321
                child = _get_list_node(node.children, pos)
4371.4.25 by John Arbash Meinel
We had an 'child_node.gdfo is None' check in the inner loop.
322
                if next_gdfo > child.gdfo:
323
                    child.gdfo = next_gdfo
4371.4.27 by John Arbash Meinel
Re-use the child.seen attribute for tracking num parents.
324
                child.seen = child.seen + 1
325
                if child.seen == PyTuple_GET_SIZE(child.parents):
4371.4.17 by John Arbash Meinel
tried a different tack, to use a new list and append.
326
                    # This child is populated, queue it to be walked
4371.4.26 by John Arbash Meinel
Don't shrink the pending list.
327
                    last_item = last_item + 1
328
                    if last_item < PyList_GET_SIZE(pending):
4371.4.14 by John Arbash Meinel
Optimize _find_gdfo and make known_parent_gdfos a member of KnownGraphNode
329
                        Py_INCREF(child) # SetItem steals a ref
4371.4.26 by John Arbash Meinel
Don't shrink the pending list.
330
                        PyList_SetItem(pending, last_item, child)
4371.4.14 by John Arbash Meinel
Optimize _find_gdfo and make known_parent_gdfos a member of KnownGraphNode
331
                    else:
332
                        PyList_Append(pending, child)
4371.4.27 by John Arbash Meinel
Re-use the child.seen attribute for tracking num parents.
333
                    # We have queued this node, we don't need to track it
334
                    # anymore
335
                    child.seen = 0
4371.4.8 by Vincent Ladeuil
Replace the existing KnownGraph._find_gdfo.
336
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
337
    def heads(self, keys):
338
        """Return the heads from amongst keys.
339
340
        This is done by searching the ancestries of each key.  Any key that is
341
        reachable from another key is not returned; all the others are.
342
4371.4.7 by Vincent Ladeuil
Simple Pyrex version for the new heads() method.
343
        This operation scales with the relative depth between any two keys. It
344
        uses gdfo to avoid walking all ancestry.
345
346
        :param keys: An iterable of keys.
347
        :return: A set of the heads. Note that as a set there is no ordering
348
            information. Callers will need to filter their input to create
349
            order if they need it.
350
        """
351
        cdef PyObject *maybe_node
352
        cdef PyObject *maybe_heads
353
        cdef PyObject *temp_node
354
        cdef _KnownGraphNode node
4371.4.31 by John Arbash Meinel
Apply the same 'never pop' logic to the heads() code.
355
        cdef Py_ssize_t pos, last_item
4371.4.21 by John Arbash Meinel
Spend some time optimizing the inner heads() loop, approx 2x faster.
356
        cdef long min_gdfo
4371.4.7 by Vincent Ladeuil
Simple Pyrex version for the new heads() method.
357
4526.10.1 by John Arbash Meinel
Avoid using PyFrozenSet_New
358
        heads_key = frozenset(keys)
4371.4.7 by Vincent Ladeuil
Simple Pyrex version for the new heads() method.
359
        maybe_heads = PyDict_GetItem(self._known_heads, heads_key)
360
        if maybe_heads != NULL:
361
            return <object>maybe_heads
362
        # Not cached, compute it ourselves
363
        candidate_nodes = {}
364
        for key in keys:
4371.4.28 by John Arbash Meinel
We spent a lot of time adding and removing keys from tails.
365
            maybe_node = PyDict_GetItem(self._nodes, key)
4371.4.7 by Vincent Ladeuil
Simple Pyrex version for the new heads() method.
366
            if maybe_node == NULL:
367
                raise KeyError('key %s not in nodes' % (key,))
368
            PyDict_SetItem(candidate_nodes, key, <object>maybe_node)
4371.4.21 by John Arbash Meinel
Spend some time optimizing the inner heads() loop, approx 2x faster.
369
        maybe_node = PyDict_GetItem(candidate_nodes, NULL_REVISION)
370
        if maybe_node != NULL:
4371.4.7 by Vincent Ladeuil
Simple Pyrex version for the new heads() method.
371
            # NULL_REVISION is only a head if it is the only entry
4371.4.21 by John Arbash Meinel
Spend some time optimizing the inner heads() loop, approx 2x faster.
372
            candidate_nodes.pop(NULL_REVISION)
4371.4.7 by Vincent Ladeuil
Simple Pyrex version for the new heads() method.
373
            if not candidate_nodes:
4371.4.21 by John Arbash Meinel
Spend some time optimizing the inner heads() loop, approx 2x faster.
374
                return frozenset([NULL_REVISION])
4371.4.7 by Vincent Ladeuil
Simple Pyrex version for the new heads() method.
375
            # The keys changed, so recalculate heads_key
4526.10.1 by John Arbash Meinel
Avoid using PyFrozenSet_New
376
            heads_key = frozenset(candidate_nodes)
4371.4.21 by John Arbash Meinel
Spend some time optimizing the inner heads() loop, approx 2x faster.
377
        if PyDict_Size(candidate_nodes) < 2:
4371.4.7 by Vincent Ladeuil
Simple Pyrex version for the new heads() method.
378
            return heads_key
379
4371.4.22 by John Arbash Meinel
Changing from a set of seen to a list to cleanup and a seen attribute.
380
        cleanup = []
4371.4.7 by Vincent Ladeuil
Simple Pyrex version for the new heads() method.
381
        pending = []
4371.4.21 by John Arbash Meinel
Spend some time optimizing the inner heads() loop, approx 2x faster.
382
        # we know a gdfo cannot be longer than a linear chain of all nodes
383
        min_gdfo = PyDict_Size(self._nodes) + 1
384
        # Build up nodes that need to be walked, note that starting nodes are
385
        # not added to seen()
4371.4.7 by Vincent Ladeuil
Simple Pyrex version for the new heads() method.
386
        pos = 0
387
        while PyDict_Next(candidate_nodes, &pos, NULL, &temp_node):
388
            node = <_KnownGraphNode>temp_node
389
            if node.parents is not None:
390
                pending.extend(node.parents)
4371.4.21 by John Arbash Meinel
Spend some time optimizing the inner heads() loop, approx 2x faster.
391
            if node.gdfo < min_gdfo:
4371.4.7 by Vincent Ladeuil
Simple Pyrex version for the new heads() method.
392
                min_gdfo = node.gdfo
4371.4.21 by John Arbash Meinel
Spend some time optimizing the inner heads() loop, approx 2x faster.
393
394
        # Now do all the real work
4371.4.31 by John Arbash Meinel
Apply the same 'never pop' logic to the heads() code.
395
        last_item = PyList_GET_SIZE(pending) - 1
396
        while last_item >= 0:
397
            node = _get_list_node(pending, last_item)
398
            last_item = last_item - 1
4371.4.22 by John Arbash Meinel
Changing from a set of seen to a list to cleanup and a seen attribute.
399
            if node.seen:
4371.4.7 by Vincent Ladeuil
Simple Pyrex version for the new heads() method.
400
                # node already appears in some ancestry
401
                continue
4371.4.22 by John Arbash Meinel
Changing from a set of seen to a list to cleanup and a seen attribute.
402
            PyList_Append(cleanup, node)
403
            node.seen = 1
4371.4.7 by Vincent Ladeuil
Simple Pyrex version for the new heads() method.
404
            if node.gdfo <= min_gdfo:
405
                continue
4371.4.21 by John Arbash Meinel
Spend some time optimizing the inner heads() loop, approx 2x faster.
406
            if node.parents is not None and PyTuple_GET_SIZE(node.parents) > 0:
407
                for pos from 0 <= pos < PyTuple_GET_SIZE(node.parents):
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
408
                    parent_node = _get_tuple_node(node.parents, pos)
4371.4.31 by John Arbash Meinel
Apply the same 'never pop' logic to the heads() code.
409
                    last_item = last_item + 1
410
                    if last_item < PyList_GET_SIZE(pending):
411
                        Py_INCREF(parent_node) # SetItem steals a ref
412
                        PyList_SetItem(pending, last_item, parent_node)
4371.4.21 by John Arbash Meinel
Spend some time optimizing the inner heads() loop, approx 2x faster.
413
                    else:
414
                        PyList_Append(pending, parent_node)
4371.4.22 by John Arbash Meinel
Changing from a set of seen to a list to cleanup and a seen attribute.
415
        heads = []
416
        pos = 0
417
        while PyDict_Next(candidate_nodes, &pos, NULL, &temp_node):
418
            node = <_KnownGraphNode>temp_node
419
            if not node.seen:
420
                PyList_Append(heads, node.key)
4526.10.1 by John Arbash Meinel
Avoid using PyFrozenSet_New
421
        heads = frozenset(heads)
4371.4.22 by John Arbash Meinel
Changing from a set of seen to a list to cleanup and a seen attribute.
422
        for pos from 0 <= pos < PyList_GET_SIZE(cleanup):
423
            node = _get_list_node(cleanup, pos)
424
            node.seen = 0
4371.4.7 by Vincent Ladeuil
Simple Pyrex version for the new heads() method.
425
        if self.do_cache:
4371.4.21 by John Arbash Meinel
Spend some time optimizing the inner heads() loop, approx 2x faster.
426
            PyDict_SetItem(self._known_heads, heads_key, heads)
4371.4.7 by Vincent Ladeuil
Simple Pyrex version for the new heads() method.
427
        return heads
4577.3.2 by John Arbash Meinel
Implement KnownGraph.topo_sort.
428
429
    def topo_sort(self):
430
        """Return the nodes in topological order.
431
432
        All parents must occur before all children.
433
        """
434
        # This is, for the most part, the same iteration order that we used for
435
        # _find_gdfo, consider finding a way to remove the duplication
436
        # In general, we find the 'tails' (nodes with no parents), and then
437
        # walk to the children. For children that have all of their parents
438
        # yielded, we queue up the child to be yielded as well.
439
        cdef _KnownGraphNode node
440
        cdef _KnownGraphNode child
441
        cdef PyObject *temp
442
        cdef Py_ssize_t pos
443
        cdef int replace
444
        cdef Py_ssize_t last_item
445
446
        pending = self._find_tails()
4593.5.3 by John Arbash Meinel
Bring in the optimized tsort code.
447
        if PyList_GET_SIZE(pending) == 0 and len(self._nodes) > 0:
448
            raise errors.GraphCycleError(self._nodes)
4577.3.2 by John Arbash Meinel
Implement KnownGraph.topo_sort.
449
450
        topo_order = []
451
452
        last_item = PyList_GET_SIZE(pending) - 1
453
        while last_item >= 0:
454
            # Avoid pop followed by push, instead, peek, and replace
455
            # timing shows this is 930ms => 770ms for OOo
456
            node = _get_list_node(pending, last_item)
457
            last_item = last_item - 1
4593.5.3 by John Arbash Meinel
Bring in the optimized tsort code.
458
            if node.parents is not None:
459
                # We don't include ghost parents
460
                PyList_Append(topo_order, node.key)
4577.3.2 by John Arbash Meinel
Implement KnownGraph.topo_sort.
461
            for pos from 0 <= pos < PyList_GET_SIZE(node.children):
462
                child = _get_list_node(node.children, pos)
4593.5.3 by John Arbash Meinel
Bring in the optimized tsort code.
463
                if child.gdfo == -1:
464
                    # We know we have a graph cycle because a node has a parent
465
                    # which we couldn't find
466
                    raise errors.GraphCycleError(self._nodes)
4577.3.2 by John Arbash Meinel
Implement KnownGraph.topo_sort.
467
                child.seen = child.seen + 1
468
                if child.seen == PyTuple_GET_SIZE(child.parents):
469
                    # All parents of this child have been yielded, queue this
470
                    # one to be yielded as well
471
                    last_item = last_item + 1
472
                    if last_item < PyList_GET_SIZE(pending):
473
                        Py_INCREF(child) # SetItem steals a ref
474
                        PyList_SetItem(pending, last_item, child)
475
                    else:
476
                        PyList_Append(pending, child)
477
                    # We have queued this node, we don't need to track it
478
                    # anymore
479
                    child.seen = 0
480
        # We started from the parents, so we don't need to do anymore work
481
        return topo_order
4593.5.6 by John Arbash Meinel
Create the crude merge_sort implementation that just thunks over to the old implementation.
482
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
483
    def gc_sort(self):
484
        """Return a reverse topological ordering which is 'stable'.
485
486
        There are a few constraints:
487
          1) Reverse topological (all children before all parents)
488
          2) Grouped by prefix
489
          3) 'stable' sorting, so that we get the same result, independent of
490
             machine, or extra data.
491
        To do this, we use the same basic algorithm as topo_sort, but when we
492
        aren't sure what node to access next, we sort them lexicographically.
493
        """
494
        cdef PyObject *temp
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
495
        cdef Py_ssize_t pos, last_item
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
496
        cdef _KnownGraphNode node, node2, parent_node
497
498
        tips = self._find_tips()
499
        # Split the tips based on prefix
500
        prefix_tips = {}
501
        for pos from 0 <= pos < PyList_GET_SIZE(tips):
502
            node = _get_list_node(tips, pos)
503
            if PyString_CheckExact(node.key) or len(node.key) == 1:
504
                prefix = ''
505
            else:
506
                prefix = node.key[0]
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
507
            temp = PyDict_GetItem(prefix_tips, prefix)
508
            if temp == NULL:
509
                prefix_tips[prefix] = [node]
510
            else:
511
                tip_nodes = <object>temp
512
                PyList_Append(tip_nodes, node)
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
513
514
        result = []
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
515
        for prefix in sorted(prefix_tips):
516
            temp = PyDict_GetItem(prefix_tips, prefix)
517
            assert temp != NULL
518
            tip_nodes = <object>temp
519
            pending = _sort_list_nodes(tip_nodes, 1)
520
            last_item = PyList_GET_SIZE(pending) - 1
521
            while last_item >= 0:
522
                node = _get_list_node(pending, last_item)
523
                last_item = last_item - 1
4641.5.6 by John Arbash Meinel
Add support for skipping ghost nodes.
524
                if node.parents is None:
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
525
                    # Ghost
4641.5.6 by John Arbash Meinel
Add support for skipping ghost nodes.
526
                    continue
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
527
                PyList_Append(result, node.key)
4641.5.11 by John Arbash Meinel
Using PyObject_RichCompareBool is a significant benefit. 8.8ms down to 7.7ms.
528
                # Sorting the parent keys isn't strictly necessary for stable
529
                # sorting of a given graph. But it does help minimize the
530
                # differences between graphs
531
                # For bzr.dev ancestry:
532
                #   4.73ms  no sort
533
                #   7.73ms  RichCompareBool sort
4641.5.10 by John Arbash Meinel
Go back to sorted parents mode.
534
                parents = _sort_list_nodes(node.parents, 1)
535
                for pos from 0 <= pos < len(parents):
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
536
                    if PyTuple_CheckExact(parents):
537
                        parent_node = _get_tuple_node(parents, pos)
538
                    else:
539
                        parent_node = _get_list_node(parents, pos)
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
540
                    # TODO: GraphCycle detection
541
                    parent_node.seen = parent_node.seen + 1
542
                    if (parent_node.seen
543
                        == PyList_GET_SIZE(parent_node.children)):
544
                        # All children have been processed, queue up this
545
                        # parent
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
546
                        last_item = last_item + 1
547
                        if last_item < PyList_GET_SIZE(pending):
548
                            Py_INCREF(parent_node) # SetItem steals a ref
549
                            PyList_SetItem(pending, last_item, parent_node)
550
                        else:
551
                            PyList_Append(pending, parent_node)
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
552
                        parent_node.seen = 0
553
        return result
4593.5.6 by John Arbash Meinel
Create the crude merge_sort implementation that just thunks over to the old implementation.
554
555
    def merge_sort(self, tip_key):
556
        """Compute the merge sorted graph output."""
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
557
        cdef _MergeSorter sorter
558
4593.5.29 by John Arbash Meinel
Remove the gc enable/disable code.
559
        # TODO: consider disabling gc since we are allocating a lot of nodes
560
        #       that won't be collectable anyway. real world testing has not
561
        #       shown a specific impact, yet.
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
562
        sorter = _MergeSorter(self, tip_key)
563
        return sorter.topo_order()
4648.2.1 by Gary van der Merwe
Add get_parent_keys, and get_child_keys to KnownGraph.
564
    
565
    def get_parent_keys(self, key):
566
        """Get the parents for a key
567
        
568
        Returns a list containg the parents keys. If the key is a ghost,
569
        None is returned. A KeyError will be raised if the key is not in
570
        the graph.
571
        
572
        :param keys: Key to check (eg revision_id)
573
        :return: A list of parents
574
        """
575
        return self._nodes[key].parent_keys 
576
577
    def get_child_keys(self, key):
578
        """Get the children for a key
579
        
580
        Returns a list containg the children keys. A KeyError will be raised
581
        if the key is not in the graph.
582
        
583
        :param keys: Key to check (eg revision_id)
584
        :return: A list of children
585
        """
586
        return self._nodes[key].child_keys    
4593.5.7 by John Arbash Meinel
start working on a pyrex implementation of MergeSorter
587
588
589
cdef class _MergeSortNode:
590
    """Tracks information about a node during the merge_sort operation."""
591
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
592
    # Public api
593
    cdef public object key
594
    cdef public long merge_depth
595
    cdef public object end_of_merge # True/False Is this the end of the current merge
596
597
    # Private api, used while computing the information
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
598
    cdef _KnownGraphNode left_parent
599
    cdef _KnownGraphNode left_pending_parent
600
    cdef object pending_parents # list of _KnownGraphNode for non-left parents
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
601
    cdef long _revno_first
602
    cdef long _revno_second
603
    cdef long _revno_last
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
604
    # TODO: turn these into flag/bit fields rather than individual members
4593.5.7 by John Arbash Meinel
start working on a pyrex implementation of MergeSorter
605
    cdef int is_first_child # Is this the first child?
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
606
    cdef int seen_by_child # A child node has seen this parent
607
    cdef int completed # Fully Processed
608
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
609
    def __init__(self, key):
610
        self.key = key
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
611
        self.merge_depth = -1
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
612
        self.left_parent = None
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
613
        self.left_pending_parent = None
614
        self.pending_parents = None
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
615
        self._revno_first = -1
616
        self._revno_second = -1
617
        self._revno_last = -1
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
618
        self.is_first_child = 0
619
        self.seen_by_child = 0
620
        self.completed = 0
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
621
622
    def __repr__(self):
4634.11.1 by John Arbash Meinel
Fix bug #419241. If a graph had a mainline ghost
623
        return '%s(%s depth:%s rev:%s,%s,%s first:%s seen:%s)' % (
624
            self.__class__.__name__, self.key,
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
625
            self.merge_depth,
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
626
            self._revno_first, self._revno_second, self._revno_last,
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
627
            self.is_first_child, self.seen_by_child)
628
629
    cdef int has_pending_parents(self):
630
        if self.left_pending_parent is not None or self.pending_parents:
631
            return 1
632
        return 0
4593.5.7 by John Arbash Meinel
start working on a pyrex implementation of MergeSorter
633
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
634
    cdef object _revno(self):
635
        if self._revno_first == -1:
636
            if self._revno_second != -1:
637
                raise RuntimeError('Something wrong with: %s' % (self,))
638
            return (self._revno_last,)
639
        else:
640
            return (self._revno_first, self._revno_second, self._revno_last)
641
642
    property revno:
643
        def __get__(self):
644
            return self._revno()
645
4593.5.7 by John Arbash Meinel
start working on a pyrex implementation of MergeSorter
646
647
cdef class _MergeSorter:
648
    """This class does the work of computing the merge_sort ordering.
649
650
    We have some small advantages, in that we get all the extra information
651
    that KnownGraph knows, like knowing the child lists, etc.
652
    """
653
4593.5.11 by John Arbash Meinel
Without doing any real tuning yet, we see a decent speedup for merge_sort:
654
    # Current performance numbers for merge_sort(bzr_dev_parent_map):
4593.5.25 by John Arbash Meinel
Add tests that we detect GraphCycleError
655
    #  302ms tsort.merge_sort()
656
    #   91ms graph.KnownGraph().merge_sort()
4593.5.28 by John Arbash Meinel
Cleanup pass
657
    #   40ms kg.merge_sort()
4593.5.11 by John Arbash Meinel
Without doing any real tuning yet, we see a decent speedup for merge_sort:
658
4593.5.7 by John Arbash Meinel
start working on a pyrex implementation of MergeSorter
659
    cdef KnownGraph graph
4593.5.15 by John Arbash Meinel
Use the same implementation of stack handling to avoid append/pop.
660
    cdef object _depth_first_stack  # list
661
    cdef Py_ssize_t _last_stack_item # offset to last item on stack
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
662
    # cdef object _ms_nodes # dict of key => _MergeSortNode
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
663
    cdef object _revno_to_branch_count # {revno => num child branches}
664
    cdef object _scheduled_nodes # List of nodes ready to be yielded
4593.5.7 by John Arbash Meinel
start working on a pyrex implementation of MergeSorter
665
666
    def __init__(self, known_graph, tip_key):
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
667
        cdef _KnownGraphNode node
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
668
4593.5.7 by John Arbash Meinel
start working on a pyrex implementation of MergeSorter
669
        self.graph = known_graph
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
670
        # self._ms_nodes = {}
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
671
        self._revno_to_branch_count = {}
4593.5.15 by John Arbash Meinel
Use the same implementation of stack handling to avoid append/pop.
672
        self._depth_first_stack = []
673
        self._last_stack_item = -1
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
674
        self._scheduled_nodes = []
4593.5.42 by John Arbash Meinel
Turns out that some code assumed passing NULL_REVISION to merge_sort
675
        if (tip_key is not None and tip_key != NULL_REVISION
676
            and tip_key != (NULL_REVISION,)):
4593.5.7 by John Arbash Meinel
start working on a pyrex implementation of MergeSorter
677
            node = self.graph._nodes[tip_key]
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
678
            self._push_node(node, 0)
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
679
4593.5.27 by John Arbash Meinel
bit of cleanup passes
680
    cdef _MergeSortNode _get_ms_node(self, _KnownGraphNode node):
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
681
        cdef PyObject *temp_node
682
        cdef _MergeSortNode ms_node
683
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
684
        if node.extra is None:
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
685
            ms_node = _MergeSortNode(node.key)
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
686
            node.extra = ms_node
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
687
        else:
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
688
            ms_node = <_MergeSortNode>node.extra
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
689
        return ms_node
690
4593.5.24 by John Arbash Meinel
Switch some variables from Py_ssize_t to long
691
    cdef _push_node(self, _KnownGraphNode node, long merge_depth):
4593.5.7 by John Arbash Meinel
start working on a pyrex implementation of MergeSorter
692
        cdef _KnownGraphNode parent_node
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
693
        cdef _MergeSortNode ms_node, ms_parent_node
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
694
        cdef Py_ssize_t pos
4593.5.7 by John Arbash Meinel
start working on a pyrex implementation of MergeSorter
695
4593.5.27 by John Arbash Meinel
bit of cleanup passes
696
        ms_node = self._get_ms_node(node)
4593.5.7 by John Arbash Meinel
start working on a pyrex implementation of MergeSorter
697
        ms_node.merge_depth = merge_depth
4634.11.1 by John Arbash Meinel
Fix bug #419241. If a graph had a mainline ghost
698
        if node.parents is None:
699
            raise RuntimeError('ghost nodes should not be pushed'
700
                               ' onto the stack: %s' % (node,))
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
701
        if PyTuple_GET_SIZE(node.parents) > 0:
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
702
            parent_node = _get_tuple_node(node.parents, 0)
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
703
            ms_node.left_parent = parent_node
4634.11.1 by John Arbash Meinel
Fix bug #419241. If a graph had a mainline ghost
704
            if parent_node.parents is None: # left-hand ghost
705
                ms_node.left_pending_parent = None
706
                ms_node.left_parent = None
707
            else:
708
                ms_node.left_pending_parent = parent_node
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
709
        if PyTuple_GET_SIZE(node.parents) > 1:
4593.5.20 by John Arbash Meinel
Expose KnownGraph off of VersionedFiles
710
            ms_node.pending_parents = []
711
            for pos from 1 <= pos < PyTuple_GET_SIZE(node.parents):
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
712
                parent_node = _get_tuple_node(node.parents, pos)
4593.5.20 by John Arbash Meinel
Expose KnownGraph off of VersionedFiles
713
                if parent_node.parents is None: # ghost
714
                    continue
715
                PyList_Append(ms_node.pending_parents, parent_node)
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
716
4593.5.7 by John Arbash Meinel
start working on a pyrex implementation of MergeSorter
717
        ms_node.is_first_child = 1
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
718
        if ms_node.left_parent is not None:
4593.5.27 by John Arbash Meinel
bit of cleanup passes
719
            ms_parent_node = self._get_ms_node(ms_node.left_parent)
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
720
            if ms_parent_node.seen_by_child:
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
721
                ms_node.is_first_child = 0
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
722
            ms_parent_node.seen_by_child = 1
4593.5.15 by John Arbash Meinel
Use the same implementation of stack handling to avoid append/pop.
723
        self._last_stack_item = self._last_stack_item + 1
724
        if self._last_stack_item < PyList_GET_SIZE(self._depth_first_stack):
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
725
            Py_INCREF(node) # SetItem steals a ref
4593.5.15 by John Arbash Meinel
Use the same implementation of stack handling to avoid append/pop.
726
            PyList_SetItem(self._depth_first_stack, self._last_stack_item,
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
727
                           node)
4593.5.15 by John Arbash Meinel
Use the same implementation of stack handling to avoid append/pop.
728
        else:
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
729
            PyList_Append(self._depth_first_stack, node)
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
730
731
    cdef _pop_node(self):
4593.5.16 by John Arbash Meinel
Change the revno handling code.
732
        cdef PyObject *temp
4593.5.23 by John Arbash Meinel
Move the end-of-merge computation into _pop_node
733
        cdef _MergeSortNode ms_node, ms_parent_node, ms_prev_node
734
        cdef _KnownGraphNode node, parent_node, prev_node
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
735
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
736
        node = _get_list_node(self._depth_first_stack, self._last_stack_item)
737
        ms_node = <_MergeSortNode>node.extra
4593.5.15 by John Arbash Meinel
Use the same implementation of stack handling to avoid append/pop.
738
        self._last_stack_item = self._last_stack_item - 1
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
739
        if ms_node.left_parent is not None:
4593.5.27 by John Arbash Meinel
bit of cleanup passes
740
            # Assign the revision number from the left-hand parent
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
741
            ms_parent_node = <_MergeSortNode>ms_node.left_parent.extra
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
742
            if ms_node.is_first_child:
743
                # First child just increments the final digit
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
744
                ms_node._revno_first = ms_parent_node._revno_first
745
                ms_node._revno_second = ms_parent_node._revno_second
746
                ms_node._revno_last = ms_parent_node._revno_last + 1
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
747
            else:
4593.5.10 by John Arbash Meinel
finish getting the revno numbers working correctly.
748
                # Not the first child, make a new branch
4593.5.27 by John Arbash Meinel
bit of cleanup passes
749
                #  (mainline_revno, branch_count, 1)
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
750
                if ms_parent_node._revno_first == -1:
4593.5.16 by John Arbash Meinel
Change the revno handling code.
751
                    # Mainline ancestor, the increment is on the last digit
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
752
                    base_revno = ms_parent_node._revno_last
4593.5.16 by John Arbash Meinel
Change the revno handling code.
753
                else:
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
754
                    base_revno = ms_parent_node._revno_first
4593.5.16 by John Arbash Meinel
Change the revno handling code.
755
                temp = PyDict_GetItem(self._revno_to_branch_count,
4593.5.17 by John Arbash Meinel
No need to pop one item off the stack after another.
756
                                      base_revno)
4593.5.16 by John Arbash Meinel
Change the revno handling code.
757
                if temp == NULL:
758
                    branch_count = 1
759
                else:
760
                    branch_count = (<object>temp) + 1
761
                PyDict_SetItem(self._revno_to_branch_count, base_revno,
762
                               branch_count)
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
763
                ms_node._revno_first = base_revno
764
                ms_node._revno_second = branch_count
765
                ms_node._revno_last = 1
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
766
        else:
4593.5.27 by John Arbash Meinel
bit of cleanup passes
767
            temp = PyDict_GetItem(self._revno_to_branch_count, 0)
768
            if temp == NULL:
4593.5.16 by John Arbash Meinel
Change the revno handling code.
769
                # The first root node doesn't have a 3-digit revno
4593.5.27 by John Arbash Meinel
bit of cleanup passes
770
                root_count = 0
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
771
                ms_node._revno_first = -1
772
                ms_node._revno_second = -1
773
                ms_node._revno_last = 1
4593.5.27 by John Arbash Meinel
bit of cleanup passes
774
            else:
775
                root_count = (<object>temp) + 1
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
776
                ms_node._revno_first = 0
777
                ms_node._revno_second = root_count
778
                ms_node._revno_last = 1
4593.5.27 by John Arbash Meinel
bit of cleanup passes
779
            PyDict_SetItem(self._revno_to_branch_count, 0, root_count)
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
780
        ms_node.completed = 1
4593.5.23 by John Arbash Meinel
Move the end-of-merge computation into _pop_node
781
        if PyList_GET_SIZE(self._scheduled_nodes) == 0:
4593.5.27 by John Arbash Meinel
bit of cleanup passes
782
            # The first scheduled node is always the end of merge
4593.5.23 by John Arbash Meinel
Move the end-of-merge computation into _pop_node
783
            ms_node.end_of_merge = True
784
        else:
785
            prev_node = _get_list_node(self._scheduled_nodes,
786
                                    PyList_GET_SIZE(self._scheduled_nodes) - 1)
787
            ms_prev_node = <_MergeSortNode>prev_node.extra
788
            if ms_prev_node.merge_depth < ms_node.merge_depth:
789
                # The previously pushed node is to our left, so this is the end
790
                # of this right-hand chain
791
                ms_node.end_of_merge = True
792
            elif (ms_prev_node.merge_depth == ms_node.merge_depth
793
                  and prev_node not in node.parents):
794
                # The next node is not a direct parent of this node
795
                ms_node.end_of_merge = True
796
            else:
797
                ms_node.end_of_merge = False
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
798
        PyList_Append(self._scheduled_nodes, node)
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
799
800
    cdef _schedule_stack(self):
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
801
        cdef _KnownGraphNode last_node, next_node
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
802
        cdef _MergeSortNode ms_node, ms_last_node, ms_next_node
4593.5.24 by John Arbash Meinel
Switch some variables from Py_ssize_t to long
803
        cdef long next_merge_depth
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
804
        ordered = []
4593.5.15 by John Arbash Meinel
Use the same implementation of stack handling to avoid append/pop.
805
        while self._last_stack_item >= 0:
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
806
            # Peek at the last item on the stack
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
807
            last_node = _get_list_node(self._depth_first_stack,
808
                                       self._last_stack_item)
4593.5.25 by John Arbash Meinel
Add tests that we detect GraphCycleError
809
            if last_node.gdfo == -1:
4593.5.28 by John Arbash Meinel
Cleanup pass
810
                # if _find_gdfo skipped a node, that means there is a graph
811
                # cycle, error out now
4593.5.25 by John Arbash Meinel
Add tests that we detect GraphCycleError
812
                raise errors.GraphCycleError(self.graph._nodes)
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
813
            ms_last_node = <_MergeSortNode>last_node.extra
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
814
            if not ms_last_node.has_pending_parents():
815
                # Processed all parents, pop this node
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
816
                self._pop_node()
817
                continue
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
818
            while ms_last_node.has_pending_parents():
819
                if ms_last_node.left_pending_parent is not None:
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
820
                    # recurse depth first into the primary parent
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
821
                    next_node = ms_last_node.left_pending_parent
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
822
                    ms_last_node.left_pending_parent = None
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
823
                else:
824
                    # place any merges in right-to-left order for scheduling
825
                    # which gives us left-to-right order after we reverse
4593.5.28 by John Arbash Meinel
Cleanup pass
826
                    # the scheduled queue.
827
                    # Note: This has the effect of allocating common-new
828
                    #       revisions to the right-most subtree rather than the
829
                    #       left most, which will display nicely (you get
830
                    #       smaller trees at the top of the combined merge).
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
831
                    next_node = ms_last_node.pending_parents.pop()
4593.5.27 by John Arbash Meinel
bit of cleanup passes
832
                ms_next_node = self._get_ms_node(next_node)
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
833
                if ms_next_node.completed:
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
834
                    # this parent was completed by a child on the
835
                    # call stack. skip it.
836
                    continue
837
                # otherwise transfer it from the source graph into the
838
                # top of the current depth first search stack.
839
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
840
                if next_node is ms_last_node.left_parent:
4593.5.24 by John Arbash Meinel
Switch some variables from Py_ssize_t to long
841
                    next_merge_depth = ms_last_node.merge_depth
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
842
                else:
4593.5.24 by John Arbash Meinel
Switch some variables from Py_ssize_t to long
843
                    next_merge_depth = ms_last_node.merge_depth + 1
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
844
                self._push_node(next_node, next_merge_depth)
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
845
                # and do not continue processing parents until this 'call'
846
                # has recursed.
847
                break
848
849
    cdef topo_order(self):
4593.5.23 by John Arbash Meinel
Move the end-of-merge computation into _pop_node
850
        cdef _MergeSortNode ms_node
851
        cdef _KnownGraphNode node
4593.5.17 by John Arbash Meinel
No need to pop one item off the stack after another.
852
        cdef Py_ssize_t pos
4593.5.28 by John Arbash Meinel
Cleanup pass
853
        cdef PyObject *temp_key, *temp_node
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
854
4593.5.28 by John Arbash Meinel
Cleanup pass
855
        # Note: allocating a _MergeSortNode and deallocating it for all nodes
856
        #       costs approx 8.52ms (21%) of the total runtime
857
        #       We might consider moving the attributes into the base
858
        #       KnownGraph object.
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
859
        self._schedule_stack()
860
861
        # We've set up the basic schedule, now we can continue processing the
862
        # output.
4593.5.29 by John Arbash Meinel
Remove the gc enable/disable code.
863
        # Note: This final loop costs us 40.0ms => 28.8ms (11ms, 25%) on
4593.5.24 by John Arbash Meinel
Switch some variables from Py_ssize_t to long
864
        #       bzr.dev, to convert the internal Object representation into a
865
        #       Tuple representation...
866
        #       2ms is walking the data and computing revno tuples
867
        #       7ms is computing the return tuple
868
        #       4ms is PyList_Append()
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
869
        ordered = []
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
870
        # output the result in reverse order, and separate the generated info
4593.5.23 by John Arbash Meinel
Move the end-of-merge computation into _pop_node
871
        for pos from PyList_GET_SIZE(self._scheduled_nodes) > pos >= 0:
872
            node = _get_list_node(self._scheduled_nodes, pos)
873
            ms_node = <_MergeSortNode>node.extra
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
874
            PyList_Append(ordered, ms_node)
4593.5.23 by John Arbash Meinel
Move the end-of-merge computation into _pop_node
875
            node.extra = None
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
876
        # Clear out the scheduled nodes now that we're done
4593.5.17 by John Arbash Meinel
No need to pop one item off the stack after another.
877
        self._scheduled_nodes = []
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
878
        return ordered