~bzr-pqm/bzr/bzr.dev

4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
1
# Copyright (C) 2009 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
17
"""Implementation of Graph algorithms when we have already loaded everything.
18
"""
19
20
cdef extern from "python-compat.h":
21
    pass
22
23
cdef extern from "Python.h":
24
    ctypedef int Py_ssize_t
25
    ctypedef struct PyObject:
26
        pass
27
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
28
    int PyString_CheckExact(object)
4641.5.12 by John Arbash Meinel
small cleanup.
29
30
    int PyObject_RichCompareBool(object, object, int)
4641.5.11 by John Arbash Meinel
Using PyObject_RichCompareBool is a significant benefit. 8.8ms down to 7.7ms.
31
    int Py_LT
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
32
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
33
    int PyTuple_CheckExact(object)
4371.3.31 by John Arbash Meinel
Change _KnownGraph.parents to be a tuple rather than a list.
34
    object PyTuple_New(Py_ssize_t n)
4371.4.23 by John Arbash Meinel
Clean up the imports at the top.
35
    Py_ssize_t PyTuple_GET_SIZE(object t)
36
    PyObject * PyTuple_GET_ITEM(object t, Py_ssize_t o)
4371.3.31 by John Arbash Meinel
Change _KnownGraph.parents to be a tuple rather than a list.
37
    void PyTuple_SET_ITEM(object t, Py_ssize_t o, object v)
4371.4.23 by John Arbash Meinel
Clean up the imports at the top.
38
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
39
    int PyList_CheckExact(object)
4371.4.23 by John Arbash Meinel
Clean up the imports at the top.
40
    Py_ssize_t PyList_GET_SIZE(object l)
41
    PyObject * PyList_GET_ITEM(object l, Py_ssize_t o)
42
    int PyList_SetItem(object l, Py_ssize_t o, object l) except -1
43
    int PyList_Append(object l, object v) except -1
44
45
    int PyDict_CheckExact(object d)
46
    Py_ssize_t PyDict_Size(object d) except -1
4371.3.28 by John Arbash Meinel
Finish converting away from 'cdef list' syntax.
47
    PyObject * PyDict_GetItem(object d, object k)
4371.4.23 by John Arbash Meinel
Clean up the imports at the top.
48
    int PyDict_SetItem(object d, object k, object v) except -1
4371.4.18 by John Arbash Meinel
Big performance win, back to 650ms.
49
    int PyDict_DelItem(object d, object k) except -1
4371.3.28 by John Arbash Meinel
Finish converting away from 'cdef list' syntax.
50
    int PyDict_Next(object d, Py_ssize_t *pos, PyObject **k, PyObject **v)
4371.4.21 by John Arbash Meinel
Spend some time optimizing the inner heads() loop, approx 2x faster.
51
4371.3.20 by John Arbash Meinel
A few tweaks to the pyrex version.
52
    void Py_INCREF(object)
53
4838.1.2 by John Arbash Meinel
Implement KnownGraph.add_node() for the pyrex version.
54
from collections import deque
4593.5.15 by John Arbash Meinel
Use the same implementation of stack handling to avoid append/pop.
55
import gc
4371.3.21 by John Arbash Meinel
Attempt to make things better by splitting out left_parent and right_parent into
56
4593.5.3 by John Arbash Meinel
Bring in the optimized tsort code.
57
from bzrlib import errors, revision
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
58
4371.4.21 by John Arbash Meinel
Spend some time optimizing the inner heads() loop, approx 2x faster.
59
cdef object NULL_REVISION
60
NULL_REVISION = revision.NULL_REVISION
61
62
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
63
cdef class _KnownGraphNode:
64
    """Represents a single object in the known graph."""
65
4371.3.35 by John Arbash Meinel
Changing the heapq algorithm actually ends up with better overall performance.
66
    cdef object key
4371.3.28 by John Arbash Meinel
Finish converting away from 'cdef list' syntax.
67
    cdef object parents
68
    cdef object children
4371.4.13 by Vincent Ladeuil
Fixed as per John's review.
69
    cdef public long gdfo
4371.4.22 by John Arbash Meinel
Changing from a set of seen to a list to cleanup and a seen attribute.
70
    cdef int seen
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
71
    cdef object extra
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
72
4371.3.31 by John Arbash Meinel
Change _KnownGraph.parents to be a tuple rather than a list.
73
    def __init__(self, key):
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
74
        self.key = key
4371.3.31 by John Arbash Meinel
Change _KnownGraph.parents to be a tuple rather than a list.
75
        self.parents = None
4371.3.21 by John Arbash Meinel
Attempt to make things better by splitting out left_parent and right_parent into
76
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
77
        self.children = []
78
        # Greatest distance from origin
79
        self.gdfo = -1
4371.4.22 by John Arbash Meinel
Changing from a set of seen to a list to cleanup and a seen attribute.
80
        self.seen = 0
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
81
        self.extra = None
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
82
83
    property child_keys:
84
        def __get__(self):
85
            cdef _KnownGraphNode child
86
87
            keys = []
88
            for child in self.children:
4371.3.28 by John Arbash Meinel
Finish converting away from 'cdef list' syntax.
89
                PyList_Append(keys, child.key)
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
90
            return keys
91
4648.2.1 by Gary van der Merwe
Add get_parent_keys, and get_child_keys to KnownGraph.
92
    property parent_keys:
93
        def __get__(self):
94
            if self.parents is None:
95
                return None
96
            
97
            cdef _KnownGraphNode parent
98
99
            keys = []
100
            for parent in self.parents:
101
                PyList_Append(keys, parent.key)
102
            return keys
103
    
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
104
    cdef clear_references(self):
105
        self.parents = None
106
        self.children = None
107
108
    def __repr__(self):
4371.3.35 by John Arbash Meinel
Changing the heapq algorithm actually ends up with better overall performance.
109
        cdef _KnownGraphNode node
110
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
111
        parent_keys = []
4371.3.35 by John Arbash Meinel
Changing the heapq algorithm actually ends up with better overall performance.
112
        if self.parents is not None:
113
            for node in self.parents:
114
                parent_keys.append(node.key)
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
115
        child_keys = []
4371.3.35 by John Arbash Meinel
Changing the heapq algorithm actually ends up with better overall performance.
116
        if self.children is not None:
117
            for node in self.children:
118
                child_keys.append(node.key)
4371.4.10 by Vincent Ladeuil
Cleanup.
119
        return '%s(%s  gdfo:%s par:%s child:%s)' % (
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
120
            self.__class__.__name__, self.key, self.gdfo,
4371.4.12 by Vincent Ladeuil
Fix typos.
121
            parent_keys, child_keys)
4371.4.10 by Vincent Ladeuil
Cleanup.
122
4371.3.37 by John Arbash Meinel
cleanup passes.
123
4634.117.8 by John Arbash Meinel
Use a class regex to find other signatures to exclude.
124
cdef _KnownGraphNode _get_list_node(lst, Py_ssize_t pos):
4371.4.14 by John Arbash Meinel
Optimize _find_gdfo and make known_parent_gdfos a member of KnownGraphNode
125
    cdef PyObject *temp_node
126
127
    temp_node = PyList_GET_ITEM(lst, pos)
128
    return <_KnownGraphNode>temp_node
129
130
4634.117.8 by John Arbash Meinel
Use a class regex to find other signatures to exclude.
131
cdef _KnownGraphNode _get_tuple_node(tpl, Py_ssize_t pos):
4371.4.14 by John Arbash Meinel
Optimize _find_gdfo and make known_parent_gdfos a member of KnownGraphNode
132
    cdef PyObject *temp_node
133
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
134
    temp_node = PyTuple_GET_ITEM(tpl, pos)
4371.4.14 by John Arbash Meinel
Optimize _find_gdfo and make known_parent_gdfos a member of KnownGraphNode
135
    return <_KnownGraphNode>temp_node
136
137
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
138
def get_key(node):
139
    cdef _KnownGraphNode real_node
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
140
    real_node = node
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
141
    return real_node.key
142
143
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
144
cdef object _sort_list_nodes(object lst_or_tpl, int reverse):
145
    """Sort a list of _KnownGraphNode objects.
146
147
    If lst_or_tpl is a list, it is allowed to mutate in place. It may also
148
    just return the input list if everything is already sorted.
149
    """
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
150
    cdef _KnownGraphNode node1, node2
4641.5.9 by John Arbash Meinel
Explore just walking the parents directly, without sorting.
151
    cdef int do_swap, is_tuple
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
152
    cdef Py_ssize_t length
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
153
4641.5.9 by John Arbash Meinel
Explore just walking the parents directly, without sorting.
154
    is_tuple = PyTuple_CheckExact(lst_or_tpl)
155
    if not (is_tuple or PyList_CheckExact(lst_or_tpl)):
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
156
        raise TypeError('lst_or_tpl must be a list or tuple.')
157
    length = len(lst_or_tpl)
158
    if length == 0 or length == 1:
159
        return lst_or_tpl
160
    if length == 2:
4641.5.9 by John Arbash Meinel
Explore just walking the parents directly, without sorting.
161
        if is_tuple:
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
162
            node1 = _get_tuple_node(lst_or_tpl, 0)
163
            node2 = _get_tuple_node(lst_or_tpl, 1)
164
        else:
165
            node1 = _get_list_node(lst_or_tpl, 0)
166
            node2 = _get_list_node(lst_or_tpl, 1)
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
167
        if reverse:
4641.5.11 by John Arbash Meinel
Using PyObject_RichCompareBool is a significant benefit. 8.8ms down to 7.7ms.
168
            do_swap = PyObject_RichCompareBool(node1.key, node2.key, Py_LT)
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
169
        else:
4641.5.11 by John Arbash Meinel
Using PyObject_RichCompareBool is a significant benefit. 8.8ms down to 7.7ms.
170
            do_swap = PyObject_RichCompareBool(node2.key, node1.key, Py_LT)
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
171
        if not do_swap:
172
            return lst_or_tpl
4641.5.9 by John Arbash Meinel
Explore just walking the parents directly, without sorting.
173
        if is_tuple:
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
174
            return (node2, node1)
175
        else:
4641.5.9 by John Arbash Meinel
Explore just walking the parents directly, without sorting.
176
            # Swap 'in-place', since lists are mutable
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
177
            Py_INCREF(node1)
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
178
            PyList_SetItem(lst_or_tpl, 1, node1)
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
179
            Py_INCREF(node2)
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
180
            PyList_SetItem(lst_or_tpl, 0, node2)
181
            return lst_or_tpl
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
182
    # For all other sizes, we just use 'sorted()'
4641.5.9 by John Arbash Meinel
Explore just walking the parents directly, without sorting.
183
    if is_tuple:
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
184
        # Note that sorted() is just list(iterable).sort()
185
        lst_or_tpl = list(lst_or_tpl)
186
    lst_or_tpl.sort(key=get_key, reverse=reverse)
187
    return lst_or_tpl
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
188
189
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
190
cdef class _MergeSorter
191
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
192
cdef class KnownGraph:
193
    """This is a class which assumes we already know the full graph."""
194
195
    cdef public object _nodes
4838.1.2 by John Arbash Meinel
Implement KnownGraph.add_node() for the pyrex version.
196
    cdef public object _known_heads
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
197
    cdef public int do_cache
198
199
    def __init__(self, parent_map, do_cache=True):
200
        """Create a new KnownGraph instance.
201
202
        :param parent_map: A dictionary mapping key => parent_keys
203
        """
4371.4.29 by John Arbash Meinel
Pre-allocating self._nodes turned out to be slower.
204
        # tests at pre-allocating the node dict actually slowed things down
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
205
        self._nodes = {}
206
        # Maps {sorted(revision_id, revision_id): heads}
207
        self._known_heads = {}
208
        self.do_cache = int(do_cache)
4593.5.29 by John Arbash Meinel
Remove the gc enable/disable code.
209
        # TODO: consider disabling gc since we are allocating a lot of nodes
210
        #       that won't be collectable anyway. real world testing has not
211
        #       shown a specific impact, yet.
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
212
        self._initialize_nodes(parent_map)
213
        self._find_gdfo()
214
215
    def __dealloc__(self):
216
        cdef _KnownGraphNode child
4371.3.28 by John Arbash Meinel
Finish converting away from 'cdef list' syntax.
217
        cdef Py_ssize_t pos
218
        cdef PyObject *temp_node
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
219
4371.3.28 by John Arbash Meinel
Finish converting away from 'cdef list' syntax.
220
        while PyDict_Next(self._nodes, &pos, NULL, &temp_node):
221
            child = <_KnownGraphNode>temp_node
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
222
            child.clear_references()
223
4634.117.8 by John Arbash Meinel
Use a class regex to find other signatures to exclude.
224
    cdef _KnownGraphNode _get_or_create_node(self, key):
4371.3.31 by John Arbash Meinel
Change _KnownGraph.parents to be a tuple rather than a list.
225
        cdef PyObject *temp_node
226
        cdef _KnownGraphNode node
227
228
        temp_node = PyDict_GetItem(self._nodes, key)
229
        if temp_node == NULL:
230
            node = _KnownGraphNode(key)
231
            PyDict_SetItem(self._nodes, key, node)
232
        else:
233
            node = <_KnownGraphNode>temp_node
234
        return node
235
4838.1.2 by John Arbash Meinel
Implement KnownGraph.add_node() for the pyrex version.
236
    cdef _populate_parents(self, _KnownGraphNode node, parent_keys):
237
        cdef Py_ssize_t num_parent_keys, pos
238
        cdef _KnownGraphNode parent_node
239
240
        num_parent_keys = len(parent_keys)
241
        # We know how many parents, so we pre allocate the tuple
242
        parent_nodes = PyTuple_New(num_parent_keys)
243
        for pos from 0 <= pos < num_parent_keys:
244
            # Note: it costs us 10ms out of 40ms to lookup all of these
245
            #       parents, it doesn't seem to be an allocation overhead,
246
            #       but rather a lookup overhead. There doesn't seem to be
247
            #       a way around it, and that is one reason why
248
            #       KnownGraphNode maintains a direct pointer to the parent
249
            #       node.
250
            # We use [] because parent_keys may be a tuple or list
251
            parent_node = self._get_or_create_node(parent_keys[pos])
252
            # PyTuple_SET_ITEM will steal a reference, so INCREF first
253
            Py_INCREF(parent_node)
254
            PyTuple_SET_ITEM(parent_nodes, pos, parent_node)
255
            PyList_Append(parent_node.children, node)
256
        node.parents = parent_nodes
257
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
258
    def _initialize_nodes(self, parent_map):
259
        """Populate self._nodes.
260
4371.4.9 by Vincent Ladeuil
Simplify gdfo computing by finding tails when at graph build time.
261
        After this has finished:
262
        - self._nodes will have an entry for every entry in parent_map.
263
        - ghosts will have a parent_keys = None,
4371.4.12 by Vincent Ladeuil
Fix typos.
264
        - all nodes found will also have child_keys populated with all known
265
          child keys,
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
266
        """
4371.3.28 by John Arbash Meinel
Finish converting away from 'cdef list' syntax.
267
        cdef PyObject *temp_key, *temp_parent_keys, *temp_node
4838.1.2 by John Arbash Meinel
Implement KnownGraph.add_node() for the pyrex version.
268
        cdef Py_ssize_t pos
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
269
        cdef _KnownGraphNode node
270
        cdef _KnownGraphNode parent_node
271
4371.3.28 by John Arbash Meinel
Finish converting away from 'cdef list' syntax.
272
        if not PyDict_CheckExact(parent_map):
273
            raise TypeError('parent_map should be a dict of {key:parent_keys}')
274
        # for key, parent_keys in parent_map.iteritems():
275
        pos = 0
276
        while PyDict_Next(parent_map, &pos, &temp_key, &temp_parent_keys):
277
            key = <object>temp_key
278
            parent_keys = <object>temp_parent_keys
4371.4.28 by John Arbash Meinel
We spent a lot of time adding and removing keys from tails.
279
            node = self._get_or_create_node(key)
4838.1.2 by John Arbash Meinel
Implement KnownGraph.add_node() for the pyrex version.
280
            self._populate_parents(node, parent_keys)
4371.4.30 by John Arbash Meinel
We don't need self._tails anymore, nor does it need to be a set.
281
282
    def _find_tails(self):
283
        cdef PyObject *temp_node
284
        cdef _KnownGraphNode node
285
        cdef Py_ssize_t pos
286
287
        tails = []
4371.4.28 by John Arbash Meinel
We spent a lot of time adding and removing keys from tails.
288
        pos = 0
289
        while PyDict_Next(self._nodes, &pos, NULL, &temp_node):
290
            node = <_KnownGraphNode>temp_node
291
            if node.parents is None or PyTuple_GET_SIZE(node.parents) == 0:
4371.4.30 by John Arbash Meinel
We don't need self._tails anymore, nor does it need to be a set.
292
                node.gdfo = 1
293
                PyList_Append(tails, node)
294
        return tails
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
295
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
296
    def _find_tips(self):
297
        cdef PyObject *temp_node
298
        cdef _KnownGraphNode node
299
        cdef Py_ssize_t pos
300
301
        tips = []
302
        pos = 0
303
        while PyDict_Next(self._nodes, &pos, NULL, &temp_node):
304
            node = <_KnownGraphNode>temp_node
305
            if PyList_GET_SIZE(node.children) == 0:
306
                PyList_Append(tips, node)
307
        return tips
308
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
309
    def _find_gdfo(self):
4371.4.8 by Vincent Ladeuil
Replace the existing KnownGraph._find_gdfo.
310
        cdef _KnownGraphNode node
311
        cdef _KnownGraphNode child
4371.4.14 by John Arbash Meinel
Optimize _find_gdfo and make known_parent_gdfos a member of KnownGraphNode
312
        cdef PyObject *temp
4371.4.30 by John Arbash Meinel
We don't need self._tails anymore, nor does it need to be a set.
313
        cdef Py_ssize_t pos
4371.4.14 by John Arbash Meinel
Optimize _find_gdfo and make known_parent_gdfos a member of KnownGraphNode
314
        cdef int replace
4371.4.26 by John Arbash Meinel
Don't shrink the pending list.
315
        cdef Py_ssize_t last_item
4371.4.25 by John Arbash Meinel
We had an 'child_node.gdfo is None' check in the inner loop.
316
        cdef long next_gdfo
4371.4.8 by Vincent Ladeuil
Replace the existing KnownGraph._find_gdfo.
317
4371.4.30 by John Arbash Meinel
We don't need self._tails anymore, nor does it need to be a set.
318
        pending = self._find_tails()
4371.4.8 by Vincent Ladeuil
Replace the existing KnownGraph._find_gdfo.
319
4371.4.26 by John Arbash Meinel
Don't shrink the pending list.
320
        last_item = PyList_GET_SIZE(pending) - 1
321
        while last_item >= 0:
4371.4.14 by John Arbash Meinel
Optimize _find_gdfo and make known_parent_gdfos a member of KnownGraphNode
322
            # Avoid pop followed by push, instead, peek, and replace
4371.4.16 by John Arbash Meinel
Document some perf issues with using known_parent_gdfos, but leave it as a dict.
323
            # timing shows this is 930ms => 770ms for OOo
4371.4.26 by John Arbash Meinel
Don't shrink the pending list.
324
            node = _get_list_node(pending, last_item)
325
            last_item = last_item - 1
4371.4.25 by John Arbash Meinel
We had an 'child_node.gdfo is None' check in the inner loop.
326
            next_gdfo = node.gdfo + 1
4371.4.30 by John Arbash Meinel
We don't need self._tails anymore, nor does it need to be a set.
327
            for pos from 0 <= pos < PyList_GET_SIZE(node.children):
328
                child = _get_list_node(node.children, pos)
4371.4.25 by John Arbash Meinel
We had an 'child_node.gdfo is None' check in the inner loop.
329
                if next_gdfo > child.gdfo:
330
                    child.gdfo = next_gdfo
4371.4.27 by John Arbash Meinel
Re-use the child.seen attribute for tracking num parents.
331
                child.seen = child.seen + 1
332
                if child.seen == PyTuple_GET_SIZE(child.parents):
4371.4.17 by John Arbash Meinel
tried a different tack, to use a new list and append.
333
                    # This child is populated, queue it to be walked
4371.4.26 by John Arbash Meinel
Don't shrink the pending list.
334
                    last_item = last_item + 1
335
                    if last_item < PyList_GET_SIZE(pending):
4371.4.14 by John Arbash Meinel
Optimize _find_gdfo and make known_parent_gdfos a member of KnownGraphNode
336
                        Py_INCREF(child) # SetItem steals a ref
4371.4.26 by John Arbash Meinel
Don't shrink the pending list.
337
                        PyList_SetItem(pending, last_item, child)
4371.4.14 by John Arbash Meinel
Optimize _find_gdfo and make known_parent_gdfos a member of KnownGraphNode
338
                    else:
339
                        PyList_Append(pending, child)
4371.4.27 by John Arbash Meinel
Re-use the child.seen attribute for tracking num parents.
340
                    # We have queued this node, we don't need to track it
341
                    # anymore
342
                    child.seen = 0
4371.4.8 by Vincent Ladeuil
Replace the existing KnownGraph._find_gdfo.
343
4838.1.2 by John Arbash Meinel
Implement KnownGraph.add_node() for the pyrex version.
344
    def add_node(self, key, parent_keys):
345
        """Add a new node to the graph.
346
347
        If this fills in a ghost, then the gdfos of all children will be
348
        updated accordingly.
349
        
350
        :param key: The node being added. If this is a duplicate, this is a
351
            no-op.
352
        :param parent_keys: The parents of the given node.
353
        :return: None (should we return if this was a ghost, etc?)
354
        """
355
        cdef PyObject *maybe_node
356
        cdef _KnownGraphNode node, parent_node, child_node
357
        cdef long parent_gdfo, next_gdfo
358
359
        maybe_node = PyDict_GetItem(self._nodes, key)
360
        if maybe_node != NULL:
361
            node = <_KnownGraphNode>maybe_node
362
            if node.parents is None:
363
                # We are filling in a ghost
364
                self._populate_parents(node, parent_keys)
365
                # We can't trust cached heads anymore
366
                self._known_heads.clear()
367
            else: # Ensure that the parent_key list matches
368
                existing_parent_keys = []
369
                for parent_node in node.parents:
370
                    existing_parent_keys.append(parent_node.key)
371
                # Make sure we use a list for the comparison, in case it was a
372
                # tuple, etc
373
                parent_keys = list(parent_keys)
374
                if existing_parent_keys == parent_keys:
375
                    # Exact match, nothing more to do
376
                    return
377
                else:
378
                    raise ValueError('Parent key mismatch, existing node %s'
379
                        ' has parents of %s not %s'
380
                        % (key, existing_parent_keys, parent_keys))
381
        else:
382
            node = _KnownGraphNode(key)
383
            PyDict_SetItem(self._nodes, key, node)
384
            self._populate_parents(node, parent_keys)
385
        parent_gdfo = 0
386
        for parent_node in node.parents:
387
            if parent_node.gdfo == -1:
388
                # This is a newly introduced ghost, so it gets gdfo of 1
389
                parent_node.gdfo = 1
390
            if parent_gdfo < parent_node.gdfo:
391
                parent_gdfo = parent_node.gdfo
392
        node.gdfo = parent_gdfo + 1
393
        # Now fill the gdfo to all children
394
        # Note that this loop is slightly inefficient, in that we may visit the
395
        # same child (and its decendents) more than once, however, it is
396
        # 'efficient' in that we only walk to nodes that would be updated,
397
        # rather than all nodes
398
        # We use a deque rather than a simple list stack, to go for BFD rather
399
        # than DFD. So that if a longer path is possible, we walk it before we
400
        # get to the final child
401
        pending = deque([node])
402
        pending_popleft = pending.popleft
403
        pending_append = pending.append
404
        while pending:
405
            node = pending_popleft()
406
            next_gdfo = node.gdfo + 1
407
            for child_node in node.children:
408
                if child_node.gdfo < next_gdfo:
409
                    # This child is being updated, we need to check its
410
                    # children
411
                    child_node.gdfo = next_gdfo
412
                    pending_append(child_node)
413
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
414
    def heads(self, keys):
415
        """Return the heads from amongst keys.
416
417
        This is done by searching the ancestries of each key.  Any key that is
418
        reachable from another key is not returned; all the others are.
419
4371.4.7 by Vincent Ladeuil
Simple Pyrex version for the new heads() method.
420
        This operation scales with the relative depth between any two keys. It
421
        uses gdfo to avoid walking all ancestry.
422
423
        :param keys: An iterable of keys.
424
        :return: A set of the heads. Note that as a set there is no ordering
425
            information. Callers will need to filter their input to create
426
            order if they need it.
427
        """
428
        cdef PyObject *maybe_node
429
        cdef PyObject *maybe_heads
430
        cdef PyObject *temp_node
431
        cdef _KnownGraphNode node
4371.4.31 by John Arbash Meinel
Apply the same 'never pop' logic to the heads() code.
432
        cdef Py_ssize_t pos, last_item
4371.4.21 by John Arbash Meinel
Spend some time optimizing the inner heads() loop, approx 2x faster.
433
        cdef long min_gdfo
4371.4.7 by Vincent Ladeuil
Simple Pyrex version for the new heads() method.
434
4526.10.1 by John Arbash Meinel
Avoid using PyFrozenSet_New
435
        heads_key = frozenset(keys)
4371.4.7 by Vincent Ladeuil
Simple Pyrex version for the new heads() method.
436
        maybe_heads = PyDict_GetItem(self._known_heads, heads_key)
437
        if maybe_heads != NULL:
438
            return <object>maybe_heads
439
        # Not cached, compute it ourselves
440
        candidate_nodes = {}
441
        for key in keys:
4371.4.28 by John Arbash Meinel
We spent a lot of time adding and removing keys from tails.
442
            maybe_node = PyDict_GetItem(self._nodes, key)
4371.4.7 by Vincent Ladeuil
Simple Pyrex version for the new heads() method.
443
            if maybe_node == NULL:
444
                raise KeyError('key %s not in nodes' % (key,))
445
            PyDict_SetItem(candidate_nodes, key, <object>maybe_node)
4371.4.21 by John Arbash Meinel
Spend some time optimizing the inner heads() loop, approx 2x faster.
446
        maybe_node = PyDict_GetItem(candidate_nodes, NULL_REVISION)
447
        if maybe_node != NULL:
4371.4.7 by Vincent Ladeuil
Simple Pyrex version for the new heads() method.
448
            # NULL_REVISION is only a head if it is the only entry
4371.4.21 by John Arbash Meinel
Spend some time optimizing the inner heads() loop, approx 2x faster.
449
            candidate_nodes.pop(NULL_REVISION)
4371.4.7 by Vincent Ladeuil
Simple Pyrex version for the new heads() method.
450
            if not candidate_nodes:
4371.4.21 by John Arbash Meinel
Spend some time optimizing the inner heads() loop, approx 2x faster.
451
                return frozenset([NULL_REVISION])
4371.4.7 by Vincent Ladeuil
Simple Pyrex version for the new heads() method.
452
            # The keys changed, so recalculate heads_key
4526.10.1 by John Arbash Meinel
Avoid using PyFrozenSet_New
453
            heads_key = frozenset(candidate_nodes)
4371.4.21 by John Arbash Meinel
Spend some time optimizing the inner heads() loop, approx 2x faster.
454
        if PyDict_Size(candidate_nodes) < 2:
4371.4.7 by Vincent Ladeuil
Simple Pyrex version for the new heads() method.
455
            return heads_key
456
4371.4.22 by John Arbash Meinel
Changing from a set of seen to a list to cleanup and a seen attribute.
457
        cleanup = []
4371.4.7 by Vincent Ladeuil
Simple Pyrex version for the new heads() method.
458
        pending = []
4371.4.21 by John Arbash Meinel
Spend some time optimizing the inner heads() loop, approx 2x faster.
459
        # we know a gdfo cannot be longer than a linear chain of all nodes
460
        min_gdfo = PyDict_Size(self._nodes) + 1
461
        # Build up nodes that need to be walked, note that starting nodes are
462
        # not added to seen()
4371.4.7 by Vincent Ladeuil
Simple Pyrex version for the new heads() method.
463
        pos = 0
464
        while PyDict_Next(candidate_nodes, &pos, NULL, &temp_node):
465
            node = <_KnownGraphNode>temp_node
466
            if node.parents is not None:
467
                pending.extend(node.parents)
4371.4.21 by John Arbash Meinel
Spend some time optimizing the inner heads() loop, approx 2x faster.
468
            if node.gdfo < min_gdfo:
4371.4.7 by Vincent Ladeuil
Simple Pyrex version for the new heads() method.
469
                min_gdfo = node.gdfo
4371.4.21 by John Arbash Meinel
Spend some time optimizing the inner heads() loop, approx 2x faster.
470
471
        # Now do all the real work
4371.4.31 by John Arbash Meinel
Apply the same 'never pop' logic to the heads() code.
472
        last_item = PyList_GET_SIZE(pending) - 1
473
        while last_item >= 0:
474
            node = _get_list_node(pending, last_item)
475
            last_item = last_item - 1
4371.4.22 by John Arbash Meinel
Changing from a set of seen to a list to cleanup and a seen attribute.
476
            if node.seen:
4371.4.7 by Vincent Ladeuil
Simple Pyrex version for the new heads() method.
477
                # node already appears in some ancestry
478
                continue
4371.4.22 by John Arbash Meinel
Changing from a set of seen to a list to cleanup and a seen attribute.
479
            PyList_Append(cleanup, node)
480
            node.seen = 1
4371.4.7 by Vincent Ladeuil
Simple Pyrex version for the new heads() method.
481
            if node.gdfo <= min_gdfo:
482
                continue
4371.4.21 by John Arbash Meinel
Spend some time optimizing the inner heads() loop, approx 2x faster.
483
            if node.parents is not None and PyTuple_GET_SIZE(node.parents) > 0:
484
                for pos from 0 <= pos < PyTuple_GET_SIZE(node.parents):
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
485
                    parent_node = _get_tuple_node(node.parents, pos)
4371.4.31 by John Arbash Meinel
Apply the same 'never pop' logic to the heads() code.
486
                    last_item = last_item + 1
487
                    if last_item < PyList_GET_SIZE(pending):
488
                        Py_INCREF(parent_node) # SetItem steals a ref
489
                        PyList_SetItem(pending, last_item, parent_node)
4371.4.21 by John Arbash Meinel
Spend some time optimizing the inner heads() loop, approx 2x faster.
490
                    else:
491
                        PyList_Append(pending, parent_node)
4371.4.22 by John Arbash Meinel
Changing from a set of seen to a list to cleanup and a seen attribute.
492
        heads = []
493
        pos = 0
494
        while PyDict_Next(candidate_nodes, &pos, NULL, &temp_node):
495
            node = <_KnownGraphNode>temp_node
496
            if not node.seen:
497
                PyList_Append(heads, node.key)
4526.10.1 by John Arbash Meinel
Avoid using PyFrozenSet_New
498
        heads = frozenset(heads)
4371.4.22 by John Arbash Meinel
Changing from a set of seen to a list to cleanup and a seen attribute.
499
        for pos from 0 <= pos < PyList_GET_SIZE(cleanup):
500
            node = _get_list_node(cleanup, pos)
501
            node.seen = 0
4371.4.7 by Vincent Ladeuil
Simple Pyrex version for the new heads() method.
502
        if self.do_cache:
4371.4.21 by John Arbash Meinel
Spend some time optimizing the inner heads() loop, approx 2x faster.
503
            PyDict_SetItem(self._known_heads, heads_key, heads)
4371.4.7 by Vincent Ladeuil
Simple Pyrex version for the new heads() method.
504
        return heads
4577.3.2 by John Arbash Meinel
Implement KnownGraph.topo_sort.
505
506
    def topo_sort(self):
507
        """Return the nodes in topological order.
508
509
        All parents must occur before all children.
510
        """
511
        # This is, for the most part, the same iteration order that we used for
512
        # _find_gdfo, consider finding a way to remove the duplication
513
        # In general, we find the 'tails' (nodes with no parents), and then
514
        # walk to the children. For children that have all of their parents
515
        # yielded, we queue up the child to be yielded as well.
516
        cdef _KnownGraphNode node
517
        cdef _KnownGraphNode child
518
        cdef PyObject *temp
519
        cdef Py_ssize_t pos
520
        cdef int replace
521
        cdef Py_ssize_t last_item
522
523
        pending = self._find_tails()
4593.5.3 by John Arbash Meinel
Bring in the optimized tsort code.
524
        if PyList_GET_SIZE(pending) == 0 and len(self._nodes) > 0:
525
            raise errors.GraphCycleError(self._nodes)
4577.3.2 by John Arbash Meinel
Implement KnownGraph.topo_sort.
526
527
        topo_order = []
528
529
        last_item = PyList_GET_SIZE(pending) - 1
530
        while last_item >= 0:
531
            # Avoid pop followed by push, instead, peek, and replace
532
            # timing shows this is 930ms => 770ms for OOo
533
            node = _get_list_node(pending, last_item)
534
            last_item = last_item - 1
4593.5.3 by John Arbash Meinel
Bring in the optimized tsort code.
535
            if node.parents is not None:
536
                # We don't include ghost parents
537
                PyList_Append(topo_order, node.key)
4577.3.2 by John Arbash Meinel
Implement KnownGraph.topo_sort.
538
            for pos from 0 <= pos < PyList_GET_SIZE(node.children):
539
                child = _get_list_node(node.children, pos)
4593.5.3 by John Arbash Meinel
Bring in the optimized tsort code.
540
                if child.gdfo == -1:
541
                    # We know we have a graph cycle because a node has a parent
542
                    # which we couldn't find
543
                    raise errors.GraphCycleError(self._nodes)
4577.3.2 by John Arbash Meinel
Implement KnownGraph.topo_sort.
544
                child.seen = child.seen + 1
545
                if child.seen == PyTuple_GET_SIZE(child.parents):
546
                    # All parents of this child have been yielded, queue this
547
                    # one to be yielded as well
548
                    last_item = last_item + 1
549
                    if last_item < PyList_GET_SIZE(pending):
550
                        Py_INCREF(child) # SetItem steals a ref
551
                        PyList_SetItem(pending, last_item, child)
552
                    else:
553
                        PyList_Append(pending, child)
554
                    # We have queued this node, we don't need to track it
555
                    # anymore
556
                    child.seen = 0
557
        # We started from the parents, so we don't need to do anymore work
558
        return topo_order
4593.5.6 by John Arbash Meinel
Create the crude merge_sort implementation that just thunks over to the old implementation.
559
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
560
    def gc_sort(self):
561
        """Return a reverse topological ordering which is 'stable'.
562
563
        There are a few constraints:
564
          1) Reverse topological (all children before all parents)
565
          2) Grouped by prefix
566
          3) 'stable' sorting, so that we get the same result, independent of
567
             machine, or extra data.
568
        To do this, we use the same basic algorithm as topo_sort, but when we
569
        aren't sure what node to access next, we sort them lexicographically.
570
        """
571
        cdef PyObject *temp
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
572
        cdef Py_ssize_t pos, last_item
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
573
        cdef _KnownGraphNode node, node2, parent_node
574
575
        tips = self._find_tips()
576
        # Split the tips based on prefix
577
        prefix_tips = {}
578
        for pos from 0 <= pos < PyList_GET_SIZE(tips):
579
            node = _get_list_node(tips, pos)
580
            if PyString_CheckExact(node.key) or len(node.key) == 1:
581
                prefix = ''
582
            else:
583
                prefix = node.key[0]
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
584
            temp = PyDict_GetItem(prefix_tips, prefix)
585
            if temp == NULL:
586
                prefix_tips[prefix] = [node]
587
            else:
588
                tip_nodes = <object>temp
589
                PyList_Append(tip_nodes, node)
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
590
591
        result = []
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
592
        for prefix in sorted(prefix_tips):
593
            temp = PyDict_GetItem(prefix_tips, prefix)
594
            assert temp != NULL
595
            tip_nodes = <object>temp
596
            pending = _sort_list_nodes(tip_nodes, 1)
597
            last_item = PyList_GET_SIZE(pending) - 1
598
            while last_item >= 0:
599
                node = _get_list_node(pending, last_item)
600
                last_item = last_item - 1
4641.5.6 by John Arbash Meinel
Add support for skipping ghost nodes.
601
                if node.parents is None:
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
602
                    # Ghost
4641.5.6 by John Arbash Meinel
Add support for skipping ghost nodes.
603
                    continue
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
604
                PyList_Append(result, node.key)
4641.5.11 by John Arbash Meinel
Using PyObject_RichCompareBool is a significant benefit. 8.8ms down to 7.7ms.
605
                # Sorting the parent keys isn't strictly necessary for stable
606
                # sorting of a given graph. But it does help minimize the
607
                # differences between graphs
608
                # For bzr.dev ancestry:
609
                #   4.73ms  no sort
610
                #   7.73ms  RichCompareBool sort
4641.5.10 by John Arbash Meinel
Go back to sorted parents mode.
611
                parents = _sort_list_nodes(node.parents, 1)
612
                for pos from 0 <= pos < len(parents):
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
613
                    if PyTuple_CheckExact(parents):
614
                        parent_node = _get_tuple_node(parents, pos)
615
                    else:
616
                        parent_node = _get_list_node(parents, pos)
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
617
                    # TODO: GraphCycle detection
618
                    parent_node.seen = parent_node.seen + 1
619
                    if (parent_node.seen
620
                        == PyList_GET_SIZE(parent_node.children)):
621
                        # All children have been processed, queue up this
622
                        # parent
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
623
                        last_item = last_item + 1
624
                        if last_item < PyList_GET_SIZE(pending):
625
                            Py_INCREF(parent_node) # SetItem steals a ref
626
                            PyList_SetItem(pending, last_item, parent_node)
627
                        else:
628
                            PyList_Append(pending, parent_node)
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
629
                        parent_node.seen = 0
630
        return result
4593.5.6 by John Arbash Meinel
Create the crude merge_sort implementation that just thunks over to the old implementation.
631
632
    def merge_sort(self, tip_key):
633
        """Compute the merge sorted graph output."""
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
634
        cdef _MergeSorter sorter
635
4593.5.29 by John Arbash Meinel
Remove the gc enable/disable code.
636
        # TODO: consider disabling gc since we are allocating a lot of nodes
637
        #       that won't be collectable anyway. real world testing has not
638
        #       shown a specific impact, yet.
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
639
        sorter = _MergeSorter(self, tip_key)
640
        return sorter.topo_order()
4648.2.1 by Gary van der Merwe
Add get_parent_keys, and get_child_keys to KnownGraph.
641
    
642
    def get_parent_keys(self, key):
643
        """Get the parents for a key
644
        
645
        Returns a list containg the parents keys. If the key is a ghost,
646
        None is returned. A KeyError will be raised if the key is not in
647
        the graph.
648
        
649
        :param keys: Key to check (eg revision_id)
650
        :return: A list of parents
651
        """
652
        return self._nodes[key].parent_keys 
653
654
    def get_child_keys(self, key):
655
        """Get the children for a key
656
        
657
        Returns a list containg the children keys. A KeyError will be raised
658
        if the key is not in the graph.
659
        
660
        :param keys: Key to check (eg revision_id)
661
        :return: A list of children
662
        """
663
        return self._nodes[key].child_keys    
4593.5.7 by John Arbash Meinel
start working on a pyrex implementation of MergeSorter
664
665
666
cdef class _MergeSortNode:
667
    """Tracks information about a node during the merge_sort operation."""
668
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
669
    # Public api
670
    cdef public object key
671
    cdef public long merge_depth
672
    cdef public object end_of_merge # True/False Is this the end of the current merge
673
674
    # Private api, used while computing the information
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
675
    cdef _KnownGraphNode left_parent
676
    cdef _KnownGraphNode left_pending_parent
677
    cdef object pending_parents # list of _KnownGraphNode for non-left parents
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
678
    cdef long _revno_first
679
    cdef long _revno_second
680
    cdef long _revno_last
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
681
    # TODO: turn these into flag/bit fields rather than individual members
4593.5.7 by John Arbash Meinel
start working on a pyrex implementation of MergeSorter
682
    cdef int is_first_child # Is this the first child?
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
683
    cdef int seen_by_child # A child node has seen this parent
684
    cdef int completed # Fully Processed
685
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
686
    def __init__(self, key):
687
        self.key = key
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
688
        self.merge_depth = -1
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
689
        self.left_parent = None
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
690
        self.left_pending_parent = None
691
        self.pending_parents = None
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
692
        self._revno_first = -1
693
        self._revno_second = -1
694
        self._revno_last = -1
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
695
        self.is_first_child = 0
696
        self.seen_by_child = 0
697
        self.completed = 0
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
698
699
    def __repr__(self):
4634.11.1 by John Arbash Meinel
Fix bug #419241. If a graph had a mainline ghost
700
        return '%s(%s depth:%s rev:%s,%s,%s first:%s seen:%s)' % (
701
            self.__class__.__name__, self.key,
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
702
            self.merge_depth,
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
703
            self._revno_first, self._revno_second, self._revno_last,
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
704
            self.is_first_child, self.seen_by_child)
705
4634.117.10 by John Arbash Meinel
Change 'no except' to 'cannot_raise'
706
    cdef int has_pending_parents(self): # cannot_raise
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
707
        if self.left_pending_parent is not None or self.pending_parents:
708
            return 1
709
        return 0
4593.5.7 by John Arbash Meinel
start working on a pyrex implementation of MergeSorter
710
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
711
    cdef object _revno(self):
712
        if self._revno_first == -1:
713
            if self._revno_second != -1:
714
                raise RuntimeError('Something wrong with: %s' % (self,))
715
            return (self._revno_last,)
716
        else:
717
            return (self._revno_first, self._revno_second, self._revno_last)
718
719
    property revno:
720
        def __get__(self):
721
            return self._revno()
722
4593.5.7 by John Arbash Meinel
start working on a pyrex implementation of MergeSorter
723
724
cdef class _MergeSorter:
725
    """This class does the work of computing the merge_sort ordering.
726
727
    We have some small advantages, in that we get all the extra information
728
    that KnownGraph knows, like knowing the child lists, etc.
729
    """
730
4593.5.11 by John Arbash Meinel
Without doing any real tuning yet, we see a decent speedup for merge_sort:
731
    # Current performance numbers for merge_sort(bzr_dev_parent_map):
4593.5.25 by John Arbash Meinel
Add tests that we detect GraphCycleError
732
    #  302ms tsort.merge_sort()
733
    #   91ms graph.KnownGraph().merge_sort()
4593.5.28 by John Arbash Meinel
Cleanup pass
734
    #   40ms kg.merge_sort()
4593.5.11 by John Arbash Meinel
Without doing any real tuning yet, we see a decent speedup for merge_sort:
735
4593.5.7 by John Arbash Meinel
start working on a pyrex implementation of MergeSorter
736
    cdef KnownGraph graph
4593.5.15 by John Arbash Meinel
Use the same implementation of stack handling to avoid append/pop.
737
    cdef object _depth_first_stack  # list
738
    cdef Py_ssize_t _last_stack_item # offset to last item on stack
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
739
    # cdef object _ms_nodes # dict of key => _MergeSortNode
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
740
    cdef object _revno_to_branch_count # {revno => num child branches}
741
    cdef object _scheduled_nodes # List of nodes ready to be yielded
4593.5.7 by John Arbash Meinel
start working on a pyrex implementation of MergeSorter
742
743
    def __init__(self, known_graph, tip_key):
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
744
        cdef _KnownGraphNode node
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
745
4593.5.7 by John Arbash Meinel
start working on a pyrex implementation of MergeSorter
746
        self.graph = known_graph
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
747
        # self._ms_nodes = {}
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
748
        self._revno_to_branch_count = {}
4593.5.15 by John Arbash Meinel
Use the same implementation of stack handling to avoid append/pop.
749
        self._depth_first_stack = []
750
        self._last_stack_item = -1
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
751
        self._scheduled_nodes = []
4593.5.42 by John Arbash Meinel
Turns out that some code assumed passing NULL_REVISION to merge_sort
752
        if (tip_key is not None and tip_key != NULL_REVISION
753
            and tip_key != (NULL_REVISION,)):
4593.5.7 by John Arbash Meinel
start working on a pyrex implementation of MergeSorter
754
            node = self.graph._nodes[tip_key]
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
755
            self._push_node(node, 0)
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
756
4634.117.8 by John Arbash Meinel
Use a class regex to find other signatures to exclude.
757
    cdef _MergeSortNode _get_ms_node(self, _KnownGraphNode node):
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
758
        cdef PyObject *temp_node
759
        cdef _MergeSortNode ms_node
760
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
761
        if node.extra is None:
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
762
            ms_node = _MergeSortNode(node.key)
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
763
            node.extra = ms_node
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
764
        else:
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
765
            ms_node = <_MergeSortNode>node.extra
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
766
        return ms_node
767
4593.5.24 by John Arbash Meinel
Switch some variables from Py_ssize_t to long
768
    cdef _push_node(self, _KnownGraphNode node, long merge_depth):
4593.5.7 by John Arbash Meinel
start working on a pyrex implementation of MergeSorter
769
        cdef _KnownGraphNode parent_node
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
770
        cdef _MergeSortNode ms_node, ms_parent_node
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
771
        cdef Py_ssize_t pos
4593.5.7 by John Arbash Meinel
start working on a pyrex implementation of MergeSorter
772
4593.5.27 by John Arbash Meinel
bit of cleanup passes
773
        ms_node = self._get_ms_node(node)
4593.5.7 by John Arbash Meinel
start working on a pyrex implementation of MergeSorter
774
        ms_node.merge_depth = merge_depth
4634.11.1 by John Arbash Meinel
Fix bug #419241. If a graph had a mainline ghost
775
        if node.parents is None:
776
            raise RuntimeError('ghost nodes should not be pushed'
777
                               ' onto the stack: %s' % (node,))
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
778
        if PyTuple_GET_SIZE(node.parents) > 0:
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
779
            parent_node = _get_tuple_node(node.parents, 0)
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
780
            ms_node.left_parent = parent_node
4634.11.1 by John Arbash Meinel
Fix bug #419241. If a graph had a mainline ghost
781
            if parent_node.parents is None: # left-hand ghost
782
                ms_node.left_pending_parent = None
783
                ms_node.left_parent = None
784
            else:
785
                ms_node.left_pending_parent = parent_node
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
786
        if PyTuple_GET_SIZE(node.parents) > 1:
4593.5.20 by John Arbash Meinel
Expose KnownGraph off of VersionedFiles
787
            ms_node.pending_parents = []
788
            for pos from 1 <= pos < PyTuple_GET_SIZE(node.parents):
4641.5.7 by John Arbash Meinel
A few tweaks to the internals, and we are down to:
789
                parent_node = _get_tuple_node(node.parents, pos)
4593.5.20 by John Arbash Meinel
Expose KnownGraph off of VersionedFiles
790
                if parent_node.parents is None: # ghost
791
                    continue
792
                PyList_Append(ms_node.pending_parents, parent_node)
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
793
4593.5.7 by John Arbash Meinel
start working on a pyrex implementation of MergeSorter
794
        ms_node.is_first_child = 1
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
795
        if ms_node.left_parent is not None:
4593.5.27 by John Arbash Meinel
bit of cleanup passes
796
            ms_parent_node = self._get_ms_node(ms_node.left_parent)
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
797
            if ms_parent_node.seen_by_child:
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
798
                ms_node.is_first_child = 0
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
799
            ms_parent_node.seen_by_child = 1
4593.5.15 by John Arbash Meinel
Use the same implementation of stack handling to avoid append/pop.
800
        self._last_stack_item = self._last_stack_item + 1
801
        if self._last_stack_item < PyList_GET_SIZE(self._depth_first_stack):
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
802
            Py_INCREF(node) # SetItem steals a ref
4593.5.15 by John Arbash Meinel
Use the same implementation of stack handling to avoid append/pop.
803
            PyList_SetItem(self._depth_first_stack, self._last_stack_item,
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
804
                           node)
4593.5.15 by John Arbash Meinel
Use the same implementation of stack handling to avoid append/pop.
805
        else:
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
806
            PyList_Append(self._depth_first_stack, node)
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
807
808
    cdef _pop_node(self):
4593.5.16 by John Arbash Meinel
Change the revno handling code.
809
        cdef PyObject *temp
4593.5.23 by John Arbash Meinel
Move the end-of-merge computation into _pop_node
810
        cdef _MergeSortNode ms_node, ms_parent_node, ms_prev_node
811
        cdef _KnownGraphNode node, parent_node, prev_node
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
812
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
813
        node = _get_list_node(self._depth_first_stack, self._last_stack_item)
814
        ms_node = <_MergeSortNode>node.extra
4593.5.15 by John Arbash Meinel
Use the same implementation of stack handling to avoid append/pop.
815
        self._last_stack_item = self._last_stack_item - 1
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
816
        if ms_node.left_parent is not None:
4593.5.27 by John Arbash Meinel
bit of cleanup passes
817
            # Assign the revision number from the left-hand parent
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
818
            ms_parent_node = <_MergeSortNode>ms_node.left_parent.extra
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
819
            if ms_node.is_first_child:
820
                # First child just increments the final digit
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
821
                ms_node._revno_first = ms_parent_node._revno_first
822
                ms_node._revno_second = ms_parent_node._revno_second
823
                ms_node._revno_last = ms_parent_node._revno_last + 1
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
824
            else:
4593.5.10 by John Arbash Meinel
finish getting the revno numbers working correctly.
825
                # Not the first child, make a new branch
4593.5.27 by John Arbash Meinel
bit of cleanup passes
826
                #  (mainline_revno, branch_count, 1)
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
827
                if ms_parent_node._revno_first == -1:
4593.5.16 by John Arbash Meinel
Change the revno handling code.
828
                    # Mainline ancestor, the increment is on the last digit
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
829
                    base_revno = ms_parent_node._revno_last
4593.5.16 by John Arbash Meinel
Change the revno handling code.
830
                else:
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
831
                    base_revno = ms_parent_node._revno_first
4593.5.16 by John Arbash Meinel
Change the revno handling code.
832
                temp = PyDict_GetItem(self._revno_to_branch_count,
4593.5.17 by John Arbash Meinel
No need to pop one item off the stack after another.
833
                                      base_revno)
4593.5.16 by John Arbash Meinel
Change the revno handling code.
834
                if temp == NULL:
835
                    branch_count = 1
836
                else:
837
                    branch_count = (<object>temp) + 1
838
                PyDict_SetItem(self._revno_to_branch_count, base_revno,
839
                               branch_count)
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
840
                ms_node._revno_first = base_revno
841
                ms_node._revno_second = branch_count
842
                ms_node._revno_last = 1
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
843
        else:
4593.5.27 by John Arbash Meinel
bit of cleanup passes
844
            temp = PyDict_GetItem(self._revno_to_branch_count, 0)
845
            if temp == NULL:
4593.5.16 by John Arbash Meinel
Change the revno handling code.
846
                # The first root node doesn't have a 3-digit revno
4593.5.27 by John Arbash Meinel
bit of cleanup passes
847
                root_count = 0
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
848
                ms_node._revno_first = -1
849
                ms_node._revno_second = -1
850
                ms_node._revno_last = 1
4593.5.27 by John Arbash Meinel
bit of cleanup passes
851
            else:
852
                root_count = (<object>temp) + 1
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
853
                ms_node._revno_first = 0
854
                ms_node._revno_second = root_count
855
                ms_node._revno_last = 1
4593.5.27 by John Arbash Meinel
bit of cleanup passes
856
            PyDict_SetItem(self._revno_to_branch_count, 0, root_count)
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
857
        ms_node.completed = 1
4593.5.23 by John Arbash Meinel
Move the end-of-merge computation into _pop_node
858
        if PyList_GET_SIZE(self._scheduled_nodes) == 0:
4593.5.27 by John Arbash Meinel
bit of cleanup passes
859
            # The first scheduled node is always the end of merge
4593.5.23 by John Arbash Meinel
Move the end-of-merge computation into _pop_node
860
            ms_node.end_of_merge = True
861
        else:
862
            prev_node = _get_list_node(self._scheduled_nodes,
863
                                    PyList_GET_SIZE(self._scheduled_nodes) - 1)
864
            ms_prev_node = <_MergeSortNode>prev_node.extra
865
            if ms_prev_node.merge_depth < ms_node.merge_depth:
866
                # The previously pushed node is to our left, so this is the end
867
                # of this right-hand chain
868
                ms_node.end_of_merge = True
869
            elif (ms_prev_node.merge_depth == ms_node.merge_depth
870
                  and prev_node not in node.parents):
871
                # The next node is not a direct parent of this node
872
                ms_node.end_of_merge = True
873
            else:
874
                ms_node.end_of_merge = False
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
875
        PyList_Append(self._scheduled_nodes, node)
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
876
877
    cdef _schedule_stack(self):
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
878
        cdef _KnownGraphNode last_node, next_node
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
879
        cdef _MergeSortNode ms_node, ms_last_node, ms_next_node
4593.5.24 by John Arbash Meinel
Switch some variables from Py_ssize_t to long
880
        cdef long next_merge_depth
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
881
        ordered = []
4593.5.15 by John Arbash Meinel
Use the same implementation of stack handling to avoid append/pop.
882
        while self._last_stack_item >= 0:
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
883
            # Peek at the last item on the stack
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
884
            last_node = _get_list_node(self._depth_first_stack,
885
                                       self._last_stack_item)
4593.5.25 by John Arbash Meinel
Add tests that we detect GraphCycleError
886
            if last_node.gdfo == -1:
4593.5.28 by John Arbash Meinel
Cleanup pass
887
                # if _find_gdfo skipped a node, that means there is a graph
888
                # cycle, error out now
4593.5.25 by John Arbash Meinel
Add tests that we detect GraphCycleError
889
                raise errors.GraphCycleError(self.graph._nodes)
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
890
            ms_last_node = <_MergeSortNode>last_node.extra
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
891
            if not ms_last_node.has_pending_parents():
892
                # Processed all parents, pop this node
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
893
                self._pop_node()
894
                continue
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
895
            while ms_last_node.has_pending_parents():
896
                if ms_last_node.left_pending_parent is not None:
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
897
                    # recurse depth first into the primary parent
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
898
                    next_node = ms_last_node.left_pending_parent
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
899
                    ms_last_node.left_pending_parent = None
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
900
                else:
901
                    # place any merges in right-to-left order for scheduling
902
                    # which gives us left-to-right order after we reverse
4593.5.28 by John Arbash Meinel
Cleanup pass
903
                    # the scheduled queue.
904
                    # Note: This has the effect of allocating common-new
905
                    #       revisions to the right-most subtree rather than the
906
                    #       left most, which will display nicely (you get
907
                    #       smaller trees at the top of the combined merge).
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
908
                    next_node = ms_last_node.pending_parents.pop()
4593.5.27 by John Arbash Meinel
bit of cleanup passes
909
                ms_next_node = self._get_ms_node(next_node)
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
910
                if ms_next_node.completed:
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
911
                    # this parent was completed by a child on the
912
                    # call stack. skip it.
913
                    continue
914
                # otherwise transfer it from the source graph into the
915
                # top of the current depth first search stack.
916
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
917
                if next_node is ms_last_node.left_parent:
4593.5.24 by John Arbash Meinel
Switch some variables from Py_ssize_t to long
918
                    next_merge_depth = ms_last_node.merge_depth
4593.5.12 by John Arbash Meinel
Move some sets/dicts into attributes.
919
                else:
4593.5.24 by John Arbash Meinel
Switch some variables from Py_ssize_t to long
920
                    next_merge_depth = ms_last_node.merge_depth + 1
4593.5.19 by John Arbash Meinel
Switch from having a MergeSortNode => KnownGraphNode to
921
                self._push_node(next_node, next_merge_depth)
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
922
                # and do not continue processing parents until this 'call'
923
                # has recursed.
924
                break
925
926
    cdef topo_order(self):
4593.5.23 by John Arbash Meinel
Move the end-of-merge computation into _pop_node
927
        cdef _MergeSortNode ms_node
928
        cdef _KnownGraphNode node
4593.5.17 by John Arbash Meinel
No need to pop one item off the stack after another.
929
        cdef Py_ssize_t pos
4593.5.28 by John Arbash Meinel
Cleanup pass
930
        cdef PyObject *temp_key, *temp_node
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
931
4593.5.28 by John Arbash Meinel
Cleanup pass
932
        # Note: allocating a _MergeSortNode and deallocating it for all nodes
933
        #       costs approx 8.52ms (21%) of the total runtime
934
        #       We might consider moving the attributes into the base
935
        #       KnownGraph object.
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
936
        self._schedule_stack()
937
938
        # We've set up the basic schedule, now we can continue processing the
939
        # output.
4593.5.29 by John Arbash Meinel
Remove the gc enable/disable code.
940
        # Note: This final loop costs us 40.0ms => 28.8ms (11ms, 25%) on
4593.5.24 by John Arbash Meinel
Switch some variables from Py_ssize_t to long
941
        #       bzr.dev, to convert the internal Object representation into a
942
        #       Tuple representation...
943
        #       2ms is walking the data and computing revno tuples
944
        #       7ms is computing the return tuple
945
        #       4ms is PyList_Append()
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
946
        ordered = []
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
947
        # output the result in reverse order, and separate the generated info
4593.5.23 by John Arbash Meinel
Move the end-of-merge computation into _pop_node
948
        for pos from PyList_GET_SIZE(self._scheduled_nodes) > pos >= 0:
949
            node = _get_list_node(self._scheduled_nodes, pos)
950
            ms_node = <_MergeSortNode>node.extra
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
951
            PyList_Append(ordered, ms_node)
4593.5.23 by John Arbash Meinel
Move the end-of-merge computation into _pop_node
952
            node.extra = None
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
953
        # Clear out the scheduled nodes now that we're done
4593.5.17 by John Arbash Meinel
No need to pop one item off the stack after another.
954
        self._scheduled_nodes = []
4593.5.8 by John Arbash Meinel
Approximate implementation in Pyrex.
955
        return ordered