1
# Copyright (C) 2009 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
"""Implementation of Graph algorithms when we have already loaded everything.
20
cdef extern from "python-compat.h":
23
cdef extern from "Python.h":
24
ctypedef int Py_ssize_t
25
ctypedef struct PyObject:
28
object PyTuple_New(Py_ssize_t n)
29
Py_ssize_t PyTuple_GET_SIZE(object t)
30
PyObject * PyTuple_GET_ITEM(object t, Py_ssize_t o)
31
void PyTuple_SET_ITEM(object t, Py_ssize_t o, object v)
33
Py_ssize_t PyList_GET_SIZE(object l)
34
PyObject * PyList_GET_ITEM(object l, Py_ssize_t o)
35
int PyList_SetItem(object l, Py_ssize_t o, object l) except -1
36
int PyList_Append(object l, object v) except -1
38
int PyDict_CheckExact(object d)
39
Py_ssize_t PyDict_Size(object d) except -1
40
PyObject * PyDict_GetItem(object d, object k)
41
int PyDict_SetItem(object d, object k, object v) except -1
42
int PyDict_DelItem(object d, object k) except -1
43
int PyDict_Next(object d, Py_ssize_t *pos, PyObject **k, PyObject **v)
45
void Py_INCREF(object)
48
from bzrlib import revision
50
cdef object NULL_REVISION
51
NULL_REVISION = revision.NULL_REVISION
54
cdef class _KnownGraphNode:
55
"""Represents a single object in the known graph."""
63
def __init__(self, key):
70
# Greatest distance from origin
76
cdef _KnownGraphNode child
79
for child in self.children:
80
PyList_Append(keys, child.key)
83
cdef clear_references(self):
88
cdef _KnownGraphNode node
91
if self.parents is not None:
92
for node in self.parents:
93
parent_keys.append(node.key)
95
if self.children is not None:
96
for node in self.children:
97
child_keys.append(node.key)
98
return '%s(%s gdfo:%s par:%s child:%s)' % (
99
self.__class__.__name__, self.key, self.gdfo,
100
parent_keys, child_keys)
103
cdef _KnownGraphNode _get_list_node(lst, Py_ssize_t pos):
104
cdef PyObject *temp_node
106
temp_node = PyList_GET_ITEM(lst, pos)
107
return <_KnownGraphNode>temp_node
110
cdef _KnownGraphNode _get_parent(parents, Py_ssize_t pos):
111
cdef PyObject *temp_node
112
cdef _KnownGraphNode node
114
temp_node = PyTuple_GET_ITEM(parents, pos)
115
return <_KnownGraphNode>temp_node
118
# TODO: slab allocate all _KnownGraphNode objects.
119
# We already know how many we are going to need, except for a couple of
120
# ghosts that could be allocated on demand.
122
cdef class KnownGraph:
123
"""This is a class which assumes we already know the full graph."""
125
cdef public object _nodes
126
cdef object _known_heads
127
cdef public int do_cache
129
def __init__(self, parent_map, do_cache=True):
130
"""Create a new KnownGraph instance.
132
:param parent_map: A dictionary mapping key => parent_keys
134
# tests at pre-allocating the node dict actually slowed things down
136
# Maps {sorted(revision_id, revision_id): heads}
137
self._known_heads = {}
138
self.do_cache = int(do_cache)
139
self._initialize_nodes(parent_map)
142
def __dealloc__(self):
143
cdef _KnownGraphNode child
145
cdef PyObject *temp_node
147
while PyDict_Next(self._nodes, &pos, NULL, &temp_node):
148
child = <_KnownGraphNode>temp_node
149
child.clear_references()
151
cdef _KnownGraphNode _get_or_create_node(self, key):
152
cdef PyObject *temp_node
153
cdef _KnownGraphNode node
155
temp_node = PyDict_GetItem(self._nodes, key)
156
if temp_node == NULL:
157
node = _KnownGraphNode(key)
158
PyDict_SetItem(self._nodes, key, node)
160
node = <_KnownGraphNode>temp_node
163
def _initialize_nodes(self, parent_map):
164
"""Populate self._nodes.
166
After this has finished:
167
- self._nodes will have an entry for every entry in parent_map.
168
- ghosts will have a parent_keys = None,
169
- all nodes found will also have child_keys populated with all known
172
cdef PyObject *temp_key, *temp_parent_keys, *temp_node
173
cdef Py_ssize_t pos, pos2, num_parent_keys
174
cdef _KnownGraphNode node
175
cdef _KnownGraphNode parent_node
177
if not PyDict_CheckExact(parent_map):
178
raise TypeError('parent_map should be a dict of {key:parent_keys}')
179
# for key, parent_keys in parent_map.iteritems():
181
while PyDict_Next(parent_map, &pos, &temp_key, &temp_parent_keys):
182
key = <object>temp_key
183
parent_keys = <object>temp_parent_keys
184
num_parent_keys = len(parent_keys)
185
node = self._get_or_create_node(key)
186
# We know how many parents, so we could pre allocate an exact sized
188
parent_nodes = PyTuple_New(num_parent_keys)
189
# We use iter here, because parent_keys maybe be a list or tuple
190
for pos2 from 0 <= pos2 < num_parent_keys:
191
parent_node = self._get_or_create_node(parent_keys[pos2])
192
# PyTuple_SET_ITEM will steal a reference, so INCREF first
193
Py_INCREF(parent_node)
194
PyTuple_SET_ITEM(parent_nodes, pos2, parent_node)
195
PyList_Append(parent_node.children, node)
196
node.parents = parent_nodes
198
def _find_tails(self):
199
cdef PyObject *temp_node
200
cdef _KnownGraphNode node
205
while PyDict_Next(self._nodes, &pos, NULL, &temp_node):
206
node = <_KnownGraphNode>temp_node
207
if node.parents is None or PyTuple_GET_SIZE(node.parents) == 0:
209
PyList_Append(tails, node)
212
def _find_gdfo(self):
213
cdef _KnownGraphNode node
214
cdef _KnownGraphNode child
218
cdef Py_ssize_t last_item
221
pending = self._find_tails()
223
last_item = PyList_GET_SIZE(pending) - 1
224
while last_item >= 0:
225
# Avoid pop followed by push, instead, peek, and replace
226
# timing shows this is 930ms => 770ms for OOo
227
node = _get_list_node(pending, last_item)
228
last_item = last_item - 1
229
next_gdfo = node.gdfo + 1
230
for pos from 0 <= pos < PyList_GET_SIZE(node.children):
231
child = _get_list_node(node.children, pos)
232
if next_gdfo > child.gdfo:
233
child.gdfo = next_gdfo
234
child.seen = child.seen + 1
235
if child.seen == PyTuple_GET_SIZE(child.parents):
236
# This child is populated, queue it to be walked
237
last_item = last_item + 1
238
if last_item < PyList_GET_SIZE(pending):
239
Py_INCREF(child) # SetItem steals a ref
240
PyList_SetItem(pending, last_item, child)
242
PyList_Append(pending, child)
243
# We have queued this node, we don't need to track it
247
def heads(self, keys):
248
"""Return the heads from amongst keys.
250
This is done by searching the ancestries of each key. Any key that is
251
reachable from another key is not returned; all the others are.
253
This operation scales with the relative depth between any two keys. It
254
uses gdfo to avoid walking all ancestry.
256
:param keys: An iterable of keys.
257
:return: A set of the heads. Note that as a set there is no ordering
258
information. Callers will need to filter their input to create
259
order if they need it.
261
cdef PyObject *maybe_node
262
cdef PyObject *maybe_heads
263
cdef PyObject *temp_node
264
cdef _KnownGraphNode node
265
cdef Py_ssize_t pos, last_item
268
heads_key = frozenset(keys)
269
maybe_heads = PyDict_GetItem(self._known_heads, heads_key)
270
if maybe_heads != NULL:
271
return <object>maybe_heads
272
# Not cached, compute it ourselves
275
maybe_node = PyDict_GetItem(self._nodes, key)
276
if maybe_node == NULL:
277
raise KeyError('key %s not in nodes' % (key,))
278
PyDict_SetItem(candidate_nodes, key, <object>maybe_node)
279
maybe_node = PyDict_GetItem(candidate_nodes, NULL_REVISION)
280
if maybe_node != NULL:
281
# NULL_REVISION is only a head if it is the only entry
282
candidate_nodes.pop(NULL_REVISION)
283
if not candidate_nodes:
284
return frozenset([NULL_REVISION])
285
# The keys changed, so recalculate heads_key
286
heads_key = frozenset(candidate_nodes)
287
if PyDict_Size(candidate_nodes) < 2:
292
# we know a gdfo cannot be longer than a linear chain of all nodes
293
min_gdfo = PyDict_Size(self._nodes) + 1
294
# Build up nodes that need to be walked, note that starting nodes are
295
# not added to seen()
297
while PyDict_Next(candidate_nodes, &pos, NULL, &temp_node):
298
node = <_KnownGraphNode>temp_node
299
if node.parents is not None:
300
pending.extend(node.parents)
301
if node.gdfo < min_gdfo:
304
# Now do all the real work
305
last_item = PyList_GET_SIZE(pending) - 1
306
while last_item >= 0:
307
node = _get_list_node(pending, last_item)
308
last_item = last_item - 1
310
# node already appears in some ancestry
312
PyList_Append(cleanup, node)
314
if node.gdfo <= min_gdfo:
316
if node.parents is not None and PyTuple_GET_SIZE(node.parents) > 0:
317
for pos from 0 <= pos < PyTuple_GET_SIZE(node.parents):
318
parent_node = _get_parent(node.parents, pos)
319
last_item = last_item + 1
320
if last_item < PyList_GET_SIZE(pending):
321
Py_INCREF(parent_node) # SetItem steals a ref
322
PyList_SetItem(pending, last_item, parent_node)
324
PyList_Append(pending, parent_node)
327
while PyDict_Next(candidate_nodes, &pos, NULL, &temp_node):
328
node = <_KnownGraphNode>temp_node
330
PyList_Append(heads, node.key)
331
heads = frozenset(heads)
332
for pos from 0 <= pos < PyList_GET_SIZE(cleanup):
333
node = _get_list_node(cleanup, pos)
336
PyDict_SetItem(self._known_heads, heads_key, heads)