1
# Copyright (C) 2009 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
"""Implementation of Graph algorithms when we have already loaded everything.
20
cdef extern from "python-compat.h":
23
cdef extern from "Python.h":
24
ctypedef int Py_ssize_t
25
ctypedef struct PyObject:
28
object PyFrozenSet_New(object)
30
object PyTuple_New(Py_ssize_t n)
31
Py_ssize_t PyTuple_GET_SIZE(object t)
32
PyObject * PyTuple_GET_ITEM(object t, Py_ssize_t o)
33
void PyTuple_SET_ITEM(object t, Py_ssize_t o, object v)
35
Py_ssize_t PyList_GET_SIZE(object l)
36
PyObject * PyList_GET_ITEM(object l, Py_ssize_t o)
37
int PyList_SetItem(object l, Py_ssize_t o, object l) except -1
38
int PyList_Append(object l, object v) except -1
40
int PyDict_CheckExact(object d)
41
Py_ssize_t PyDict_Size(object d) except -1
42
PyObject * PyDict_GetItem(object d, object k)
43
int PyDict_SetItem(object d, object k, object v) except -1
44
int PyDict_DelItem(object d, object k) except -1
45
int PyDict_Next(object d, Py_ssize_t *pos, PyObject **k, PyObject **v)
47
void Py_INCREF(object)
50
from bzrlib import revision
52
cdef object NULL_REVISION
53
NULL_REVISION = revision.NULL_REVISION
56
cdef class _KnownGraphNode:
57
"""Represents a single object in the known graph."""
65
def __init__(self, key):
72
# Greatest distance from origin
78
cdef _KnownGraphNode child
81
for child in self.children:
82
PyList_Append(keys, child.key)
85
cdef clear_references(self):
90
cdef _KnownGraphNode node
93
if self.parents is not None:
94
for node in self.parents:
95
parent_keys.append(node.key)
97
if self.children is not None:
98
for node in self.children:
99
child_keys.append(node.key)
100
return '%s(%s gdfo:%s par:%s child:%s)' % (
101
self.__class__.__name__, self.key, self.gdfo,
102
parent_keys, child_keys)
105
cdef _KnownGraphNode _get_list_node(lst, Py_ssize_t pos):
106
cdef PyObject *temp_node
108
temp_node = PyList_GET_ITEM(lst, pos)
109
return <_KnownGraphNode>temp_node
112
cdef _KnownGraphNode _get_parent(parents, Py_ssize_t pos):
113
cdef PyObject *temp_node
114
cdef _KnownGraphNode node
116
temp_node = PyTuple_GET_ITEM(parents, pos)
117
return <_KnownGraphNode>temp_node
120
# TODO: slab allocate all _KnownGraphNode objects.
121
# We already know how many we are going to need, except for a couple of
122
# ghosts that could be allocated on demand.
124
cdef class KnownGraph:
125
"""This is a class which assumes we already know the full graph."""
127
cdef public object _nodes
128
cdef object _known_heads
129
cdef public int do_cache
131
def __init__(self, parent_map, do_cache=True):
132
"""Create a new KnownGraph instance.
134
:param parent_map: A dictionary mapping key => parent_keys
136
# tests at pre-allocating the node dict actually slowed things down
138
# Maps {sorted(revision_id, revision_id): heads}
139
self._known_heads = {}
140
self.do_cache = int(do_cache)
141
self._initialize_nodes(parent_map)
144
def __dealloc__(self):
145
cdef _KnownGraphNode child
147
cdef PyObject *temp_node
149
while PyDict_Next(self._nodes, &pos, NULL, &temp_node):
150
child = <_KnownGraphNode>temp_node
151
child.clear_references()
153
cdef _KnownGraphNode _get_or_create_node(self, key):
154
cdef PyObject *temp_node
155
cdef _KnownGraphNode node
157
temp_node = PyDict_GetItem(self._nodes, key)
158
if temp_node == NULL:
159
node = _KnownGraphNode(key)
160
PyDict_SetItem(self._nodes, key, node)
162
node = <_KnownGraphNode>temp_node
165
def _initialize_nodes(self, parent_map):
166
"""Populate self._nodes.
168
After this has finished:
169
- self._nodes will have an entry for every entry in parent_map.
170
- ghosts will have a parent_keys = None,
171
- all nodes found will also have child_keys populated with all known
174
cdef PyObject *temp_key, *temp_parent_keys, *temp_node
175
cdef Py_ssize_t pos, pos2, num_parent_keys
176
cdef _KnownGraphNode node
177
cdef _KnownGraphNode parent_node
179
if not PyDict_CheckExact(parent_map):
180
raise TypeError('parent_map should be a dict of {key:parent_keys}')
181
# for key, parent_keys in parent_map.iteritems():
183
while PyDict_Next(parent_map, &pos, &temp_key, &temp_parent_keys):
184
key = <object>temp_key
185
parent_keys = <object>temp_parent_keys
186
num_parent_keys = len(parent_keys)
187
node = self._get_or_create_node(key)
188
# We know how many parents, so we could pre allocate an exact sized
190
parent_nodes = PyTuple_New(num_parent_keys)
191
# We use iter here, because parent_keys maybe be a list or tuple
192
for pos2 from 0 <= pos2 < num_parent_keys:
193
parent_node = self._get_or_create_node(parent_keys[pos2])
194
# PyTuple_SET_ITEM will steal a reference, so INCREF first
195
Py_INCREF(parent_node)
196
PyTuple_SET_ITEM(parent_nodes, pos2, parent_node)
197
PyList_Append(parent_node.children, node)
198
node.parents = parent_nodes
200
def _find_tails(self):
201
cdef PyObject *temp_node
202
cdef _KnownGraphNode node
207
while PyDict_Next(self._nodes, &pos, NULL, &temp_node):
208
node = <_KnownGraphNode>temp_node
209
if node.parents is None or PyTuple_GET_SIZE(node.parents) == 0:
211
PyList_Append(tails, node)
214
def _find_gdfo(self):
215
cdef _KnownGraphNode node
216
cdef _KnownGraphNode child
220
cdef Py_ssize_t last_item
223
pending = self._find_tails()
225
last_item = PyList_GET_SIZE(pending) - 1
226
while last_item >= 0:
227
# Avoid pop followed by push, instead, peek, and replace
228
# timing shows this is 930ms => 770ms for OOo
229
node = _get_list_node(pending, last_item)
230
last_item = last_item - 1
231
next_gdfo = node.gdfo + 1
232
for pos from 0 <= pos < PyList_GET_SIZE(node.children):
233
child = _get_list_node(node.children, pos)
234
if next_gdfo > child.gdfo:
235
child.gdfo = next_gdfo
236
child.seen = child.seen + 1
237
if child.seen == PyTuple_GET_SIZE(child.parents):
238
# This child is populated, queue it to be walked
239
last_item = last_item + 1
240
if last_item < PyList_GET_SIZE(pending):
241
Py_INCREF(child) # SetItem steals a ref
242
PyList_SetItem(pending, last_item, child)
244
PyList_Append(pending, child)
245
# We have queued this node, we don't need to track it
249
def heads(self, keys):
250
"""Return the heads from amongst keys.
252
This is done by searching the ancestries of each key. Any key that is
253
reachable from another key is not returned; all the others are.
255
This operation scales with the relative depth between any two keys. It
256
uses gdfo to avoid walking all ancestry.
258
:param keys: An iterable of keys.
259
:return: A set of the heads. Note that as a set there is no ordering
260
information. Callers will need to filter their input to create
261
order if they need it.
263
cdef PyObject *maybe_node
264
cdef PyObject *maybe_heads
265
cdef PyObject *temp_node
266
cdef _KnownGraphNode node
267
cdef Py_ssize_t pos, last_item
270
heads_key = PyFrozenSet_New(keys)
271
maybe_heads = PyDict_GetItem(self._known_heads, heads_key)
272
if maybe_heads != NULL:
273
return <object>maybe_heads
274
# Not cached, compute it ourselves
277
maybe_node = PyDict_GetItem(self._nodes, key)
278
if maybe_node == NULL:
279
raise KeyError('key %s not in nodes' % (key,))
280
PyDict_SetItem(candidate_nodes, key, <object>maybe_node)
281
maybe_node = PyDict_GetItem(candidate_nodes, NULL_REVISION)
282
if maybe_node != NULL:
283
# NULL_REVISION is only a head if it is the only entry
284
candidate_nodes.pop(NULL_REVISION)
285
if not candidate_nodes:
286
return frozenset([NULL_REVISION])
287
# The keys changed, so recalculate heads_key
288
heads_key = PyFrozenSet_New(candidate_nodes)
289
if PyDict_Size(candidate_nodes) < 2:
294
# we know a gdfo cannot be longer than a linear chain of all nodes
295
min_gdfo = PyDict_Size(self._nodes) + 1
296
# Build up nodes that need to be walked, note that starting nodes are
297
# not added to seen()
299
while PyDict_Next(candidate_nodes, &pos, NULL, &temp_node):
300
node = <_KnownGraphNode>temp_node
301
if node.parents is not None:
302
pending.extend(node.parents)
303
if node.gdfo < min_gdfo:
306
# Now do all the real work
307
last_item = PyList_GET_SIZE(pending) - 1
308
while last_item >= 0:
309
node = _get_list_node(pending, last_item)
310
last_item = last_item - 1
312
# node already appears in some ancestry
314
PyList_Append(cleanup, node)
316
if node.gdfo <= min_gdfo:
318
if node.parents is not None and PyTuple_GET_SIZE(node.parents) > 0:
319
for pos from 0 <= pos < PyTuple_GET_SIZE(node.parents):
320
parent_node = _get_parent(node.parents, pos)
321
last_item = last_item + 1
322
if last_item < PyList_GET_SIZE(pending):
323
Py_INCREF(parent_node) # SetItem steals a ref
324
PyList_SetItem(pending, last_item, parent_node)
326
PyList_Append(pending, parent_node)
329
while PyDict_Next(candidate_nodes, &pos, NULL, &temp_node):
330
node = <_KnownGraphNode>temp_node
332
PyList_Append(heads, node.key)
333
heads = PyFrozenSet_New(heads)
334
for pos from 0 <= pos < PyList_GET_SIZE(cleanup):
335
node = _get_list_node(cleanup, pos)
338
PyDict_SetItem(self._known_heads, heads_key, heads)