1
# Copyright (C) 2009 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
"""Implementation of Graph algorithms when we have already loaded everything.
20
cdef extern from "python-compat.h":
23
cdef extern from "Python.h":
24
ctypedef int Py_ssize_t
25
ctypedef struct PyObject:
28
object PyFrozenSet_New(object)
29
object PyTuple_New(Py_ssize_t n)
30
void PyTuple_SET_ITEM(object t, Py_ssize_t o, object v)
31
PyObject * PyTuple_GET_ITEM(object t, Py_ssize_t o)
32
Py_ssize_t PyTuple_GET_SIZE(object t)
33
PyObject * PyDict_GetItem(object d, object k)
34
Py_ssize_t PyDict_Size(object d) except -1
35
int PyDict_CheckExact(object d)
36
int PyDict_Next(object d, Py_ssize_t *pos, PyObject **k, PyObject **v)
37
int PyList_Append(object l, object v) except -1
38
PyObject * PyList_GET_ITEM(object l, Py_ssize_t o)
39
Py_ssize_t PyList_GET_SIZE(object l)
40
int PyDict_SetItem(object d, object k, object v) except -1
41
int PySet_Add(object s, object k) except -1
42
void Py_INCREF(object)
47
from bzrlib import revision
49
# Define these as cdef objects, so we don't have to getattr them later
50
cdef object heappush, heappop, heapify, heapreplace
51
heappush = heapq.heappush
52
heappop = heapq.heappop
53
heapify = heapq.heapify
54
heapreplace = heapq.heapreplace
57
cdef class _KnownGraphNode:
58
"""Represents a single object in the known graph."""
63
cdef _KnownGraphNode linear_dominator_node
64
cdef public object gdfo # Int
65
# This could also be simplified
66
cdef object ancestor_of
68
def __init__(self, key):
75
# oldest ancestor, such that no parents between here and there have >1
77
self.linear_dominator_node = None
78
# Greatest distance from origin
80
# This will become a tuple of known heads that have this node as an
82
self.ancestor_of = None
86
cdef _KnownGraphNode child
89
for child in self.children:
90
PyList_Append(keys, child.key)
93
property linear_dominator:
95
if self.linear_dominator_node is None:
98
return self.linear_dominator_node.key
100
cdef clear_references(self):
103
self.linear_dominator_node = None
106
cdef _KnownGraphNode node
109
if self.parents is not None:
110
for node in self.parents:
111
parent_keys.append(node.key)
113
if self.children is not None:
114
for node in self.children:
115
child_keys.append(node.key)
116
return '%s(%s gdfo:%s par:%s child:%s %s)' % (
117
self.__class__.__name__, self.key, self.gdfo,
118
parent_keys, child_keys,
119
self.linear_dominator)
122
cdef _KnownGraphNode _get_list_node(lst, Py_ssize_t pos):
123
cdef PyObject *temp_node
125
temp_node = PyList_GET_ITEM(lst, pos)
126
return <_KnownGraphNode>temp_node
129
cdef _KnownGraphNode _get_parent(parents, Py_ssize_t pos):
130
cdef PyObject *temp_node
131
cdef _KnownGraphNode node
133
temp_node = PyTuple_GET_ITEM(parents, pos)
134
return <_KnownGraphNode>temp_node
137
cdef _KnownGraphNode _peek_node(queue):
138
cdef PyObject *temp_node
139
cdef _KnownGraphNode node
141
temp_node = PyTuple_GET_ITEM(<object>PyList_GET_ITEM(queue, 0), 1)
142
node = <_KnownGraphNode>temp_node
145
# TODO: slab allocate all _KnownGraphNode objects.
146
# We already know how many we are going to need, except for a couple of
147
# ghosts that could be allocated on demand.
149
cdef class KnownGraph:
150
"""This is a class which assumes we already know the full graph."""
152
cdef public object _nodes
153
cdef object _known_heads
154
cdef public int do_cache
155
# Nodes we've touched that we'll need to reset their info when heads() is
157
cdef object _to_cleanup
159
def __init__(self, parent_map, do_cache=True):
160
"""Create a new KnownGraph instance.
162
:param parent_map: A dictionary mapping key => parent_keys
165
# Maps {sorted(revision_id, revision_id): heads}
166
self._known_heads = {}
167
self._to_cleanup = []
168
self.do_cache = int(do_cache)
169
self._initialize_nodes(parent_map)
170
self._find_linear_dominators()
173
def __dealloc__(self):
174
cdef _KnownGraphNode child
176
cdef PyObject *temp_node
178
while PyDict_Next(self._nodes, &pos, NULL, &temp_node):
179
child = <_KnownGraphNode>temp_node
180
child.clear_references()
182
cdef _KnownGraphNode _get_or_create_node(self, key):
183
cdef PyObject *temp_node
184
cdef _KnownGraphNode node
186
temp_node = PyDict_GetItem(self._nodes, key)
187
if temp_node == NULL:
188
node = _KnownGraphNode(key)
189
PyDict_SetItem(self._nodes, key, node)
191
node = <_KnownGraphNode>temp_node
194
def _initialize_nodes(self, parent_map):
195
"""Populate self._nodes.
197
After this has finished, self._nodes will have an entry for every entry
198
in parent_map. Ghosts will have a parent_keys = None, all nodes found
199
will also have .child_keys populated with all known child_keys.
201
cdef PyObject *temp_key, *temp_parent_keys, *temp_node
202
cdef Py_ssize_t pos, pos2, num_parent_keys
203
cdef _KnownGraphNode node
204
cdef _KnownGraphNode parent_node
208
if not PyDict_CheckExact(parent_map):
209
raise TypeError('parent_map should be a dict of {key:parent_keys}')
210
# for key, parent_keys in parent_map.iteritems():
212
while PyDict_Next(parent_map, &pos, &temp_key, &temp_parent_keys):
213
key = <object>temp_key
214
parent_keys = <object>temp_parent_keys
215
node = self._get_or_create_node(key)
216
# We know how many parents, so we could pre allocate an exact sized
218
num_parent_keys = len(parent_keys)
219
parent_nodes = PyTuple_New(num_parent_keys)
220
# We use iter here, because parent_keys maybe be a list or tuple
221
for pos2 from 0 <= pos2 < num_parent_keys:
222
parent_key = parent_keys[pos2]
223
parent_node = self._get_or_create_node(parent_keys[pos2])
224
# PyTuple_SET_ITEM will steal a reference, so INCREF first
225
Py_INCREF(parent_node)
226
PyTuple_SET_ITEM(parent_nodes, pos2, parent_node)
227
PyList_Append(parent_node.children, node)
228
node.parents = parent_nodes
230
cdef _KnownGraphNode _check_is_linear(self, _KnownGraphNode node):
231
"""Check to see if a given node is part of a linear chain."""
232
cdef _KnownGraphNode parent_node
233
if node.parents is None or PyTuple_GET_SIZE(node.parents) != 1:
234
# This node is either a ghost, a tail, or has multiple parents
235
# It its own dominator
236
node.linear_dominator_node = node
238
parent_node = _get_parent(node.parents, 0)
239
if PyList_GET_SIZE(parent_node.children) > 1:
240
# The parent has multiple children, so *this* node is the
242
node.linear_dominator_node = node
244
# The parent is already filled in, so add and continue
245
if parent_node.linear_dominator_node is not None:
246
node.linear_dominator_node = parent_node.linear_dominator_node
248
# We don't know this node, or its parent node, so start walking to
252
def _find_linear_dominators(self):
254
For any given node, the 'linear dominator' is an ancestor, such that
255
all parents between this node and that one have a single parent, and a
256
single child. So if A->B->C->D then B,C,D all have a linear dominator
259
There are two main benefits:
260
1) When walking the graph, we can jump to the nearest linear dominator,
261
rather than walking all of the nodes inbetween.
262
2) When caching heads() results, dominators give the "same" results as
263
their children. (If the dominator is a head, then the descendant is
264
a head, if the dominator is not a head, then the child isn't
267
cdef PyObject *temp_node
269
cdef _KnownGraphNode node
270
cdef _KnownGraphNode next_node
271
cdef _KnownGraphNode dominator
272
cdef int i, num_elements
275
while PyDict_Next(self._nodes, &pos, NULL, &temp_node):
276
node = <_KnownGraphNode>temp_node
277
# The parent is not filled in, so walk until we get somewhere
278
if node.linear_dominator_node is not None: #already done
280
next_node = self._check_is_linear(node)
281
if next_node is None:
282
# Nothing more needs to be done
285
while next_node is not None:
286
PyList_Append(stack, node)
288
next_node = self._check_is_linear(node)
289
# The stack now contains the linear chain, and 'node' should have
291
dominator = node.linear_dominator_node
292
num_elements = len(stack)
293
for i from num_elements > i >= 0:
294
next_node = _get_list_node(stack, i)
295
next_node.linear_dominator_node = dominator
298
cdef object _find_tails(self):
300
cdef PyObject *temp_node
302
cdef _KnownGraphNode node
306
while PyDict_Next(self._nodes, &pos, NULL, &temp_node):
307
node = <_KnownGraphNode>temp_node
308
if node.parents is None or PyTuple_GET_SIZE(node.parents) == 0:
309
PyList_Append(tails, node)
312
def _find_gdfo(self):
313
cdef Py_ssize_t pos, pos2
314
cdef _KnownGraphNode node
315
cdef _KnownGraphNode child_node
316
cdef _KnownGraphNode parent_node
317
cdef int replace_node, missing_parent
319
tails = self._find_tails()
321
for pos from 0 <= pos < PyList_GET_SIZE(tails):
322
node = _get_list_node(tails, pos)
324
PyList_Append(todo, (1, node))
325
# No need to heapify, because all tails have priority=1
326
while PyList_GET_SIZE(todo) > 0:
327
node = _peek_node(todo)
328
next_gdfo = node.gdfo + 1
330
for pos from 0 <= pos < PyList_GET_SIZE(node.children):
331
child_node = _get_list_node(node.children, pos)
332
# We should never have numbered children before we numbered
334
if child_node.gdfo != -1:
336
# Only enque children when all of their parents have been
337
# resolved. With a single parent, we can just take 'this' value
338
child_gdfo = next_gdfo
339
if PyTuple_GET_SIZE(child_node.parents) > 1:
341
for pos2 from 0 <= pos2 < PyTuple_GET_SIZE(child_node.parents):
342
parent_node = _get_parent(child_node.parents, pos2)
343
if parent_node.gdfo == -1:
346
if parent_node.gdfo >= child_gdfo:
347
child_gdfo = parent_node.gdfo + 1
349
# One of the parents is not numbered, so wait until we get
352
child_node.gdfo = child_gdfo
354
heapreplace(todo, (child_gdfo, child_node))
357
heappush(todo, (child_gdfo, child_node))
361
def heads(self, keys):
362
"""Return the heads from amongst keys.
364
This is done by searching the ancestries of each key. Any key that is
365
reachable from another key is not returned; all the others are.
367
This operation scales with the relative depth between any two keys. If
368
any two keys are completely disconnected all ancestry of both sides
371
:param keys: An iterable of keys.
372
:return: A set of the heads. Note that as a set there is no ordering
373
information. Callers will need to filter their input to create
374
order if they need it.
376
cdef PyObject *maybe_node
377
cdef PyObject *maybe_heads
379
heads_key = PyFrozenSet_New(keys)
380
maybe_heads = PyDict_GetItem(self._known_heads, heads_key)
381
if maybe_heads != NULL:
382
return <object>maybe_heads
384
# Not cached, compute it ourselves
388
maybe_node = PyDict_GetItem(nodes, key)
389
if maybe_node == NULL:
390
raise KeyError('key %s not in nodes' % (key,))
391
PyDict_SetItem(candidate_nodes, key, <object>maybe_node)
392
if revision.NULL_REVISION in candidate_nodes:
393
# NULL_REVISION is only a head if it is the only entry
394
candidate_nodes.pop(revision.NULL_REVISION)
395
if not candidate_nodes:
396
return set([revision.NULL_REVISION])
397
# The keys changed, so recalculate heads_key
398
heads_key = PyFrozenSet_New(candidate_nodes)
399
if len(candidate_nodes) < 2:
401
dom_to_node = self._get_dominators_to_nodes(candidate_nodes)
402
if PyDict_Size(candidate_nodes) < 2:
403
return frozenset(candidate_nodes)
404
dom_lookup_key, heads = self._heads_from_dominators(candidate_nodes,
406
if heads is not None:
408
# This heads was not in the cache, or it would have been caught
409
# earlier, but the dom head *was*, so do the simple cache
410
PyDict_SetItem(self._known_heads, heads_key, heads)
412
heads = self._heads_from_candidate_nodes(candidate_nodes, dom_to_node)
414
self._cache_heads(heads, heads_key, dom_lookup_key, candidate_nodes)
417
cdef object _cache_heads(self, heads, heads_key, dom_lookup_key,
419
cdef PyObject *maybe_node
420
cdef _KnownGraphNode node
422
PyDict_SetItem(self._known_heads, heads_key, heads)
425
maybe_node = PyDict_GetItem(candidate_nodes, key)
426
if maybe_node == NULL:
428
node = <_KnownGraphNode>maybe_node
429
PyList_Append(dom_heads, node.linear_dominator_node.key)
430
PyDict_SetItem(self._known_heads, dom_lookup_key,
431
PyFrozenSet_New(dom_heads))
433
cdef _get_dominators_to_nodes(self, candidate_nodes):
434
"""Get the reverse mapping from dominator_key => candidate_nodes.
436
As a side effect, this can also remove potential candidate nodes if we
437
determine that they share a dominator.
440
cdef _KnownGraphNode node, other_node
441
cdef PyObject *temp_node
442
cdef PyObject *maybe_node
447
while PyDict_Next(candidate_nodes, &pos, NULL, &temp_node):
448
node = <_KnownGraphNode>temp_node
449
dom_key = node.linear_dominator_node.key
450
maybe_node = PyDict_GetItem(dom_to_node, dom_key)
451
if maybe_node == NULL:
452
PyDict_SetItem(dom_to_node, dom_key, node)
454
other_node = <_KnownGraphNode>maybe_node
455
# These nodes share a dominator, one of them obviously
456
# supersedes the other, figure out which
457
if other_node.gdfo > node.gdfo:
458
PyList_Append(keys_to_remove, node.key)
460
# This wins, replace the other
461
PyList_Append(keys_to_remove, other_node.key)
462
PyDict_SetItem(dom_to_node, dom_key, node)
463
for pos from 0 <= pos < PyList_GET_SIZE(keys_to_remove):
464
key = <object>PyList_GET_ITEM(keys_to_remove, pos)
465
candidate_nodes.pop(key)
468
cdef object _heads_from_dominators(self, candidate_nodes, dom_to_node):
469
cdef PyObject *maybe_heads
470
cdef PyObject *maybe_node
471
cdef _KnownGraphNode node
473
cdef PyObject *temp_node
477
while PyDict_Next(candidate_nodes, &pos, NULL, &temp_node):
478
node = <_KnownGraphNode>temp_node
479
PyList_Append(dom_list_key, node.linear_dominator_node.key)
480
dom_lookup_key = PyFrozenSet_New(dom_list_key)
481
maybe_heads = PyDict_GetItem(self._known_heads, dom_lookup_key)
482
if maybe_heads == NULL:
483
return dom_lookup_key, None
484
# We need to map back from the dominator head to the original keys
485
dom_heads = <object>maybe_heads
487
for dom_key in dom_heads:
488
maybe_node = PyDict_GetItem(dom_to_node, dom_key)
489
if maybe_node == NULL:
490
# Should never happen
492
node = <_KnownGraphNode>maybe_node
493
PyList_Append(heads, node.key)
494
return dom_lookup_key, PyFrozenSet_New(heads)
496
cdef int _process_parent(self, _KnownGraphNode node,
497
_KnownGraphNode parent_node,
498
candidate_nodes, dom_to_node,
499
queue, int *replace_item) except -1:
500
"""Process the parent of a node, seeing if we need to walk it."""
501
cdef PyObject *maybe_candidate
502
cdef PyObject *maybe_node
503
cdef _KnownGraphNode dom_child_node
504
maybe_candidate = PyDict_GetItem(candidate_nodes, parent_node.key)
505
if maybe_candidate != NULL:
506
candidate_nodes.pop(parent_node.key)
507
# We could pass up a flag that tells the caller to stop processing,
508
# but it doesn't help much, and makes the code uglier
510
maybe_node = PyDict_GetItem(dom_to_node, parent_node.key)
511
if maybe_node != NULL:
512
# This is a dominator of a node
513
dom_child_node = <_KnownGraphNode>maybe_node
514
if dom_child_node is not node:
515
# It isn't a dominator of a node we are searching, so we should
516
# remove it from the search
517
maybe_candidate = PyDict_GetItem(candidate_nodes, dom_child_node.key)
518
if maybe_candidate != NULL:
519
candidate_nodes.pop(dom_child_node.key)
521
if parent_node.ancestor_of is None:
522
# This node hasn't been walked yet, so just project node's ancestor
523
# info directly to parent_node, and enqueue it for later processing
524
parent_node.ancestor_of = node.ancestor_of
526
heapreplace(queue, (-parent_node.gdfo, parent_node))
529
heappush(queue, (-parent_node.gdfo, parent_node))
530
PyList_Append(self._to_cleanup, parent_node)
531
elif parent_node.ancestor_of != node.ancestor_of:
532
# Combine to get the full set of parents
533
# Rewrite using PySet_* functions, unfortunately you have to use
534
# PySet_Add since there is no PySet_Update... :(
535
all_ancestors = set(parent_node.ancestor_of)
536
for k in node.ancestor_of:
537
PySet_Add(all_ancestors, k)
538
parent_node.ancestor_of = tuple(sorted(all_ancestors))
541
cdef object _heads_from_candidate_nodes(self, candidate_nodes, dom_to_node):
542
cdef _KnownGraphNode node
543
cdef _KnownGraphNode parent_node
544
cdef Py_ssize_t num_candidates
545
cdef int num_parents, replace_item
547
cdef PyObject *temp_node
551
while PyDict_Next(candidate_nodes, &pos, NULL, &temp_node):
552
node = <_KnownGraphNode>temp_node
553
node.ancestor_of = (node.key,)
554
PyList_Append(queue, (-node.gdfo, node))
555
PyList_Append(self._to_cleanup, node)
557
# These are nodes that we determined are 'common' that we are no longer
559
# Now we walk nodes until all nodes that are being walked are 'common'
560
num_candidates = len(candidate_nodes)
562
while PyList_GET_SIZE(queue) > 0 and PyDict_Size(candidate_nodes) > 1:
564
# We still need to pop the smallest member out of the queue
565
# before we peek again
567
if PyList_GET_SIZE(queue) == 0:
569
# peek at the smallest item. We don't pop, because we expect we'll
570
# need to push more things into the queue anyway
571
node = _peek_node(queue)
573
if PyTuple_GET_SIZE(node.ancestor_of) == num_candidates:
574
# This node is now considered 'common'
575
# Make sure all parent nodes are marked as such
576
for pos from 0 <= pos < PyTuple_GET_SIZE(node.parents):
577
parent_node = _get_parent(node.parents, pos)
578
if parent_node.ancestor_of is not None:
579
parent_node.ancestor_of = node.ancestor_of
580
if node.linear_dominator_node is not node:
581
parent_node = node.linear_dominator_node
582
if parent_node.ancestor_of is not None:
583
parent_node.ancestor_of = node.ancestor_of
585
if node.parents is None:
588
# Now project the current nodes ancestor list to the parent nodes,
589
# and queue them up to be walked
590
if node.linear_dominator_node is not node:
591
# We are at the tip of a long linear region
592
# We know that there is nothing between here and the tail
593
# that is interesting, so skip to the end
594
self._process_parent(node, node.linear_dominator_node,
595
candidate_nodes, dom_to_node, queue, &replace_item)
597
for pos from 0 <= pos < PyTuple_GET_SIZE(node.parents):
598
parent_node = _get_parent(node.parents, pos)
599
self._process_parent(node, parent_node, candidate_nodes,
600
dom_to_node, queue, &replace_item)
601
for pos from 0 <= pos < PyList_GET_SIZE(self._to_cleanup):
602
node = _get_list_node(self._to_cleanup, pos)
603
node.ancestor_of = None
604
self._to_cleanup = []
605
return PyFrozenSet_New(candidate_nodes)