1
# Copyright (C) 2009 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
"""Implementation of Graph algorithms when we have already loaded everything.
20
cdef extern from "python-compat.h":
23
cdef extern from "Python.h":
24
ctypedef int Py_ssize_t
25
ctypedef struct PyObject:
30
from bzrlib import revision
33
cdef class _KnownGraphNode:
34
"""Represents a single object in the known graph."""
37
# Ideally, we could change this into fixed arrays, rather than get the
38
# extra allocations. Consider that 99% of all revisions will have <= 2
39
# parents, we may want to put in the effort
42
cdef _KnownGraphNode linear_dominator_node
43
cdef public long dominator_distance
45
# This could also be simplified
46
cdef object ancestor_of
48
def __init__(self, key, parents):
51
self.parents = parents
53
if not isinstance(parents, list):
54
raise TypeError('parents must be a list')
55
for parent in parents:
56
if not isinstance(parent, _KnownGraphNode):
57
raise TypeError('parents must be a list of _KnownGraphNode')
58
self.parents = parents
60
# oldest ancestor, such that no parents between here and there have >1
62
self.linear_dominator_node = None
63
self.dominator_distance = 0
64
# Greatest distance from origin
66
# This will become a tuple of known heads that have this node as an
68
self.ancestor_of = None
73
cdef _KnownGraphNode child
76
for child in self.children:
77
keys.append(child.key)
80
property linear_dominator:
82
if self.linear_dominator_node is None:
85
return self.linear_dominator_node.key
87
cdef clear_references(self):
90
self.linear_dominator_node = None
94
for parent in self.parents:
95
parent_keys.append(parent.key)
97
for child in self.children:
98
child_keys.append(child.key)
99
return '%s(%s gdfo:%s par:%s child:%s dom:%s %s)' % (
100
self.__class__.__name__, self.key, self.gdfo,
101
parent_keys, child_keys,
102
self.linear_dominator, self.dominator_distance)
105
# TODO: slab allocate all _KnownGraphNode objects.
106
# We already know how many we are going to need...
108
cdef class KnownGraph:
109
"""This is a class which assumes we already know the full graph."""
111
cdef public object _nodes
112
cdef dict _known_heads
113
cdef public int do_cache
115
def __init__(self, parent_map, do_cache=True):
116
"""Create a new KnownGraph instance.
118
:param parent_map: A dictionary mapping key => parent_keys
121
# Maps {sorted(revision_id, revision_id): heads}
122
self._known_heads = {}
123
self.do_cache = int(do_cache)
124
self._initialize_nodes(parent_map)
125
self._find_linear_dominators()
128
def __dealloc__(self):
129
cdef _KnownGraphNode child
131
for child in self._nodes.itervalues():
132
child.clear_references()
134
def _initialize_nodes(self, parent_map):
135
"""Populate self._nodes.
137
After this has finished, self._nodes will have an entry for every entry
138
in parent_map. Ghosts will have a parent_keys = None, all nodes found
139
will also have .child_keys populated with all known child_keys.
141
cdef _KnownGraphNode node
142
cdef _KnownGraphNode parent_node
143
cdef list parent_nodes
145
for key, parent_keys in parent_map.iteritems():
147
for parent_key in parent_keys:
149
parent_node = self._nodes[parent_key]
151
parent_node = _KnownGraphNode(parent_key, None)
152
self._nodes[parent_key] = parent_node
153
parent_nodes.append(parent_node)
154
if key in self._nodes:
155
node = self._nodes[key]
156
assert node.parents is None
157
node.parents = parent_nodes
159
node = _KnownGraphNode(key, parent_nodes)
160
self._nodes[key] = node
161
for parent_node in parent_nodes:
162
parent_node.children.append(node)
164
cdef object _check_is_linear(self, _KnownGraphNode node):
165
"""Check to see if a given node is part of a linear chain."""
166
cdef _KnownGraphNode parent_node
167
if node.parents is None or len(node.parents) != 1:
168
# This node is either a ghost, a tail, or has multiple parents
169
# It its own dominator
170
node.linear_dominator_node = node
171
node.dominator_distance = 0
173
parent_node = node.parents[0]
174
if len(parent_node.children) > 1:
175
# The parent has multiple children, so *this* node is the
177
node.linear_dominator_node = node
178
node.dominator_distance = 0
180
# The parent is already filled in, so add and continue
181
if parent_node.linear_dominator_node is not None:
182
node.linear_dominator_node = parent_node.linear_dominator_node
183
node.dominator_distance = parent_node.dominator_distance + 1
185
# We don't know this node, or its parent node, so start walking to
189
def _find_linear_dominators(self):
190
"""For each node in the set, find any linear dominators.
192
For any given node, the 'linear dominator' is an ancestor, such that
193
all parents between this node and that one have a single parent, and a
194
single child. So if A->B->C->D then B,C,D all have a linear dominator
195
of A. Because there are no interesting siblings, we can quickly skip to
196
the nearest dominator when doing comparisons.
198
cdef _KnownGraphNode node
199
cdef _KnownGraphNode next_node
202
for node in self._nodes.itervalues():
203
# The parent is not filled in, so walk until we get somewhere
204
if node.linear_dominator_node is not None: #already done
206
next_node = self._check_is_linear(node)
207
if next_node is None:
208
# Nothing more needs to be done
211
while next_node is not None:
214
next_node = self._check_is_linear(node)
215
# The stack now contains the linear chain, and 'node' should have
217
assert node.linear_dominator_node is not None
218
dominator = node.linear_dominator_node
220
next_node = stack.pop()
221
next_node.linear_dominator_node = dominator
222
next_node.dominator_distance = node.dominator_distance + 1
225
cdef list _find_tails(self):
227
cdef _KnownGraphNode node
230
for node in self._nodes.itervalues():
235
def _find_gdfo(self):
236
# TODO: Consider moving the tails search into the first-pass over the
237
# data, inside _find_linear_dominators
238
cdef _KnownGraphNode node
239
cdef _KnownGraphNode child_node
240
cdef _KnownGraphNode parent_node
242
tails = self._find_tails()
244
heappush = heapq.heappush
245
heappop = heapq.heappop
248
heappush(todo, (1, node))
250
max_gdfo = len(self._nodes) + 1
252
gdfo, node = heappop(todo)
254
if node.gdfo != -1 and gdfo < node.gdfo:
255
# This node was reached from a longer path, we assume it was
256
# enqued correctly with the longer gdfo, so don't continue
260
assert next_gdfo <= max_gdfo
261
for child_node in node.children:
262
if child_node.gdfo < next_gdfo:
263
# Only enque children when all of their parents have been
265
for parent_node in child_node.parents:
266
# We know that 'this' parent is counted
267
if parent_node is not node:
268
if parent_node.gdfo == -1:
271
child_node.gdfo = next_gdfo
272
heappush(todo, (next_gdfo, child_node))
274
def heads(self, keys):
275
"""Return the heads from amongst keys.
277
This is done by searching the ancestries of each key. Any key that is
278
reachable from another key is not returned; all the others are.
280
This operation scales with the relative depth between any two keys. If
281
any two keys are completely disconnected all ancestry of both sides
284
:param keys: An iterable of keys.
285
:return: A set of the heads. Note that as a set there is no ordering
286
information. Callers will need to filter their input to create
287
order if they need it.
289
cdef dict candidate_nodes
290
cdef dict dom_to_node
294
candidate_nodes[key] = self._nodes[key]
295
if revision.NULL_REVISION in candidate_nodes:
296
# NULL_REVISION is only a head if it is the only entry
297
candidate_nodes.pop(revision.NULL_REVISION)
298
if not candidate_nodes:
299
return set([revision.NULL_REVISION])
300
if len(candidate_nodes) < 2:
301
return frozenset(candidate_nodes)
302
heads_key = frozenset(candidate_nodes)
304
heads = self._known_heads[heads_key]
307
pass # compute it ourselves
309
## # TODO: We could optimize for the len(candidate_nodes) > 2 by checking
310
## # for *any* pair-wise matching, and then eliminating one of the
311
## # nodes trivially. However, the fairly common case is just 2
312
## # keys, so we'll focus on that, first
313
## for node in candidate_nodes.itervalues():
314
## if dominator is None:
315
## dominator = node.linear_dominator
316
## elif dominator != node.linear_dominator:
319
## # In 'time bzr annotate NEWS' this only catches *one* item, so it
320
## # probably isn't worth the optimization
321
## # All of these nodes have the same linear_dominator, which means
322
## # they are in a line, the head is just the one with the highest
324
## def get_distance(key):
325
## return self._nodes[key].dominator_distance
326
## def get_linear_head():
327
## return max(candidate_nodes, key=get_distance)
328
## return set([get_linear_head()])
329
# Check the linear dominators of these keys, to see if we already
330
# know the heads answer
332
# for node in candidate_nodes.itervalues():
333
# dom_key.append(node.linear_dominator.key)
334
# dom_key = frozenset(dom_key)
335
# if dom_key in self._known_heads:
336
# dom_to_node = dict([(node.linear_dominator, key)
337
# for key, node in candidate_nodes.iteritems()])
338
# # map back into the original keys
339
# heads = self._known_heads[dom_key]
340
# heads = frozenset([dom_to_node[key] for key in heads])
342
heads = self._heads_from_candidate_nodes(candidate_nodes)
344
# self._known_heads[heads_key] = heads
345
# # Cache the dominator heads
346
# if dom_key is not None:
347
# dom_heads = frozenset([candidate_nodes[key].linear_dominator
349
# self._known_heads[dom_key] = dom_heads
352
def _heads_from_candidate_nodes(self, dict candidate_nodes):
354
cdef _KnownGraphNode node
355
cdef _KnownGraphNode parent_node
356
cdef int num_candidates
360
for node in candidate_nodes.itervalues():
361
assert node.ancestor_of is None
362
node.ancestor_of = (node.key,)
363
queue.append((-node.gdfo, node))
364
to_cleanup.append(node)
366
# These are nodes that we determined are 'common' that we are no longer
368
# Now we walk nodes until all nodes that are being walked are 'common'
369
num_candidates = len(candidate_nodes)
371
heappop = heapq.heappop
372
heappush = heapq.heappush
373
while queue and len(candidate_nodes) > 1:
374
_, node = heappop(queue)
375
if len(node.ancestor_of) == num_candidates:
376
# This node is now considered 'common'
377
# Make sure all parent nodes are marked as such
378
for parent_node in node.parents:
379
if parent_node.ancestor_of is not None:
380
parent_node.ancestor_of = node.ancestor_of
381
if node.linear_dominator_node is not node:
382
parent_node = node.linear_dominator_node
383
if parent_node.ancestor_of is not None:
384
parent_node.ancestor_of = node.ancestor_of
386
if node.parents is None:
389
# Now project the current nodes ancestor list to the parent nodes,
390
# and queue them up to be walked
391
# Note: using linear_dominator speeds things up quite a bit
392
# enough that we actually start to be slightly faster
393
# than the default heads() implementation
394
if node.linear_dominator_node is not node:
395
# We are at the tip of a long linear region
396
# We know that there is nothing between here and the tail
397
# that is interesting, so skip to the end
398
parents = [node.linear_dominator_node]
400
parents = node.parents
401
for parent_node in node.parents:
402
if parent_node.key in candidate_nodes:
403
candidate_nodes.pop(parent_node.key)
404
if len(candidate_nodes) <= 1:
406
if parent_node.ancestor_of is None:
407
# This node hasn't been walked yet
408
parent_node.ancestor_of = node.ancestor_of
410
heappush(queue, (-parent_node.gdfo, parent_node))
411
to_cleanup.append(parent_node)
412
elif parent_node.ancestor_of != node.ancestor_of:
413
# Combine to get the full set of parents
414
all_ancestors = set(parent_node.ancestor_of)
415
all_ancestors.update(node.ancestor_of)
416
parent_node.ancestor_of = tuple(sorted(all_ancestors))
417
for node in to_cleanup:
418
node.ancestor_of = None
419
return frozenset(candidate_nodes)