# Copyright (C) 2009 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
"""Implementation of Graph algorithms when we have already loaded everything.
27
class _KnownGraphNode(object):
28
"""Represents a single object in the known graph."""
30
__slots__ = ('key', 'parent_keys', 'child_keys', 'linear_dominator',
31
'gdfo', 'ancestor_of')
33

    def __init__(self, key, parent_keys):
        self.key = key
        self.parent_keys = parent_keys
        self.child_keys = []
        # Oldest ancestor, such that no parents between here and there have >1
        # child
        self.linear_dominator = None
        # Greatest distance from origin
        self.gdfo = None
        # This will become a tuple of known heads that have this node as an
        # ancestor
        self.ancestor_of = None

    def __repr__(self):
        return '%s(%s gdfo:%s par:%s child:%s %s)' % (
            self.__class__.__name__, self.key, self.gdfo,
            self.parent_keys, self.child_keys,
            self.linear_dominator)


class KnownGraph(object):
    """This is a class which assumes we already know the full graph."""

    def __init__(self, parent_map, do_cache=True):
        """Create a new KnownGraph instance.

        :param parent_map: A dictionary mapping key => parent_keys
        """
        self._nodes = {}
        # Maps {sorted(revision_id, revision_id): heads}
        self._known_heads = {}
        self.do_cache = do_cache
        self._initialize_nodes(parent_map)
        self._find_linear_dominators()
        self._find_gdfo()
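
    # Usage sketch (hypothetical revision ids; the heads() results shown are
    # what this implementation returns for this particular graph):
    #
    #   parent_map = {
    #       'rev-1': (),
    #       'rev-2a': ('rev-1',),
    #       'rev-2b': ('rev-1',),
    #       'rev-3': ('rev-2a',),
    #       }
    #   graph = KnownGraph(parent_map)
    #   graph.heads(['rev-2a', 'rev-2b'])   # frozenset(['rev-2a', 'rev-2b'])
    #   graph.heads(['rev-1', 'rev-3'])     # frozenset(['rev-3'])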

    def _initialize_nodes(self, parent_map):
        """Populate self._nodes.

        After this has finished, self._nodes will have an entry for every entry
        in parent_map. Ghosts will have a parent_keys = None, all nodes found
        will also have .child_keys populated with all known child_keys.
        """
        nodes = self._nodes
        for key, parent_keys in parent_map.iteritems():
            if key in nodes:
                node = nodes[key]
                node.parent_keys = parent_keys
            else:
                node = _KnownGraphNode(key, parent_keys)
                nodes[key] = node
            for parent_key in parent_keys:
                try:
                    parent_node = nodes[parent_key]
                except KeyError:
                    parent_node = _KnownGraphNode(parent_key, None)
                    nodes[parent_key] = parent_node
                parent_node.child_keys.append(key)
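
    # Illustration (hypothetical keys): for parent_map {'B': ('A',)} where 'A'
    # itself has no entry, 'A' becomes a ghost node with parent_keys None and
    # child_keys ['B'], while 'B' gets parent_keys ('A',) and an empty
    # child_keys list.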

    def _find_linear_dominators(self):
        """For each node in the set, find any linear dominators.

        For any given node, the 'linear dominator' is an ancestor, such that
        all parents between this node and that one have a single parent, and a
        single child. So if A->B->C->D then B,C,D all have a linear dominator
        of A.

        There are two main benefits:
        1) When walking the graph, we can jump to the nearest linear dominator,
           rather than walking all of the nodes in between.
        2) When caching heads() results, dominators give the "same" results as
           their children. (If the dominator is a head, then the descendant is
           a head, if the dominator is not a head, then the child isn't
           either.)
        """
        def check_node(node):
            if node.parent_keys is None or len(node.parent_keys) != 1:
                # This node is either a ghost, a tail, or has multiple parents
                # It is its own dominator
                node.linear_dominator = node.key
                return None
            parent_node = self._nodes[node.parent_keys[0]]
            if len(parent_node.child_keys) > 1:
                # The parent has multiple children, so *this* node is the
                # dominator
                node.linear_dominator = node.key
                return None
            # The parent is already filled in, so add and continue
            if parent_node.linear_dominator is not None:
                node.linear_dominator = parent_node.linear_dominator
                return None
            # We don't know this node, or its parent node, so start walking to
            # the next one
            return parent_node

        for node in self._nodes.itervalues():
            # The parent is not filled in, so walk until we get somewhere
            if node.linear_dominator is not None: # already done
                continue
            next_node = check_node(node)
            if next_node is None:
                # Nothing more needs to be done
                continue
            stack = []
            while next_node is not None:
                stack.append(node)
                node = next_node
                next_node = check_node(node)
            # The stack now contains the linear chain, and 'node' should have
            # been labeled with its dominator by check_node
            dominator = node.linear_dominator
            while stack:
                next_node = stack.pop()
                next_node.linear_dominator = dominator
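
    # Worked example (hypothetical keys): writing edges as child <- parent,
    # consider B <- A, E <- A, C <- B, D <- C.  A is a tail and B's only
    # parent (A) has two children, so A, B and E are each their own linear
    # dominator, while C and D sit on a purely linear run below B and both
    # get linear_dominator == B.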

    def _find_gdfo(self):
        def find_tails():
            return [node for node in self._nodes.itervalues()
                    if not node.parent_keys]
        tails = find_tails()
        todo = []
        heappush = heapq.heappush
        heappop = heapq.heappop
        nodes = self._nodes
        for node in tails:
            node.gdfo = 1
            heappush(todo, (1, node))
        while todo:
            gdfo, next = heappop(todo)
            if next.gdfo is not None and gdfo < next.gdfo:
                # This node was reached from a longer path, we assume it was
                # enqueued correctly with the longer gdfo, so don't continue
                # processing now
                continue
            next_gdfo = gdfo + 1
            for child_key in next.child_keys:
                child_node = nodes[child_key]
                if child_node.gdfo is None or child_node.gdfo < next_gdfo:
                    # Only enqueue children when all of their parents have
                    # been resolved
                    for parent_key in child_node.parent_keys:
                        # We know that 'this' parent is counted
                        if parent_key != next.key:
                            parent_node = nodes[parent_key]
                            if parent_node.gdfo is None:
                                break
                    else:
                        child_node.gdfo = next_gdfo
                        heappush(todo, (next_gdfo, child_node))
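
    # Worked example (hypothetical keys): in the diamond B <- A, C <- A,
    # D <- B, D <- C, the only tail is A, so A.gdfo == 1 and
    # B.gdfo == C.gdfo == 2.  D is only enqueued once both of its parents
    # have a gdfo, ending up with D.gdfo == 3.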

    def _get_dominators_to_nodes(self, candidate_nodes):
        """Get the reverse mapping from dominator_key => candidate_nodes.

        As a side effect, this can also remove potential candidate nodes if we
        determine that they share a dominator.
        """
        dom_to_node = {}
        keys_to_remove = []
        for node in candidate_nodes.values():
            if node.linear_dominator in dom_to_node:
                # This node already exists, resolve which node supersedes the
                # other
                other_node = dom_to_node[node.linear_dominator]
                # There should be no way that nodes sharing a dominator could
                # 'tie' for gdfo
                if other_node.gdfo > node.gdfo:
                    # The other node has this node as an ancestor
                    keys_to_remove.append(node.key)
                else:
                    # Replace the other node, and set this as the new key
                    keys_to_remove.append(other_node.key)
                    dom_to_node[node.linear_dominator] = node
            else:
                dom_to_node[node.linear_dominator] = node
        for key in keys_to_remove:
            candidate_nodes.pop(key)
        return dom_to_node
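
    # Illustration (hypothetical keys): if candidates X and Y share
    # linear_dominator D and Y.gdfo > X.gdfo, then X lies on the linear run
    # between D and Y and is therefore an ancestor of Y, so X is dropped from
    # candidate_nodes and dom_to_node ends up mapping D to Y.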

    def heads(self, keys):
        """Return the heads from amongst keys.

        This is done by searching the ancestries of each key. Any key that is
        reachable from another key is not returned; all the others are.

        This operation scales with the relative depth between any two keys. If
        any two keys are completely disconnected all ancestry of both sides
        will be retrieved.

        :param keys: An iterable of keys.
        :return: A set of the heads. Note that as a set there is no ordering
            information. Callers will need to filter their input to create
            order if they need it.
        """
        candidate_nodes = dict((key, self._nodes[key]) for key in keys)
        if revision.NULL_REVISION in candidate_nodes:
            # NULL_REVISION is only a head if it is the only entry
            candidate_nodes.pop(revision.NULL_REVISION)
            if not candidate_nodes:
                return set([revision.NULL_REVISION])
        if len(candidate_nodes) < 2:
            return frozenset(candidate_nodes)
        heads_key = frozenset(candidate_nodes)
        if heads_key != frozenset(keys):
            note('%s != %s', heads_key, frozenset(keys))
        try:
            heads = self._known_heads[heads_key]
            return heads
        except KeyError:
            pass # compute it ourselves
        dom_to_node = self._get_dominators_to_nodes(candidate_nodes)
        if len(candidate_nodes) < 2:
            # We shrunk candidate_nodes and determined a new head
            return frozenset(candidate_nodes)
        # Check the linear dominators of these keys, to see if we already
        # know the heads answer
        dom_heads_key = frozenset([node.linear_dominator
                                   for node in candidate_nodes.itervalues()])
        if dom_heads_key in self._known_heads:
            # map back into the original keys
            heads = self._known_heads[dom_heads_key]
            heads = frozenset([dom_to_node[key].key for key in heads])
            return heads
        heads = self._heads_from_candidate_nodes(candidate_nodes, dom_to_node)
        if self.do_cache:
            self._known_heads[heads_key] = heads
            # Cache the dominator heads
            if dom_heads_key is not None:
                dom_heads = frozenset([candidate_nodes[key].linear_dominator
                                       for key in heads])
                self._known_heads[dom_heads_key] = dom_heads
        return heads

    def _heads_from_candidate_nodes(self, candidate_nodes, dom_to_node):
        queue = []
        to_cleanup = []
        to_cleanup_append = to_cleanup.append
        for node in candidate_nodes.itervalues():
            node.ancestor_of = (node.key,)
            queue.append((-node.gdfo, node))
            to_cleanup_append(node)
        heapq.heapify(queue)
        # These are nodes that we determined are 'common' that we are no longer
        # walking
        # Now we walk nodes until all nodes that are being walked are 'common'
        num_candidates = len(candidate_nodes)
        nodes = self._nodes
        heappop = heapq.heappop
        heappush = heapq.heappush
        while queue and len(candidate_nodes) > 1:
            _, node = heappop(queue)
            next_ancestor_of = node.ancestor_of
            if len(next_ancestor_of) == num_candidates:
                # This node is now considered 'common'
                # Make sure all parent nodes are marked as such
                for parent_key in node.parent_keys:
                    parent_node = nodes[parent_key]
                    if parent_node.ancestor_of is not None:
                        parent_node.ancestor_of = next_ancestor_of
                if node.linear_dominator != node.key:
                    parent_node = nodes[node.linear_dominator]
                    if parent_node.ancestor_of is not None:
                        parent_node.ancestor_of = next_ancestor_of
                continue
            if node.parent_keys is None:
                # This is a ghost, we cannot walk past it
                continue
            # Now project the current nodes ancestor list to the parent nodes,
            # and queue them up to be walked
            # Note: using linear_dominator speeds things up quite a bit
            #       enough that we actually start to be slightly faster
            #       than the default heads() implementation
            if node.linear_dominator != node.key:
                # We are at the tip of a long linear region
                # We know that there is nothing between here and the tail
                # that is interesting, so skip to the end
                parent_keys = [node.linear_dominator]
            else:
                parent_keys = node.parent_keys
            for parent_key in parent_keys:
                if parent_key in candidate_nodes:
                    candidate_nodes.pop(parent_key)
                    if len(candidate_nodes) <= 1:
                        break
                elif parent_key in dom_to_node:
                    orig_node = dom_to_node[parent_key]
                    if orig_node is not node:
                        if orig_node.key in candidate_nodes:
                            candidate_nodes.pop(orig_node.key)
                            if len(candidate_nodes) <= 1:
                                break
                parent_node = nodes[parent_key]
                ancestor_of = parent_node.ancestor_of
                if ancestor_of is None:
                    # This node hasn't been walked yet
                    parent_node.ancestor_of = next_ancestor_of
                    # Enqueue this node for walking
                    heappush(queue, (-parent_node.gdfo, parent_node))
                    to_cleanup_append(parent_node)
                elif ancestor_of != next_ancestor_of:
                    # Combine to get the full set of candidates this node is
                    # an ancestor of
                    all_ancestors = set(ancestor_of)
                    all_ancestors.update(next_ancestor_of)
                    parent_node.ancestor_of = tuple(sorted(all_ancestors))
        for node in to_cleanup:
            node.ancestor_of = None
        return frozenset(candidate_nodes)
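
    # Walk sketch (hypothetical keys): for candidates B and C with a common
    # parent A (B <- A, C <- A), both start with ancestor_of set to just their
    # own key.  The walk reaches A from both sides, combining its ancestor_of
    # into ('B', 'C'), which marks A as 'common'; neither candidate is ever
    # popped from candidate_nodes, so heads() reports both B and C.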