27
25
class _KnownGraphNode(object):
28
26
"""Represents a single object in the known graph."""
30
__slots__ = ('key', 'parent_keys', 'child_keys', 'linear_dominator',
31
'gdfo', 'ancestor_of')
28
__slots__ = ('key', 'parent_keys', 'child_keys', 'gdfo')
33
30
def __init__(self, key, parent_keys):
35
32
self.parent_keys = parent_keys
36
33
self.child_keys = []
37
# oldest ancestor, such that no parents between here and there have >1
39
self.linear_dominator = None
40
34
# Greatest distance from origin
42
# This will become a tuple of known heads that have this node as an
44
self.ancestor_of = None
46
37
def __repr__(self):
47
return '%s(%s gdfo:%s par:%s child:%s %s)' % (
38
return '%s(%s gdfo:%s par:%s child:%s)' % (
48
39
self.__class__.__name__, self.key, self.gdfo,
49
self.parent_keys, self.child_keys,
50
self.linear_dominator)
40
self.parent_keys, self.child_keys)
53
43
class KnownGraph(object):
63
53
self._known_heads = {}
64
54
self.do_cache = do_cache
65
55
self._initialize_nodes(parent_map)
66
self._find_linear_dominators()
69
58
def _initialize_nodes(self, parent_map):
70
59
"""Populate self._nodes.
72
After this has finished, self._nodes will have an entry for every entry
73
in parent_map. Ghosts will have a parent_keys = None, all nodes found
74
will also have .child_keys populated with all known child_keys.
61
After this has finished:
62
- self._nodes will have an entry for every entry in parent_map.
63
- ghosts will have a parent_keys = None,
64
- all nodes found will also have .child_keys populated with all known
66
- self._tails will list all the nodes without parents.
68
tails = self._tails = set()
76
69
nodes = self._nodes
77
70
for key, parent_keys in parent_map.iteritems():
80
73
node.parent_keys = parent_keys
75
# This node has been added before being seen in parent_map
82
79
node = _KnownGraphNode(key, parent_keys)
88
85
parent_node = _KnownGraphNode(parent_key, None)
89
86
nodes[parent_key] = parent_node
87
# Potentially a tail, if we're wrong we'll remove it later
89
tails.add(parent_node)
90
90
parent_node.child_keys.append(key)
92
def _find_linear_dominators(self):
93
"""For each node in the set, find any linear dominators.
95
For any given node, the 'linear dominator' is an ancestor, such that
96
all parents between this node and that one have a single parent, and a
97
single child. So if A->B->C->D then B,C,D all have a linear dominator
100
There are two main benefits:
101
1) When walking the graph, we can jump to the nearest linear dominator,
102
rather than walking all of the nodes inbetween.
103
2) When caching heads() results, dominators give the "same" results as
104
their children. (If the dominator is a head, then the descendant is
105
a head, if the dominator is not a head, then the child isn't
108
def check_node(node):
109
if node.parent_keys is None or len(node.parent_keys) != 1:
110
# This node is either a ghost, a tail, or has multiple parents
111
# It its own dominator
112
node.linear_dominator = node.key
114
parent_node = self._nodes[node.parent_keys[0]]
115
if len(parent_node.child_keys) > 1:
116
# The parent has multiple children, so *this* node is the
118
node.linear_dominator = node.key
120
# The parent is already filled in, so add and continue
121
if parent_node.linear_dominator is not None:
122
node.linear_dominator = parent_node.linear_dominator
124
# We don't know this node, or its parent node, so start walking to
128
for node in self._nodes.itervalues():
129
# The parent is not filled in, so walk until we get somewhere
130
if node.linear_dominator is not None: #already done
132
next_node = check_node(node)
133
if next_node is None:
134
# Nothing more needs to be done
137
while next_node is not None:
140
next_node = check_node(node)
141
# The stack now contains the linear chain, and 'node' should have
143
dominator = node.linear_dominator
145
next_node = stack.pop()
146
next_node.linear_dominator = dominator
149
92
def _find_gdfo(self):
151
return [node for node in self._nodes.itervalues()
152
if not node.parent_keys]
155
heappush = heapq.heappush
156
heappop = heapq.heappop
157
93
nodes = self._nodes
94
known_parent_gdfos = {}
97
for node in self._tails:
160
heappush(todo, (1, node))
163
gdfo, next = heappop(todo)
165
if next.gdfo is not None and gdfo < next.gdfo:
166
# This node was reached from a longer path, we assume it was
167
# enqued correctly with the longer gdfo, so don't continue
171
for child_key in next.child_keys:
172
child_node = nodes[child_key]
173
if child_node.gdfo is None or child_node.gdfo < next_gdfo:
174
# Only enque children when all of their parents have been
176
for parent_key in child_node.parent_keys:
177
# We know that 'this' parent is counted
178
if parent_key != next.key:
179
parent_node = nodes[parent_key]
180
if parent_node.gdfo is None:
183
child_node.gdfo = next_gdfo
184
heappush(todo, (next_gdfo, child_node))
186
def _get_dominators_to_nodes(self, candidate_nodes):
187
"""Get the reverse mapping from dominator_key => candidate_nodes.
189
As a side effect, this can also remove potential candidate nodes if we
190
determine that they share a dominator.
194
for node in candidate_nodes.values():
195
if node.linear_dominator in dom_to_node:
196
# This node already exists, resolve which node supersedes the
198
other_node = dom_to_node[node.linear_dominator]
199
# There should be no way that nodes sharing a dominator could
201
if other_node.gdfo > node.gdfo:
202
# The other node has this node as an ancestor
203
keys_to_remove.append(node.key)
205
# Replace the other node, and set this as the new key
206
keys_to_remove.append(other_node.key)
207
dom_to_node[node.linear_dominator] = node
209
dom_to_node[node.linear_dominator] = node
210
for key in keys_to_remove:
211
candidate_nodes.pop(key)
103
for child_key in node.child_keys:
104
child = nodes[child_key]
105
if child_key in known_parent_gdfos:
106
known_gdfo = known_parent_gdfos[child_key] + 1
111
if child.gdfo is None or node.gdfo + 1 > child.gdfo:
112
child.gdfo = node.gdfo + 1
113
if known_gdfo == len(child.parent_keys):
114
# We are the last parent updating that node, we can
115
# continue from there
116
pending.append(child)
118
del known_parent_gdfos[child_key]
120
# Update known_parent_gdfos for a key we couldn't process
121
known_parent_gdfos[child_key] = known_gdfo
214
123
def heads(self, keys):
215
124
"""Return the heads from amongst keys.
231
139
# NULL_REVISION is only a head if it is the only entry
232
140
candidate_nodes.pop(revision.NULL_REVISION)
233
141
if not candidate_nodes:
234
return set([revision.NULL_REVISION])
142
return frozenset([revision.NULL_REVISION])
235
143
if len(candidate_nodes) < 2:
144
# No or only one candidate
236
145
return frozenset(candidate_nodes)
237
146
heads_key = frozenset(candidate_nodes)
238
147
if heads_key != frozenset(keys):
239
149
note('%s != %s', heads_key, frozenset(keys))
150
# Do we have a cached result ?
241
152
heads = self._known_heads[heads_key]
244
pass # compute it ourselves
245
dom_to_node = self._get_dominators_to_nodes(candidate_nodes)
246
if len(candidate_nodes) < 2:
247
# We shrunk candidate_nodes and determined a new head
248
return frozenset(candidate_nodes)
250
# Check the linear dominators of these keys, to see if we already
251
# know the heads answer
252
dom_heads_key = frozenset([node.linear_dominator
253
for node in candidate_nodes.itervalues()])
254
if dom_heads_key in self._known_heads:
255
# map back into the original keys
256
heads = self._known_heads[dom_heads_key]
257
heads = frozenset([dom_to_node[key].key for key in heads])
259
heads = self._heads_from_candidate_nodes(candidate_nodes, dom_to_node)
156
# Let's compute the heads
160
for node in candidate_nodes.values():
162
pending.extend(node.parent_keys)
163
if min_gdfo is None or node.gdfo < min_gdfo:
167
node_key = pending.pop()
169
# node already appears in some ancestry
172
node = nodes[node_key]
173
if node.gdfo <= min_gdfo:
176
pending.extend(node.parent_keys)
177
heads = heads_key.difference(seen)
260
178
if self.do_cache:
261
179
self._known_heads[heads_key] = heads
262
# Cache the dominator heads
263
if dom_heads_key is not None:
264
dom_heads = frozenset([candidate_nodes[key].linear_dominator
266
self._known_heads[dom_heads_key] = dom_heads
269
def _heads_from_candidate_nodes(self, candidate_nodes, dom_to_node):
272
to_cleanup_append = to_cleanup.append
273
for node in candidate_nodes.itervalues():
274
node.ancestor_of = (node.key,)
275
queue.append((-node.gdfo, node))
276
to_cleanup_append(node)
278
# These are nodes that we determined are 'common' that we are no longer
280
# Now we walk nodes until all nodes that are being walked are 'common'
281
num_candidates = len(candidate_nodes)
283
heappop = heapq.heappop
284
heappush = heapq.heappush
285
while queue and len(candidate_nodes) > 1:
286
_, node = heappop(queue)
287
next_ancestor_of = node.ancestor_of
288
if len(next_ancestor_of) == num_candidates:
289
# This node is now considered 'common'
290
# Make sure all parent nodes are marked as such
291
for parent_key in node.parent_keys:
292
parent_node = nodes[parent_key]
293
if parent_node.ancestor_of is not None:
294
parent_node.ancestor_of = next_ancestor_of
295
if node.linear_dominator != node.key:
296
parent_node = nodes[node.linear_dominator]
297
if parent_node.ancestor_of is not None:
298
parent_node.ancestor_of = next_ancestor_of
300
if node.parent_keys is None:
303
# Now project the current nodes ancestor list to the parent nodes,
304
# and queue them up to be walked
305
# Note: using linear_dominator speeds things up quite a bit
306
# enough that we actually start to be slightly faster
307
# than the default heads() implementation
308
if node.linear_dominator != node.key:
309
# We are at the tip of a long linear region
310
# We know that there is nothing between here and the tail
311
# that is interesting, so skip to the end
312
parent_keys = [node.linear_dominator]
314
parent_keys = node.parent_keys
315
for parent_key in parent_keys:
316
if parent_key in candidate_nodes:
317
candidate_nodes.pop(parent_key)
318
if len(candidate_nodes) <= 1:
320
elif parent_key in dom_to_node:
321
orig_node = dom_to_node[parent_key]
322
if orig_node is not node:
323
if orig_node.key in candidate_nodes:
324
candidate_nodes.pop(orig_node.key)
325
if len(candidate_nodes) <= 1:
327
parent_node = nodes[parent_key]
328
ancestor_of = parent_node.ancestor_of
329
if ancestor_of is None:
330
# This node hasn't been walked yet
331
parent_node.ancestor_of = next_ancestor_of
333
heappush(queue, (-parent_node.gdfo, parent_node))
334
to_cleanup_append(parent_node)
335
elif ancestor_of != next_ancestor_of:
336
# Combine to get the full set of parents
337
all_ancestors = set(ancestor_of)
338
all_ancestors.update(next_ancestor_of)
339
parent_node.ancestor_of = tuple(sorted(all_ancestors))
341
for node in to_cleanup:
342
node.ancestor_of = None
344
return frozenset(candidate_nodes)