94
89
nodes[parent_key] = parent_node
95
90
parent_node.child_keys.append(key)
97
def _find_tails(self):
98
return [node for node in self._nodes.itervalues()
99
if not node.parent_keys]
101
def _find_tips(self):
102
return [node for node in self._nodes.itervalues()
103
if not node.child_keys]
92
def _find_linear_dominators(self):
93
"""For each node in the set, find any linear dominators.
95
For any given node, the 'linear dominator' is an ancestor, such that
96
all parents between this node and that one have a single parent, and a
97
single child. So if A->B->C->D then B,C,D all have a linear dominator
100
There are two main benefits:
101
1) When walking the graph, we can jump to the nearest linear dominator,
102
rather than walking all of the nodes inbetween.
103
2) When caching heads() results, dominators give the "same" results as
104
their children. (If the dominator is a head, then the descendant is
105
a head, if the dominator is not a head, then the child isn't
108
def check_node(node):
109
if node.parent_keys is None or len(node.parent_keys) != 1:
110
# This node is either a ghost, a tail, or has multiple parents
111
# It its own dominator
112
node.linear_dominator = node.key
114
parent_node = self._nodes[node.parent_keys[0]]
115
if len(parent_node.child_keys) > 1:
116
# The parent has multiple children, so *this* node is the
118
node.linear_dominator = node.key
120
# The parent is already filled in, so add and continue
121
if parent_node.linear_dominator is not None:
122
node.linear_dominator = parent_node.linear_dominator
124
# We don't know this node, or its parent node, so start walking to
128
for node in self._nodes.itervalues():
129
# The parent is not filled in, so walk until we get somewhere
130
if node.linear_dominator is not None: #already done
132
next_node = check_node(node)
133
if next_node is None:
134
# Nothing more needs to be done
137
while next_node is not None:
140
next_node = check_node(node)
141
# The stack now contains the linear chain, and 'node' should have
143
dominator = node.linear_dominator
145
next_node = stack.pop()
146
next_node.linear_dominator = dominator
105
149
def _find_gdfo(self):
151
return [node for node in self._nodes.itervalues()
152
if not node.parent_keys]
155
heappush = heapq.heappush
156
heappop = heapq.heappop
106
157
nodes = self._nodes
107
known_parent_gdfos = {}
110
for node in self._find_tails():
116
for child_key in node.child_keys:
117
child = nodes[child_key]
118
if child_key in known_parent_gdfos:
119
known_gdfo = known_parent_gdfos[child_key] + 1
124
if child.gdfo is None or node.gdfo + 1 > child.gdfo:
125
child.gdfo = node.gdfo + 1
126
if known_gdfo == len(child.parent_keys):
127
# We are the last parent updating that node, we can
128
# continue from there
129
pending.append(child)
131
del known_parent_gdfos[child_key]
133
# Update known_parent_gdfos for a key we couldn't process
134
known_parent_gdfos[child_key] = known_gdfo
136
def add_node(self, key, parent_keys):
    """Add a new node to the graph.

    If this fills in a ghost, then the gdfos of all children will be
    updated accordingly.

    :param key: The node being added. If this is a duplicate, this is a
        no-op.
    :param parent_keys: The parents of the given node.
    :return: None (should we return if this was a ghost, etc?)
    :raises ValueError: if the key already exists with different parents.
    """
    nodes = self._nodes
    if key in nodes:
        node = nodes[key]
        if node.parent_keys is None:
            node.parent_keys = parent_keys
            # A ghost is being added, we can no-longer trust the heads
            # cache, so clear it
            self._known_heads.clear()
        else:
            # Make sure we compare a list to a list, as tuple != list.
            parent_keys = list(parent_keys)
            existing_parent_keys = list(node.parent_keys)
            if parent_keys == existing_parent_keys:
                return # Identical content
            else:
                raise ValueError('Parent key mismatch, existing node %s'
                    ' has parents of %s not %s'
                    % (key, existing_parent_keys, parent_keys))
    else:
        node = _KnownGraphNode(key, parent_keys)
        nodes[key] = node
    parent_gdfo = 0
    for parent_key in parent_keys:
        try:
            parent_node = nodes[parent_key]
        except KeyError:
            parent_node = _KnownGraphNode(parent_key, None)
            # Ghosts and roots have gdfo 1
            parent_node.gdfo = 1
            nodes[parent_key] = parent_node
        if parent_gdfo < parent_node.gdfo:
            parent_gdfo = parent_node.gdfo
        parent_node.child_keys.append(key)
    node.gdfo = parent_gdfo + 1
    # Now fill the gdfo to all children
    # Note that this loop is slightly inefficient, in that we may visit the
    # same child (and its decendents) more than once, however, it is
    # 'efficient' in that we only walk to nodes that would be updated,
    # rather than all nodes
    # We use a deque rather than a simple list stack, to go for BFD rather
    # than DFD. So that if a longer path is possible, we walk it before we
    # get to the final child
    pending = deque([node])
    while pending:
        node = pending.popleft()
        next_gdfo = node.gdfo + 1
        for child_key in node.child_keys:
            child = nodes[child_key]
            if child.gdfo < next_gdfo:
                # This child is being updated, we need to check its
                # children as well
                child.gdfo = next_gdfo
                pending.append(child)

def _get_dominators_to_nodes(self, candidate_nodes):
    """Get the reverse mapping from dominator_key => candidate_nodes.

    As a side effect, this can also remove potential candidate nodes if we
    determine that they share a dominator.

    :param candidate_nodes: dict of key => node, mutated in place when two
        candidates share a linear dominator (the ancestor is removed).
    :return: dict mapping linear_dominator key => surviving candidate node.
    """
    dom_to_node = {}
    keys_to_remove = []
    for node in candidate_nodes.values():
        if node.linear_dominator in dom_to_node:
            # This node already exists, resolve which node supersedes the
            # other
            other_node = dom_to_node[node.linear_dominator]
            # There should be no way that nodes sharing a dominator could
            # 'tie' for gdfo
            if other_node.gdfo > node.gdfo:
                # The other node has this node as an ancestor
                keys_to_remove.append(node.key)
            else:
                # Replace the other node, and set this as the new key
                keys_to_remove.append(other_node.key)
                dom_to_node[node.linear_dominator] = node
        else:
            dom_to_node[node.linear_dominator] = node
    for key in keys_to_remove:
        candidate_nodes.pop(key)
    return dom_to_node
201
214
def heads(self, keys):
    """Return the heads from amongst keys.

    Any key that is reachable from another key in the set is not a head;
    all the remaining keys are. Cached results are consulted both for the
    exact key set and for the set of linear dominators of the keys.

    :param keys: An iterable of keys.
    :return: A frozenset of the head keys. As a set it carries no
        ordering information.
    """
    candidate_nodes = dict((key, self._nodes[key]) for key in keys)
    if revision.NULL_REVISION in candidate_nodes:
        # NULL_REVISION is only a head if it is the only entry
        candidate_nodes.pop(revision.NULL_REVISION)
        if not candidate_nodes:
            return frozenset([revision.NULL_REVISION])
    if len(candidate_nodes) < 2:
        # No or only one candidate
        return frozenset(candidate_nodes)
    heads_key = frozenset(candidate_nodes)
    if heads_key != frozenset(keys):
        # NOTE(review): this only differs when keys held duplicates;
        # the note() call looks like a leftover debugging aid.
        note('%s != %s', heads_key, frozenset(keys))
    # Do we have a cached result ?
    try:
        heads = self._known_heads[heads_key]
        return heads
    except KeyError:
        pass # compute it ourselves
    # Let's compute the heads
    dom_to_node = self._get_dominators_to_nodes(candidate_nodes)
    if len(candidate_nodes) < 2:
        # We shrunk candidate_nodes and determined a new head
        return frozenset(candidate_nodes)
    # Check the linear dominators of these keys, to see if we already
    # know the heads answer
    dom_heads_key = frozenset([node.linear_dominator
                               for node in candidate_nodes.itervalues()])
    if dom_heads_key in self._known_heads:
        # map back into the original keys
        heads = self._known_heads[dom_heads_key]
        heads = frozenset([dom_to_node[key].key for key in heads])
        return heads
    heads = self._heads_from_candidate_nodes(candidate_nodes, dom_to_node)
    if self.do_cache:
        self._known_heads[heads_key] = heads
        # Cache the dominator heads
        if dom_heads_key is not None:
            dom_heads = frozenset([candidate_nodes[key].linear_dominator
                                   for key in heads])
            self._known_heads[dom_heads_key] = dom_heads
    return heads
258
"""Return the nodes in topological order.
260
All parents must occur before all children.
262
for node in self._nodes.itervalues():
263
if node.gdfo is None:
264
raise errors.GraphCycleError(self._nodes)
265
pending = self._find_tails()
266
pending_pop = pending.pop
267
pending_append = pending.append
270
topo_order_append = topo_order.append
272
num_seen_parents = dict.fromkeys(self._nodes, 0)
275
if node.parent_keys is not None:
276
# We don't include ghost parents
277
topo_order_append(node.key)
278
for child_key in node.child_keys:
279
child_node = self._nodes[child_key]
280
seen_parents = num_seen_parents[child_key] + 1
281
if seen_parents == len(child_node.parent_keys):
282
# All parents have been processed, enqueue this child
283
pending_append(child_node)
284
# This has been queued up, stop tracking it
285
del num_seen_parents[child_key]
287
num_seen_parents[child_key] = seen_parents
288
# We started from the parents, so we don't need to do anymore work
292
"""Return a reverse topological ordering which is 'stable'.
294
There are a few constraints:
295
1) Reverse topological (all children before all parents)
297
3) 'stable' sorting, so that we get the same result, independent of
298
machine, or extra data.
299
To do this, we use the same basic algorithm as topo_sort, but when we
300
aren't sure what node to access next, we sort them lexicographically.
302
tips = self._find_tips()
303
# Split the tips based on prefix
306
if node.key.__class__ is str or len(node.key) == 1:
269
def _heads_from_candidate_nodes(self, candidate_nodes, dom_to_node):
272
to_cleanup_append = to_cleanup.append
273
for node in candidate_nodes.itervalues():
274
node.ancestor_of = (node.key,)
275
queue.append((-node.gdfo, node))
276
to_cleanup_append(node)
278
# These are nodes that we determined are 'common' that we are no longer
280
# Now we walk nodes until all nodes that are being walked are 'common'
281
num_candidates = len(candidate_nodes)
283
heappop = heapq.heappop
284
heappush = heapq.heappush
285
while queue and len(candidate_nodes) > 1:
286
_, node = heappop(queue)
287
next_ancestor_of = node.ancestor_of
288
if len(next_ancestor_of) == num_candidates:
289
# This node is now considered 'common'
290
# Make sure all parent nodes are marked as such
291
for parent_key in node.parent_keys:
292
parent_node = nodes[parent_key]
293
if parent_node.ancestor_of is not None:
294
parent_node.ancestor_of = next_ancestor_of
295
if node.linear_dominator != node.key:
296
parent_node = nodes[node.linear_dominator]
297
if parent_node.ancestor_of is not None:
298
parent_node.ancestor_of = next_ancestor_of
300
if node.parent_keys is None:
303
# Now project the current nodes ancestor list to the parent nodes,
304
# and queue them up to be walked
305
# Note: using linear_dominator speeds things up quite a bit
306
# enough that we actually start to be slightly faster
307
# than the default heads() implementation
308
if node.linear_dominator != node.key:
309
# We are at the tip of a long linear region
310
# We know that there is nothing between here and the tail
311
# that is interesting, so skip to the end
312
parent_keys = [node.linear_dominator]
310
prefix_tips.setdefault(prefix, []).append(node)
312
num_seen_children = dict.fromkeys(self._nodes, 0)
315
for prefix in sorted(prefix_tips):
316
pending = sorted(prefix_tips[prefix], key=lambda n:n.key,
320
if node.parent_keys is None:
321
# Ghost node, skip it
323
result.append(node.key)
324
for parent_key in sorted(node.parent_keys, reverse=True):
325
parent_node = self._nodes[parent_key]
326
seen_children = num_seen_children[parent_key] + 1
327
if seen_children == len(parent_node.child_keys):
328
# All children have been processed, enqueue this parent
329
pending.append(parent_node)
330
# This has been queued up, stop tracking it
331
del num_seen_children[parent_key]
333
num_seen_children[parent_key] = seen_children
336
def merge_sort(self, tip_key):
    """Compute the merge sorted graph output."""
    from bzrlib import tsort
    # merge_sort only understands real nodes, so leave ghosts (nodes
    # whose parent_keys is None) out of the parent map.
    parent_map = {}
    for node in self._nodes.itervalues():
        if node.parent_keys is not None:
            parent_map[node.key] = node.parent_keys
    # We intentionally always generate revnos and never force the
    # mainline.
    sorted_info = tsort.merge_sort(parent_map, tip_key,
                                   mainline_revisions=None,
                                   generate_revno=True)
    # Strip the sequence_number that merge_sort generates
    return [_MergeSortNode(key, merge_depth, revno, end_of_merge)
            for _, key, merge_depth, revno, end_of_merge in sorted_info]
351
def get_parent_keys(self, key):
    """Get the parents for a key

    Returns a list containing the parents keys. If the key is a ghost,
    None is returned. A KeyError will be raised if the key is not in
    the graph.

    :param keys: Key to check (eg revision_id)
    :return: A list of parents
    """
    node = self._nodes[key]
    return node.parent_keys
363
def get_child_keys(self, key):
    """Get the children for a key

    Returns a list containing the children keys. A KeyError will be
    raised if the key is not in the graph.

    :param keys: Key to check (eg revision_id)
    :return: A list of children
    """
    node = self._nodes[key]
    return node.child_keys
314
parent_keys = node.parent_keys
315
for parent_key in parent_keys:
316
if parent_key in candidate_nodes:
317
candidate_nodes.pop(parent_key)
318
if len(candidate_nodes) <= 1:
320
elif parent_key in dom_to_node:
321
orig_node = dom_to_node[parent_key]
322
if orig_node is not node:
323
if orig_node.key in candidate_nodes:
324
candidate_nodes.pop(orig_node.key)
325
if len(candidate_nodes) <= 1:
327
parent_node = nodes[parent_key]
328
ancestor_of = parent_node.ancestor_of
329
if ancestor_of is None:
330
# This node hasn't been walked yet
331
parent_node.ancestor_of = next_ancestor_of
333
heappush(queue, (-parent_node.gdfo, parent_node))
334
to_cleanup_append(parent_node)
335
elif ancestor_of != next_ancestor_of:
336
# Combine to get the full set of parents
337
all_ancestors = set(ancestor_of)
338
all_ancestors.update(next_ancestor_of)
339
parent_node.ancestor_of = tuple(sorted(all_ancestors))
341
for node in to_cleanup:
342
node.ancestor_of = None
344
return frozenset(candidate_nodes)