1
# Copyright (C) 2009 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
"""Implementation of Graph algorithms when we have already loaded everything.
20
from collections import deque
27
class _KnownGraphNode(object):
28
"""Represents a single object in the known graph."""
30
__slots__ = ('key', 'parent_keys', 'child_keys', 'gdfo')
32
def __init__(self, key, parent_keys):
34
self.parent_keys = parent_keys
36
# Greatest distance from origin
40
return '%s(%s gdfo:%s par:%s child:%s)' % (
41
self.__class__.__name__, self.key, self.gdfo,
42
self.parent_keys, self.child_keys)
45
class _MergeSortNode(object):
46
"""Information about a specific node in the merge graph."""
48
__slots__ = ('key', 'merge_depth', 'revno', 'end_of_merge')
50
def __init__(self, key, merge_depth, revno, end_of_merge):
52
self.merge_depth = merge_depth
54
self.end_of_merge = end_of_merge
57
class KnownGraph(object):
58
"""This is a class which assumes we already know the full graph."""
60
def __init__(self, parent_map, do_cache=True):
61
"""Create a new KnownGraph instance.
63
:param parent_map: A dictionary mapping key => parent_keys
66
# Maps {frozenset(revision_id, revision_id): heads}
67
self._known_heads = {}
68
self.do_cache = do_cache
69
self._initialize_nodes(parent_map)
72
def _initialize_nodes(self, parent_map):
73
"""Populate self._nodes.
75
After this has finished:
76
- self._nodes will have an entry for every entry in parent_map.
77
- ghosts will have a parent_keys = None,
78
- all nodes found will also have .child_keys populated with all known
82
for key, parent_keys in parent_map.iteritems():
85
node.parent_keys = parent_keys
87
node = _KnownGraphNode(key, parent_keys)
89
for parent_key in parent_keys:
91
parent_node = nodes[parent_key]
93
parent_node = _KnownGraphNode(parent_key, None)
94
nodes[parent_key] = parent_node
95
parent_node.child_keys.append(key)
97
def _find_tails(self):
98
return [node for node in self._nodes.itervalues()
99
if not node.parent_keys]
101
def _find_tips(self):
102
return [node for node in self._nodes.itervalues()
103
if not node.child_keys]
105
def _find_gdfo(self):
107
known_parent_gdfos = {}
110
for node in self._find_tails():
116
for child_key in node.child_keys:
117
child = nodes[child_key]
118
if child_key in known_parent_gdfos:
119
known_gdfo = known_parent_gdfos[child_key] + 1
124
if child.gdfo is None or node.gdfo + 1 > child.gdfo:
125
child.gdfo = node.gdfo + 1
126
if known_gdfo == len(child.parent_keys):
127
# We are the last parent updating that node, we can
128
# continue from there
129
pending.append(child)
131
del known_parent_gdfos[child_key]
133
# Update known_parent_gdfos for a key we couldn't process
134
known_parent_gdfos[child_key] = known_gdfo
136
def add_node(self, key, parent_keys):
137
"""Add a new node to the graph.
139
If this fills in a ghost, then the gdfos of all children will be
142
:param key: The node being added. If this is a duplicate, this is a
144
:param parent_keys: The parents of the given node.
145
:return: None (should we return if this was a ghost, etc?)
150
if node.parent_keys is None:
151
node.parent_keys = parent_keys
152
# A ghost is being added, we can no-longer trust the heads
154
self._known_heads.clear()
156
# Make sure we compare a list to a list, as tuple != list.
157
parent_keys = list(parent_keys)
158
existing_parent_keys = list(node.parent_keys)
159
if parent_keys == existing_parent_keys:
160
return # Identical content
162
raise ValueError('Parent key mismatch, existing node %s'
163
' has parents of %s not %s'
164
% (key, existing_parent_keys, parent_keys))
166
node = _KnownGraphNode(key, parent_keys)
169
for parent_key in parent_keys:
171
parent_node = nodes[parent_key]
173
parent_node = _KnownGraphNode(parent_key, None)
174
# Ghosts and roots have gdfo 1
176
nodes[parent_key] = parent_node
177
if parent_gdfo < parent_node.gdfo:
178
parent_gdfo = parent_node.gdfo
179
parent_node.child_keys.append(key)
180
node.gdfo = parent_gdfo + 1
181
# Now fill the gdfo to all children
182
# Note that this loop is slightly inefficient, in that we may visit the
183
# same child (and its decendents) more than once, however, it is
184
# 'efficient' in that we only walk to nodes that would be updated,
185
# rather than all nodes
186
# We use a deque rather than a simple list stack, to go for BFD rather
187
# than DFD. So that if a longer path is possible, we walk it before we
188
# get to the final child
189
pending = deque([node])
191
node = pending.popleft()
192
next_gdfo = node.gdfo + 1
193
for child_key in node.child_keys:
194
child = nodes[child_key]
195
if child.gdfo < next_gdfo:
196
# This child is being updated, we need to check its
198
child.gdfo = next_gdfo
199
pending.append(child)
201
def heads(self, keys):
202
"""Return the heads from amongst keys.
204
This is done by searching the ancestries of each key. Any key that is
205
reachable from another key is not returned; all the others are.
207
This operation scales with the relative depth between any two keys. It
208
uses gdfo to avoid walking all ancestry.
210
:param keys: An iterable of keys.
211
:return: A set of the heads. Note that as a set there is no ordering
212
information. Callers will need to filter their input to create
213
order if they need it.
215
candidate_nodes = dict((key, self._nodes[key]) for key in keys)
216
if revision.NULL_REVISION in candidate_nodes:
217
# NULL_REVISION is only a head if it is the only entry
218
candidate_nodes.pop(revision.NULL_REVISION)
219
if not candidate_nodes:
220
return frozenset([revision.NULL_REVISION])
221
if len(candidate_nodes) < 2:
222
# No or only one candidate
223
return frozenset(candidate_nodes)
224
heads_key = frozenset(candidate_nodes)
225
# Do we have a cached result ?
227
heads = self._known_heads[heads_key]
231
# Let's compute the heads
235
for node in candidate_nodes.values():
237
pending.extend(node.parent_keys)
238
if min_gdfo is None or node.gdfo < min_gdfo:
242
node_key = pending.pop()
244
# node already appears in some ancestry
247
node = nodes[node_key]
248
if node.gdfo <= min_gdfo:
251
pending.extend(node.parent_keys)
252
heads = heads_key.difference(seen)
254
self._known_heads[heads_key] = heads
258
"""Return the nodes in topological order.
260
All parents must occur before all children.
262
for node in self._nodes.itervalues():
263
if node.gdfo is None:
264
raise errors.GraphCycleError(self._nodes)
265
pending = self._find_tails()
266
pending_pop = pending.pop
267
pending_append = pending.append
270
topo_order_append = topo_order.append
272
num_seen_parents = dict.fromkeys(self._nodes, 0)
275
if node.parent_keys is not None:
276
# We don't include ghost parents
277
topo_order_append(node.key)
278
for child_key in node.child_keys:
279
child_node = self._nodes[child_key]
280
seen_parents = num_seen_parents[child_key] + 1
281
if seen_parents == len(child_node.parent_keys):
282
# All parents have been processed, enqueue this child
283
pending_append(child_node)
284
# This has been queued up, stop tracking it
285
del num_seen_parents[child_key]
287
num_seen_parents[child_key] = seen_parents
288
# We started from the parents, so we don't need to do anymore work
292
"""Return a reverse topological ordering which is 'stable'.
294
There are a few constraints:
295
1) Reverse topological (all children before all parents)
297
3) 'stable' sorting, so that we get the same result, independent of
298
machine, or extra data.
299
To do this, we use the same basic algorithm as topo_sort, but when we
300
aren't sure what node to access next, we sort them lexicographically.
302
tips = self._find_tips()
303
# Split the tips based on prefix
306
if node.key.__class__ is str or len(node.key) == 1:
310
prefix_tips.setdefault(prefix, []).append(node)
312
num_seen_children = dict.fromkeys(self._nodes, 0)
315
for prefix in sorted(prefix_tips):
316
pending = sorted(prefix_tips[prefix], key=lambda n:n.key,
320
if node.parent_keys is None:
321
# Ghost node, skip it
323
result.append(node.key)
324
for parent_key in sorted(node.parent_keys, reverse=True):
325
parent_node = self._nodes[parent_key]
326
seen_children = num_seen_children[parent_key] + 1
327
if seen_children == len(parent_node.child_keys):
328
# All children have been processed, enqueue this parent
329
pending.append(parent_node)
330
# This has been queued up, stop tracking it
331
del num_seen_children[parent_key]
333
num_seen_children[parent_key] = seen_children
336
def merge_sort(self, tip_key):
337
"""Compute the merge sorted graph output."""
338
from bzrlib import tsort
339
as_parent_map = dict((node.key, node.parent_keys)
340
for node in self._nodes.itervalues()
341
if node.parent_keys is not None)
342
# We intentionally always generate revnos and never force the
344
# Strip the sequence_number that merge_sort generates
345
return [_MergeSortNode(key, merge_depth, revno, end_of_merge)
346
for _, key, merge_depth, revno, end_of_merge
347
in tsort.merge_sort(as_parent_map, tip_key,
348
mainline_revisions=None,
349
generate_revno=True)]
351
def get_parent_keys(self, key):
352
"""Get the parents for a key
354
Returns a list containg the parents keys. If the key is a ghost,
355
None is returned. A KeyError will be raised if the key is not in
358
:param keys: Key to check (eg revision_id)
359
:return: A list of parents
361
return self._nodes[key].parent_keys
363
def get_child_keys(self, key):
364
"""Get the children for a key
366
Returns a list containg the children keys. A KeyError will be raised
367
if the key is not in the graph.
369
:param keys: Key to check (eg revision_id)
370
:return: A list of children
372
return self._nodes[key].child_keys