1
# Copyright (C) 2009, 2010 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
"""Implementation of Graph algorithms when we have already loaded everything.
20
from __future__ import absolute_import
22
from collections import deque
29
class _KnownGraphNode(object):
30
"""Represents a single object in the known graph."""
32
__slots__ = ('key', 'parent_keys', 'child_keys', 'gdfo')
34
def __init__(self, key, parent_keys):
36
self.parent_keys = parent_keys
38
# Greatest distance from origin
42
return '%s(%s gdfo:%s par:%s child:%s)' % (
43
self.__class__.__name__, self.key, self.gdfo,
44
self.parent_keys, self.child_keys)
47
class _MergeSortNode(object):
48
"""Information about a specific node in the merge graph."""
50
__slots__ = ('key', 'merge_depth', 'revno', 'end_of_merge')
52
def __init__(self, key, merge_depth, revno, end_of_merge):
54
self.merge_depth = merge_depth
56
self.end_of_merge = end_of_merge
59
class KnownGraph(object):
60
"""This is a class which assumes we already know the full graph."""
62
def __init__(self, parent_map, do_cache=True):
63
"""Create a new KnownGraph instance.
65
:param parent_map: A dictionary mapping key => parent_keys
68
# Maps {frozenset(revision_id, revision_id): heads}
69
self._known_heads = {}
70
self.do_cache = do_cache
71
self._initialize_nodes(parent_map)
74
def _initialize_nodes(self, parent_map):
75
"""Populate self._nodes.
77
After this has finished:
78
- self._nodes will have an entry for every entry in parent_map.
79
- ghosts will have a parent_keys = None,
80
- all nodes found will also have .child_keys populated with all known
84
for key, parent_keys in parent_map.iteritems():
87
node.parent_keys = parent_keys
89
node = _KnownGraphNode(key, parent_keys)
91
for parent_key in parent_keys:
93
parent_node = nodes[parent_key]
95
parent_node = _KnownGraphNode(parent_key, None)
96
nodes[parent_key] = parent_node
97
parent_node.child_keys.append(key)
99
def _find_tails(self):
100
return [node for node in self._nodes.itervalues()
101
if not node.parent_keys]
103
def _find_tips(self):
104
return [node for node in self._nodes.itervalues()
105
if not node.child_keys]
107
def _find_gdfo(self):
109
known_parent_gdfos = {}
112
for node in self._find_tails():
118
for child_key in node.child_keys:
119
child = nodes[child_key]
120
if child_key in known_parent_gdfos:
121
known_gdfo = known_parent_gdfos[child_key] + 1
126
if child.gdfo is None or node.gdfo + 1 > child.gdfo:
127
child.gdfo = node.gdfo + 1
128
if known_gdfo == len(child.parent_keys):
129
# We are the last parent updating that node, we can
130
# continue from there
131
pending.append(child)
133
del known_parent_gdfos[child_key]
135
# Update known_parent_gdfos for a key we couldn't process
136
known_parent_gdfos[child_key] = known_gdfo
138
def add_node(self, key, parent_keys):
139
"""Add a new node to the graph.
141
If this fills in a ghost, then the gdfos of all children will be
144
:param key: The node being added. If this is a duplicate, this is a
146
:param parent_keys: The parents of the given node.
147
:return: None (should we return if this was a ghost, etc?)
152
if node.parent_keys is None:
153
node.parent_keys = parent_keys
154
# A ghost is being added, we can no-longer trust the heads
156
self._known_heads.clear()
158
# Make sure we compare a list to a list, as tuple != list.
159
parent_keys = list(parent_keys)
160
existing_parent_keys = list(node.parent_keys)
161
if parent_keys == existing_parent_keys:
162
return # Identical content
164
raise ValueError('Parent key mismatch, existing node %s'
165
' has parents of %s not %s'
166
% (key, existing_parent_keys, parent_keys))
168
node = _KnownGraphNode(key, parent_keys)
171
for parent_key in parent_keys:
173
parent_node = nodes[parent_key]
175
parent_node = _KnownGraphNode(parent_key, None)
176
# Ghosts and roots have gdfo 1
178
nodes[parent_key] = parent_node
179
if parent_gdfo < parent_node.gdfo:
180
parent_gdfo = parent_node.gdfo
181
parent_node.child_keys.append(key)
182
node.gdfo = parent_gdfo + 1
183
# Now fill the gdfo to all children
184
# Note that this loop is slightly inefficient, in that we may visit the
185
# same child (and its decendents) more than once, however, it is
186
# 'efficient' in that we only walk to nodes that would be updated,
187
# rather than all nodes
188
# We use a deque rather than a simple list stack, to go for BFD rather
189
# than DFD. So that if a longer path is possible, we walk it before we
190
# get to the final child
191
pending = deque([node])
193
node = pending.popleft()
194
next_gdfo = node.gdfo + 1
195
for child_key in node.child_keys:
196
child = nodes[child_key]
197
if child.gdfo < next_gdfo:
198
# This child is being updated, we need to check its
200
child.gdfo = next_gdfo
201
pending.append(child)
203
def heads(self, keys):
204
"""Return the heads from amongst keys.
206
This is done by searching the ancestries of each key. Any key that is
207
reachable from another key is not returned; all the others are.
209
This operation scales with the relative depth between any two keys. It
210
uses gdfo to avoid walking all ancestry.
212
:param keys: An iterable of keys.
213
:return: A set of the heads. Note that as a set there is no ordering
214
information. Callers will need to filter their input to create
215
order if they need it.
217
candidate_nodes = dict((key, self._nodes[key]) for key in keys)
218
if revision.NULL_REVISION in candidate_nodes:
219
# NULL_REVISION is only a head if it is the only entry
220
candidate_nodes.pop(revision.NULL_REVISION)
221
if not candidate_nodes:
222
return frozenset([revision.NULL_REVISION])
223
if len(candidate_nodes) < 2:
224
# No or only one candidate
225
return frozenset(candidate_nodes)
226
heads_key = frozenset(candidate_nodes)
227
# Do we have a cached result ?
229
heads = self._known_heads[heads_key]
233
# Let's compute the heads
237
for node in candidate_nodes.values():
239
pending.extend(node.parent_keys)
240
if min_gdfo is None or node.gdfo < min_gdfo:
244
node_key = pending.pop()
246
# node already appears in some ancestry
249
node = nodes[node_key]
250
if node.gdfo <= min_gdfo:
253
pending.extend(node.parent_keys)
254
heads = heads_key.difference(seen)
256
self._known_heads[heads_key] = heads
260
"""Return the nodes in topological order.
262
All parents must occur before all children.
264
for node in self._nodes.itervalues():
265
if node.gdfo is None:
266
raise errors.GraphCycleError(self._nodes)
267
pending = self._find_tails()
268
pending_pop = pending.pop
269
pending_append = pending.append
272
topo_order_append = topo_order.append
274
num_seen_parents = dict.fromkeys(self._nodes, 0)
277
if node.parent_keys is not None:
278
# We don't include ghost parents
279
topo_order_append(node.key)
280
for child_key in node.child_keys:
281
child_node = self._nodes[child_key]
282
seen_parents = num_seen_parents[child_key] + 1
283
if seen_parents == len(child_node.parent_keys):
284
# All parents have been processed, enqueue this child
285
pending_append(child_node)
286
# This has been queued up, stop tracking it
287
del num_seen_parents[child_key]
289
num_seen_parents[child_key] = seen_parents
290
# We started from the parents, so we don't need to do anymore work
294
"""Return a reverse topological ordering which is 'stable'.
296
There are a few constraints:
297
1) Reverse topological (all children before all parents)
299
3) 'stable' sorting, so that we get the same result, independent of
300
machine, or extra data.
301
To do this, we use the same basic algorithm as topo_sort, but when we
302
aren't sure what node to access next, we sort them lexicographically.
304
tips = self._find_tips()
305
# Split the tips based on prefix
308
if node.key.__class__ is str or len(node.key) == 1:
312
prefix_tips.setdefault(prefix, []).append(node)
314
num_seen_children = dict.fromkeys(self._nodes, 0)
317
for prefix in sorted(prefix_tips):
318
pending = sorted(prefix_tips[prefix], key=lambda n:n.key,
322
if node.parent_keys is None:
323
# Ghost node, skip it
325
result.append(node.key)
326
for parent_key in sorted(node.parent_keys, reverse=True):
327
parent_node = self._nodes[parent_key]
328
seen_children = num_seen_children[parent_key] + 1
329
if seen_children == len(parent_node.child_keys):
330
# All children have been processed, enqueue this parent
331
pending.append(parent_node)
332
# This has been queued up, stop tracking it
333
del num_seen_children[parent_key]
335
num_seen_children[parent_key] = seen_children
338
def merge_sort(self, tip_key):
339
"""Compute the merge sorted graph output."""
340
from bzrlib import tsort
341
as_parent_map = dict((node.key, node.parent_keys)
342
for node in self._nodes.itervalues()
343
if node.parent_keys is not None)
344
# We intentionally always generate revnos and never force the
346
# Strip the sequence_number that merge_sort generates
347
return [_MergeSortNode(key, merge_depth, revno, end_of_merge)
348
for _, key, merge_depth, revno, end_of_merge
349
in tsort.merge_sort(as_parent_map, tip_key,
350
mainline_revisions=None,
351
generate_revno=True)]
353
def get_parent_keys(self, key):
354
"""Get the parents for a key
356
Returns a list containg the parents keys. If the key is a ghost,
357
None is returned. A KeyError will be raised if the key is not in
360
:param keys: Key to check (eg revision_id)
361
:return: A list of parents
363
return self._nodes[key].parent_keys
365
def get_child_keys(self, key):
366
"""Get the children for a key
368
Returns a list containg the children keys. A KeyError will be raised
369
if the key is not in the graph.
371
:param keys: Key to check (eg revision_id)
372
:return: A list of children
374
return self._nodes[key].child_keys