# Copyright (C) 2009 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
"""Implementation of Graph algorithms when we have already loaded everything.
"""
class _KnownGraphNode(object):
27
"""Represents a single object in the known graph."""
29
__slots__ = ('key', 'parent_keys', 'child_keys', 'gdfo')
31
def __init__(self, key, parent_keys):
33
self.parent_keys = parent_keys
35
# Greatest distance from origin
39
return '%s(%s gdfo:%s par:%s child:%s)' % (
40
self.__class__.__name__, self.key, self.gdfo,
41
self.parent_keys, self.child_keys)
44
class _MergeSortNode(object):
45
"""Information about a specific node in the merge graph."""
47
__slots__ = ('key', 'merge_depth', 'revno', 'end_of_merge')
49
def __init__(self, key, merge_depth, revno, end_of_merge):
51
self.merge_depth = merge_depth
53
self.end_of_merge = end_of_merge
56
class KnownGraph(object):
    """This is a class which assumes we already know the full graph."""

    def __init__(self, parent_map, do_cache=True):
        """Create a new KnownGraph instance.

        :param parent_map: A dictionary mapping key => parent_keys
        :param do_cache: If True, cache the results of heads() queries.
        """
        self._nodes = {}
        # Maps {sorted(revision_id, revision_id): heads}
        self._known_heads = {}
        self.do_cache = do_cache
        self._initialize_nodes(parent_map)
        self._find_gdfo()

    def _initialize_nodes(self, parent_map):
        """Populate self._nodes.

        After this has finished:
        - self._nodes will have an entry for every entry in parent_map.
        - ghosts will have a parent_keys = None,
        - all nodes found will also have .child_keys populated with all known
          child keys.
        """
        nodes = self._nodes
        for key, parent_keys in parent_map.iteritems():
            if key in nodes:
                # Node was already created as somebody's parent placeholder;
                # fill in its real parents now.
                node = nodes[key]
                node.parent_keys = parent_keys
            else:
                node = _KnownGraphNode(key, parent_keys)
                nodes[key] = node
            for parent_key in parent_keys:
                try:
                    parent_node = nodes[parent_key]
                except KeyError:
                    # Not seen yet; may turn out to be a ghost if never
                    # defined in parent_map.
                    parent_node = _KnownGraphNode(parent_key, None)
                    nodes[parent_key] = parent_node
                parent_node.child_keys.append(key)

    def _find_tails(self):
        """Return all nodes with no parents (origins and ghosts)."""
        return [node for node in self._nodes.itervalues()
                if not node.parent_keys]

    def _find_tips(self):
        """Return all nodes with no children (heads of the whole graph)."""
        return [node for node in self._nodes.itervalues()
                if not node.child_keys]

    def _find_gdfo(self):
        """Fill in the gdfo (greatest distance from origin) of every node.

        Walks from the tails towards the children, only descending into a
        child once all of its parents have been resolved.
        """
        nodes = self._nodes
        known_parent_gdfos = {}
        pending = []

        for node in self._find_tails():
            node.gdfo = 1
            pending.append(node)

        while pending:
            node = pending.pop()
            for child_key in node.child_keys:
                child = nodes[child_key]
                if child_key in known_parent_gdfos:
                    known_gdfo = known_parent_gdfos[child_key] + 1
                    present = True
                else:
                    known_gdfo = 1
                    present = False
                if child.gdfo is None or node.gdfo + 1 > child.gdfo:
                    child.gdfo = node.gdfo + 1
                if known_gdfo == len(child.parent_keys):
                    # We are the last parent updating that node, we can
                    # continue from there
                    pending.append(child)
                    if present:
                        del known_parent_gdfos[child_key]
                else:
                    # Update known_parent_gdfos for a key we couldn't process
                    known_parent_gdfos[child_key] = known_gdfo

    def heads(self, keys):
        """Return the heads from amongst keys.

        This is done by searching the ancestries of each key. Any key that is
        reachable from another key is not returned; all the others are.

        This operation scales with the relative depth between any two keys. It
        uses gdfo to avoid walking all ancestry.

        :param keys: An iterable of keys.
        :return: A set of the heads. Note that as a set there is no ordering
            information. Callers will need to filter their input to create
            order if they need it.
        """
        candidate_nodes = dict((key, self._nodes[key]) for key in keys)
        if revision.NULL_REVISION in candidate_nodes:
            # NULL_REVISION is only a head if it is the only entry
            candidate_nodes.pop(revision.NULL_REVISION)
            if not candidate_nodes:
                return frozenset([revision.NULL_REVISION])
        if len(candidate_nodes) < 2:
            # No or only one candidate
            return frozenset(candidate_nodes)
        heads_key = frozenset(candidate_nodes)
        # Do we have a cached result ?
        try:
            heads = self._known_heads[heads_key]
            return heads
        except KeyError:
            pass
        # Let's compute the heads
        seen = set()
        pending = []
        min_gdfo = None
        for node in candidate_nodes.values():
            if node.parent_keys:
                pending.extend(node.parent_keys)
            if min_gdfo is None or node.gdfo < min_gdfo:
                min_gdfo = node.gdfo
        nodes = self._nodes
        while pending:
            node_key = pending.pop()
            if node_key in seen:
                # node already appears in some ancestry
                continue
            seen.add(node_key)
            node = nodes[node_key]
            if node.gdfo <= min_gdfo:
                # Cannot be an ancestor of any candidate; stop walking here.
                continue
            if node.parent_keys:
                pending.extend(node.parent_keys)
        heads = heads_key.difference(seen)
        if self.do_cache:
            self._known_heads[heads_key] = heads
        return heads

    def topo_sort(self):
        """Return the nodes in topological order.

        All parents must occur before all children.
        """
        for node in self._nodes.itervalues():
            if node.gdfo is None:
                raise errors.GraphCycleError(self._nodes)
        pending = self._find_tails()
        pending_pop = pending.pop
        pending_append = pending.append

        topo_order = []
        topo_order_append = topo_order.append

        num_seen_parents = dict.fromkeys(self._nodes, 0)
        while pending:
            node = pending_pop()
            if node.parent_keys is not None:
                # We don't include ghost parents
                topo_order_append(node.key)
            for child_key in node.child_keys:
                child_node = self._nodes[child_key]
                seen_parents = num_seen_parents[child_key] + 1
                if seen_parents == len(child_node.parent_keys):
                    # All parents have been processed, enqueue this child
                    pending_append(child_node)
                    # This has been queued up, stop tracking it
                    del num_seen_parents[child_key]
                else:
                    num_seen_parents[child_key] = seen_parents
        # We started from the parents, so we don't need to do anymore work
        return topo_order

    def gc_sort(self):
        """Return a reverse topological ordering which is 'stable'.

        There are a few constraints:
          1) Reverse topological (all children before all parents)
          2) Grouped by prefix
          3) 'stable' sorting, so that we get the same result, independent of
             machine, or extra data.
        To do this, we use the same basic algorithm as topo_sort, but when we
        aren't sure what node to access next, we sort them lexicographically.
        """
        tips = self._find_tips()
        # Split the tips based on prefix
        prefix_tips = {}
        for node in tips:
            if node.key.__class__ is str or len(node.key) == 1:
                prefix = ''
            else:
                prefix = node.key[0]
            prefix_tips.setdefault(prefix, []).append(node)

        num_seen_children = dict.fromkeys(self._nodes, 0)

        result = []
        for prefix in sorted(prefix_tips):
            pending = sorted(prefix_tips[prefix], key=lambda n:n.key,
                             reverse=True)
            while pending:
                node = pending.pop()
                if node.parent_keys is None:
                    # Ghost node, skip it
                    continue
                result.append(node.key)
                for parent_key in sorted(node.parent_keys, reverse=True):
                    parent_node = self._nodes[parent_key]
                    seen_children = num_seen_children[parent_key] + 1
                    if seen_children == len(parent_node.child_keys):
                        # All children have been processed, enqueue this parent
                        pending.append(parent_node)
                        # This has been queued up, stop tracking it
                        del num_seen_children[parent_key]
                    else:
                        num_seen_children[parent_key] = seen_children
        return result

    def merge_sort(self, tip_key):
        """Compute the merge sorted graph output."""
        from bzrlib import tsort
        as_parent_map = dict((node.key, node.parent_keys)
                             for node in self._nodes.itervalues()
                             if node.parent_keys is not None)
        # We intentionally always generate revnos and never force the
        # mainline_revisions
        # Strip the sequence_number that merge_sort generates
        return [_MergeSortNode(key, merge_depth, revno, end_of_merge)
                for _, key, merge_depth, revno, end_of_merge
                 in tsort.merge_sort(as_parent_map, tip_key,
                                     mainline_revisions=None,
                                     generate_revno=True)]