1570.1.7
by Robert Collins
Replace the slow topo_sort routine with a much faster one for non trivial datasets. |
1 |
# (C) 2005, 2006 Canonical Limited.
|
1185.16.113
by mbp at sourcefrog
Add topo_sort utility function |
2 |
#
|
3 |
# This program is free software; you can redistribute it and/or modify
|
|
4 |
# it under the terms of the GNU General Public License as published by
|
|
5 |
# the Free Software Foundation; either version 2 of the License, or
|
|
6 |
# (at your option) any later version.
|
|
7 |
#
|
|
8 |
# This program is distributed in the hope that it will be useful,
|
|
9 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
10 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
11 |
# GNU General Public License for more details.
|
|
12 |
#
|
|
13 |
# You should have received a copy of the GNU General Public License
|
|
14 |
# along with this program; if not, write to the Free Software
|
|
15 |
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
16 |
||
1570.1.7
by Robert Collins
Replace the slow topo_sort routine with a much faster one for non trivial datasets. |
17 |
|
18 |
"""Topological sorting routines."""
|
|
19 |
||
20 |
||
21 |
import bzrlib.errors as errors |
|
22 |
||
1185.16.114
by mbp at sourcefrog
Improved topological sort |
23 |
|
24 |
def topo_sort(graph): |
|
1185.16.113
by mbp at sourcefrog
Add topo_sort utility function |
25 |
"""Topological sort a graph.
|
26 |
||
1185.16.114
by mbp at sourcefrog
Improved topological sort |
27 |
graph -- sequence of pairs of node->parents_list.
|
28 |
||
29 |
The result is a list of node names, such that all parents come before
|
|
30 |
their children.
|
|
31 |
||
32 |
Nodes at the same depth are returned in sorted order.
|
|
1185.16.113
by mbp at sourcefrog
Add topo_sort utility function |
33 |
|
34 |
node identifiers can be any hashable object, and are typically strings.
|
|
35 |
"""
|
|
1570.1.7
by Robert Collins
Replace the slow topo_sort routine with a much faster one for non trivial datasets. |
36 |
return TopoSorter(graph).sorted() |
37 |
||
38 |
||
39 |
class TopoSorter(object): |
|
40 |
||
41 |
def __init__(self, graph): |
|
42 |
"""Topological sorting of a graph.
|
|
43 |
|
|
44 |
:param graph: sequence of pairs of node_name->parent_names_list.
|
|
45 |
i.e. [('C', ['B']), ('B', ['A']), ('A', [])]
|
|
46 |
For this input the output from the sort or
|
|
47 |
iter_topo_order routines will be:
|
|
48 |
'A', 'B', 'C'
|
|
49 |
|
|
50 |
node identifiers can be any hashable object, and are typically strings.
|
|
51 |
||
1587.1.2
by Robert Collins
Review comments for reconcile. |
52 |
If you have a graph like [('a', ['b']), ('a', ['c'])] this will only use
|
53 |
one of the two values for 'a'.
|
|
54 |
||
1570.1.7
by Robert Collins
Replace the slow topo_sort routine with a much faster one for non trivial datasets. |
55 |
The graph is sorted lazily: until you iterate or sort the input is
|
56 |
not processed other than to create an internal representation.
|
|
57 |
||
1587.1.3
by Robert Collins
Typos for reconcile - docstring in tsort.py was out of sync with code. |
58 |
iteration or sorting may raise GraphCycleError if a cycle is present
|
1570.1.7
by Robert Collins
Replace the slow topo_sort routine with a much faster one for non trivial datasets. |
59 |
in the graph.
|
60 |
"""
|
|
61 |
# a dict of the graph.
|
|
62 |
self._graph = dict(graph) |
|
63 |
### if debugging:
|
|
64 |
# self._original_graph = dict(graph)
|
|
65 |
||
66 |
# this is a stack storing the depth first search into the graph.
|
|
67 |
self._node_name_stack = [] |
|
68 |
# at each level of 'recursion' we have to check each parent. This
|
|
69 |
# stack stores the parents we have not yet checked for the node at the
|
|
70 |
# matching depth in _node_name_stack
|
|
71 |
self._pending_parents_stack = [] |
|
72 |
# this is a set of the completed nodes for fast checking whether a
|
|
73 |
# parent in a node we are processing on the stack has already been
|
|
74 |
# emitted and thus can be skipped.
|
|
75 |
self._completed_node_names = set() |
|
76 |
||
77 |
def sorted(self): |
|
78 |
"""Sort the graph and return as a list.
|
|
79 |
|
|
80 |
After calling this the sorter is empty and you must create a new one.
|
|
81 |
"""
|
|
82 |
return list(self.iter_topo_order()) |
|
83 |
||
84 |
### Useful if fiddling with this code.
|
|
85 |
### # cross check
|
|
86 |
### sorted_names = list(self.iter_topo_order())
|
|
87 |
### for index in range(len(sorted_names)):
|
|
88 |
### rev = sorted_names[index]
|
|
89 |
### for left_index in range(index):
|
|
90 |
### if rev in self.original_graph[sorted_names[left_index]]:
|
|
91 |
### print "revision in parent list of earlier revision"
|
|
92 |
### import pdb;pdb.set_trace()
|
|
93 |
||
94 |
def iter_topo_order(self): |
|
1587.1.3
by Robert Collins
Typos for reconcile - docstring in tsort.py was out of sync with code. |
95 |
"""Yield the nodes of the graph in a topological order.
|
1570.1.7
by Robert Collins
Replace the slow topo_sort routine with a much faster one for non trivial datasets. |
96 |
|
97 |
After finishing iteration the sorter is empty and you cannot continue
|
|
98 |
iteration.
|
|
99 |
"""
|
|
100 |
while self._graph: |
|
101 |
# now pick a random node in the source graph, and transfer it to the
|
|
102 |
# top of the depth first search stack.
|
|
103 |
node_name, parents = self._graph.popitem() |
|
104 |
self._push_node(node_name, parents) |
|
105 |
while self._node_name_stack: |
|
106 |
# loop until this call completes.
|
|
107 |
parents_to_visit = self._pending_parents_stack[-1] |
|
108 |
# if all parents are done, the revision is done
|
|
109 |
if not parents_to_visit: |
|
110 |
# append the revision to the topo sorted list
|
|
111 |
# all the nodes parents have been added to the output, now
|
|
112 |
# we can add it to the output.
|
|
113 |
yield self._pop_node() |
|
114 |
else: |
|
115 |
while self._pending_parents_stack[-1]: |
|
116 |
# recurse depth first into a single parent
|
|
117 |
next_node_name = self._pending_parents_stack[-1].pop() |
|
118 |
if next_node_name in self._completed_node_names: |
|
119 |
# this parent was completed by a child on the
|
|
120 |
# call stack. skip it.
|
|
121 |
continue
|
|
122 |
# otherwise transfer it from the source graph into the
|
|
123 |
# top of the current depth first search stack.
|
|
124 |
try: |
|
125 |
parents = self._graph.pop(next_node_name) |
|
126 |
except KeyError: |
|
127 |
# if the next node is not in the source graph it has
|
|
128 |
# already been popped from it and placed into the
|
|
129 |
# current search stack (but not completed or we would
|
|
130 |
# have hit the continue 4 lines up.
|
|
131 |
# this indicates a cycle.
|
|
132 |
raise errors.GraphCycleError(self._node_name_stack) |
|
133 |
self._push_node(next_node_name, parents) |
|
134 |
# and do not continue processing parents until this 'call'
|
|
135 |
# has recursed.
|
|
136 |
break
|
|
137 |
||
138 |
def _push_node(self, node_name, parents): |
|
139 |
"""Add node_name to the pending node stack.
|
|
140 |
|
|
141 |
Names in this stack will get emitted into the output as they are popped
|
|
142 |
off the stack.
|
|
143 |
"""
|
|
144 |
||
145 |
self._node_name_stack.append(node_name) |
|
146 |
self._pending_parents_stack.append(list(parents)) |
|
147 |
||
148 |
def _pop_node(self): |
|
149 |
"""Pop the top node off the stack
|
|
150 |
||
151 |
The node is appended to the sorted output.
|
|
152 |
"""
|
|
153 |
# we are returning from the flattened call frame:
|
|
154 |
# pop off the local variables
|
|
155 |
node_name = self._node_name_stack.pop() |
|
156 |
self._pending_parents_stack.pop() |
|
157 |
||
158 |
self._completed_node_names.add(node_name) |
|
159 |
return node_name |