# Copyright (C) 2005, 2006 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
1570.1.7
by Robert Collins
Replace the slow topo_sort routine with a much faster one for non trivial datasets. |
17 |
|
18 |
"""Topological sorting routines."""
|
|
19 |
||
20 |
||
2425.4.2
by John Arbash Meinel
Change valid self._foo variables into local variables. |
21 |
from bzrlib import errors |
1570.1.7
by Robert Collins
Replace the slow topo_sort routine with a much faster one for non trivial datasets. |
22 |
|
1185.16.114
by mbp at sourcefrog
Improved topological sort |
23 |
|
1624.1.2
by Robert Collins
Add MergeSort facility to bzrlib.tsort. |
24 |
__all__ = ["topo_sort", "TopoSorter", "merge_sort", "MergeSorter"] |
25 |
||
26 |
||
1185.16.114
by mbp at sourcefrog
Improved topological sort |
27 |
def topo_sort(graph):
    """Topological sort a graph.

    :param graph: sequence of pairs of node->parents_list.
    :return: a list of node names, such that all parents come before
        their children.

    node identifiers can be any hashable object, and are typically strings.
    """
    # All of the work is delegated to TopoSorter; this is only a convenience
    # wrapper that builds a sorter and drains it into a list.
    return TopoSorter(graph).sorted()
38 |
||
39 |
||
40 |
class TopoSorter(object):
    """Lazy, non-recursive depth-first topological sorter.

    The recursion of a classic depth-first search is flattened into two
    parallel stacks (node names and their not-yet-visited parents) so that
    deep graphs do not hit the interpreter recursion limit.
    """

    def __init__(self, graph):
        """Topological sorting of a graph.

        :param graph: sequence of pairs of node_name->parent_names_list.
                      i.e. [('C', ['B']), ('B', ['A']), ('A', [])]
                      For this input the output from the sort or
                      iter_topo_order routines will be:
                      'A', 'B', 'C'

        node identifiers can be any hashable object, and are typically strings.

        If you have a graph like [('a', ['b']), ('a', ['c'])] this will only use
        one of the two values for 'a'.

        The graph is sorted lazily: until you iterate or sort the input is
        not processed other than to create an internal representation.

        iteration or sorting may raise GraphCycleError if a cycle is present
        in the graph.
        """
        # a dict of the graph. Nodes are removed from it as they are moved
        # onto the search stack.
        self._graph = dict(graph)
        # every node that was supplied as a key; parents that are not in this
        # set were never part of the graph and are skipped during the search.
        self._visitable = set(self._graph)

        # this is a stack storing the depth first search into the graph.
        self._node_name_stack = []
        # at each level of 'recursion' we have to check each parent. This
        # stack stores the parents we have not yet checked for the node at the
        # matching depth in _node_name_stack
        self._pending_parents_stack = []
        # this is a set of the completed nodes for fast checking whether a
        # parent in a node we are processing on the stack has already been
        # emitted and thus can be skipped.
        self._completed_node_names = set()

    def sorted(self):
        """Sort the graph and return as a list.

        After calling this the sorter is empty and you must create a new one.
        """
        return list(self.iter_topo_order())

    def iter_topo_order(self):
        """Yield the nodes of the graph in a topological order.

        Every node is yielded after all of its parents that are part of the
        graph. Parents that were never supplied as nodes are ignored.

        After finishing iteration the sorter is empty and you cannot continue
        iteration.

        :raises GraphCycleError: if a parent is found that is already on the
            search stack but not yet completed.
        """
        while self._graph:
            # now pick an arbitrary node in the source graph, and transfer it
            # to the top of the depth first search stack.
            node_name, parents = self._graph.popitem()
            self._push_node(node_name, parents)
            while self._node_name_stack:
                # loop until this 'call' completes.
                parents_to_visit = self._pending_parents_stack[-1]
                if not parents_to_visit:
                    # all of the node's parents have been added to the
                    # output, so now the node itself can be emitted.
                    yield self._pop_node()
                    continue
                # parents_to_visit stays the top-of-stack list for the whole
                # inner loop: we break out as soon as something is pushed.
                while parents_to_visit:
                    # recurse depth first into a single parent
                    next_node_name = parents_to_visit.pop()
                    if next_node_name in self._completed_node_names:
                        # this parent was completed by a child on the
                        # call stack. skip it.
                        continue
                    if next_node_name not in self._visitable:
                        # the parent is not a node of this graph at all.
                        continue
                    # otherwise transfer it from the source graph into the
                    # top of the current depth first search stack.
                    try:
                        parents = self._graph.pop(next_node_name)
                    except KeyError:
                        # the node is visitable and not completed, yet it is
                        # no longer in the source graph: it must already be
                        # on the current search stack. this indicates a cycle.
                        raise errors.GraphCycleError(self._node_name_stack)
                    self._push_node(next_node_name, parents)
                    # and do not continue processing parents until this
                    # 'call' has recursed.
                    break

    def _push_node(self, node_name, parents):
        """Add node_name to the pending node stack.

        Names in this stack will get emitted into the output as they are popped
        off the stack.

        :param node_name: the node to start processing.
        :param parents: the node's parents; a private copy is taken because
            the pending list is consumed destructively during the search.
        """
        self._node_name_stack.append(node_name)
        self._pending_parents_stack.append(list(parents))

    def _pop_node(self):
        """Pop the top node off the stack

        The node is recorded as completed and returned so the caller can
        append it to the sorted output.
        """
        # we are returning from the flattened call frame:
        # pop off the local variables
        node_name = self._node_name_stack.pop()
        self._pending_parents_stack.pop()

        self._completed_node_names.add(node_name)
        return node_name
|
163 |
||
164 |
||
1988.4.1
by Robert Collins
bzrlib.tsort.merge_sorted now accepts 'generate_revnos'. This parameter |
165 |
def merge_sort(graph, branch_tip, mainline_revisions=None, generate_revno=False):
    """Topological sort a graph which groups merges.

    :param graph: sequence of pairs of node->parents_list.
    :param branch_tip: the tip of the branch to graph. Revisions not
                       reachable from branch_tip are not included in the
                       output.
    :param mainline_revisions: If not None this forces a mainline to be
                               used rather than synthesised from the graph.
                               This must be a valid path through some part
                               of the graph. If the mainline does not cover all
                               the revisions, output stops at the start of the
                               old revision listed in the mainline revisions
                               list.
                               The order for this parameter is oldest-first.
    :param generate_revno: Optional parameter controlling the generation of
        revision number sequences in the output. See the output description of
        the MergeSorter docstring for details.
    :result: See the MergeSorter docstring for details.

    node identifiers can be any hashable object, and are typically strings.
    """
    # Convenience wrapper: build a MergeSorter and drain it into a list.
    sorter = MergeSorter(graph, branch_tip, mainline_revisions,
                         generate_revno)
    return sorter.sorted()
|
1624.1.2
by Robert Collins
Add MergeSort facility to bzrlib.tsort. |
188 |
|
189 |
||
190 |
class MergeSorter(object):
    """Merge-aware topological sorter (see ``__init__`` for the details)."""

    # Instances carry only these attributes; declaring them as slots avoids
    # a per-instance __dict__.
    __slots__ = ['_node_name_stack',
                 '_node_merge_depth_stack',
                 '_pending_parents_stack',
                 '_assigned_sequence_stack',
                 '_left_subtree_pushed_stack',
                 '_generate_revno',
                 '_graph',
                 '_mainline_revisions',
                 '_stop_revision',
                 '_original_graph',
                 '_revnos',
                 '_root_sequence',
                 '_completed_node_names',
                 '_scheduled_nodes',
                ]
|
|
207 |
||
1988.4.1
by Robert Collins
bzrlib.tsort.merge_sorted now accepts 'generate_revnos'. This parameter |
208 |
def __init__(self, graph, branch_tip, mainline_revisions=None,
    generate_revno=False):
    """Merge-aware topological sorting of a graph.

    :param graph: sequence of pairs of node_name->parent_names_list.
                  i.e. [('C', ['B']), ('B', ['A']), ('A', [])]
                  For this input, with branch_tip 'C', the nodes come out
                  of the sort or iter_topo_order routines tip first:
                  'C', 'B', 'A'
    :param branch_tip: the tip of the branch to graph. Revisions not
                   reachable from branch_tip are not included in the
                   output. If None, nothing is scheduled.
    :param mainline_revisions: If not None this forces a mainline to be
                           used rather than synthesised from the graph.
                           This must be a valid path through some part
                           of the graph. If the mainline does not cover all
                           the revisions, output stops at the start of the
                           old revision listed in the mainline revisions
                           list (the first entry is never emitted).
                           The order for this parameter is oldest-first.
    :param generate_revno: Optional parameter controlling the generation of
        revision number sequences in the output. See the output description
        for more details.

    The result is ordered so that every node comes before its parents
    (the branch tip is first). Each element of the list is a tuple
    containing:
    (sequence_number, node_name, merge_depth, end_of_merge)
    or, when generate_revno is true,
    (sequence_number, node_name, merge_depth, revno_sequence, end_of_merge)
     * sequence_number: The sequence of this row in the output. Useful for
       GUIs.
     * node_name: The node name: opaque text to the merge routine.
     * merge_depth: How many levels of merging deep this node has been
       found.
     * revno_sequence: When requested this field provides a sequence of
         revision numbers for all revisions. The format is:
         REVNO[[.BRANCHREVNO.REVNO] ...]. BRANCHREVNO is the number of the
         branch that the revno is on. From left to right the REVNO numbers
         are the sequence numbers within that branch of the revision.
         For instance, the graph {A:[], B:['A'], C:['A', 'B']} will get
         the following revno_sequences assigned: A:(1,), B:(1,1,1), C:(2,).
         This should be read as 'A is the first commit in the trunk',
         'B is the first commit on the first branch made from A', 'C is the
         second commit in the trunk'.
     * end_of_merge: When True the next node is part of a different merge.

    node identifiers can be any hashable object, and are typically strings.

    If you have a graph like [('a', ['b']), ('a', ['c'])] this will only use
    one of the two values for 'a'.

    The graph is sorted lazily: until you iterate or sort the input is
    not processed other than to create an internal representation.

    iteration or sorting may raise GraphCycleError if a cycle is present
    in the graph.

    Background information on the design:
    -------------------------------------
    definition: the end of any cluster or 'merge' occurs when:
        1 - the next revision has a lower merge depth than we do.
          i.e.
          A 0
          B  1
          C   2
          D  1
          E 0
          C, D are the ends of clusters, E might be but we need more data.
        2 - or the next revision at our merge depth is not our left most
          ancestor.
          This is required to handle multiple-merges in one commit.
          i.e.
          A 0    [F, B, E]
          B  1   [D, C]
          C   2  [D]
          D  1   [F]
          E  1   [F]
          F 0
          C is the end of a cluster due to rule 1.
          D is not the end of a cluster from rule 1, but is from rule 2: E
            is not its left most ancestor
          E is the end of a cluster due to rule 1
          F might be but we need more data.

    we show connecting lines to a parent when:
     - The parent is the start of a merge within this cluster.
       That is, the merge was not done to the mainline before this cluster
       was merged to the mainline.
       This can be detected thus:
        * The parent has a higher merge depth and is the next revision in
          the list.

      The next revision in the list constraint is needed for this case:
      A 0   [D, B]
      B  1  [C, F]   # we do not want to show a line to F which is depth 2
                       but not a merge
      C  1  [H]      # note that this is a long line to show back to the
                       ancestor - see the end of merge rules.
      D 0   [G, E]
      E  1  [G, F]
      F   2 [G]
      G  1  [H]
      H 0
     - Part of this merges 'branch':
      The parent has the same merge depth and is our left most parent and we
       are not the end of the cluster.
      A 0   [C, B] lines: [B, C]
      B  1  [E, C] lines: [C]
      C 0   [D]    lines: [D]
      D 0   [F, E] lines: [E, F]
      E  1  [F]    lines: [F]
      F 0
     - The end of this merge/cluster:
      we can ONLY have multiple parents at the end of a cluster if this
      branch was previously merged into the 'mainline'.
      - if we have one and only one parent, show it
        Note that this may be to a greater merge depth - for instance if
        this branch continued from a deeply nested branch to add something
        to it.
      - if we have more than one parent - show the second oldest (older ==
        further down the list) parent with
        an equal or lower merge depth
         XXXX revisit when awake. ddaa asks about the relevance of each one
         - maybe more than one parent is relevant
    """
    self._generate_revno = generate_revno
    # a dict of the graph.
    self._graph = dict(graph)
    # if there is an explicit mainline, alter the graph to match. This is
    # easier than checking at every merge whether we are on the mainline and
    # if so which path to take.
    if mainline_revisions is None:
        self._mainline_revisions = []
        self._stop_revision = None
    else:
        self._mainline_revisions = list(mainline_revisions)
        self._stop_revision = self._mainline_revisions[0]
    # skip the first revision, its what we reach and its parents are
    # therefore irrelevant
    for index, revision in enumerate(self._mainline_revisions[1:]):
        # NB: index 0 means self._mainline_revisions[1], so
        # self._mainline_revisions[index] is the mainline predecessor of
        # 'revision'.
        parent = self._mainline_revisions[index]
        if parent is None:
            # end of mainline_revisions history
            continue
        graph_parents = self._graph[revision]
        if graph_parents[0] == parent:
            # the mainline matches the graph, nothing to do.
            continue
        # Move the mainline parent to the front. This is done on a private
        # copy: dict(graph) above only copies the mapping, so the parent
        # lists themselves still belong to the caller and must not be
        # modified in place. (remove() raises ValueError if the claimed
        # mainline parent is not actually a parent of the revision.)
        reordered = list(graph_parents)
        reordered.remove(parent)
        reordered.insert(0, parent)
        self._graph[revision] = reordered
    # we need to do a check late in the process to detect end-of-merges
    # which requires the parents to be accessible: its easier for now
    # to just keep the original graph around.
    self._original_graph = dict(self._graph)
    # we need to know the revision numbers of revisions to determine
    # the revision numbers of their descendants
    # this is a graph from node to [revno_tuple, sequence_number]
    # where sequence is the number of branches made from the node,
    # and revno_tuple is the tuple that was assigned to the node.
    # we dont know revnos to start with, so we start it seeded with
    # [None, 0]
    self._revnos = dict((revision, [None, 0]) for revision in self._graph)
    # the global implicit root node has revno 0, but we need to know
    # the sequence number for it too:
    self._root_sequence = 0

    # this is a stack storing the depth first search into the graph.
    self._node_name_stack = []
    # at each level of recursion we need the merge depth this node is at:
    self._node_merge_depth_stack = []
    # at each level of 'recursion' we have to check each parent. This
    # stack stores the parents we have not yet checked for the node at the
    # matching depth in _node_name_stack
    self._pending_parents_stack = []
    # When we first look at a node we assign it a sequence number from its
    # leftmost parent.
    self._assigned_sequence_stack = []
    # this is a set of the nodes who have been completely analysed for fast
    # membership checking
    self._completed_node_names = set()
    # this is the scheduling of nodes list.
    # Nodes are scheduled
    # from the bottom left of the tree: in the tree
    # A 0  [D, B]
    # B  1 [C]
    # C  1 [D]
    # D 0  [F, E]
    # E  1 [F]
    # F 0
    # the scheduling order is: F, E, D, C, B, A
    # that is - 'left subtree, right subtree, node'
    # which would mean that when we schedule A we can emit the entire tree.
    self._scheduled_nodes = []
    # This records for each node when we have processed its left most
    # unmerged subtree. After this subtree is scheduled, all other subtrees
    # have their merge depth increased by one from this nodes merge depth.
    # It holds one boolean per entry of _node_name_stack.
    self._left_subtree_pushed_stack = []

    # seed the search with the tip of the branch
    if branch_tip is not None:
        parents = self._graph.pop(branch_tip)
        self._push_node(branch_tip, 0, parents)
|
412 |
||
413 |
def sorted(self):
    """Sort the graph and return as a list.

    The list holds the tuples yielded by iter_topo_order, in the same order.

    After calling this the sorter is empty and you must create a new one.
    """
    return list(self.iter_topo_order())
|
419 |
||
420 |
def iter_topo_order(self):
    """Yield the nodes of the graph in merge-sorted order.

    Nodes are first scheduled depth first (parents before children) and
    then delivered from the end of that schedule, so the branch tip is
    yielded first and every node is yielded before its parents.

    Each item is (sequence_number, node_name, merge_depth, end_of_merge),
    or (sequence_number, node_name, merge_depth, revno, end_of_merge) when
    the sorter was created with generate_revno set.

    Iteration stops, without yielding it, when the stop revision (the
    first entry of an explicit mainline) is reached.

    After finishing iteration the sorter is empty and you cannot continue
    iteration.

    :raises GraphCycleError: if a parent is reached that is no longer in
        the source graph but has not been completed either.
    """
    # These are safe to offload to local variables, because they are used
    # as a stack and modified in place, never assigned to.
    node_name_stack = self._node_name_stack
    node_merge_depth_stack = self._node_merge_depth_stack
    pending_parents_stack = self._pending_parents_stack
    left_subtree_pushed_stack = self._left_subtree_pushed_stack
    completed_node_names = self._completed_node_names
    scheduled_nodes = self._scheduled_nodes

    graph_pop = self._graph.pop

    def push_node(node_name, merge_depth, parents,
                  node_name_stack_append=node_name_stack.append,
                  node_merge_depth_stack_append=node_merge_depth_stack.append,
                  left_subtree_pushed_stack_append=left_subtree_pushed_stack.append,
                  pending_parents_stack_append=pending_parents_stack.append,
                  assigned_sequence_stack_append=self._assigned_sequence_stack.append,
                  original_graph=self._original_graph,
                  revnos=self._revnos,
                  ):
        """Add node_name to the pending node stack.

        Names in this stack will get emitted into the output as they are popped
        off the stack.

        This inlines a lot of self._variable.append functions as local
        variables (bound once as default arguments).
        """
        node_name_stack_append(node_name)
        node_merge_depth_stack_append(merge_depth)
        left_subtree_pushed_stack_append(False)
        pending_parents_stack_append(list(parents))
        # as we push it, assign it a sequence number against its parent:
        parents = original_graph[node_name]
        if parents:
            # node has parents, assign from the left most parent.
            parent_revno = revnos[parents[0]]
            sequence = parent_revno[1]
            parent_revno[1] += 1
        else:
            # no parents, use the root sequence
            sequence = self._root_sequence
            self._root_sequence += 1
        assigned_sequence_stack_append(sequence)

    def pop_node(node_name_stack_pop=node_name_stack.pop,
                 node_merge_depth_stack_pop=node_merge_depth_stack.pop,
                 assigned_sequence_stack_pop=self._assigned_sequence_stack.pop,
                 left_subtree_pushed_stack_pop=left_subtree_pushed_stack.pop,
                 pending_parents_stack_pop=pending_parents_stack.pop,
                 original_graph=self._original_graph,
                 revnos=self._revnos,
                 completed_node_names_add=self._completed_node_names.add,
                 scheduled_nodes_append=scheduled_nodes.append,
                 ):
        """Pop the top node off the stack

        The node is given its revno and appended to the schedule.
        """
        # we are returning from the flattened call frame:
        # pop off the local variables
        node_name = node_name_stack_pop()
        merge_depth = node_merge_depth_stack_pop()
        sequence = assigned_sequence_stack_pop()
        # remove this node from the pending lists:
        left_subtree_pushed_stack_pop()
        pending_parents_stack_pop()

        parents = original_graph[node_name]
        if parents:
            # node has parents, assign from the left most parent.
            parent_revno = revnos[parents[0]]
            if sequence:
                # not the first child, make a new branch
                revno = parent_revno[0] + (sequence, 1)
            else:
                # increment the sequence number within the branch
                revno = parent_revno[0][:-1] + (parent_revno[0][-1] + 1,)
        else:
            # no parents, use the root sequence
            if sequence:
                # make a parallel import revision number
                revno = (0, sequence, 1)
            else:
                revno = (1,)

        # store the revno for this node for future reference
        revnos[node_name][0] = revno
        completed_node_names_add(node_name)
        scheduled_nodes_append((node_name, merge_depth, revno))
        return node_name

    while node_name_stack:
        # loop until this call completes.
        parents_to_visit = pending_parents_stack[-1]
        # if all parents are done, the revision is done
        if not parents_to_visit:
            # all the nodes parents have been scheduled, now we can add
            # the node itself to the schedule.
            pop_node()
            continue
        while pending_parents_stack[-1]:
            if not left_subtree_pushed_stack[-1]:
                # recurse depth first into the primary parent
                next_node_name = pending_parents_stack[-1].pop(0)
            else:
                # place any merges in right-to-left order for scheduling
                # which gives us left-to-right order after we reverse
                # the scheduled queue. XXX: This has the effect of
                # allocating common-new revisions to the right-most
                # subtree rather than the left most, which will
                # display nicely (you get smaller trees at the top
                # of the combined merge).
                next_node_name = pending_parents_stack[-1].pop()
            if next_node_name in completed_node_names:
                # this parent was completed by a child on the
                # call stack. skip it.
                continue
            # otherwise transfer it from the source graph into the
            # top of the current depth first search stack.
            try:
                parents = graph_pop(next_node_name)
            except KeyError:
                # if the next node is not in the source graph it has
                # already been popped from it and placed into the
                # current search stack (but not completed or we would
                # have hit the continue above).
                # this indicates a cycle.
                # NOTE(review): a parent that was never in the graph at
                # all takes this path too - confirm callers never pass
                # such parents.
                raise errors.GraphCycleError(node_name_stack)
            if left_subtree_pushed_stack[-1]:
                # a new child branch from name_stack[-1]: one level deeper
                depth_increment = 1
            else:
                # first parent actually pushed: stays at our own depth
                depth_increment = 0
                left_subtree_pushed_stack[-1] = True
            push_node(
                next_node_name,
                node_merge_depth_stack[-1] + depth_increment,
                parents)
            # and do not continue processing parents until this 'call'
            # has recursed.
            break

    # We have scheduled the graph. Now deliver the ordered output:
    sequence_number = 0
    stop_revision = self._stop_revision
    generate_revno = self._generate_revno
    original_graph = self._original_graph

    while scheduled_nodes:
        node_name, merge_depth, revno = scheduled_nodes.pop()
        if node_name == stop_revision:
            return
        if not scheduled_nodes:
            # last revision is the end of a merge
            end_of_merge = True
        elif scheduled_nodes[-1][1] < merge_depth:
            # the next node is to our left
            end_of_merge = True
        elif (scheduled_nodes[-1][1] == merge_depth and
              (scheduled_nodes[-1][0] not in
               original_graph[node_name])):
            # the next node was part of a multiple-merge.
            end_of_merge = True
        else:
            end_of_merge = False
        if generate_revno:
            yield (sequence_number, node_name, merge_depth, revno, end_of_merge)
        else:
            yield (sequence_number, node_name, merge_depth, end_of_merge)
        sequence_number += 1
602 |
||
603 |
def _push_node(self, node_name, merge_depth, parents):
    """Open a frame for node_name on the pending node stacks.

    One entry is appended to each of the parallel stacks: the node name,
    its merge depth, a False "left subtree pushed" flag, a private copy of
    the ``parents`` argument, and the sequence number assigned below.

    The sequence number is taken from the counter kept in slot 1 of the
    ``_revnos`` entry of the node's left-most parent, as recorded in
    ``_original_graph``; that counter is then advanced.  A node without
    parents draws from ``_root_sequence`` instead.

    :param node_name: key of the node being pushed.
    :param merge_depth: merge depth recorded for the node.
    :param parents: iterable of parents still to be visited; it is copied
        so the caller's object is never modified through the stack.
    """
    self._node_name_stack.append(node_name)
    self._node_merge_depth_stack.append(merge_depth)
    self._left_subtree_pushed_stack.append(False)
    self._pending_parents_stack.append(list(parents))
    # The sequence is always derived from the graph as originally supplied,
    # not from the (possibly different) ``parents`` argument copied above.
    graph_parents = self._original_graph[node_name]
    if not graph_parents:
        # Parentless node: number it against the shared root counter.
        assigned = self._root_sequence
        self._root_sequence = assigned + 1
    else:
        # Number it against the left-most parent and bump that parent's
        # counter in place so the next child gets the following value.
        leftmost_entry = self._revnos[graph_parents[0]]
        assigned = leftmost_entry[1]
        leftmost_entry[1] = assigned + 1
    self._assigned_sequence_stack.append(assigned)
|
1624.1.2
by Robert Collins
Add MergeSort facility to bzrlib.tsort. |
625 |
|
626 |
def _pop_node(self):
    """Close the top frame of the pending node stacks and schedule it.

    The top entry is removed from every parallel stack, a revno tuple is
    computed for the node, stored in slot 0 of its ``_revnos`` entry, and
    ``(node_name, merge_depth, revno)`` is appended to
    ``_scheduled_nodes``.  The node is also added to
    ``_completed_node_names``.

    :return: the name of the node that was popped.
    """
    # Unwind the flattened call frame: these three values are needed below.
    name = self._node_name_stack.pop()
    depth = self._node_merge_depth_stack.pop()
    child_index = self._assigned_sequence_stack.pop()
    # The remaining per-frame bookkeeping is simply discarded.
    self._left_subtree_pushed_stack.pop()
    self._pending_parents_stack.pop()

    graph_parents = self._original_graph[name]
    if not graph_parents:
        if child_index:
            # A further parentless node: give it a parallel import number.
            dotted = (0, child_index, 1)
        else:
            dotted = (1,)
    else:
        # Number relative to the revno already stored for the left-most
        # parent.
        base = self._revnos[graph_parents[0]][0]
        if child_index:
            # Not the first child of that parent: start a new branch.
            dotted = base + (child_index, 1)
        else:
            # First child: continue the parent's branch by one.
            dotted = base[:-1] + (base[-1] + 1,)

    # Remember the revno so later children of this node can build on it.
    entry = self._revnos[name]
    entry[0] = dotted
    self._completed_node_names.add(name)
    self._scheduled_nodes.append((name, depth, entry[0]))
    return name