3246.2.2
by Martin Pool
Fix up import of tsort |
1 |
# Copyright (C) 2005, 2006, 2008 Canonical Ltd
|
1185.16.113
by mbp at sourcefrog
Add topo_sort utility function |
2 |
#
|
3 |
# This program is free software; you can redistribute it and/or modify
|
|
4 |
# it under the terms of the GNU General Public License as published by
|
|
5 |
# the Free Software Foundation; either version 2 of the License, or
|
|
6 |
# (at your option) any later version.
|
|
7 |
#
|
|
8 |
# This program is distributed in the hope that it will be useful,
|
|
9 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
10 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
11 |
# GNU General Public License for more details.
|
|
12 |
#
|
|
13 |
# You should have received a copy of the GNU General Public License
|
|
14 |
# along with this program; if not, write to the Free Software
|
|
15 |
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
16 |
||
1570.1.7
by Robert Collins
Replace the slow topo_sort routine with a much faster one for non trivial datasets. |
17 |
|
18 |
"""Topological sorting routines."""
|
|
19 |
||
20 |
||
2425.4.2
by John Arbash Meinel
Change valid self._foo variables into local variables. |
21 |
from bzrlib import errors |
3246.2.2
by Martin Pool
Fix up import of tsort |
22 |
import bzrlib.revision as _mod_revision |
1570.1.7
by Robert Collins
Replace the slow topo_sort routine with a much faster one for non trivial datasets. |
23 |
|
1185.16.114
by mbp at sourcefrog
Improved topological sort |
24 |
|
1624.1.2
by Robert Collins
Add MergeSort facility to bzrlib.tsort. |
25 |
__all__ = ["topo_sort", "TopoSorter", "merge_sort", "MergeSorter"] |
26 |
||
27 |
||
1185.16.114
by mbp at sourcefrog
Improved topological sort |
28 |
def topo_sort(graph): |
1185.16.113
by mbp at sourcefrog
Add topo_sort utility function |
29 |
"""Topological sort a graph.
|
30 |
||
1185.16.114
by mbp at sourcefrog
Improved topological sort |
31 |
graph -- sequence of pairs of node->parents_list.
|
32 |
||
33 |
The result is a list of node names, such that all parents come before
|
|
34 |
their children.
|
|
35 |
||
1185.16.113
by mbp at sourcefrog
Add topo_sort utility function |
36 |
node identifiers can be any hashable object, and are typically strings.
|
37 |
"""
|
|
1570.1.7
by Robert Collins
Replace the slow topo_sort routine with a much faster one for non trivial datasets. |
38 |
return TopoSorter(graph).sorted() |
39 |
||
40 |
||
41 |
class TopoSorter(object): |
|
42 |
||
43 |
def __init__(self, graph): |
|
44 |
"""Topological sorting of a graph.
|
|
45 |
|
|
46 |
:param graph: sequence of pairs of node_name->parent_names_list.
|
|
47 |
i.e. [('C', ['B']), ('B', ['A']), ('A', [])]
|
|
48 |
For this input the output from the sort or
|
|
49 |
iter_topo_order routines will be:
|
|
50 |
'A', 'B', 'C'
|
|
51 |
|
|
52 |
node identifiers can be any hashable object, and are typically strings.
|
|
53 |
||
1587.1.2
by Robert Collins
Review comments for reconcile. |
54 |
If you have a graph like [('a', ['b']), ('a', ['c'])] this will only use
|
55 |
one of the two values for 'a'.
|
|
56 |
||
1570.1.7
by Robert Collins
Replace the slow topo_sort routine with a much faster one for non trivial datasets. |
57 |
The graph is sorted lazily: until you iterate or sort the input is
|
58 |
not processed other than to create an internal representation.
|
|
59 |
||
1587.1.3
by Robert Collins
Typos for reconcile - docstring in tsort.py was out of sync with code. |
60 |
iteration or sorting may raise GraphCycleError if a cycle is present
|
1570.1.7
by Robert Collins
Replace the slow topo_sort routine with a much faster one for non trivial datasets. |
61 |
in the graph.
|
62 |
"""
|
|
63 |
# a dict of the graph.
|
|
64 |
self._graph = dict(graph) |
|
2490.2.31
by Aaron Bentley
Fix iter_topo_order to permit un-included parents |
65 |
self._visitable = set(self._graph) |
1570.1.7
by Robert Collins
Replace the slow topo_sort routine with a much faster one for non trivial datasets. |
66 |
### if debugging:
|
67 |
# self._original_graph = dict(graph)
|
|
68 |
||
69 |
# this is a stack storing the depth first search into the graph.
|
|
70 |
self._node_name_stack = [] |
|
71 |
# at each level of 'recursion' we have to check each parent. This
|
|
72 |
# stack stores the parents we have not yet checked for the node at the
|
|
73 |
# matching depth in _node_name_stack
|
|
74 |
self._pending_parents_stack = [] |
|
75 |
# this is a set of the completed nodes for fast checking whether a
|
|
76 |
# parent in a node we are processing on the stack has already been
|
|
77 |
# emitted and thus can be skipped.
|
|
78 |
self._completed_node_names = set() |
|
79 |
||
80 |
def sorted(self): |
|
81 |
"""Sort the graph and return as a list.
|
|
82 |
|
|
83 |
After calling this the sorter is empty and you must create a new one.
|
|
84 |
"""
|
|
85 |
return list(self.iter_topo_order()) |
|
86 |
||
87 |
### Useful if fiddling with this code.
|
|
88 |
### # cross check
|
|
89 |
### sorted_names = list(self.iter_topo_order())
|
|
90 |
### for index in range(len(sorted_names)):
|
|
91 |
### rev = sorted_names[index]
|
|
92 |
### for left_index in range(index):
|
|
93 |
### if rev in self.original_graph[sorted_names[left_index]]:
|
|
94 |
### print "revision in parent list of earlier revision"
|
|
95 |
### import pdb;pdb.set_trace()
|
|
96 |
||
97 |
def iter_topo_order(self): |
|
1587.1.3
by Robert Collins
Typos for reconcile - docstring in tsort.py was out of sync with code. |
98 |
"""Yield the nodes of the graph in a topological order.
|
1570.1.7
by Robert Collins
Replace the slow topo_sort routine with a much faster one for non trivial datasets. |
99 |
|
100 |
After finishing iteration the sorter is empty and you cannot continue
|
|
101 |
iteration.
|
|
102 |
"""
|
|
103 |
while self._graph: |
|
104 |
# now pick a random node in the source graph, and transfer it to the
|
|
105 |
# top of the depth first search stack.
|
|
106 |
node_name, parents = self._graph.popitem() |
|
107 |
self._push_node(node_name, parents) |
|
108 |
while self._node_name_stack: |
|
109 |
# loop until this call completes.
|
|
110 |
parents_to_visit = self._pending_parents_stack[-1] |
|
111 |
# if all parents are done, the revision is done
|
|
112 |
if not parents_to_visit: |
|
113 |
# append the revision to the topo sorted list
|
|
114 |
# all the nodes parents have been added to the output, now
|
|
115 |
# we can add it to the output.
|
|
116 |
yield self._pop_node() |
|
117 |
else: |
|
118 |
while self._pending_parents_stack[-1]: |
|
119 |
# recurse depth first into a single parent
|
|
120 |
next_node_name = self._pending_parents_stack[-1].pop() |
|
121 |
if next_node_name in self._completed_node_names: |
|
122 |
# this parent was completed by a child on the
|
|
123 |
# call stack. skip it.
|
|
124 |
continue
|
|
2490.2.31
by Aaron Bentley
Fix iter_topo_order to permit un-included parents |
125 |
if next_node_name not in self._visitable: |
126 |
continue
|
|
1570.1.7
by Robert Collins
Replace the slow topo_sort routine with a much faster one for non trivial datasets. |
127 |
# otherwise transfer it from the source graph into the
|
128 |
# top of the current depth first search stack.
|
|
129 |
try: |
|
130 |
parents = self._graph.pop(next_node_name) |
|
131 |
except KeyError: |
|
132 |
# if the next node is not in the source graph it has
|
|
133 |
# already been popped from it and placed into the
|
|
134 |
# current search stack (but not completed or we would
|
|
135 |
# have hit the continue 4 lines up.
|
|
136 |
# this indicates a cycle.
|
|
137 |
raise errors.GraphCycleError(self._node_name_stack) |
|
138 |
self._push_node(next_node_name, parents) |
|
139 |
# and do not continue processing parents until this 'call'
|
|
140 |
# has recursed.
|
|
141 |
break
|
|
142 |
||
143 |
def _push_node(self, node_name, parents): |
|
144 |
"""Add node_name to the pending node stack.
|
|
145 |
|
|
146 |
Names in this stack will get emitted into the output as they are popped
|
|
147 |
off the stack.
|
|
148 |
"""
|
|
1624.1.2
by Robert Collins
Add MergeSort facility to bzrlib.tsort. |
149 |
self._node_name_stack.append(node_name) |
150 |
self._pending_parents_stack.append(list(parents)) |
|
151 |
||
152 |
def _pop_node(self): |
|
153 |
"""Pop the top node off the stack
|
|
154 |
||
155 |
The node is appended to the sorted output.
|
|
156 |
"""
|
|
157 |
# we are returning from the flattened call frame:
|
|
158 |
# pop off the local variables
|
|
159 |
node_name = self._node_name_stack.pop() |
|
160 |
self._pending_parents_stack.pop() |
|
161 |
||
162 |
self._completed_node_names.add(node_name) |
|
163 |
return node_name |
|
164 |
||
165 |
||
1988.4.1
by Robert Collins
bzrlib.tsort.merge_sorted now accepts 'generate_revnos'. This parameter |
166 |
def merge_sort(graph, branch_tip, mainline_revisions=None, generate_revno=False): |
1624.1.2
by Robert Collins
Add MergeSort facility to bzrlib.tsort. |
167 |
"""Topological sort a graph which groups merges.
|
168 |
||
169 |
:param graph: sequence of pairs of node->parents_list.
|
|
170 |
:param branch_tip: the tip of the branch to graph. Revisions not
|
|
171 |
reachable from branch_tip are not included in the
|
|
172 |
output.
|
|
1624.1.3
by Robert Collins
Convert log to use the new tsort.merge_sort routine. |
173 |
:param mainline_revisions: If not None this forces a mainline to be
|
174 |
used rather than synthesised from the graph.
|
|
175 |
This must be a valid path through some part
|
|
176 |
of the graph. If the mainline does not cover all
|
|
177 |
the revisions, output stops at the start of the
|
|
178 |
old revision listed in the mainline revisions
|
|
179 |
list.
|
|
180 |
The order for this parameter is oldest-first.
|
|
1988.4.4
by Robert Collins
Tidy up the patch. |
181 |
:param generate_revno: Optional parameter controlling the generation of
|
182 |
revision number sequences in the output. See the output description of
|
|
183 |
the MergeSorter docstring for details.
|
|
184 |
:result: See the MergeSorter docstring for details.
|
|
1624.1.2
by Robert Collins
Add MergeSort facility to bzrlib.tsort. |
185 |
node identifiers can be any hashable object, and are typically strings.
|
186 |
"""
|
|
1988.4.1
by Robert Collins
bzrlib.tsort.merge_sorted now accepts 'generate_revnos'. This parameter |
187 |
return MergeSorter(graph, branch_tip, mainline_revisions, |
188 |
generate_revno).sorted() |
|
1624.1.2
by Robert Collins
Add MergeSort facility to bzrlib.tsort. |
189 |
|
190 |
||
191 |
class MergeSorter(object): |
|
192 |
||
2425.4.1
by John Arbash Meinel
Use __slots__ for MergeSorter |
193 |
__slots__ = ['_node_name_stack', |
194 |
'_node_merge_depth_stack', |
|
195 |
'_pending_parents_stack', |
|
3170.3.2
by John Arbash Meinel
Implement the new dotted revision numbering. |
196 |
'_first_child_stack', |
2425.4.1
by John Arbash Meinel
Use __slots__ for MergeSorter |
197 |
'_left_subtree_pushed_stack', |
198 |
'_generate_revno', |
|
199 |
'_graph', |
|
200 |
'_mainline_revisions', |
|
201 |
'_stop_revision', |
|
202 |
'_original_graph', |
|
203 |
'_revnos', |
|
3170.3.2
by John Arbash Meinel
Implement the new dotted revision numbering. |
204 |
'_revno_to_branch_count', |
2425.4.1
by John Arbash Meinel
Use __slots__ for MergeSorter |
205 |
'_completed_node_names', |
206 |
'_scheduled_nodes', |
|
207 |
]
|
|
208 |
||
1988.4.1
by Robert Collins
bzrlib.tsort.merge_sorted now accepts 'generate_revnos'. This parameter |
209 |
def __init__(self, graph, branch_tip, mainline_revisions=None, |
210 |
generate_revno=False): |
|
1624.1.2
by Robert Collins
Add MergeSort facility to bzrlib.tsort. |
211 |
"""Merge-aware topological sorting of a graph.
|
212 |
|
|
213 |
:param graph: sequence of pairs of node_name->parent_names_list.
|
|
214 |
i.e. [('C', ['B']), ('B', ['A']), ('A', [])]
|
|
215 |
For this input the output from the sort or
|
|
216 |
iter_topo_order routines will be:
|
|
217 |
'A', 'B', 'C'
|
|
1624.1.3
by Robert Collins
Convert log to use the new tsort.merge_sort routine. |
218 |
:param branch_tip: the tip of the branch to graph. Revisions not
|
219 |
reachable from branch_tip are not included in the
|
|
220 |
output.
|
|
221 |
:param mainline_revisions: If not None this forces a mainline to be
|
|
222 |
used rather than synthesised from the graph.
|
|
223 |
This must be a valid path through some part
|
|
224 |
of the graph. If the mainline does not cover all
|
|
225 |
the revisions, output stops at the start of the
|
|
226 |
old revision listed in the mainline revisions
|
|
227 |
list.
|
|
228 |
The order for this parameter is oldest-first.
|
|
1988.4.4
by Robert Collins
Tidy up the patch. |
229 |
:param generate_revno: Optional parameter controlling the generation of
|
1988.4.1
by Robert Collins
bzrlib.tsort.merge_sorted now accepts 'generate_revnos'. This parameter |
230 |
revision number sequences in the output. See the output description
|
231 |
for more details.
|
|
232 |
||
1988.4.4
by Robert Collins
Tidy up the patch. |
233 |
The result is a list sorted so that all parents come before
|
1988.4.1
by Robert Collins
bzrlib.tsort.merge_sorted now accepts 'generate_revnos'. This parameter |
234 |
their children. Each element of the list is a tuple containing:
|
235 |
(sequence_number, node_name, merge_depth, end_of_merge)
|
|
236 |
* sequence_number: The sequence of this row in the output. Useful for
|
|
237 |
GUIs.
|
|
1988.4.4
by Robert Collins
Tidy up the patch. |
238 |
* node_name: The node name: opaque text to the merge routine.
|
1988.4.1
by Robert Collins
bzrlib.tsort.merge_sorted now accepts 'generate_revnos'. This parameter |
239 |
* merge_depth: How many levels of merging deep this node has been
|
240 |
found.
|
|
241 |
* revno_sequence: When requested this field provides a sequence of
|
|
242 |
revision numbers for all revisions. The format is:
|
|
3170.3.7
by John Arbash Meinel
update MergeSorter.__init__() docstring |
243 |
(REVNO, BRANCHNUM, BRANCHREVNO). BRANCHNUM is the number of the
|
1988.4.1
by Robert Collins
bzrlib.tsort.merge_sorted now accepts 'generate_revnos'. This parameter |
244 |
branch that the revno is on. From left to right the REVNO numbers
|
245 |
are the sequence numbers within that branch of the revision.
|
|
246 |
For instance, the graph {A:[], B:['A'], C:['A', 'B']} will get
|
|
247 |
the following revno_sequences assigned: A:(1,), B:(1,1,1), C:(2,).
|
|
248 |
This should be read as 'A is the first commit in the trunk',
|
|
249 |
'B is the first commit on the first branch made from A', 'C is the
|
|
250 |
second commit in the trunk'.
|
|
251 |
* end_of_merge: When True the next node is part of a different merge.
|
|
1624.1.3
by Robert Collins
Convert log to use the new tsort.merge_sort routine. |
252 |
|
1624.1.2
by Robert Collins
Add MergeSort facility to bzrlib.tsort. |
253 |
|
254 |
node identifiers can be any hashable object, and are typically strings.
|
|
255 |
||
256 |
If you have a graph like [('a', ['b']), ('a', ['c'])] this will only use
|
|
257 |
one of the two values for 'a'.
|
|
258 |
||
259 |
The graph is sorted lazily: until you iterate or sort the input is
|
|
260 |
not processed other than to create an internal representation.
|
|
261 |
||
262 |
iteration or sorting may raise GraphCycleError if a cycle is present
|
|
263 |
in the graph.
|
|
264 |
||
265 |
Background information on the design:
|
|
266 |
-------------------------------------
|
|
267 |
definition: the end of any cluster or 'merge' occurs when:
|
|
268 |
1 - the next revision has a lower merge depth than we do.
|
|
269 |
i.e.
|
|
270 |
A 0
|
|
271 |
B 1
|
|
272 |
C 2
|
|
273 |
D 1
|
|
274 |
E 0
|
|
275 |
C, D are the ends of clusters, E might be but we need more data.
|
|
276 |
2 - or the next revision at our merge depth is not our left most
|
|
277 |
ancestor.
|
|
278 |
This is required to handle multiple-merges in one commit.
|
|
279 |
i.e.
|
|
280 |
A 0 [F, B, E]
|
|
281 |
B 1 [D, C]
|
|
282 |
C 2 [D]
|
|
283 |
D 1 [F]
|
|
284 |
E 1 [F]
|
|
285 |
F 0
|
|
286 |
C is the end of a cluster due to rule 1.
|
|
287 |
D is not the end of a cluster from rule 1, but is from rule 2: E
|
|
288 |
is not its left most ancestor
|
|
289 |
E is the end of a cluster due to rule 1
|
|
290 |
F might be but we need more data.
|
|
291 |
|
|
292 |
we show connecting lines to a parent when:
|
|
293 |
- The parent is the start of a merge within this cluster.
|
|
294 |
That is, the merge was not done to the mainline before this cluster
|
|
295 |
was merged to the mainline.
|
|
296 |
This can be detected thus:
|
|
297 |
* The parent has a higher merge depth and is the next revision in
|
|
298 |
the list.
|
|
299 |
|
|
300 |
The next revision in the list constraint is needed for this case:
|
|
301 |
A 0 [D, B]
|
|
302 |
B 1 [C, F] # we do not want to show a line to F which is depth 2
|
|
303 |
but not a merge
|
|
304 |
C 1 [H] # note that this is a long line to show back to the
|
|
305 |
ancestor - see the end of merge rules.
|
|
306 |
D 0 [G, E]
|
|
307 |
E 1 [G, F]
|
|
308 |
F 2 [G]
|
|
309 |
G 1 [H]
|
|
310 |
H 0
|
|
311 |
- Part of this merges 'branch':
|
|
312 |
The parent has the same merge depth and is our left most parent and we
|
|
313 |
are not the end of the cluster.
|
|
314 |
A 0 [C, B] lines: [B, C]
|
|
315 |
B 1 [E, C] lines: [C]
|
|
316 |
C 0 [D] lines: [D]
|
|
317 |
D 0 [F, E] lines: [E, F]
|
|
318 |
E 1 [F] lines: [F]
|
|
319 |
F 0
|
|
320 |
- The end of this merge/cluster:
|
|
321 |
we can ONLY have multiple parents at the end of a cluster if this
|
|
322 |
branch was previously merged into the 'mainline'.
|
|
323 |
- if we have one and only one parent, show it
|
|
324 |
Note that this may be to a greater merge depth - for instance if
|
|
325 |
this branch continued from a deeply nested branch to add something
|
|
326 |
to it.
|
|
327 |
- if we have more than one parent - show the second oldest (older ==
|
|
328 |
further down the list) parent with
|
|
329 |
an equal or lower merge depth
|
|
330 |
XXXX revisit when awake. ddaa asks about the relevance of each one
|
|
331 |
- maybe more than one parent is relevant
|
|
332 |
"""
|
|
1988.4.1
by Robert Collins
bzrlib.tsort.merge_sorted now accepts 'generate_revnos'. This parameter |
333 |
self._generate_revno = generate_revno |
1624.1.2
by Robert Collins
Add MergeSort facility to bzrlib.tsort. |
334 |
# a dict of the graph.
|
335 |
self._graph = dict(graph) |
|
1624.1.3
by Robert Collins
Convert log to use the new tsort.merge_sort routine. |
336 |
# if there is an explicit mainline, alter the graph to match. This is
|
337 |
# easier than checking at every merge whether we are on the mainline and
|
|
338 |
# if so which path to take.
|
|
339 |
if mainline_revisions is None: |
|
340 |
self._mainline_revisions = [] |
|
341 |
self._stop_revision = None |
|
342 |
else: |
|
343 |
self._mainline_revisions = list(mainline_revisions) |
|
344 |
self._stop_revision = self._mainline_revisions[0] |
|
345 |
# skip the first revision, its what we reach and its parents are
|
|
346 |
# therefore irrelevant
|
|
347 |
for index, revision in enumerate(self._mainline_revisions[1:]): |
|
348 |
# NB: index 0 means self._mainline_revisions[1]
|
|
349 |
# if the mainline matches the graph, nothing to do.
|
|
350 |
parent = self._mainline_revisions[index] |
|
351 |
if parent is None: |
|
352 |
# end of mainline_revisions history
|
|
353 |
continue
|
|
3533.2.1
by John Arbash Meinel
(bug #243536) tsort.merge_sorted() should work even with a ghost in mainline. |
354 |
graph_parent_ids = self._graph[revision] |
355 |
if not graph_parent_ids: |
|
356 |
# We ran into a ghost, skip over it, this is a workaround for
|
|
357 |
# bug #243536, the _graph has had ghosts stripped, but the
|
|
358 |
# mainline_revisions have not
|
|
359 |
continue
|
|
360 |
if graph_parent_ids[0] == parent: |
|
1624.1.3
by Robert Collins
Convert log to use the new tsort.merge_sort routine. |
361 |
continue
|
362 |
# remove it from its prior spot
|
|
363 |
self._graph[revision].remove(parent) |
|
364 |
# insert it into the start of the mainline
|
|
365 |
self._graph[revision].insert(0, parent) |
|
1624.1.2
by Robert Collins
Add MergeSort facility to bzrlib.tsort. |
366 |
# we need to do a check late in the process to detect end-of-merges
|
367 |
# which requires the parents to be accessible: its easier for now
|
|
368 |
# to just keep the original graph around.
|
|
1624.1.3
by Robert Collins
Convert log to use the new tsort.merge_sort routine. |
369 |
self._original_graph = dict(self._graph.items()) |
1988.4.1
by Robert Collins
bzrlib.tsort.merge_sorted now accepts 'generate_revnos'. This parameter |
370 |
# we need to know the revision numbers of revisions to determine
|
371 |
# the revision numbers of their descendants
|
|
3170.3.2
by John Arbash Meinel
Implement the new dotted revision numbering. |
372 |
# this is a graph from node to [revno_tuple, first_child]
|
373 |
# where first_child is True if no other children have seen this node
|
|
1988.4.1
by Robert Collins
bzrlib.tsort.merge_sorted now accepts 'generate_revnos'. This parameter |
374 |
# and revno_tuple is the tuple that was assigned to the node.
|
375 |
# we dont know revnos to start with, so we start it seeded with
|
|
3170.3.2
by John Arbash Meinel
Implement the new dotted revision numbering. |
376 |
# [None, True]
|
377 |
self._revnos = dict((revision, [None, True]) |
|
378 |
for revision in self._graph) |
|
379 |
# Each mainline revision counts how many child branches have spawned from it.
|
|
380 |
self._revno_to_branch_count = {} |
|
1624.1.2
by Robert Collins
Add MergeSort facility to bzrlib.tsort. |
381 |
|
382 |
# this is a stack storing the depth first search into the graph.
|
|
383 |
self._node_name_stack = [] |
|
384 |
# at each level of recursion we need the merge depth this node is at:
|
|
385 |
self._node_merge_depth_stack = [] |
|
386 |
# at each level of 'recursion' we have to check each parent. This
|
|
387 |
# stack stores the parents we have not yet checked for the node at the
|
|
388 |
# matching depth in _node_name_stack
|
|
389 |
self._pending_parents_stack = [] |
|
1988.4.1
by Robert Collins
bzrlib.tsort.merge_sorted now accepts 'generate_revnos'. This parameter |
390 |
# When we first look at a node we assign it a seqence number from its
|
391 |
# leftmost parent.
|
|
3170.3.2
by John Arbash Meinel
Implement the new dotted revision numbering. |
392 |
self._first_child_stack = [] |
1624.1.2
by Robert Collins
Add MergeSort facility to bzrlib.tsort. |
393 |
# this is a set of the nodes who have been completely analysed for fast
|
394 |
# membership checking
|
|
395 |
self._completed_node_names = set() |
|
396 |
# this is the scheduling of nodes list.
|
|
397 |
# Nodes are scheduled
|
|
398 |
# from the bottom left of the tree: in the tree
|
|
399 |
# A 0 [D, B]
|
|
400 |
# B 1 [C]
|
|
401 |
# C 1 [D]
|
|
402 |
# D 0 [F, E]
|
|
403 |
# E 1 [F]
|
|
404 |
# F 0
|
|
405 |
# the scheduling order is: F, E, D, C, B, A
|
|
406 |
# that is - 'left subtree, right subtree, node'
|
|
407 |
# which would mean that when we schedule A we can emit the entire tree.
|
|
408 |
self._scheduled_nodes = [] |
|
409 |
# This records for each node when we have processed its left most
|
|
410 |
# unmerged subtree. After this subtree is scheduled, all other subtrees
|
|
411 |
# have their merge depth increased by one from this nodes merge depth.
|
|
1988.4.1
by Robert Collins
bzrlib.tsort.merge_sorted now accepts 'generate_revnos'. This parameter |
412 |
# it contains tuples - name, merge_depth
|
413 |
self._left_subtree_pushed_stack = [] |
|
1624.1.2
by Robert Collins
Add MergeSort facility to bzrlib.tsort. |
414 |
|
415 |
# seed the search with the tip of the branch
|
|
3236.2.1
by Michael Hudson
test and fix |
416 |
if (branch_tip is not None and |
3246.2.1
by Martin Pool
Merge tsort NULL_REVISION patch from mwhudson, with John's import fix |
417 |
branch_tip != _mod_revision.NULL_REVISION): |
1624.1.2
by Robert Collins
Add MergeSort facility to bzrlib.tsort. |
418 |
parents = self._graph.pop(branch_tip) |
419 |
self._push_node(branch_tip, 0, parents) |
|
420 |
||
421 |
def sorted(self): |
|
422 |
"""Sort the graph and return as a list.
|
|
423 |
|
|
424 |
After calling this the sorter is empty and you must create a new one.
|
|
425 |
"""
|
|
426 |
return list(self.iter_topo_order()) |
|
427 |
||
428 |
def iter_topo_order(self): |
|
429 |
"""Yield the nodes of the graph in a topological order.
|
|
430 |
|
|
431 |
After finishing iteration the sorter is empty and you cannot continue
|
|
432 |
iteration.
|
|
433 |
"""
|
|
2425.4.2
by John Arbash Meinel
Change valid self._foo variables into local variables. |
434 |
# These are safe to offload to local variables, because they are used
|
435 |
# as a stack and modified in place, never assigned to.
|
|
436 |
node_name_stack = self._node_name_stack |
|
2425.4.3
by John Arbash Meinel
Inline self._pop_node and self._push_node |
437 |
node_merge_depth_stack = self._node_merge_depth_stack |
2425.4.2
by John Arbash Meinel
Change valid self._foo variables into local variables. |
438 |
pending_parents_stack = self._pending_parents_stack |
439 |
left_subtree_pushed_stack = self._left_subtree_pushed_stack |
|
440 |
completed_node_names = self._completed_node_names |
|
2425.4.3
by John Arbash Meinel
Inline self._pop_node and self._push_node |
441 |
scheduled_nodes = self._scheduled_nodes |
442 |
||
443 |
graph_pop = self._graph.pop |
|
444 |
||
445 |
def push_node(node_name, merge_depth, parents, |
|
446 |
node_name_stack_append=node_name_stack.append, |
|
447 |
node_merge_depth_stack_append=node_merge_depth_stack.append, |
|
448 |
left_subtree_pushed_stack_append=left_subtree_pushed_stack.append, |
|
449 |
pending_parents_stack_append=pending_parents_stack.append, |
|
3170.3.2
by John Arbash Meinel
Implement the new dotted revision numbering. |
450 |
first_child_stack_append=self._first_child_stack.append, |
2425.4.3
by John Arbash Meinel
Inline self._pop_node and self._push_node |
451 |
revnos=self._revnos, |
452 |
):
|
|
453 |
"""Add node_name to the pending node stack.
|
|
454 |
||
455 |
Names in this stack will get emitted into the output as they are popped
|
|
456 |
off the stack.
|
|
457 |
||
458 |
This inlines a lot of self._variable.append functions as local
|
|
459 |
variables.
|
|
460 |
"""
|
|
461 |
node_name_stack_append(node_name) |
|
462 |
node_merge_depth_stack_append(merge_depth) |
|
463 |
left_subtree_pushed_stack_append(False) |
|
464 |
pending_parents_stack_append(list(parents)) |
|
3170.3.2
by John Arbash Meinel
Implement the new dotted revision numbering. |
465 |
# as we push it, check if it is the first child
|
2425.4.3
by John Arbash Meinel
Inline self._pop_node and self._push_node |
466 |
if parents: |
467 |
# node has parents, assign from the left most parent.
|
|
3170.3.2
by John Arbash Meinel
Implement the new dotted revision numbering. |
468 |
parent_info = revnos[parents[0]] |
469 |
first_child = parent_info[1] |
|
470 |
parent_info[1] = False |
|
2425.4.3
by John Arbash Meinel
Inline self._pop_node and self._push_node |
471 |
else: |
3170.3.2
by John Arbash Meinel
Implement the new dotted revision numbering. |
472 |
# We don't use the same algorithm here, but we need to keep the
|
473 |
# stack in line
|
|
474 |
first_child = None |
|
475 |
first_child_stack_append(first_child) |
|
2425.4.3
by John Arbash Meinel
Inline self._pop_node and self._push_node |
476 |
|
477 |
def pop_node(node_name_stack_pop=node_name_stack.pop, |
|
478 |
node_merge_depth_stack_pop=node_merge_depth_stack.pop, |
|
3170.3.2
by John Arbash Meinel
Implement the new dotted revision numbering. |
479 |
first_child_stack_pop=self._first_child_stack.pop, |
2425.4.3
by John Arbash Meinel
Inline self._pop_node and self._push_node |
480 |
left_subtree_pushed_stack_pop=left_subtree_pushed_stack.pop, |
481 |
pending_parents_stack_pop=pending_parents_stack.pop, |
|
482 |
original_graph=self._original_graph, |
|
483 |
revnos=self._revnos, |
|
484 |
completed_node_names_add=self._completed_node_names.add, |
|
485 |
scheduled_nodes_append=scheduled_nodes.append, |
|
3170.3.2
by John Arbash Meinel
Implement the new dotted revision numbering. |
486 |
revno_to_branch_count=self._revno_to_branch_count, |
2425.4.3
by John Arbash Meinel
Inline self._pop_node and self._push_node |
487 |
):
|
488 |
"""Pop the top node off the stack
|
|
489 |
||
490 |
The node is appended to the sorted output.
|
|
491 |
"""
|
|
492 |
# we are returning from the flattened call frame:
|
|
493 |
# pop off the local variables
|
|
494 |
node_name = node_name_stack_pop() |
|
495 |
merge_depth = node_merge_depth_stack_pop() |
|
3170.3.2
by John Arbash Meinel
Implement the new dotted revision numbering. |
496 |
first_child = first_child_stack_pop() |
2425.4.3
by John Arbash Meinel
Inline self._pop_node and self._push_node |
497 |
# remove this node from the pending lists:
|
498 |
left_subtree_pushed_stack_pop() |
|
499 |
pending_parents_stack_pop() |
|
500 |
||
501 |
parents = original_graph[node_name] |
|
502 |
if parents: |
|
503 |
# node has parents, assign from the left most parent.
|
|
3170.3.2
by John Arbash Meinel
Implement the new dotted revision numbering. |
504 |
parent_revno = revnos[parents[0]][0] |
505 |
if not first_child: |
|
2425.4.3
by John Arbash Meinel
Inline self._pop_node and self._push_node |
506 |
# not the first child, make a new branch
|
3170.3.2
by John Arbash Meinel
Implement the new dotted revision numbering. |
507 |
base_revno = parent_revno[0] |
508 |
branch_count = revno_to_branch_count.get(base_revno, 0) |
|
509 |
branch_count += 1 |
|
510 |
revno_to_branch_count[base_revno] = branch_count |
|
511 |
revno = (parent_revno[0], branch_count, 1) |
|
512 |
# revno = (parent_revno[0], branch_count, parent_revno[-1]+1)
|
|
2425.4.3
by John Arbash Meinel
Inline self._pop_node and self._push_node |
513 |
else: |
3170.3.2
by John Arbash Meinel
Implement the new dotted revision numbering. |
514 |
# as the first child, we just increase the final revision
|
515 |
# number
|
|
516 |
revno = parent_revno[:-1] + (parent_revno[-1] + 1,) |
|
2425.4.3
by John Arbash Meinel
Inline self._pop_node and self._push_node |
517 |
else: |
518 |
# no parents, use the root sequence
|
|
3613.1.1
by John Arbash Meinel
Fix the merge_sort code so that it properly increments. |
519 |
root_count = revno_to_branch_count.get(0, -1) |
520 |
root_count += 1 |
|
3170.3.2
by John Arbash Meinel
Implement the new dotted revision numbering. |
521 |
if root_count: |
522 |
revno = (0, root_count, 1) |
|
2425.4.3
by John Arbash Meinel
Inline self._pop_node and self._push_node |
523 |
else: |
524 |
revno = (1,) |
|
3170.3.2
by John Arbash Meinel
Implement the new dotted revision numbering. |
525 |
revno_to_branch_count[0] = root_count |
2425.4.3
by John Arbash Meinel
Inline self._pop_node and self._push_node |
526 |
|
527 |
# store the revno for this node for future reference
|
|
528 |
revnos[node_name][0] = revno |
|
529 |
completed_node_names_add(node_name) |
|
530 |
scheduled_nodes_append((node_name, merge_depth, revno)) |
|
531 |
return node_name |
|
532 |
||
2425.4.2
by John Arbash Meinel
Change valid self._foo variables into local variables. |
533 |
|
534 |
while node_name_stack: |
|
1624.1.2
by Robert Collins
Add MergeSort facility to bzrlib.tsort. |
535 |
# loop until this call completes.
|
2425.4.2
by John Arbash Meinel
Change valid self._foo variables into local variables. |
536 |
parents_to_visit = pending_parents_stack[-1] |
1624.1.2
by Robert Collins
Add MergeSort facility to bzrlib.tsort. |
537 |
# if all parents are done, the revision is done
|
538 |
if not parents_to_visit: |
|
539 |
# append the revision to the topo sorted scheduled list:
|
|
540 |
# all the nodes parents have been scheduled added, now
|
|
541 |
# we can add it to the output.
|
|
2425.4.2
by John Arbash Meinel
Change valid self._foo variables into local variables. |
542 |
pop_node() |
1624.1.2
by Robert Collins
Add MergeSort facility to bzrlib.tsort. |
543 |
else: |
2425.4.2
by John Arbash Meinel
Change valid self._foo variables into local variables. |
544 |
while pending_parents_stack[-1]: |
545 |
if not left_subtree_pushed_stack[-1]: |
|
1624.1.2
by Robert Collins
Add MergeSort facility to bzrlib.tsort. |
546 |
# recurse depth first into the primary parent
|
2425.4.2
by John Arbash Meinel
Change valid self._foo variables into local variables. |
547 |
next_node_name = pending_parents_stack[-1].pop(0) |
1624.1.2
by Robert Collins
Add MergeSort facility to bzrlib.tsort. |
548 |
else: |
549 |
# place any merges in right-to-left order for scheduling
|
|
550 |
# which gives us left-to-right order after we reverse
|
|
551 |
# the scheduled queue. XXX: This has the effect of
|
|
552 |
# allocating common-new revisions to the right-most
|
|
553 |
# subtree rather than the left most, which will
|
|
554 |
# display nicely (you get smaller trees at the top
|
|
555 |
# of the combined merge).
|
|
2425.4.2
by John Arbash Meinel
Change valid self._foo variables into local variables. |
556 |
next_node_name = pending_parents_stack[-1].pop() |
557 |
if next_node_name in completed_node_names: |
|
1624.1.2
by Robert Collins
Add MergeSort facility to bzrlib.tsort. |
558 |
# this parent was completed by a child on the
|
559 |
# call stack. skip it.
|
|
560 |
continue
|
|
561 |
# otherwise transfer it from the source graph into the
|
|
562 |
# top of the current depth first search stack.
|
|
563 |
try: |
|
2425.4.3
by John Arbash Meinel
Inline self._pop_node and self._push_node |
564 |
parents = graph_pop(next_node_name) |
1624.1.2
by Robert Collins
Add MergeSort facility to bzrlib.tsort. |
565 |
except KeyError: |
566 |
# if the next node is not in the source graph it has
|
|
567 |
# already been popped from it and placed into the
|
|
568 |
# current search stack (but not completed or we would
|
|
569 |
# have hit the continue 4 lines up.
|
|
570 |
# this indicates a cycle.
|
|
2425.4.2
by John Arbash Meinel
Change valid self._foo variables into local variables. |
571 |
raise errors.GraphCycleError(node_name_stack) |
1624.1.2
by Robert Collins
Add MergeSort facility to bzrlib.tsort. |
572 |
next_merge_depth = 0 |
2425.4.2
by John Arbash Meinel
Change valid self._foo variables into local variables. |
573 |
if left_subtree_pushed_stack[-1]: |
1988.4.1
by Robert Collins
bzrlib.tsort.merge_sorted now accepts 'generate_revnos'. This parameter |
574 |
# a new child branch from name_stack[-1]
|
1624.1.2
by Robert Collins
Add MergeSort facility to bzrlib.tsort. |
575 |
next_merge_depth = 1 |
576 |
else: |
|
577 |
next_merge_depth = 0 |
|
2425.4.2
by John Arbash Meinel
Change valid self._foo variables into local variables. |
578 |
left_subtree_pushed_stack[-1] = True |
1624.1.2
by Robert Collins
Add MergeSort facility to bzrlib.tsort. |
579 |
next_merge_depth = ( |
2425.4.3
by John Arbash Meinel
Inline self._pop_node and self._push_node |
580 |
node_merge_depth_stack[-1] + next_merge_depth) |
2425.4.2
by John Arbash Meinel
Change valid self._foo variables into local variables. |
581 |
push_node( |
1624.1.2
by Robert Collins
Add MergeSort facility to bzrlib.tsort. |
582 |
next_node_name, |
583 |
next_merge_depth, |
|
584 |
parents) |
|
585 |
# and do not continue processing parents until this 'call'
|
|
586 |
# has recursed.
|
|
587 |
break
|
|
2425.4.3
by John Arbash Meinel
Inline self._pop_node and self._push_node |
588 |
|
1624.1.2
by Robert Collins
Add MergeSort facility to bzrlib.tsort. |
589 |
# We have scheduled the graph. Now deliver the ordered output:
|
590 |
sequence_number = 0 |
|
2425.4.2
by John Arbash Meinel
Change valid self._foo variables into local variables. |
591 |
stop_revision = self._stop_revision |
592 |
generate_revno = self._generate_revno |
|
593 |
original_graph = self._original_graph |
|
594 |
||
595 |
while scheduled_nodes: |
|
596 |
node_name, merge_depth, revno = scheduled_nodes.pop() |
|
597 |
if node_name == stop_revision: |
|
1624.1.3
by Robert Collins
Convert log to use the new tsort.merge_sort routine. |
598 |
return
|
2425.4.2
by John Arbash Meinel
Change valid self._foo variables into local variables. |
599 |
if not len(scheduled_nodes): |
1988.4.1
by Robert Collins
bzrlib.tsort.merge_sorted now accepts 'generate_revnos'. This parameter |
600 |
# last revision is the end of a merge
|
1624.1.2
by Robert Collins
Add MergeSort facility to bzrlib.tsort. |
601 |
end_of_merge = True |
2425.4.2
by John Arbash Meinel
Change valid self._foo variables into local variables. |
602 |
elif scheduled_nodes[-1][1] < merge_depth: |
1624.1.2
by Robert Collins
Add MergeSort facility to bzrlib.tsort. |
603 |
# the next node is to our left
|
604 |
end_of_merge = True |
|
2425.4.2
by John Arbash Meinel
Change valid self._foo variables into local variables. |
605 |
elif (scheduled_nodes[-1][1] == merge_depth and |
606 |
(scheduled_nodes[-1][0] not in |
|
607 |
original_graph[node_name])): |
|
1624.1.2
by Robert Collins
Add MergeSort facility to bzrlib.tsort. |
608 |
# the next node was part of a multiple-merge.
|
609 |
end_of_merge = True |
|
610 |
else: |
|
611 |
end_of_merge = False |
|
2425.4.2
by John Arbash Meinel
Change valid self._foo variables into local variables. |
612 |
if generate_revno: |
1988.4.1
by Robert Collins
bzrlib.tsort.merge_sorted now accepts 'generate_revnos'. This parameter |
613 |
yield (sequence_number, node_name, merge_depth, revno, end_of_merge) |
614 |
else: |
|
615 |
yield (sequence_number, node_name, merge_depth, end_of_merge) |
|
1624.1.2
by Robert Collins
Add MergeSort facility to bzrlib.tsort. |
616 |
sequence_number += 1 |
617 |
||
618 |
def _push_node(self, node_name, merge_depth, parents): |
|
619 |
"""Add node_name to the pending node stack.
|
|
620 |
|
|
621 |
Names in this stack will get emitted into the output as they are popped
|
|
622 |
off the stack.
|
|
623 |
"""
|
|
624 |
self._node_name_stack.append(node_name) |
|
625 |
self._node_merge_depth_stack.append(merge_depth) |
|
1988.4.1
by Robert Collins
bzrlib.tsort.merge_sorted now accepts 'generate_revnos'. This parameter |
626 |
self._left_subtree_pushed_stack.append(False) |
1624.1.2
by Robert Collins
Add MergeSort facility to bzrlib.tsort. |
627 |
self._pending_parents_stack.append(list(parents)) |
3170.3.2
by John Arbash Meinel
Implement the new dotted revision numbering. |
628 |
# as we push it, figure out if this is the first child
|
1988.4.1
by Robert Collins
bzrlib.tsort.merge_sorted now accepts 'generate_revnos'. This parameter |
629 |
parents = self._original_graph[node_name] |
630 |
if parents: |
|
631 |
# node has parents, assign from the left most parent.
|
|
3170.3.2
by John Arbash Meinel
Implement the new dotted revision numbering. |
632 |
parent_info = self._revnos[parents[0]] |
633 |
first_child = parent_info[1] |
|
634 |
parent_info[1] = False |
|
1988.4.1
by Robert Collins
bzrlib.tsort.merge_sorted now accepts 'generate_revnos'. This parameter |
635 |
else: |
3170.3.2
by John Arbash Meinel
Implement the new dotted revision numbering. |
636 |
# We don't use the same algorithm here, but we need to keep the
|
637 |
# stack in line
|
|
638 |
first_child = None |
|
639 |
self._first_child_stack.append(first_child) |
|
1624.1.2
by Robert Collins
Add MergeSort facility to bzrlib.tsort. |
640 |
|
641 |
def _pop_node(self): |
|
642 |
"""Pop the top node off the stack
|
|
643 |
||
644 |
The node is appended to the sorted output.
|
|
645 |
"""
|
|
646 |
# we are returning from the flattened call frame:
|
|
647 |
# pop off the local variables
|
|
648 |
node_name = self._node_name_stack.pop() |
|
649 |
merge_depth = self._node_merge_depth_stack.pop() |
|
3170.3.2
by John Arbash Meinel
Implement the new dotted revision numbering. |
650 |
first_child = self._first_child_stack.pop() |
1988.4.1
by Robert Collins
bzrlib.tsort.merge_sorted now accepts 'generate_revnos'. This parameter |
651 |
# remove this node from the pending lists:
|
652 |
self._left_subtree_pushed_stack.pop() |
|
1624.1.2
by Robert Collins
Add MergeSort facility to bzrlib.tsort. |
653 |
self._pending_parents_stack.pop() |
654 |
||
1988.4.1
by Robert Collins
bzrlib.tsort.merge_sorted now accepts 'generate_revnos'. This parameter |
655 |
parents = self._original_graph[node_name] |
656 |
if parents: |
|
657 |
# node has parents, assign from the left most parent.
|
|
3170.3.2
by John Arbash Meinel
Implement the new dotted revision numbering. |
658 |
parent_revno = self._revnos[parents[0]][0] |
659 |
if not first_child: |
|
1988.4.1
by Robert Collins
bzrlib.tsort.merge_sorted now accepts 'generate_revnos'. This parameter |
660 |
# not the first child, make a new branch
|
3170.3.2
by John Arbash Meinel
Implement the new dotted revision numbering. |
661 |
base_revno = parent_revno[0] |
662 |
branch_count = self._revno_to_branch_count.get(base_revno, 0) |
|
663 |
branch_count += 1 |
|
664 |
self._revno_to_branch_count[base_revno] = branch_count |
|
665 |
revno = (parent_revno[0], branch_count, 1) |
|
666 |
# revno = (parent_revno[0], branch_count, parent_revno[-1]+1)
|
|
1988.4.1
by Robert Collins
bzrlib.tsort.merge_sorted now accepts 'generate_revnos'. This parameter |
667 |
else: |
3170.3.2
by John Arbash Meinel
Implement the new dotted revision numbering. |
668 |
# as the first child, we just increase the final revision
|
669 |
# number
|
|
670 |
revno = parent_revno[:-1] + (parent_revno[-1] + 1,) |
|
1988.4.1
by Robert Collins
bzrlib.tsort.merge_sorted now accepts 'generate_revnos'. This parameter |
671 |
else: |
672 |
# no parents, use the root sequence
|
|
3170.3.2
by John Arbash Meinel
Implement the new dotted revision numbering. |
673 |
root_count = self._revno_to_branch_count.get(0, 0) |
674 |
if root_count: |
|
675 |
revno = (0, root_count, 1) |
|
1988.4.1
by Robert Collins
bzrlib.tsort.merge_sorted now accepts 'generate_revnos'. This parameter |
676 |
else: |
677 |
revno = (1,) |
|
3170.3.2
by John Arbash Meinel
Implement the new dotted revision numbering. |
678 |
root_count += 1 |
679 |
self._revno_to_branch_count[0] = root_count |
|
1988.4.1
by Robert Collins
bzrlib.tsort.merge_sorted now accepts 'generate_revnos'. This parameter |
680 |
|
681 |
# store the revno for this node for future reference
|
|
682 |
self._revnos[node_name][0] = revno |
|
1624.1.2
by Robert Collins
Add MergeSort facility to bzrlib.tsort. |
683 |
self._completed_node_names.add(node_name) |
1988.4.1
by Robert Collins
bzrlib.tsort.merge_sorted now accepts 'generate_revnos'. This parameter |
684 |
self._scheduled_nodes.append((node_name, merge_depth, self._revnos[node_name][0])) |
1570.1.7
by Robert Collins
Replace the slow topo_sort routine with a much faster one for non trivial datasets. |
685 |
return node_name |