1
# Copyright (C) 2005, 2006 Canonical Ltd
1
# Copyright (C) 2005, 2006, 2008 Canonical Ltd
3
3
# This program is free software; you can redistribute it and/or modify
4
4
# it under the terms of the GNU General Public License as published by
13
13
# You should have received a copy of the GNU General Public License
14
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
18
"""Topological sorting routines."""
21
21
from bzrlib import errors
22
import bzrlib.revision as _mod_revision
24
25
__all__ = ["topo_sort", "TopoSorter", "merge_sort", "MergeSorter"]
42
43
def __init__(self, graph):
43
44
"""Topological sorting of a graph.
45
46
:param graph: sequence of pairs of node_name->parent_names_list.
46
47
i.e. [('C', ['B']), ('B', ['A']), ('A', [])]
47
48
For this input the output from the sort or
48
49
iter_topo_order routines will be:
51
52
node identifiers can be any hashable object, and are typically strings.
53
54
If you have a graph like [('a', ['b']), ('a', ['c'])] this will only use
56
57
The graph is sorted lazily: until you iterate or sort the input is
57
58
not processed other than to create an internal representation.
59
iteration or sorting may raise GraphCycleError if a cycle is present
60
iteration or sorting may raise GraphCycleError if a cycle is present
62
63
# a dict of the graph.
64
65
self._visitable = set(self._graph)
66
67
# self._original_graph = dict(graph)
68
69
# this is a stack storing the depth first search into the graph.
69
70
self._node_name_stack = []
70
71
# at each level of 'recursion' we have to check each parent. This
71
# stack stores the parents we have not yet checked for the node at the
72
# stack stores the parents we have not yet checked for the node at the
72
73
# matching depth in _node_name_stack
73
74
self._pending_parents_stack = []
74
75
# this is a set of the completed nodes for fast checking whether a
115
116
yield self._pop_node()
117
118
while self._pending_parents_stack[-1]:
118
# recurse depth first into a single parent
119
# recurse depth first into a single parent
119
120
next_node_name = self._pending_parents_stack[-1].pop()
120
121
if next_node_name in self._completed_node_names:
121
122
# this parent was completed by a child on the
135
136
# this indicates a cycle.
136
137
raise errors.GraphCycleError(self._node_name_stack)
137
138
self._push_node(next_node_name, parents)
138
# and do not continue processing parents until this 'call'
139
# and do not continue processing parents until this 'call'
142
143
def _push_node(self, node_name, parents):
143
144
"""Add node_name to the pending node stack.
145
146
Names in this stack will get emitted into the output as they are popped
166
167
"""Topological sort a graph which groups merges.
168
169
:param graph: sequence of pairs of node->parents_list.
169
:param branch_tip: the tip of the branch to graph. Revisions not
170
:param branch_tip: the tip of the branch to graph. Revisions not
170
171
reachable from branch_tip are not included in the
172
173
:param mainline_revisions: If not None this forces a mainline to be
192
193
__slots__ = ['_node_name_stack',
193
194
'_node_merge_depth_stack',
194
195
'_pending_parents_stack',
195
'_assigned_sequence_stack',
196
'_first_child_stack',
196
197
'_left_subtree_pushed_stack',
197
198
'_generate_revno',
208
209
def __init__(self, graph, branch_tip, mainline_revisions=None,
209
210
generate_revno=False):
210
211
"""Merge-aware topological sorting of a graph.
212
213
:param graph: sequence of pairs of node_name->parent_names_list.
213
214
i.e. [('C', ['B']), ('B', ['A']), ('A', [])]
214
215
For this input the output from the sort or
215
216
iter_topo_order routines will be:
217
:param branch_tip: the tip of the branch to graph. Revisions not
218
:param branch_tip: the tip of the branch to graph. Revisions not
218
219
reachable from branch_tip are not included in the
220
221
:param mainline_revisions: If not None this forces a mainline to be
232
233
The result is a list sorted so that all parents come before
233
234
their children. Each element of the list is a tuple containing:
234
235
(sequence_number, node_name, merge_depth, end_of_merge)
235
* sequence_number: The sequence of this row in the output. Useful for
236
* sequence_number: The sequence of this row in the output. Useful for
237
238
* node_name: The node name: opaque text to the merge routine.
238
239
* merge_depth: How many levels of merging deep this node has been
240
241
* revno_sequence: When requested this field provides a sequence of
241
242
revision numbers for all revisions. The format is:
242
REVNO[[.BRANCHREVNO.REVNO] ...]. BRANCHREVNO is the number of the
243
(REVNO, BRANCHNUM, BRANCHREVNO). BRANCHNUM is the number of the
243
244
branch that the revno is on. From left to right the REVNO numbers
244
245
are the sequence numbers within that branch of the revision.
245
246
For instance, the graph {A:[], B:['A'], C:['A', 'B']} will get
249
250
second commit in the trunk'.
250
251
* end_of_merge: When True the next node is part of a different merge.
253
254
node identifiers can be any hashable object, and are typically strings.
255
256
If you have a graph like [('a', ['b']), ('a', ['c'])] this will only use
258
259
The graph is sorted lazily: until you iterate or sort the input is
259
260
not processed other than to create an internal representation.
261
iteration or sorting may raise GraphCycleError if a cycle is present
262
iteration or sorting may raise GraphCycleError if a cycle is present
264
265
Background information on the design:
285
286
C is the end of a cluster due to rule 1.
286
D is not the end of a cluster from rule 1, but is from rule 2: E
287
D is not the end of a cluster from rule 1, but is from rule 2: E
287
288
is not its left most ancestor
288
289
E is the end of a cluster due to rule 1
289
290
F might be but we need more data.
291
292
we show connecting lines to a parent when:
292
293
- The parent is the start of a merge within this cluster.
293
That is, the merge was not done to the mainline before this cluster
294
That is, the merge was not done to the mainline before this cluster
294
295
was merged to the mainline.
295
296
This can be detected thus:
296
* The parent has a higher merge depth and is the next revision in
297
* The parent has a higher merge depth and is the next revision in
299
300
The next revision in the list constraint is needed for this case:
301
B 1 [C, F] # we do not want to show a line to F which is depth 2
302
B 1 [C, F] # we do not want to show a line to F which is depth 2
303
C 1 [H] # note that this is a long line to show back to the
304
C 1 [H] # note that this is a long line to show back to the
304
305
ancestor - see the end of merge rules.
342
343
self._mainline_revisions = list(mainline_revisions)
343
344
self._stop_revision = self._mainline_revisions[0]
344
# skip the first revision, its what we reach and its parents are
345
# skip the first revision, its what we reach and its parents are
345
346
# therefore irrelevant
346
347
for index, revision in enumerate(self._mainline_revisions[1:]):
347
348
# NB: index 0 means self._mainline_revisions[1]
350
351
if parent is None:
351
352
# end of mainline_revisions history
353
if self._graph[revision][0] == parent:
354
graph_parent_ids = self._graph[revision]
355
if not graph_parent_ids:
356
# We ran into a ghost, skip over it, this is a workaround for
357
# bug #243536, the _graph has had ghosts stripped, but the
358
# mainline_revisions have not
360
if graph_parent_ids[0] == parent:
355
362
# remove it from its prior spot
356
363
self._graph[revision].remove(parent)
362
369
self._original_graph = dict(self._graph.items())
363
370
# we need to know the revision numbers of revisions to determine
364
371
# the revision numbers of their descendants
365
# this is a graph from node to [revno_tuple, sequence_number]
366
# where sequence is the number of branches made from the node,
372
# this is a graph from node to [revno_tuple, first_child]
373
# where first_child is True if no other children have seen this node
367
374
# and revno_tuple is the tuple that was assigned to the node.
368
375
# we dont know revnos to start with, so we start it seeded with
370
self._revnos = dict((revision, [None, 0]) for revision in self._graph)
371
# the global implicit root node has revno 0, but we need to know
372
# the sequence number for it too:
373
self._root_sequence = 0
377
self._revnos = dict((revision, [None, True])
378
for revision in self._graph)
379
# Each mainline revision counts how many child branches have spawned from it.
380
self._revno_to_branch_count = {}
375
382
# this is a stack storing the depth first search into the graph.
376
383
self._node_name_stack = []
377
384
# at each level of recursion we need the merge depth this node is at:
378
385
self._node_merge_depth_stack = []
379
386
# at each level of 'recursion' we have to check each parent. This
380
# stack stores the parents we have not yet checked for the node at the
387
# stack stores the parents we have not yet checked for the node at the
381
388
# matching depth in _node_name_stack
382
389
self._pending_parents_stack = []
383
390
# When we first look at a node we assign it a seqence number from its
384
391
# leftmost parent.
385
self._assigned_sequence_stack = []
392
self._first_child_stack = []
386
393
# this is a set of the nodes who have been completely analysed for fast
387
394
# membership checking
388
395
self._completed_node_names = set()
398
# the scheduling order is: F, E, D, C, B, A
405
# the scheduling order is: F, E, D, C, B, A
399
406
# that is - 'left subtree, right subtree, node'
400
407
# which would mean that when we schedule A we can emit the entire tree.
401
408
self._scheduled_nodes = []
402
# This records for each node when we have processed its left most
409
# This records for each node when we have processed its left most
403
410
# unmerged subtree. After this subtree is scheduled, all other subtrees
404
411
# have their merge depth increased by one from this nodes merge depth.
405
412
# it contains tuples - name, merge_depth
406
413
self._left_subtree_pushed_stack = []
408
415
# seed the search with the tip of the branch
409
if branch_tip is not None:
416
if (branch_tip is not None and
417
branch_tip != _mod_revision.NULL_REVISION):
410
418
parents = self._graph.pop(branch_tip)
411
419
self._push_node(branch_tip, 0, parents)
413
421
def sorted(self):
414
422
"""Sort the graph and return as a list.
416
424
After calling this the sorter is empty and you must create a new one.
418
426
return list(self.iter_topo_order())
420
428
def iter_topo_order(self):
421
429
"""Yield the nodes of the graph in a topological order.
423
431
After finishing iteration the sorter is empty and you cannot continue
439
447
node_merge_depth_stack_append=node_merge_depth_stack.append,
440
448
left_subtree_pushed_stack_append=left_subtree_pushed_stack.append,
441
449
pending_parents_stack_append=pending_parents_stack.append,
442
assigned_sequence_stack_append=self._assigned_sequence_stack.append,
443
original_graph=self._original_graph,
450
first_child_stack_append=self._first_child_stack.append,
444
451
revnos=self._revnos,
446
453
"""Add node_name to the pending node stack.
455
462
node_merge_depth_stack_append(merge_depth)
456
463
left_subtree_pushed_stack_append(False)
457
464
pending_parents_stack_append(list(parents))
458
# as we push it, assign it a sequence number against its parent:
459
parents = original_graph[node_name]
465
# as we push it, check if it is the first child
461
467
# node has parents, assign from the left most parent.
462
parent_revno = revnos[parents[0]]
463
sequence = parent_revno[1]
468
parent_info = revnos[parents[0]]
469
first_child = parent_info[1]
470
parent_info[1] = False
466
# no parents, use the root sequence
467
sequence = self._root_sequence
468
self._root_sequence +=1
469
assigned_sequence_stack_append(sequence)
472
# We don't use the same algorithm here, but we need to keep the
475
first_child_stack_append(first_child)
471
477
def pop_node(node_name_stack_pop=node_name_stack.pop,
472
478
node_merge_depth_stack_pop=node_merge_depth_stack.pop,
473
assigned_sequence_stack_pop=self._assigned_sequence_stack.pop,
479
first_child_stack_pop=self._first_child_stack.pop,
474
480
left_subtree_pushed_stack_pop=left_subtree_pushed_stack.pop,
475
481
pending_parents_stack_pop=pending_parents_stack.pop,
476
482
original_graph=self._original_graph,
477
483
revnos=self._revnos,
478
484
completed_node_names_add=self._completed_node_names.add,
479
485
scheduled_nodes_append=scheduled_nodes.append,
486
revno_to_branch_count=self._revno_to_branch_count,
481
488
"""Pop the top node off the stack
486
493
# pop off the local variables
487
494
node_name = node_name_stack_pop()
488
495
merge_depth = node_merge_depth_stack_pop()
489
sequence = assigned_sequence_stack_pop()
496
first_child = first_child_stack_pop()
490
497
# remove this node from the pending lists:
491
498
left_subtree_pushed_stack_pop()
492
499
pending_parents_stack_pop()
494
501
parents = original_graph[node_name]
496
503
# node has parents, assign from the left most parent.
497
parent_revno = revnos[parents[0]]
504
parent_revno = revnos[parents[0]][0]
499
506
# not the first child, make a new branch
500
revno = parent_revno[0] + (sequence, 1)
507
base_revno = parent_revno[0]
508
branch_count = revno_to_branch_count.get(base_revno, 0)
510
revno_to_branch_count[base_revno] = branch_count
511
revno = (parent_revno[0], branch_count, 1)
512
# revno = (parent_revno[0], branch_count, parent_revno[-1]+1)
502
# increment the sequence number within the branch
503
revno = parent_revno[0][:-1] + (parent_revno[0][-1] + 1,)
514
# as the first child, we just increase the final revision
516
revno = parent_revno[:-1] + (parent_revno[-1] + 1,)
505
518
# no parents, use the root sequence
507
# make a parallel import revision number
508
revno = (0, sequence, 1)
519
root_count = revno_to_branch_count.get(0, -1)
522
revno = (0, root_count, 1)
525
revno_to_branch_count[0] = root_count
512
527
# store the revno for this node for future reference
513
528
revnos[node_name][0] = revno
534
549
# place any merges in right-to-left order for scheduling
535
550
# which gives us left-to-right order after we reverse
536
# the scheduled queue. XXX: This has the effect of
551
# the scheduled queue. XXX: This has the effect of
537
552
# allocating common-new revisions to the right-most
538
# subtree rather than the left most, which will
553
# subtree rather than the left most, which will
539
554
# display nicely (you get smaller trees at the top
540
555
# of the combined merge).
541
556
next_node_name = pending_parents_stack[-1].pop()
603
618
def _push_node(self, node_name, merge_depth, parents):
604
619
"""Add node_name to the pending node stack.
606
621
Names in this stack will get emitted into the output as they are popped
610
625
self._node_merge_depth_stack.append(merge_depth)
611
626
self._left_subtree_pushed_stack.append(False)
612
627
self._pending_parents_stack.append(list(parents))
613
# as we push it, assign it a sequence number against its parent:
628
# as we push it, figure out if this is the first child
614
629
parents = self._original_graph[node_name]
616
631
# node has parents, assign from the left most parent.
617
parent_revno = self._revnos[parents[0]]
618
sequence = parent_revno[1]
632
parent_info = self._revnos[parents[0]]
633
first_child = parent_info[1]
634
parent_info[1] = False
621
# no parents, use the root sequence
622
sequence = self._root_sequence
623
self._root_sequence +=1
624
self._assigned_sequence_stack.append(sequence)
636
# We don't use the same algorithm here, but we need to keep the
639
self._first_child_stack.append(first_child)
626
641
def _pop_node(self):
627
"""Pop the top node off the stack
642
"""Pop the top node off the stack
629
644
The node is appended to the sorted output.
632
647
# pop off the local variables
633
648
node_name = self._node_name_stack.pop()
634
649
merge_depth = self._node_merge_depth_stack.pop()
635
sequence = self._assigned_sequence_stack.pop()
650
first_child = self._first_child_stack.pop()
636
651
# remove this node from the pending lists:
637
652
self._left_subtree_pushed_stack.pop()
638
653
self._pending_parents_stack.pop()
640
655
parents = self._original_graph[node_name]
642
657
# node has parents, assign from the left most parent.
643
parent_revno = self._revnos[parents[0]]
658
parent_revno = self._revnos[parents[0]][0]
645
660
# not the first child, make a new branch
646
revno = parent_revno[0] + (sequence, 1)
661
base_revno = parent_revno[0]
662
branch_count = self._revno_to_branch_count.get(base_revno, 0)
664
self._revno_to_branch_count[base_revno] = branch_count
665
revno = (parent_revno[0], branch_count, 1)
666
# revno = (parent_revno[0], branch_count, parent_revno[-1]+1)
648
# increment the sequence number within the branch
649
revno = parent_revno[0][:-1] + (parent_revno[0][-1] + 1,)
668
# as the first child, we just increase the final revision
670
revno = parent_revno[:-1] + (parent_revno[-1] + 1,)
651
672
# no parents, use the root sequence
653
# make a parallel import revision number
654
revno = (0, sequence, 1)
673
root_count = self._revno_to_branch_count.get(0, 0)
675
revno = (0, root_count, 1)
679
self._revno_to_branch_count[0] = root_count
658
681
# store the revno for this node for future reference
659
682
self._revnos[node_name][0] = revno