~bzr-pqm/bzr/bzr.dev : contents of bzrlib/tsort.py at revision 1948.4.2

~bzr-pqm/bzr/bzr.dev : (revision 1948.4.2)

# (C) 2005, 2006 Canonical Limited.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA


"""Topological sorting routines."""


import bzrlib.errors as errors


__all__ = ["topo_sort", "TopoSorter", "merge_sort", "MergeSorter"]


def topo_sort(graph):
    """Topological sort a graph.

    graph -- sequence of pairs of node->parents_list.

    The result is a list of node names, such that all parents come before
    their children.

    node identifiers can be any hashable object, and are typically strings.
    """
    return TopoSorter(graph).sorted()


class TopoSorter(object):

    def __init__(self, graph):
        """Topological sorting of a graph.
    
        :param graph: sequence of pairs of node_name->parent_names_list.
                      i.e. [('C', ['B']), ('B', ['A']), ('A', [])]
                      For this input the output from the sort or
                      iter_topo_order routines will be:
                      'A', 'B', 'C'
        
        node identifiers can be any hashable object, and are typically strings.

        If you have a graph like [('a', ['b']), ('a', ['c'])] this will only use
        one of the two values for 'a'.

        The graph is sorted lazily: until you iterate or sort the input is
        not processed other than to create an internal representation.

        iteration or sorting may raise GraphCycleError if a cycle is present 
        in the graph.
        """
        # a dict of the graph.
        self._graph = dict(graph)
        ### if debugging:
        # self._original_graph = dict(graph)
        
        # this is a stack storing the depth first search into the graph.
        self._node_name_stack = []
        # at each level of 'recursion' we have to check each parent. This
        # stack stores the parents we have not yet checked for the node at the 
        # matching depth in _node_name_stack
        self._pending_parents_stack = []
        # this is a set of the completed nodes for fast checking whether a
        # parent in a node we are processing on the stack has already been
        # emitted and thus can be skipped.
        self._completed_node_names = set()

    def sorted(self):
        """Sort the graph and return as a list.
        
        After calling this the sorter is empty and you must create a new one.
        """
        return list(self.iter_topo_order())

###        Useful if fiddling with this code.
###        # cross check
###        sorted_names = list(self.iter_topo_order())
###        for index in range(len(sorted_names)):
###            rev = sorted_names[index]
###            for left_index in range(index):
###                if rev in self.original_graph[sorted_names[left_index]]:
###                    print "revision in parent list of earlier revision"
###                    import pdb;pdb.set_trace()

    def iter_topo_order(self):
        """Yield the nodes of the graph in a topological order.
        
        After finishing iteration the sorter is empty and you cannot continue
        iteration.
        """
        while self._graph:
            # now pick a random node in the source graph, and transfer it to the
            # top of the depth first search stack.
            node_name, parents = self._graph.popitem()
            self._push_node(node_name, parents)
            while self._node_name_stack:
                # loop until this call completes.
                parents_to_visit = self._pending_parents_stack[-1]
                # if all parents are done, the revision is done
                if not parents_to_visit:
                    # append the revision to the topo sorted list
                    # all the nodes parents have been added to the output, now
                    # we can add it to the output.
                    yield self._pop_node()
                else:
                    while self._pending_parents_stack[-1]:
                        # recurse depth first into a single parent 
                        next_node_name = self._pending_parents_stack[-1].pop()
                        if next_node_name in self._completed_node_names:
                            # this parent was completed by a child on the
                            # call stack. skip it.
                            continue
                        # otherwise transfer it from the source graph into the
                        # top of the current depth first search stack.
                        try:
                            parents = self._graph.pop(next_node_name)
                        except KeyError:
                            # if the next node is not in the source graph it has
                            # already been popped from it and placed into the
                            # current search stack (but not completed or we would
                            # have hit the continue 4 lines up.
                            # this indicates a cycle.
                            raise errors.GraphCycleError(self._node_name_stack)
                        self._push_node(next_node_name, parents)
                        # and do not continue processing parents until this 'call' 
                        # has recursed.
                        break

    def _push_node(self, node_name, parents):
        """Add node_name to the pending node stack.
        
        Names in this stack will get emitted into the output as they are popped
        off the stack.
        """
        self._node_name_stack.append(node_name)
        self._pending_parents_stack.append(list(parents))

    def _pop_node(self):
        """Pop the top node off the stack 

        The node is appended to the sorted output.
        """
        # we are returning from the flattened call frame:
        # pop off the local variables
        node_name = self._node_name_stack.pop()
        self._pending_parents_stack.pop()

        self._completed_node_names.add(node_name)
        return node_name


def merge_sort(graph, branch_tip, mainline_revisions=None):
    """Topological sort a graph which groups merges.

    :param graph: sequence of pairs of node->parents_list.
    :param branch_tip: the tip of the branch to graph. Revisions not 
                       reachable from branch_tip are not included in the
                       output.
    :param mainline_revisions: If not None this forces a mainline to be
                               used rather than synthesised from the graph.
                               This must be a valid path through some part
                               of the graph. If the mainline does not cover all
                               the revisions, output stops at the start of the
                               old revision listed in the mainline revisions
                               list.
                               The order for this parameter is oldest-first.

    The result is a list of node names, such that all parents come before
    their children.

    node identifiers can be any hashable object, and are typically strings.
    """
    return MergeSorter(graph, branch_tip, mainline_revisions).sorted()


class MergeSorter(object):

    def __init__(self, graph, branch_tip, mainline_revisions=None):
        """Merge-aware topological sorting of a graph.
    
        :param graph: sequence of pairs of node_name->parent_names_list.
                      i.e. [('C', ['B']), ('B', ['A']), ('A', [])]
                      For this input the output from the sort or
                      iter_topo_order routines will be:
                      'A', 'B', 'C'
        :param branch_tip: the tip of the branch to graph. Revisions not 
                       reachable from branch_tip are not included in the
                       output.
        :param mainline_revisions: If not None this forces a mainline to be
                               used rather than synthesised from the graph.
                               This must be a valid path through some part
                               of the graph. If the mainline does not cover all
                               the revisions, output stops at the start of the
                               old revision listed in the mainline revisions
                               list.
                               The order for this parameter is oldest-first.

        
        node identifiers can be any hashable object, and are typically strings.

        If you have a graph like [('a', ['b']), ('a', ['c'])] this will only use
        one of the two values for 'a'.

        The graph is sorted lazily: until you iterate or sort the input is
        not processed other than to create an internal representation.

        iteration or sorting may raise GraphCycleError if a cycle is present 
        in the graph.

        Background information on the design:
        -------------------------------------
        definition: the end of any cluster or 'merge' occurs when:
            1 - the next revision has a lower merge depth than we do.
              i.e.
              A 0
              B  1
              C   2
              D  1
              E 0
              C, D are the ends of clusters, E might be but we need more data.
            2 - or the next revision at our merge depth is not our left most
              ancestor.
              This is required to handle multiple-merges in one commit.
              i.e.
              A 0    [F, B, E]
              B  1   [D, C]
              C   2  [D]
              D  1   [F]
              E  1   [F]
              F 0
              C is the end of a cluster due to rule 1.
              D is not the end of a cluster from rule 1, but is from rule 2: E 
                is not its left most ancestor
              E is the end of a cluster due to rule 1
              F might be but we need more data.
              
        we show connecting lines to a parent when:
         - The parent is the start of a merge within this cluster.
           That is, the merge was not done to the mainline before this cluster 
           was merged to the mainline.
           This can be detected thus:
            * The parent has a higher merge depth and is the next revision in 
              the list.
          
          The next revision in the list constraint is needed for this case:
          A 0   [D, B]   
          B  1  [C, F]   # we do not want to show a line to F which is depth 2 
                           but not a merge
          C  1  [H]      # note that this is a long line to show back to the 
                           ancestor - see the end of merge rules.
          D 0   [G, E]
          E  1  [G, F]
          F   2 [G]
          G  1  [H]
          H 0
         - Part of this merges 'branch':
          The parent has the same merge depth and is our left most parent and we
           are not the end of the cluster.
          A 0   [C, B] lines: [B, C]
          B  1  [E, C] lines: [C]
          C 0   [D]    lines: [D]
          D 0   [F, E] lines: [E, F]
          E  1  [F]    lines: [F]
          F 0
         - The end of this merge/cluster:
          we can ONLY have multiple parents at the end of a cluster if this
          branch was previously merged into the 'mainline'.
          - if we have one and only one parent, show it
            Note that this may be to a greater merge depth - for instance if
            this branch continued from a deeply nested branch to add something
            to it.
          - if we have more than one parent - show the second oldest (older ==
            further down the list) parent with
            an equal or lower merge depth
             XXXX revisit when awake. ddaa asks about the relevance of each one
             - maybe more than one parent is relevant
        """
        # a dict of the graph.
        self._graph = dict(graph)
        # if there is an explicit mainline, alter the graph to match. This is
        # easier than checking at every merge whether we are on the mainline and
        # if so which path to take.
        if mainline_revisions is None:
            self._mainline_revisions = []
            self._stop_revision = None
        else:
            self._mainline_revisions = list(mainline_revisions)
            self._stop_revision = self._mainline_revisions[0]
        # skip the first revision, its what we reach and its parents are 
        # therefore irrelevant
        for index, revision in enumerate(self._mainline_revisions[1:]):
            # NB: index 0 means self._mainline_revisions[1]
            # if the mainline matches the graph, nothing to do.
            parent = self._mainline_revisions[index]
            if parent is None:
                # end of mainline_revisions history
                continue
            if self._graph[revision][0] == parent:
                continue
            # remove it from its prior spot
            self._graph[revision].remove(parent)
            # insert it into the start of the mainline
            self._graph[revision].insert(0, parent)
        # we need to do a check late in the process to detect end-of-merges
        # which requires the parents to be accessible: its easier for now
        # to just keep the original graph around.
        self._original_graph = dict(self._graph.items())
        
        # this is a stack storing the depth first search into the graph.
        self._node_name_stack = []
        # at each level of recursion we need the merge depth this node is at:
        self._node_merge_depth_stack = []
        # at each level of 'recursion' we have to check each parent. This
        # stack stores the parents we have not yet checked for the node at the 
        # matching depth in _node_name_stack
        self._pending_parents_stack = []
        # this is a set of the nodes who have been completely analysed for fast
        # membership checking
        self._completed_node_names = set()
        # this is the scheduling of nodes list.
        # Nodes are scheduled
        # from the bottom left of the tree: in the tree
        # A 0  [D, B]
        # B  1 [C]
        # C  1 [D]
        # D 0  [F, E]
        # E  1 [F]
        # F 0
        # the scheduling order is: F, E, D, C, B, A 
        # that is - 'left subtree, right subtree, node'
        # which would mean that when we schedule A we can emit the entire tree.
        self._scheduled_nodes = []
        # This records for each node when we have processed its left most 
        # unmerged subtree. After this subtree is scheduled, all other subtrees
        # have their merge depth increased by one from this nodes merge depth.
        self._left_subtree_done_stack = []

        # seed the search with the tip of the branch
        if branch_tip is not None:
            parents = self._graph.pop(branch_tip)
            self._push_node(branch_tip, 0, parents)

    def sorted(self):
        """Sort the graph and return as a list.
        
        After calling this the sorter is empty and you must create a new one.
        """
        return list(self.iter_topo_order())

    def iter_topo_order(self):
        """Yield the nodes of the graph in a topological order.
        
        After finishing iteration the sorter is empty and you cannot continue
        iteration.
        """
        while self._node_name_stack:
            # loop until this call completes.
            parents_to_visit = self._pending_parents_stack[-1]
            # if all parents are done, the revision is done
            if not parents_to_visit:
                # append the revision to the topo sorted scheduled list:
                # all the nodes parents have been scheduled added, now
                # we can add it to the output.
                self._pop_node()
            else:
                while self._pending_parents_stack[-1]:
                    if not self._left_subtree_done_stack[-1]:
                        # recurse depth first into the primary parent
                        next_node_name = self._pending_parents_stack[-1].pop(0)
                    else:
                        # place any merges in right-to-left order for scheduling
                        # which gives us left-to-right order after we reverse
                        # the scheduled queue. XXX: This has the effect of 
                        # allocating common-new revisions to the right-most
                        # subtree rather than the left most, which will 
                        # display nicely (you get smaller trees at the top
                        # of the combined merge).
                        next_node_name = self._pending_parents_stack[-1].pop()
                    if next_node_name in self._completed_node_names:
                        # this parent was completed by a child on the
                        # call stack. skip it.
                        continue
                    # otherwise transfer it from the source graph into the
                    # top of the current depth first search stack.
                    try:
                        parents = self._graph.pop(next_node_name)
                    except KeyError:
                        # if the next node is not in the source graph it has
                        # already been popped from it and placed into the
                        # current search stack (but not completed or we would
                        # have hit the continue 4 lines up.
                        # this indicates a cycle.
                        raise errors.GraphCycleError(self._node_name_stack)
                    next_merge_depth = 0
                    if self._left_subtree_done_stack[-1]:
                        next_merge_depth = 1
                    else:
                        next_merge_depth = 0
                        self._left_subtree_done_stack[-1] = True
                    next_merge_depth = (
                        self._node_merge_depth_stack[-1] + next_merge_depth)
                    self._push_node(
                        next_node_name,
                        next_merge_depth,
                        parents)
                    # and do not continue processing parents until this 'call' 
                    # has recursed.
                    break
        # We have scheduled the graph. Now deliver the ordered output:
        sequence_number = 0
        while self._scheduled_nodes:
            node_name, merge_depth = self._scheduled_nodes.pop()
            if node_name == self._stop_revision:
                return
            if not len(self._scheduled_nodes):
                end_of_merge = True
            elif self._scheduled_nodes[-1][1] < merge_depth:
                # the next node is to our left
                end_of_merge = True
            elif (self._scheduled_nodes[-1][1] == merge_depth and
                  (self._scheduled_nodes[-1][0] not in
                   self._original_graph[node_name])):
                # the next node was part of a multiple-merge.
                end_of_merge = True
            else:
                end_of_merge = False
            yield (sequence_number, node_name, merge_depth, end_of_merge)
            sequence_number += 1

    def _push_node(self, node_name, merge_depth, parents):
        """Add node_name to the pending node stack.
        
        Names in this stack will get emitted into the output as they are popped
        off the stack.
        """
        self._node_name_stack.append(node_name)
        self._node_merge_depth_stack.append(merge_depth)
        self._left_subtree_done_stack.append(False)
        self._pending_parents_stack.append(list(parents))

    def _pop_node(self):
        """Pop the top node off the stack 

        The node is appended to the sorted output.
        """
        # we are returning from the flattened call frame:
        # pop off the local variables
        node_name = self._node_name_stack.pop()
        merge_depth = self._node_merge_depth_stack.pop()
        self._left_subtree_done_stack.pop()
        self._pending_parents_stack.pop()

        self._completed_node_names.add(node_name)
        self._scheduled_nodes.append((node_name, merge_depth))
        return node_name

1570.1.7 by Robert Collins Replace the slow topo_sort routine with a much faster one for non trivial datasets.	1	# (C) 2005, 2006 Canonical Limited.
1185.16.113 by mbp at sourcefrog Add topo_sort utility function	2	#
	3	# This program is free software; you can redistribute it and/or modify
	4	# it under the terms of the GNU General Public License as published by
	5	# the Free Software Foundation; either version 2 of the License, or
	6	# (at your option) any later version.
	7	#
	8	# This program is distributed in the hope that it will be useful,
	9	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	10	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	11	# GNU General Public License for more details.
	12	#
	13	# You should have received a copy of the GNU General Public License
	14	# along with this program; if not, write to the Free Software
	15	# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
	16
1570.1.7 by Robert Collins Replace the slow topo_sort routine with a much faster one for non trivial datasets.	17
	18	"""Topological sorting routines."""
	19
	20
	21	import bzrlib.errors as errors
	22
1185.16.114 by mbp at sourcefrog Improved topological sort	23
1624.1.2 by Robert Collins Add MergeSort facility to bzrlib.tsort.	24	__all__ = ["topo_sort", "TopoSorter", "merge_sort", "MergeSorter"]
	25
	26
1185.16.114 by mbp at sourcefrog Improved topological sort	27	def topo_sort(graph):
1185.16.113 by mbp at sourcefrog Add topo_sort utility function	28	"""Topological sort a graph.
	29
1185.16.114 by mbp at sourcefrog Improved topological sort	30	graph -- sequence of pairs of node->parents_list.
	31
	32	The result is a list of node names, such that all parents come before
	33	their children.
	34
1185.16.113 by mbp at sourcefrog Add topo_sort utility function	35	node identifiers can be any hashable object, and are typically strings.
	36	"""
1570.1.7 by Robert Collins Replace the slow topo_sort routine with a much faster one for non trivial datasets.	37	return TopoSorter(graph).sorted()
	38
	39
	40	class TopoSorter(object):
	41
	42	def __init__(self, graph):
	43	"""Topological sorting of a graph.
	44
	45	:param graph: sequence of pairs of node_name->parent_names_list.
	46	i.e. [('C', ['B']), ('B', ['A']), ('A', [])]
	47	For this input the output from the sort or
	48	iter_topo_order routines will be:
	49	'A', 'B', 'C'
	50
	51	node identifiers can be any hashable object, and are typically strings.
	52
1587.1.2 by Robert Collins Review comments for reconcile.	53	If you have a graph like [('a', ['b']), ('a', ['c'])] this will only use
	54	one of the two values for 'a'.
	55
1570.1.7 by Robert Collins Replace the slow topo_sort routine with a much faster one for non trivial datasets.	56	The graph is sorted lazily: until you iterate or sort the input is
	57	not processed other than to create an internal representation.
	58
1587.1.3 by Robert Collins Typos for reconcile - docstring in tsort.py was out of sync with code.	59	iteration or sorting may raise GraphCycleError if a cycle is present
1570.1.7 by Robert Collins Replace the slow topo_sort routine with a much faster one for non trivial datasets.	60	in the graph.
	61	"""
	62	# a dict of the graph.
	63	self._graph = dict(graph)
	64	### if debugging:
	65	# self._original_graph = dict(graph)
	66
	67	# this is a stack storing the depth first search into the graph.
	68	self._node_name_stack = []
	69	# at each level of 'recursion' we have to check each parent. This
	70	# stack stores the parents we have not yet checked for the node at the
	71	# matching depth in _node_name_stack
	72	self._pending_parents_stack = []
	73	# this is a set of the completed nodes for fast checking whether a
	74	# parent in a node we are processing on the stack has already been
	75	# emitted and thus can be skipped.
	76	self._completed_node_names = set()
	77
	78	def sorted(self):
	79	"""Sort the graph and return as a list.
	80
	81	After calling this the sorter is empty and you must create a new one.
	82	"""
	83	return list(self.iter_topo_order())
	84
	85	### Useful if fiddling with this code.
	86	### # cross check
	87	### sorted_names = list(self.iter_topo_order())
	88	### for index in range(len(sorted_names)):
	89	### rev = sorted_names[index]
	90	### for left_index in range(index):
	91	### if rev in self.original_graph[sorted_names[left_index]]:
	92	### print "revision in parent list of earlier revision"
	93	### import pdb;pdb.set_trace()
	94
	95	def iter_topo_order(self):
1587.1.3 by Robert Collins Typos for reconcile - docstring in tsort.py was out of sync with code.	96	"""Yield the nodes of the graph in a topological order.
1570.1.7 by Robert Collins Replace the slow topo_sort routine with a much faster one for non trivial datasets.	97
	98	After finishing iteration the sorter is empty and you cannot continue
	99	iteration.
	100	"""
	101	while self._graph:
	102	# now pick a random node in the source graph, and transfer it to the
	103	# top of the depth first search stack.
	104	node_name, parents = self._graph.popitem()
	105	self._push_node(node_name, parents)
	106	while self._node_name_stack:
	107	# loop until this call completes.
	108	parents_to_visit = self._pending_parents_stack[-1]
	109	# if all parents are done, the revision is done
	110	if not parents_to_visit:
	111	# append the revision to the topo sorted list
	112	# all the nodes parents have been added to the output, now
	113	# we can add it to the output.
	114	yield self._pop_node()
	115	else:
	116	while self._pending_parents_stack[-1]:
	117	# recurse depth first into a single parent
	118	next_node_name = self._pending_parents_stack[-1].pop()
	119	if next_node_name in self._completed_node_names:
	120	# this parent was completed by a child on the
	121	# call stack. skip it.
	122	continue
	123	# otherwise transfer it from the source graph into the
	124	# top of the current depth first search stack.
	125	try:
	126	parents = self._graph.pop(next_node_name)
	127	except KeyError:
	128	# if the next node is not in the source graph it has
	129	# already been popped from it and placed into the
	130	# current search stack (but not completed or we would
	131	# have hit the continue 4 lines up.
	132	# this indicates a cycle.
	133	raise errors.GraphCycleError(self._node_name_stack)
	134	self._push_node(next_node_name, parents)
	135	# and do not continue processing parents until this 'call'
	136	# has recursed.
	137	break
	138
	139	def _push_node(self, node_name, parents):
	140	"""Add node_name to the pending node stack.
	141
	142	Names in this stack will get emitted into the output as they are popped
	143	off the stack.
	144	"""
1624.1.2 by Robert Collins Add MergeSort facility to bzrlib.tsort.	145	self._node_name_stack.append(node_name)
	146	self._pending_parents_stack.append(list(parents))
	147
	148	def _pop_node(self):
	149	"""Pop the top node off the stack
	150
	151	The node is appended to the sorted output.
	152	"""
	153	# we are returning from the flattened call frame:
	154	# pop off the local variables
	155	node_name = self._node_name_stack.pop()
	156	self._pending_parents_stack.pop()
	157
	158	self._completed_node_names.add(node_name)
	159	return node_name
	160
	161
1624.1.3 by Robert Collins Convert log to use the new tsort.merge_sort routine.	162	def merge_sort(graph, branch_tip, mainline_revisions=None):
1624.1.2 by Robert Collins Add MergeSort facility to bzrlib.tsort.	163	"""Topological sort a graph which groups merges.
	164
	165	:param graph: sequence of pairs of node->parents_list.
	166	:param branch_tip: the tip of the branch to graph. Revisions not
	167	reachable from branch_tip are not included in the
	168	output.
1624.1.3 by Robert Collins Convert log to use the new tsort.merge_sort routine.	169	:param mainline_revisions: If not None this forces a mainline to be
	170	used rather than synthesised from the graph.
	171	This must be a valid path through some part
	172	of the graph. If the mainline does not cover all
	173	the revisions, output stops at the start of the
	174	old revision listed in the mainline revisions
	175	list.
	176	The order for this parameter is oldest-first.
1624.1.2 by Robert Collins Add MergeSort facility to bzrlib.tsort.	177
	178	The result is a list of node names, such that all parents come before
	179	their children.
	180
	181	node identifiers can be any hashable object, and are typically strings.
	182	"""
1624.1.3 by Robert Collins Convert log to use the new tsort.merge_sort routine.	183	return MergeSorter(graph, branch_tip, mainline_revisions).sorted()
1624.1.2 by Robert Collins Add MergeSort facility to bzrlib.tsort.	184
	185
	186	class MergeSorter(object):
	187
1624.1.3 by Robert Collins Convert log to use the new tsort.merge_sort routine.	188	def __init__(self, graph, branch_tip, mainline_revisions=None):
1624.1.2 by Robert Collins Add MergeSort facility to bzrlib.tsort.	189	"""Merge-aware topological sorting of a graph.
	190
	191	:param graph: sequence of pairs of node_name->parent_names_list.
	192	i.e. [('C', ['B']), ('B', ['A']), ('A', [])]
	193	For this input the output from the sort or
	194	iter_topo_order routines will be:
	195	'A', 'B', 'C'
1624.1.3 by Robert Collins Convert log to use the new tsort.merge_sort routine.	196	:param branch_tip: the tip of the branch to graph. Revisions not
	197	reachable from branch_tip are not included in the
	198	output.
	199	:param mainline_revisions: If not None this forces a mainline to be
	200	used rather than synthesised from the graph.
	201	This must be a valid path through some part
	202	of the graph. If the mainline does not cover all
	203	the revisions, output stops at the start of the
	204	old revision listed in the mainline revisions
	205	list.
	206	The order for this parameter is oldest-first.
	207
1624.1.2 by Robert Collins Add MergeSort facility to bzrlib.tsort.	208
	209	node identifiers can be any hashable object, and are typically strings.
	210
	211	If you have a graph like [('a', ['b']), ('a', ['c'])] this will only use
	212	one of the two values for 'a'.
	213
	214	The graph is sorted lazily: until you iterate or sort the input is
	215	not processed other than to create an internal representation.
	216
	217	iteration or sorting may raise GraphCycleError if a cycle is present
	218	in the graph.
	219
	220	Background information on the design:
	221	-------------------------------------
	222	definition: the end of any cluster or 'merge' occurs when:
	223	1 - the next revision has a lower merge depth than we do.
	224	i.e.
	225	A 0
	226	B 1
	227	C 2
	228	D 1
	229	E 0
	230	C, D are the ends of clusters, E might be but we need more data.
	231	2 - or the next revision at our merge depth is not our left most
	232	ancestor.
	233	This is required to handle multiple-merges in one commit.
	234	i.e.
	235	A 0 [F, B, E]
	236	B 1 [D, C]
	237	C 2 [D]
	238	D 1 [F]
	239	E 1 [F]
	240	F 0
	241	C is the end of a cluster due to rule 1.
	242	D is not the end of a cluster from rule 1, but is from rule 2: E
	243	is not its left most ancestor
	244	E is the end of a cluster due to rule 1
	245	F might be but we need more data.
	246
	247	we show connecting lines to a parent when:
	248	- The parent is the start of a merge within this cluster.
	249	That is, the merge was not done to the mainline before this cluster
	250	was merged to the mainline.
	251	This can be detected thus:
	252	* The parent has a higher merge depth and is the next revision in
	253	the list.
	254
	255	The next revision in the list constraint is needed for this case:
	256	A 0 [D, B]
	257	B 1 [C, F] # we do not want to show a line to F which is depth 2
	258	but not a merge
	259	C 1 [H] # note that this is a long line to show back to the
	260	ancestor - see the end of merge rules.
	261	D 0 [G, E]
	262	E 1 [G, F]
	263	F 2 [G]
	264	G 1 [H]
	265	H 0
	266	- Part of this merges 'branch':
	267	The parent has the same merge depth and is our left most parent and we
	268	are not the end of the cluster.
	269	A 0 [C, B] lines: [B, C]
	270	B 1 [E, C] lines: [C]
	271	C 0 [D] lines: [D]
272	D 0 [F, E] lines: [E, F]
273	E 1 [F] lines: [F]
274	F 0
275	- The end of this merge/cluster:
276	we can ONLY have multiple parents at the end of a cluster if this
277	branch was previously merged into the 'mainline'.
278	- if we have one and only one parent, show it
279	Note that this may be to a greater merge depth - for instance if
280	this branch continued from a deeply nested branch to add something
281	to it.
282	- if we have more than one parent - show the second oldest (older ==
283	further down the list) parent with
284	an equal or lower merge depth
285	XXXX revisit when awake. ddaa asks about the relevance of each one
286	- maybe more than one parent is relevant
287	"""
288	# a dict of the graph.
289	self._graph = dict(graph)
1624.1.3 by Robert Collins Convert log to use the new tsort.merge_sort routine.	290	# if there is an explicit mainline, alter the graph to match. This is
	291	# easier than checking at every merge whether we are on the mainline and
	292	# if so which path to take.
	293	if mainline_revisions is None:
	294	self._mainline_revisions = []
	295	self._stop_revision = None
	296	else:
	297	self._mainline_revisions = list(mainline_revisions)
	298	self._stop_revision = self._mainline_revisions[0]
	299	# skip the first revision, its what we reach and its parents are
	300	# therefore irrelevant
	301	for index, revision in enumerate(self._mainline_revisions[1:]):
	302	# NB: index 0 means self._mainline_revisions[1]
	303	# if the mainline matches the graph, nothing to do.
	304	parent = self._mainline_revisions[index]
	305	if parent is None:
	306	# end of mainline_revisions history
	307	continue
	308	if self._graph[revision][0] == parent:
	309	continue
	310	# remove it from its prior spot
	311	self._graph[revision].remove(parent)
	312	# insert it into the start of the mainline
	313	self._graph[revision].insert(0, parent)
1624.1.2 by Robert Collins Add MergeSort facility to bzrlib.tsort.	314	# we need to do a check late in the process to detect end-of-merges
	315	# which requires the parents to be accessible: its easier for now
	316	# to just keep the original graph around.
1624.1.3 by Robert Collins Convert log to use the new tsort.merge_sort routine.	317	self._original_graph = dict(self._graph.items())
1624.1.2 by Robert Collins Add MergeSort facility to bzrlib.tsort.	318
	319	# this is a stack storing the depth first search into the graph.
	320	self._node_name_stack = []
	321	# at each level of recursion we need the merge depth this node is at:
	322	self._node_merge_depth_stack = []
	323	# at each level of 'recursion' we have to check each parent. This
	324	# stack stores the parents we have not yet checked for the node at the
	325	# matching depth in _node_name_stack
	326	self._pending_parents_stack = []
	327	# this is a set of the nodes who have been completely analysed for fast
	328	# membership checking
	329	self._completed_node_names = set()
	330	# this is the scheduling of nodes list.
	331	# Nodes are scheduled
	332	# from the bottom left of the tree: in the tree
	333	# A 0 [D, B]
	334	# B 1 [C]
	335	# C 1 [D]
	336	# D 0 [F, E]
	337	# E 1 [F]
	338	# F 0
	339	# the scheduling order is: F, E, D, C, B, A
	340	# that is - 'left subtree, right subtree, node'
	341	# which would mean that when we schedule A we can emit the entire tree.
	342	self._scheduled_nodes = []
	343	# This records for each node when we have processed its left most
	344	# unmerged subtree. After this subtree is scheduled, all other subtrees
	345	# have their merge depth increased by one from this nodes merge depth.
	346	self._left_subtree_done_stack = []
	347
	348	# seed the search with the tip of the branch
	349	if branch_tip is not None:
	350	parents = self._graph.pop(branch_tip)
	351	self._push_node(branch_tip, 0, parents)
	352
	353	def sorted(self):
	354	"""Sort the graph and return as a list.
	355
	356	After calling this the sorter is empty and you must create a new one.
	357	"""
	358	return list(self.iter_topo_order())
	359
	360	def iter_topo_order(self):
	361	"""Yield the nodes of the graph in a topological order.
	362
	363	After finishing iteration the sorter is empty and you cannot continue
	364	iteration.
	365	"""
	366	while self._node_name_stack:
	367	# loop until this call completes.
	368	parents_to_visit = self._pending_parents_stack[-1]
	369	# if all parents are done, the revision is done
	370	if not parents_to_visit:
	371	# append the revision to the topo sorted scheduled list:
	372	# all the nodes parents have been scheduled added, now
	373	# we can add it to the output.
	374	self._pop_node()
	375	else:
	376	while self._pending_parents_stack[-1]:
	377	if not self._left_subtree_done_stack[-1]:
	378	# recurse depth first into the primary parent
	379	next_node_name = self._pending_parents_stack[-1].pop(0)
	380	else:
	381	# place any merges in right-to-left order for scheduling
382	# which gives us left-to-right order after we reverse
383	# the scheduled queue. XXX: This has the effect of
384	# allocating common-new revisions to the right-most
385	# subtree rather than the left most, which will
386	# display nicely (you get smaller trees at the top
387	# of the combined merge).
388	next_node_name = self._pending_parents_stack[-1].pop()
389	if next_node_name in self._completed_node_names:
390	# this parent was completed by a child on the
391	# call stack. skip it.
392	continue
393	# otherwise transfer it from the source graph into the
394	# top of the current depth first search stack.
395	try:
396	parents = self._graph.pop(next_node_name)
397	except KeyError:
398	# if the next node is not in the source graph it has
399	# already been popped from it and placed into the
400	# current search stack (but not completed or we would
401	# have hit the continue 4 lines up.
402	# this indicates a cycle.
403	raise errors.GraphCycleError(self._node_name_stack)
404	next_merge_depth = 0
405	if self._left_subtree_done_stack[-1]:
406	next_merge_depth = 1
407	else:
408	next_merge_depth = 0
409	self._left_subtree_done_stack[-1] = True
410	next_merge_depth = (
411	self._node_merge_depth_stack[-1] + next_merge_depth)
412	self._push_node(
413	next_node_name,
414	next_merge_depth,
415	parents)
416	# and do not continue processing parents until this 'call'
417	# has recursed.
418	break
419	# We have scheduled the graph. Now deliver the ordered output:
420	sequence_number = 0
421	while self._scheduled_nodes:
422	node_name, merge_depth = self._scheduled_nodes.pop()
1624.1.3 by Robert Collins Convert log to use the new tsort.merge_sort routine.	423	if node_name == self._stop_revision:
	424	return
1624.1.2 by Robert Collins Add MergeSort facility to bzrlib.tsort.	425	if not len(self._scheduled_nodes):
	426	end_of_merge = True
	427	elif self._scheduled_nodes[-1][1] < merge_depth:
	428	# the next node is to our left
	429	end_of_merge = True
	430	elif (self._scheduled_nodes[-1][1] == merge_depth and
	431	(self._scheduled_nodes[-1][0] not in
	432	self._original_graph[node_name])):
	433	# the next node was part of a multiple-merge.
	434	end_of_merge = True
	435	else:
	436	end_of_merge = False
	437	yield (sequence_number, node_name, merge_depth, end_of_merge)
	438	sequence_number += 1
	439
	440	def _push_node(self, node_name, merge_depth, parents):
	441	"""Add node_name to the pending node stack.
	442
	443	Names in this stack will get emitted into the output as they are popped
	444	off the stack.
	445	"""
	446	self._node_name_stack.append(node_name)
	447	self._node_merge_depth_stack.append(merge_depth)
	448	self._left_subtree_done_stack.append(False)
	449	self._pending_parents_stack.append(list(parents))
	450
	451	def _pop_node(self):
	452	"""Pop the top node off the stack
	453
	454	The node is appended to the sorted output.
	455	"""
	456	# we are returning from the flattened call frame:
	457	# pop off the local variables
	458	node_name = self._node_name_stack.pop()
	459	merge_depth = self._node_merge_depth_stack.pop()
	460	self._left_subtree_done_stack.pop()
	461	self._pending_parents_stack.pop()
	462
	463	self._completed_node_names.add(node_name)
	464	self._scheduled_nodes.append((node_name, merge_depth))
1570.1.7 by Robert Collins Replace the slow topo_sort routine with a much faster one for non trivial datasets.	465	return node_name