495
330
node = <_KnownGraphNode>temp_node
496
331
if not node.seen:
497
332
PyList_Append(heads, node.key)
498
heads = frozenset(heads)
333
heads = PyFrozenSet_New(heads)
499
334
for pos from 0 <= pos < PyList_GET_SIZE(cleanup):
500
335
node = _get_list_node(cleanup, pos)
502
337
if self.do_cache:
503
338
PyDict_SetItem(self._known_heads, heads_key, heads)
507
"""Return the nodes in topological order.
509
All parents must occur before all children.
511
# This is, for the most part, the same iteration order that we used for
512
# _find_gdfo, consider finding a way to remove the duplication
513
# In general, we find the 'tails' (nodes with no parents), and then
514
# walk to the children. For children that have all of their parents
515
# yielded, we queue up the child to be yielded as well.
516
cdef _KnownGraphNode node
517
cdef _KnownGraphNode child
521
cdef Py_ssize_t last_item
523
pending = self._find_tails()
524
if PyList_GET_SIZE(pending) == 0 and len(self._nodes) > 0:
525
raise errors.GraphCycleError(self._nodes)
529
last_item = PyList_GET_SIZE(pending) - 1
530
while last_item >= 0:
531
# Avoid pop followed by push, instead, peek, and replace
532
# timing shows this is 930ms => 770ms for OOo
533
node = _get_list_node(pending, last_item)
534
last_item = last_item - 1
535
if node.parents is not None:
536
# We don't include ghost parents
537
PyList_Append(topo_order, node.key)
538
for pos from 0 <= pos < PyList_GET_SIZE(node.children):
539
child = _get_list_node(node.children, pos)
541
# We know we have a graph cycle because a node has a parent
542
# which we couldn't find
543
raise errors.GraphCycleError(self._nodes)
544
child.seen = child.seen + 1
545
if child.seen == PyTuple_GET_SIZE(child.parents):
546
# All parents of this child have been yielded, queue this
547
# one to be yielded as well
548
last_item = last_item + 1
549
if last_item < PyList_GET_SIZE(pending):
550
Py_INCREF(child) # SetItem steals a ref
551
PyList_SetItem(pending, last_item, child)
553
PyList_Append(pending, child)
554
# We have queued this node, we don't need to track it
557
# We started from the parents, so we don't need to do anymore work
561
"""Return a reverse topological ordering which is 'stable'.
563
There are a few constraints:
564
1) Reverse topological (all children before all parents)
566
3) 'stable' sorting, so that we get the same result, independent of
567
machine, or extra data.
568
To do this, we use the same basic algorithm as topo_sort, but when we
569
aren't sure what node to access next, we sort them lexicographically.
572
cdef Py_ssize_t pos, last_item
573
cdef _KnownGraphNode node, node2, parent_node
575
tips = self._find_tips()
576
# Split the tips based on prefix
578
for pos from 0 <= pos < PyList_GET_SIZE(tips):
579
node = _get_list_node(tips, pos)
580
if PyString_CheckExact(node.key) or len(node.key) == 1:
584
temp = PyDict_GetItem(prefix_tips, prefix)
586
prefix_tips[prefix] = [node]
588
tip_nodes = <object>temp
589
PyList_Append(tip_nodes, node)
592
for prefix in sorted(prefix_tips):
593
temp = PyDict_GetItem(prefix_tips, prefix)
595
tip_nodes = <object>temp
596
pending = _sort_list_nodes(tip_nodes, 1)
597
last_item = PyList_GET_SIZE(pending) - 1
598
while last_item >= 0:
599
node = _get_list_node(pending, last_item)
600
last_item = last_item - 1
601
if node.parents is None:
604
PyList_Append(result, node.key)
605
# Sorting the parent keys isn't strictly necessary for stable
606
# sorting of a given graph. But it does help minimize the
607
# differences between graphs
608
# For bzr.dev ancestry:
610
# 7.73ms RichCompareBool sort
611
parents = _sort_list_nodes(node.parents, 1)
612
for pos from 0 <= pos < len(parents):
613
if PyTuple_CheckExact(parents):
614
parent_node = _get_tuple_node(parents, pos)
616
parent_node = _get_list_node(parents, pos)
617
# TODO: GraphCycle detection
618
parent_node.seen = parent_node.seen + 1
620
== PyList_GET_SIZE(parent_node.children)):
621
# All children have been processed, queue up this
623
last_item = last_item + 1
624
if last_item < PyList_GET_SIZE(pending):
625
Py_INCREF(parent_node) # SetItem steals a ref
626
PyList_SetItem(pending, last_item, parent_node)
628
PyList_Append(pending, parent_node)
632
def merge_sort(self, tip_key):
633
"""Compute the merge sorted graph output."""
634
cdef _MergeSorter sorter
636
# TODO: consider disabling gc since we are allocating a lot of nodes
637
# that won't be collectable anyway. real world testing has not
638
# shown a specific impact, yet.
639
sorter = _MergeSorter(self, tip_key)
640
return sorter.topo_order()
642
def get_parent_keys(self, key):
643
"""Get the parents for a key
645
Returns a list containg the parents keys. If the key is a ghost,
646
None is returned. A KeyError will be raised if the key is not in
649
:param keys: Key to check (eg revision_id)
650
:return: A list of parents
652
return self._nodes[key].parent_keys
654
def get_child_keys(self, key):
655
"""Get the children for a key
657
Returns a list containg the children keys. A KeyError will be raised
658
if the key is not in the graph.
660
:param keys: Key to check (eg revision_id)
661
:return: A list of children
663
return self._nodes[key].child_keys
666
cdef class _MergeSortNode:
667
"""Tracks information about a node during the merge_sort operation."""
670
cdef public object key
671
cdef public long merge_depth
672
cdef public object end_of_merge # True/False Is this the end of the current merge
674
# Private api, used while computing the information
675
cdef _KnownGraphNode left_parent
676
cdef _KnownGraphNode left_pending_parent
677
cdef object pending_parents # list of _KnownGraphNode for non-left parents
678
cdef long _revno_first
679
cdef long _revno_second
680
cdef long _revno_last
681
# TODO: turn these into flag/bit fields rather than individual members
682
cdef int is_first_child # Is this the first child?
683
cdef int seen_by_child # A child node has seen this parent
684
cdef int completed # Fully Processed
686
def __init__(self, key):
688
self.merge_depth = -1
689
self.left_parent = None
690
self.left_pending_parent = None
691
self.pending_parents = None
692
self._revno_first = -1
693
self._revno_second = -1
694
self._revno_last = -1
695
self.is_first_child = 0
696
self.seen_by_child = 0
700
return '%s(%s depth:%s rev:%s,%s,%s first:%s seen:%s)' % (
701
self.__class__.__name__, self.key,
703
self._revno_first, self._revno_second, self._revno_last,
704
self.is_first_child, self.seen_by_child)
706
cdef int has_pending_parents(self): # cannot_raise
707
if self.left_pending_parent is not None or self.pending_parents:
711
cdef object _revno(self):
712
if self._revno_first == -1:
713
if self._revno_second != -1:
714
raise RuntimeError('Something wrong with: %s' % (self,))
715
return (self._revno_last,)
717
return (self._revno_first, self._revno_second, self._revno_last)
724
cdef class _MergeSorter:
725
"""This class does the work of computing the merge_sort ordering.
727
We have some small advantages, in that we get all the extra information
728
that KnownGraph knows, like knowing the child lists, etc.
731
# Current performance numbers for merge_sort(bzr_dev_parent_map):
732
# 302ms tsort.merge_sort()
733
# 91ms graph.KnownGraph().merge_sort()
734
# 40ms kg.merge_sort()
736
cdef KnownGraph graph
737
cdef object _depth_first_stack # list
738
cdef Py_ssize_t _last_stack_item # offset to last item on stack
739
# cdef object _ms_nodes # dict of key => _MergeSortNode
740
cdef object _revno_to_branch_count # {revno => num child branches}
741
cdef object _scheduled_nodes # List of nodes ready to be yielded
743
def __init__(self, known_graph, tip_key):
744
cdef _KnownGraphNode node
746
self.graph = known_graph
747
# self._ms_nodes = {}
748
self._revno_to_branch_count = {}
749
self._depth_first_stack = []
750
self._last_stack_item = -1
751
self._scheduled_nodes = []
752
if (tip_key is not None and tip_key != NULL_REVISION
753
and tip_key != (NULL_REVISION,)):
754
node = self.graph._nodes[tip_key]
755
self._push_node(node, 0)
757
cdef _MergeSortNode _get_ms_node(self, _KnownGraphNode node):
758
cdef PyObject *temp_node
759
cdef _MergeSortNode ms_node
761
if node.extra is None:
762
ms_node = _MergeSortNode(node.key)
765
ms_node = <_MergeSortNode>node.extra
768
cdef _push_node(self, _KnownGraphNode node, long merge_depth):
769
cdef _KnownGraphNode parent_node
770
cdef _MergeSortNode ms_node, ms_parent_node
773
ms_node = self._get_ms_node(node)
774
ms_node.merge_depth = merge_depth
775
if node.parents is None:
776
raise RuntimeError('ghost nodes should not be pushed'
777
' onto the stack: %s' % (node,))
778
if PyTuple_GET_SIZE(node.parents) > 0:
779
parent_node = _get_tuple_node(node.parents, 0)
780
ms_node.left_parent = parent_node
781
if parent_node.parents is None: # left-hand ghost
782
ms_node.left_pending_parent = None
783
ms_node.left_parent = None
785
ms_node.left_pending_parent = parent_node
786
if PyTuple_GET_SIZE(node.parents) > 1:
787
ms_node.pending_parents = []
788
for pos from 1 <= pos < PyTuple_GET_SIZE(node.parents):
789
parent_node = _get_tuple_node(node.parents, pos)
790
if parent_node.parents is None: # ghost
792
PyList_Append(ms_node.pending_parents, parent_node)
794
ms_node.is_first_child = 1
795
if ms_node.left_parent is not None:
796
ms_parent_node = self._get_ms_node(ms_node.left_parent)
797
if ms_parent_node.seen_by_child:
798
ms_node.is_first_child = 0
799
ms_parent_node.seen_by_child = 1
800
self._last_stack_item = self._last_stack_item + 1
801
if self._last_stack_item < PyList_GET_SIZE(self._depth_first_stack):
802
Py_INCREF(node) # SetItem steals a ref
803
PyList_SetItem(self._depth_first_stack, self._last_stack_item,
806
PyList_Append(self._depth_first_stack, node)
808
cdef _pop_node(self):
810
cdef _MergeSortNode ms_node, ms_parent_node, ms_prev_node
811
cdef _KnownGraphNode node, parent_node, prev_node
813
node = _get_list_node(self._depth_first_stack, self._last_stack_item)
814
ms_node = <_MergeSortNode>node.extra
815
self._last_stack_item = self._last_stack_item - 1
816
if ms_node.left_parent is not None:
817
# Assign the revision number from the left-hand parent
818
ms_parent_node = <_MergeSortNode>ms_node.left_parent.extra
819
if ms_node.is_first_child:
820
# First child just increments the final digit
821
ms_node._revno_first = ms_parent_node._revno_first
822
ms_node._revno_second = ms_parent_node._revno_second
823
ms_node._revno_last = ms_parent_node._revno_last + 1
825
# Not the first child, make a new branch
826
# (mainline_revno, branch_count, 1)
827
if ms_parent_node._revno_first == -1:
828
# Mainline ancestor, the increment is on the last digit
829
base_revno = ms_parent_node._revno_last
831
base_revno = ms_parent_node._revno_first
832
temp = PyDict_GetItem(self._revno_to_branch_count,
837
branch_count = (<object>temp) + 1
838
PyDict_SetItem(self._revno_to_branch_count, base_revno,
840
ms_node._revno_first = base_revno
841
ms_node._revno_second = branch_count
842
ms_node._revno_last = 1
844
temp = PyDict_GetItem(self._revno_to_branch_count, 0)
846
# The first root node doesn't have a 3-digit revno
848
ms_node._revno_first = -1
849
ms_node._revno_second = -1
850
ms_node._revno_last = 1
852
root_count = (<object>temp) + 1
853
ms_node._revno_first = 0
854
ms_node._revno_second = root_count
855
ms_node._revno_last = 1
856
PyDict_SetItem(self._revno_to_branch_count, 0, root_count)
857
ms_node.completed = 1
858
if PyList_GET_SIZE(self._scheduled_nodes) == 0:
859
# The first scheduled node is always the end of merge
860
ms_node.end_of_merge = True
862
prev_node = _get_list_node(self._scheduled_nodes,
863
PyList_GET_SIZE(self._scheduled_nodes) - 1)
864
ms_prev_node = <_MergeSortNode>prev_node.extra
865
if ms_prev_node.merge_depth < ms_node.merge_depth:
866
# The previously pushed node is to our left, so this is the end
867
# of this right-hand chain
868
ms_node.end_of_merge = True
869
elif (ms_prev_node.merge_depth == ms_node.merge_depth
870
and prev_node not in node.parents):
871
# The next node is not a direct parent of this node
872
ms_node.end_of_merge = True
874
ms_node.end_of_merge = False
875
PyList_Append(self._scheduled_nodes, node)
877
cdef _schedule_stack(self):
878
cdef _KnownGraphNode last_node, next_node
879
cdef _MergeSortNode ms_node, ms_last_node, ms_next_node
880
cdef long next_merge_depth
882
while self._last_stack_item >= 0:
883
# Peek at the last item on the stack
884
last_node = _get_list_node(self._depth_first_stack,
885
self._last_stack_item)
886
if last_node.gdfo == -1:
887
# if _find_gdfo skipped a node, that means there is a graph
888
# cycle, error out now
889
raise errors.GraphCycleError(self.graph._nodes)
890
ms_last_node = <_MergeSortNode>last_node.extra
891
if not ms_last_node.has_pending_parents():
892
# Processed all parents, pop this node
895
while ms_last_node.has_pending_parents():
896
if ms_last_node.left_pending_parent is not None:
897
# recurse depth first into the primary parent
898
next_node = ms_last_node.left_pending_parent
899
ms_last_node.left_pending_parent = None
901
# place any merges in right-to-left order for scheduling
902
# which gives us left-to-right order after we reverse
903
# the scheduled queue.
904
# Note: This has the effect of allocating common-new
905
# revisions to the right-most subtree rather than the
906
# left most, which will display nicely (you get
907
# smaller trees at the top of the combined merge).
908
next_node = ms_last_node.pending_parents.pop()
909
ms_next_node = self._get_ms_node(next_node)
910
if ms_next_node.completed:
911
# this parent was completed by a child on the
912
# call stack. skip it.
914
# otherwise transfer it from the source graph into the
915
# top of the current depth first search stack.
917
if next_node is ms_last_node.left_parent:
918
next_merge_depth = ms_last_node.merge_depth
920
next_merge_depth = ms_last_node.merge_depth + 1
921
self._push_node(next_node, next_merge_depth)
922
# and do not continue processing parents until this 'call'
926
cdef topo_order(self):
927
cdef _MergeSortNode ms_node
928
cdef _KnownGraphNode node
930
cdef PyObject *temp_key, *temp_node
932
# Note: allocating a _MergeSortNode and deallocating it for all nodes
933
# costs approx 8.52ms (21%) of the total runtime
934
# We might consider moving the attributes into the base
936
self._schedule_stack()
938
# We've set up the basic schedule, now we can continue processing the
940
# Note: This final loop costs us 40.0ms => 28.8ms (11ms, 25%) on
941
# bzr.dev, to convert the internal Object representation into a
942
# Tuple representation...
943
# 2ms is walking the data and computing revno tuples
944
# 7ms is computing the return tuple
945
# 4ms is PyList_Append()
947
# output the result in reverse order, and separate the generated info
948
for pos from PyList_GET_SIZE(self._scheduled_nodes) > pos >= 0:
949
node = _get_list_node(self._scheduled_nodes, pos)
950
ms_node = <_MergeSortNode>node.extra
951
PyList_Append(ordered, ms_node)
953
# Clear out the scheduled nodes now that we're done
954
self._scheduled_nodes = []