1
# Copyright (C) 2009 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
"""Implementation of Graph algorithms when we have already loaded everything.
20
cdef extern from "python-compat.h":
23
cdef extern from "Python.h":
24
ctypedef int Py_ssize_t
25
ctypedef struct PyObject:
28
object PyTuple_New(Py_ssize_t n)
29
Py_ssize_t PyTuple_GET_SIZE(object t)
30
PyObject * PyTuple_GET_ITEM(object t, Py_ssize_t o)
31
void PyTuple_SET_ITEM(object t, Py_ssize_t o, object v)
33
Py_ssize_t PyList_GET_SIZE(object l)
34
PyObject * PyList_GET_ITEM(object l, Py_ssize_t o)
35
int PyList_SetItem(object l, Py_ssize_t o, object l) except -1
36
int PyList_Append(object l, object v) except -1
38
int PyDict_CheckExact(object d)
39
Py_ssize_t PyDict_Size(object d) except -1
40
PyObject * PyDict_GetItem(object d, object k)
41
int PyDict_SetItem(object d, object k, object v) except -1
42
int PyDict_DelItem(object d, object k) except -1
43
int PyDict_Next(object d, Py_ssize_t *pos, PyObject **k, PyObject **v)
45
void Py_INCREF(object)
49
from bzrlib import errors, revision
51
cdef object NULL_REVISION
52
NULL_REVISION = revision.NULL_REVISION
55
cdef class _KnownGraphNode:
56
"""Represents a single object in the known graph."""
65
def __init__(self, key):
70
# Greatest distance from origin
77
cdef _KnownGraphNode child
80
for child in self.children:
81
PyList_Append(keys, child.key)
84
cdef clear_references(self):
89
cdef _KnownGraphNode node
92
if self.parents is not None:
93
for node in self.parents:
94
parent_keys.append(node.key)
96
if self.children is not None:
97
for node in self.children:
98
child_keys.append(node.key)
99
return '%s(%s gdfo:%s par:%s child:%s)' % (
100
self.__class__.__name__, self.key, self.gdfo,
101
parent_keys, child_keys)
104
cdef _KnownGraphNode _get_list_node(lst, Py_ssize_t pos):
105
cdef PyObject *temp_node
107
temp_node = PyList_GET_ITEM(lst, pos)
108
return <_KnownGraphNode>temp_node
111
cdef _KnownGraphNode _get_parent(parents, Py_ssize_t pos):
112
cdef PyObject *temp_node
113
cdef _KnownGraphNode node
115
temp_node = PyTuple_GET_ITEM(parents, pos)
116
return <_KnownGraphNode>temp_node
119
cdef class _MergeSorter
121
cdef class KnownGraph:
122
"""This is a class which assumes we already know the full graph."""
124
cdef public object _nodes
125
cdef object _known_heads
126
cdef public int do_cache
128
def __init__(self, parent_map, do_cache=True):
129
"""Create a new KnownGraph instance.
131
:param parent_map: A dictionary mapping key => parent_keys
133
# tests at pre-allocating the node dict actually slowed things down
135
# Maps {sorted(revision_id, revision_id): heads}
136
self._known_heads = {}
137
self.do_cache = int(do_cache)
138
# TODO: consider disabling gc since we are allocating a lot of nodes
139
# that won't be collectable anyway. real world testing has not
140
# shown a specific impact, yet.
141
self._initialize_nodes(parent_map)
144
def __dealloc__(self):
145
cdef _KnownGraphNode child
147
cdef PyObject *temp_node
149
while PyDict_Next(self._nodes, &pos, NULL, &temp_node):
150
child = <_KnownGraphNode>temp_node
151
child.clear_references()
153
cdef _KnownGraphNode _get_or_create_node(self, key):
154
cdef PyObject *temp_node
155
cdef _KnownGraphNode node
157
temp_node = PyDict_GetItem(self._nodes, key)
158
if temp_node == NULL:
159
node = _KnownGraphNode(key)
160
PyDict_SetItem(self._nodes, key, node)
162
node = <_KnownGraphNode>temp_node
165
def _initialize_nodes(self, parent_map):
166
"""Populate self._nodes.
168
After this has finished:
169
- self._nodes will have an entry for every entry in parent_map.
170
- ghosts will have a parent_keys = None,
171
- all nodes found will also have child_keys populated with all known
174
cdef PyObject *temp_key, *temp_parent_keys, *temp_node
175
cdef Py_ssize_t pos, pos2, num_parent_keys
176
cdef _KnownGraphNode node
177
cdef _KnownGraphNode parent_node
179
if not PyDict_CheckExact(parent_map):
180
raise TypeError('parent_map should be a dict of {key:parent_keys}')
181
# for key, parent_keys in parent_map.iteritems():
183
while PyDict_Next(parent_map, &pos, &temp_key, &temp_parent_keys):
184
key = <object>temp_key
185
parent_keys = <object>temp_parent_keys
186
num_parent_keys = len(parent_keys)
187
node = self._get_or_create_node(key)
188
# We know how many parents, so we pre allocate the tuple
189
parent_nodes = PyTuple_New(num_parent_keys)
190
for pos2 from 0 <= pos2 < num_parent_keys:
191
# Note: it costs us 10ms out of 40ms to lookup all of these
192
# parents, it doesn't seem to be an allocation overhead,
193
# but rather a lookup overhead. There doesn't seem to be
194
# a way around it, and that is one reason why
195
# KnownGraphNode maintains a direct pointer to the parent
197
# We use [] because parent_keys may be a tuple or list
198
parent_node = self._get_or_create_node(parent_keys[pos2])
199
# PyTuple_SET_ITEM will steal a reference, so INCREF first
200
Py_INCREF(parent_node)
201
PyTuple_SET_ITEM(parent_nodes, pos2, parent_node)
202
PyList_Append(parent_node.children, node)
203
node.parents = parent_nodes
205
def _find_tails(self):
206
cdef PyObject *temp_node
207
cdef _KnownGraphNode node
212
while PyDict_Next(self._nodes, &pos, NULL, &temp_node):
213
node = <_KnownGraphNode>temp_node
214
if node.parents is None or PyTuple_GET_SIZE(node.parents) == 0:
216
PyList_Append(tails, node)
219
def _find_gdfo(self):
220
cdef _KnownGraphNode node
221
cdef _KnownGraphNode child
225
cdef Py_ssize_t last_item
228
pending = self._find_tails()
230
last_item = PyList_GET_SIZE(pending) - 1
231
while last_item >= 0:
232
# Avoid pop followed by push, instead, peek, and replace
233
# timing shows this is 930ms => 770ms for OOo
234
node = _get_list_node(pending, last_item)
235
last_item = last_item - 1
236
next_gdfo = node.gdfo + 1
237
for pos from 0 <= pos < PyList_GET_SIZE(node.children):
238
child = _get_list_node(node.children, pos)
239
if next_gdfo > child.gdfo:
240
child.gdfo = next_gdfo
241
child.seen = child.seen + 1
242
if child.seen == PyTuple_GET_SIZE(child.parents):
243
# This child is populated, queue it to be walked
244
last_item = last_item + 1
245
if last_item < PyList_GET_SIZE(pending):
246
Py_INCREF(child) # SetItem steals a ref
247
PyList_SetItem(pending, last_item, child)
249
PyList_Append(pending, child)
250
# We have queued this node, we don't need to track it
254
def heads(self, keys):
255
"""Return the heads from amongst keys.
257
This is done by searching the ancestries of each key. Any key that is
258
reachable from another key is not returned; all the others are.
260
This operation scales with the relative depth between any two keys. It
261
uses gdfo to avoid walking all ancestry.
263
:param keys: An iterable of keys.
264
:return: A set of the heads. Note that as a set there is no ordering
265
information. Callers will need to filter their input to create
266
order if they need it.
268
cdef PyObject *maybe_node
269
cdef PyObject *maybe_heads
270
cdef PyObject *temp_node
271
cdef _KnownGraphNode node
272
cdef Py_ssize_t pos, last_item
275
heads_key = frozenset(keys)
276
maybe_heads = PyDict_GetItem(self._known_heads, heads_key)
277
if maybe_heads != NULL:
278
return <object>maybe_heads
279
# Not cached, compute it ourselves
282
maybe_node = PyDict_GetItem(self._nodes, key)
283
if maybe_node == NULL:
284
raise KeyError('key %s not in nodes' % (key,))
285
PyDict_SetItem(candidate_nodes, key, <object>maybe_node)
286
maybe_node = PyDict_GetItem(candidate_nodes, NULL_REVISION)
287
if maybe_node != NULL:
288
# NULL_REVISION is only a head if it is the only entry
289
candidate_nodes.pop(NULL_REVISION)
290
if not candidate_nodes:
291
return frozenset([NULL_REVISION])
292
# The keys changed, so recalculate heads_key
293
heads_key = frozenset(candidate_nodes)
294
if PyDict_Size(candidate_nodes) < 2:
299
# we know a gdfo cannot be longer than a linear chain of all nodes
300
min_gdfo = PyDict_Size(self._nodes) + 1
301
# Build up nodes that need to be walked, note that starting nodes are
302
# not added to seen()
304
while PyDict_Next(candidate_nodes, &pos, NULL, &temp_node):
305
node = <_KnownGraphNode>temp_node
306
if node.parents is not None:
307
pending.extend(node.parents)
308
if node.gdfo < min_gdfo:
311
# Now do all the real work
312
last_item = PyList_GET_SIZE(pending) - 1
313
while last_item >= 0:
314
node = _get_list_node(pending, last_item)
315
last_item = last_item - 1
317
# node already appears in some ancestry
319
PyList_Append(cleanup, node)
321
if node.gdfo <= min_gdfo:
323
if node.parents is not None and PyTuple_GET_SIZE(node.parents) > 0:
324
for pos from 0 <= pos < PyTuple_GET_SIZE(node.parents):
325
parent_node = _get_parent(node.parents, pos)
326
last_item = last_item + 1
327
if last_item < PyList_GET_SIZE(pending):
328
Py_INCREF(parent_node) # SetItem steals a ref
329
PyList_SetItem(pending, last_item, parent_node)
331
PyList_Append(pending, parent_node)
334
while PyDict_Next(candidate_nodes, &pos, NULL, &temp_node):
335
node = <_KnownGraphNode>temp_node
337
PyList_Append(heads, node.key)
338
heads = frozenset(heads)
339
for pos from 0 <= pos < PyList_GET_SIZE(cleanup):
340
node = _get_list_node(cleanup, pos)
343
PyDict_SetItem(self._known_heads, heads_key, heads)
347
"""Return the nodes in topological order.
349
All parents must occur before all children.
351
# This is, for the most part, the same iteration order that we used for
352
# _find_gdfo, consider finding a way to remove the duplication
353
# In general, we find the 'tails' (nodes with no parents), and then
354
# walk to the children. For children that have all of their parents
355
# yielded, we queue up the child to be yielded as well.
356
cdef _KnownGraphNode node
357
cdef _KnownGraphNode child
361
cdef Py_ssize_t last_item
363
pending = self._find_tails()
364
if PyList_GET_SIZE(pending) == 0 and len(self._nodes) > 0:
365
raise errors.GraphCycleError(self._nodes)
369
last_item = PyList_GET_SIZE(pending) - 1
370
while last_item >= 0:
371
# Avoid pop followed by push, instead, peek, and replace
372
# timing shows this is 930ms => 770ms for OOo
373
node = _get_list_node(pending, last_item)
374
last_item = last_item - 1
375
if node.parents is not None:
376
# We don't include ghost parents
377
PyList_Append(topo_order, node.key)
378
for pos from 0 <= pos < PyList_GET_SIZE(node.children):
379
child = _get_list_node(node.children, pos)
381
# We know we have a graph cycle because a node has a parent
382
# which we couldn't find
383
raise errors.GraphCycleError(self._nodes)
384
child.seen = child.seen + 1
385
if child.seen == PyTuple_GET_SIZE(child.parents):
386
# All parents of this child have been yielded, queue this
387
# one to be yielded as well
388
last_item = last_item + 1
389
if last_item < PyList_GET_SIZE(pending):
390
Py_INCREF(child) # SetItem steals a ref
391
PyList_SetItem(pending, last_item, child)
393
PyList_Append(pending, child)
394
# We have queued this node, we don't need to track it
397
# We started from the parents, so we don't need to do anymore work
401
def merge_sort(self, tip_key):
402
"""Compute the merge sorted graph output."""
403
cdef _MergeSorter sorter
405
# TODO: consider disabling gc since we are allocating a lot of nodes
406
# that won't be collectable anyway. real world testing has not
407
# shown a specific impact, yet.
408
sorter = _MergeSorter(self, tip_key)
409
return sorter.topo_order()
412
cdef class _MergeSortNode:
413
"""Tracks information about a node during the merge_sort operation."""
416
cdef public object key
417
cdef public long merge_depth
418
cdef public object end_of_merge # True/False Is this the end of the current merge
420
# Private api, used while computing the information
421
cdef _KnownGraphNode left_parent
422
cdef _KnownGraphNode left_pending_parent
423
cdef object pending_parents # list of _KnownGraphNode for non-left parents
424
cdef long _revno_first
425
cdef long _revno_second
426
cdef long _revno_last
427
# TODO: turn these into flag/bit fields rather than individual members
428
cdef int is_first_child # Is this the first child?
429
cdef int seen_by_child # A child node has seen this parent
430
cdef int completed # Fully Processed
432
def __init__(self, key):
434
self.merge_depth = -1
435
self.left_parent = None
436
self.left_pending_parent = None
437
self.pending_parents = None
438
self._revno_first = -1
439
self._revno_second = -1
440
self._revno_last = -1
441
self.is_first_child = 0
442
self.seen_by_child = 0
446
return '%s(%s depth:%s rev:%s,%s,%s first:%s seen:%s)' % (
447
self.__class__.__name__, self.key,
449
self._revno_first, self._revno_second, self._revno_last,
450
self.is_first_child, self.seen_by_child)
452
cdef int has_pending_parents(self):
453
if self.left_pending_parent is not None or self.pending_parents:
457
cdef object _revno(self):
458
if self._revno_first == -1:
459
if self._revno_second != -1:
460
raise RuntimeError('Something wrong with: %s' % (self,))
461
return (self._revno_last,)
463
return (self._revno_first, self._revno_second, self._revno_last)
470
cdef class _MergeSorter:
471
"""This class does the work of computing the merge_sort ordering.
473
We have some small advantages, in that we get all the extra information
474
that KnownGraph knows, like knowing the child lists, etc.
477
# Current performance numbers for merge_sort(bzr_dev_parent_map):
478
# 302ms tsort.merge_sort()
479
# 91ms graph.KnownGraph().merge_sort()
480
# 40ms kg.merge_sort()
482
cdef KnownGraph graph
483
cdef object _depth_first_stack # list
484
cdef Py_ssize_t _last_stack_item # offset to last item on stack
485
# cdef object _ms_nodes # dict of key => _MergeSortNode
486
cdef object _revno_to_branch_count # {revno => num child branches}
487
cdef object _scheduled_nodes # List of nodes ready to be yielded
489
def __init__(self, known_graph, tip_key):
490
cdef _KnownGraphNode node
492
self.graph = known_graph
493
# self._ms_nodes = {}
494
self._revno_to_branch_count = {}
495
self._depth_first_stack = []
496
self._last_stack_item = -1
497
self._scheduled_nodes = []
498
if (tip_key is not None and tip_key != NULL_REVISION
499
and tip_key != (NULL_REVISION,)):
500
node = self.graph._nodes[tip_key]
501
self._push_node(node, 0)
503
cdef _MergeSortNode _get_ms_node(self, _KnownGraphNode node):
504
cdef PyObject *temp_node
505
cdef _MergeSortNode ms_node
507
if node.extra is None:
508
ms_node = _MergeSortNode(node.key)
511
ms_node = <_MergeSortNode>node.extra
514
cdef _push_node(self, _KnownGraphNode node, long merge_depth):
515
cdef _KnownGraphNode parent_node
516
cdef _MergeSortNode ms_node, ms_parent_node
519
ms_node = self._get_ms_node(node)
520
ms_node.merge_depth = merge_depth
521
if node.parents is None:
522
raise RuntimeError('ghost nodes should not be pushed'
523
' onto the stack: %s' % (node,))
524
if PyTuple_GET_SIZE(node.parents) > 0:
525
parent_node = _get_parent(node.parents, 0)
526
ms_node.left_parent = parent_node
527
if parent_node.parents is None: # left-hand ghost
528
ms_node.left_pending_parent = None
529
ms_node.left_parent = None
531
ms_node.left_pending_parent = parent_node
532
if PyTuple_GET_SIZE(node.parents) > 1:
533
ms_node.pending_parents = []
534
for pos from 1 <= pos < PyTuple_GET_SIZE(node.parents):
535
parent_node = _get_parent(node.parents, pos)
536
if parent_node.parents is None: # ghost
538
PyList_Append(ms_node.pending_parents, parent_node)
540
ms_node.is_first_child = 1
541
if ms_node.left_parent is not None:
542
ms_parent_node = self._get_ms_node(ms_node.left_parent)
543
if ms_parent_node.seen_by_child:
544
ms_node.is_first_child = 0
545
ms_parent_node.seen_by_child = 1
546
self._last_stack_item = self._last_stack_item + 1
547
if self._last_stack_item < PyList_GET_SIZE(self._depth_first_stack):
548
Py_INCREF(node) # SetItem steals a ref
549
PyList_SetItem(self._depth_first_stack, self._last_stack_item,
552
PyList_Append(self._depth_first_stack, node)
554
cdef _pop_node(self):
556
cdef _MergeSortNode ms_node, ms_parent_node, ms_prev_node
557
cdef _KnownGraphNode node, parent_node, prev_node
559
node = _get_list_node(self._depth_first_stack, self._last_stack_item)
560
ms_node = <_MergeSortNode>node.extra
561
self._last_stack_item = self._last_stack_item - 1
562
if ms_node.left_parent is not None:
563
# Assign the revision number from the left-hand parent
564
ms_parent_node = <_MergeSortNode>ms_node.left_parent.extra
565
if ms_node.is_first_child:
566
# First child just increments the final digit
567
ms_node._revno_first = ms_parent_node._revno_first
568
ms_node._revno_second = ms_parent_node._revno_second
569
ms_node._revno_last = ms_parent_node._revno_last + 1
571
# Not the first child, make a new branch
572
# (mainline_revno, branch_count, 1)
573
if ms_parent_node._revno_first == -1:
574
# Mainline ancestor, the increment is on the last digit
575
base_revno = ms_parent_node._revno_last
577
base_revno = ms_parent_node._revno_first
578
temp = PyDict_GetItem(self._revno_to_branch_count,
583
branch_count = (<object>temp) + 1
584
PyDict_SetItem(self._revno_to_branch_count, base_revno,
586
ms_node._revno_first = base_revno
587
ms_node._revno_second = branch_count
588
ms_node._revno_last = 1
590
temp = PyDict_GetItem(self._revno_to_branch_count, 0)
592
# The first root node doesn't have a 3-digit revno
594
ms_node._revno_first = -1
595
ms_node._revno_second = -1
596
ms_node._revno_last = 1
598
root_count = (<object>temp) + 1
599
ms_node._revno_first = 0
600
ms_node._revno_second = root_count
601
ms_node._revno_last = 1
602
PyDict_SetItem(self._revno_to_branch_count, 0, root_count)
603
ms_node.completed = 1
604
if PyList_GET_SIZE(self._scheduled_nodes) == 0:
605
# The first scheduled node is always the end of merge
606
ms_node.end_of_merge = True
608
prev_node = _get_list_node(self._scheduled_nodes,
609
PyList_GET_SIZE(self._scheduled_nodes) - 1)
610
ms_prev_node = <_MergeSortNode>prev_node.extra
611
if ms_prev_node.merge_depth < ms_node.merge_depth:
612
# The previously pushed node is to our left, so this is the end
613
# of this right-hand chain
614
ms_node.end_of_merge = True
615
elif (ms_prev_node.merge_depth == ms_node.merge_depth
616
and prev_node not in node.parents):
617
# The next node is not a direct parent of this node
618
ms_node.end_of_merge = True
620
ms_node.end_of_merge = False
621
PyList_Append(self._scheduled_nodes, node)
623
cdef _schedule_stack(self):
624
cdef _KnownGraphNode last_node, next_node
625
cdef _MergeSortNode ms_node, ms_last_node, ms_next_node
626
cdef long next_merge_depth
628
while self._last_stack_item >= 0:
629
# Peek at the last item on the stack
630
last_node = _get_list_node(self._depth_first_stack,
631
self._last_stack_item)
632
if last_node.gdfo == -1:
633
# if _find_gdfo skipped a node, that means there is a graph
634
# cycle, error out now
635
raise errors.GraphCycleError(self.graph._nodes)
636
ms_last_node = <_MergeSortNode>last_node.extra
637
if not ms_last_node.has_pending_parents():
638
# Processed all parents, pop this node
641
while ms_last_node.has_pending_parents():
642
if ms_last_node.left_pending_parent is not None:
643
# recurse depth first into the primary parent
644
next_node = ms_last_node.left_pending_parent
645
ms_last_node.left_pending_parent = None
647
# place any merges in right-to-left order for scheduling
648
# which gives us left-to-right order after we reverse
649
# the scheduled queue.
650
# Note: This has the effect of allocating common-new
651
# revisions to the right-most subtree rather than the
652
# left most, which will display nicely (you get
653
# smaller trees at the top of the combined merge).
654
next_node = ms_last_node.pending_parents.pop()
655
ms_next_node = self._get_ms_node(next_node)
656
if ms_next_node.completed:
657
# this parent was completed by a child on the
658
# call stack. skip it.
660
# otherwise transfer it from the source graph into the
661
# top of the current depth first search stack.
663
if next_node is ms_last_node.left_parent:
664
next_merge_depth = ms_last_node.merge_depth
666
next_merge_depth = ms_last_node.merge_depth + 1
667
self._push_node(next_node, next_merge_depth)
668
# and do not continue processing parents until this 'call'
672
cdef topo_order(self):
673
cdef _MergeSortNode ms_node
674
cdef _KnownGraphNode node
676
cdef PyObject *temp_key, *temp_node
678
# Note: allocating a _MergeSortNode and deallocating it for all nodes
679
# costs approx 8.52ms (21%) of the total runtime
680
# We might consider moving the attributes into the base
682
self._schedule_stack()
684
# We've set up the basic schedule, now we can continue processing the
686
# Note: This final loop costs us 40.0ms => 28.8ms (11ms, 25%) on
687
# bzr.dev, to convert the internal Object representation into a
688
# Tuple representation...
689
# 2ms is walking the data and computing revno tuples
690
# 7ms is computing the return tuple
691
# 4ms is PyList_Append()
693
# output the result in reverse order, and separate the generated info
694
for pos from PyList_GET_SIZE(self._scheduled_nodes) > pos >= 0:
695
node = _get_list_node(self._scheduled_nodes, pos)
696
ms_node = <_MergeSortNode>node.extra
697
PyList_Append(ordered, ms_node)
699
# Clear out the scheduled nodes now that we're done
700
self._scheduled_nodes = []