~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/_known_graph_pyx.pyx

  • Committer: Vincent Ladeuil
  • Date: 2009-06-18 19:45:24 UTC
  • mto: This revision was merged to the branch mainline in revision 4466.
  • Revision ID: v.ladeuil+lp@free.fr-20090618194524-poedor61th3op5dm
Cleanup.

* bzrlib/tests/test__known_graph.py:
(TestKnownGraph): Delete dominator tests.

* bzrlib/_known_graph_pyx.pyx: 
Cleanup all references to old version and linear dominators :-/

* bzrlib/_known_graph_py.py: 
Cleanup all references to old version and linear dominators :-/

Show diffs side-by-side

added added

removed removed

Lines of Context:
42
42
    void Py_INCREF(object)
43
43
 
44
44
 
45
 
import heapq
46
 
 
47
45
from bzrlib import revision
48
46
 
49
 
# Define these as cdef objects, so we don't have to getattr them later
50
 
cdef object heappush, heappop, heapify, heapreplace
51
 
heappush = heapq.heappush
52
 
heappop = heapq.heappop
53
 
heapify = heapq.heapify
54
 
heapreplace = heapq.heapreplace
55
 
 
56
 
 
57
47
cdef class _KnownGraphNode:
58
48
    """Represents a single object in the known graph."""
59
49
 
60
50
    cdef object key
61
51
    cdef object parents
62
52
    cdef object children
63
 
    cdef _KnownGraphNode linear_dominator_node
64
53
    cdef public object gdfo # Int
65
 
    # This could also be simplified
66
 
    cdef object ancestor_of
67
54
 
68
55
    def __init__(self, key):
69
56
        cdef int i
72
59
        self.parents = None
73
60
 
74
61
        self.children = []
75
 
        # oldest ancestor, such that no parents between here and there have >1
76
 
        # child or >1 parent.
77
 
        self.linear_dominator_node = None
78
62
        # Greatest distance from origin
79
63
        self.gdfo = -1
80
 
        # This will become a tuple of known heads that have this node as an
81
 
        # ancestor
82
 
        self.ancestor_of = None
83
64
 
84
65
    property child_keys:
85
66
        def __get__(self):
90
71
                PyList_Append(keys, child.key)
91
72
            return keys
92
73
 
93
 
    property linear_dominator:
94
 
        def __get__(self):
95
 
            if self.linear_dominator_node is None:
96
 
                return None
97
 
            else:
98
 
                return self.linear_dominator_node.key
99
 
 
100
74
    cdef clear_references(self):
101
75
        self.parents = None
102
76
        self.children = None
103
 
        self.linear_dominator_node = None
104
77
 
105
78
    def __repr__(self):
106
79
        cdef _KnownGraphNode node
113
86
        if self.children is not None:
114
87
            for node in self.children:
115
88
                child_keys.append(node.key)
116
 
        return '%s(%s  gdfo:%s par:%s child:%s %s)' % (
 
89
        return '%s(%s  gdfo:%s par:%s child:%s)' % (
117
90
            self.__class__.__name__, self.key, self.gdfo,
118
 
            parent_keys, child_keys,
119
 
            self.linear_dominator)
120
 
 
121
 
 
122
 
cdef _KnownGraphNode _get_list_node(lst, Py_ssize_t pos):
123
 
    cdef PyObject *temp_node
124
 
 
125
 
    temp_node = PyList_GET_ITEM(lst, pos)
126
 
    return <_KnownGraphNode>temp_node
127
 
 
128
 
 
129
 
cdef _KnownGraphNode _get_parent(parents, Py_ssize_t pos):
130
 
    cdef PyObject *temp_node
131
 
    cdef _KnownGraphNode node
132
 
 
133
 
    temp_node = PyTuple_GET_ITEM(parents, pos)
134
 
    return <_KnownGraphNode>temp_node
135
 
 
136
 
 
137
 
cdef _KnownGraphNode _peek_node(queue):
138
 
    cdef PyObject *temp_node
139
 
    cdef _KnownGraphNode node
140
 
 
141
 
    temp_node = PyTuple_GET_ITEM(<object>PyList_GET_ITEM(queue, 0), 1)
142
 
    node = <_KnownGraphNode>temp_node
143
 
    return node
 
91
            parent_keys, child_keysr)
 
92
 
144
93
 
145
94
# TODO: slab allocate all _KnownGraphNode objects.
146
95
#       We already know how many we are going to need, except for a couple of
153
102
    cdef public object _tails
154
103
    cdef object _known_heads
155
104
    cdef public int do_cache
156
 
    # Nodes we've touched that we'll need to reset their info when heads() is
157
 
    # done
158
 
    cdef object _to_cleanup
159
105
 
160
106
    def __init__(self, parent_map, do_cache=True):
161
107
        """Create a new KnownGraph instance.
165
111
        self._nodes = {}
166
112
        # Maps {sorted(revision_id, revision_id): heads}
167
113
        self._known_heads = {}
168
 
        self._to_cleanup = []
169
114
        self.do_cache = int(do_cache)
170
115
        self._initialize_nodes(parent_map)
171
 
        self._find_linear_dominators()
172
116
        self._find_gdfo()
173
117
 
174
118
    def __dealloc__(self):
243
187
                PyList_Append(parent_node.children, node)
244
188
            node.parents = parent_nodes
245
189
 
246
 
    cdef _KnownGraphNode _check_is_linear(self, _KnownGraphNode node):
247
 
        """Check to see if a given node is part of a linear chain."""
248
 
        cdef _KnownGraphNode parent_node
249
 
        if node.parents is None or PyTuple_GET_SIZE(node.parents) != 1:
250
 
            # This node is either a ghost, a tail, or has multiple parents
251
 
            # It its own dominator
252
 
            node.linear_dominator_node = node
253
 
            return None
254
 
        parent_node = _get_parent(node.parents, 0)
255
 
        if PyList_GET_SIZE(parent_node.children) > 1:
256
 
            # The parent has multiple children, so *this* node is the
257
 
            # dominator
258
 
            node.linear_dominator_node = node
259
 
            return None
260
 
        # The parent is already filled in, so add and continue
261
 
        if parent_node.linear_dominator_node is not None:
262
 
            node.linear_dominator_node = parent_node.linear_dominator_node
263
 
            return None
264
 
        # We don't know this node, or its parent node, so start walking to
265
 
        # next
266
 
        return parent_node
267
 
 
268
 
    def _find_linear_dominators(self):
269
 
        """
270
 
        For any given node, the 'linear dominator' is an ancestor, such that
271
 
        all parents between this node and that one have a single parent, and a
272
 
        single child. So if A->B->C->D then B,C,D all have a linear dominator
273
 
        of A.
274
 
 
275
 
        There are two main benefits:
276
 
        1) When walking the graph, we can jump to the nearest linear dominator,
277
 
           rather than walking all of the nodes inbetween.
278
 
        2) When caching heads() results, dominators give the "same" results as
279
 
           their children. (If the dominator is a head, then the descendant is
280
 
           a head, if the dominator is not a head, then the child isn't
281
 
           either.)
282
 
        """
283
 
        cdef PyObject *temp_node
284
 
        cdef Py_ssize_t pos
285
 
        cdef _KnownGraphNode node
286
 
        cdef _KnownGraphNode next_node
287
 
        cdef _KnownGraphNode dominator
288
 
        cdef int i, num_elements
289
 
 
290
 
        pos = 0
291
 
        while PyDict_Next(self._nodes, &pos, NULL, &temp_node):
292
 
            node = <_KnownGraphNode>temp_node
293
 
            # The parent is not filled in, so walk until we get somewhere
294
 
            if node.linear_dominator_node is not None: #already done
295
 
                continue
296
 
            next_node = self._check_is_linear(node)
297
 
            if next_node is None:
298
 
                # Nothing more needs to be done
299
 
                continue
300
 
            stack = []
301
 
            while next_node is not None:
302
 
                PyList_Append(stack, node)
303
 
                node = next_node
304
 
                next_node = self._check_is_linear(node)
305
 
            # The stack now contains the linear chain, and 'node' should have
306
 
            # been labeled
307
 
            dominator = node.linear_dominator_node
308
 
            num_elements = len(stack)
309
 
            for i from num_elements > i >= 0:
310
 
                next_node = _get_list_node(stack, i)
311
 
                next_node.linear_dominator_node = dominator
312
 
                node = next_node
313
 
 
314
 
    cdef object _find_tails(self):
315
 
        cdef object tails
316
 
        cdef PyObject *temp_node
317
 
        cdef Py_ssize_t pos
318
 
        cdef _KnownGraphNode node
319
 
 
320
 
        tails = []
321
 
        pos = 0
322
 
        while PyDict_Next(self._nodes, &pos, NULL, &temp_node):
323
 
            node = <_KnownGraphNode>temp_node
324
 
            if node.parents is None or PyTuple_GET_SIZE(node.parents) == 0:
325
 
                PyList_Append(tails, node)
326
 
        return tails
327
 
 
328
190
    def _find_gdfo(self):
329
191
        cdef _KnownGraphNode node
330
192
        cdef _KnownGraphNode child
353
215
                    # continue from there
354
216
                    pending.append(child)
355
217
 
356
 
    def x_find_gdfo(self):
357
 
        cdef Py_ssize_t pos, pos2
358
 
        cdef _KnownGraphNode node
359
 
        cdef _KnownGraphNode child_node
360
 
        cdef _KnownGraphNode parent_node
361
 
        cdef int replace_node, missing_parent
362
 
 
363
 
        tails = self._find_tails()
364
 
        todo = []
365
 
        for pos from 0 <= pos < PyList_GET_SIZE(tails):
366
 
            node = _get_list_node(tails, pos)
367
 
            node.gdfo = 1
368
 
            PyList_Append(todo, (1, node))
369
 
        # No need to heapify, because all tails have priority=1
370
 
        while PyList_GET_SIZE(todo) > 0:
371
 
            node = _peek_node(todo)
372
 
            next_gdfo = node.gdfo + 1
373
 
            replace_node = 1
374
 
            for pos from 0 <= pos < PyList_GET_SIZE(node.children):
375
 
                child_node = _get_list_node(node.children, pos)
376
 
                # We should never have numbered children before we numbered
377
 
                # a parent
378
 
                if child_node.gdfo != -1:
379
 
                    continue
380
 
                # Only enque children when all of their parents have been
381
 
                # resolved. With a single parent, we can just take 'this' value
382
 
                child_gdfo = next_gdfo
383
 
                if PyTuple_GET_SIZE(child_node.parents) > 1:
384
 
                    missing_parent = 0
385
 
                    for pos2 from 0 <= pos2 < PyTuple_GET_SIZE(child_node.parents):
386
 
                        parent_node = _get_parent(child_node.parents, pos2)
387
 
                        if parent_node.gdfo == -1:
388
 
                            missing_parent = 1
389
 
                            break
390
 
                        if parent_node.gdfo >= child_gdfo:
391
 
                            child_gdfo = parent_node.gdfo + 1
392
 
                    if missing_parent:
393
 
                        # One of the parents is not numbered, so wait until we get
394
 
                        # back here
395
 
                        continue
396
 
                child_node.gdfo = child_gdfo
397
 
                if replace_node:
398
 
                    heapreplace(todo, (child_gdfo, child_node))
399
 
                    replace_node = 0
400
 
                else:
401
 
                    heappush(todo, (child_gdfo, child_node))
402
 
            if replace_node:
403
 
                heappop(todo)
404
 
 
405
218
    def heads(self, keys):
406
219
        """Return the heads from amongst keys.
407
220
 
469
282
        if self.do_cache:
470
283
            self._known_heads[heads_key] = heads
471
284
        return heads
472
 
 
473
 
    def xheads(self, keys):
474
 
        """Return the heads from amongst keys.
475
 
 
476
 
        This is done by searching the ancestries of each key.  Any key that is
477
 
        reachable from another key is not returned; all the others are.
478
 
 
479
 
        This operation scales with the relative depth between any two keys. If
480
 
        any two keys are completely disconnected all ancestry of both sides
481
 
        will be retrieved.
482
 
 
483
 
        :param keys: An iterable of keys.
484
 
        :return: A set of the heads. Note that as a set there is no ordering
485
 
            information. Callers will need to filter their input to create
486
 
            order if they need it.
487
 
        """
488
 
        cdef PyObject *maybe_node
489
 
        cdef PyObject *maybe_heads
490
 
 
491
 
        heads_key = PyFrozenSet_New(keys)
492
 
        maybe_heads = PyDict_GetItem(self._known_heads, heads_key)
493
 
        if maybe_heads != NULL:
494
 
            return <object>maybe_heads
495
 
 
496
 
        # Not cached, compute it ourselves
497
 
        candidate_nodes = {}
498
 
        nodes = self._nodes
499
 
        for key in keys:
500
 
            maybe_node = PyDict_GetItem(nodes, key)
501
 
            if maybe_node == NULL:
502
 
                raise KeyError('key %s not in nodes' % (key,))
503
 
            PyDict_SetItem(candidate_nodes, key, <object>maybe_node)
504
 
        if revision.NULL_REVISION in candidate_nodes:
505
 
            # NULL_REVISION is only a head if it is the only entry
506
 
            candidate_nodes.pop(revision.NULL_REVISION)
507
 
            if not candidate_nodes:
508
 
                return set([revision.NULL_REVISION])
509
 
            # The keys changed, so recalculate heads_key
510
 
            heads_key = PyFrozenSet_New(candidate_nodes)
511
 
        if len(candidate_nodes) < 2:
512
 
            return heads_key
513
 
        dom_to_node = self._get_dominators_to_nodes(candidate_nodes)
514
 
        if PyDict_Size(candidate_nodes) < 2:
515
 
            return frozenset(candidate_nodes)
516
 
        dom_lookup_key, heads = self._heads_from_dominators(candidate_nodes,
517
 
                                                            dom_to_node)
518
 
        if heads is not None:
519
 
            if self.do_cache:
520
 
                # This heads was not in the cache, or it would have been caught
521
 
                # earlier, but the dom head *was*, so do the simple cache
522
 
                PyDict_SetItem(self._known_heads, heads_key, heads)
523
 
            return heads
524
 
        heads = self._heads_from_candidate_nodes(candidate_nodes, dom_to_node)
525
 
        if self.do_cache:
526
 
            self._cache_heads(heads, heads_key, dom_lookup_key, candidate_nodes)
527
 
        return heads
528
 
 
529
 
    cdef object _cache_heads(self, heads, heads_key, dom_lookup_key,
530
 
                             candidate_nodes):
531
 
        cdef PyObject *maybe_node
532
 
        cdef _KnownGraphNode node
533
 
 
534
 
        PyDict_SetItem(self._known_heads, heads_key, heads)
535
 
        dom_heads = []
536
 
        for key in heads:
537
 
            maybe_node = PyDict_GetItem(candidate_nodes, key)
538
 
            if maybe_node == NULL:
539
 
                raise KeyError
540
 
            node = <_KnownGraphNode>maybe_node
541
 
            PyList_Append(dom_heads, node.linear_dominator_node.key)
542
 
        PyDict_SetItem(self._known_heads, dom_lookup_key,
543
 
                       PyFrozenSet_New(dom_heads))
544
 
 
545
 
    cdef _get_dominators_to_nodes(self, candidate_nodes):
546
 
        """Get the reverse mapping from dominator_key => candidate_nodes.
547
 
 
548
 
        As a side effect, this can also remove potential candidate nodes if we
549
 
        determine that they share a dominator.
550
 
        """
551
 
        cdef Py_ssize_t pos
552
 
        cdef _KnownGraphNode node, other_node
553
 
        cdef PyObject *temp_node
554
 
        cdef PyObject *maybe_node
555
 
 
556
 
        dom_to_node = {}
557
 
        keys_to_remove = []
558
 
        pos = 0
559
 
        while PyDict_Next(candidate_nodes, &pos, NULL, &temp_node):
560
 
            node = <_KnownGraphNode>temp_node
561
 
            dom_key = node.linear_dominator_node.key
562
 
            maybe_node = PyDict_GetItem(dom_to_node, dom_key)
563
 
            if maybe_node == NULL:
564
 
                PyDict_SetItem(dom_to_node, dom_key, node)
565
 
            else:
566
 
                other_node = <_KnownGraphNode>maybe_node
567
 
                # These nodes share a dominator, one of them obviously
568
 
                # supersedes the other, figure out which
569
 
                if other_node.gdfo > node.gdfo:
570
 
                    PyList_Append(keys_to_remove, node.key)
571
 
                else:
572
 
                    # This wins, replace the other
573
 
                    PyList_Append(keys_to_remove, other_node.key)
574
 
                    PyDict_SetItem(dom_to_node, dom_key, node)
575
 
        for pos from 0 <= pos < PyList_GET_SIZE(keys_to_remove):
576
 
            key = <object>PyList_GET_ITEM(keys_to_remove, pos)
577
 
            candidate_nodes.pop(key)
578
 
        return dom_to_node
579
 
 
580
 
    cdef object _heads_from_dominators(self, candidate_nodes, dom_to_node):
581
 
        cdef PyObject *maybe_heads
582
 
        cdef PyObject *maybe_node
583
 
        cdef _KnownGraphNode node
584
 
        cdef Py_ssize_t pos
585
 
        cdef PyObject *temp_node
586
 
 
587
 
        dom_list_key = []
588
 
        pos = 0
589
 
        while PyDict_Next(candidate_nodes, &pos, NULL, &temp_node):
590
 
            node = <_KnownGraphNode>temp_node
591
 
            PyList_Append(dom_list_key, node.linear_dominator_node.key)
592
 
        dom_lookup_key = PyFrozenSet_New(dom_list_key)
593
 
        maybe_heads = PyDict_GetItem(self._known_heads, dom_lookup_key)
594
 
        if maybe_heads == NULL:
595
 
            return dom_lookup_key, None
596
 
        # We need to map back from the dominator head to the original keys
597
 
        dom_heads = <object>maybe_heads
598
 
        heads = []
599
 
        for dom_key in dom_heads:
600
 
            maybe_node = PyDict_GetItem(dom_to_node, dom_key)
601
 
            if maybe_node == NULL:
602
 
                # Should never happen
603
 
                raise KeyError
604
 
            node = <_KnownGraphNode>maybe_node
605
 
            PyList_Append(heads, node.key)
606
 
        return dom_lookup_key, PyFrozenSet_New(heads)
607
 
 
608
 
    cdef int _process_parent(self, _KnownGraphNode node,
609
 
                             _KnownGraphNode parent_node,
610
 
                             candidate_nodes, dom_to_node,
611
 
                             queue, int *replace_item, min_gdfo) except -1:
612
 
        """Process the parent of a node, seeing if we need to walk it."""
613
 
        cdef PyObject *maybe_candidate
614
 
        cdef PyObject *maybe_node
615
 
        cdef _KnownGraphNode dom_child_node
616
 
        maybe_candidate = PyDict_GetItem(candidate_nodes, parent_node.key)
617
 
        if maybe_candidate != NULL:
618
 
            candidate_nodes.pop(parent_node.key)
619
 
            # We could pass up a flag that tells the caller to stop processing,
620
 
            # but it doesn't help much, and makes the code uglier
621
 
            return 0
622
 
        maybe_node = PyDict_GetItem(dom_to_node, parent_node.key)
623
 
        if maybe_node != NULL:
624
 
            # This is a dominator of a node
625
 
            dom_child_node = <_KnownGraphNode>maybe_node
626
 
            if dom_child_node is not node:
627
 
                # It isn't a dominator of a node we are searching, so we should
628
 
                # remove it from the search
629
 
                maybe_candidate = PyDict_GetItem(candidate_nodes, dom_child_node.key)
630
 
                if maybe_candidate != NULL:
631
 
                    candidate_nodes.pop(dom_child_node.key)
632
 
                    return 0
633
 
        if parent_node.gdfo < min_gdfo:
634
 
            # Do not enque this node, it is too old
635
 
            return 0
636
 
        if parent_node.ancestor_of is None:
637
 
            # This node hasn't been walked yet, so just project node's ancestor
638
 
            # info directly to parent_node, and enqueue it for later processing
639
 
            parent_node.ancestor_of = node.ancestor_of
640
 
            if replace_item[0]:
641
 
                heapreplace(queue, (-parent_node.gdfo, parent_node))
642
 
                replace_item[0] = 0
643
 
            else:
644
 
                heappush(queue, (-parent_node.gdfo, parent_node))
645
 
            PyList_Append(self._to_cleanup, parent_node)
646
 
        elif parent_node.ancestor_of != node.ancestor_of:
647
 
            # Combine to get the full set of parents
648
 
            # Rewrite using PySet_* functions, unfortunately you have to use
649
 
            # PySet_Add since there is no PySet_Update... :(
650
 
            all_ancestors = set(parent_node.ancestor_of)
651
 
            for k in node.ancestor_of:
652
 
                PySet_Add(all_ancestors, k)
653
 
            parent_node.ancestor_of = tuple(sorted(all_ancestors))
654
 
        return 0
655
 
 
656
 
    cdef object _heads_from_candidate_nodes(self, candidate_nodes, dom_to_node):
657
 
        cdef _KnownGraphNode node
658
 
        cdef _KnownGraphNode parent_node
659
 
        cdef Py_ssize_t num_candidates
660
 
        cdef int num_parents, replace_item
661
 
        cdef Py_ssize_t pos
662
 
        cdef PyObject *temp_node
663
 
 
664
 
        queue = []
665
 
        pos = 0
666
 
        min_gdfo = None
667
 
        while PyDict_Next(candidate_nodes, &pos, NULL, &temp_node):
668
 
            node = <_KnownGraphNode>temp_node
669
 
            node.ancestor_of = (node.key,)
670
 
            PyList_Append(queue, (-node.gdfo, node))
671
 
            PyList_Append(self._to_cleanup, node)
672
 
            if min_gdfo is None:
673
 
                min_gdfo = node.gdfo
674
 
            elif node.gdfo < min_gdfo:
675
 
                min_gdfo = node.gdfo
676
 
        heapify(queue)
677
 
        # These are nodes that we determined are 'common' that we are no longer
678
 
        # walking
679
 
        # Now we walk nodes until all nodes that are being walked are 'common'
680
 
        num_candidates = len(candidate_nodes)
681
 
        replace_item = 0
682
 
        while PyList_GET_SIZE(queue) > 0 and PyDict_Size(candidate_nodes) > 1:
683
 
            if replace_item:
684
 
                # We still need to pop the smallest member out of the queue
685
 
                # before we peek again
686
 
                heappop(queue)
687
 
                if PyList_GET_SIZE(queue) == 0:
688
 
                    break
689
 
            # peek at the smallest item. We don't pop, because we expect we'll
690
 
            # need to push more things into the queue anyway
691
 
            node = _peek_node(queue)
692
 
            replace_item = 1
693
 
            if PyTuple_GET_SIZE(node.ancestor_of) == num_candidates:
694
 
                # This node is now considered 'common'
695
 
                # Make sure all parent nodes are marked as such
696
 
                for pos from 0 <= pos < PyTuple_GET_SIZE(node.parents):
697
 
                    parent_node = _get_parent(node.parents, pos)
698
 
                    if parent_node.ancestor_of is not None:
699
 
                        parent_node.ancestor_of = node.ancestor_of
700
 
                if node.linear_dominator_node is not node:
701
 
                    parent_node = node.linear_dominator_node
702
 
                    if parent_node.ancestor_of is not None:
703
 
                        parent_node.ancestor_of = node.ancestor_of
704
 
                continue
705
 
            if node.parents is None:
706
 
                # This is a ghost
707
 
                continue
708
 
            # Now project the current nodes ancestor list to the parent nodes,
709
 
            # and queue them up to be walked
710
 
            if node.linear_dominator_node is not node:
711
 
                # We are at the tip of a long linear region
712
 
                # We know that there is nothing between here and the tail
713
 
                # that is interesting, so skip to the end
714
 
                self._process_parent(node, node.linear_dominator_node,
715
 
                                     candidate_nodes, dom_to_node, queue,
716
 
                                     &replace_item, min_gdfo)
717
 
            else:
718
 
                for pos from 0 <= pos < PyTuple_GET_SIZE(node.parents):
719
 
                    parent_node = _get_parent(node.parents, pos)
720
 
                    self._process_parent(node, parent_node, candidate_nodes,
721
 
                                         dom_to_node, queue, &replace_item,
722
 
                                         min_gdfo)
723
 
        for pos from 0 <= pos < PyList_GET_SIZE(self._to_cleanup):
724
 
            node = _get_list_node(self._to_cleanup, pos)
725
 
            node.ancestor_of = None
726
 
        self._to_cleanup = []
727
 
        return PyFrozenSet_New(candidate_nodes)