~bzr-pqm/bzr/bzr.dev

4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
1
# Copyright (C) 2009 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
17
"""Implementation of Graph algorithms when we have already loaded everything.
18
"""
19
20
from bzrlib import (
4593.5.30 by John Arbash Meinel
Move the topo_sort implementation into KnownGraph, rather than calling back to it.
21
    errors,
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
22
    revision,
23
    )
24
25
26
class _KnownGraphNode(object):
27
    """Represents a single object in the known graph."""
28
4371.4.10 by Vincent Ladeuil
Cleanup.
29
    __slots__ = ('key', 'parent_keys', 'child_keys', 'gdfo')
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
30
31
    def __init__(self, key, parent_keys):
32
        self.key = key
33
        self.parent_keys = parent_keys
34
        self.child_keys = []
35
        # Greatest distance from origin
36
        self.gdfo = None
37
38
    def __repr__(self):
4371.4.10 by Vincent Ladeuil
Cleanup.
39
        return '%s(%s  gdfo:%s par:%s child:%s)' % (
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
40
            self.__class__.__name__, self.key, self.gdfo,
4371.4.10 by Vincent Ladeuil
Cleanup.
41
            self.parent_keys, self.child_keys)
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
42
43
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
44
class _MergeSortNode(object):
45
    """Information about a specific node in the merge graph."""
46
47
    __slots__ = ('key', 'merge_depth', 'revno', 'end_of_merge')
48
49
    def __init__(self, key, merge_depth, revno, end_of_merge):
50
        self.key = key
51
        self.merge_depth = merge_depth
52
        self.revno = revno
53
        self.end_of_merge = end_of_merge
54
55
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
56
class KnownGraph(object):
57
    """This is a class which assumes we already know the full graph."""
58
59
    def __init__(self, parent_map, do_cache=True):
60
        """Create a new KnownGraph instance.
61
62
        :param parent_map: A dictionary mapping key => parent_keys
63
        """
64
        self._nodes = {}
65
        # Maps {sorted(revision_id, revision_id): heads}
66
        self._known_heads = {}
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
67
        self.do_cache = do_cache
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
68
        self._initialize_nodes(parent_map)
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
69
        self._find_gdfo()
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
70
71
    def _initialize_nodes(self, parent_map):
72
        """Populate self._nodes.
73
4371.4.9 by Vincent Ladeuil
Simplify gdfo computing by finding tails when at graph build time.
74
        After this has finished:
75
        - self._nodes will have an entry for every entry in parent_map.
76
        - ghosts will have a parent_keys = None,
77
        - all nodes found will also have .child_keys populated with all known
78
          child_keys,
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
79
        """
80
        nodes = self._nodes
81
        for key, parent_keys in parent_map.iteritems():
82
            if key in nodes:
83
                node = nodes[key]
84
                node.parent_keys = parent_keys
85
            else:
86
                node = _KnownGraphNode(key, parent_keys)
87
                nodes[key] = node
88
            for parent_key in parent_keys:
89
                try:
90
                    parent_node = nodes[parent_key]
91
                except KeyError:
92
                    parent_node = _KnownGraphNode(parent_key, None)
93
                    nodes[parent_key] = parent_node
94
                parent_node.child_keys.append(key)
95
4454.3.78 by John Arbash Meinel
Found a bug in the pure-python KnownGraph code.
96
    def _find_tails(self):
97
        return [node for node in self._nodes.itervalues()
98
                if not node.parent_keys]
99
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
100
    def _find_gdfo(self):
4371.4.1 by Vincent Ladeuil
A new recursive gdfo processing method.
101
        nodes = self._nodes
4371.4.10 by Vincent Ladeuil
Cleanup.
102
        known_parent_gdfos = {}
4371.4.1 by Vincent Ladeuil
A new recursive gdfo processing method.
103
        pending = []
104
4454.3.78 by John Arbash Meinel
Found a bug in the pure-python KnownGraph code.
105
        for node in self._find_tails():
4371.4.9 by Vincent Ladeuil
Simplify gdfo computing by finding tails when at graph build time.
106
            node.gdfo = 1
107
            pending.append(node)
108
4371.4.8 by Vincent Ladeuil
Replace the existing KnownGraph._find_gdfo.
109
        while pending:
110
            node = pending.pop()
4371.4.1 by Vincent Ladeuil
A new recursive gdfo processing method.
111
            for child_key in node.child_keys:
112
                child = nodes[child_key]
4371.4.19 by John Arbash Meinel
Update the python code to do the same checking around known_parent_gdfos being present.
113
                if child_key in known_parent_gdfos:
114
                    known_gdfo = known_parent_gdfos[child_key] + 1
115
                    present = True
116
                else:
117
                    known_gdfo = 1
118
                    present = False
4371.4.1 by Vincent Ladeuil
A new recursive gdfo processing method.
119
                if child.gdfo is None or node.gdfo + 1 > child.gdfo:
120
                    child.gdfo = node.gdfo + 1
4371.4.19 by John Arbash Meinel
Update the python code to do the same checking around known_parent_gdfos being present.
121
                if known_gdfo == len(child.parent_keys):
4371.4.1 by Vincent Ladeuil
A new recursive gdfo processing method.
122
                    # We are the last parent updating that node, we can
123
                    # continue from there
4371.4.2 by Vincent Ladeuil
Same gdfo processing, without recursion.
124
                    pending.append(child)
4371.4.19 by John Arbash Meinel
Update the python code to do the same checking around known_parent_gdfos being present.
125
                    if present:
126
                        del known_parent_gdfos[child_key]
127
                else:
128
                    # Update known_parent_gdfos for a key we couldn't process
129
                    known_parent_gdfos[child_key] = known_gdfo
4371.4.1 by Vincent Ladeuil
A new recursive gdfo processing method.
130
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
131
    def heads(self, keys):
132
        """Return the heads from amongst keys.
133
134
        This is done by searching the ancestries of each key.  Any key that is
135
        reachable from another key is not returned; all the others are.
136
4371.4.3 by Vincent Ladeuil
A new heads() method based on gdfo.
137
        This operation scales with the relative depth between any two keys. It
138
        uses gdfo to avoid walking all ancestry.
139
140
        :param keys: An iterable of keys.
141
        :return: A set of the heads. Note that as a set there is no ordering
142
            information. Callers will need to filter their input to create
143
            order if they need it.
144
        """
145
        candidate_nodes = dict((key, self._nodes[key]) for key in keys)
146
        if revision.NULL_REVISION in candidate_nodes:
147
            # NULL_REVISION is only a head if it is the only entry
148
            candidate_nodes.pop(revision.NULL_REVISION)
149
            if not candidate_nodes:
150
                return frozenset([revision.NULL_REVISION])
151
        if len(candidate_nodes) < 2:
152
            # No or only one candidate
153
            return frozenset(candidate_nodes)
154
        heads_key = frozenset(candidate_nodes)
155
        # Do we have a cached result ?
156
        try:
157
            heads = self._known_heads[heads_key]
158
            return heads
159
        except KeyError:
160
            pass
161
        # Let's compute the heads
4371.4.4 by Vincent Ladeuil
Feedback from John, 'seen' is a set().
162
        seen = set()
4371.4.3 by Vincent Ladeuil
A new heads() method based on gdfo.
163
        pending = []
164
        min_gdfo = None
165
        for node in candidate_nodes.values():
4371.4.7 by Vincent Ladeuil
Simple Pyrex version for the new heads() method.
166
            if node.parent_keys:
4371.4.3 by Vincent Ladeuil
A new heads() method based on gdfo.
167
                pending.extend(node.parent_keys)
168
            if min_gdfo is None or node.gdfo < min_gdfo:
169
                min_gdfo = node.gdfo
170
        nodes = self._nodes
171
        while pending:
172
            node_key = pending.pop()
173
            if node_key in seen:
174
                # node already appears in some ancestry
175
                continue
4371.4.4 by Vincent Ladeuil
Feedback from John, 'seen' is a set().
176
            seen.add(node_key)
4371.4.3 by Vincent Ladeuil
A new heads() method based on gdfo.
177
            node = nodes[node_key]
178
            if node.gdfo <= min_gdfo:
179
                continue
4371.4.7 by Vincent Ladeuil
Simple Pyrex version for the new heads() method.
180
            if node.parent_keys:
4371.4.3 by Vincent Ladeuil
A new heads() method based on gdfo.
181
                pending.extend(node.parent_keys)
4371.4.4 by Vincent Ladeuil
Feedback from John, 'seen' is a set().
182
        heads = heads_key.difference(seen)
4371.4.3 by Vincent Ladeuil
A new heads() method based on gdfo.
183
        if self.do_cache:
184
            self._known_heads[heads_key] = heads
185
        return heads
186
4593.5.2 by John Arbash Meinel
Implement a very basic version of KnownGraph.topo_sort that just
187
    def topo_sort(self):
4593.5.30 by John Arbash Meinel
Move the topo_sort implementation into KnownGraph, rather than calling back to it.
188
        """Return the nodes in topological order.
189
190
        All parents must occur before all children.
191
        """
192
        for node in self._nodes.itervalues():
193
            if node.gdfo is None:
194
                raise errors.GraphCycleError(self._nodes)
195
        pending = self._find_tails()
196
        pending_pop = pending.pop
197
        pending_append = pending.append
198
199
        topo_order = []
200
        topo_order_append = topo_order.append
201
202
        num_seen_parents = dict.fromkeys(self._nodes, 0)
203
        while pending:
204
            node = pending_pop()
205
            if node.parent_keys is not None:
206
                # We don't include ghost parents
207
                topo_order_append(node.key)
208
            for child_key in node.child_keys:
209
                child_node = self._nodes[child_key]
210
                seen_parents = num_seen_parents[child_key] + 1
211
                if seen_parents == len(child_node.parent_keys):
212
                    # All parents have been processed, enqueue this child
213
                    pending_append(child_node)
214
                    # This has been queued up, stop tracking it
215
                    del num_seen_parents[child_key]
216
                else:
217
                    num_seen_parents[child_key] = seen_parents
218
        # We started from the parents, so we don't need to do anymore work
219
        return topo_order
4593.5.6 by John Arbash Meinel
Create the crude merge_sort implementation that just thunks over to the old implementation.
220
221
    def merge_sort(self, tip_key):
222
        """Compute the merge sorted graph output."""
4593.5.33 by John Arbash Meinel
Late import tsort because it causes an import cycle.
223
        from bzrlib import tsort
4593.5.6 by John Arbash Meinel
Create the crude merge_sort implementation that just thunks over to the old implementation.
224
        as_parent_map = dict((node.key, node.parent_keys)
4593.5.25 by John Arbash Meinel
Add tests that we detect GraphCycleError
225
                             for node in self._nodes.itervalues()
226
                              if node.parent_keys is not None)
4593.5.6 by John Arbash Meinel
Create the crude merge_sort implementation that just thunks over to the old implementation.
227
        # We intentionally always generate revnos and never force the
228
        # mainline_revisions
4593.5.32 by John Arbash Meinel
With the new api, we don't return 'sequence_number' anymore.
229
        # Strip the sequence_number that merge_sort generates
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
230
        return [_MergeSortNode(key, merge_depth, revno, end_of_merge)
231
                for _, key, merge_depth, revno, end_of_merge
232
                 in tsort.merge_sort(as_parent_map, tip_key,
233
                                     mainline_revisions=None,
234
                                     generate_revno=True)]