~bzr-pqm/bzr/bzr.dev

4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
1
# Copyright (C) 2009 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
17
"""Implementation of Graph algorithms when we have already loaded everything.
18
"""
19
20
from bzrlib import (
4593.5.30 by John Arbash Meinel
Move the topo_sort implementation into KnownGraph, rather than calling back to it.
21
    errors,
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
22
    revision,
23
    )
24
25
26
class _KnownGraphNode(object):
27
    """Represents a single object in the known graph."""
28
4371.4.10 by Vincent Ladeuil
Cleanup.
29
    __slots__ = ('key', 'parent_keys', 'child_keys', 'gdfo')
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
30
31
    def __init__(self, key, parent_keys):
32
        self.key = key
33
        self.parent_keys = parent_keys
34
        self.child_keys = []
35
        # Greatest distance from origin
36
        self.gdfo = None
37
38
    def __repr__(self):
4371.4.10 by Vincent Ladeuil
Cleanup.
39
        return '%s(%s  gdfo:%s par:%s child:%s)' % (
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
40
            self.__class__.__name__, self.key, self.gdfo,
4371.4.10 by Vincent Ladeuil
Cleanup.
41
            self.parent_keys, self.child_keys)
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
42
43
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
44
class _MergeSortNode(object):
45
    """Information about a specific node in the merge graph."""
46
47
    __slots__ = ('key', 'merge_depth', 'revno', 'end_of_merge')
48
49
    def __init__(self, key, merge_depth, revno, end_of_merge):
50
        self.key = key
51
        self.merge_depth = merge_depth
52
        self.revno = revno
53
        self.end_of_merge = end_of_merge
54
55
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
56
class KnownGraph(object):
57
    """This is a class which assumes we already know the full graph."""
58
59
    def __init__(self, parent_map, do_cache=True):
60
        """Create a new KnownGraph instance.
61
62
        :param parent_map: A dictionary mapping key => parent_keys
63
        """
64
        self._nodes = {}
65
        # Maps {sorted(revision_id, revision_id): heads}
66
        self._known_heads = {}
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
67
        self.do_cache = do_cache
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
68
        self._initialize_nodes(parent_map)
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
69
        self._find_gdfo()
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
70
71
    def _initialize_nodes(self, parent_map):
72
        """Populate self._nodes.
73
4371.4.9 by Vincent Ladeuil
Simplify gdfo computing by finding tails when at graph build time.
74
        After this has finished:
75
        - self._nodes will have an entry for every entry in parent_map.
76
        - ghosts will have a parent_keys = None,
77
        - all nodes found will also have .child_keys populated with all known
78
          child_keys,
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
79
        """
80
        nodes = self._nodes
81
        for key, parent_keys in parent_map.iteritems():
82
            if key in nodes:
83
                node = nodes[key]
84
                node.parent_keys = parent_keys
85
            else:
86
                node = _KnownGraphNode(key, parent_keys)
87
                nodes[key] = node
88
            for parent_key in parent_keys:
89
                try:
90
                    parent_node = nodes[parent_key]
91
                except KeyError:
92
                    parent_node = _KnownGraphNode(parent_key, None)
93
                    nodes[parent_key] = parent_node
94
                parent_node.child_keys.append(key)
95
4454.3.78 by John Arbash Meinel
Found a bug in the pure-python KnownGraph code.
96
    def _find_tails(self):
97
        return [node for node in self._nodes.itervalues()
98
                if not node.parent_keys]
99
4641.5.4 by John Arbash Meinel
Implement a basic algorithm for the pure-python KnownGraph code.
100
    def _find_tips(self):
101
        return [node for node in self._nodes.itervalues()
102
                      if not node.child_keys]
103
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
104
    def _find_gdfo(self):
4371.4.1 by Vincent Ladeuil
A new recursive gdfo processing method.
105
        nodes = self._nodes
4371.4.10 by Vincent Ladeuil
Cleanup.
106
        known_parent_gdfos = {}
4371.4.1 by Vincent Ladeuil
A new recursive gdfo processing method.
107
        pending = []
108
4454.3.78 by John Arbash Meinel
Found a bug in the pure-python KnownGraph code.
109
        for node in self._find_tails():
4371.4.9 by Vincent Ladeuil
Simplify gdfo computing by finding tails when at graph build time.
110
            node.gdfo = 1
111
            pending.append(node)
112
4371.4.8 by Vincent Ladeuil
Replace the existing KnownGraph._find_gdfo.
113
        while pending:
114
            node = pending.pop()
4371.4.1 by Vincent Ladeuil
A new recursive gdfo processing method.
115
            for child_key in node.child_keys:
116
                child = nodes[child_key]
4371.4.19 by John Arbash Meinel
Update the python code to do the same checking around known_parent_gdfos being present.
117
                if child_key in known_parent_gdfos:
118
                    known_gdfo = known_parent_gdfos[child_key] + 1
119
                    present = True
120
                else:
121
                    known_gdfo = 1
122
                    present = False
4371.4.1 by Vincent Ladeuil
A new recursive gdfo processing method.
123
                if child.gdfo is None or node.gdfo + 1 > child.gdfo:
124
                    child.gdfo = node.gdfo + 1
4371.4.19 by John Arbash Meinel
Update the python code to do the same checking around known_parent_gdfos being present.
125
                if known_gdfo == len(child.parent_keys):
4371.4.1 by Vincent Ladeuil
A new recursive gdfo processing method.
126
                    # We are the last parent updating that node, we can
127
                    # continue from there
4371.4.2 by Vincent Ladeuil
Same gdfo processing, without recursion.
128
                    pending.append(child)
4371.4.19 by John Arbash Meinel
Update the python code to do the same checking around known_parent_gdfos being present.
129
                    if present:
130
                        del known_parent_gdfos[child_key]
131
                else:
132
                    # Update known_parent_gdfos for a key we couldn't process
133
                    known_parent_gdfos[child_key] = known_gdfo
4371.4.1 by Vincent Ladeuil
A new recursive gdfo processing method.
134
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
135
    def heads(self, keys):
136
        """Return the heads from amongst keys.
137
138
        This is done by searching the ancestries of each key.  Any key that is
139
        reachable from another key is not returned; all the others are.
140
4371.4.3 by Vincent Ladeuil
A new heads() method based on gdfo.
141
        This operation scales with the relative depth between any two keys. It
142
        uses gdfo to avoid walking all ancestry.
143
144
        :param keys: An iterable of keys.
145
        :return: A set of the heads. Note that as a set there is no ordering
146
            information. Callers will need to filter their input to create
147
            order if they need it.
148
        """
149
        candidate_nodes = dict((key, self._nodes[key]) for key in keys)
150
        if revision.NULL_REVISION in candidate_nodes:
151
            # NULL_REVISION is only a head if it is the only entry
152
            candidate_nodes.pop(revision.NULL_REVISION)
153
            if not candidate_nodes:
154
                return frozenset([revision.NULL_REVISION])
155
        if len(candidate_nodes) < 2:
156
            # No or only one candidate
157
            return frozenset(candidate_nodes)
158
        heads_key = frozenset(candidate_nodes)
159
        # Do we have a cached result ?
160
        try:
161
            heads = self._known_heads[heads_key]
162
            return heads
163
        except KeyError:
164
            pass
165
        # Let's compute the heads
4371.4.4 by Vincent Ladeuil
Feedback from John, 'seen' is a set().
166
        seen = set()
4371.4.3 by Vincent Ladeuil
A new heads() method based on gdfo.
167
        pending = []
168
        min_gdfo = None
169
        for node in candidate_nodes.values():
4371.4.7 by Vincent Ladeuil
Simple Pyrex version for the new heads() method.
170
            if node.parent_keys:
4371.4.3 by Vincent Ladeuil
A new heads() method based on gdfo.
171
                pending.extend(node.parent_keys)
172
            if min_gdfo is None or node.gdfo < min_gdfo:
173
                min_gdfo = node.gdfo
174
        nodes = self._nodes
175
        while pending:
176
            node_key = pending.pop()
177
            if node_key in seen:
178
                # node already appears in some ancestry
179
                continue
4371.4.4 by Vincent Ladeuil
Feedback from John, 'seen' is a set().
180
            seen.add(node_key)
4371.4.3 by Vincent Ladeuil
A new heads() method based on gdfo.
181
            node = nodes[node_key]
182
            if node.gdfo <= min_gdfo:
183
                continue
4371.4.7 by Vincent Ladeuil
Simple Pyrex version for the new heads() method.
184
            if node.parent_keys:
4371.4.3 by Vincent Ladeuil
A new heads() method based on gdfo.
185
                pending.extend(node.parent_keys)
4371.4.4 by Vincent Ladeuil
Feedback from John, 'seen' is a set().
186
        heads = heads_key.difference(seen)
4371.4.3 by Vincent Ladeuil
A new heads() method based on gdfo.
187
        if self.do_cache:
188
            self._known_heads[heads_key] = heads
189
        return heads
190
4593.5.2 by John Arbash Meinel
Implement a very basic version of KnownGraph.topo_sort that just
191
    def topo_sort(self):
4593.5.30 by John Arbash Meinel
Move the topo_sort implementation into KnownGraph, rather than calling back to it.
192
        """Return the nodes in topological order.
193
194
        All parents must occur before all children.
195
        """
196
        for node in self._nodes.itervalues():
197
            if node.gdfo is None:
198
                raise errors.GraphCycleError(self._nodes)
199
        pending = self._find_tails()
200
        pending_pop = pending.pop
201
        pending_append = pending.append
202
203
        topo_order = []
204
        topo_order_append = topo_order.append
205
206
        num_seen_parents = dict.fromkeys(self._nodes, 0)
207
        while pending:
208
            node = pending_pop()
209
            if node.parent_keys is not None:
210
                # We don't include ghost parents
211
                topo_order_append(node.key)
212
            for child_key in node.child_keys:
213
                child_node = self._nodes[child_key]
214
                seen_parents = num_seen_parents[child_key] + 1
215
                if seen_parents == len(child_node.parent_keys):
216
                    # All parents have been processed, enqueue this child
217
                    pending_append(child_node)
218
                    # This has been queued up, stop tracking it
219
                    del num_seen_parents[child_key]
220
                else:
221
                    num_seen_parents[child_key] = seen_parents
222
        # We started from the parents, so we don't need to do anymore work
223
        return topo_order
4593.5.6 by John Arbash Meinel
Create the crude merge_sort implementation that just thunks over to the old implementation.
224
4641.5.2 by John Arbash Meinel
Get ready to move the tests into KnownGraph implementation tests.
225
    def gc_sort(self):
226
        """Return a reverse topological ordering which is 'stable'.
227
228
        There are a few constraints:
229
          1) Reverse topological (all children before all parents)
230
          2) Grouped by prefix
231
          3) 'stable' sorting, so that we get the same result, independent of
232
             machine, or extra data.
233
        To do this, we use the same basic algorithm as topo_sort, but when we
234
        aren't sure what node to access next, we sort them lexicographically.
235
        """
4641.5.4 by John Arbash Meinel
Implement a basic algorithm for the pure-python KnownGraph code.
236
        tips = self._find_tips()
237
        # Split the tips based on prefix
238
        prefix_tips = {}
239
        for node in tips:
240
            if node.key.__class__ is str or len(node.key) == 1:
241
                prefix = ''
242
            else:
243
                prefix = node.key[0]
244
            prefix_tips.setdefault(prefix, []).append(node)
245
246
        num_seen_children = dict.fromkeys(self._nodes, 0)
247
248
        result = []
249
        for prefix in sorted(prefix_tips):
250
            pending = sorted(prefix_tips[prefix], key=lambda n:n.key,
251
                             reverse=True)
252
            while pending:
253
                node = pending.pop()
4641.5.6 by John Arbash Meinel
Add support for skipping ghost nodes.
254
                if node.parent_keys is None:
255
                    # Ghost node, skip it
256
                    continue
4641.5.4 by John Arbash Meinel
Implement a basic algorithm for the pure-python KnownGraph code.
257
                result.append(node.key)
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
258
                for parent_key in sorted(node.parent_keys, reverse=True):
4641.5.4 by John Arbash Meinel
Implement a basic algorithm for the pure-python KnownGraph code.
259
                    parent_node = self._nodes[parent_key]
260
                    seen_children = num_seen_children[parent_key] + 1
261
                    if seen_children == len(parent_node.child_keys):
262
                        # All children have been processed, enqueue this parent
263
                        pending.append(parent_node)
264
                        # This has been queued up, stop tracking it
265
                        del num_seen_children[parent_key]
266
                    else:
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
267
                        num_seen_children[parent_key] = seen_children
4641.5.4 by John Arbash Meinel
Implement a basic algorithm for the pure-python KnownGraph code.
268
        return result
4641.5.2 by John Arbash Meinel
Get ready to move the tests into KnownGraph implementation tests.
269
4593.5.6 by John Arbash Meinel
Create the crude merge_sort implementation that just thunks over to the old implementation.
270
    def merge_sort(self, tip_key):
271
        """Compute the merge sorted graph output."""
4593.5.33 by John Arbash Meinel
Late import tsort because it causes an import cycle.
272
        from bzrlib import tsort
4593.5.6 by John Arbash Meinel
Create the crude merge_sort implementation that just thunks over to the old implementation.
273
        as_parent_map = dict((node.key, node.parent_keys)
4593.5.25 by John Arbash Meinel
Add tests that we detect GraphCycleError
274
                             for node in self._nodes.itervalues()
275
                              if node.parent_keys is not None)
4593.5.6 by John Arbash Meinel
Create the crude merge_sort implementation that just thunks over to the old implementation.
276
        # We intentionally always generate revnos and never force the
277
        # mainline_revisions
4593.5.32 by John Arbash Meinel
With the new api, we don't return 'sequence_number' anymore.
278
        # Strip the sequence_number that merge_sort generates
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
279
        return [_MergeSortNode(key, merge_depth, revno, end_of_merge)
280
                for _, key, merge_depth, revno, end_of_merge
281
                 in tsort.merge_sort(as_parent_map, tip_key,
282
                                     mainline_revisions=None,
283
                                     generate_revno=True)]
4648.2.1 by Gary van der Merwe
Add get_parent_keys, and get_child_keys to KnownGraph.
284
    
285
    def get_parent_keys(self, key):
286
        """Get the parents for a key
287
        
288
        Returns a list containg the parents keys. If the key is a ghost,
289
        None is returned. A KeyError will be raised if the key is not in
290
        the graph.
291
        
292
        :param keys: Key to check (eg revision_id)
293
        :return: A list of parents
294
        """
295
        return self._nodes[key].parent_keys
296
297
    def get_child_keys(self, key):
298
        """Get the children for a key
299
        
300
        Returns a list containg the children keys. A KeyError will be raised
301
        if the key is not in the graph.
302
        
303
        :param keys: Key to check (eg revision_id)
304
        :return: A list of children
305
        """
306
        return self._nodes[key].child_keys