~bzr-pqm/bzr/bzr.dev

4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
1
# Copyright (C) 2009 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
17
"""Implementation of Graph algorithms when we have already loaded everything.
18
"""
19
4838.1.1 by John Arbash Meinel
Implement KnownGraph.add_node(), along with tests, for the python version.
20
from collections import deque
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
21
from bzrlib import (
4593.5.30 by John Arbash Meinel
Move the topo_sort implementation into KnownGraph, rather than calling back to it.
22
    errors,
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
23
    revision,
24
    )
25
26
27
class _KnownGraphNode(object):
28
    """Represents a single object in the known graph."""
29
4371.4.10 by Vincent Ladeuil
Cleanup.
30
    __slots__ = ('key', 'parent_keys', 'child_keys', 'gdfo')
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
31
32
    def __init__(self, key, parent_keys):
33
        self.key = key
34
        self.parent_keys = parent_keys
35
        self.child_keys = []
36
        # Greatest distance from origin
37
        self.gdfo = None
38
39
    def __repr__(self):
4371.4.10 by Vincent Ladeuil
Cleanup.
40
        return '%s(%s  gdfo:%s par:%s child:%s)' % (
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
41
            self.__class__.__name__, self.key, self.gdfo,
4371.4.10 by Vincent Ladeuil
Cleanup.
42
            self.parent_keys, self.child_keys)
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
43
44
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
45
class _MergeSortNode(object):
46
    """Information about a specific node in the merge graph."""
47
48
    __slots__ = ('key', 'merge_depth', 'revno', 'end_of_merge')
49
50
    def __init__(self, key, merge_depth, revno, end_of_merge):
51
        self.key = key
52
        self.merge_depth = merge_depth
53
        self.revno = revno
54
        self.end_of_merge = end_of_merge
55
56
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
57
class KnownGraph(object):
58
    """This is a class which assumes we already know the full graph."""
59
60
    def __init__(self, parent_map, do_cache=True):
61
        """Create a new KnownGraph instance.
62
63
        :param parent_map: A dictionary mapping key => parent_keys
64
        """
65
        self._nodes = {}
4838.1.2 by John Arbash Meinel
Implement KnownGraph.add_node() for the pyrex version.
66
        # Maps {frozenset(revision_id, revision_id): heads}
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
67
        self._known_heads = {}
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
68
        self.do_cache = do_cache
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
69
        self._initialize_nodes(parent_map)
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
70
        self._find_gdfo()
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
71
72
    def _initialize_nodes(self, parent_map):
73
        """Populate self._nodes.
74
4371.4.9 by Vincent Ladeuil
Simplify gdfo computing by finding tails when at graph build time.
75
        After this has finished:
76
        - self._nodes will have an entry for every entry in parent_map.
77
        - ghosts will have a parent_keys = None,
78
        - all nodes found will also have .child_keys populated with all known
79
          child_keys,
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
80
        """
81
        nodes = self._nodes
82
        for key, parent_keys in parent_map.iteritems():
83
            if key in nodes:
84
                node = nodes[key]
85
                node.parent_keys = parent_keys
86
            else:
87
                node = _KnownGraphNode(key, parent_keys)
88
                nodes[key] = node
89
            for parent_key in parent_keys:
90
                try:
91
                    parent_node = nodes[parent_key]
92
                except KeyError:
93
                    parent_node = _KnownGraphNode(parent_key, None)
94
                    nodes[parent_key] = parent_node
95
                parent_node.child_keys.append(key)
96
4454.3.78 by John Arbash Meinel
Found a bug in the pure-python KnownGraph code.
97
    def _find_tails(self):
98
        return [node for node in self._nodes.itervalues()
99
                if not node.parent_keys]
100
4641.5.4 by John Arbash Meinel
Implement a basic algorithm for the pure-python KnownGraph code.
101
    def _find_tips(self):
102
        return [node for node in self._nodes.itervalues()
103
                      if not node.child_keys]
104
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
105
    def _find_gdfo(self):
4371.4.1 by Vincent Ladeuil
A new recursive gdfo processing method.
106
        nodes = self._nodes
4371.4.10 by Vincent Ladeuil
Cleanup.
107
        known_parent_gdfos = {}
4371.4.1 by Vincent Ladeuil
A new recursive gdfo processing method.
108
        pending = []
109
4454.3.78 by John Arbash Meinel
Found a bug in the pure-python KnownGraph code.
110
        for node in self._find_tails():
4371.4.9 by Vincent Ladeuil
Simplify gdfo computing by finding tails when at graph build time.
111
            node.gdfo = 1
112
            pending.append(node)
113
4371.4.8 by Vincent Ladeuil
Replace the existing KnownGraph._find_gdfo.
114
        while pending:
115
            node = pending.pop()
4371.4.1 by Vincent Ladeuil
A new recursive gdfo processing method.
116
            for child_key in node.child_keys:
117
                child = nodes[child_key]
4371.4.19 by John Arbash Meinel
Update the python code to do the same checking around known_parent_gdfos being present.
118
                if child_key in known_parent_gdfos:
119
                    known_gdfo = known_parent_gdfos[child_key] + 1
120
                    present = True
121
                else:
122
                    known_gdfo = 1
123
                    present = False
4371.4.1 by Vincent Ladeuil
A new recursive gdfo processing method.
124
                if child.gdfo is None or node.gdfo + 1 > child.gdfo:
125
                    child.gdfo = node.gdfo + 1
4371.4.19 by John Arbash Meinel
Update the python code to do the same checking around known_parent_gdfos being present.
126
                if known_gdfo == len(child.parent_keys):
4371.4.1 by Vincent Ladeuil
A new recursive gdfo processing method.
127
                    # We are the last parent updating that node, we can
128
                    # continue from there
4371.4.2 by Vincent Ladeuil
Same gdfo processing, without recursion.
129
                    pending.append(child)
4371.4.19 by John Arbash Meinel
Update the python code to do the same checking around known_parent_gdfos being present.
130
                    if present:
131
                        del known_parent_gdfos[child_key]
132
                else:
133
                    # Update known_parent_gdfos for a key we couldn't process
134
                    known_parent_gdfos[child_key] = known_gdfo
4371.4.1 by Vincent Ladeuil
A new recursive gdfo processing method.
135
4838.1.1 by John Arbash Meinel
Implement KnownGraph.add_node(), along with tests, for the python version.
136
    def add_node(self, key, parent_keys):
4838.1.2 by John Arbash Meinel
Implement KnownGraph.add_node() for the pyrex version.
137
        """Add a new node to the graph.
138
139
        If this fills in a ghost, then the gdfos of all children will be
140
        updated accordingly.
141
        
142
        :param key: The node being added. If this is a duplicate, this is a
143
            no-op.
144
        :param parent_keys: The parents of the given node.
145
        :return: None (should we return if this was a ghost, etc?)
146
        """
4838.1.1 by John Arbash Meinel
Implement KnownGraph.add_node(), along with tests, for the python version.
147
        nodes = self._nodes
148
        if key in nodes:
149
            node = nodes[key]
150
            if node.parent_keys is None:
151
                node.parent_keys = parent_keys
4838.1.2 by John Arbash Meinel
Implement KnownGraph.add_node() for the pyrex version.
152
                # A ghost is being added, we can no-longer trust the heads
153
                # cache, so clear it
154
                self._known_heads.clear()
4838.1.1 by John Arbash Meinel
Implement KnownGraph.add_node(), along with tests, for the python version.
155
            else:
4838.1.2 by John Arbash Meinel
Implement KnownGraph.add_node() for the pyrex version.
156
                # Make sure we compare a list to a list, as tuple != list.
157
                parent_keys = list(parent_keys)
158
                existing_parent_keys = list(node.parent_keys)
159
                if parent_keys == existing_parent_keys:
160
                    return # Identical content
161
                else:
162
                    raise ValueError('Parent key mismatch, existing node %s'
163
                        ' has parents of %s not %s'
164
                        % (key, existing_parent_keys, parent_keys))
4838.1.1 by John Arbash Meinel
Implement KnownGraph.add_node(), along with tests, for the python version.
165
        else:
166
            node = _KnownGraphNode(key, parent_keys)
167
            nodes[key] = node
168
        parent_gdfo = 0
169
        for parent_key in parent_keys:
170
            try:
171
                parent_node = nodes[parent_key]
172
            except KeyError:
173
                parent_node = _KnownGraphNode(parent_key, None)
174
                # Ghosts and roots have gdfo 1
175
                parent_node.gdfo = 1
176
                nodes[parent_key] = parent_node
177
            if parent_gdfo < parent_node.gdfo:
178
                parent_gdfo = parent_node.gdfo
179
            parent_node.child_keys.append(key)
180
        node.gdfo = parent_gdfo + 1
181
        # Now fill the gdfo to all children
182
        # Note that this loop is slightly inefficient, in that we may visit the
183
        # same child (and its decendents) more than once, however, it is
184
        # 'efficient' in that we only walk to nodes that would be updated,
185
        # rather than all nodes
186
        # We use a deque rather than a simple list stack, to go for BFD rather
187
        # than DFD. So that if a longer path is possible, we walk it before we
188
        # get to the final child
189
        pending = deque([node])
190
        while pending:
191
            node = pending.popleft()
192
            next_gdfo = node.gdfo + 1
193
            for child_key in node.child_keys:
194
                child = nodes[child_key]
195
                if child.gdfo < next_gdfo:
196
                    # This child is being updated, we need to check its
197
                    # children
198
                    child.gdfo = next_gdfo
199
                    pending.append(child)
200
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
201
    def heads(self, keys):
202
        """Return the heads from amongst keys.
203
204
        This is done by searching the ancestries of each key.  Any key that is
205
        reachable from another key is not returned; all the others are.
206
4371.4.3 by Vincent Ladeuil
A new heads() method based on gdfo.
207
        This operation scales with the relative depth between any two keys. It
208
        uses gdfo to avoid walking all ancestry.
209
210
        :param keys: An iterable of keys.
211
        :return: A set of the heads. Note that as a set there is no ordering
212
            information. Callers will need to filter their input to create
213
            order if they need it.
214
        """
215
        candidate_nodes = dict((key, self._nodes[key]) for key in keys)
216
        if revision.NULL_REVISION in candidate_nodes:
217
            # NULL_REVISION is only a head if it is the only entry
218
            candidate_nodes.pop(revision.NULL_REVISION)
219
            if not candidate_nodes:
220
                return frozenset([revision.NULL_REVISION])
221
        if len(candidate_nodes) < 2:
222
            # No or only one candidate
223
            return frozenset(candidate_nodes)
224
        heads_key = frozenset(candidate_nodes)
225
        # Do we have a cached result ?
226
        try:
227
            heads = self._known_heads[heads_key]
228
            return heads
229
        except KeyError:
230
            pass
231
        # Let's compute the heads
4371.4.4 by Vincent Ladeuil
Feedback from John, 'seen' is a set().
232
        seen = set()
4371.4.3 by Vincent Ladeuil
A new heads() method based on gdfo.
233
        pending = []
234
        min_gdfo = None
235
        for node in candidate_nodes.values():
4371.4.7 by Vincent Ladeuil
Simple Pyrex version for the new heads() method.
236
            if node.parent_keys:
4371.4.3 by Vincent Ladeuil
A new heads() method based on gdfo.
237
                pending.extend(node.parent_keys)
238
            if min_gdfo is None or node.gdfo < min_gdfo:
239
                min_gdfo = node.gdfo
240
        nodes = self._nodes
241
        while pending:
242
            node_key = pending.pop()
243
            if node_key in seen:
244
                # node already appears in some ancestry
245
                continue
4371.4.4 by Vincent Ladeuil
Feedback from John, 'seen' is a set().
246
            seen.add(node_key)
4371.4.3 by Vincent Ladeuil
A new heads() method based on gdfo.
247
            node = nodes[node_key]
248
            if node.gdfo <= min_gdfo:
249
                continue
4371.4.7 by Vincent Ladeuil
Simple Pyrex version for the new heads() method.
250
            if node.parent_keys:
4371.4.3 by Vincent Ladeuil
A new heads() method based on gdfo.
251
                pending.extend(node.parent_keys)
4371.4.4 by Vincent Ladeuil
Feedback from John, 'seen' is a set().
252
        heads = heads_key.difference(seen)
4371.4.3 by Vincent Ladeuil
A new heads() method based on gdfo.
253
        if self.do_cache:
254
            self._known_heads[heads_key] = heads
255
        return heads
256
4593.5.2 by John Arbash Meinel
Implement a very basic version of KnownGraph.topo_sort that just
257
    def topo_sort(self):
4593.5.30 by John Arbash Meinel
Move the topo_sort implementation into KnownGraph, rather than calling back to it.
258
        """Return the nodes in topological order.
259
260
        All parents must occur before all children.
261
        """
262
        for node in self._nodes.itervalues():
263
            if node.gdfo is None:
264
                raise errors.GraphCycleError(self._nodes)
265
        pending = self._find_tails()
266
        pending_pop = pending.pop
267
        pending_append = pending.append
268
269
        topo_order = []
270
        topo_order_append = topo_order.append
271
272
        num_seen_parents = dict.fromkeys(self._nodes, 0)
273
        while pending:
274
            node = pending_pop()
275
            if node.parent_keys is not None:
276
                # We don't include ghost parents
277
                topo_order_append(node.key)
278
            for child_key in node.child_keys:
279
                child_node = self._nodes[child_key]
280
                seen_parents = num_seen_parents[child_key] + 1
281
                if seen_parents == len(child_node.parent_keys):
282
                    # All parents have been processed, enqueue this child
283
                    pending_append(child_node)
284
                    # This has been queued up, stop tracking it
285
                    del num_seen_parents[child_key]
286
                else:
287
                    num_seen_parents[child_key] = seen_parents
288
        # We started from the parents, so we don't need to do anymore work
289
        return topo_order
4593.5.6 by John Arbash Meinel
Create the crude merge_sort implementation that just thunks over to the old implementation.
290
4641.5.2 by John Arbash Meinel
Get ready to move the tests into KnownGraph implementation tests.
291
    def gc_sort(self):
292
        """Return a reverse topological ordering which is 'stable'.
293
294
        There are a few constraints:
295
          1) Reverse topological (all children before all parents)
296
          2) Grouped by prefix
297
          3) 'stable' sorting, so that we get the same result, independent of
298
             machine, or extra data.
299
        To do this, we use the same basic algorithm as topo_sort, but when we
300
        aren't sure what node to access next, we sort them lexicographically.
301
        """
4641.5.4 by John Arbash Meinel
Implement a basic algorithm for the pure-python KnownGraph code.
302
        tips = self._find_tips()
303
        # Split the tips based on prefix
304
        prefix_tips = {}
305
        for node in tips:
306
            if node.key.__class__ is str or len(node.key) == 1:
307
                prefix = ''
308
            else:
309
                prefix = node.key[0]
310
            prefix_tips.setdefault(prefix, []).append(node)
311
312
        num_seen_children = dict.fromkeys(self._nodes, 0)
313
314
        result = []
315
        for prefix in sorted(prefix_tips):
316
            pending = sorted(prefix_tips[prefix], key=lambda n:n.key,
317
                             reverse=True)
318
            while pending:
319
                node = pending.pop()
4641.5.6 by John Arbash Meinel
Add support for skipping ghost nodes.
320
                if node.parent_keys is None:
321
                    # Ghost node, skip it
322
                    continue
4641.5.4 by John Arbash Meinel
Implement a basic algorithm for the pure-python KnownGraph code.
323
                result.append(node.key)
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
324
                for parent_key in sorted(node.parent_keys, reverse=True):
4641.5.4 by John Arbash Meinel
Implement a basic algorithm for the pure-python KnownGraph code.
325
                    parent_node = self._nodes[parent_key]
326
                    seen_children = num_seen_children[parent_key] + 1
327
                    if seen_children == len(parent_node.child_keys):
328
                        # All children have been processed, enqueue this parent
329
                        pending.append(parent_node)
330
                        # This has been queued up, stop tracking it
331
                        del num_seen_children[parent_key]
332
                    else:
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
333
                        num_seen_children[parent_key] = seen_children
4641.5.4 by John Arbash Meinel
Implement a basic algorithm for the pure-python KnownGraph code.
334
        return result
4641.5.2 by John Arbash Meinel
Get ready to move the tests into KnownGraph implementation tests.
335
4593.5.6 by John Arbash Meinel
Create the crude merge_sort implementation that just thunks over to the old implementation.
336
    def merge_sort(self, tip_key):
337
        """Compute the merge sorted graph output."""
4593.5.33 by John Arbash Meinel
Late import tsort because it causes an import cycle.
338
        from bzrlib import tsort
4593.5.6 by John Arbash Meinel
Create the crude merge_sort implementation that just thunks over to the old implementation.
339
        as_parent_map = dict((node.key, node.parent_keys)
4593.5.25 by John Arbash Meinel
Add tests that we detect GraphCycleError
340
                             for node in self._nodes.itervalues()
341
                              if node.parent_keys is not None)
4593.5.6 by John Arbash Meinel
Create the crude merge_sort implementation that just thunks over to the old implementation.
342
        # We intentionally always generate revnos and never force the
343
        # mainline_revisions
4593.5.32 by John Arbash Meinel
With the new api, we don't return 'sequence_number' anymore.
344
        # Strip the sequence_number that merge_sort generates
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
345
        return [_MergeSortNode(key, merge_depth, revno, end_of_merge)
346
                for _, key, merge_depth, revno, end_of_merge
347
                 in tsort.merge_sort(as_parent_map, tip_key,
348
                                     mainline_revisions=None,
349
                                     generate_revno=True)]
4648.2.1 by Gary van der Merwe
Add get_parent_keys, and get_child_keys to KnownGraph.
350
    
351
    def get_parent_keys(self, key):
352
        """Get the parents for a key
353
        
354
        Returns a list containg the parents keys. If the key is a ghost,
355
        None is returned. A KeyError will be raised if the key is not in
356
        the graph.
357
        
358
        :param keys: Key to check (eg revision_id)
359
        :return: A list of parents
360
        """
361
        return self._nodes[key].parent_keys
362
363
    def get_child_keys(self, key):
364
        """Get the children for a key
365
        
366
        Returns a list containg the children keys. A KeyError will be raised
367
        if the key is not in the graph.
368
        
369
        :param keys: Key to check (eg revision_id)
370
        :return: A list of children
371
        """
372
        return self._nodes[key].child_keys