~bzr-pqm/bzr/bzr.dev

4763.2.4 by John Arbash Meinel
merge bzr.2.1 in preparation for NEWS entry.
1
# Copyright (C) 2009, 2010 Canonical Ltd
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
17
"""Implementation of Graph algorithms when we have already loaded everything.
18
"""
19
6379.6.7 by Jelmer Vernooij
Move importing from future until after doc string, otherwise the doc string will disappear.
20
from __future__ import absolute_import
21
4838.1.1 by John Arbash Meinel
Implement KnownGraph.add_node(), along with tests, for the python version.
22
from collections import deque
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
23
from bzrlib import (
4593.5.30 by John Arbash Meinel
Move the topo_sort implementation into KnownGraph, rather than calling back to it.
24
    errors,
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
25
    revision,
26
    )
27
28
29
class _KnownGraphNode(object):
30
    """Represents a single object in the known graph."""
31
4371.4.10 by Vincent Ladeuil
Cleanup.
32
    __slots__ = ('key', 'parent_keys', 'child_keys', 'gdfo')
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
33
34
    def __init__(self, key, parent_keys):
35
        self.key = key
36
        self.parent_keys = parent_keys
37
        self.child_keys = []
38
        # Greatest distance from origin
39
        self.gdfo = None
40
41
    def __repr__(self):
4371.4.10 by Vincent Ladeuil
Cleanup.
42
        return '%s(%s  gdfo:%s par:%s child:%s)' % (
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
43
            self.__class__.__name__, self.key, self.gdfo,
4371.4.10 by Vincent Ladeuil
Cleanup.
44
            self.parent_keys, self.child_keys)
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
45
46
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
47
class _MergeSortNode(object):
48
    """Information about a specific node in the merge graph."""
49
50
    __slots__ = ('key', 'merge_depth', 'revno', 'end_of_merge')
51
52
    def __init__(self, key, merge_depth, revno, end_of_merge):
53
        self.key = key
54
        self.merge_depth = merge_depth
55
        self.revno = revno
56
        self.end_of_merge = end_of_merge
57
58
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
59
class KnownGraph(object):
60
    """This is a class which assumes we already know the full graph."""
61
62
    def __init__(self, parent_map, do_cache=True):
63
        """Create a new KnownGraph instance.
64
65
        :param parent_map: A dictionary mapping key => parent_keys
66
        """
67
        self._nodes = {}
4838.1.2 by John Arbash Meinel
Implement KnownGraph.add_node() for the pyrex version.
68
        # Maps {frozenset(revision_id, revision_id): heads}
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
69
        self._known_heads = {}
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
70
        self.do_cache = do_cache
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
71
        self._initialize_nodes(parent_map)
4371.3.19 by John Arbash Meinel
Get an initial pyrex implementation.
72
        self._find_gdfo()
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
73
74
    def _initialize_nodes(self, parent_map):
75
        """Populate self._nodes.
76
4371.4.9 by Vincent Ladeuil
Simplify gdfo computing by finding tails when at graph build time.
77
        After this has finished:
78
        - self._nodes will have an entry for every entry in parent_map.
79
        - ghosts will have a parent_keys = None,
80
        - all nodes found will also have .child_keys populated with all known
81
          child_keys,
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
82
        """
83
        nodes = self._nodes
84
        for key, parent_keys in parent_map.iteritems():
85
            if key in nodes:
86
                node = nodes[key]
87
                node.parent_keys = parent_keys
88
            else:
89
                node = _KnownGraphNode(key, parent_keys)
90
                nodes[key] = node
91
            for parent_key in parent_keys:
92
                try:
93
                    parent_node = nodes[parent_key]
94
                except KeyError:
95
                    parent_node = _KnownGraphNode(parent_key, None)
96
                    nodes[parent_key] = parent_node
97
                parent_node.child_keys.append(key)
98
4454.3.78 by John Arbash Meinel
Found a bug in the pure-python KnownGraph code.
99
    def _find_tails(self):
100
        return [node for node in self._nodes.itervalues()
101
                if not node.parent_keys]
102
4641.5.4 by John Arbash Meinel
Implement a basic algorithm for the pure-python KnownGraph code.
103
    def _find_tips(self):
104
        return [node for node in self._nodes.itervalues()
105
                      if not node.child_keys]
106
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
107
    def _find_gdfo(self):
4371.4.1 by Vincent Ladeuil
A new recursive gdfo processing method.
108
        nodes = self._nodes
4371.4.10 by Vincent Ladeuil
Cleanup.
109
        known_parent_gdfos = {}
4371.4.1 by Vincent Ladeuil
A new recursive gdfo processing method.
110
        pending = []
111
4454.3.78 by John Arbash Meinel
Found a bug in the pure-python KnownGraph code.
112
        for node in self._find_tails():
4371.4.9 by Vincent Ladeuil
Simplify gdfo computing by finding tails when at graph build time.
113
            node.gdfo = 1
114
            pending.append(node)
115
4371.4.8 by Vincent Ladeuil
Replace the existing KnownGraph._find_gdfo.
116
        while pending:
117
            node = pending.pop()
4371.4.1 by Vincent Ladeuil
A new recursive gdfo processing method.
118
            for child_key in node.child_keys:
119
                child = nodes[child_key]
4371.4.19 by John Arbash Meinel
Update the python code to do the same checking around known_parent_gdfos being present.
120
                if child_key in known_parent_gdfos:
121
                    known_gdfo = known_parent_gdfos[child_key] + 1
122
                    present = True
123
                else:
124
                    known_gdfo = 1
125
                    present = False
4371.4.1 by Vincent Ladeuil
A new recursive gdfo processing method.
126
                if child.gdfo is None or node.gdfo + 1 > child.gdfo:
127
                    child.gdfo = node.gdfo + 1
4371.4.19 by John Arbash Meinel
Update the python code to do the same checking around known_parent_gdfos being present.
128
                if known_gdfo == len(child.parent_keys):
4371.4.1 by Vincent Ladeuil
A new recursive gdfo processing method.
129
                    # We are the last parent updating that node, we can
130
                    # continue from there
4371.4.2 by Vincent Ladeuil
Same gdfo processing, without recursion.
131
                    pending.append(child)
4371.4.19 by John Arbash Meinel
Update the python code to do the same checking around known_parent_gdfos being present.
132
                    if present:
133
                        del known_parent_gdfos[child_key]
134
                else:
135
                    # Update known_parent_gdfos for a key we couldn't process
136
                    known_parent_gdfos[child_key] = known_gdfo
4371.4.1 by Vincent Ladeuil
A new recursive gdfo processing method.
137
4838.1.1 by John Arbash Meinel
Implement KnownGraph.add_node(), along with tests, for the python version.
138
    def add_node(self, key, parent_keys):
4838.1.2 by John Arbash Meinel
Implement KnownGraph.add_node() for the pyrex version.
139
        """Add a new node to the graph.
140
141
        If this fills in a ghost, then the gdfos of all children will be
142
        updated accordingly.
143
        
144
        :param key: The node being added. If this is a duplicate, this is a
145
            no-op.
146
        :param parent_keys: The parents of the given node.
147
        :return: None (should we return if this was a ghost, etc?)
148
        """
4838.1.1 by John Arbash Meinel
Implement KnownGraph.add_node(), along with tests, for the python version.
149
        nodes = self._nodes
150
        if key in nodes:
151
            node = nodes[key]
152
            if node.parent_keys is None:
153
                node.parent_keys = parent_keys
4838.1.2 by John Arbash Meinel
Implement KnownGraph.add_node() for the pyrex version.
154
                # A ghost is being added, we can no-longer trust the heads
155
                # cache, so clear it
156
                self._known_heads.clear()
4838.1.1 by John Arbash Meinel
Implement KnownGraph.add_node(), along with tests, for the python version.
157
            else:
4838.1.2 by John Arbash Meinel
Implement KnownGraph.add_node() for the pyrex version.
158
                # Make sure we compare a list to a list, as tuple != list.
159
                parent_keys = list(parent_keys)
160
                existing_parent_keys = list(node.parent_keys)
161
                if parent_keys == existing_parent_keys:
162
                    return # Identical content
163
                else:
164
                    raise ValueError('Parent key mismatch, existing node %s'
165
                        ' has parents of %s not %s'
166
                        % (key, existing_parent_keys, parent_keys))
4838.1.1 by John Arbash Meinel
Implement KnownGraph.add_node(), along with tests, for the python version.
167
        else:
168
            node = _KnownGraphNode(key, parent_keys)
169
            nodes[key] = node
170
        parent_gdfo = 0
171
        for parent_key in parent_keys:
172
            try:
173
                parent_node = nodes[parent_key]
174
            except KeyError:
175
                parent_node = _KnownGraphNode(parent_key, None)
176
                # Ghosts and roots have gdfo 1
177
                parent_node.gdfo = 1
178
                nodes[parent_key] = parent_node
179
            if parent_gdfo < parent_node.gdfo:
180
                parent_gdfo = parent_node.gdfo
181
            parent_node.child_keys.append(key)
182
        node.gdfo = parent_gdfo + 1
183
        # Now fill the gdfo to all children
184
        # Note that this loop is slightly inefficient, in that we may visit the
185
        # same child (and its decendents) more than once, however, it is
186
        # 'efficient' in that we only walk to nodes that would be updated,
187
        # rather than all nodes
188
        # We use a deque rather than a simple list stack, to go for BFD rather
189
        # than DFD. So that if a longer path is possible, we walk it before we
190
        # get to the final child
191
        pending = deque([node])
192
        while pending:
193
            node = pending.popleft()
194
            next_gdfo = node.gdfo + 1
195
            for child_key in node.child_keys:
196
                child = nodes[child_key]
197
                if child.gdfo < next_gdfo:
198
                    # This child is being updated, we need to check its
199
                    # children
200
                    child.gdfo = next_gdfo
201
                    pending.append(child)
202
4371.3.18 by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code.
203
    def heads(self, keys):
204
        """Return the heads from amongst keys.
205
206
        This is done by searching the ancestries of each key.  Any key that is
207
        reachable from another key is not returned; all the others are.
208
4371.4.3 by Vincent Ladeuil
A new heads() method based on gdfo.
209
        This operation scales with the relative depth between any two keys. It
210
        uses gdfo to avoid walking all ancestry.
211
212
        :param keys: An iterable of keys.
213
        :return: A set of the heads. Note that as a set there is no ordering
214
            information. Callers will need to filter their input to create
215
            order if they need it.
216
        """
217
        candidate_nodes = dict((key, self._nodes[key]) for key in keys)
218
        if revision.NULL_REVISION in candidate_nodes:
219
            # NULL_REVISION is only a head if it is the only entry
220
            candidate_nodes.pop(revision.NULL_REVISION)
221
            if not candidate_nodes:
222
                return frozenset([revision.NULL_REVISION])
223
        if len(candidate_nodes) < 2:
224
            # No or only one candidate
225
            return frozenset(candidate_nodes)
226
        heads_key = frozenset(candidate_nodes)
227
        # Do we have a cached result ?
228
        try:
229
            heads = self._known_heads[heads_key]
230
            return heads
231
        except KeyError:
232
            pass
233
        # Let's compute the heads
4371.4.4 by Vincent Ladeuil
Feedback from John, 'seen' is a set().
234
        seen = set()
4371.4.3 by Vincent Ladeuil
A new heads() method based on gdfo.
235
        pending = []
236
        min_gdfo = None
237
        for node in candidate_nodes.values():
4371.4.7 by Vincent Ladeuil
Simple Pyrex version for the new heads() method.
238
            if node.parent_keys:
4371.4.3 by Vincent Ladeuil
A new heads() method based on gdfo.
239
                pending.extend(node.parent_keys)
240
            if min_gdfo is None or node.gdfo < min_gdfo:
241
                min_gdfo = node.gdfo
242
        nodes = self._nodes
243
        while pending:
244
            node_key = pending.pop()
245
            if node_key in seen:
246
                # node already appears in some ancestry
247
                continue
4371.4.4 by Vincent Ladeuil
Feedback from John, 'seen' is a set().
248
            seen.add(node_key)
4371.4.3 by Vincent Ladeuil
A new heads() method based on gdfo.
249
            node = nodes[node_key]
250
            if node.gdfo <= min_gdfo:
251
                continue
4371.4.7 by Vincent Ladeuil
Simple Pyrex version for the new heads() method.
252
            if node.parent_keys:
4371.4.3 by Vincent Ladeuil
A new heads() method based on gdfo.
253
                pending.extend(node.parent_keys)
4371.4.4 by Vincent Ladeuil
Feedback from John, 'seen' is a set().
254
        heads = heads_key.difference(seen)
4371.4.3 by Vincent Ladeuil
A new heads() method based on gdfo.
255
        if self.do_cache:
256
            self._known_heads[heads_key] = heads
257
        return heads
258
4593.5.2 by John Arbash Meinel
Implement a very basic version of KnownGraph.topo_sort that just
259
    def topo_sort(self):
4593.5.30 by John Arbash Meinel
Move the topo_sort implementation into KnownGraph, rather than calling back to it.
260
        """Return the nodes in topological order.
261
262
        All parents must occur before all children.
263
        """
264
        for node in self._nodes.itervalues():
265
            if node.gdfo is None:
266
                raise errors.GraphCycleError(self._nodes)
267
        pending = self._find_tails()
268
        pending_pop = pending.pop
269
        pending_append = pending.append
270
271
        topo_order = []
272
        topo_order_append = topo_order.append
273
274
        num_seen_parents = dict.fromkeys(self._nodes, 0)
275
        while pending:
276
            node = pending_pop()
277
            if node.parent_keys is not None:
278
                # We don't include ghost parents
279
                topo_order_append(node.key)
280
            for child_key in node.child_keys:
281
                child_node = self._nodes[child_key]
282
                seen_parents = num_seen_parents[child_key] + 1
283
                if seen_parents == len(child_node.parent_keys):
284
                    # All parents have been processed, enqueue this child
285
                    pending_append(child_node)
286
                    # This has been queued up, stop tracking it
287
                    del num_seen_parents[child_key]
288
                else:
289
                    num_seen_parents[child_key] = seen_parents
290
        # We started from the parents, so we don't need to do anymore work
291
        return topo_order
4593.5.6 by John Arbash Meinel
Create the crude merge_sort implementation that just thunks over to the old implementation.
292
4641.5.2 by John Arbash Meinel
Get ready to move the tests into KnownGraph implementation tests.
293
    def gc_sort(self):
294
        """Return a reverse topological ordering which is 'stable'.
295
296
        There are a few constraints:
297
          1) Reverse topological (all children before all parents)
298
          2) Grouped by prefix
299
          3) 'stable' sorting, so that we get the same result, independent of
300
             machine, or extra data.
301
        To do this, we use the same basic algorithm as topo_sort, but when we
302
        aren't sure what node to access next, we sort them lexicographically.
303
        """
4641.5.4 by John Arbash Meinel
Implement a basic algorithm for the pure-python KnownGraph code.
304
        tips = self._find_tips()
305
        # Split the tips based on prefix
306
        prefix_tips = {}
307
        for node in tips:
308
            if node.key.__class__ is str or len(node.key) == 1:
309
                prefix = ''
310
            else:
311
                prefix = node.key[0]
312
            prefix_tips.setdefault(prefix, []).append(node)
313
314
        num_seen_children = dict.fromkeys(self._nodes, 0)
315
316
        result = []
317
        for prefix in sorted(prefix_tips):
318
            pending = sorted(prefix_tips[prefix], key=lambda n:n.key,
319
                             reverse=True)
320
            while pending:
321
                node = pending.pop()
4641.5.6 by John Arbash Meinel
Add support for skipping ghost nodes.
322
                if node.parent_keys is None:
323
                    # Ghost node, skip it
324
                    continue
4641.5.4 by John Arbash Meinel
Implement a basic algorithm for the pure-python KnownGraph code.
325
                result.append(node.key)
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
326
                for parent_key in sorted(node.parent_keys, reverse=True):
4641.5.4 by John Arbash Meinel
Implement a basic algorithm for the pure-python KnownGraph code.
327
                    parent_node = self._nodes[parent_key]
328
                    seen_children = num_seen_children[parent_key] + 1
329
                    if seen_children == len(parent_node.child_keys):
330
                        # All children have been processed, enqueue this parent
331
                        pending.append(parent_node)
332
                        # This has been queued up, stop tracking it
333
                        del num_seen_children[parent_key]
334
                    else:
4641.5.5 by John Arbash Meinel
Implement initial gc_sort in pyrex.
335
                        num_seen_children[parent_key] = seen_children
4641.5.4 by John Arbash Meinel
Implement a basic algorithm for the pure-python KnownGraph code.
336
        return result
4641.5.2 by John Arbash Meinel
Get ready to move the tests into KnownGraph implementation tests.
337
4593.5.6 by John Arbash Meinel
Create the crude merge_sort implementation that just thunks over to the old implementation.
338
    def merge_sort(self, tip_key):
339
        """Compute the merge sorted graph output."""
4593.5.33 by John Arbash Meinel
Late import tsort because it causes an import cycle.
340
        from bzrlib import tsort
4593.5.6 by John Arbash Meinel
Create the crude merge_sort implementation that just thunks over to the old implementation.
341
        as_parent_map = dict((node.key, node.parent_keys)
4593.5.25 by John Arbash Meinel
Add tests that we detect GraphCycleError
342
                             for node in self._nodes.itervalues()
343
                              if node.parent_keys is not None)
4593.5.6 by John Arbash Meinel
Create the crude merge_sort implementation that just thunks over to the old implementation.
344
        # We intentionally always generate revnos and never force the
345
        # mainline_revisions
4593.5.32 by John Arbash Meinel
With the new api, we don't return 'sequence_number' anymore.
346
        # Strip the sequence_number that merge_sort generates
4593.5.34 by John Arbash Meinel
Change the KnownGraph.merge_sort api.
347
        return [_MergeSortNode(key, merge_depth, revno, end_of_merge)
348
                for _, key, merge_depth, revno, end_of_merge
349
                 in tsort.merge_sort(as_parent_map, tip_key,
350
                                     mainline_revisions=None,
351
                                     generate_revno=True)]
4648.2.1 by Gary van der Merwe
Add get_parent_keys, and get_child_keys to KnownGraph.
352
    
353
    def get_parent_keys(self, key):
354
        """Get the parents for a key
355
        
356
        Returns a list containg the parents keys. If the key is a ghost,
357
        None is returned. A KeyError will be raised if the key is not in
358
        the graph.
359
        
360
        :param keys: Key to check (eg revision_id)
361
        :return: A list of parents
362
        """
363
        return self._nodes[key].parent_keys
364
365
    def get_child_keys(self, key):
366
        """Get the children for a key
367
        
368
        Returns a list containg the children keys. A KeyError will be raised
369
        if the key is not in the graph.
370
        
371
        :param keys: Key to check (eg revision_id)
372
        :return: A list of children
373
        """
374
        return self._nodes[key].child_keys