4371.3.18
by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code. |
1 |
# Copyright (C) 2009 Canonical Ltd
|
2 |
#
|
|
3 |
# This program is free software; you can redistribute it and/or modify
|
|
4 |
# it under the terms of the GNU General Public License as published by
|
|
5 |
# the Free Software Foundation; either version 2 of the License, or
|
|
6 |
# (at your option) any later version.
|
|
7 |
#
|
|
8 |
# This program is distributed in the hope that it will be useful,
|
|
9 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
10 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
11 |
# GNU General Public License for more details.
|
|
12 |
#
|
|
13 |
# You should have received a copy of the GNU General Public License
|
|
14 |
# along with this program; if not, write to the Free Software
|
|
15 |
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
16 |
||
17 |
"""Implementation of Graph algorithms when we have already loaded everything.
|
|
18 |
"""
|
|
19 |
||
4838.1.1
by John Arbash Meinel
Implement KnownGraph.add_node(), along with tests, for the python version. |
20 |
from collections import deque |
4371.3.18
by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code. |
21 |
from bzrlib import ( |
4593.5.30
by John Arbash Meinel
Move the topo_sort implementation into KnownGraph, rather than calling back to it. |
22 |
errors, |
4371.3.18
by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code. |
23 |
revision, |
24 |
)
|
|
25 |
||
26 |
||
27 |
class _KnownGraphNode(object): |
|
28 |
"""Represents a single object in the known graph."""
|
|
29 |
||
4371.4.10
by Vincent Ladeuil
Cleanup. |
30 |
__slots__ = ('key', 'parent_keys', 'child_keys', 'gdfo') |
4371.3.18
by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code. |
31 |
|
32 |
def __init__(self, key, parent_keys): |
|
33 |
self.key = key |
|
34 |
self.parent_keys = parent_keys |
|
35 |
self.child_keys = [] |
|
36 |
# Greatest distance from origin
|
|
37 |
self.gdfo = None |
|
38 |
||
39 |
def __repr__(self): |
|
4371.4.10
by Vincent Ladeuil
Cleanup. |
40 |
return '%s(%s gdfo:%s par:%s child:%s)' % ( |
4371.3.18
by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code. |
41 |
self.__class__.__name__, self.key, self.gdfo, |
4371.4.10
by Vincent Ladeuil
Cleanup. |
42 |
self.parent_keys, self.child_keys) |
4371.3.18
by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code. |
43 |
|
44 |
||
4593.5.34
by John Arbash Meinel
Change the KnownGraph.merge_sort api. |
45 |
class _MergeSortNode(object): |
46 |
"""Information about a specific node in the merge graph."""
|
|
47 |
||
48 |
__slots__ = ('key', 'merge_depth', 'revno', 'end_of_merge') |
|
49 |
||
50 |
def __init__(self, key, merge_depth, revno, end_of_merge): |
|
51 |
self.key = key |
|
52 |
self.merge_depth = merge_depth |
|
53 |
self.revno = revno |
|
54 |
self.end_of_merge = end_of_merge |
|
55 |
||
56 |
||
4371.3.18
by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code. |
57 |
class KnownGraph(object): |
58 |
"""This is a class which assumes we already know the full graph."""
|
|
59 |
||
60 |
def __init__(self, parent_map, do_cache=True): |
|
61 |
"""Create a new KnownGraph instance.
|
|
62 |
||
63 |
:param parent_map: A dictionary mapping key => parent_keys
|
|
64 |
"""
|
|
65 |
self._nodes = {} |
|
4838.1.2
by John Arbash Meinel
Implement KnownGraph.add_node() for the pyrex version. |
66 |
# Maps {frozenset(revision_id, revision_id): heads}
|
4371.3.18
by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code. |
67 |
self._known_heads = {} |
4371.3.19
by John Arbash Meinel
Get an initial pyrex implementation. |
68 |
self.do_cache = do_cache |
4371.3.18
by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code. |
69 |
self._initialize_nodes(parent_map) |
4371.3.19
by John Arbash Meinel
Get an initial pyrex implementation. |
70 |
self._find_gdfo() |
4371.3.18
by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code. |
71 |
|
72 |
def _initialize_nodes(self, parent_map): |
|
73 |
"""Populate self._nodes.
|
|
74 |
||
4371.4.9
by Vincent Ladeuil
Simplify gdfo computing by finding tails when at graph build time. |
75 |
After this has finished:
|
76 |
- self._nodes will have an entry for every entry in parent_map.
|
|
77 |
- ghosts will have a parent_keys = None,
|
|
78 |
- all nodes found will also have .child_keys populated with all known
|
|
79 |
child_keys,
|
|
4371.3.18
by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code. |
80 |
"""
|
81 |
nodes = self._nodes |
|
82 |
for key, parent_keys in parent_map.iteritems(): |
|
83 |
if key in nodes: |
|
84 |
node = nodes[key] |
|
85 |
node.parent_keys = parent_keys |
|
86 |
else: |
|
87 |
node = _KnownGraphNode(key, parent_keys) |
|
88 |
nodes[key] = node |
|
89 |
for parent_key in parent_keys: |
|
90 |
try: |
|
91 |
parent_node = nodes[parent_key] |
|
92 |
except KeyError: |
|
93 |
parent_node = _KnownGraphNode(parent_key, None) |
|
94 |
nodes[parent_key] = parent_node |
|
95 |
parent_node.child_keys.append(key) |
|
96 |
||
4454.3.78
by John Arbash Meinel
Found a bug in the pure-python KnownGraph code. |
97 |
def _find_tails(self): |
98 |
return [node for node in self._nodes.itervalues() |
|
99 |
if not node.parent_keys] |
|
100 |
||
4641.5.4
by John Arbash Meinel
Implement a basic algorithm for the pure-python KnownGraph code. |
101 |
def _find_tips(self): |
102 |
return [node for node in self._nodes.itervalues() |
|
103 |
if not node.child_keys] |
|
104 |
||
4371.3.18
by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code. |
105 |
def _find_gdfo(self): |
4371.4.1
by Vincent Ladeuil
A new recursive gdfo processing method. |
106 |
nodes = self._nodes |
4371.4.10
by Vincent Ladeuil
Cleanup. |
107 |
known_parent_gdfos = {} |
4371.4.1
by Vincent Ladeuil
A new recursive gdfo processing method. |
108 |
pending = [] |
109 |
||
4454.3.78
by John Arbash Meinel
Found a bug in the pure-python KnownGraph code. |
110 |
for node in self._find_tails(): |
4371.4.9
by Vincent Ladeuil
Simplify gdfo computing by finding tails when at graph build time. |
111 |
node.gdfo = 1 |
112 |
pending.append(node) |
|
113 |
||
4371.4.8
by Vincent Ladeuil
Replace the existing KnownGraph._find_gdfo. |
114 |
while pending: |
115 |
node = pending.pop() |
|
4371.4.1
by Vincent Ladeuil
A new recursive gdfo processing method. |
116 |
for child_key in node.child_keys: |
117 |
child = nodes[child_key] |
|
4371.4.19
by John Arbash Meinel
Update the python code to do the same checking around known_parent_gdfos being present. |
118 |
if child_key in known_parent_gdfos: |
119 |
known_gdfo = known_parent_gdfos[child_key] + 1 |
|
120 |
present = True |
|
121 |
else: |
|
122 |
known_gdfo = 1 |
|
123 |
present = False |
|
4371.4.1
by Vincent Ladeuil
A new recursive gdfo processing method. |
124 |
if child.gdfo is None or node.gdfo + 1 > child.gdfo: |
125 |
child.gdfo = node.gdfo + 1 |
|
4371.4.19
by John Arbash Meinel
Update the python code to do the same checking around known_parent_gdfos being present. |
126 |
if known_gdfo == len(child.parent_keys): |
4371.4.1
by Vincent Ladeuil
A new recursive gdfo processing method. |
127 |
# We are the last parent updating that node, we can
|
128 |
# continue from there
|
|
4371.4.2
by Vincent Ladeuil
Same gdfo processing, without recursion. |
129 |
pending.append(child) |
4371.4.19
by John Arbash Meinel
Update the python code to do the same checking around known_parent_gdfos being present. |
130 |
if present: |
131 |
del known_parent_gdfos[child_key] |
|
132 |
else: |
|
133 |
# Update known_parent_gdfos for a key we couldn't process
|
|
134 |
known_parent_gdfos[child_key] = known_gdfo |
|
4371.4.1
by Vincent Ladeuil
A new recursive gdfo processing method. |
135 |
|
4838.1.1
by John Arbash Meinel
Implement KnownGraph.add_node(), along with tests, for the python version. |
136 |
def add_node(self, key, parent_keys): |
4838.1.2
by John Arbash Meinel
Implement KnownGraph.add_node() for the pyrex version. |
137 |
"""Add a new node to the graph.
|
138 |
||
139 |
If this fills in a ghost, then the gdfos of all children will be
|
|
140 |
updated accordingly.
|
|
141 |
|
|
142 |
:param key: The node being added. If this is a duplicate, this is a
|
|
143 |
no-op.
|
|
144 |
:param parent_keys: The parents of the given node.
|
|
145 |
:return: None (should we return if this was a ghost, etc?)
|
|
146 |
"""
|
|
4838.1.1
by John Arbash Meinel
Implement KnownGraph.add_node(), along with tests, for the python version. |
147 |
nodes = self._nodes |
148 |
if key in nodes: |
|
149 |
node = nodes[key] |
|
150 |
if node.parent_keys is None: |
|
151 |
node.parent_keys = parent_keys |
|
4838.1.2
by John Arbash Meinel
Implement KnownGraph.add_node() for the pyrex version. |
152 |
# A ghost is being added, we can no-longer trust the heads
|
153 |
# cache, so clear it
|
|
154 |
self._known_heads.clear() |
|
4838.1.1
by John Arbash Meinel
Implement KnownGraph.add_node(), along with tests, for the python version. |
155 |
else: |
4838.1.2
by John Arbash Meinel
Implement KnownGraph.add_node() for the pyrex version. |
156 |
# Make sure we compare a list to a list, as tuple != list.
|
157 |
parent_keys = list(parent_keys) |
|
158 |
existing_parent_keys = list(node.parent_keys) |
|
159 |
if parent_keys == existing_parent_keys: |
|
160 |
return # Identical content |
|
161 |
else: |
|
162 |
raise ValueError('Parent key mismatch, existing node %s' |
|
163 |
' has parents of %s not %s' |
|
164 |
% (key, existing_parent_keys, parent_keys)) |
|
4838.1.1
by John Arbash Meinel
Implement KnownGraph.add_node(), along with tests, for the python version. |
165 |
else: |
166 |
node = _KnownGraphNode(key, parent_keys) |
|
167 |
nodes[key] = node |
|
168 |
parent_gdfo = 0 |
|
169 |
for parent_key in parent_keys: |
|
170 |
try: |
|
171 |
parent_node = nodes[parent_key] |
|
172 |
except KeyError: |
|
173 |
parent_node = _KnownGraphNode(parent_key, None) |
|
174 |
# Ghosts and roots have gdfo 1
|
|
175 |
parent_node.gdfo = 1 |
|
176 |
nodes[parent_key] = parent_node |
|
177 |
if parent_gdfo < parent_node.gdfo: |
|
178 |
parent_gdfo = parent_node.gdfo |
|
179 |
parent_node.child_keys.append(key) |
|
180 |
node.gdfo = parent_gdfo + 1 |
|
181 |
# Now fill the gdfo to all children
|
|
182 |
# Note that this loop is slightly inefficient, in that we may visit the
|
|
183 |
# same child (and its decendents) more than once, however, it is
|
|
184 |
# 'efficient' in that we only walk to nodes that would be updated,
|
|
185 |
# rather than all nodes
|
|
186 |
# We use a deque rather than a simple list stack, to go for BFD rather
|
|
187 |
# than DFD. So that if a longer path is possible, we walk it before we
|
|
188 |
# get to the final child
|
|
189 |
pending = deque([node]) |
|
190 |
while pending: |
|
191 |
node = pending.popleft() |
|
192 |
next_gdfo = node.gdfo + 1 |
|
193 |
for child_key in node.child_keys: |
|
194 |
child = nodes[child_key] |
|
195 |
if child.gdfo < next_gdfo: |
|
196 |
# This child is being updated, we need to check its
|
|
197 |
# children
|
|
198 |
child.gdfo = next_gdfo |
|
199 |
pending.append(child) |
|
200 |
||
4371.3.18
by John Arbash Meinel
Change VF.annotate to use the new KnownGraph code. |
201 |
def heads(self, keys): |
202 |
"""Return the heads from amongst keys.
|
|
203 |
||
204 |
This is done by searching the ancestries of each key. Any key that is
|
|
205 |
reachable from another key is not returned; all the others are.
|
|
206 |
||
4371.4.3
by Vincent Ladeuil
A new heads() method based on gdfo. |
207 |
This operation scales with the relative depth between any two keys. It
|
208 |
uses gdfo to avoid walking all ancestry.
|
|
209 |
||
210 |
:param keys: An iterable of keys.
|
|
211 |
:return: A set of the heads. Note that as a set there is no ordering
|
|
212 |
information. Callers will need to filter their input to create
|
|
213 |
order if they need it.
|
|
214 |
"""
|
|
215 |
candidate_nodes = dict((key, self._nodes[key]) for key in keys) |
|
216 |
if revision.NULL_REVISION in candidate_nodes: |
|
217 |
# NULL_REVISION is only a head if it is the only entry
|
|
218 |
candidate_nodes.pop(revision.NULL_REVISION) |
|
219 |
if not candidate_nodes: |
|
220 |
return frozenset([revision.NULL_REVISION]) |
|
221 |
if len(candidate_nodes) < 2: |
|
222 |
# No or only one candidate
|
|
223 |
return frozenset(candidate_nodes) |
|
224 |
heads_key = frozenset(candidate_nodes) |
|
225 |
# Do we have a cached result ?
|
|
226 |
try: |
|
227 |
heads = self._known_heads[heads_key] |
|
228 |
return heads |
|
229 |
except KeyError: |
|
230 |
pass
|
|
231 |
# Let's compute the heads
|
|
4371.4.4
by Vincent Ladeuil
Feedback from John, 'seen' is a set(). |
232 |
seen = set() |
4371.4.3
by Vincent Ladeuil
A new heads() method based on gdfo. |
233 |
pending = [] |
234 |
min_gdfo = None |
|
235 |
for node in candidate_nodes.values(): |
|
4371.4.7
by Vincent Ladeuil
Simple Pyrex version for the new heads() method. |
236 |
if node.parent_keys: |
4371.4.3
by Vincent Ladeuil
A new heads() method based on gdfo. |
237 |
pending.extend(node.parent_keys) |
238 |
if min_gdfo is None or node.gdfo < min_gdfo: |
|
239 |
min_gdfo = node.gdfo |
|
240 |
nodes = self._nodes |
|
241 |
while pending: |
|
242 |
node_key = pending.pop() |
|
243 |
if node_key in seen: |
|
244 |
# node already appears in some ancestry
|
|
245 |
continue
|
|
4371.4.4
by Vincent Ladeuil
Feedback from John, 'seen' is a set(). |
246 |
seen.add(node_key) |
4371.4.3
by Vincent Ladeuil
A new heads() method based on gdfo. |
247 |
node = nodes[node_key] |
248 |
if node.gdfo <= min_gdfo: |
|
249 |
continue
|
|
4371.4.7
by Vincent Ladeuil
Simple Pyrex version for the new heads() method. |
250 |
if node.parent_keys: |
4371.4.3
by Vincent Ladeuil
A new heads() method based on gdfo. |
251 |
pending.extend(node.parent_keys) |
4371.4.4
by Vincent Ladeuil
Feedback from John, 'seen' is a set(). |
252 |
heads = heads_key.difference(seen) |
4371.4.3
by Vincent Ladeuil
A new heads() method based on gdfo. |
253 |
if self.do_cache: |
254 |
self._known_heads[heads_key] = heads |
|
255 |
return heads |
|
256 |
||
4593.5.2
by John Arbash Meinel
Implement a very basic version of KnownGraph.topo_sort that just |
257 |
def topo_sort(self): |
4593.5.30
by John Arbash Meinel
Move the topo_sort implementation into KnownGraph, rather than calling back to it. |
258 |
"""Return the nodes in topological order.
|
259 |
||
260 |
All parents must occur before all children.
|
|
261 |
"""
|
|
262 |
for node in self._nodes.itervalues(): |
|
263 |
if node.gdfo is None: |
|
264 |
raise errors.GraphCycleError(self._nodes) |
|
265 |
pending = self._find_tails() |
|
266 |
pending_pop = pending.pop |
|
267 |
pending_append = pending.append |
|
268 |
||
269 |
topo_order = [] |
|
270 |
topo_order_append = topo_order.append |
|
271 |
||
272 |
num_seen_parents = dict.fromkeys(self._nodes, 0) |
|
273 |
while pending: |
|
274 |
node = pending_pop() |
|
275 |
if node.parent_keys is not None: |
|
276 |
# We don't include ghost parents
|
|
277 |
topo_order_append(node.key) |
|
278 |
for child_key in node.child_keys: |
|
279 |
child_node = self._nodes[child_key] |
|
280 |
seen_parents = num_seen_parents[child_key] + 1 |
|
281 |
if seen_parents == len(child_node.parent_keys): |
|
282 |
# All parents have been processed, enqueue this child
|
|
283 |
pending_append(child_node) |
|
284 |
# This has been queued up, stop tracking it
|
|
285 |
del num_seen_parents[child_key] |
|
286 |
else: |
|
287 |
num_seen_parents[child_key] = seen_parents |
|
288 |
# We started from the parents, so we don't need to do anymore work
|
|
289 |
return topo_order |
|
4593.5.6
by John Arbash Meinel
Create the crude merge_sort implementation that just thunks over to the old implementation. |
290 |
|
4641.5.2
by John Arbash Meinel
Get ready to move the tests into KnownGraph implementation tests. |
291 |
def gc_sort(self): |
292 |
"""Return a reverse topological ordering which is 'stable'.
|
|
293 |
||
294 |
There are a few constraints:
|
|
295 |
1) Reverse topological (all children before all parents)
|
|
296 |
2) Grouped by prefix
|
|
297 |
3) 'stable' sorting, so that we get the same result, independent of
|
|
298 |
machine, or extra data.
|
|
299 |
To do this, we use the same basic algorithm as topo_sort, but when we
|
|
300 |
aren't sure what node to access next, we sort them lexicographically.
|
|
301 |
"""
|
|
4641.5.4
by John Arbash Meinel
Implement a basic algorithm for the pure-python KnownGraph code. |
302 |
tips = self._find_tips() |
303 |
# Split the tips based on prefix
|
|
304 |
prefix_tips = {} |
|
305 |
for node in tips: |
|
306 |
if node.key.__class__ is str or len(node.key) == 1: |
|
307 |
prefix = '' |
|
308 |
else: |
|
309 |
prefix = node.key[0] |
|
310 |
prefix_tips.setdefault(prefix, []).append(node) |
|
311 |
||
312 |
num_seen_children = dict.fromkeys(self._nodes, 0) |
|
313 |
||
314 |
result = [] |
|
315 |
for prefix in sorted(prefix_tips): |
|
316 |
pending = sorted(prefix_tips[prefix], key=lambda n:n.key, |
|
317 |
reverse=True) |
|
318 |
while pending: |
|
319 |
node = pending.pop() |
|
4641.5.6
by John Arbash Meinel
Add support for skipping ghost nodes. |
320 |
if node.parent_keys is None: |
321 |
# Ghost node, skip it
|
|
322 |
continue
|
|
4641.5.4
by John Arbash Meinel
Implement a basic algorithm for the pure-python KnownGraph code. |
323 |
result.append(node.key) |
4641.5.5
by John Arbash Meinel
Implement initial gc_sort in pyrex. |
324 |
for parent_key in sorted(node.parent_keys, reverse=True): |
4641.5.4
by John Arbash Meinel
Implement a basic algorithm for the pure-python KnownGraph code. |
325 |
parent_node = self._nodes[parent_key] |
326 |
seen_children = num_seen_children[parent_key] + 1 |
|
327 |
if seen_children == len(parent_node.child_keys): |
|
328 |
# All children have been processed, enqueue this parent
|
|
329 |
pending.append(parent_node) |
|
330 |
# This has been queued up, stop tracking it
|
|
331 |
del num_seen_children[parent_key] |
|
332 |
else: |
|
4641.5.5
by John Arbash Meinel
Implement initial gc_sort in pyrex. |
333 |
num_seen_children[parent_key] = seen_children |
4641.5.4
by John Arbash Meinel
Implement a basic algorithm for the pure-python KnownGraph code. |
334 |
return result |
4641.5.2
by John Arbash Meinel
Get ready to move the tests into KnownGraph implementation tests. |
335 |
|
4593.5.6
by John Arbash Meinel
Create the crude merge_sort implementation that just thunks over to the old implementation. |
336 |
def merge_sort(self, tip_key): |
337 |
"""Compute the merge sorted graph output."""
|
|
4593.5.33
by John Arbash Meinel
Late import tsort because it causes an import cycle. |
338 |
from bzrlib import tsort |
4593.5.6
by John Arbash Meinel
Create the crude merge_sort implementation that just thunks over to the old implementation. |
339 |
as_parent_map = dict((node.key, node.parent_keys) |
4593.5.25
by John Arbash Meinel
Add tests that we detect GraphCycleError |
340 |
for node in self._nodes.itervalues() |
341 |
if node.parent_keys is not None) |
|
4593.5.6
by John Arbash Meinel
Create the crude merge_sort implementation that just thunks over to the old implementation. |
342 |
# We intentionally always generate revnos and never force the
|
343 |
# mainline_revisions
|
|
4593.5.32
by John Arbash Meinel
With the new api, we don't return 'sequence_number' anymore. |
344 |
# Strip the sequence_number that merge_sort generates
|
4593.5.34
by John Arbash Meinel
Change the KnownGraph.merge_sort api. |
345 |
return [_MergeSortNode(key, merge_depth, revno, end_of_merge) |
346 |
for _, key, merge_depth, revno, end_of_merge |
|
347 |
in tsort.merge_sort(as_parent_map, tip_key, |
|
348 |
mainline_revisions=None, |
|
349 |
generate_revno=True)] |
|
4648.2.1
by Gary van der Merwe
Add get_parent_keys, and get_child_keys to KnownGraph. |
350 |
|
351 |
def get_parent_keys(self, key): |
|
352 |
"""Get the parents for a key
|
|
353 |
|
|
354 |
Returns a list containg the parents keys. If the key is a ghost,
|
|
355 |
None is returned. A KeyError will be raised if the key is not in
|
|
356 |
the graph.
|
|
357 |
|
|
358 |
:param keys: Key to check (eg revision_id)
|
|
359 |
:return: A list of parents
|
|
360 |
"""
|
|
361 |
return self._nodes[key].parent_keys |
|
362 |
||
363 |
def get_child_keys(self, key): |
|
364 |
"""Get the children for a key
|
|
365 |
|
|
366 |
Returns a list containg the children keys. A KeyError will be raised
|
|
367 |
if the key is not in the graph.
|
|
368 |
|
|
369 |
:param keys: Key to check (eg revision_id)
|
|
370 |
:return: A list of children
|
|
371 |
"""
|
|
372 |
return self._nodes[key].child_keys |