1
# Copyright (C) 2007 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
from bzrlib.deprecated_graph import (node_distances, select_farthest)
18
from bzrlib.revision import NULL_REVISION
20
# DIAGRAM of terminology
30
# In this diagram, relative to G and H:
31
# A, B, C, D, E are common ancestors.
32
# C, D and E are border ancestors, because each has a non-common descendant.
33
# D and E are least common ancestors because none of their descendants are
35
# C is not a least common ancestor because its descendant, E, is a common
38
# The find_unique_lca algorithm will pick A in two steps:
39
# 1. find_lca('G', 'H') => ['D', 'E']
40
# 2. Since len(['D', 'E']) > 1, find_lca('D', 'E') => ['A']
44
class _StackedParentsProvider(object):
46
def __init__(self, parent_providers):
47
self._parent_providers = parent_providers
49
def get_parents(self, revision_ids):
50
"""Find revision ids of the parents of a list of revisions
52
A list is returned of the same length as the input. Each entry
53
is a list of parent ids for the corresponding input revision.
55
[NULL_REVISION] is used as the parent of the first user-committed
56
revision. Its parent list is empty.
58
If the revision is not present (i.e. a ghost), None is used in place
59
of the list of parents.
62
for parents_provider in self._parent_providers:
63
pending_revisions = [r for r in revision_ids if r not in found]
64
parent_list = parents_provider.get_parents(pending_revisions)
65
new_found = dict((k, v) for k, v in zip(pending_revisions,
66
parent_list) if v is not None)
67
found.update(new_found)
68
if len(found) == len(revision_ids):
70
return [found.get(r, None) for r in revision_ids]
74
"""Provide incremental access to revision graphs.
76
This is the generic implementation; it is intended to be subclassed to
77
specialize it for other repository types.
80
def __init__(self, parents_provider):
81
"""Construct a Graph that uses several graphs as its input
83
This should not normally be invoked directly, because there may be
84
specialized implementations for particular repository types. See
85
Repository.get_graph()
87
:param parents_func: an object providing a get_parents call
88
conforming to the behavior of StackedParentsProvider.get_parents
90
self.get_parents = parents_provider.get_parents
92
def find_lca(self, *revisions):
93
"""Determine the lowest common ancestors of the provided revisions
95
A lowest common ancestor is a common ancestor none of whose
96
descendants are common ancestors. In graphs, unlike trees, there may
97
be multiple lowest common ancestors.
99
This algorithm has two phases. Phase 1 identifies border ancestors,
100
and phase 2 filters border ancestors to determine lowest common
103
In phase 1, border ancestors are identified, using a breadth-first
104
search starting at the bottom of the graph. Searches are stopped
105
whenever a node or one of its descendants is determined to be common
107
In phase 2, the border ancestors are filtered to find the least
108
common ancestors. This is done by searching the ancestries of each
111
Phase 2 is perfomed on the principle that a border ancestor that is
112
not an ancestor of any other border ancestor is a least common
115
Searches are stopped when they find a node that is determined to be a
116
common ancestor of all border ancestors, because this shows that it
117
cannot be a descendant of any border ancestor.
119
The scaling of this operation should be proportional to
120
1. The number of uncommon ancestors
121
2. The number of border ancestors
122
3. The length of the shortest path between a border ancestor and an
123
ancestor of all border ancestors.
125
border_common, common, sides = self._find_border_ancestors(revisions)
126
return self._filter_candidate_lca(border_common)
128
def find_difference(self, left_revision, right_revision):
129
"""Determine the graph difference between two revisions"""
130
border, common, (left, right) = self._find_border_ancestors(
131
[left_revision, right_revision])
132
return (left.difference(right).difference(common),
133
right.difference(left).difference(common))
135
def _make_breadth_first_searcher(self, revisions):
136
return _BreadthFirstSearcher(revisions, self)
138
def _find_border_ancestors(self, revisions):
139
"""Find common ancestors with at least one uncommon descendant.
141
Border ancestors are identified using a breadth-first
142
search starting at the bottom of the graph. Searches are stopped
143
whenever a node or one of its descendants is determined to be common.
145
This will scale with the number of uncommon ancestors.
147
As well as the border ancestors, a set of seen common ancestors and a
148
list of sets of seen ancestors for each input revision is returned.
149
This allows calculation of graph difference from the results of this
152
common_searcher = self._make_breadth_first_searcher([])
153
common_ancestors = set()
154
searchers = [self._make_breadth_first_searcher([r])
156
active_searchers = searchers[:]
157
border_ancestors = set()
158
def update_common(searcher, revisions):
159
w_seen_ancestors = searcher.find_seen_ancestors(
161
stopped = searcher.stop_searching_any(w_seen_ancestors)
162
common_ancestors.update(w_seen_ancestors)
163
common_searcher.start_searching(stopped)
166
if len(active_searchers) == 0:
167
return border_ancestors, common_ancestors, [s.seen for s in
170
new_common = common_searcher.next()
171
common_ancestors.update(new_common)
172
except StopIteration:
175
for searcher in active_searchers:
176
for revision in new_common.intersection(searcher.seen):
177
update_common(searcher, revision)
180
new_active_searchers = []
181
for searcher in active_searchers:
183
newly_seen.update(searcher.next())
184
except StopIteration:
187
new_active_searchers.append(searcher)
188
active_searchers = new_active_searchers
189
for revision in newly_seen:
190
if revision in common_ancestors:
191
for searcher in searchers:
192
update_common(searcher, revision)
194
for searcher in searchers:
195
if revision not in searcher.seen:
198
border_ancestors.add(revision)
199
for searcher in searchers:
200
update_common(searcher, revision)
202
def _filter_candidate_lca(self, candidate_lca):
203
"""Remove candidates which are ancestors of other candidates.
205
This is done by searching the ancestries of each border ancestor. It
206
is perfomed on the principle that a border ancestor that is not an
207
ancestor of any other border ancestor is a lowest common ancestor.
209
Searches are stopped when they find a node that is determined to be a
210
common ancestor of all border ancestors, because this shows that it
211
cannot be a descendant of any border ancestor.
213
This will scale with the number of candidate ancestors and the length
214
of the shortest path from a candidate to an ancestor common to all
217
searchers = dict((c, self._make_breadth_first_searcher([c]))
218
for c in candidate_lca)
219
active_searchers = dict(searchers)
220
# skip over the actual candidate for each searcher
221
for searcher in active_searchers.itervalues():
223
while len(active_searchers) > 0:
224
for candidate, searcher in list(active_searchers.iteritems()):
226
ancestors = searcher.next()
227
except StopIteration:
228
del active_searchers[candidate]
230
for ancestor in ancestors:
231
if ancestor in candidate_lca:
232
candidate_lca.remove(ancestor)
233
del searchers[ancestor]
234
if ancestor in active_searchers:
235
del active_searchers[ancestor]
236
for searcher in searchers.itervalues():
237
if ancestor not in searcher.seen:
240
# if this revision was seen by all searchers, then it
241
# is a descendant of all candidates, so we can stop
242
# searching it, and any seen ancestors
243
for searcher in searchers.itervalues():
245
searcher.find_seen_ancestors(ancestor)
246
searcher.stop_searching_any(seen_ancestors)
249
def find_unique_lca(self, left_revision, right_revision):
250
"""Find a unique LCA.
252
Find lowest common ancestors. If there is no unique common
253
ancestor, find the lowest common ancestors of those ancestors.
255
Iteration stops when a unique lowest common ancestor is found.
256
The graph origin is necessarily a unique lowest common ancestor.
258
Note that None is not an acceptable substitute for NULL_REVISION.
259
in the input for this method.
261
revisions = [left_revision, right_revision]
263
lca = self.find_lca(*revisions)
269
class _BreadthFirstSearcher(object):
270
"""Parallel search the breadth-first the ancestry of revisions.
272
This class implements the iterator protocol, but additionally
273
1. provides a set of seen ancestors, and
274
2. allows some ancestries to be unsearched, via stop_searching_any
277
def __init__(self, revisions, parents_provider):
278
self._start = set(revisions)
279
self._search_revisions = None
280
self.seen = set(revisions)
281
self._parents_provider = parents_provider
284
return ('_BreadthFirstSearcher(self._search_revisions=%r,'
285
' self.seen=%r)' % (self._search_revisions, self.seen))
288
"""Return the next ancestors of this revision.
290
Ancestors are returned in the order they are seen in a breadth-first
291
traversal. No ancestor will be returned more than once.
293
if self._search_revisions is None:
294
self._search_revisions = self._start
296
new_search_revisions = set()
297
for parents in self._parents_provider.get_parents(
298
self._search_revisions):
301
new_search_revisions.update(p for p in parents if
303
self._search_revisions = new_search_revisions
304
if len(self._search_revisions) == 0:
305
raise StopIteration()
306
self.seen.update(self._search_revisions)
307
return self._search_revisions
312
def find_seen_ancestors(self, revision):
313
"""Find ancestors of this revision that have already been seen."""
314
searcher = _BreadthFirstSearcher([revision], self._parents_provider)
315
seen_ancestors = set()
316
for ancestors in searcher:
317
for ancestor in ancestors:
318
if ancestor not in self.seen:
319
searcher.stop_searching_any([ancestor])
321
seen_ancestors.add(ancestor)
322
return seen_ancestors
324
def stop_searching_any(self, revisions):
326
Remove any of the specified revisions from the search list.
328
None of the specified revisions are required to be present in the
329
search list. In this case, the call is a no-op.
331
stopped = self._search_revisions.intersection(revisions)
332
self._search_revisions = self._search_revisions.difference(revisions)
335
def start_searching(self, revisions):
336
if self._search_revisions is None:
337
self._start = set(revisions)
339
self._search_revisions.update(revisions.difference(self.seen))
340
self.seen.update(revisions)