1
# Copyright (C) 2007 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
from bzrlib import errors
18
from bzrlib.deprecated_graph import (node_distances, select_farthest)
19
from bzrlib.revision import NULL_REVISION
21
# DIAGRAM of terminology
31
# In this diagram, relative to G and H:
32
# A, B, C, D, E are common ancestors.
33
# C, D and E are border ancestors, because each has a non-common descendant.
34
# D and E are least common ancestors because none of their descendants are
36
# C is not a least common ancestor because its descendant, E, is a common
39
# The find_unique_lca algorithm will pick A in two steps:
40
# 1. find_lca('G', 'H') => ['D', 'E']
41
# 2. Since len(['D', 'E']) > 1, find_lca('D', 'E') => ['A']
45
class _StackedParentsProvider(object):
47
def __init__(self, parent_providers):
48
self._parent_providers = parent_providers
51
return "_StackedParentsProvider(%r)" % self._parent_providers
53
def get_parents(self, revision_ids):
54
"""Find revision ids of the parents of a list of revisions
56
A list is returned of the same length as the input. Each entry
57
is a list of parent ids for the corresponding input revision.
59
[NULL_REVISION] is used as the parent of the first user-committed
60
revision. Its parent list is empty.
62
If the revision is not present (i.e. a ghost), None is used in place
63
of the list of parents.
66
for parents_provider in self._parent_providers:
67
pending_revisions = [r for r in revision_ids if r not in found]
68
parent_list = parents_provider.get_parents(pending_revisions)
69
new_found = dict((k, v) for k, v in zip(pending_revisions,
70
parent_list) if v is not None)
71
found.update(new_found)
72
if len(found) == len(revision_ids):
74
return [found.get(r, None) for r in revision_ids]
78
"""Provide incremental access to revision graphs.
80
This is the generic implementation; it is intended to be subclassed to
81
specialize it for other repository types.
84
def __init__(self, parents_provider):
85
"""Construct a Graph that uses several graphs as its input
87
This should not normally be invoked directly, because there may be
88
specialized implementations for particular repository types. See
89
Repository.get_graph()
91
:param parents_func: an object providing a get_parents call
92
conforming to the behavior of StackedParentsProvider.get_parents
94
self.get_parents = parents_provider.get_parents
95
self._parents_provider = parents_provider
98
return 'Graph(%r)' % self._parents_provider
100
def find_lca(self, *revisions):
101
"""Determine the lowest common ancestors of the provided revisions
103
A lowest common ancestor is a common ancestor none of whose
104
descendants are common ancestors. In graphs, unlike trees, there may
105
be multiple lowest common ancestors.
107
This algorithm has two phases. Phase 1 identifies border ancestors,
108
and phase 2 filters border ancestors to determine lowest common
111
In phase 1, border ancestors are identified, using a breadth-first
112
search starting at the bottom of the graph. Searches are stopped
113
whenever a node or one of its descendants is determined to be common
115
In phase 2, the border ancestors are filtered to find the least
116
common ancestors. This is done by searching the ancestries of each
119
Phase 2 is perfomed on the principle that a border ancestor that is
120
not an ancestor of any other border ancestor is a least common
123
Searches are stopped when they find a node that is determined to be a
124
common ancestor of all border ancestors, because this shows that it
125
cannot be a descendant of any border ancestor.
127
The scaling of this operation should be proportional to
128
1. The number of uncommon ancestors
129
2. The number of border ancestors
130
3. The length of the shortest path between a border ancestor and an
131
ancestor of all border ancestors.
133
border_common, common, sides = self._find_border_ancestors(revisions)
134
return self._filter_candidate_lca(border_common)
136
def find_difference(self, left_revision, right_revision):
137
"""Determine the graph difference between two revisions"""
138
border, common, (left, right) = self._find_border_ancestors(
139
[left_revision, right_revision])
140
return (left.difference(right).difference(common),
141
right.difference(left).difference(common))
143
def _make_breadth_first_searcher(self, revisions):
144
return _BreadthFirstSearcher(revisions, self)
146
def _find_border_ancestors(self, revisions):
147
"""Find common ancestors with at least one uncommon descendant.
149
Border ancestors are identified using a breadth-first
150
search starting at the bottom of the graph. Searches are stopped
151
whenever a node or one of its descendants is determined to be common.
153
This will scale with the number of uncommon ancestors.
155
As well as the border ancestors, a set of seen common ancestors and a
156
list of sets of seen ancestors for each input revision is returned.
157
This allows calculation of graph difference from the results of this
160
if None in revisions:
161
raise errors.InvalidRevisionId(None, self)
162
common_searcher = self._make_breadth_first_searcher([])
163
common_ancestors = set()
164
searchers = [self._make_breadth_first_searcher([r])
166
active_searchers = searchers[:]
167
border_ancestors = set()
168
def update_common(searcher, revisions):
169
w_seen_ancestors = searcher.find_seen_ancestors(
171
stopped = searcher.stop_searching_any(w_seen_ancestors)
172
common_ancestors.update(w_seen_ancestors)
173
common_searcher.start_searching(stopped)
176
if len(active_searchers) == 0:
177
return border_ancestors, common_ancestors, [s.seen for s in
180
new_common = common_searcher.next()
181
common_ancestors.update(new_common)
182
except StopIteration:
185
for searcher in active_searchers:
186
for revision in new_common.intersection(searcher.seen):
187
update_common(searcher, revision)
190
new_active_searchers = []
191
for searcher in active_searchers:
193
newly_seen.update(searcher.next())
194
except StopIteration:
197
new_active_searchers.append(searcher)
198
active_searchers = new_active_searchers
199
for revision in newly_seen:
200
if revision in common_ancestors:
201
for searcher in searchers:
202
update_common(searcher, revision)
204
for searcher in searchers:
205
if revision not in searcher.seen:
208
border_ancestors.add(revision)
209
for searcher in searchers:
210
update_common(searcher, revision)
212
def _filter_candidate_lca(self, candidate_lca):
213
"""Remove candidates which are ancestors of other candidates.
215
This is done by searching the ancestries of each border ancestor. It
216
is perfomed on the principle that a border ancestor that is not an
217
ancestor of any other border ancestor is a lowest common ancestor.
219
Searches are stopped when they find a node that is determined to be a
220
common ancestor of all border ancestors, because this shows that it
221
cannot be a descendant of any border ancestor.
223
This will scale with the number of candidate ancestors and the length
224
of the shortest path from a candidate to an ancestor common to all
227
searchers = dict((c, self._make_breadth_first_searcher([c]))
228
for c in candidate_lca)
229
active_searchers = dict(searchers)
230
# skip over the actual candidate for each searcher
231
for searcher in active_searchers.itervalues():
233
while len(active_searchers) > 0:
234
for candidate, searcher in list(active_searchers.iteritems()):
236
ancestors = searcher.next()
237
except StopIteration:
238
del active_searchers[candidate]
240
for ancestor in ancestors:
241
if ancestor in candidate_lca:
242
candidate_lca.remove(ancestor)
243
del searchers[ancestor]
244
if ancestor in active_searchers:
245
del active_searchers[ancestor]
246
for searcher in searchers.itervalues():
247
if ancestor not in searcher.seen:
250
# if this revision was seen by all searchers, then it
251
# is a descendant of all candidates, so we can stop
252
# searching it, and any seen ancestors
253
for searcher in searchers.itervalues():
255
searcher.find_seen_ancestors(ancestor)
256
searcher.stop_searching_any(seen_ancestors)
259
def find_unique_lca(self, left_revision, right_revision):
260
"""Find a unique LCA.
262
Find lowest common ancestors. If there is no unique common
263
ancestor, find the lowest common ancestors of those ancestors.
265
Iteration stops when a unique lowest common ancestor is found.
266
The graph origin is necessarily a unique lowest common ancestor.
268
Note that None is not an acceptable substitute for NULL_REVISION.
269
in the input for this method.
271
revisions = [left_revision, right_revision]
273
lca = self.find_lca(*revisions)
279
class _BreadthFirstSearcher(object):
280
"""Parallel search the breadth-first the ancestry of revisions.
282
This class implements the iterator protocol, but additionally
283
1. provides a set of seen ancestors, and
284
2. allows some ancestries to be unsearched, via stop_searching_any
287
def __init__(self, revisions, parents_provider):
288
self._start = set(revisions)
289
self._search_revisions = None
290
self.seen = set(revisions)
291
self._parents_provider = parents_provider
294
return ('_BreadthFirstSearcher(self._search_revisions=%r,'
295
' self.seen=%r)' % (self._search_revisions, self.seen))
298
"""Return the next ancestors of this revision.
300
Ancestors are returned in the order they are seen in a breadth-first
301
traversal. No ancestor will be returned more than once.
303
if self._search_revisions is None:
304
self._search_revisions = self._start
306
new_search_revisions = set()
307
for parents in self._parents_provider.get_parents(
308
self._search_revisions):
311
new_search_revisions.update(p for p in parents if
313
self._search_revisions = new_search_revisions
314
if len(self._search_revisions) == 0:
315
raise StopIteration()
316
self.seen.update(self._search_revisions)
317
return self._search_revisions
322
def find_seen_ancestors(self, revision):
323
"""Find ancestors of this revision that have already been seen."""
324
searcher = _BreadthFirstSearcher([revision], self._parents_provider)
325
seen_ancestors = set()
326
for ancestors in searcher:
327
for ancestor in ancestors:
328
if ancestor not in self.seen:
329
searcher.stop_searching_any([ancestor])
331
seen_ancestors.add(ancestor)
332
return seen_ancestors
334
def stop_searching_any(self, revisions):
336
Remove any of the specified revisions from the search list.
338
None of the specified revisions are required to be present in the
339
search list. In this case, the call is a no-op.
341
stopped = self._search_revisions.intersection(revisions)
342
self._search_revisions = self._search_revisions.difference(revisions)
345
def start_searching(self, revisions):
346
if self._search_revisions is None:
347
self._start = set(revisions)
349
self._search_revisions.update(revisions.difference(self.seen))
350
self.seen.update(revisions)