# Copyright (C) 2007 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

import time

from bzrlib import (
    debug,
    errors,
    revision,
    symbol_versioning,
    trace,
    tsort,
    )
from bzrlib.deprecated_graph import (node_distances, select_farthest)

STEP_UNIQUE_SEARCHER_EVERY = 5

# DIAGRAM of terminology
#       A
#       /\
#      B  C
#      |  |/
#      D  E
#      |/
#      F
#      |\
#      G  H
#
# In this diagram, relative to G and H:
# A, B, C, D, E are common ancestors.
# C, D and E are border ancestors, because each has a non-common descendant.
# D and E are least common ancestors because none of their descendants are
# common ancestors.
# C is not a least common ancestor because its descendant, E, is a common
# ancestor.
#
# The find_unique_lca algorithm will pick A in two steps:
# 1. find_lca('G', 'H') => ['D', 'E']
# 2. Since len(['D', 'E']) > 1, find_lca('D', 'E') => ['A']
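#
# As a concrete illustration (a sketch, not part of the original module;
# the revision names below are invented), a small diamond-shaped ancestry
# can be expressed as a plain dict and queried through the classes defined
# in this file:
#
##   ancestry = {'rev1': [], 'rev2a': ['rev1'], 'rev2b': ['rev1'],
##               'rev3': ['rev2a', 'rev2b']}
##   graph = Graph(DictParentsProvider(ancestry))
##   graph.find_lca('rev2a', 'rev2b')    # => set(['rev1'])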


class DictParentsProvider(object):
    """A parents provider for Graph objects."""

    def __init__(self, ancestry):
        self.ancestry = ancestry

    def __repr__(self):
        return 'DictParentsProvider(%r)' % self.ancestry

    def get_parent_map(self, keys):
        """See _StackedParentsProvider.get_parent_map"""
        ancestry = self.ancestry
        return dict((k, ancestry[k]) for k in keys if k in ancestry)
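
# For example (a sketch): keys the provider has no entry for are simply
# omitted from the returned map, which is how missing revisions (ghosts)
# are signalled by get_parent_map:
#
##   DictParentsProvider({'a': ('b',)}).get_parent_map(['a', 'ghost'])
##   # => {'a': ('b',)}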


class _StackedParentsProvider(object):

    def __init__(self, parent_providers):
        self._parent_providers = parent_providers

    def __repr__(self):
        return "_StackedParentsProvider(%r)" % self._parent_providers

    def get_parent_map(self, keys):
        """Get a mapping of keys => parents

        A dictionary is returned with an entry for each key present in this
        source. If this source doesn't have information about a key, it should
        not include an entry.

        [NULL_REVISION] is used as the parent of the first user-committed
        revision.  Its parent list is empty.

        :param keys: An iterable returning keys to check (eg revision_ids)
        :return: A dictionary mapping each key to its parents
        """
        found = {}
        remaining = set(keys)
        for parents_provider in self._parent_providers:
            new_found = parents_provider.get_parent_map(remaining)
            found.update(new_found)
            remaining.difference_update(new_found)
            if not remaining:
                break
        return found
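
# For example (a sketch): later providers are only consulted for the keys
# that earlier providers could not answer.
#
##   stacked = _StackedParentsProvider([DictParentsProvider({'a': ('b',)}),
##                                      DictParentsProvider({'b': ()})])
##   stacked.get_parent_map(['a', 'b'])  # => {'a': ('b',), 'b': ()}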


class CachingParentsProvider(object):
    """A parents provider which caches the revision => parents mapping in a dict.

    This is useful for providers that have an expensive lookup.
    """

    def __init__(self, parent_provider):
        self._real_provider = parent_provider
        # Theoretically we could use an LRUCache here
        self._cache = {}

    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__, self._real_provider)

    def get_parent_map(self, keys):
        """See _StackedParentsProvider.get_parent_map"""
        needed = set()
        # If the _real_provider doesn't have a key, we cache a value of None,
        # which we then later use to realize we cannot provide a value for
        # that key.
        parent_map = {}
        cache = self._cache
        for key in keys:
            if key in cache:
                value = cache[key]
                if value is not None:
                    parent_map[key] = value
            else:
                needed.add(key)

        if needed:
            new_parents = self._real_provider.get_parent_map(needed)
            cache.update(new_parents)
            parent_map.update(new_parents)
            needed.difference_update(new_parents)
            cache.update(dict.fromkeys(needed, None))
        return parent_map
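
# For example (a sketch): a second lookup is served from the cache, and
# keys the real provider knows nothing about are remembered as None so
# they are not asked for again.
#
##   caching = CachingParentsProvider(DictParentsProvider({'a': ('b',)}))
##   caching.get_parent_map(['a', 'ghost'])  # => {'a': ('b',)}
##   caching._cache                          # => {'a': ('b',), 'ghost': None}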


class Graph(object):
    """Provide incremental access to revision graphs.

    This is the generic implementation; it is intended to be subclassed to
    specialize it for other repository types.
    """

    def __init__(self, parents_provider):
        """Construct a Graph that uses several graphs as its input.

        This should not normally be invoked directly, because there may be
        specialized implementations for particular repository types.  See
        Repository.get_graph().

        :param parents_provider: An object providing a get_parent_map call
            conforming to the behavior of
            _StackedParentsProvider.get_parent_map.
        """
        if getattr(parents_provider, 'get_parents', None) is not None:
            self.get_parents = parents_provider.get_parents
        if getattr(parents_provider, 'get_parent_map', None) is not None:
            self.get_parent_map = parents_provider.get_parent_map
        self._parents_provider = parents_provider

    def __repr__(self):
        return 'Graph(%r)' % self._parents_provider

    def find_lca(self, *revisions):
        """Determine the lowest common ancestors of the provided revisions.

        A lowest common ancestor is a common ancestor none of whose
        descendants are common ancestors.  In graphs, unlike trees, there may
        be multiple lowest common ancestors.

        This algorithm has two phases.  Phase 1 identifies border ancestors,
        and phase 2 filters border ancestors to determine lowest common
        ancestors.

        In phase 1, border ancestors are identified, using a breadth-first
        search starting at the bottom of the graph.  Searches are stopped
        whenever a node or one of its descendants is determined to be common.

        In phase 2, the border ancestors are filtered to find the least
        common ancestors.  This is done by searching the ancestries of each
        border ancestor.

        Phase 2 is performed on the principle that a border ancestor that is
        not an ancestor of any other border ancestor is a least common
        ancestor.

        Searches are stopped when they find a node that is determined to be a
        common ancestor of all border ancestors, because this shows that it
        cannot be a descendant of any border ancestor.

        The scaling of this operation should be proportional to:
        1. The number of uncommon ancestors
        2. The number of border ancestors
        3. The length of the shortest path between a border ancestor and an
           ancestor of all border ancestors.
        """
        border_common, common, sides = self._find_border_ancestors(revisions)
        # We may have common ancestors that can be reached from each other.
        # - ask for the heads of them to filter it down to only ones that
        #   cannot be reached from each other - phase 2.
        return self.heads(border_common)

    def find_difference(self, left_revision, right_revision):
        """Determine the graph difference between two revisions"""
        border, common, searchers = self._find_border_ancestors(
            [left_revision, right_revision])
        self._search_for_extra_common(common, searchers)
        left = searchers[0].seen
        right = searchers[1].seen
        return (left.difference(right), right.difference(left))
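
    # For example (a sketch, using the diamond ancestry pictured near the
    # top of the file): each side of the result is the history reachable
    # from one tip but not the other.
    #
    ##   graph.find_difference('rev2a', 'rev2b')
    ##   # => (set(['rev2a']), set(['rev2b']))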

    def find_unique_ancestors(self, unique_revision, common_revisions):
        """Find the unique ancestors for a revision versus others.

        This returns the ancestry of unique_revision, excluding all revisions
        in the ancestry of common_revisions. If unique_revision is in the
        ancestry of common_revisions, then the empty set will be returned.

        :param unique_revision: The revision_id whose ancestry we are
            interested in.
            XXX: Would this API be better if we allowed multiple revisions on
                 the 'unique' side to be searched here?
        :param common_revisions: Revision_ids of ancestries to exclude.
        :return: A set of revisions in the ancestry of unique_revision
        """
        if unique_revision in common_revisions:
            return set()
        # Algorithm description
        # 1) Walk backwards from the unique node and all common nodes.
        # 2) When a node is seen by both sides, stop searching it in the unique
        #    walker, include it in the common walker.
        # 3) Stop searching when there are no nodes left for the unique walker.
        #    At this point, you have a maximal set of unique nodes. Some of
        #    them may actually be common, and you haven't reached them yet.
        # 4) Start new searchers for the unique nodes, seeded with the
        #    information you have so far.
        # 5) Continue searching, stopping the common searches when the search
        #    tip is an ancestor of all unique nodes.
        # 6) Aggregate together unique searchers when they are searching the
        #    same tips. When all unique searchers are searching the same node,
        #    move the search to a single 'all_unique_searcher'.
        # 7) The 'all_unique_searcher' represents the very 'tip' of searching.
        #    Most of the time this produces very little important information.
        #    So don't step it as quickly as the other searchers.
        # 8) Search is done when all common searchers have completed.

        unique_searcher, common_searcher = self._find_initial_unique_nodes(
            [unique_revision], common_revisions)

        unique_nodes = unique_searcher.seen.difference(common_searcher.seen)
        if not unique_nodes:
            return unique_nodes

        (all_unique_searcher,
         unique_tip_searchers) = self._make_unique_searchers(unique_nodes,
                                    unique_searcher, common_searcher)

        self._refine_unique_nodes(unique_searcher, all_unique_searcher,
                                  unique_tip_searchers, common_searcher)
        true_unique_nodes = unique_nodes.difference(common_searcher.seen)
        if 'graph' in debug.debug_flags:
            trace.mutter('Found %d truly unique nodes out of %d',
                         len(true_unique_nodes), len(unique_nodes))
        return true_unique_nodes
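
    # For example (a sketch, diamond ancestry again): the ancestry of
    # 'rev3' minus everything reachable from 'rev2b'.
    #
    ##   graph.find_unique_ancestors('rev3', ['rev2b'])
    ##   # => set(['rev3', 'rev2a'])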

    def _find_initial_unique_nodes(self, unique_revisions, common_revisions):
        """Steps 1-3 of find_unique_ancestors.

        Find the maximal set of unique nodes. Some of these might actually
        still be common, but we are sure that there are no other unique nodes.

        :return: (unique_searcher, common_searcher)
        """
        unique_searcher = self._make_breadth_first_searcher(unique_revisions)
        # we know that unique_revisions aren't in common_revisions, so skip
        # past them.
        unique_searcher.next()
        common_searcher = self._make_breadth_first_searcher(common_revisions)

        # As long as we are still finding unique nodes, keep searching
        while unique_searcher._next_query:
            next_unique_nodes = set(unique_searcher.step())
            next_common_nodes = set(common_searcher.step())

            # Check if either searcher encounters new nodes seen by the other
            # side.
            unique_are_common_nodes = next_unique_nodes.intersection(
                common_searcher.seen)
            unique_are_common_nodes.update(
                next_common_nodes.intersection(unique_searcher.seen))
            if unique_are_common_nodes:
                ancestors = unique_searcher.find_seen_ancestors(
                                unique_are_common_nodes)
                # TODO: This is a bit overboard, we only really care about
                #       the ancestors of the tips because the rest we
                #       already know. This is *correct* but causes us to
                #       search too much ancestry.
                ancestors.update(common_searcher.find_seen_ancestors(ancestors))
                unique_searcher.stop_searching_any(ancestors)
                common_searcher.start_searching(ancestors)

        return unique_searcher, common_searcher

    def _make_unique_searchers(self, unique_nodes, unique_searcher,
                               common_searcher):
        """Create a searcher for all the unique search tips (step 4).

        As a side effect, the common_searcher will stop searching any nodes
        that are ancestors of the unique searcher tips.

        :return: (all_unique_searcher, unique_tip_searchers)
        """
        unique_tips = self._remove_simple_descendants(unique_nodes,
                        self.get_parent_map(unique_nodes))

        if len(unique_tips) == 1:
            unique_tip_searchers = []
            ancestor_all_unique = unique_searcher.find_seen_ancestors(unique_tips)
        else:
            unique_tip_searchers = []
            for tip in unique_tips:
                revs_to_search = unique_searcher.find_seen_ancestors([tip])
                revs_to_search.update(
                    common_searcher.find_seen_ancestors(revs_to_search))
                searcher = self._make_breadth_first_searcher(revs_to_search)
                # We don't care about the starting nodes.
                searcher._label = tip
                searcher.step()
                unique_tip_searchers.append(searcher)

            ancestor_all_unique = None
            for searcher in unique_tip_searchers:
                if ancestor_all_unique is None:
                    ancestor_all_unique = set(searcher.seen)
                else:
                    ancestor_all_unique = ancestor_all_unique.intersection(
                                                searcher.seen)
        # Collapse all the common nodes into a single searcher
        all_unique_searcher = self._make_breadth_first_searcher(
                                ancestor_all_unique)
        if ancestor_all_unique:
            # We've seen these nodes in all the searchers, so we'll just go to
            # the next
            all_unique_searcher.step()

            # Stop any search tips that are already known as ancestors of the
            # unique nodes
            stopped_common = common_searcher.stop_searching_any(
                common_searcher.find_seen_ancestors(ancestor_all_unique))

            total_stopped = 0
            for searcher in unique_tip_searchers:
                total_stopped += len(searcher.stop_searching_any(
                    searcher.find_seen_ancestors(ancestor_all_unique)))
        if 'graph' in debug.debug_flags:
            trace.mutter('For %d unique nodes, created %d + 1 unique searchers'
                         ' (%d stopped search tips, %d common ancestors'
                         ' (%d stopped common)',
                         len(unique_nodes), len(unique_tip_searchers),
                         total_stopped, len(ancestor_all_unique),
                         len(stopped_common))
        return all_unique_searcher, unique_tip_searchers

    def _step_unique_and_common_searchers(self, common_searcher,
                                          unique_tip_searchers,
                                          unique_searcher):
        """Step all the searchers"""
        newly_seen_common = set(common_searcher.step())
        newly_seen_unique = set()
        for searcher in unique_tip_searchers:
            next = set(searcher.step())
            next.update(unique_searcher.find_seen_ancestors(next))
            next.update(common_searcher.find_seen_ancestors(next))
            for alt_searcher in unique_tip_searchers:
                if alt_searcher is searcher:
                    continue
                next.update(alt_searcher.find_seen_ancestors(next))
            searcher.start_searching(next)
            newly_seen_unique.update(next)
        return newly_seen_common, newly_seen_unique

    def _find_nodes_common_to_all_unique(self, unique_tip_searchers,
                                         all_unique_searcher,
                                         newly_seen_unique, step_all_unique):
        """Find nodes that are common to all unique_tip_searchers.

        If it is time, step the all_unique_searcher, and add its nodes to the
        result.
        """
        common_to_all_unique_nodes = newly_seen_unique.copy()
        for searcher in unique_tip_searchers:
            common_to_all_unique_nodes.intersection_update(searcher.seen)
        common_to_all_unique_nodes.intersection_update(
                    all_unique_searcher.seen)
        # Step all-unique less frequently than the other searchers.
        # In the common case, we don't need to spider out far here, so
        # avoid doing extra work.
        if step_all_unique:
            tstart = time.clock()
            nodes = all_unique_searcher.step()
            common_to_all_unique_nodes.update(nodes)
            if 'graph' in debug.debug_flags:
                tdelta = time.clock() - tstart
                trace.mutter('all_unique_searcher step() took %.3fs'
                             ' for %d nodes (%d total), iteration: %s',
                             tdelta, len(nodes), len(all_unique_searcher.seen),
                             all_unique_searcher._iterations)
        return common_to_all_unique_nodes

    def _collapse_unique_searchers(self, unique_tip_searchers,
                                   common_to_all_unique_nodes):
        """Combine searchers that are searching the same tips.

        When two searchers are searching the same tips, we can stop one of the
        searchers. We also know that the maximal set of common ancestors is the
        intersection of the two original searchers.

        :return: A list of searchers that are searching unique nodes.
        """
        # Filter out searchers that don't actually search different
        # nodes. We already have the ancestry intersection for them
        unique_search_tips = {}
        for searcher in unique_tip_searchers:
            stopped = searcher.stop_searching_any(common_to_all_unique_nodes)
            will_search_set = frozenset(searcher._next_query)
            if not will_search_set:
                if 'graph' in debug.debug_flags:
                    trace.mutter('Unique searcher %s was stopped.'
                                 ' (%s iterations) %d nodes stopped',
                                 searcher._label,
                                 searcher._iterations,
                                 len(stopped))
            elif will_search_set not in unique_search_tips:
                # This searcher is searching a unique set of nodes, let it
                unique_search_tips[will_search_set] = [searcher]
            else:
                unique_search_tips[will_search_set].append(searcher)
        # TODO: it might be possible to collapse searchers faster when they
        #       only have *some* search tips in common.
        next_unique_searchers = []
        for searchers in unique_search_tips.itervalues():
            if len(searchers) == 1:
                # Searching unique tips, go for it
                next_unique_searchers.append(searchers[0])
            else:
                # These searchers have started searching the same tips, we
                # don't need them to cover the same ground. The
                # intersection of their ancestry won't change, so create a
                # new searcher, combining their histories.
                next_searcher = searchers[0]
                for searcher in searchers[1:]:
                    next_searcher.seen.intersection_update(searcher.seen)
                if 'graph' in debug.debug_flags:
                    trace.mutter('Combining %d searchers into a single'
                                 ' searcher searching %d nodes with'
                                 ' %d ancestry',
                                 len(searchers),
                                 len(next_searcher._next_query),
                                 len(next_searcher.seen))
                next_unique_searchers.append(next_searcher)
        return next_unique_searchers

    def _refine_unique_nodes(self, unique_searcher, all_unique_searcher,
                             unique_tip_searchers, common_searcher):
        """Steps 5-8 of find_unique_ancestors.

        This function returns when common_searcher has stopped searching for
        more nodes.
        """
        # We step the ancestor_all_unique searcher only every
        # STEP_UNIQUE_SEARCHER_EVERY steps.
        step_all_unique_counter = 0
        # While we still have common nodes to search
        while common_searcher._next_query:
            (newly_seen_common,
             newly_seen_unique) = self._step_unique_and_common_searchers(
                common_searcher, unique_tip_searchers, unique_searcher)
            # These nodes are common ancestors of all unique nodes
            common_to_all_unique_nodes = self._find_nodes_common_to_all_unique(
                unique_tip_searchers, all_unique_searcher, newly_seen_unique,
                step_all_unique_counter==0)
            step_all_unique_counter = ((step_all_unique_counter + 1)
                                       % STEP_UNIQUE_SEARCHER_EVERY)

            if newly_seen_common:
                # If a 'common' node is an ancestor of all unique searchers, we
                # can stop searching it.
                common_searcher.stop_searching_any(
                    all_unique_searcher.seen.intersection(newly_seen_common))
            if common_to_all_unique_nodes:
                common_to_all_unique_nodes.update(
                    common_searcher.find_seen_ancestors(
                        common_to_all_unique_nodes))
                # The all_unique searcher can start searching the common nodes
                # but everyone else can stop.
                # This is the sort of thing where we would like to not have it
                # start_searching all of the nodes, but only mark all of them
                # as seen, and have it search only the actual tips. Otherwise
                # it is another get_parent_map() traversal for it to figure out
                # what we already should know.
                all_unique_searcher.start_searching(common_to_all_unique_nodes)
                common_searcher.stop_searching_any(common_to_all_unique_nodes)

            next_unique_searchers = self._collapse_unique_searchers(
                unique_tip_searchers, common_to_all_unique_nodes)
            if len(unique_tip_searchers) != len(next_unique_searchers):
                if 'graph' in debug.debug_flags:
                    trace.mutter('Collapsed %d unique searchers => %d'
                                 ' at %s iterations',
                                 len(unique_tip_searchers),
                                 len(next_unique_searchers),
                                 all_unique_searcher._iterations)
            unique_tip_searchers = next_unique_searchers

    @symbol_versioning.deprecated_method(symbol_versioning.one_one)
    def get_parents(self, revisions):
        """Find revision ids of the parents of a list of revisions

        A list is returned of the same length as the input. Each entry
        is a list of parent ids for the corresponding input revision.

        [NULL_REVISION] is used as the parent of the first user-committed
        revision.  Its parent list is empty.

        If the revision is not present (i.e. a ghost), None is used in place
        of the list of parents.

        Deprecated in bzr 1.2 - please see get_parent_map.
        """
        parents = self.get_parent_map(revisions)
        return [parents.get(r, None) for r in revisions]

    def get_parent_map(self, revisions):
        """Get a map of key:parent_list for revisions.

        This implementation delegates to get_parents, for old parent_providers
        that do not supply get_parent_map.
        """
        result = {}
        for rev, parents in zip(revisions, self.get_parents(revisions)):
            if parents is not None:
                result[rev] = parents
        return result

    def _make_breadth_first_searcher(self, revisions):
        return _BreadthFirstSearcher(revisions, self)

    def _find_border_ancestors(self, revisions):
        """Find common ancestors with at least one uncommon descendant.

        Border ancestors are identified using a breadth-first
        search starting at the bottom of the graph.  Searches are stopped
        whenever a node or one of its descendants is determined to be common.

        This will scale with the number of uncommon ancestors.

        As well as the border ancestors, a set of seen common ancestors and a
        list of sets of seen ancestors for each input revision is returned.
        This allows calculation of graph difference from the results of this
        operation.
        """
        if None in revisions:
            raise errors.InvalidRevisionId(None, self)
        common_ancestors = set()
        searchers = [self._make_breadth_first_searcher([r])
                     for r in revisions]
        active_searchers = searchers[:]
        border_ancestors = set()

        while True:
            newly_seen = set()
            for searcher in searchers:
                new_ancestors = searcher.step()
                if new_ancestors:
                    newly_seen.update(new_ancestors)
            new_common = set()
            for revision in newly_seen:
                if revision in common_ancestors:
                    # Not a border ancestor because it was seen as common
                    # already
                    new_common.add(revision)
                    continue
                for searcher in searchers:
                    if revision not in searcher.seen:
                        break
                else:
                    # This is a border because it is a first common that we see
                    # after walking for a while.
                    border_ancestors.add(revision)
                    new_common.add(revision)
            if new_common:
                for searcher in searchers:
                    new_common.update(searcher.find_seen_ancestors(new_common))
                for searcher in searchers:
                    searcher.start_searching(new_common)
                common_ancestors.update(new_common)

            # Figure out what the searchers will be searching next, and if
            # there is only 1 set being searched, then we are done searching,
            # since all searchers would have to be searching the same data,
            # thus it *must* be in common.
            unique_search_sets = set()
            for searcher in searchers:
                will_search_set = frozenset(searcher._next_query)
                if will_search_set not in unique_search_sets:
                    # This searcher is searching a unique set of nodes, let it
                    unique_search_sets.add(will_search_set)

            if len(unique_search_sets) == 1:
                nodes = unique_search_sets.pop()
                uncommon_nodes = nodes.difference(common_ancestors)
                if uncommon_nodes:
                    raise AssertionError("Somehow we ended up converging"
                                         " without actually marking them as"
                                         " in common."
                                         "\nStart_nodes: %s"
                                         "\nuncommon_nodes: %s"
                                         % (revisions, uncommon_nodes))
                break
        return border_ancestors, common_ancestors, searchers

    def heads(self, keys):
        """Return the heads from amongst keys.

        This is done by searching the ancestries of each key.  Any key that is
        reachable from another key is not returned; all the others are.

        This operation scales with the relative depth between any two keys. If
        any two keys are completely disconnected all ancestry of both sides
        will be retrieved.

        :param keys: An iterable of keys.
        :return: A set of the heads. Note that as a set there is no ordering
            information.  Callers will need to filter their input to create
            order if they need it.
        """
        candidate_heads = set(keys)
        if revision.NULL_REVISION in candidate_heads:
            # NULL_REVISION is only a head if it is the only entry
            candidate_heads.remove(revision.NULL_REVISION)
            if not candidate_heads:
                return set([revision.NULL_REVISION])
        if len(candidate_heads) < 2:
            return candidate_heads
        searchers = dict((c, self._make_breadth_first_searcher([c]))
                          for c in candidate_heads)
        active_searchers = dict(searchers)
        # skip over the actual candidate for each searcher
        for searcher in active_searchers.itervalues():
            searcher.next()
        # The common walker finds nodes that are common to two or more of the
        # input keys, so that we don't access all history when a currently
        # uncommon search point actually meets up with something behind a
        # common search point. Common search points do not keep searches
        # active; they just allow us to make searches inactive without
        # accessing all history.
        common_walker = self._make_breadth_first_searcher([])
        while len(active_searchers) > 0:
            ancestors = set()
            # advance searches
            try:
                common_walker.next()
            except StopIteration:
                # No common points being searched at this time.
                pass
            for candidate in active_searchers.keys():
                try:
                    searcher = active_searchers[candidate]
                except KeyError:
                    # rare case: we deleted candidate in a previous iteration
                    # through this for loop, because it was determined to be
                    # a descendant of another candidate.
                    continue
                try:
                    ancestors.update(searcher.next())
                except StopIteration:
                    del active_searchers[candidate]
                    continue
            # process found nodes
            new_common = set()
            for ancestor in ancestors:
                if ancestor in candidate_heads:
                    candidate_heads.remove(ancestor)
                    del searchers[ancestor]
                    if ancestor in active_searchers:
                        del active_searchers[ancestor]
                # it may meet up with a known common node
                if ancestor in common_walker.seen:
                    # some searcher has encountered our known common nodes:
                    # just stop it
                    ancestor_set = set([ancestor])
                    for searcher in searchers.itervalues():
                        searcher.stop_searching_any(ancestor_set)
                else:
                    # or it may have been just reached by all the searchers:
                    for searcher in searchers.itervalues():
                        if ancestor not in searcher.seen:
                            break
                    else:
                        # The final active searcher has just reached this node,
                        # making it be known as a descendant of all candidates,
                        # so we can stop searching it, and any seen ancestors
                        new_common.add(ancestor)
                        for searcher in searchers.itervalues():
                            seen_ancestors = \
                                searcher.find_seen_ancestors([ancestor])
                            searcher.stop_searching_any(seen_ancestors)
            common_walker.start_searching(new_common)
        return candidate_heads
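
    # For example (a sketch, diamond ancestry again): 'rev1' is reachable
    # from both tips, so only the tips themselves are heads.
    #
    ##   graph.heads(['rev1', 'rev2a', 'rev2b'])
    ##   # => set(['rev2a', 'rev2b'])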

    def find_unique_lca(self, left_revision, right_revision,
                        count_steps=False):
        """Find a unique LCA.

        Find lowest common ancestors.  If there is no unique common
        ancestor, find the lowest common ancestors of those ancestors.

        Iteration stops when a unique lowest common ancestor is found.
        The graph origin is necessarily a unique lowest common ancestor.

        Note that None is not an acceptable substitute for NULL_REVISION
        in the input for this method.

        :param count_steps: If True, the return value will be a tuple of
            (unique_lca, steps) where steps is the number of times that
            find_lca was run.  If False, only unique_lca is returned.
        """
        revisions = [left_revision, right_revision]
        steps = 0
        while True:
            steps += 1
            lca = self.find_lca(*revisions)
            if len(lca) == 1:
                result = lca.pop()
                if count_steps:
                    return result, steps
                return result
            if len(lca) == 0:
                raise errors.NoCommonAncestor(left_revision, right_revision)
            revisions = lca

    def iter_ancestry(self, revision_ids):
        """Iterate the ancestry of this revision.

        :param revision_ids: Nodes to start the search
        :return: Yield tuples mapping a revision_id to its parents for the
            ancestry of revision_id.
            Ghosts will be returned with None as their parents, and nodes
            with no parents will have NULL_REVISION as their only parent. (As
            defined by get_parent_map.)
            There will also be a node for (NULL_REVISION, ())
        """
        pending = set(revision_ids)
        processed = set()
        while pending:
            processed.update(pending)
            next_map = self.get_parent_map(pending)
            next_pending = set()
            for item in next_map.iteritems():
                yield item
                next_pending.update(p for p in item[1] if p not in processed)
            ghosts = pending.difference(next_map)
            for ghost in ghosts:
                yield (ghost, None)
            pending = next_pending

    def iter_topo_order(self, revisions):
        """Iterate through the input revisions in topological order.

        This sorting only ensures that parents come before their children.
        An ancestor may sort after a descendant if the relationship is not
        visible in the supplied list of revisions.
        """
        sorter = tsort.TopoSorter(self.get_parent_map(revisions))
        return sorter.iter_topo_order()
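
    # For example (a sketch, diamond ancestry again): parents always come
    # before their children in the output.
    #
    ##   list(graph.iter_topo_order(['rev3', 'rev2a', 'rev1']))
    ##   # => ['rev1', 'rev2a', 'rev3'] (one valid ordering)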

    def is_ancestor(self, candidate_ancestor, candidate_descendant):
        """Determine whether a revision is an ancestor of another.

        We answer this using heads() as heads() has the logic to perform the
        smallest number of parent lookups to determine the ancestral
        relationship between N revisions.
        """
        return set([candidate_descendant]) == self.heads(
            [candidate_ancestor, candidate_descendant])
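
    # For example (a sketch, diamond ancestry again):
    #
    ##   graph.is_ancestor('rev1', 'rev3')    # => True
    ##   graph.is_ancestor('rev2a', 'rev2b')  # => False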

    def _search_for_extra_common(self, common, searchers):
        """Make sure that unique nodes are genuinely unique.

        After _find_border_ancestors, all nodes marked "common" are indeed
        common. Some of the nodes considered unique are not, due to history
        shortcuts stopping the searches early.

        We know that we have searched enough when all common search tips are
        descended from all unique (uncommon) nodes because we know that a node
        cannot be an ancestor of its own ancestor.

        :param common: A set of common nodes
        :param searchers: The searchers returned from _find_border_ancestors
        :return: None
        """
        # Basic algorithm...
        #   A) The passed in searchers should all be on the same tips, thus
        #      they should be considered the "common" searchers.
        #   B) We find the difference between the searchers, these are the
        #      "unique" nodes for each side.
        #   C) We do a quick culling so that we only start searching from the
        #      more interesting unique nodes. (A unique ancestor is more
        #      interesting than any of its children.)
        #   D) We start searching for ancestors common to all unique nodes.
        #   E) We have the common searchers stop searching any ancestors of
        #      nodes found by (D).
        #   F) When there are no more common search tips, we stop.

        # TODO: We need a way to remove unique_searchers when they overlap with
        #       other unique searchers.
        if len(searchers) != 2:
            raise NotImplementedError(
                "Algorithm not yet implemented for > 2 searchers")
        common_searchers = searchers
        left_searcher = searchers[0]
        right_searcher = searchers[1]
        unique = left_searcher.seen.symmetric_difference(right_searcher.seen)
        if not unique: # No unique nodes, nothing to do
            return
        total_unique = len(unique)
        unique = self._remove_simple_descendants(unique,
                    self.get_parent_map(unique))
        simple_unique = len(unique)

        unique_searchers = []
        for revision_id in unique:
            if revision_id in left_searcher.seen:
                parent_searcher = left_searcher
            else:
                parent_searcher = right_searcher
            revs_to_search = parent_searcher.find_seen_ancestors([revision_id])
            if not revs_to_search: # XXX: This shouldn't be possible
                revs_to_search = [revision_id]
            searcher = self._make_breadth_first_searcher(revs_to_search)
            # We don't care about the starting nodes.
            searcher.step()
            unique_searchers.append(searcher)

        # possible todo: aggregate the common searchers into a single common
        #   searcher, just make sure that we include the nodes into the .seen
        #   properties of the original searchers

        ancestor_all_unique = None
        for searcher in unique_searchers:
            if ancestor_all_unique is None:
                ancestor_all_unique = set(searcher.seen)
            else:
                ancestor_all_unique = ancestor_all_unique.intersection(
                    searcher.seen)

        trace.mutter('Started %s unique searchers for %s unique revisions',
                     simple_unique, total_unique)

        while True: # If we have no more nodes we have nothing to do
            newly_seen_common = set()
            for searcher in common_searchers:
                newly_seen_common.update(searcher.step())
            newly_seen_unique = set()
            for searcher in unique_searchers:
                newly_seen_unique.update(searcher.step())
            new_common_unique = set()
            for revision in newly_seen_unique:
                for searcher in unique_searchers:
                    if revision not in searcher.seen:
                        break
                else:
                    # This is a border because it is a first common that we see
                    # after walking for a while.
                    new_common_unique.add(revision)
            if newly_seen_common:
                # These are nodes descended from one of the 'common' searchers.
                # Make sure all searchers are on the same page
                for searcher in common_searchers:
                    newly_seen_common.update(
                        searcher.find_seen_ancestors(newly_seen_common))
                # We start searching the whole ancestry. It is a bit wasteful,
                # though. We really just want to mark all of these nodes as
                # 'seen' and then start just the tips. However, it requires a
                # get_parent_map() call to figure out the tips anyway, and all
                # redundant requests should be fairly fast.
                for searcher in common_searchers:
                    searcher.start_searching(newly_seen_common)

                # If a 'common' node is an ancestor of all unique searchers, we
                # can stop searching it.
                stop_searching_common = ancestor_all_unique.intersection(
                    newly_seen_common)
                if stop_searching_common:
                    for searcher in common_searchers:
                        searcher.stop_searching_any(stop_searching_common)
            if new_common_unique:
                # We found some ancestors that are common
                for searcher in unique_searchers:
                    new_common_unique.update(
                        searcher.find_seen_ancestors(new_common_unique))
                # Since these are common, we can grab another set of ancestors
                # that we have seen
                for searcher in common_searchers:
                    new_common_unique.update(
                        searcher.find_seen_ancestors(new_common_unique))

                # We can tell all of the unique searchers to start at these
                # nodes, and tell all of the common searchers to *stop*
                # searching these nodes
                for searcher in unique_searchers:
                    searcher.start_searching(new_common_unique)
                for searcher in common_searchers:
                    searcher.stop_searching_any(new_common_unique)
                ancestor_all_unique.update(new_common_unique)

                # Filter out searchers that don't actually search different
                # nodes. We already have the ancestry intersection for them
                next_unique_searchers = []
                unique_search_sets = set()
                for searcher in unique_searchers:
                    will_search_set = frozenset(searcher._next_query)
                    if will_search_set not in unique_search_sets:
                        # This searcher is searching a unique set of nodes, let it
                        unique_search_sets.add(will_search_set)
                        next_unique_searchers.append(searcher)
                unique_searchers = next_unique_searchers
            for searcher in common_searchers:
                if searcher._next_query:
                    break
            else:
                # All common searchers have stopped searching
                return

    def _remove_simple_descendants(self, revisions, parent_map):
        """remove revisions which are children of other ones in the set

        This doesn't do any graph searching, it just checks the immediate
        parent_map to find if there are any children which can be removed.

        :param revisions: A set of revision_ids
        :return: A set of revision_ids with the children removed
        """
        simple_ancestors = revisions.copy()
        # TODO: jam 20071214 we *could* restrict it to searching only the
        #       parent_map of revisions already present in 'revisions', but
        #       considering the general use case, I think this is actually
        #       better.

        # This is the same as the following loop. I don't know that it is any
        # faster.
        ## simple_ancestors.difference_update(r for r, p_ids in parent_map.iteritems()
        ##     if p_ids is not None and revisions.intersection(p_ids))
        ## return simple_ancestors

        # Yet Another Way, invert the parent map (which can be cached)
        ## descendants = {}
        ## for revision_id, parent_ids in parent_map.iteritems():
        ##   for p_id in parent_ids:
        ##       descendants.setdefault(p_id, []).append(revision_id)
        ## for revision in revisions.intersection(descendants):
        ##   simple_ancestors.difference_update(descendants[revision])
        ## return simple_ancestors

        for revision, parent_ids in parent_map.iteritems():
            if parent_ids is None:
                continue
            for parent_id in parent_ids:
                if parent_id in revisions:
                    # This node has a parent present in the set, so we can
                    # skip it.
                    simple_ancestors.discard(revision)
                    break
        return simple_ancestors


class HeadsCache(object):
    """A cache of results for graph heads calls."""

    def __init__(self, graph):
        self.graph = graph
        self._heads = {}

    def heads(self, keys):
        """Return the heads of keys.

        This matches the API of Graph.heads(), specifically the return value is
        a set which can be mutated, and ordering of the input is not preserved
        in the output.

        :see also: Graph.heads.
        :param keys: The keys to calculate heads for.
        :return: A set containing the heads, which may be mutated without
            affecting future lookups.
        """
        keys = frozenset(keys)
        try:
            return set(self._heads[keys])
        except KeyError:
            heads = self.graph.heads(keys)
            self._heads[keys] = heads
            return heads


class FrozenHeadsCache(object):
    """Cache heads() calls, assuming the caller won't modify them."""

    def __init__(self, graph):
        self.graph = graph
        self._heads = {}

    def heads(self, keys):
        """Return the heads of keys.

        Similar to Graph.heads(). The main difference is that the return value
        is a frozen set which cannot be mutated.

        :see also: Graph.heads.
        :param keys: The keys to calculate heads for.
        :return: A frozenset containing the heads.
        """
        keys = frozenset(keys)
        try:
            return self._heads[keys]
        except KeyError:
            heads = frozenset(self.graph.heads(keys))
            self._heads[keys] = heads
            return heads

    def cache(self, keys, heads):
        """Store a known value."""
        self._heads[frozenset(keys)] = frozenset(heads)
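
# For example (a sketch): both caches memoise on the frozenset of the query
# keys, so the order of the keys does not matter.
#
##   heads_cache = HeadsCache(graph)
##   heads_cache.heads(['rev2a', 'rev2b'])  # computed => set(['rev2a', 'rev2b'])
##   heads_cache.heads(['rev2b', 'rev2a'])  # answered from the cache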


class _BreadthFirstSearcher(object):
    """Parallel search breadth-first the ancestry of revisions.

    This class implements the iterator protocol, but additionally
    1. provides a set of seen ancestors, and
    2. allows some ancestries to be unsearched, via stop_searching_any
    """

    def __init__(self, revisions, parents_provider):
        self._iterations = 0
        self._next_query = set(revisions)
        self.seen = set()
        self._started_keys = set(self._next_query)
        self._stopped_keys = set()
        self._parents_provider = parents_provider
        self._returning = 'next_with_ghosts'
        self._current_present = set()
        self._current_ghosts = set()
        self._current_parents = {}

    def __repr__(self):
        if self._iterations:
            prefix = "searching"
        else:
            prefix = "starting"
        search = '%s=%r' % (prefix, list(self._next_query))
        return ('_BreadthFirstSearcher(iterations=%d, %s,'
                ' seen=%r)' % (self._iterations, search, list(self.seen)))

    def get_result(self):
        """Get a SearchResult for the current state of this searcher.

        :return: A SearchResult for this search so far. The SearchResult is
            static - the search can be advanced and the search result will not
            be invalidated or altered.
        """
        if self._returning == 'next':
            # We have to know the current nodes children to be able to list the
            # exclude keys for them. However, while we could have a second
            # look-ahead result buffer and shuffle things around, this method
            # is typically only called once per search - when memoising the
            # results of the search.
            found, ghosts, next, parents = self._do_query(self._next_query)
            # pretend we didn't query: perhaps we should tweak _do_query to be
            # entirely stateless?
            self.seen.difference_update(next)
            next_query = next.union(ghosts)
        else:
            next_query = self._next_query
        excludes = self._stopped_keys.union(next_query)
        included_keys = self.seen.difference(excludes)
        return SearchResult(self._started_keys, excludes, len(included_keys),
            included_keys)

    def step(self):
        try:
            return self.next()
        except StopIteration:
            return ()

    def next(self):
        """Return the next ancestors of this revision.

        Ancestors are returned in the order they are seen in a breadth-first
        traversal.  No ancestor will be returned more than once. Ancestors are
        returned before their parentage is queried, so ghosts and missing
        revisions (including the start revisions) are included in the result.
        This can save a round trip in LCA style calculation by allowing
        convergence to be detected without reading the data for the revision
        the convergence occurs on.

        :return: A set of revision_ids.
        """
        if self._returning != 'next':
            # switch to returning the query, not the results.
            self._returning = 'next'
            self._iterations += 1
        else:
            self._advance()
        if len(self._next_query) == 0:
            raise StopIteration()
        # We have seen what we're querying at this point as we are returning
        # the query, not the results.
        self.seen.update(self._next_query)
        return self._next_query

    def next_with_ghosts(self):
        """Return the next found ancestors, with ghosts split out.

        Ancestors are returned in the order they are seen in a breadth-first
        traversal.  No ancestor will be returned more than once. Ancestors are
        returned only after asking for their parents, which allows us to detect
        which revisions are ghosts and which are not.

        :return: A tuple with (present ancestors, ghost ancestors) sets.
        """
        if self._returning != 'next_with_ghosts':
            # switch to returning the results, not the current query.
            self._returning = 'next_with_ghosts'
            self._advance()
        if len(self._next_query) == 0:
            raise StopIteration()
        self._advance()
        return self._current_present, self._current_ghosts

    def _advance(self):
        """Advance the search.

        Updates self.seen, self._next_query, self._current_present,
        self._current_ghosts, self._current_parents and self._iterations.
        """
        self._iterations += 1
        found, ghosts, next, parents = self._do_query(self._next_query)
        self._current_present = found
        self._current_ghosts = ghosts
        self._next_query = next
        self._current_parents = parents
        # ghosts are implicit stop points, otherwise the search cannot be
        # repeated when ghosts are filled.
        self._stopped_keys.update(ghosts)

    def _do_query(self, revisions):
        """Query for revisions.

        Adds revisions to the seen set.

        :param revisions: Revisions to query.
        :return: A tuple: (set(found_revisions), set(ghost_revisions),
           set(parents_of_found_revisions), dict(found_revisions:parents)).
        """
        found_revisions = set()
        parents_of_found = set()
        # revisions may contain nodes that point to other nodes in revisions:
        # we want to filter them out.
        self.seen.update(revisions)
        parent_map = self._parents_provider.get_parent_map(revisions)
        found_revisions.update(parent_map)
        for rev_id, parents in parent_map.iteritems():
            new_found_parents = [p for p in parents if p not in self.seen]
            if new_found_parents:
                # Calling set.update() with an empty generator is actually
                # rather expensive.
                parents_of_found.update(new_found_parents)
        ghost_revisions = revisions - found_revisions
        return found_revisions, ghost_revisions, parents_of_found, parent_map

    def __iter__(self):
        return self

    def find_seen_ancestors(self, revisions):
        """Find ancestors of these revisions that have already been seen.

        This function generally makes the assumption that querying for the
        parents of a node that has already been queried is reasonably cheap.
        (eg, not a round trip to a remote host).
        """
        # TODO: Often we might ask one searcher for its seen ancestors, and
        #       then ask another searcher the same question. This can result in
        #       searching the same revisions repeatedly if the two searchers
        #       have a lot of overlap.
        all_seen = self.seen
        pending = set(revisions).intersection(all_seen)
        seen_ancestors = set(pending)

        if self._returning == 'next':
            # self.seen contains what nodes have been returned, not what nodes
            # have been queried. We don't want to probe for nodes that haven't
            # been searched yet.
            not_searched_yet = self._next_query
        else:
            not_searched_yet = ()
        pending.difference_update(not_searched_yet)
        get_parent_map = self._parents_provider.get_parent_map
        while pending:
            parent_map = get_parent_map(pending)
            all_parents = []
            # We don't care if it is a ghost, since it can't be seen if it is
            # a ghost
            for parent_ids in parent_map.itervalues():
                all_parents.extend(parent_ids)
            next_pending = all_seen.intersection(all_parents).difference(seen_ancestors)
            seen_ancestors.update(next_pending)
            next_pending.difference_update(not_searched_yet)
            pending = next_pending

        return seen_ancestors

    def stop_searching_any(self, revisions):
        """
        Remove any of the specified revisions from the search list.

        None of the specified revisions are required to be present in the
        search list.  In this case, the call is a no-op.
        """
        # TODO: does this help performance?
        # if not revisions:
        #     return set()
        revisions = frozenset(revisions)
        if self._returning == 'next':
            stopped = self._next_query.intersection(revisions)
            self._next_query = self._next_query.difference(revisions)
        else:
            stopped_present = self._current_present.intersection(revisions)
            stopped = stopped_present.union(
                self._current_ghosts.intersection(revisions))
            self._current_present.difference_update(stopped)
            self._current_ghosts.difference_update(stopped)
            # stopping 'x' should stop returning parents of 'x', but
            # not if 'y' always references those same parents
            stop_rev_references = {}
            for rev in stopped_present:
                for parent_id in self._current_parents[rev]:
                    if parent_id not in stop_rev_references:
                        stop_rev_references[parent_id] = 0
                    stop_rev_references[parent_id] += 1
            # if only the stopped revisions reference it, the ref count will be
            # 0 after this loop
            for parents in self._current_parents.itervalues():
                for parent_id in parents:
                    try:
                        stop_rev_references[parent_id] -= 1
                    except KeyError:
                        pass
            stop_parents = set()
            for rev_id, refs in stop_rev_references.iteritems():
                if refs == 0:
                    stop_parents.add(rev_id)
            self._next_query.difference_update(stop_parents)
        self._stopped_keys.update(stopped)
        return stopped

    def start_searching(self, revisions):
        """Add revisions to the search.

        The parents of revisions will be returned from the next call to next()
        or next_with_ghosts(). If next_with_ghosts was the most recently used
        next* call then the return value is the result of looking up the
        ghost/not ghost status of revisions. (A tuple (present, ghosted)).
        """
        revisions = frozenset(revisions)
        self._started_keys.update(revisions)
        new_revisions = revisions.difference(self.seen)
        if self._returning == 'next':
            self._next_query.update(new_revisions)
            self.seen.update(new_revisions)
        else:
            # perform a query on revisions
            revs, ghosts, query, parents = self._do_query(revisions)
            self._stopped_keys.update(ghosts)
            self._current_present.update(revs)
            self._current_ghosts.update(ghosts)
            self._next_query.update(query)
            self._current_parents.update(parents)
            return revs, ghosts


class SearchResult(object):
    """The result of a breadth first search.

    A SearchResult provides the ability to reconstruct the search or access a
    set of the keys the search found.
    """

    def __init__(self, start_keys, exclude_keys, key_count, keys):
        """Create a SearchResult.

        :param start_keys: The keys the search started at.
        :param exclude_keys: The keys the search excludes.
        :param key_count: The total number of keys (from start to but not
            including exclude).
        :param keys: The keys the search found. Note that in future we may get
            a SearchResult from a smart server, in which case the keys list is
            not necessarily immediately available.
        """
        self._recipe = (start_keys, exclude_keys, key_count)
        self._keys = frozenset(keys)

    def get_recipe(self):
        """Return a recipe that can be used to replay this search.

        The recipe allows reconstruction of the same results at a later date
        without knowing all the found keys. The essential elements are a list
        of keys to start and to stop at. In order to give reproducible
        results when ghosts are encountered by a search they are automatically
        added to the exclude list (or else ghost filling may alter the
        results).

        :return: A tuple (start_keys_set, exclude_keys_set, revision_count). To
            recreate the results of this search, create a breadth first
            searcher on the same graph starting at start_keys. Then call next()
            (or next_with_ghosts()) repeatedly, and on every result, call
            stop_searching_any on any keys from the exclude_keys set. The
            revision_count value acts as a trivial cross-check - the found
            revisions of the new search should have as many elements as
            revision_count. If it does not, then additional revisions have been
            ghosted since the search was executed the first time and the second
            time.
        """
        return self._recipe

    def get_keys(self):
        """Return the keys found in this search.

        :return: A set of keys.
        """
        return self._keys
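
# For example (a sketch): a finished search can be memoised and later
# replayed from its recipe.
#
##   searcher = graph._make_breadth_first_searcher(['rev3'])
##   while True:
##       try:
##           searcher.next()
##       except StopIteration:
##           break
##   result = searcher.get_result()
##   result.get_recipe()  # => (start_keys, exclude_keys, revision_count)
##   result.get_keys()    # => every revision the search found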