539
508
def __init__(self, revisions, parents_provider):
541
self._next_query = set(revisions)
543
self._started_keys = set(self._next_query)
544
self._stopped_keys = set()
509
self._start = set(revisions)
510
self._search_revisions = None
511
self.seen = set(revisions)
545
512
self._parents_provider = parents_provider
546
self._returning = 'next_with_ghosts'
547
self._current_present = set()
548
self._current_ghosts = set()
549
self._current_parents = {}
551
514
def __repr__(self):
556
search = '%s=%r' % (prefix, list(self._next_query))
557
return ('_BreadthFirstSearcher(iterations=%d, %s,'
558
' seen=%r)' % (self._iterations, search, list(self.seen)))
560
def get_result(self):
561
"""Get a SearchResult for the current state of this searcher.
563
:return: A SearchResult for this search so far. The SearchResult is
564
static - the search can be advanced and the search result will not
565
be invalidated or altered.
567
if self._returning == 'next':
568
# We have to know the current nodes children to be able to list the
569
# exclude keys for them. However, while we could have a second
570
# look-ahead result buffer and shuffle things around, this method
571
# is typically only called once per search - when memoising the
572
# results of the search.
573
found, ghosts, next, parents = self._do_query(self._next_query)
574
# pretend we didn't query: perhaps we should tweak _do_query to be
575
# entirely stateless?
576
self.seen.difference_update(next)
577
next_query = next.union(ghosts)
579
next_query = self._next_query
580
excludes = self._stopped_keys.union(next_query)
581
included_keys = self.seen.difference(excludes)
582
return SearchResult(self._started_keys, excludes, len(included_keys),
515
if self._search_revisions is not None:
516
search = 'searching=%r' % (list(self._search_revisions),)
518
search = 'starting=%r' % (list(self._start),)
519
return ('_BreadthFirstSearcher(%s,'
520
' seen=%r)' % (search, list(self.seen)))
586
523
"""Return the next ancestors of this revision.
588
525
Ancestors are returned in the order they are seen in a breadth-first
589
traversal. No ancestor will be returned more than once. Ancestors are
590
returned before their parentage is queried, so ghosts and missing
591
revisions (including the start revisions) are included in the result.
592
This can save a round trip in LCA style calculation by allowing
593
convergence to be detected without reading the data for the revision
594
the convergence occurs on.
596
:return: A set of revision_ids.
526
traversal. No ancestor will be returned more than once.
598
if self._returning != 'next':
599
# switch to returning the query, not the results.
600
self._returning = 'next'
601
self._iterations += 1
528
if self._search_revisions is None:
529
self._search_revisions = self._start
604
if len(self._next_query) == 0:
605
raise StopIteration()
606
# We have seen what we're querying at this point as we are returning
607
# the query, not the results.
608
self.seen.update(self._next_query)
609
return self._next_query
611
def next_with_ghosts(self):
612
"""Return the next found ancestors, with ghosts split out.
614
Ancestors are returned in the order they are seen in a breadth-first
615
traversal. No ancestor will be returned more than once. Ancestors are
616
returned only after asking for their parents, which allows us to detect
617
which revisions are ghosts and which are not.
619
:return: A tuple with (present ancestors, ghost ancestors) sets.
621
if self._returning != 'next_with_ghosts':
622
# switch to returning the results, not the current query.
623
self._returning = 'next_with_ghosts'
625
if len(self._next_query) == 0:
626
raise StopIteration()
628
return self._current_present, self._current_ghosts
631
"""Advance the search.
633
Updates self.seen, self._next_query, self._current_present,
634
self._current_ghosts, self._current_parents and self._iterations.
636
self._iterations += 1
637
found, ghosts, next, parents = self._do_query(self._next_query)
638
self._current_present = found
639
self._current_ghosts = ghosts
640
self._next_query = next
641
self._current_parents = parents
642
# ghosts are implicit stop points, otherwise the search cannot be
643
# repeated when ghosts are filled.
644
self._stopped_keys.update(ghosts)
646
def _do_query(self, revisions):
647
"""Query for revisions.
649
Adds revisions to the seen set.
651
:param revisions: Revisions to query.
652
:return: A tuple: (set(found_revisions), set(ghost_revisions),
653
set(parents_of_found_revisions), dict(found_revisions:parents)).
655
found_parents = set()
656
parents_of_found = set()
657
# revisions may contain nodes that point to other nodes in revisions:
658
# we want to filter them out.
659
self.seen.update(revisions)
660
parent_map = self._parents_provider.get_parent_map(revisions)
661
for rev_id, parents in parent_map.iteritems():
662
found_parents.add(rev_id)
663
parents_of_found.update(p for p in parents if p not in self.seen)
664
ghost_parents = revisions - found_parents
665
return found_parents, ghost_parents, parents_of_found, parent_map
531
new_search_revisions = set()
532
parent_map = self._parents_provider.get_parent_map(
533
self._search_revisions)
534
for parents in parent_map.itervalues():
535
new_search_revisions.update(p for p in parents if
537
self._search_revisions = new_search_revisions
538
if len(self._search_revisions) == 0:
539
raise StopIteration()
540
self.seen.update(self._search_revisions)
541
return self._search_revisions
667
543
def __iter__(self):
686
562
None of the specified revisions are required to be present in the
687
563
search list. In this case, the call is a no-op.
689
revisions = frozenset(revisions)
690
if self._returning == 'next':
691
stopped = self._next_query.intersection(revisions)
692
self._next_query = self._next_query.difference(revisions)
694
stopped_present = self._current_present.intersection(revisions)
695
stopped = stopped_present.union(
696
self._current_ghosts.intersection(revisions))
697
self._current_present.difference_update(stopped)
698
self._current_ghosts.difference_update(stopped)
699
# stopping 'x' should stop returning parents of 'x', but
700
# not if 'y' always references those same parents
701
stop_rev_references = {}
702
for rev in stopped_present:
703
for parent_id in self._current_parents[rev]:
704
if parent_id not in stop_rev_references:
705
stop_rev_references[parent_id] = 0
706
stop_rev_references[parent_id] += 1
707
# if only the stopped revisions reference it, the ref count will be
709
for parents in self._current_parents.itervalues():
710
for parent_id in parents:
712
stop_rev_references[parent_id] -= 1
716
for rev_id, refs in stop_rev_references.iteritems():
718
stop_parents.add(rev_id)
719
self._next_query.difference_update(stop_parents)
720
self._stopped_keys.update(stopped)
565
stopped = self._search_revisions.intersection(revisions)
566
self._search_revisions = self._search_revisions.difference(revisions)
723
569
def start_searching(self, revisions):
724
"""Add revisions to the search.
726
The parents of revisions will be returned from the next call to next()
727
or next_with_ghosts(). If next_with_ghosts was the most recently used
728
next* call then the return value is the result of looking up the
729
ghost/not ghost status of revisions. (A tuple (present, ghosted)).
731
revisions = frozenset(revisions)
732
self._started_keys.update(revisions)
733
new_revisions = revisions.difference(self.seen)
734
revs, ghosts, query, parents = self._do_query(revisions)
735
self._stopped_keys.update(ghosts)
736
if self._returning == 'next':
737
self._next_query.update(new_revisions)
570
if self._search_revisions is None:
571
self._start = set(revisions)
739
# perform a query on revisions
740
self._current_present.update(revs)
741
self._current_ghosts.update(ghosts)
742
self._next_query.update(query)
743
self._current_parents.update(parents)
747
class SearchResult(object):
748
"""The result of a breadth first search.
750
A SearchResult provides the ability to reconstruct the search or access a
751
set of the keys the search found.
754
def __init__(self, start_keys, exclude_keys, key_count, keys):
755
"""Create a SearchResult.
757
:param start_keys: The keys the search started at.
758
:param exclude_keys: The keys the search excludes.
759
:param key_count: The total number of keys (from start to but not
761
:param keys: The keys the search found. Note that in future we may get
762
a SearchResult from a smart server, in which case the keys list is
763
not necessarily immediately available.
765
self._recipe = (start_keys, exclude_keys, key_count)
766
self._keys = frozenset(keys)
768
def get_recipe(self):
769
"""Return a recipe that can be used to replay this search.
771
The recipe allows reconstruction of the same results at a later date
772
without knowing all the found keys. The essential elements are a list
773
of keys to start and and to stop at. In order to give reproducible
774
results when ghosts are encountered by a search they are automatically
775
added to the exclude list (or else ghost filling may alter the
778
:return: A tuple (start_keys_set, exclude_keys_set, revision_count). To
779
recreate the results of this search, create a breadth first
780
searcher on the same graph starting at start_keys. Then call next()
781
(or next_with_ghosts()) repeatedly, and on every result, call
782
stop_searching_any on any keys from the exclude_keys set. The
783
revision_count value acts as a trivial cross-check - the found
784
revisions of the new search should have as many elements as
785
revision_count. If it does not, then additional revisions have been
786
ghosted since the search was executed the first time and the second
792
"""Return the keys found in this search.
794
:return: A set of keys.
573
self._search_revisions.update(revisions.difference(self.seen))
574
self.seen.update(revisions)