1594
1583
return revs, ghosts
1586
class AbstractSearchResult(object):
    """The result of a search, describing a set of keys.

    Search results are typically used as the 'fetch_spec' parameter when
    fetching.  This base class only defines the interface: every method
    raises NotImplementedError (with the bound method as its argument) until
    a subclass overrides it.

    :seealso: AbstractSearch
    """

    def get_recipe(self):
        """Return a recipe that can be used to replay this search.

        The recipe allows reconstruction of the same results at a later date.

        :return: A tuple of `(search_kind_str, *details)`.  The details vary by
            kind of search result.
        """
        raise NotImplementedError(self.get_recipe)

    def get_network_struct(self):
        """Return a tuple that can be transmitted via the HPSS protocol."""
        raise NotImplementedError(self.get_network_struct)

    def get_keys(self):
        """Return the keys found in this search.

        :return: A set of keys.
        """
        raise NotImplementedError(self.get_keys)

    def is_empty(self):
        """Return false if the search lists 1 or more revisions."""
        raise NotImplementedError(self.is_empty)

    def refine(self, seen, referenced):
        """Create a new search by refining this search.

        :param seen: Revisions that have been satisfied.
        :param referenced: Revision references observed while satisfying some
            of this search.
        :return: A search result.
        """
        raise NotImplementedError(self.refine)
1631
class AbstractSearch(object):
    """A search that can be executed, producing a search result.

    This base class only defines the interface; `execute` raises
    NotImplementedError until a subclass overrides it.

    :seealso: AbstractSearchResult
    """

    def execute(self):
        """Construct a network-ready search result from this search description.

        This may take some time to search repositories, etc.

        :return: A search result (an object that implements
            AbstractSearchResult's API).
        """
        raise NotImplementedError(self.execute)
1648
class SearchResult(AbstractSearchResult):
    """The result of a breadth first search.

    A SearchResult provides the ability to reconstruct the search or access a
    set of the keys the search found.
    """

    def __init__(self, start_keys, exclude_keys, key_count, keys):
        """Create a SearchResult.

        :param start_keys: The keys the search started at.
        :param exclude_keys: The keys the search excludes.
        :param key_count: The total number of keys (from start to but not
            including exclude).
        :param keys: The keys the search found. Note that in future we may get
            a SearchResult from a smart server, in which case the keys list is
            not necessarily immediately available.
        """
        # start_keys and exclude_keys are stored as given (not copied).
        self._recipe = ('search', start_keys, exclude_keys, key_count)
        self._keys = frozenset(keys)

    def __repr__(self):
        """Return a debugging representation showing at most 5 keys per set."""
        kind, start_keys, exclude_keys, key_count = self._recipe
        if len(start_keys) > 5:
            # Drop the closing bracket of the truncated list repr and append
            # an ellipsis marker instead.
            start_keys_repr = repr(list(start_keys)[:5])[:-1] + ', ...]'
        else:
            start_keys_repr = repr(start_keys)
        if len(exclude_keys) > 5:
            exclude_keys_repr = repr(list(exclude_keys)[:5])[:-1] + ', ...]'
        else:
            exclude_keys_repr = repr(exclude_keys)
        return '<%s %s:(%s, %s, %d)>' % (self.__class__.__name__,
            kind, start_keys_repr, exclude_keys_repr, key_count)

    def get_recipe(self):
        """Return a recipe that can be used to replay this search.

        The recipe allows reconstruction of the same results at a later date
        without knowing all the found keys. The essential elements are a list
        of keys to start and to stop at. In order to give reproducible
        results when ghosts are encountered by a search they are automatically
        added to the exclude list (or else ghost filling may alter the
        results).

        :return: A tuple ('search', start_keys_set, exclude_keys_set,
            revision_count). To recreate the results of this search, create a
            breadth first searcher on the same graph starting at start_keys.
            Then call next() (or next_with_ghosts()) repeatedly, and on every
            result, call stop_searching_any on any keys from the exclude_keys
            set. The revision_count value acts as a trivial cross-check - the
            found revisions of the new search should have as many elements as
            revision_count. If it does not, then additional revisions have been
            ghosted since the search was executed the first time and the second
            time.
        """
        return self._recipe

    def get_network_struct(self):
        """Return `(kind, body)` for transmission.

        The body is three newline-separated fields: the space-joined start
        keys, the space-joined exclude keys, and the key count.
        """
        start_keys = ' '.join(self._recipe[1])
        stop_keys = ' '.join(self._recipe[2])
        count = str(self._recipe[3])
        return (self._recipe[0], '\n'.join((start_keys, stop_keys, count)))

    def get_keys(self):
        """Return the keys found in this search.

        :return: A set of keys.
        """
        return self._keys

    def is_empty(self):
        """Return false if the search lists 1 or more revisions."""
        return self._recipe[3] == 0

    def refine(self, seen, referenced):
        """Create a new search by refining this search.

        :param seen: Revisions that have been satisfied.
        :param referenced: Revision references observed while satisfying some
            of this search.
        :return: A new SearchResult; this object is left unchanged.
        """
        start = self._recipe[1]
        # Work on a copy: updating the recipe's own exclude set in place would
        # silently alter this result (and the set the caller passed to
        # __init__) as a side effect of deriving a new one.
        exclude = set(self._recipe[2])
        count = self._recipe[3]
        keys = self.get_keys()
        # New heads = referenced + old heads - seen things - exclude
        pending_refs = set(referenced)
        pending_refs.update(start)
        pending_refs.difference_update(seen)
        pending_refs.difference_update(exclude)
        # New exclude = old exclude + satisfied heads
        seen_heads = start.intersection(seen)
        exclude.update(seen_heads)
        # keys gets seen removed
        keys = keys - seen
        # length is reduced by len(seen)
        count -= len(seen)
        return SearchResult(pending_refs, exclude, count, keys)
1748
class PendingAncestryResult(AbstractSearchResult):
    """A search result that will reconstruct the ancestry for some graph heads.

    Unlike SearchResult, this doesn't hold the complete search result in
    memory, it just holds a description of how to generate it.
    """

    def __init__(self, heads, repo):
        """Constructor.

        :param heads: an iterable of graph heads.
        :param repo: a repository to use to generate the ancestry for the given
            heads.
        """
        self.heads = frozenset(heads)
        self.repo = repo

    def __repr__(self):
        """Return a debugging representation listing at most 5 heads."""
        if len(self.heads) > 5:
            # Drop the closing bracket of the truncated list repr so the
            # "<N more>" marker can be appended inside it.
            heads_repr = repr(list(self.heads)[:5])[:-1]
            heads_repr += ', <%d more>...]' % (len(self.heads) - 5,)
        else:
            heads_repr = repr(self.heads)
        return '<%s heads:%s repo:%r>' % (
            self.__class__.__name__, heads_repr, self.repo)

    def get_recipe(self):
        """Return a recipe that can be used to replay this search.

        The recipe allows reconstruction of the same results at a later date.

        :seealso SearchResult.get_recipe:

        :return: A tuple ('proxy-search', start_keys_set, set(), -1)
            To recreate this result, create a PendingAncestryResult with the
            start_keys_set.
        """
        return ('proxy-search', self.heads, set(), -1)

    def get_network_struct(self):
        """Return the list ['ancestry-of', head, ...] for transmission."""
        parts = ['ancestry-of']
        parts.extend(self.heads)
        return parts

    def get_keys(self):
        """See SearchResult.get_keys.

        Returns all the keys for the ancestry of the heads, excluding
        NULL_REVISION.
        """
        return self._get_keys(self.repo.get_graph())

    def _get_keys(self, graph):
        """Return a list of the ancestry keys of self.heads found in `graph`.

        Entries whose parents are None, and NULL_REVISION itself, are left
        out.
        """
        NULL_REVISION = revision.NULL_REVISION
        keys = [key for (key, parents) in graph.iter_ancestry(self.heads)
                if key != NULL_REVISION and parents is not None]
        return keys

    def is_empty(self):
        """Return false if the search lists 1 or more revisions."""
        if revision.NULL_REVISION in self.heads:
            # NULL_REVISION alone contributes no revisions.
            return len(self.heads) == 1
        else:
            return len(self.heads) == 0

    def refine(self, seen, referenced):
        """Create a new search by refining this search.

        :param seen: Revisions that have been satisfied.
        :param referenced: Revision references observed while satisfying some
            of this search.
        :return: A PendingAncestryResult for the same repo whose heads are the
            old heads plus `referenced`, minus `seen`.
        """
        referenced = self.heads.union(referenced)
        return PendingAncestryResult(referenced - seen, self.repo)
1824
class EmptySearchResult(AbstractSearchResult):
    """An empty search result."""

    def is_empty(self):
        """Return True: an empty result never lists any revisions."""
        return True
1831
class EverythingResult(AbstractSearchResult):
    """A search result that simply requests everything in the repository."""

    def __init__(self, repo):
        """Constructor.

        :param repo: the repository whose revisions this result stands for.
        """
        self._repo = repo

    def __repr__(self):
        return '%s(%r)' % (self.__class__.__name__, self._repo)

    def get_recipe(self):
        """Not supported for this kind of result."""
        raise NotImplementedError(self.get_recipe)

    def get_network_struct(self):
        """Return the one-element tuple ('everything',) for transmission."""
        return ('everything',)

    def get_keys(self):
        """Return all revision ids of the repository.

        When the 'evil' debug flag is set and the repository is a
        RemoteRepository, the call site is logged first as a warning aimed at
        developers.
        """
        if 'evil' in debug.debug_flags:
            from bzrlib import remote
            if isinstance(self._repo, remote.RemoteRepository):
                # warn developers (not users) not to do this
                trace.mutter_callsite(
                    2, "EverythingResult(RemoteRepository).get_keys() is slow.")
        return self._repo.all_revision_ids()

    def is_empty(self):
        """Always report non-empty, without inspecting the repository."""
        # It's ok for this to wrongly return False: the worst that can happen
        # is that RemoteStreamSource will initiate a get_stream on an empty
        # repository.  And almost all repositories are non-empty.
        return False

    def refine(self, seen, referenced):
        """Create a new search by refining this search.

        :param seen: Revisions that have been satisfied.
        :param referenced: Revision references observed while satisfying some
            of this search.
        :return: A PendingAncestryResult whose heads are every revision id in
            the repository not in `seen`, plus `referenced`.
        """
        heads = set(self._repo.all_revision_ids())
        heads.difference_update(seen)
        heads.update(referenced)
        return PendingAncestryResult(heads, self._repo)
1868
class EverythingNotInOther(AbstractSearch):
    """Find all revisions in that are in one repo but not the other."""

    def __init__(self, to_repo, from_repo, find_ghosts=False):
        """Constructor.

        :param to_repo: repository that runs the search, via its
            search_missing_revision_ids method.
        :param from_repo: repository passed to that method as the other side.
        :param find_ghosts: forwarded unchanged to
            search_missing_revision_ids.
        """
        self.to_repo = to_repo
        self.from_repo = from_repo
        self.find_ghosts = find_ghosts

    def execute(self):
        """Run the search and return whatever to_repo reports."""
        return self.to_repo.search_missing_revision_ids(
            self.from_repo, find_ghosts=self.find_ghosts)
1881
class NotInOtherForRevs(AbstractSearch):
    """Find all revisions missing in one repo for a some specific heads."""

    def __init__(self, to_repo, from_repo, required_ids, if_present_ids=None,
            find_ghosts=False, limit=None):
        """Constructor.

        :param to_repo: repository that runs the search, via its
            search_missing_revision_ids method.
        :param from_repo: repository passed to that method as the other side.
        :param required_ids: revision IDs of heads that must be found, or else
            the search will fail with NoSuchRevision.  All revisions in their
            ancestry not already in the other repository will be included in
            the search result.
        :param if_present_ids: revision IDs of heads that may be absent in the
            source repository.  If present, then their ancestry not already
            found in other will be included in the search result.
        :param find_ghosts: forwarded unchanged to
            search_missing_revision_ids.
        :param limit: maximum number of revisions to fetch
        """
        self.to_repo = to_repo
        self.from_repo = from_repo
        self.find_ghosts = find_ghosts
        self.required_ids = required_ids
        self.if_present_ids = if_present_ids
        self.limit = limit

    def __repr__(self):
        """Return a debugging representation showing at most 5 ids per list."""
        if len(self.required_ids) > 5:
            reqd_revs_repr = repr(list(self.required_ids)[:5])[:-1] + ', ...]'
        else:
            reqd_revs_repr = repr(self.required_ids)
        # if_present_ids may be None, so test truthiness before taking len().
        if self.if_present_ids and len(self.if_present_ids) > 5:
            ifp_revs_repr = repr(list(self.if_present_ids)[:5])[:-1] + ', ...]'
        else:
            ifp_revs_repr = repr(self.if_present_ids)

        # NOTE(review): there is no separator between the if-present and limit
        # fields in the format string -- confirm before changing, in case the
        # exact text is compared somewhere.
        return ("<%s from:%r to:%r find_ghosts:%r req'd:%r if-present:%r"
                "limit:%r>") % (
                self.__class__.__name__, self.from_repo, self.to_repo,
                self.find_ghosts, reqd_revs_repr, ifp_revs_repr,
                self.limit)

    def execute(self):
        """Run the search and return whatever to_repo reports."""
        return self.to_repo.search_missing_revision_ids(
            self.from_repo, revision_ids=self.required_ids,
            if_present_ids=self.if_present_ids, find_ghosts=self.find_ghosts,
            limit=self.limit)
1597
1927
def invert_parent_map(parent_map):
    """Given a map from child => parents, create a map of parent=>children

    :param parent_map: A mapping {child: iterable of parents}.
    :return: A dict {parent: tuple of children}.  Only keys that occur as a
        parent of something appear in the result.
    """
    child_map = {}
    for child in parent_map:
        for parent in parent_map[child]:
            # Most parents have only one or two children, so grow a small
            # tuple per parent rather than paying for a list on each entry.
            child_map[parent] = child_map.get(parent, ()) + (child,)
    return child_map
1942
def _find_possible_heads(parent_map, tip_keys, depth):
    """Walk backwards (towards children) through the parent_map.

    This finds 'heads' that will hopefully succinctly describe our search
    graph.

    :param parent_map: A map from child => parents.
    :param tip_keys: The keys to start walking from.
    :param depth: Maximum number of child-ward steps to take.
    :return: A set of keys: those that turned out to have no children in
        parent_map, plus whatever was still being walked when depth ran out.
    """
    child_map = invert_parent_map(parent_map)
    heads = set()
    current_roots = tip_keys
    walked = set(current_roots)
    while current_roots and depth > 0:
        depth -= 1
        children = set()
        children_update = children.update
        for p in current_roots:
            # Is it better to pre- or post- filter the children?
            try:
                children_update(child_map[p])
            except KeyError:
                # Nothing in parent_map names p as a parent, so p is a head.
                heads.add(p)
        # If we've seen a key before, we don't want to walk it again. Note that
        # 'children' stays relatively small while 'walked' grows large. So
        # don't use 'difference_update' here which has to walk all of 'walked'.
        # '.difference' is smart enough to walk only children and compare it to
        # walked.
        children = children.difference(walked)
        walked.update(children)
        current_roots = children
    if current_roots:
        # We walked to the end of depth, so these are the new tips.
        heads.update(current_roots)
    return heads
1976
def _run_search(parent_map, heads, exclude_keys):
    """Given a parent map, run a _BreadthFirstSearcher on it.

    Start at heads, walk until you hit exclude_keys. As a further improvement,
    watch for any heads that you encounter while walking, which means they were
    not heads of the search.

    This is mostly used to generate a succinct recipe for how to walk through
    most of parent_map.

    :param parent_map: A map from child => parents, wrapped in a
        DictParentsProvider for the search.
    :param heads: Set of keys to start from (`.intersection` is called on it).
    :param exclude_keys: Set of keys at which the walk is stopped
        (`.intersection` is called on it).
    :return: (_BreadthFirstSearcher, set(heads_encountered_by_walking))
    """
    g = Graph(DictParentsProvider(parent_map))
    s = g._make_breadth_first_searcher(heads)
    found_heads = set()
    while True:
        try:
            next_revs = s.next()
        except StopIteration:
            break
        # Any head that shows up as a parent of something just walked was
        # reached from another head, so it is redundant as a start key.
        for parents in s._current_parents.itervalues():
            f_heads = heads.intersection(parents)
            if f_heads:
                found_heads.update(f_heads)
        stop_keys = exclude_keys.intersection(next_revs)
        if stop_keys:
            s.stop_searching_any(stop_keys)
    # Same check once more against the parents left over when the searcher
    # stopped.
    for parents in s._current_parents.itervalues():
        f_heads = heads.intersection(parents)
        if f_heads:
            found_heads.update(f_heads)
    return s, found_heads
2010
def limited_search_result_from_parent_map(parent_map, missing_keys, tip_keys,
                                          depth):
    """Transform a parent_map that is searching 'tip_keys' into an
    approximate SearchResult.

    We should be able to generate a SearchResult from a given set of starting
    keys, that covers a subset of parent_map that has the last step pointing at
    tip_keys. This is to handle the case that really-long-searches shouldn't be
    started from scratch on each get_parent_map request, but we *do* want to
    filter out some of the keys that we've already seen, so we don't get
    information that we already know about on every request.

    The server will validate the search (that starting at start_keys and
    stopping at stop_keys yields the exact key_count), so we have to be careful
    to give an exact recipe.

    Basic algorithm is:
        1) Invert parent_map to get child_map (todo: have it cached and pass it
           in)
        2) Starting at tip_keys, walk towards children for 'depth' steps.
        3) At that point, we have the 'start' keys.
        4) Start walking parent_map from 'start' keys, counting how many keys
           are seen, and generating stop_keys for anything that would walk
           outside of the parent_map.

    :param parent_map: A map from {child_id: (parent_ids,)}
    :param missing_keys: parent_ids that we know are unavailable
        (NOTE(review): not consulted by the code below -- confirm intended).
    :param tip_keys: the revision_ids that we are searching
    :param depth: How far back to walk.
    :return: (start_keys, exclude_keys, key_count); ([], [], 0) when
        parent_map is empty.
    """
    if not parent_map:
        # No search to send, because we haven't done any searching yet.
        return [], [], 0
    heads = _find_possible_heads(parent_map, tip_keys, depth)
    s, found_heads = _run_search(parent_map, heads, set(tip_keys))
    _, start_keys, exclude_keys, key_count = s.get_result().get_recipe()
    if found_heads:
        # Anything in found_heads are redundant start_keys, we hit them while
        # walking, so we can exclude them from the start list.
        start_keys = set(start_keys).difference(found_heads)
    return start_keys, exclude_keys, key_count
2053
def search_result_from_parent_map(parent_map, missing_keys):
    """Transform a parent_map into SearchResult information.

    :param parent_map: A map from child => parents (may be empty or None).
    :param missing_keys: Keys known to be unavailable; they are never
        reported as stop keys.
    :return: (start_keys, stop_keys, key_count); ([], [], 0) when parent_map
        is empty or None.
    """
    if not parent_map:
        # parent_map is empty or None, simple search result
        return [], [], 0
    # start_set is all the keys in the cache
    start_set = set(parent_map)
    # result set is all the references to keys in the cache
    result_parents = set()
    for parents in parent_map.itervalues():
        result_parents.update(parents)
    stop_keys = result_parents.difference(start_set)
    # We don't need to send ghosts back to the server as a position to
    # stop searching.
    stop_keys.difference_update(missing_keys)
    key_count = len(parent_map)
    if (revision.NULL_REVISION in result_parents
        and revision.NULL_REVISION in missing_keys):
        # If we pruned NULL_REVISION from the stop_keys because it's also
        # in our cache of "missing" keys we need to increment our key count
        # by 1, because the reconstituted SearchResult on the server will
        # still consider NULL_REVISION to be an included key.
        key_count += 1
    # Keys that are referenced as a parent of another cached key are reached
    # by walking, so only the unreferenced ones remain as start keys.
    included_keys = start_set.intersection(result_parents)
    start_set.difference_update(included_keys)
    return start_set, stop_keys, key_count
1612
2081
def collapse_linear_regions(parent_map):
1613
2082
"""Collapse regions of the graph that are 'linear'.