~bzr-pqm/bzr/bzr.dev : contents of bzrlib/vf

~bzr-pqm/bzr/bzr.dev : (revision 6616)

# Copyright (C) 2007-2011 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Searching in versioned file repositories."""

from __future__ import absolute_import

from bzrlib import (
    debug,
    revision,
    trace,
    )

from bzrlib.graph import (
    DictParentsProvider,
    Graph,
    invert_parent_map,
    )


class AbstractSearchResult(object):
    """The result of a search, describing a set of keys.
    
    Search results are typically used as the 'fetch_spec' parameter when
    fetching revisions.

    :seealso: AbstractSearch
    """

    def get_recipe(self):
        """Return a recipe that can be used to replay this search.

        The recipe allows reconstruction of the same results at a later date.

        :return: A tuple of `(search_kind_str, *details)`.  The details vary by
            kind of search result.
        """
        raise NotImplementedError(self.get_recipe)

    def get_network_struct(self):
        """Return a tuple that can be transmitted via the HPSS protocol."""
        raise NotImplementedError(self.get_network_struct)

    def get_keys(self):
        """Return the keys found in this search.

        :return: A set of keys.
        """
        raise NotImplementedError(self.get_keys)

    def is_empty(self):
        """Return false if the search lists 1 or more revisions."""
        raise NotImplementedError(self.is_empty)

    def refine(self, seen, referenced):
        """Create a new search by refining this search.

        :param seen: Revisions that have been satisfied.
        :param referenced: Revision references observed while satisfying some
            of this search.
        :return: A search result.
        """
        raise NotImplementedError(self.refine)


class AbstractSearch(object):
    """A search that can be executed, producing a search result.

    :seealso: AbstractSearchResult
    """

    def execute(self):
        """Construct a network-ready search result from this search description.

        This may take some time to search repositories, etc.

        :return: A search result (an object that implements
            AbstractSearchResult's API).
        """
        raise NotImplementedError(self.execute)


class SearchResult(AbstractSearchResult):
    """The result of a breadth first search.

    A SearchResult provides the ability to reconstruct the search or access a
    set of the keys the search found.
    """

    def __init__(self, start_keys, exclude_keys, key_count, keys):
        """Create a SearchResult.

        :param start_keys: The keys the search started at.
        :param exclude_keys: The keys the search excludes.
        :param key_count: The total number of keys (from start to but not
            including exclude).
        :param keys: The keys the search found. Note that in future we may get
            a SearchResult from a smart server, in which case the keys list is
            not necessarily immediately available.
        """
        self._recipe = ('search', start_keys, exclude_keys, key_count)
        self._keys = frozenset(keys)

    def __repr__(self):
        kind, start_keys, exclude_keys, key_count = self._recipe
        if len(start_keys) > 5:
            start_keys_repr = repr(list(start_keys)[:5])[:-1] + ', ...]'
        else:
            start_keys_repr = repr(start_keys)
        if len(exclude_keys) > 5:
            exclude_keys_repr = repr(list(exclude_keys)[:5])[:-1] + ', ...]'
        else:
            exclude_keys_repr = repr(exclude_keys)
        return '<%s %s:(%s, %s, %d)>' % (self.__class__.__name__,
            kind, start_keys_repr, exclude_keys_repr, key_count)

    def get_recipe(self):
        """Return a recipe that can be used to replay this search.

        The recipe allows reconstruction of the same results at a later date
        without knowing all the found keys. The essential elements are a list
        of keys to start and to stop at. In order to give reproducible
        results when ghosts are encountered by a search they are automatically
        added to the exclude list (or else ghost filling may alter the
        results).

        :return: A tuple ('search', start_keys_set, exclude_keys_set,
            revision_count). To recreate the results of this search, create a
            breadth first searcher on the same graph starting at start_keys.
            Then call next() (or next_with_ghosts()) repeatedly, and on every
            result, call stop_searching_any on any keys from the exclude_keys
            set. The revision_count value acts as a trivial cross-check - the
            found revisions of the new search should have as many elements as
            revision_count. If it does not, then additional revisions have been
            ghosted since the search was executed the first time and the second
            time.
        """
        return self._recipe

    def get_network_struct(self):
        start_keys = ' '.join(self._recipe[1])
        stop_keys = ' '.join(self._recipe[2])
        count = str(self._recipe[3])
        return (self._recipe[0], '\n'.join((start_keys, stop_keys, count)))

    def get_keys(self):
        """Return the keys found in this search.

        :return: A set of keys.
        """
        return self._keys

    def is_empty(self):
        """Return false if the search lists 1 or more revisions."""
        return self._recipe[3] == 0

    def refine(self, seen, referenced):
        """Create a new search by refining this search.

        :param seen: Revisions that have been satisfied.
        :param referenced: Revision references observed while satisfying some
            of this search.
        """
        start = self._recipe[1]
        exclude = self._recipe[2]
        count = self._recipe[3]
        keys = self.get_keys()
        # New heads = referenced + old heads - seen things - exclude
        pending_refs = set(referenced)
        pending_refs.update(start)
        pending_refs.difference_update(seen)
        pending_refs.difference_update(exclude)
        # New exclude = old exclude + satisfied heads
        seen_heads = start.intersection(seen)
        exclude.update(seen_heads)
        # keys gets seen removed
        keys = keys - seen
        # length is reduced by len(seen)
        count -= len(seen)
        return SearchResult(pending_refs, exclude, count, keys)


class PendingAncestryResult(AbstractSearchResult):
    """A search result that will reconstruct the ancestry for some graph heads.

    Unlike SearchResult, this doesn't hold the complete search result in
    memory, it just holds a description of how to generate it.
    """

    def __init__(self, heads, repo):
        """Constructor.

        :param heads: an iterable of graph heads.
        :param repo: a repository to use to generate the ancestry for the given
            heads.
        """
        self.heads = frozenset(heads)
        self.repo = repo

    def __repr__(self):
        if len(self.heads) > 5:
            heads_repr = repr(list(self.heads)[:5])[:-1]
            heads_repr += ', <%d more>...]' % (len(self.heads) - 5,)
        else:
            heads_repr = repr(self.heads)
        return '<%s heads:%s repo:%r>' % (
            self.__class__.__name__, heads_repr, self.repo)

    def get_recipe(self):
        """Return a recipe that can be used to replay this search.

        The recipe allows reconstruction of the same results at a later date.

        :seealso SearchResult.get_recipe:

        :return: A tuple ('proxy-search', start_keys_set, set(), -1)
            To recreate this result, create a PendingAncestryResult with the
            start_keys_set.
        """
        return ('proxy-search', self.heads, set(), -1)

    def get_network_struct(self):
        parts = ['ancestry-of']
        parts.extend(self.heads)
        return parts

    def get_keys(self):
        """See SearchResult.get_keys.

        Returns all the keys for the ancestry of the heads, excluding
        NULL_REVISION.
        """
        return self._get_keys(self.repo.get_graph())

    def _get_keys(self, graph):
        NULL_REVISION = revision.NULL_REVISION
        keys = [key for (key, parents) in graph.iter_ancestry(self.heads)
                if key != NULL_REVISION and parents is not None]
        return keys

    def is_empty(self):
        """Return false if the search lists 1 or more revisions."""
        if revision.NULL_REVISION in self.heads:
            return len(self.heads) == 1
        else:
            return len(self.heads) == 0

    def refine(self, seen, referenced):
        """Create a new search by refining this search.

        :param seen: Revisions that have been satisfied.
        :param referenced: Revision references observed while satisfying some
            of this search.
        """
        referenced = self.heads.union(referenced)
        return PendingAncestryResult(referenced - seen, self.repo)


class EmptySearchResult(AbstractSearchResult):
    """An empty search result."""

    def is_empty(self):
        return True


class EverythingResult(AbstractSearchResult):
    """A search result that simply requests everything in the repository."""

    def __init__(self, repo):
        self._repo = repo

    def __repr__(self):
        return '%s(%r)' % (self.__class__.__name__, self._repo)

    def get_recipe(self):
        raise NotImplementedError(self.get_recipe)

    def get_network_struct(self):
        return ('everything',)

    def get_keys(self):
        if 'evil' in debug.debug_flags:
            from bzrlib import remote
            if isinstance(self._repo, remote.RemoteRepository):
                # warn developers (not users) not to do this
                trace.mutter_callsite(
                    2, "EverythingResult(RemoteRepository).get_keys() is slow.")
        return self._repo.all_revision_ids()

    def is_empty(self):
        # It's ok for this to wrongly return False: the worst that can happen
        # is that RemoteStreamSource will initiate a get_stream on an empty
        # repository.  And almost all repositories are non-empty.
        return False

    def refine(self, seen, referenced):
        heads = set(self._repo.all_revision_ids())
        heads.difference_update(seen)
        heads.update(referenced)
        return PendingAncestryResult(heads, self._repo)


class EverythingNotInOther(AbstractSearch):
    """Find all revisions in that are in one repo but not the other."""

    def __init__(self, to_repo, from_repo, find_ghosts=False):
        self.to_repo = to_repo
        self.from_repo = from_repo
        self.find_ghosts = find_ghosts

    def execute(self):
        return self.to_repo.search_missing_revision_ids(
            self.from_repo, find_ghosts=self.find_ghosts)


class NotInOtherForRevs(AbstractSearch):
    """Find all revisions missing in one repo for a some specific heads."""

    def __init__(self, to_repo, from_repo, required_ids, if_present_ids=None,
            find_ghosts=False, limit=None):
        """Constructor.

        :param required_ids: revision IDs of heads that must be found, or else
            the search will fail with NoSuchRevision.  All revisions in their
            ancestry not already in the other repository will be included in
            the search result.
        :param if_present_ids: revision IDs of heads that may be absent in the
            source repository.  If present, then their ancestry not already
            found in other will be included in the search result.
        :param limit: maximum number of revisions to fetch
        """
        self.to_repo = to_repo
        self.from_repo = from_repo
        self.find_ghosts = find_ghosts
        self.required_ids = required_ids
        self.if_present_ids = if_present_ids
        self.limit = limit

    def __repr__(self):
        if len(self.required_ids) > 5:
            reqd_revs_repr = repr(list(self.required_ids)[:5])[:-1] + ', ...]'
        else:
            reqd_revs_repr = repr(self.required_ids)
        if self.if_present_ids and len(self.if_present_ids) > 5:
            ifp_revs_repr = repr(list(self.if_present_ids)[:5])[:-1] + ', ...]'
        else:
            ifp_revs_repr = repr(self.if_present_ids)

        return ("<%s from:%r to:%r find_ghosts:%r req'd:%r if-present:%r"
                "limit:%r>") % (
                self.__class__.__name__, self.from_repo, self.to_repo,
                self.find_ghosts, reqd_revs_repr, ifp_revs_repr,
                self.limit)

    def execute(self):
        return self.to_repo.search_missing_revision_ids(
            self.from_repo, revision_ids=self.required_ids,
            if_present_ids=self.if_present_ids, find_ghosts=self.find_ghosts,
            limit=self.limit)


def search_result_from_parent_map(parent_map, missing_keys):
    """Transform a parent_map into SearchResult information."""
    if not parent_map:
        # parent_map is empty or None, simple search result
        return [], [], 0
    # start_set is all the keys in the cache
    start_set = set(parent_map)
    # result set is all the references to keys in the cache
    result_parents = set()
    for parents in parent_map.itervalues():
        result_parents.update(parents)
    stop_keys = result_parents.difference(start_set)
    # We don't need to send ghosts back to the server as a position to
    # stop either.
    stop_keys.difference_update(missing_keys)
    key_count = len(parent_map)
    if (revision.NULL_REVISION in result_parents
        and revision.NULL_REVISION in missing_keys):
        # If we pruned NULL_REVISION from the stop_keys because it's also
        # in our cache of "missing" keys we need to increment our key count
        # by 1, because the reconsitituted SearchResult on the server will
        # still consider NULL_REVISION to be an included key.
        key_count += 1
    included_keys = start_set.intersection(result_parents)
    start_set.difference_update(included_keys)
    return start_set, stop_keys, key_count


def _run_search(parent_map, heads, exclude_keys):
    """Given a parent map, run a _BreadthFirstSearcher on it.

    Start at heads, walk until you hit exclude_keys. As a further improvement,
    watch for any heads that you encounter while walking, which means they were
    not heads of the search.

    This is mostly used to generate a succinct recipe for how to walk through
    most of parent_map.

    :return: (_BreadthFirstSearcher, set(heads_encountered_by_walking))
    """
    g = Graph(DictParentsProvider(parent_map))
    s = g._make_breadth_first_searcher(heads)
    found_heads = set()
    while True:
        try:
            next_revs = s.next()
        except StopIteration:
            break
        for parents in s._current_parents.itervalues():
            f_heads = heads.intersection(parents)
            if f_heads:
                found_heads.update(f_heads)
        stop_keys = exclude_keys.intersection(next_revs)
        if stop_keys:
            s.stop_searching_any(stop_keys)
    for parents in s._current_parents.itervalues():
        f_heads = heads.intersection(parents)
        if f_heads:
            found_heads.update(f_heads)
    return s, found_heads


def _find_possible_heads(parent_map, tip_keys, depth):
    """Walk backwards (towards children) through the parent_map.

    This finds 'heads' that will hopefully succinctly describe our search
    graph.
    """
    child_map = invert_parent_map(parent_map)
    heads = set()
    current_roots = tip_keys
    walked = set(current_roots)
    while current_roots and depth > 0:
        depth -= 1
        children = set()
        children_update = children.update
        for p in current_roots:
            # Is it better to pre- or post- filter the children?
            try:
                children_update(child_map[p])
            except KeyError:
                heads.add(p)
        # If we've seen a key before, we don't want to walk it again. Note that
        # 'children' stays relatively small while 'walked' grows large. So
        # don't use 'difference_update' here which has to walk all of 'walked'.
        # '.difference' is smart enough to walk only children and compare it to
        # walked.
        children = children.difference(walked)
        walked.update(children)
        current_roots = children
    if current_roots:
        # We walked to the end of depth, so these are the new tips.
        heads.update(current_roots)
    return heads


def limited_search_result_from_parent_map(parent_map, missing_keys, tip_keys,
                                          depth):
    """Transform a parent_map that is searching 'tip_keys' into an
    approximate SearchResult.

    We should be able to generate a SearchResult from a given set of starting
    keys, that covers a subset of parent_map that has the last step pointing at
    tip_keys. This is to handle the case that really-long-searches shouldn't be
    started from scratch on each get_parent_map request, but we *do* want to
    filter out some of the keys that we've already seen, so we don't get
    information that we already know about on every request.

    The server will validate the search (that starting at start_keys and
    stopping at stop_keys yields the exact key_count), so we have to be careful
    to give an exact recipe.

    Basic algorithm is:
        1) Invert parent_map to get child_map (todo: have it cached and pass it
           in)
        2) Starting at tip_keys, walk towards children for 'depth' steps.
        3) At that point, we have the 'start' keys.
        4) Start walking parent_map from 'start' keys, counting how many keys
           are seen, and generating stop_keys for anything that would walk
           outside of the parent_map.

    :param parent_map: A map from {child_id: (parent_ids,)}
    :param missing_keys: parent_ids that we know are unavailable
    :param tip_keys: the revision_ids that we are searching
    :param depth: How far back to walk.
    """
    if not parent_map:
        # No search to send, because we haven't done any searching yet.
        return [], [], 0
    heads = _find_possible_heads(parent_map, tip_keys, depth)
    s, found_heads = _run_search(parent_map, heads, set(tip_keys))
    start_keys, exclude_keys, keys = s.get_state()
    if found_heads:
        # Anything in found_heads are redundant start_keys, we hit them while
        # walking, so we can exclude them from the start list.
        start_keys = set(start_keys).difference(found_heads)
    return start_keys, exclude_keys, len(keys)

6341.1.3 by Jelmer Vernooij Move search result code to vf_search module.	1	# Copyright (C) 2007-2011 Canonical Ltd
	2	#
	3	# This program is free software; you can redistribute it and/or modify
	4	# it under the terms of the GNU General Public License as published by
	5	# the Free Software Foundation; either version 2 of the License, or
	6	# (at your option) any later version.
	7	#
	8	# This program is distributed in the hope that it will be useful,
	9	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	10	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	11	# GNU General Public License for more details.
	12	#
	13	# You should have received a copy of the GNU General Public License
	14	# along with this program; if not, write to the Free Software
	15	# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
	16
6379.6.7 by Jelmer Vernooij Move importing from future until after doc string, otherwise the doc string will disappear.	17	"""Searching in versioned file repositories."""
	18
6379.6.3 by Jelmer Vernooij Use absolute_import.	19	from __future__ import absolute_import
6379.6.3 by Jelmer Vernooij Use absolute_import.	20
6341.1.3 by Jelmer Vernooij Move search result code to vf_search module.	21	from bzrlib import (
	22	debug,
	23	revision,
	24	trace,
	25	)
	26
	27	from bzrlib.graph import (
	28	DictParentsProvider,
	29	Graph,
	30	invert_parent_map,
	31	)
	32
	33
	34	class AbstractSearchResult(object):
	35	"""The result of a search, describing a set of keys.
	36
	37	Search results are typically used as the 'fetch_spec' parameter when
	38	fetching revisions.
	39
	40	:seealso: AbstractSearch
	41	"""
	42
	43	def get_recipe(self):
	44	"""Return a recipe that can be used to replay this search.
	45
	46	The recipe allows reconstruction of the same results at a later date.
	47
	48	:return: A tuple of `(search_kind_str, *details)`. The details vary by
	49	kind of search result.
	50	"""
	51	raise NotImplementedError(self.get_recipe)
	52
	53	def get_network_struct(self):
	54	"""Return a tuple that can be transmitted via the HPSS protocol."""
	55	raise NotImplementedError(self.get_network_struct)
	56
	57	def get_keys(self):
	58	"""Return the keys found in this search.
	59
	60	:return: A set of keys.
	61	"""
	62	raise NotImplementedError(self.get_keys)
	63
	64	def is_empty(self):
	65	"""Return false if the search lists 1 or more revisions."""
	66	raise NotImplementedError(self.is_empty)
	67
	68	def refine(self, seen, referenced):
	69	"""Create a new search by refining this search.
	70
	71	:param seen: Revisions that have been satisfied.
	72	:param referenced: Revision references observed while satisfying some
	73	of this search.
	74	:return: A search result.
	75	"""
	76	raise NotImplementedError(self.refine)
	77
	78
	79	class AbstractSearch(object):
	80	"""A search that can be executed, producing a search result.
	81
	82	:seealso: AbstractSearchResult
	83	"""
	84
85	def execute(self):
86	"""Construct a network-ready search result from this search description.
87
88	This may take some time to search repositories, etc.
89
90	:return: A search result (an object that implements
91	AbstractSearchResult's API).
92	"""
93	raise NotImplementedError(self.execute)
94
95
96	class SearchResult(AbstractSearchResult):
97	"""The result of a breadth first search.
98
99	A SearchResult provides the ability to reconstruct the search or access a
100	set of the keys the search found.
101	"""
102
103	def __init__(self, start_keys, exclude_keys, key_count, keys):
104	"""Create a SearchResult.
105
106	:param start_keys: The keys the search started at.
107	:param exclude_keys: The keys the search excludes.
108	:param key_count: The total number of keys (from start to but not
109	including exclude).
110	:param keys: The keys the search found. Note that in future we may get
111	a SearchResult from a smart server, in which case the keys list is
112	not necessarily immediately available.
113	"""
114	self._recipe = ('search', start_keys, exclude_keys, key_count)
115	self._keys = frozenset(keys)
116
117	def __repr__(self):
118	kind, start_keys, exclude_keys, key_count = self._recipe
119	if len(start_keys) > 5:
120	start_keys_repr = repr(list(start_keys)[:5])[:-1] + ', ...]'
121	else:
122	start_keys_repr = repr(start_keys)
123	if len(exclude_keys) > 5:
124	exclude_keys_repr = repr(list(exclude_keys)[:5])[:-1] + ', ...]'
125	else:
126	exclude_keys_repr = repr(exclude_keys)
127	return '<%s %s:(%s, %s, %d)>' % (self.__class__.__name__,
128	kind, start_keys_repr, exclude_keys_repr, key_count)
129
130	def get_recipe(self):
131	"""Return a recipe that can be used to replay this search.
132
133	The recipe allows reconstruction of the same results at a later date
134	without knowing all the found keys. The essential elements are a list
135	of keys to start and to stop at. In order to give reproducible
136	results when ghosts are encountered by a search they are automatically
137	added to the exclude list (or else ghost filling may alter the
138	results).
139
140	:return: A tuple ('search', start_keys_set, exclude_keys_set,
141	revision_count). To recreate the results of this search, create a
142	breadth first searcher on the same graph starting at start_keys.
143	Then call next() (or next_with_ghosts()) repeatedly, and on every
144	result, call stop_searching_any on any keys from the exclude_keys
145	set. The revision_count value acts as a trivial cross-check - the
146	found revisions of the new search should have as many elements as
147	revision_count. If it does not, then additional revisions have been
148	ghosted since the search was executed the first time and the second
149	time.
150	"""
151	return self._recipe
152
153	def get_network_struct(self):
154	start_keys = ' '.join(self._recipe[1])
155	stop_keys = ' '.join(self._recipe[2])
156	count = str(self._recipe[3])
157	return (self._recipe[0], '\n'.join((start_keys, stop_keys, count)))
158
159	def get_keys(self):
160	"""Return the keys found in this search.
161
162	:return: A set of keys.
163	"""
164	return self._keys
165
166	def is_empty(self):
167	"""Return false if the search lists 1 or more revisions."""
168	return self._recipe[3] == 0
169
170	def refine(self, seen, referenced):
171	"""Create a new search by refining this search.
172
173	:param seen: Revisions that have been satisfied.
174	:param referenced: Revision references observed while satisfying some
175	of this search.
176	"""
177	start = self._recipe[1]
178	exclude = self._recipe[2]
179	count = self._recipe[3]
180	keys = self.get_keys()
181	# New heads = referenced + old heads - seen things - exclude
182	pending_refs = set(referenced)
183	pending_refs.update(start)
184	pending_refs.difference_update(seen)
185	pending_refs.difference_update(exclude)
186	# New exclude = old exclude + satisfied heads
187	seen_heads = start.intersection(seen)
188	exclude.update(seen_heads)
189	# keys gets seen removed
190	keys = keys - seen
191	# length is reduced by len(seen)
192	count -= len(seen)
193	return SearchResult(pending_refs, exclude, count, keys)
194
195
196	class PendingAncestryResult(AbstractSearchResult):
197	"""A search result that will reconstruct the ancestry for some graph heads.
198
199	Unlike SearchResult, this doesn't hold the complete search result in
200	memory, it just holds a description of how to generate it.
201	"""
202
203	def __init__(self, heads, repo):
204	"""Constructor.
205
206	:param heads: an iterable of graph heads.
207	:param repo: a repository to use to generate the ancestry for the given
208	heads.
209	"""
210	self.heads = frozenset(heads)
211	self.repo = repo
212
213	def __repr__(self):
214	if len(self.heads) > 5:
215	heads_repr = repr(list(self.heads)[:5])[:-1]
216	heads_repr += ', <%d more>...]' % (len(self.heads) - 5,)
217	else:
218	heads_repr = repr(self.heads)
219	return '<%s heads:%s repo:%r>' % (
220	self.__class__.__name__, heads_repr, self.repo)
221
222	def get_recipe(self):
223	"""Return a recipe that can be used to replay this search.
224
225	The recipe allows reconstruction of the same results at a later date.
226
227	:seealso SearchResult.get_recipe:
228
229	:return: A tuple ('proxy-search', start_keys_set, set(), -1)
230	To recreate this result, create a PendingAncestryResult with the
231	start_keys_set.
232	"""
233	return ('proxy-search', self.heads, set(), -1)
234
235	def get_network_struct(self):
236	parts = ['ancestry-of']
237	parts.extend(self.heads)
238	return parts
239
240	def get_keys(self):
241	"""See SearchResult.get_keys.
242
243	Returns all the keys for the ancestry of the heads, excluding
244	NULL_REVISION.
245	"""
246	return self._get_keys(self.repo.get_graph())
247
248	def _get_keys(self, graph):
249	NULL_REVISION = revision.NULL_REVISION
250	keys = [key for (key, parents) in graph.iter_ancestry(self.heads)
251	if key != NULL_REVISION and parents is not None]
252	return keys
253
254	def is_empty(self):
255	"""Return false if the search lists 1 or more revisions."""
256	if revision.NULL_REVISION in self.heads:
257	return len(self.heads) == 1
258	else:
259	return len(self.heads) == 0
260
261	def refine(self, seen, referenced):
262	"""Create a new search by refining this search.
263
264	:param seen: Revisions that have been satisfied.
265	:param referenced: Revision references observed while satisfying some
266	of this search.
267	"""
268	referenced = self.heads.union(referenced)
269	return PendingAncestryResult(referenced - seen, self.repo)
270
271
272	class EmptySearchResult(AbstractSearchResult):
273	"""An empty search result."""
274
275	def is_empty(self):
276	return True
277
278
279	class EverythingResult(AbstractSearchResult):
280	"""A search result that simply requests everything in the repository."""
281
282	def __init__(self, repo):
283	self._repo = repo
284
285	def __repr__(self):
286	return '%s(%r)' % (self.__class__.__name__, self._repo)
287
288	def get_recipe(self):
289	raise NotImplementedError(self.get_recipe)
290
291	def get_network_struct(self):
292	return ('everything',)
293
294	def get_keys(self):
295	if 'evil' in debug.debug_flags:
296	from bzrlib import remote
297	if isinstance(self._repo, remote.RemoteRepository):
298	# warn developers (not users) not to do this
299	trace.mutter_callsite(
300	2, "EverythingResult(RemoteRepository).get_keys() is slow.")
301	return self._repo.all_revision_ids()
302
303	def is_empty(self):
304	# It's ok for this to wrongly return False: the worst that can happen
305	# is that RemoteStreamSource will initiate a get_stream on an empty
306	# repository. And almost all repositories are non-empty.
307	return False
308
309	def refine(self, seen, referenced):
310	heads = set(self._repo.all_revision_ids())
311	heads.difference_update(seen)
312	heads.update(referenced)
313	return PendingAncestryResult(heads, self._repo)
314
315
316	class EverythingNotInOther(AbstractSearch):
317	"""Find all revisions in that are in one repo but not the other."""
318
319	def __init__(self, to_repo, from_repo, find_ghosts=False):
320	self.to_repo = to_repo
321	self.from_repo = from_repo
322	self.find_ghosts = find_ghosts
323
324	def execute(self):
325	return self.to_repo.search_missing_revision_ids(
326	self.from_repo, find_ghosts=self.find_ghosts)
327
328
329	class NotInOtherForRevs(AbstractSearch):
330	"""Find all revisions missing in one repo for a some specific heads."""
331
332	def __init__(self, to_repo, from_repo, required_ids, if_present_ids=None,
333	find_ghosts=False, limit=None):
334	"""Constructor.
335
336	:param required_ids: revision IDs of heads that must be found, or else
337	the search will fail with NoSuchRevision. All revisions in their
338	ancestry not already in the other repository will be included in
339	the search result.
340	:param if_present_ids: revision IDs of heads that may be absent in the
341	source repository. If present, then their ancestry not already
342	found in other will be included in the search result.
343	:param limit: maximum number of revisions to fetch
344	"""
345	self.to_repo = to_repo
346	self.from_repo = from_repo
347	self.find_ghosts = find_ghosts
348	self.required_ids = required_ids
349	self.if_present_ids = if_present_ids
350	self.limit = limit
351
352	def __repr__(self):
353	if len(self.required_ids) > 5:
354	reqd_revs_repr = repr(list(self.required_ids)[:5])[:-1] + ', ...]'
355	else:
356	reqd_revs_repr = repr(self.required_ids)
357	if self.if_present_ids and len(self.if_present_ids) > 5:
358	ifp_revs_repr = repr(list(self.if_present_ids)[:5])[:-1] + ', ...]'
359	else:
360	ifp_revs_repr = repr(self.if_present_ids)
361
362	return ("<%s from:%r to:%r find_ghosts:%r req'd:%r if-present:%r"
363	"limit:%r>") % (
364	self.__class__.__name__, self.from_repo, self.to_repo,
365	self.find_ghosts, reqd_revs_repr, ifp_revs_repr,
366	self.limit)
367
368	def execute(self):
369	return self.to_repo.search_missing_revision_ids(
370	self.from_repo, revision_ids=self.required_ids,
371	if_present_ids=self.if_present_ids, find_ghosts=self.find_ghosts,
372	limit=self.limit)
373
374
375	def search_result_from_parent_map(parent_map, missing_keys):
376	"""Transform a parent_map into SearchResult information."""
377	if not parent_map:
378	# parent_map is empty or None, simple search result
379	return [], [], 0
380	# start_set is all the keys in the cache
381	start_set = set(parent_map)
382	# result set is all the references to keys in the cache
383	result_parents = set()
384	for parents in parent_map.itervalues():
385	result_parents.update(parents)
386	stop_keys = result_parents.difference(start_set)
387	# We don't need to send ghosts back to the server as a position to
388	# stop either.
389	stop_keys.difference_update(missing_keys)
390	key_count = len(parent_map)
391	if (revision.NULL_REVISION in result_parents
392	and revision.NULL_REVISION in missing_keys):
393	# If we pruned NULL_REVISION from the stop_keys because it's also
394	# in our cache of "missing" keys we need to increment our key count
395	# by 1, because the reconsitituted SearchResult on the server will
396	# still consider NULL_REVISION to be an included key.
397	key_count += 1
398	included_keys = start_set.intersection(result_parents)
399	start_set.difference_update(included_keys)
400	return start_set, stop_keys, key_count
401
402
403	def _run_search(parent_map, heads, exclude_keys):
404	"""Given a parent map, run a _BreadthFirstSearcher on it.
405
406	Start at heads, walk until you hit exclude_keys. As a further improvement,
407	watch for any heads that you encounter while walking, which means they were
408	not heads of the search.
409
410	This is mostly used to generate a succinct recipe for how to walk through
411	most of parent_map.
412
413	:return: (_BreadthFirstSearcher, set(heads_encountered_by_walking))
414	"""
415	g = Graph(DictParentsProvider(parent_map))
416	s = g._make_breadth_first_searcher(heads)
417	found_heads = set()
418	while True:
419	try:
420	next_revs = s.next()
421	except StopIteration:
422	break
423	for parents in s._current_parents.itervalues():
424	f_heads = heads.intersection(parents)
425	if f_heads:
426	found_heads.update(f_heads)
427	stop_keys = exclude_keys.intersection(next_revs)
428	if stop_keys:
429	s.stop_searching_any(stop_keys)
430	for parents in s._current_parents.itervalues():
431	f_heads = heads.intersection(parents)
432	if f_heads:
433	found_heads.update(f_heads)
434	return s, found_heads
435
436
437	def _find_possible_heads(parent_map, tip_keys, depth):
438	"""Walk backwards (towards children) through the parent_map.
439
440	This finds 'heads' that will hopefully succinctly describe our search
441	graph.
442	"""
443	child_map = invert_parent_map(parent_map)
444	heads = set()
445	current_roots = tip_keys
446	walked = set(current_roots)
447	while current_roots and depth > 0:
448	depth -= 1
449	children = set()
450	children_update = children.update
451	for p in current_roots:
452	# Is it better to pre- or post- filter the children?
453	try:
454	children_update(child_map[p])
455	except KeyError:
456	heads.add(p)
457	# If we've seen a key before, we don't want to walk it again. Note that
458	# 'children' stays relatively small while 'walked' grows large. So
459	# don't use 'difference_update' here which has to walk all of 'walked'.
460	# '.difference' is smart enough to walk only children and compare it to
461	# walked.
462	children = children.difference(walked)
463	walked.update(children)
464	current_roots = children
465	if current_roots:
466	# We walked to the end of depth, so these are the new tips.
467	heads.update(current_roots)
468	return heads
469
470
471	def limited_search_result_from_parent_map(parent_map, missing_keys, tip_keys,
472	depth):
473	"""Transform a parent_map that is searching 'tip_keys' into an
474	approximate SearchResult.
475
476	We should be able to generate a SearchResult from a given set of starting
477	keys, that covers a subset of parent_map that has the last step pointing at
478	tip_keys. This is to handle the case that really-long-searches shouldn't be
479	started from scratch on each get_parent_map request, but we do want to
480	filter out some of the keys that we've already seen, so we don't get
481	information that we already know about on every request.
482
483	The server will validate the search (that starting at start_keys and
484	stopping at stop_keys yields the exact key_count), so we have to be careful
485	to give an exact recipe.
486
487	Basic algorithm is:
488	1) Invert parent_map to get child_map (todo: have it cached and pass it
489	in)
490	2) Starting at tip_keys, walk towards children for 'depth' steps.
491	3) At that point, we have the 'start' keys.
492	4) Start walking parent_map from 'start' keys, counting how many keys
493	are seen, and generating stop_keys for anything that would walk
494	outside of the parent_map.
495
496	:param parent_map: A map from {child_id: (parent_ids,)}
497	:param missing_keys: parent_ids that we know are unavailable
498	:param tip_keys: the revision_ids that we are searching
499	:param depth: How far back to walk.
500	"""
501	if not parent_map:
502	# No search to send, because we haven't done any searching yet.
503	return [], [], 0
504	heads = _find_possible_heads(parent_map, tip_keys, depth)
505	s, found_heads = _run_search(parent_map, heads, set(tip_keys))
6341.1.5 by Jelmer Vernooij Fix get_state().	506	start_keys, exclude_keys, keys = s.get_state()
6341.1.3 by Jelmer Vernooij Move search result code to vf_search module.	507	if found_heads:
	508	# Anything in found_heads are redundant start_keys, we hit them while
	509	# walking, so we can exclude them from the start list.
	510	start_keys = set(start_keys).difference(found_heads)
6341.1.5 by Jelmer Vernooij Fix get_state().	511	return start_keys, exclude_keys, len(keys)