1
# Copyright (C) 2007-2011 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
"""Searching in versioned file repositories."""
19
from __future__ import absolute_import
27
from bzrlib.graph import (
34
class AbstractSearchResult(object):
35
"""The result of a search, describing a set of keys.
37
Search results are typically used as the 'fetch_spec' parameter when
40
:seealso: AbstractSearch
44
"""Return a recipe that can be used to replay this search.
46
The recipe allows reconstruction of the same results at a later date.
48
:return: A tuple of `(search_kind_str, *details)`. The details vary by
49
kind of search result.
51
raise NotImplementedError(self.get_recipe)
53
def get_network_struct(self):
    """Build the tuple form of this result for transmission over HPSS.

    This base implementation is only a placeholder.

    :raises NotImplementedError: always; the bound method is passed as the
        exception argument.
    """
    unimplemented = self.get_network_struct
    raise NotImplementedError(unimplemented)
58
"""Return the keys found in this search.
60
:return: A set of keys.
62
raise NotImplementedError(self.get_keys)
65
"""Return false if the search lists 1 or more revisions."""
66
raise NotImplementedError(self.is_empty)
68
def refine(self, seen, referenced):
    """Derive a new search from this one.

    This base implementation is only a placeholder.

    :param seen: Revisions that have been satisfied.
    :param referenced: Revision references observed while satisfying part
        of this search.
    :return: A search result.
    :raises NotImplementedError: always; the bound method is passed as the
        exception argument.
    """
    unimplemented = self.refine
    raise NotImplementedError(unimplemented)
79
class AbstractSearch(object):
80
"""A search that can be executed, producing a search result.
82
:seealso: AbstractSearchResult
86
"""Construct a network-ready search result from this search description.
88
This may take some time to search repositories, etc.
90
:return: A search result (an object that implements
91
AbstractSearchResult's API).
93
raise NotImplementedError(self.execute)
96
class SearchResult(AbstractSearchResult):
97
"""The result of a breadth first search.
99
A SearchResult provides the ability to reconstruct the search or access a
100
set of the keys the search found.
103
def __init__(self, start_keys, exclude_keys, key_count, keys):
104
"""Create a SearchResult.
106
:param start_keys: The keys the search started at.
107
:param exclude_keys: The keys the search excludes.
108
:param key_count: The total number of keys (from start to but not
110
:param keys: The keys the search found. Note that in future we may get
111
a SearchResult from a smart server, in which case the keys list is
112
not necessarily immediately available.
114
self._recipe = ('search', start_keys, exclude_keys, key_count)
115
self._keys = frozenset(keys)
118
kind, start_keys, exclude_keys, key_count = self._recipe
119
if len(start_keys) > 5:
120
start_keys_repr = repr(list(start_keys)[:5])[:-1] + ', ...]'
122
start_keys_repr = repr(start_keys)
123
if len(exclude_keys) > 5:
124
exclude_keys_repr = repr(list(exclude_keys)[:5])[:-1] + ', ...]'
126
exclude_keys_repr = repr(exclude_keys)
127
return '<%s %s:(%s, %s, %d)>' % (self.__class__.__name__,
128
kind, start_keys_repr, exclude_keys_repr, key_count)
130
def get_recipe(self):
131
"""Return a recipe that can be used to replay this search.
133
The recipe allows reconstruction of the same results at a later date
134
without knowing all the found keys. The essential elements are a list
135
of keys to start and to stop at. In order to give reproducible
136
results when ghosts are encountered by a search they are automatically
137
added to the exclude list (or else ghost filling may alter the
140
:return: A tuple ('search', start_keys_set, exclude_keys_set,
141
revision_count). To recreate the results of this search, create a
142
breadth first searcher on the same graph starting at start_keys.
143
Then call next() (or next_with_ghosts()) repeatedly, and on every
144
result, call stop_searching_any on any keys from the exclude_keys
145
set. The revision_count value acts as a trivial cross-check - the
146
found revisions of the new search should have as many elements as
147
revision_count. If it does not, then additional revisions have been
148
ghosted since the search was executed the first time and the second
153
def get_network_struct(self):
    """Flatten the recipe into a ``(kind, body)`` pair of strings.

    The body consists of three newline-separated lines: the start keys
    joined by single spaces, the exclude keys joined by single spaces, and
    the key count converted with ``str``.

    :return: A 2-tuple ``(kind, body)``.
    """
    recipe = self._recipe
    body_lines = [
        ' '.join(recipe[1]),
        ' '.join(recipe[2]),
        str(recipe[3]),
        ]
    return (recipe[0], '\n'.join(body_lines))
160
"""Return the keys found in this search.
162
:return: A set of keys.
167
"""Return false if the search lists 1 or more revisions."""
168
return self._recipe[3] == 0
170
def refine(self, seen, referenced):
171
"""Create a new search by refining this search.
173
:param seen: Revisions that have been satisfied.
174
:param referenced: Revision references observed while satisfying some
177
start = self._recipe[1]
178
exclude = self._recipe[2]
179
count = self._recipe[3]
180
keys = self.get_keys()
181
# New heads = referenced + old heads - seen things - exclude
182
pending_refs = set(referenced)
183
pending_refs.update(start)
184
pending_refs.difference_update(seen)
185
pending_refs.difference_update(exclude)
186
# New exclude = old exclude + satisfied heads
187
seen_heads = start.intersection(seen)
188
exclude.update(seen_heads)
189
# keys gets seen removed
191
# length is reduced by len(seen)
193
return SearchResult(pending_refs, exclude, count, keys)
196
class PendingAncestryResult(AbstractSearchResult):
197
"""A search result that will reconstruct the ancestry for some graph heads.
199
Unlike SearchResult, this doesn't hold the complete search result in
200
memory, it just holds a description of how to generate it.
203
def __init__(self, heads, repo):
206
:param heads: an iterable of graph heads.
207
:param repo: a repository to use to generate the ancestry for the given
210
self.heads = frozenset(heads)
214
if len(self.heads) > 5:
215
heads_repr = repr(list(self.heads)[:5])[:-1]
216
heads_repr += ', <%d more>...]' % (len(self.heads) - 5,)
218
heads_repr = repr(self.heads)
219
return '<%s heads:%s repo:%r>' % (
220
self.__class__.__name__, heads_repr, self.repo)
222
def get_recipe(self):
    """Describe this result as a replayable 'proxy-search' recipe.

    The recipe allows reconstruction of the same results at a later date.

    :seealso SearchResult.get_recipe:

    :return: A tuple ('proxy-search', start_keys_set, set(), -1), where
        start_keys_set is this result's heads and the exclude set is a
        fresh empty set.  To recreate this result, create a
        PendingAncestryResult with the start_keys_set.
    """
    no_excludes = set()
    return ('proxy-search', self.heads, no_excludes, -1)
235
def get_network_struct(self):
236
parts = ['ancestry-of']
237
parts.extend(self.heads)
241
"""See SearchResult.get_keys.
243
Returns all the keys for the ancestry of the heads, excluding
246
return self._get_keys(self.repo.get_graph())
248
def _get_keys(self, graph):
249
NULL_REVISION = revision.NULL_REVISION
250
keys = [key for (key, parents) in graph.iter_ancestry(self.heads)
251
if key != NULL_REVISION and parents is not None]
255
"""Return false if the search lists 1 or more revisions."""
256
if revision.NULL_REVISION in self.heads:
257
return len(self.heads) == 1
259
return len(self.heads) == 0
261
def refine(self, seen, referenced):
262
"""Create a new search by refining this search.
264
:param seen: Revisions that have been satisfied.
265
:param referenced: Revision references observed while satisfying some
268
referenced = self.heads.union(referenced)
269
return PendingAncestryResult(referenced - seen, self.repo)
272
class EmptySearchResult(AbstractSearchResult):
273
"""An empty search result."""
279
class EverythingResult(AbstractSearchResult):
280
"""A search result that simply requests everything in the repository."""
282
def __init__(self, repo):
286
return '%s(%r)' % (self.__class__.__name__, self._repo)
288
def get_recipe(self):
    """Unsupported for this result type.

    :raises NotImplementedError: always; the bound method is passed as the
        exception argument.
    """
    unimplemented = self.get_recipe
    raise NotImplementedError(unimplemented)
291
def get_network_struct(self):
    """Return the one-element tuple ``('everything',)``."""
    kind = 'everything'
    return (kind,)
295
if 'evil' in debug.debug_flags:
296
from bzrlib import remote
297
if isinstance(self._repo, remote.RemoteRepository):
298
# warn developers (not users) not to do this
299
trace.mutter_callsite(
300
2, "EverythingResult(RemoteRepository).get_keys() is slow.")
301
return self._repo.all_revision_ids()
304
# It's ok for this to wrongly return False: the worst that can happen
305
# is that RemoteStreamSource will initiate a get_stream on an empty
306
# repository. And almost all repositories are non-empty.
309
def refine(self, seen, referenced):
    """Narrow 'everything' down to a pending-ancestry result.

    The heads of the new result are all revision ids reported by the
    repository, minus ``seen``, plus ``referenced``.

    :param seen: Revision ids to drop from the repository's full set.
    :param referenced: Revision ids to add after ``seen`` is removed.
    :return: A PendingAncestryResult over the same repository.
    """
    unsatisfied = set(self._repo.all_revision_ids()).difference(seen)
    heads = unsatisfied.union(referenced)
    return PendingAncestryResult(heads, self._repo)
316
class EverythingNotInOther(AbstractSearch):
317
"""Find all revisions that are in one repo but not the other."""
319
def __init__(self, to_repo, from_repo, find_ghosts=False):
320
self.to_repo = to_repo
321
self.from_repo = from_repo
322
self.find_ghosts = find_ghosts
325
return self.to_repo.search_missing_revision_ids(
326
self.from_repo, find_ghosts=self.find_ghosts)
329
class NotInOtherForRevs(AbstractSearch):
330
"""Find all revisions missing in one repo for some specific heads."""
332
def __init__(self, to_repo, from_repo, required_ids, if_present_ids=None,
333
find_ghosts=False, limit=None):
336
:param required_ids: revision IDs of heads that must be found, or else
337
the search will fail with NoSuchRevision. All revisions in their
338
ancestry not already in the other repository will be included in
340
:param if_present_ids: revision IDs of heads that may be absent in the
341
source repository. If present, then their ancestry not already
342
found in other will be included in the search result.
343
:param limit: maximum number of revisions to fetch
345
self.to_repo = to_repo
346
self.from_repo = from_repo
347
self.find_ghosts = find_ghosts
348
self.required_ids = required_ids
349
self.if_present_ids = if_present_ids
353
if len(self.required_ids) > 5:
354
reqd_revs_repr = repr(list(self.required_ids)[:5])[:-1] + ', ...]'
356
reqd_revs_repr = repr(self.required_ids)
357
if self.if_present_ids and len(self.if_present_ids) > 5:
358
ifp_revs_repr = repr(list(self.if_present_ids)[:5])[:-1] + ', ...]'
360
ifp_revs_repr = repr(self.if_present_ids)
362
return ("<%s from:%r to:%r find_ghosts:%r req'd:%r if-present:%r"
364
self.__class__.__name__, self.from_repo, self.to_repo,
365
self.find_ghosts, reqd_revs_repr, ifp_revs_repr,
369
return self.to_repo.search_missing_revision_ids(
370
self.from_repo, revision_ids=self.required_ids,
371
if_present_ids=self.if_present_ids, find_ghosts=self.find_ghosts,
375
def search_result_from_parent_map(parent_map, missing_keys):
376
"""Transform a parent_map into SearchResult information."""
378
# parent_map is empty or None, simple search result
380
# start_set is all the keys in the cache
381
start_set = set(parent_map)
382
# result set is all the references to keys in the cache
383
result_parents = set()
384
for parents in parent_map.itervalues():
385
result_parents.update(parents)
386
stop_keys = result_parents.difference(start_set)
387
# We don't need to send ghosts back to the server as a position to
389
stop_keys.difference_update(missing_keys)
390
key_count = len(parent_map)
391
if (revision.NULL_REVISION in result_parents
392
and revision.NULL_REVISION in missing_keys):
393
# If we pruned NULL_REVISION from the stop_keys because it's also
394
# in our cache of "missing" keys we need to increment our key count
395
# by 1, because the reconstituted SearchResult on the server will
396
# still consider NULL_REVISION to be an included key.
398
included_keys = start_set.intersection(result_parents)
399
start_set.difference_update(included_keys)
400
return start_set, stop_keys, key_count
403
def _run_search(parent_map, heads, exclude_keys):
404
"""Given a parent map, run a _BreadthFirstSearcher on it.
406
Start at heads, walk until you hit exclude_keys. As a further improvement,
407
watch for any heads that you encounter while walking, which means they were
408
not heads of the search.
410
This is mostly used to generate a succinct recipe for how to walk through
413
:return: (_BreadthFirstSearcher, set(heads_encountered_by_walking))
415
g = Graph(DictParentsProvider(parent_map))
416
s = g._make_breadth_first_searcher(heads)
421
except StopIteration:
423
for parents in s._current_parents.itervalues():
424
f_heads = heads.intersection(parents)
426
found_heads.update(f_heads)
427
stop_keys = exclude_keys.intersection(next_revs)
429
s.stop_searching_any(stop_keys)
430
for parents in s._current_parents.itervalues():
431
f_heads = heads.intersection(parents)
433
found_heads.update(f_heads)
434
return s, found_heads
437
def _find_possible_heads(parent_map, tip_keys, depth):
438
"""Walk backwards (towards children) through the parent_map.
440
This finds 'heads' that will hopefully succinctly describe our search
443
child_map = invert_parent_map(parent_map)
445
current_roots = tip_keys
446
walked = set(current_roots)
447
while current_roots and depth > 0:
450
children_update = children.update
451
for p in current_roots:
452
# Is it better to pre- or post- filter the children?
454
children_update(child_map[p])
457
# If we've seen a key before, we don't want to walk it again. Note that
458
# 'children' stays relatively small while 'walked' grows large. So
459
# don't use 'difference_update' here which has to walk all of 'walked'.
460
# '.difference' is smart enough to walk only children and compare it to
462
children = children.difference(walked)
463
walked.update(children)
464
current_roots = children
466
# We walked to the end of depth, so these are the new tips.
467
heads.update(current_roots)
471
def limited_search_result_from_parent_map(parent_map, missing_keys, tip_keys,
473
"""Transform a parent_map that is searching 'tip_keys' into an
474
approximate SearchResult.
476
We should be able to generate a SearchResult from a given set of starting
477
keys, that covers a subset of parent_map that has the last step pointing at
478
tip_keys. This is to handle the case that really-long-searches shouldn't be
479
started from scratch on each get_parent_map request, but we *do* want to
480
filter out some of the keys that we've already seen, so we don't get
481
information that we already know about on every request.
483
The server will validate the search (that starting at start_keys and
484
stopping at stop_keys yields the exact key_count), so we have to be careful
485
to give an exact recipe.
488
1) Invert parent_map to get child_map (todo: have it cached and pass it
490
2) Starting at tip_keys, walk towards children for 'depth' steps.
491
3) At that point, we have the 'start' keys.
492
4) Start walking parent_map from 'start' keys, counting how many keys
493
are seen, and generating stop_keys for anything that would walk
494
outside of the parent_map.
496
:param parent_map: A map from {child_id: (parent_ids,)}
497
:param missing_keys: parent_ids that we know are unavailable
498
:param tip_keys: the revision_ids that we are searching
499
:param depth: How far back to walk.
502
# No search to send, because we haven't done any searching yet.
504
heads = _find_possible_heads(parent_map, tip_keys, depth)
505
s, found_heads = _run_search(parent_map, heads, set(tip_keys))
506
start_keys, exclude_keys, keys = s.get_state()
508
# Anything in found_heads are redundant start_keys, we hit them while
509
# walking, so we can exclude them from the start list.
510
start_keys = set(start_keys).difference(found_heads)
511
return start_keys, exclude_keys, len(keys)