# Copyright (C) 2007-2011 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Searching in versioned file repositories."""
from bzrlib import (
    debug,
    revision,
    trace,
    )
from bzrlib.graph import (
    DictParentsProvider,
    Graph,
    invert_parent_map,
    )
class AbstractSearchResult(object):
    """The result of a search, describing a set of keys.

    Search results are typically used as the 'fetch_spec' parameter when
    fetching revisions.

    :seealso: AbstractSearch
    """

    def get_recipe(self):
        """Return a recipe that can be used to replay this search.

        The recipe allows reconstruction of the same results at a later date.

        :return: A tuple of `(search_kind_str, *details)`.  The details vary by
            kind of search result.
        """
        raise NotImplementedError(self.get_recipe)

    def get_network_struct(self):
        """Return a tuple that can be transmitted via the HPSS protocol."""
        raise NotImplementedError(self.get_network_struct)

    def get_keys(self):
        """Return the keys found in this search.

        :return: A set of keys.
        """
        raise NotImplementedError(self.get_keys)

    def is_empty(self):
        """Return false if the search lists 1 or more revisions."""
        raise NotImplementedError(self.is_empty)

    def refine(self, seen, referenced):
        """Create a new search by refining this search.

        :param seen: Revisions that have been satisfied.
        :param referenced: Revision references observed while satisfying some
            of those revisions.
        :return: A search result.
        """
        raise NotImplementedError(self.refine)
class AbstractSearch(object):
    """A search that can be executed, producing a search result.

    :seealso: AbstractSearchResult
    """

    def execute(self):
        """Construct a network-ready search result from this search description.

        This may take some time to search repositories, etc.

        :return: A search result (an object that implements
            AbstractSearchResult's API).
        """
        raise NotImplementedError(self.execute)
class SearchResult(AbstractSearchResult):
    """The result of a breadth first search.

    A SearchResult provides the ability to reconstruct the search or access a
    set of the keys the search found.
    """

    def __init__(self, start_keys, exclude_keys, key_count, keys):
        """Create a SearchResult.

        :param start_keys: The keys the search started at.
        :param exclude_keys: The keys the search excludes.
        :param key_count: The total number of keys (from start to but not
            including exclude).
        :param keys: The keys the search found. Note that in future we may get
            a SearchResult from a smart server, in which case the keys list is
            not necessarily immediately available.
        """
        self._recipe = ('search', start_keys, exclude_keys, key_count)
        self._keys = frozenset(keys)

    def __repr__(self):
        kind, start_keys, exclude_keys, key_count = self._recipe
        # Truncate long key collections so the repr stays readable.
        if len(start_keys) > 5:
            start_keys_repr = repr(list(start_keys)[:5])[:-1] + ', ...]'
        else:
            start_keys_repr = repr(start_keys)
        if len(exclude_keys) > 5:
            exclude_keys_repr = repr(list(exclude_keys)[:5])[:-1] + ', ...]'
        else:
            exclude_keys_repr = repr(exclude_keys)
        return '<%s %s:(%s, %s, %d)>' % (self.__class__.__name__,
            kind, start_keys_repr, exclude_keys_repr, key_count)

    def get_recipe(self):
        """Return a recipe that can be used to replay this search.

        The recipe allows reconstruction of the same results at a later date
        without knowing all the found keys. The essential elements are a list
        of keys to start and to stop at. In order to give reproducible
        results when ghosts are encountered by a search they are automatically
        added to the exclude list (or else ghost filling may alter the
        results).

        :return: A tuple ('search', start_keys_set, exclude_keys_set,
            revision_count). To recreate the results of this search, create a
            breadth first searcher on the same graph starting at start_keys.
            Then call next() (or next_with_ghosts()) repeatedly, and on every
            result, call stop_searching_any on any keys from the exclude_keys
            set. The revision_count value acts as a trivial cross-check - the
            found revisions of the new search should have as many elements as
            revision_count. If it does not, then additional revisions have been
            ghosted since the search was executed the first time and the second
            time.
        """
        return self._recipe

    def get_network_struct(self):
        """Return a tuple that can be transmitted via the HPSS protocol."""
        start_keys = ' '.join(self._recipe[1])
        stop_keys = ' '.join(self._recipe[2])
        count = str(self._recipe[3])
        return (self._recipe[0], '\n'.join((start_keys, stop_keys, count)))

    def get_keys(self):
        """Return the keys found in this search.

        :return: A set of keys.
        """
        return self._keys

    def is_empty(self):
        """Return false if the search lists 1 or more revisions."""
        return self._recipe[3] == 0

    def refine(self, seen, referenced):
        """Create a new search by refining this search.

        :param seen: Revisions that have been satisfied.
        :param referenced: Revision references observed while satisfying some
            of those revisions.
        :return: A search result.
        """
        start = self._recipe[1]
        exclude = self._recipe[2]
        count = self._recipe[3]
        keys = self.get_keys()
        # New heads = referenced + old heads - seen things - exclude
        pending_refs = set(referenced)
        pending_refs.update(start)
        pending_refs.difference_update(seen)
        pending_refs.difference_update(exclude)
        # New exclude = old exclude + satisfied heads
        seen_heads = start.intersection(seen)
        exclude.update(seen_heads)
        # keys gets seen removed
        keys = keys - seen
        # length is reduced by len(seen)
        count -= len(seen)
        return SearchResult(pending_refs, exclude, count, keys)
class PendingAncestryResult(AbstractSearchResult):
    """A search result that will reconstruct the ancestry for some graph heads.

    Unlike SearchResult, this doesn't hold the complete search result in
    memory, it just holds a description of how to generate it.
    """

    def __init__(self, heads, repo):
        """Constructor.

        :param heads: an iterable of graph heads.
        :param repo: a repository to use to generate the ancestry for the given
            heads.
        """
        self.heads = frozenset(heads)
        self.repo = repo

    def __repr__(self):
        # Truncate long head collections so the repr stays readable.
        if len(self.heads) > 5:
            heads_repr = repr(list(self.heads)[:5])[:-1]
            heads_repr += ', <%d more>...]' % (len(self.heads) - 5,)
        else:
            heads_repr = repr(self.heads)
        return '<%s heads:%s repo:%r>' % (
            self.__class__.__name__, heads_repr, self.repo)

    def get_recipe(self):
        """Return a recipe that can be used to replay this search.

        The recipe allows reconstruction of the same results at a later date.

        :seealso SearchResult.get_recipe:

        :return: A tuple ('proxy-search', start_keys_set, set(), -1)
            To recreate this result, create a PendingAncestryResult with the
            start_keys_set.
        """
        return ('proxy-search', self.heads, set(), -1)

    def get_network_struct(self):
        """Return a list that can be transmitted via the HPSS protocol."""
        parts = ['ancestry-of']
        parts.extend(self.heads)
        return parts

    def get_keys(self):
        """See SearchResult.get_keys.

        Returns all the keys for the ancestry of the heads, excluding
        NULL_REVISION.
        """
        return self._get_keys(self.repo.get_graph())

    def _get_keys(self, graph):
        # Walk the whole ancestry of the heads; drop NULL_REVISION and
        # ghosts (entries whose parents are None).
        NULL_REVISION = revision.NULL_REVISION
        keys = [key for (key, parents) in graph.iter_ancestry(self.heads)
                if key != NULL_REVISION and parents is not None]
        return keys

    def is_empty(self):
        """Return false if the search lists 1 or more revisions."""
        if revision.NULL_REVISION in self.heads:
            return len(self.heads) == 1
        else:
            return len(self.heads) == 0

    def refine(self, seen, referenced):
        """Create a new search by refining this search.

        :param seen: Revisions that have been satisfied.
        :param referenced: Revision references observed while satisfying some
            of those revisions.
        :return: A search result.
        """
        referenced = self.heads.union(referenced)
        return PendingAncestryResult(referenced - seen, self.repo)
class EmptySearchResult(AbstractSearchResult):
    """An empty search result."""

    def is_empty(self):
        # An empty result contains no revisions by definition.
        return True
class EverythingResult(AbstractSearchResult):
    """A search result that simply requests everything in the repository."""

    def __init__(self, repo):
        self._repo = repo

    def __repr__(self):
        return '%s(%r)' % (self.__class__.__name__, self._repo)

    def get_recipe(self):
        # There is no recipe that reproduces "everything"; callers must use
        # get_network_struct instead.
        raise NotImplementedError(self.get_recipe)

    def get_network_struct(self):
        """Return a tuple that can be transmitted via the HPSS protocol."""
        return ('everything',)

    def get_keys(self):
        if 'evil' in debug.debug_flags:
            from bzrlib import remote
            if isinstance(self._repo, remote.RemoteRepository):
                # warn developers (not users) not to do this
                trace.mutter_callsite(
                    2, "EverythingResult(RemoteRepository).get_keys() is slow.")
        return self._repo.all_revision_ids()

    def is_empty(self):
        # It's ok for this to wrongly return False: the worst that can happen
        # is that RemoteStreamSource will initiate a get_stream on an empty
        # repository.  And almost all repositories are non-empty.
        return False

    def refine(self, seen, referenced):
        heads = set(self._repo.all_revision_ids())
        heads.difference_update(seen)
        heads.update(referenced)
        return PendingAncestryResult(heads, self._repo)
class EverythingNotInOther(AbstractSearch):
    """Find all revisions in that are in one repo but not the other."""

    def __init__(self, to_repo, from_repo, find_ghosts=False):
        self.to_repo = to_repo
        self.from_repo = from_repo
        self.find_ghosts = find_ghosts

    def execute(self):
        # Delegate the actual graph difference to the target repository.
        return self.to_repo.search_missing_revision_ids(
            self.from_repo, find_ghosts=self.find_ghosts)
class NotInOtherForRevs(AbstractSearch):
    """Find all revisions missing in one repo for a some specific heads."""

    def __init__(self, to_repo, from_repo, required_ids, if_present_ids=None,
            find_ghosts=False, limit=None):
        """Constructor.

        :param required_ids: revision IDs of heads that must be found, or else
            the search will fail with NoSuchRevision.  All revisions in their
            ancestry not already in the other repository will be included in
            the search result.
        :param if_present_ids: revision IDs of heads that may be absent in the
            source repository.  If present, then their ancestry not already
            found in other will be included in the search result.
        :param limit: maximum number of revisions to fetch
        """
        self.to_repo = to_repo
        self.from_repo = from_repo
        self.find_ghosts = find_ghosts
        self.required_ids = required_ids
        self.if_present_ids = if_present_ids
        self.limit = limit

    def __repr__(self):
        # Truncate long revision lists so the repr stays readable.
        if len(self.required_ids) > 5:
            reqd_revs_repr = repr(list(self.required_ids)[:5])[:-1] + ', ...]'
        else:
            reqd_revs_repr = repr(self.required_ids)
        if self.if_present_ids and len(self.if_present_ids) > 5:
            ifp_revs_repr = repr(list(self.if_present_ids)[:5])[:-1] + ', ...]'
        else:
            ifp_revs_repr = repr(self.if_present_ids)

        return ("<%s from:%r to:%r find_ghosts:%r req'd:%r if-present:%r"
                " limit:%r>") % (
                self.__class__.__name__, self.from_repo, self.to_repo,
                self.find_ghosts, reqd_revs_repr, ifp_revs_repr,
                self.limit)

    def execute(self):
        # Delegate the actual search to the target repository.
        return self.to_repo.search_missing_revision_ids(
            self.from_repo, revision_ids=self.required_ids,
            if_present_ids=self.if_present_ids, find_ghosts=self.find_ghosts,
            limit=self.limit)
def search_result_from_parent_map(parent_map, missing_keys):
    """Transform a parent_map into SearchResult information."""
    if not parent_map:
        # parent_map is empty or None, simple search result
        return [], [], 0
    # start_set is all the keys in the cache
    start_set = set(parent_map)
    # result set is all the references to keys in the cache
    result_parents = set()
    for parents in parent_map.itervalues():
        result_parents.update(parents)
    stop_keys = result_parents.difference(start_set)
    # We don't need to send ghosts back to the server as a position to
    # stop either.
    stop_keys.difference_update(missing_keys)
    key_count = len(parent_map)
    if (revision.NULL_REVISION in result_parents
        and revision.NULL_REVISION in missing_keys):
        # If we pruned NULL_REVISION from the stop_keys because it's also
        # in our cache of "missing" keys we need to increment our key count
        # by 1, because the reconsitituted SearchResult on the server will
        # still consider NULL_REVISION to be an included key.
        key_count += 1
    included_keys = start_set.intersection(result_parents)
    start_set.difference_update(included_keys)
    return start_set, stop_keys, key_count
def _run_search(parent_map, heads, exclude_keys):
    """Given a parent map, run a _BreadthFirstSearcher on it.

    Start at heads, walk until you hit exclude_keys. As a further improvement,
    watch for any heads that you encounter while walking, which means they were
    not heads of the search.

    This is mostly used to generate a succinct recipe for how to walk through
    most of parent_map.

    :return: (_BreadthFirstSearcher, set(heads_encountered_by_walking))
    """
    g = Graph(DictParentsProvider(parent_map))
    s = g._make_breadth_first_searcher(heads)
    found_heads = set()
    while True:
        try:
            next_revs = s.next()
        except StopIteration:
            break
        # Any 'head' we reached as somebody else's parent was not actually
        # a head of the search.
        for parents in s._current_parents.itervalues():
            f_heads = heads.intersection(parents)
            if f_heads:
                found_heads.update(f_heads)
        stop_keys = exclude_keys.intersection(next_revs)
        if stop_keys:
            s.stop_searching_any(stop_keys)
    # Check the final layer of parents as well.
    for parents in s._current_parents.itervalues():
        f_heads = heads.intersection(parents)
        if f_heads:
            found_heads.update(f_heads)
    return s, found_heads
def _find_possible_heads(parent_map, tip_keys, depth):
    """Walk backwards (towards children) through the parent_map.

    This finds 'heads' that will hopefully succinctly describe our search
    graph.
    """
    child_map = invert_parent_map(parent_map)
    heads = set()
    current_roots = tip_keys
    walked = set(current_roots)
    while current_roots and depth > 0:
        depth -= 1
        children = set()
        children_update = children.update
        for p in current_roots:
            # Is it better to pre- or post- filter the children?
            try:
                children_update(child_map[p])
            except KeyError:
                # No children recorded: p is a head in its own right.
                heads.add(p)
        # If we've seen a key before, we don't want to walk it again. Note that
        # 'children' stays relatively small while 'walked' grows large. So
        # don't use 'difference_update' here which has to walk all of 'walked'.
        # '.difference' is smart enough to walk only children and compare it to
        # walked.
        children = children.difference(walked)
        walked.update(children)
        current_roots = children
    if current_roots:
        # We walked to the end of depth, so these are the new tips.
        heads.update(current_roots)
    return heads
def limited_search_result_from_parent_map(parent_map, missing_keys, tip_keys,
                                          depth):
    """Transform a parent_map that is searching 'tip_keys' into an
    approximate SearchResult.

    We should be able to generate a SearchResult from a given set of starting
    keys, that covers a subset of parent_map that has the last step pointing at
    tip_keys. This is to handle the case that really-long-searches shouldn't be
    started from scratch on each get_parent_map request, but we *do* want to
    filter out some of the keys that we've already seen, so we don't get
    information that we already know about on every request.

    The server will validate the search (that starting at start_keys and
    stopping at stop_keys yields the exact key_count), so we have to be careful
    to give an exact recipe.

    Basic algorithm is:
        1) Invert parent_map to get child_map (todo: have it cached and pass it
           in)
        2) Starting at tip_keys, walk towards children for 'depth' steps.
        3) At that point, we have the 'start' keys.
        4) Start walking parent_map from 'start' keys, counting how many keys
           are seen, and generating stop_keys for anything that would walk
           outside of the parent_map.

    :param parent_map: A map from {child_id: (parent_ids,)}
    :param missing_keys: parent_ids that we know are unavailable
    :param tip_keys: the revision_ids that we are searching
    :param depth: How far back to walk.
    """
    if not parent_map:
        # No search to send, because we haven't done any searching yet.
        return [], [], 0
    heads = _find_possible_heads(parent_map, tip_keys, depth)
    s, found_heads = _run_search(parent_map, heads, set(tip_keys))
    start_keys, exclude_keys, keys = s.get_state()
    if found_heads:
        # Anything in found_heads are redundant start_keys, we hit them while
        # walking, so we can exclude them from the start list.
        start_keys = set(start_keys).difference(found_heads)
    return start_keys, exclude_keys, len(keys)