1919
1919
yield line, revid
1921
1921
def _find_file_ids_from_xml_inventory_lines(self, line_iterator,
1923
1923
"""Helper routine for fileids_altered_by_revision_ids.
1925
1925
This performs the translation of xml lines to revision ids.
1927
1927
:param line_iterator: An iterator of lines, origin_version_id
1928
:param revision_ids: The revision ids to filter for. This should be a
1928
:param revision_keys: The revision ids to filter for. This should be a
1929
1929
set or other type which supports efficient __contains__ lookups, as
1930
the revision id from each parsed line will be looked up in the
1931
revision_ids filter.
1930
the revision key from each parsed line will be looked up in the
1931
revision_keys filter.
1932
1932
:return: a dictionary mapping altered file-ids to an iterable of
1933
1933
revision_ids. Each altered file-ids has the exact revision_ids that
1934
1934
altered it listed explicitly.
1936
1936
seen = set(self._find_text_key_references_from_xml_inventory_lines(
1937
1937
line_iterator).iterkeys())
1938
# Note that revision_ids are revision keys.
1939
parent_maps = self.revisions.get_parent_map(revision_ids)
1941
map(parents.update, parent_maps.itervalues())
1942
parents.difference_update(revision_ids)
1938
parent_keys = self._find_parent_keys_of_revisions(revision_keys)
1943
1939
parent_seen = set(self._find_text_key_references_from_xml_inventory_lines(
1944
self._inventory_xml_lines_for_keys(parents)))
1940
self._inventory_xml_lines_for_keys(parent_keys)))
1945
1941
new_keys = seen - parent_seen
1947
1943
setdefault = result.setdefault
1949
1945
setdefault(key[0], set()).add(key[-1])
1948
def _find_parent_ids_of_revisions(self, revision_ids):
    """Find all parent ids that are mentioned in the revision graph.

    :param revision_ids: The revision ids whose parents are wanted. It is
        traversed twice (once for the graph lookup, once to subtract it
        from the result), so it must be re-iterable.
    :return: set of revisions that are parents of revision_ids which are
        not part of revision_ids themselves
    """
    parent_map = self.get_parent_map(revision_ids)
    parent_ids = set()
    # Explicit loop rather than map(): calling map() only for its side
    # effects builds a throwaway list, and does nothing where map() is lazy.
    for parents in parent_map.values():
        parent_ids.update(parents)
    parent_ids.difference_update(revision_ids)
    # Never report the null revision as a parent.
    parent_ids.discard(_mod_revision.NULL_REVISION)
    return parent_ids
1961
def _find_parent_keys_of_revisions(self, revision_keys):
    """Similar to _find_parent_ids_of_revisions, but used with keys.

    :param revision_keys: An iterable of revision_keys. It is traversed
        twice (once for the graph lookup, once to subtract it from the
        result), so it must be re-iterable.
    :return: The parents of all revision_keys that are not already in
        revision_keys
    """
    parent_map = self.revisions.get_parent_map(revision_keys)
    parent_keys = set()
    # Explicit loop rather than map(): calling map() only for its side
    # effects builds a throwaway list, and does nothing where map() is lazy.
    for parents in parent_map.values():
        parent_keys.update(parents)
    parent_keys.difference_update(revision_keys)
    # NOTE(review): if revision keys are tuples such as (revid,), discarding
    # the bare NULL_REVISION value never matches anything -- confirm whether
    # a key tuple was intended here.
    parent_keys.discard(_mod_revision.NULL_REVISION)
    return parent_keys
1952
1975
def fileids_altered_by_revision_ids(self, revision_ids, _inv_weave=None):
1953
1976
"""Find the file ids and versions affected by revisions.
2235
2258
return self.get_revision(revision_id).inventory_sha1
2260
def get_rev_id_for_revno(self, revno, known_pair):
2261
"""Return the revision id of a revno, given a later (revno, revid)
2262
pair in the same history.
2264
:return: if found (True, revid). If the available history ran out
2265
before reaching the revno, then this returns
2266
(False, (closest_revno, closest_revid)).
2268
known_revno, known_revid = known_pair
2269
partial_history = [known_revid]
2270
distance_from_known = known_revno - revno
2271
if distance_from_known < 0:
2273
'requested revno (%d) is later than given known revno (%d)'
2274
% (revno, known_revno))
2277
self, partial_history, stop_index=distance_from_known)
2278
except errors.RevisionNotPresent, err:
2279
if err.revision_id == known_revid:
2280
# The start revision (known_revid) wasn't found.
2282
# This is a stacked repository with no fallbacks, or a there's a
2283
# left-hand ghost. Either way, even though the revision named in
2284
# the error isn't in this repo, we know it's the next step in this
2285
# left-hand history.
2286
partial_history.append(err.revision_id)
2287
if len(partial_history) <= distance_from_known:
2288
# Didn't find enough history to get a revid for the revno.
2289
earliest_revno = known_revno - len(partial_history) + 1
2290
return (False, (earliest_revno, partial_history[-1]))
2291
if len(partial_history) - 1 > distance_from_known:
2292
raise AssertionError('_iter_for_revno returned too much history')
2293
return (True, partial_history[-1])
2237
2295
def iter_reverse_revision_history(self, revision_id):
2238
2296
"""Iterate backwards through revision ids in the lefthand history
3413
3476
return self.source.revision_ids_to_search_result(result_set)
3416
class InterPackRepo(InterSameDataRepository):
3417
"""Optimised code paths between Pack based repositories."""
3420
def _get_repo_format_to_test(self):
    """Return a new RepositoryFormatKnitPack6RichRoot instance.

    The format class is imported inside the function rather than at module
    level.
    """
    from bzrlib.repofmt.pack_repo import RepositoryFormatKnitPack6RichRoot
    return RepositoryFormatKnitPack6RichRoot()
3425
def is_compatible(source, target):
3426
"""Be compatible with known Pack formats.
3428
We don't test for the stores being of specific types because that
3429
could lead to confusing results, and there is no need to be
3432
InterPackRepo does not support CHK based repositories.
3434
from bzrlib.repofmt.pack_repo import RepositoryFormatPack
3435
from bzrlib.repofmt.groupcompress_repo import RepositoryFormatCHK1
3437
are_packs = (isinstance(source._format, RepositoryFormatPack) and
3438
isinstance(target._format, RepositoryFormatPack))
3439
not_packs = (isinstance(source._format, RepositoryFormatCHK1) or
3440
isinstance(target._format, RepositoryFormatCHK1))
3441
except AttributeError:
3443
if not_packs or not are_packs:
3445
return InterRepository._same_model(source, target)
3448
def fetch(self, revision_id=None, pb=None, find_ghosts=False,
3450
"""See InterRepository.fetch()."""
3451
if (len(self.source._fallback_repositories) > 0 or
3452
len(self.target._fallback_repositories) > 0):
3453
# The pack layer is not aware of fallback repositories, so when
3454
# fetching from a stacked repository or into a stacked repository
3455
# we use the generic fetch logic which uses the VersionedFiles
3456
# attributes on repository.
3457
from bzrlib.fetch import RepoFetcher
3458
fetcher = RepoFetcher(self.target, self.source, revision_id,
3459
pb, find_ghosts, fetch_spec=fetch_spec)
3460
if fetch_spec is not None:
3461
if len(list(fetch_spec.heads)) != 1:
3462
raise AssertionError(
3463
"InterPackRepo.fetch doesn't support "
3464
"fetching multiple heads yet.")
3465
revision_id = list(fetch_spec.heads)[0]
3467
if revision_id is None:
3469
# everything to do - use pack logic
3470
# to fetch from all packs to one without
3471
# inventory parsing etc, IFF nothing to be copied is in the target.
3473
source_revision_ids = frozenset(self.source.all_revision_ids())
3474
revision_ids = source_revision_ids - \
3475
frozenset(self.target.get_parent_map(source_revision_ids))
3476
revision_keys = [(revid,) for revid in revision_ids]
3477
index = self.target._pack_collection.revision_index.combined_index
3478
present_revision_ids = set(item[1][0] for item in
3479
index.iter_entries(revision_keys))
3480
revision_ids = set(revision_ids) - present_revision_ids
3481
# implementing the TODO will involve:
3482
# - detecting when all of a pack is selected
3483
# - avoiding as much as possible pre-selection, so the
3484
# more-core routines such as create_pack_from_packs can filter in
3485
# a just-in-time fashion. (though having a HEADS list on a
3486
# repository might make this a lot easier, because we could
3487
# sensibly detect 'new revisions' without doing a full index scan.
3488
elif _mod_revision.is_null(revision_id):
3492
revision_ids = self.search_missing_revision_ids(revision_id,
3493
find_ghosts=find_ghosts).get_keys()
3494
if len(revision_ids) == 0:
3496
return self._pack(self.source, self.target, revision_ids)
3498
def _pack(self, source, target, revision_ids):
3499
from bzrlib.repofmt.pack_repo import Packer
3500
packs = source._pack_collection.all_packs()
3501
pack = Packer(self.target._pack_collection, packs, '.fetch',
3502
revision_ids).pack()
3503
if pack is not None:
3504
self.target._pack_collection._save_pack_names()
3505
copied_revs = pack.get_revision_count()
3506
# Trigger an autopack. This may duplicate effort as we've just done
3507
# a pack creation, but for now it is simpler to think about as
3508
# 'upload data, then repack if needed'.
3509
self.target._pack_collection.autopack()
3510
return (copied_revs, [])
3515
def search_missing_revision_ids(self, revision_id=None, find_ghosts=True):
3516
"""See InterRepository.missing_revision_ids().
3518
:param find_ghosts: Find ghosts throughout the ancestry of
3521
if not find_ghosts and revision_id is not None:
3522
return self._walk_to_common_revisions([revision_id])
3523
elif revision_id is not None:
3524
# Find ghosts: search for revisions pointing from one repository to
3525
# the other, and vice versa, anywhere in the history of revision_id.
3526
graph = self.target.get_graph(other_repository=self.source)
3527
searcher = graph._make_breadth_first_searcher([revision_id])
3531
next_revs, ghosts = searcher.next_with_ghosts()
3532
except StopIteration:
3534
if revision_id in ghosts:
3535
raise errors.NoSuchRevision(self.source, revision_id)
3536
found_ids.update(next_revs)
3537
found_ids.update(ghosts)
3538
found_ids = frozenset(found_ids)
3539
# Double query here: should be able to avoid this by changing the
3540
# graph api further.
3541
result_set = found_ids - frozenset(
3542
self.target.get_parent_map(found_ids))
3544
source_ids = self.source.all_revision_ids()
3545
# source_ids is the worst possible case we may need to pull.
3546
# now we want to filter source_ids against what we actually
3547
# have in target, but don't try to check for existence where we know
3548
# we do not have a revision as that would be pointless.
3549
target_ids = set(self.target.all_revision_ids())
3550
result_set = set(source_ids).difference(target_ids)
3551
return self.source.revision_ids_to_search_result(result_set)
3554
3479
class InterDifferingSerializer(InterRepository):
4412
4336
yield versionedfile.FulltextContentFactory(
4413
4337
key, parent_keys, None, as_bytes)
4340
def _iter_for_revno(repo, partial_history_cache, stop_index=None,
4341
stop_revision=None):
4342
"""Extend the partial history to include a given index
4344
If a stop_index is supplied, stop when that index has been reached.
4345
If a stop_revision is supplied, stop when that revision is
4346
encountered. Otherwise, stop when the beginning of history is
4349
:param stop_index: The index which should be present. When it is
4350
present, history extension will stop.
4351
:param stop_revision: The revision id which should be present. When
4352
it is encountered, history extension will stop.
4354
start_revision = partial_history_cache[-1]
4355
iterator = repo.iter_reverse_revision_history(start_revision)
4357
#skip the last revision in the list
4360
if (stop_index is not None and
4361
len(partial_history_cache) > stop_index):
4363
if partial_history_cache[-1] == stop_revision:
4365
revision_id = iterator.next()
4366
partial_history_cache.append(revision_id)
4367
except StopIteration: