        extent possible considering file system caching etc).
        """

    def fetch(self, source, revision_id=None, pb=None, find_ghosts=False):
        """Fetch the content required to construct revision_id from source.

        If revision_id is None all content is copied.

        :param find_ghosts: Find and copy revisions in the source that are
            ghosts in the target (and not reachable directly by walking out to
            the first-present revision in target from revision_id).
        """
        # fast path same-url fetch operations
        if self.has_same_location(source):
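
# A minimal standalone sketch (not bzrlib code) of what find_ghosts=True adds:
# after the plain ancestry walk, look for 'ghosts' -- revision ids referenced
# as parents in the target but not actually present there -- and fetch the
# ones the source can supply. All names below are illustrative assumptions.
def demo_ghosts_to_fetch(target_parent_map, source_revisions):
    present = set(target_parent_map)
    referenced = set()
    for parents in target_parent_map.itervalues():
        referenced.update(parents)
    ghosts = referenced - present
    # only ghosts the source actually has are fetchable
    return ghosts & source_revisions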
            self.get_transaction())

    def _find_file_ids_from_xml_inventory_lines(self, line_iterator,
        revision_ids):
        """Helper routine for fileids_altered_by_revision_ids.

        This performs the translation of xml lines to revision ids.

        :param line_iterator: An iterator of lines
        :param revision_ids: The revision ids to filter for. This should be a
            set or other type which supports efficient __contains__ lookups, as
            the revision id from each parsed line will be looked up in the
            revision_ids filter.
        :return: a dictionary mapping altered file-ids to an iterable of
            revision_ids. Each altered file-id has the exact revision_ids that
            altered it listed explicitly.
        """
        result = {}
        unescape_revid_cache = {}
        unescape_fileid_cache = {}
        # this code needs to read every new line in every inventory for the
        # inventories [revision_ids]: we only select file ids altered in one
        # of those revisions.
        search = self._file_ids_altered_regex.search
        unescape = _unescape_xml
        setdefault = result.setdefault
        for line in line_iterator:
            match = search(line)
            if match is None:
                continue
            # One call to match.group() returning multiple items is quite a
            # bit faster than 2 calls to match.group() each returning 1
            file_id, revision_id = match.group('file_id', 'revision_id')

            # Inlining the cache lookups helps a lot when you make 170,000
            # lines and 350k ids, versus 8.4 unique ids.
            # Using a cache helps in 2 ways:
            #   1) Avoids unnecessary decoding calls
            #   2) Re-uses cached strings, which helps in future set and
            #      equality checks.
            # (2) is enough that removing encoding entirely along with
            # the cache (so we are using plain strings) results in no
            # performance improvement.
            try:
                revision_id = unescape_revid_cache[revision_id]
            except KeyError:
                unescaped = unescape(revision_id)
                unescape_revid_cache[revision_id] = unescaped
                revision_id = unescaped

            if revision_id in revision_ids:
                try:
                    file_id = unescape_fileid_cache[file_id]
                except KeyError:
                    unescaped = unescape(file_id)
                    unescape_fileid_cache[file_id] = unescaped
                    file_id = unescaped
                setdefault(file_id, set()).add(revision_id)
        return result
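
# A minimal standalone sketch (not bzrlib code) of the trick above: scan the
# serialized inventory lines with a regex rather than a full XML parse, and
# collect a file_id -> set(revision_id) mapping. The pattern and the sample
# line are assumptions for illustration; the real pattern lives on the
# serializer as _file_ids_altered_regex.
import re

_DEMO_ALTERED_REGEX = re.compile(
    r'file_id="(?P<file_id>[^"]+)"'
    r'.*revision="(?P<revision_id>[^"]+)"')

def demo_find_altered(lines, revision_ids):
    result = {}
    for line in lines:
        match = _DEMO_ALTERED_REGEX.search(line)
        if match is None:
            continue
        file_id, revision_id = match.group('file_id', 'revision_id')
        if revision_id in revision_ids:
            result.setdefault(file_id, set()).add(revision_id)
    return result

# demo_find_altered(['<file file_id="f-1" revision="r-2"/>'], set(['r-2']))
# returns {'f-1': set(['r-2'])}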

    def fileids_altered_by_revision_ids(self, revision_ids):
        """Find the file ids and versions affected by revisions.

        :param revision_ids: an iterable containing revision ids.
        :return: a dictionary mapping altered file-ids to an iterable of
            revision_ids. Each altered file-id has the exact revision_ids that
            altered it listed explicitly.
        """
        assert self._serializer.support_altered_by_hack, \
            ("fileids_altered_by_revision_ids only supported for branches "
             "which store inventory as unnested xml, not on %r" % self)
        selected_revision_ids = set(revision_ids)
        w = self.get_inventory_weave()
        pb = ui.ui_factory.nested_progress_bar()
        try:
            return self._find_file_ids_from_xml_inventory_lines(
                w.iter_lines_added_or_present_in_versions(
                    selected_revision_ids, pb=pb),
                selected_revision_ids)
        finally:
            pb.finished()

    def iter_files_bytes(self, desired_files):
        """Iterate through file versions.

    'RepositoryFormatKnit3',
    )

# Pack-based formats. There is one format for pre-subtrees, and one for
# post-subtrees to allow ease of testing.
# NOTE: These are experimental in 0.92.
format_registry.register_lazy(
    'Bazaar pack repository format 1 (needs bzr 0.92)\n',
    'bzrlib.repofmt.pack_repo',
    'RepositoryFormatKnitPack1',
    )
format_registry.register_lazy(
    'Bazaar pack repository format 1 with subtree support (needs bzr 0.92)\n',
    'bzrlib.repofmt.pack_repo',
    'RepositoryFormatKnitPack3',
    )
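
# A minimal standalone sketch (not bzrlib code) of what register_lazy buys:
# the registry records only (module name, member name) strings, and the
# module is imported the first time that format is actually requested, so
# registering many formats stays cheap at startup.
class DemoLazyRegistry(object):

    def __init__(self):
        self._lazy = {}

    def register_lazy(self, key, module_name, member_name):
        self._lazy[key] = (module_name, member_name)

    def get(self, key):
        module_name, member_name = self._lazy[key]
        module = __import__(module_name, {}, {}, [member_name])
        return getattr(module, member_name)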

class InterRepository(InterObject):
    """This class represents operations taking place between two repositories.
        return self.source._eliminate_revisions_not_present(required_topo_revisions)

class InterPackRepo(InterSameDataRepository):
    """Optimised code paths between Pack based repositories."""

    @classmethod
    def _get_repo_format_to_test(self):
        from bzrlib.repofmt import pack_repo
        return pack_repo.RepositoryFormatKnitPack1()

    @staticmethod
    def is_compatible(source, target):
        """Be compatible with known Pack formats.

        We don't test for the stores being of specific types because that
        could lead to confusing results, and there is no need to be
        overly general.
        """
        from bzrlib.repofmt.pack_repo import RepositoryFormatPack
        try:
            are_packs = (isinstance(source._format, RepositoryFormatPack) and
                isinstance(target._format, RepositoryFormatPack))
        except AttributeError:
            return False
        return are_packs and InterRepository._same_model(source, target)

    @needs_write_lock
    def fetch(self, revision_id=None, pb=None, find_ghosts=False):
        """See InterRepository.fetch()."""
        from bzrlib.repofmt.pack_repo import Packer
        mutter("Using fetch logic to copy between %s(%s) and %s(%s)",
            self.source, self.source._format, self.target, self.target._format)
        self.count_copied = 0
        if revision_id is None:
            # TODO:
            # everything to do - use pack logic
            # to fetch from all packs to one without
            # inventory parsing etc, IFF nothing to be copied is in the target.
            # till then:
            revision_ids = self.source.all_revision_ids()
            # implementing the TODO will involve:
            # - detecting when all of a pack is selected
            # - avoiding as much as possible pre-selection, so the
            #   more-core routines such as create_pack_from_packs can filter in
            #   a just-in-time fashion. (though having a HEADS list on a
            #   repository might make this a lot easier, because we could
            #   sensibly detect 'new revisions' without doing a full index scan.
        elif _mod_revision.is_null(revision_id):
            # nothing to do:
            return 0
        else:
            try:
                revision_ids = self.missing_revision_ids(revision_id,
                    find_ghosts=find_ghosts)
            except errors.NoSuchRevision:
                raise errors.InstallFailed([revision_id])
        packs = self.source._pack_collection.all_packs()
        pack = Packer(self.target._pack_collection, packs, '.fetch',
            revision_ids).pack()
        if pack is not None:
            self.target._pack_collection._save_pack_names()
            # Trigger an autopack. This may duplicate effort as we've just done
            # a pack creation, but for now it is simpler to think about as
            # 'upload data, then repack if needed'.
            self.target._pack_collection.autopack()
            return pack.get_revision_count()
        else:
            return 0

    @needs_read_lock
    def missing_revision_ids(self, revision_id=None, find_ghosts=True):
        """See InterRepository.missing_revision_ids().

        :param find_ghosts: Find ghosts throughout the ancestry of
            revision_id.
        """
        if not find_ghosts and revision_id is not None:
            graph = self.source.get_graph()
            missing_revs = set()
            searcher = graph._make_breadth_first_searcher([revision_id])
            target_index = \
                self.target._pack_collection.revision_index.combined_index
            null_set = frozenset([_mod_revision.NULL_REVISION])
            while True:
                try:
                    next_revs = set(searcher.next())
                except StopIteration:
                    break
                next_revs.difference_update(null_set)
                target_keys = [(key,) for key in next_revs]
                have_revs = frozenset(node[1][0] for node in
                    target_index.iter_entries(target_keys))
                missing_revs.update(next_revs - have_revs)
                searcher.stop_searching_any(have_revs)
            return missing_revs
        elif revision_id is not None:
            source_ids = self.source.get_ancestry(revision_id)
            assert source_ids[0] is None
            source_ids.pop(0)
        else:
            source_ids = self.source.all_revision_ids()
        # source_ids is the worst possible case we may need to pull.
        # now we want to filter source_ids against what we actually
        # have in target, but don't try to check for existence where we know
        # we do not have a revision as that would be pointless.
        target_ids = set(self.target.all_revision_ids())
        return [r for r in source_ids if (r not in target_ids)]
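
# A minimal standalone sketch (not bzrlib code) of the frontier walk above:
# expand breadth-first from the tip, and at every step stop searching through
# revisions the target already has, so only the missing subgraph is ever
# enumerated. 'parent_map' and 'target_has' stand in for the graph searcher
# and the pack revision index.
def demo_missing_revisions(parent_map, target_has, tip):
    missing = set()
    seen = set()
    frontier = set([tip])
    while frontier:
        seen.update(frontier)
        new_missing = frontier - target_has
        missing.update(new_missing)
        # only parents of missing revisions are explored further; present
        # revisions act as stop points, like searcher.stop_searching_any()
        frontier = set()
        for rev in new_missing:
            frontier.update(parent_map.get(rev, ()))
        frontier -= seen
    return missing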

class InterModel1and2(InterRepository):

        """Be compatible with Knit1 source and Knit3 target"""
        try:
            from bzrlib.repofmt.knitrepo import (RepositoryFormatKnit1,
                RepositoryFormatKnit3)
            from bzrlib.repofmt.pack_repo import (RepositoryFormatKnitPack1,
                RepositoryFormatKnitPack3)
            return (isinstance(source._format,
                    (RepositoryFormatKnit1, RepositoryFormatKnitPack1)) and
                isinstance(target._format,
                    (RepositoryFormatKnit3, RepositoryFormatKnitPack3)))
        except AttributeError:
            return False

    @needs_write_lock
    def fetch(self, revision_id=None, pb=None, find_ghosts=False):
        """See InterRepository.fetch()."""
        from bzrlib.fetch import Knit1to2Fetcher
        mutter("Using fetch logic to copy between %s(%s) and %s(%s)",
2710
self.revision_parents[revision_id] = parents
2534
def heads(self, revision_ids):
2535
revision_ids = tuple(revision_ids)
2537
return self.rev_heads[revision_ids]
2539
heads = self.repo_graph.heads(revision_ids)
2540
self.rev_heads[revision_ids] = heads
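
# A minimal standalone sketch (not bzrlib code) of the memoization idiom in
# heads(): normalize the argument to a hashable tuple, treat the cache hit as
# the fast path via try/except KeyError, and compute-and-store on a miss.
class DemoHeadsCache(object):

    def __init__(self, graph):
        self._graph = graph     # anything exposing heads(tuple_of_ids)
        self._cache = {}

    def heads(self, revision_ids):
        key = tuple(revision_ids)
        try:
            return self._cache[key]
        except KeyError:
            heads = self._graph.heads(key)
            self._cache[key] = heads
            return heads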

    def used_file_versions(self):
        """Return a set of (revision_id, file_id) pairs for each file version
        referenced by any inventory cached by this _RevisionTextVersionCache.

        If the entire repository has been cached, this can be used to find all
        file versions that are actually referenced by inventories. Thus any
        other file version is completely unused and can be removed safely.
        """
        result = set()
        for inventory_summary in self.revision_versions.itervalues():
            result.update(inventory_summary.items())
        return result

class VersionedFileChecker(object):

        for parent in parents_from_inventories:
            if parent in heads and parent not in new_parents:
                new_parents.append(parent)
        return tuple(new_parents)

    def check_file_version_parents(self, weave, file_id):
        """Check the parents stored in a versioned file are correct.

        It also detects file versions that are not referenced by their
        corresponding revision's inventory.

        :returns: A tuple of (wrong_parents, dangling_file_versions).
            wrong_parents is a dict mapping {revision_id: (stored_parents,
            correct_parents)} for each revision_id where the stored parents
            are not correct. dangling_file_versions is a set of (file_id,
            revision_id) tuples for versions that are present in this versioned
            file, but not used by the corresponding inventory.
        """
        wrong_parents = {}
        dangling_file_versions = set()
        for num, revision_id in enumerate(self.planned_revisions):
            correct_parents = self.calculate_file_version_parents(
                revision_id, file_id)
            text_revision = self.revision_versions.get_text_version(
                file_id, revision_id)
            try:
                knit_parents = tuple(weave.get_parents(revision_id))
            except errors.RevisionNotPresent:
                knit_parents = None
            if text_revision != revision_id:
                # This file version is not referenced by its corresponding
                # revision's inventory.
                dangling_file_versions.add((file_id, revision_id))
            if correct_parents != knit_parents:
                wrong_parents[revision_id] = (knit_parents, correct_parents)
        return wrong_parents, dangling_file_versions
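
# A minimal standalone sketch (not bzrlib code) of the comparison performed
# above: given the parents actually stored per file version and the parents
# recomputed from the revision inventories, report mismatches in the same
# {revision_id: (stored_parents, correct_parents)} shape.
def demo_find_wrong_parents(stored_parents, correct_parents):
    wrong = {}
    for revision_id, correct in correct_parents.items():
        stored = stored_parents.get(revision_id)
        if stored != correct:
            wrong[revision_id] = (stored, correct)
    return wrong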