160
169
self._validate_unicode_text(value,
161
170
'revision property (%s)' % (key,))
172
def _ensure_fallback_inventories(self):
173
"""Ensure that appropriate inventories are available.
175
This only applies to repositories that are stacked, and is about
176
ensuring the stacking invariants. Namely, that for any revision that is
177
present, we either have all of the file content, or we have the parent
178
inventory and the delta file content.
180
if not self.repository._fallback_repositories:
182
if not self.repository._format.supports_chks:
183
raise errors.BzrError("Cannot commit directly to a stacked branch"
184
" in pre-2a formats. See "
185
"https://bugs.launchpad.net/bzr/+bug/375013 for details.")
186
# This is a stacked repo, we need to make sure we have the parent
187
# inventories for the parents.
188
parent_keys = [(p,) for p in self.parents]
189
parent_map = self.repository.inventories._index.get_parent_map(parent_keys)
190
missing_parent_keys = set([pk for pk in parent_keys
191
if pk not in parent_map])
192
fallback_repos = list(reversed(self.repository._fallback_repositories))
193
missing_keys = [('inventories', pk[0])
194
for pk in missing_parent_keys]
196
while missing_keys and fallback_repos:
197
fallback_repo = fallback_repos.pop()
198
source = fallback_repo._get_source(self.repository._format)
199
sink = self.repository._get_sink()
200
stream = source.get_stream_for_missing_keys(missing_keys)
201
missing_keys = sink.insert_stream_without_locking(stream,
202
self.repository._format)
204
raise errors.BzrError('Unable to fill in parent inventories for a'
163
207
def commit(self, message):
164
208
"""Make the actual commit.
1548
1586
@needs_read_lock
1549
def search_missing_revision_ids(self, other, revision_id=None, find_ghosts=True):
1587
def search_missing_revision_ids(self, other,
1588
revision_id=symbol_versioning.DEPRECATED_PARAMETER,
1589
find_ghosts=True, revision_ids=None, if_present_ids=None):
1550
1590
"""Return the revision ids that other has that this does not.
1552
1592
These are returned in topological order.
1554
1594
revision_id: only return revision ids included by revision_id.
1596
if symbol_versioning.deprecated_passed(revision_id):
1597
symbol_versioning.warn(
1598
'search_missing_revision_ids(revision_id=...) was '
1599
'deprecated in 2.4. Use revision_ids=[...] instead.',
1600
DeprecationWarning, stacklevel=3)
1601
if revision_ids is not None:
1602
raise AssertionError(
1603
'revision_ids is mutually exclusive with revision_id')
1604
if revision_id is not None:
1605
revision_ids = [revision_id]
1556
1606
return InterRepository.get(other, self).search_missing_revision_ids(
1557
revision_id, find_ghosts)
1607
find_ghosts=find_ghosts, revision_ids=revision_ids,
1608
if_present_ids=if_present_ids)
1560
1611
def open(base):
2003
2051
w = self.inventories
2004
2052
pb = ui.ui_factory.nested_progress_bar()
2006
return self._find_text_key_references_from_xml_inventory_lines(
2054
return self._serializer._find_text_key_references(
2007
2055
w.iter_lines_added_or_present_in_keys(revision_keys, pb=pb))
2011
def _find_text_key_references_from_xml_inventory_lines(self,
2013
"""Core routine for extracting references to texts from inventories.
2015
This performs the translation of xml lines to revision ids.
2017
:param line_iterator: An iterator of lines, origin_version_id
2018
:return: A dictionary mapping text keys ((fileid, revision_id) tuples)
2019
to whether they were referred to by the inventory of the
2020
revision_id that they contain. Note that if that revision_id was
2021
not part of the line_iterator's output then False will be given -
2022
even though it may actually refer to that key.
2024
if not self._serializer.support_altered_by_hack:
2025
raise AssertionError(
2026
"_find_text_key_references_from_xml_inventory_lines only "
2027
"supported for branches which store inventory as unnested xml"
2028
", not on %r" % self)
2031
# this code needs to read every new line in every inventory for the
2032
# inventories [revision_ids]. Seeing a line twice is ok. Seeing a line
2033
# not present in one of those inventories is unnecessary but not
2034
# harmful because we are filtering by the revision id marker in the
2035
# inventory lines : we only select file ids altered in one of those
2036
# revisions. We don't need to see all lines in the inventory because
2037
# only those added in an inventory in rev X can contain a revision=X
2039
unescape_revid_cache = {}
2040
unescape_fileid_cache = {}
2042
# jam 20061218 In a big fetch, this handles hundreds of thousands
2043
# of lines, so it has had a lot of inlining and optimizing done.
2044
# Sorry that it is a little bit messy.
2045
# Move several functions to be local variables, since this is a long
2047
search = self._file_ids_altered_regex.search
2048
unescape = _unescape_xml
2049
setdefault = result.setdefault
2050
for line, line_key in line_iterator:
2051
match = search(line)
2054
# One call to match.group() returning multiple items is quite a
2055
# bit faster than 2 calls to match.group() each returning 1
2056
file_id, revision_id = match.group('file_id', 'revision_id')
2058
# Inlining the cache lookups helps a lot when you make 170,000
2059
# lines and 350k ids, versus 8.4 unique ids.
2060
# Using a cache helps in 2 ways:
2061
# 1) Avoids unnecessary decoding calls
2062
# 2) Re-uses cached strings, which helps in future set and
2064
# (2) is enough that removing encoding entirely along with
2065
# the cache (so we are using plain strings) results in no
2066
# performance improvement.
2068
revision_id = unescape_revid_cache[revision_id]
2070
unescaped = unescape(revision_id)
2071
unescape_revid_cache[revision_id] = unescaped
2072
revision_id = unescaped
2074
# Note that unconditionally unescaping means that we deserialise
2075
# every fileid, which for general 'pull' is not great, but we don't
2076
# really want to have so many fulltexts that this matters anyway.
2079
file_id = unescape_fileid_cache[file_id]
2081
unescaped = unescape(file_id)
2082
unescape_fileid_cache[file_id] = unescaped
2085
key = (file_id, revision_id)
2086
setdefault(key, False)
2087
if revision_id == line_key[-1]:
2091
2059
def _inventory_xml_lines_for_keys(self, keys):
2092
2060
"""Get a line iterator of the sort needed for finding references.
3261
3213
return self.get_format_string()
3264
# Pre-0.8 formats that don't have a disk format string (because they are
3265
# versioned by the matching control directory). We use the control directory's
3266
# disk format string as a key for the network_name because they meet the
3267
# constraints (simple string, unique, immutable).
3268
network_format_registry.register_lazy(
3269
"Bazaar-NG branch, format 5\n",
3270
'bzrlib.repofmt.weaverepo',
3271
'RepositoryFormat5',
3273
network_format_registry.register_lazy(
3274
"Bazaar-NG branch, format 6\n",
3275
'bzrlib.repofmt.weaverepo',
3276
'RepositoryFormat6',
3279
3216
# formats which have no format string are not discoverable or independently
3280
3217
# creatable on disk, so are not registered in format_registry. They're
3281
# all in bzrlib.repofmt.weaverepo now. When an instance of one of these is
3218
# all in bzrlib.repofmt.knitrepo now. When an instance of one of these is
3282
3219
# needed, it's constructed directly by the BzrDir. Non-native formats where
3283
3220
# the repository is not separately opened are similar.
3285
3222
format_registry.register_lazy(
3286
'Bazaar-NG Repository format 7',
3287
'bzrlib.repofmt.weaverepo',
3291
format_registry.register_lazy(
3292
3223
'Bazaar-NG Knit Repository Format 1',
3293
3224
'bzrlib.repofmt.knitrepo',
3294
3225
'RepositoryFormatKnit1',
3349
3280
'bzrlib.repofmt.pack_repo',
3350
3281
'RepositoryFormatKnitPack6RichRoot',
3283
format_registry.register_lazy(
3284
'Bazaar repository format 2a (needs bzr 1.16 or later)\n',
3285
'bzrlib.repofmt.groupcompress_repo',
3286
'RepositoryFormat2a',
3353
3289
# Development formats.
3354
# Obsolete but kept pending a CHK based subtree format.
3290
# Check their docstrings to see if/when they are obsolete.
3355
3291
format_registry.register_lazy(
3356
3292
("Bazaar development format 2 with subtree support "
3357
3293
"(needs bzr.dev from before 1.8)\n"),
3358
3294
'bzrlib.repofmt.pack_repo',
3359
3295
'RepositoryFormatPackDevelopment2Subtree',
3362
# 1.14->1.16 go below here
3363
format_registry.register_lazy(
3364
'Bazaar development format - group compression and chk inventory'
3365
' (needs bzr.dev from 1.14)\n',
3366
'bzrlib.repofmt.groupcompress_repo',
3367
'RepositoryFormatCHK1',
3370
format_registry.register_lazy(
3371
'Bazaar development format - chk repository with bencode revision '
3372
'serialization (needs bzr.dev from 1.16)\n',
3373
'bzrlib.repofmt.groupcompress_repo',
3374
'RepositoryFormatCHK2',
3376
format_registry.register_lazy(
3377
'Bazaar repository format 2a (needs bzr 1.16 or later)\n',
3378
'bzrlib.repofmt.groupcompress_repo',
3379
'RepositoryFormat2a',
3297
format_registry.register_lazy(
3298
'Bazaar development format 8\n',
3299
'bzrlib.repofmt.groupcompress_repo',
3300
'RepositoryFormat2aSubtree',
3490
3414
return searcher.get_result()
3492
3416
@needs_read_lock
3493
def search_missing_revision_ids(self, revision_id=None, find_ghosts=True):
3417
def search_missing_revision_ids(self,
3418
revision_id=symbol_versioning.DEPRECATED_PARAMETER,
3419
find_ghosts=True, revision_ids=None, if_present_ids=None):
3494
3420
"""Return the revision ids that source has that target does not.
3496
3422
:param revision_id: only return revision ids included by this
3424
:param revision_ids: return revision ids included by these
3425
revision_ids. NoSuchRevision will be raised if any of these
3426
revisions are not present.
3427
:param if_present_ids: like revision_ids, but will not cause
3428
NoSuchRevision if any of these are absent, instead they will simply
3429
not be in the result. This is useful for e.g. finding revisions
3430
to fetch for tags, which may reference absent revisions.
3498
3431
:param find_ghosts: If True find missing revisions in deep history
3499
3432
rather than just finding the surface difference.
3500
3433
:return: A bzrlib.graph.SearchResult.
3435
if symbol_versioning.deprecated_passed(revision_id):
3436
symbol_versioning.warn(
3437
'search_missing_revision_ids(revision_id=...) was '
3438
'deprecated in 2.4. Use revision_ids=[...] instead.',
3439
DeprecationWarning, stacklevel=2)
3440
if revision_ids is not None:
3441
raise AssertionError(
3442
'revision_ids is mutually exclusive with revision_id')
3443
if revision_id is not None:
3444
revision_ids = [revision_id]
3502
3446
# stop searching at found target revisions.
3503
if not find_ghosts and revision_id is not None:
3504
return self._walk_to_common_revisions([revision_id])
3447
if not find_ghosts and (revision_ids is not None or if_present_ids is
3449
return self._walk_to_common_revisions(revision_ids,
3450
if_present_ids=if_present_ids)
3505
3451
# generic, possibly worst case, slow code path.
3506
3452
target_ids = set(self.target.all_revision_ids())
3507
if revision_id is not None:
3508
source_ids = self.source.get_ancestry(revision_id)
3509
if source_ids[0] is not None:
3510
raise AssertionError()
3513
source_ids = self.source.all_revision_ids()
3453
source_ids = self._present_source_revisions_for(
3454
revision_ids, if_present_ids)
3514
3455
result_set = set(source_ids).difference(target_ids)
3515
3456
return self.source.revision_ids_to_search_result(result_set)
3458
def _present_source_revisions_for(self, revision_ids, if_present_ids=None):
3459
"""Returns set of all revisions in ancestry of revision_ids present in
3462
:param revision_ids: if None, all revisions in source are returned.
3463
:param if_present_ids: like revision_ids, but if any/all of these are
3464
absent no error is raised.
3466
if revision_ids is not None or if_present_ids is not None:
3467
# First, ensure all specified revisions exist. Callers expect
3468
# NoSuchRevision when they pass absent revision_ids here.
3469
if revision_ids is None:
3470
revision_ids = set()
3471
if if_present_ids is None:
3472
if_present_ids = set()
3473
revision_ids = set(revision_ids)
3474
if_present_ids = set(if_present_ids)
3475
all_wanted_ids = revision_ids.union(if_present_ids)
3476
graph = self.source.get_graph()
3477
present_revs = set(graph.get_parent_map(all_wanted_ids))
3478
missing = revision_ids.difference(present_revs)
3480
raise errors.NoSuchRevision(self.source, missing.pop())
3481
found_ids = all_wanted_ids.intersection(present_revs)
3482
source_ids = [rev_id for (rev_id, parents) in
3483
graph.iter_ancestry(found_ids)
3484
if rev_id != _mod_revision.NULL_REVISION
3485
and parents is not None]
3487
source_ids = self.source.all_revision_ids()
3488
return set(source_ids)
3518
3491
def _same_model(source, target):
3519
3492
"""True if source and target have the same data representation.
3560
3533
return InterRepository._same_model(source, target)
3563
class InterWeaveRepo(InterSameDataRepository):
3564
"""Optimised code paths between Weave based repositories.
3566
This should be in bzrlib/repofmt/weaverepo.py but we have not yet
3567
implemented lazy inter-object optimisation.
3571
def _get_repo_format_to_test(self):
3572
from bzrlib.repofmt import weaverepo
3573
return weaverepo.RepositoryFormat7()
3576
def is_compatible(source, target):
3577
"""Be compatible with known Weave formats.
3579
We don't test for the stores being of specific types because that
3580
could lead to confusing results, and there is no need to be
3583
from bzrlib.repofmt.weaverepo import (
3589
return (isinstance(source._format, (RepositoryFormat5,
3591
RepositoryFormat7)) and
3592
isinstance(target._format, (RepositoryFormat5,
3594
RepositoryFormat7)))
3595
except AttributeError:
3599
def copy_content(self, revision_id=None):
3600
"""See InterRepository.copy_content()."""
3601
# weave specific optimised path:
3603
self.target.set_make_working_trees(self.source.make_working_trees())
3604
except (errors.RepositoryUpgradeRequired, NotImplemented):
3606
# FIXME do not peek!
3607
if self.source._transport.listable():
3608
pb = ui.ui_factory.nested_progress_bar()
3610
self.target.texts.insert_record_stream(
3611
self.source.texts.get_record_stream(
3612
self.source.texts.keys(), 'topological', False))
3613
pb.update('Copying inventory', 0, 1)
3614
self.target.inventories.insert_record_stream(
3615
self.source.inventories.get_record_stream(
3616
self.source.inventories.keys(), 'topological', False))
3617
self.target.signatures.insert_record_stream(
3618
self.source.signatures.get_record_stream(
3619
self.source.signatures.keys(),
3621
self.target.revisions.insert_record_stream(
3622
self.source.revisions.get_record_stream(
3623
self.source.revisions.keys(),
3624
'topological', True))
3628
self.target.fetch(self.source, revision_id=revision_id)
3631
def search_missing_revision_ids(self, revision_id=None, find_ghosts=True):
3632
"""See InterRepository.missing_revision_ids()."""
3633
# we want all revisions to satisfy revision_id in source.
3634
# but we don't want to stat every file here and there.
3635
# we want then, all revisions other needs to satisfy revision_id
3636
# checked, but not those that we have locally.
3637
# so the first thing is to get a subset of the revisions to
3638
# satisfy revision_id in source, and then eliminate those that
3639
# we do already have.
3640
# this is slow on high latency connection to self, but as this
3641
# disk format scales terribly for push anyway due to rewriting
3642
# inventory.weave, this is considered acceptable.
3644
if revision_id is not None:
3645
source_ids = self.source.get_ancestry(revision_id)
3646
if source_ids[0] is not None:
3647
raise AssertionError()
3650
source_ids = self.source._all_possible_ids()
3651
source_ids_set = set(source_ids)
3652
# source_ids is the worst possible case we may need to pull.
3653
# now we want to filter source_ids against what we actually
3654
# have in target, but don't try to check for existence where we know
3655
# we do not have a revision as that would be pointless.
3656
target_ids = set(self.target._all_possible_ids())
3657
possibly_present_revisions = target_ids.intersection(source_ids_set)
3658
actually_present_revisions = set(
3659
self.target._eliminate_revisions_not_present(possibly_present_revisions))
3660
required_revisions = source_ids_set.difference(actually_present_revisions)
3661
if revision_id is not None:
3662
# we used get_ancestry to determine source_ids then we are assured all
3663
# revisions referenced are present as they are installed in topological order.
3664
# and the tip revision was validated by get_ancestry.
3665
result_set = required_revisions
3667
# if we just grabbed the possibly available ids, then
3668
# we only have an estimate of whats available and need to validate
3669
# that against the revision records.
3671
self.source._eliminate_revisions_not_present(required_revisions))
3672
return self.source.revision_ids_to_search_result(result_set)
3675
class InterKnitRepo(InterSameDataRepository):
3676
"""Optimised code paths between Knit based repositories."""
3679
def _get_repo_format_to_test(self):
3680
from bzrlib.repofmt import knitrepo
3681
return knitrepo.RepositoryFormatKnit1()
3684
def is_compatible(source, target):
3685
"""Be compatible with known Knit formats.
3687
We don't test for the stores being of specific types because that
3688
could lead to confusing results, and there is no need to be
3691
from bzrlib.repofmt.knitrepo import RepositoryFormatKnit
3693
are_knits = (isinstance(source._format, RepositoryFormatKnit) and
3694
isinstance(target._format, RepositoryFormatKnit))
3695
except AttributeError:
3697
return are_knits and InterRepository._same_model(source, target)
3700
def search_missing_revision_ids(self, revision_id=None, find_ghosts=True):
3701
"""See InterRepository.missing_revision_ids()."""
3702
if revision_id is not None:
3703
source_ids = self.source.get_ancestry(revision_id)
3704
if source_ids[0] is not None:
3705
raise AssertionError()
3708
source_ids = self.source.all_revision_ids()
3709
source_ids_set = set(source_ids)
3710
# source_ids is the worst possible case we may need to pull.
3711
# now we want to filter source_ids against what we actually
3712
# have in target, but don't try to check for existence where we know
3713
# we do not have a revision as that would be pointless.
3714
target_ids = set(self.target.all_revision_ids())
3715
possibly_present_revisions = target_ids.intersection(source_ids_set)
3716
actually_present_revisions = set(
3717
self.target._eliminate_revisions_not_present(possibly_present_revisions))
3718
required_revisions = source_ids_set.difference(actually_present_revisions)
3719
if revision_id is not None:
3720
# we used get_ancestry to determine source_ids then we are assured all
3721
# revisions referenced are present as they are installed in topological order.
3722
# and the tip revision was validated by get_ancestry.
3723
result_set = required_revisions
3725
# if we just grabbed the possibly available ids, then
3726
# we only have an estimate of whats available and need to validate
3727
# that against the revision records.
3729
self.source._eliminate_revisions_not_present(required_revisions))
3730
return self.source.revision_ids_to_search_result(result_set)
3733
3536
class InterDifferingSerializer(InterRepository):
4273
4029
is_resume = False
4275
4031
# locked_insert_stream performs a commit|suspend.
4276
return self._locked_insert_stream(stream, src_format, is_resume)
4032
missing_keys = self.insert_stream_without_locking(stream,
4033
src_format, is_resume)
4035
# suspend the write group and tell the caller what is
4036
# missing. We know we can suspend or else we would not have
4037
# entered this code path. (All repositories that can handle
4038
# missing keys can handle suspending a write group).
4039
write_group_tokens = self.target_repo.suspend_write_group()
4040
return write_group_tokens, missing_keys
4041
hint = self.target_repo.commit_write_group()
4042
to_serializer = self.target_repo._format._serializer
4043
src_serializer = src_format._serializer
4044
if (to_serializer != src_serializer and
4045
self.target_repo._format.pack_compresses):
4046
self.target_repo.pack(hint=hint)
4278
4049
self.target_repo.abort_write_group(suppress_errors=True)
4281
4052
self.target_repo.unlock()
4283
def _locked_insert_stream(self, stream, src_format, is_resume):
4054
def insert_stream_without_locking(self, stream, src_format,
4056
"""Insert a stream's content into the target repository.
4058
This assumes that you already have a locked repository and an active
4061
:param src_format: a bzr repository format.
4062
:param is_resume: Passed down to get_missing_parent_inventories to
4063
indicate if we should be checking for missing texts at the same
4066
:return: A set of keys that are missing.
4068
if not self.target_repo.is_write_locked():
4069
raise errors.ObjectNotLocked(self)
4070
if not self.target_repo.is_in_write_group():
4071
raise errors.BzrError('you must already be in a write group')
4284
4072
to_serializer = self.target_repo._format._serializer
4285
4073
src_serializer = src_format._serializer
4286
4074
new_pack = None