172
176
self._validate_unicode_text(value,
173
177
'revision property (%s)' % (key,))
179
def _ensure_fallback_inventories(self):
180
"""Ensure that appropriate inventories are available.
182
This only applies to repositories that are stacked, and is about
183
ensuring the stacking invariants. Namely, that for any revision that is
184
present, we either have all of the file content, or we have the parent
185
inventory and the delta file content.
187
if not self.repository._fallback_repositories:
189
if not self.repository._format.supports_chks:
190
raise errors.BzrError("Cannot commit directly to a stacked branch"
191
" in pre-2a formats. See "
192
"https://bugs.launchpad.net/bzr/+bug/375013 for details.")
193
# This is a stacked repo, we need to make sure we have the parent
194
# inventories for the parents.
195
parent_keys = [(p,) for p in self.parents]
196
parent_map = self.repository.inventories._index.get_parent_map(parent_keys)
197
missing_parent_keys = set([pk for pk in parent_keys
198
if pk not in parent_map])
199
fallback_repos = list(reversed(self.repository._fallback_repositories))
200
missing_keys = [('inventories', pk[0])
201
for pk in missing_parent_keys]
203
while missing_keys and fallback_repos:
204
fallback_repo = fallback_repos.pop()
205
source = fallback_repo._get_source(self.repository._format)
206
sink = self.repository._get_sink()
207
stream = source.get_stream_for_missing_keys(missing_keys)
208
missing_keys = sink.insert_stream_without_locking(stream,
209
self.repository._format)
211
raise errors.BzrError('Unable to fill in parent inventories for a'
175
214
def commit(self, message):
176
215
"""Make the actual commit.
1559
1593
@needs_read_lock
1560
def search_missing_revision_ids(self, other, revision_id=None, find_ghosts=True):
1594
def search_missing_revision_ids(self, other,
1595
revision_id=symbol_versioning.DEPRECATED_PARAMETER,
1596
find_ghosts=True, revision_ids=None, if_present_ids=None):
1561
1597
"""Return the revision ids that other has that this does not.
1563
1599
These are returned in topological order.
1565
1601
revision_id: only return revision ids included by revision_id.
1603
if symbol_versioning.deprecated_passed(revision_id):
1604
symbol_versioning.warn(
1605
'search_missing_revision_ids(revision_id=...) was '
1606
'deprecated in 2.4. Use revision_ids=[...] instead.',
1607
DeprecationWarning, stacklevel=3)
1608
if revision_ids is not None:
1609
raise AssertionError(
1610
'revision_ids is mutually exclusive with revision_id')
1611
if revision_id is not None:
1612
revision_ids = [revision_id]
1567
1613
return InterRepository.get(other, self).search_missing_revision_ids(
1568
revision_id, find_ghosts)
1614
find_ghosts=find_ghosts, revision_ids=revision_ids,
1615
if_present_ids=if_present_ids)
1571
1618
def open(base):
2014
2058
w = self.inventories
2015
2059
pb = ui.ui_factory.nested_progress_bar()
2017
return self._find_text_key_references_from_xml_inventory_lines(
2061
return self._serializer._find_text_key_references(
2018
2062
w.iter_lines_added_or_present_in_keys(revision_keys, pb=pb))
2022
def _find_text_key_references_from_xml_inventory_lines(self,
2024
"""Core routine for extracting references to texts from inventories.
2026
This performs the translation of xml lines to revision ids.
2028
:param line_iterator: An iterator of lines, origin_version_id
2029
:return: A dictionary mapping text keys ((fileid, revision_id) tuples)
2030
to whether they were referred to by the inventory of the
2031
revision_id that they contain. Note that if that revision_id was
2032
not part of the line_iterator's output then False will be given -
2033
even though it may actually refer to that key.
2035
if not self._serializer.support_altered_by_hack:
2036
raise AssertionError(
2037
"_find_text_key_references_from_xml_inventory_lines only "
2038
"supported for branches which store inventory as unnested xml"
2039
", not on %r" % self)
2042
# this code needs to read every new line in every inventory for the
2043
# inventories [revision_ids]. Seeing a line twice is ok. Seeing a line
2044
# not present in one of those inventories is unnecessary but not
2045
# harmful because we are filtering by the revision id marker in the
2046
# inventory lines : we only select file ids altered in one of those
2047
# revisions. We don't need to see all lines in the inventory because
2048
# only those added in an inventory in rev X can contain a revision=X
2050
unescape_revid_cache = {}
2051
unescape_fileid_cache = {}
2053
# jam 20061218 In a big fetch, this handles hundreds of thousands
2054
# of lines, so it has had a lot of inlining and optimizing done.
2055
# Sorry that it is a little bit messy.
2056
# Move several functions to be local variables, since this is a long
2058
search = self._file_ids_altered_regex.search
2059
unescape = _unescape_xml
2060
setdefault = result.setdefault
2061
for line, line_key in line_iterator:
2062
match = search(line)
2065
# One call to match.group() returning multiple items is quite a
2066
# bit faster than 2 calls to match.group() each returning 1
2067
file_id, revision_id = match.group('file_id', 'revision_id')
2069
# Inlining the cache lookups helps a lot when you make 170,000
2070
# lines and 350k ids, versus 8.4 unique ids.
2071
# Using a cache helps in 2 ways:
2072
# 1) Avoids unnecessary decoding calls
2073
# 2) Re-uses cached strings, which helps in future set and
2075
# (2) is enough that removing encoding entirely along with
2076
# the cache (so we are using plain strings) results in no
2077
# performance improvement.
2079
revision_id = unescape_revid_cache[revision_id]
2081
unescaped = unescape(revision_id)
2082
unescape_revid_cache[revision_id] = unescaped
2083
revision_id = unescaped
2085
# Note that unconditionally unescaping means that we deserialise
2086
# every fileid, which for general 'pull' is not great, but we don't
2087
# really want to have so many fulltexts that this matters anyway.
2090
file_id = unescape_fileid_cache[file_id]
2092
unescaped = unescape(file_id)
2093
unescape_fileid_cache[file_id] = unescaped
2096
key = (file_id, revision_id)
2097
setdefault(key, False)
2098
if revision_id == line_key[-1]:
2102
2066
def _inventory_xml_lines_for_keys(self, keys):
2103
2067
"""Get a line iterator of the sort needed for findind references.
3272
3220
return self.get_format_string()
3275
# Pre-0.8 formats that don't have a disk format string (because they are
3276
# versioned by the matching control directory). We use the control directories
3277
# disk format string as a key for the network_name because they meet the
3278
# constraints (simple string, unique, immutable).
3279
network_format_registry.register_lazy(
3280
"Bazaar-NG branch, format 5\n",
3281
'bzrlib.repofmt.weaverepo',
3282
'RepositoryFormat5',
3284
network_format_registry.register_lazy(
3285
"Bazaar-NG branch, format 6\n",
3286
'bzrlib.repofmt.weaverepo',
3287
'RepositoryFormat6',
3290
3223
# formats which have no format string are not discoverable or independently
3291
3224
# creatable on disk, so are not registered in format_registry. They're
3292
# all in bzrlib.repofmt.weaverepo now. When an instance of one of these is
3225
# all in bzrlib.repofmt.knitrepo now. When an instance of one of these is
3293
3226
# needed, it's constructed directly by the BzrDir. Non-native formats where
3294
3227
# the repository is not separately opened are similar.
3296
3229
format_registry.register_lazy(
3297
'Bazaar-NG Repository format 7',
3298
'bzrlib.repofmt.weaverepo',
3302
format_registry.register_lazy(
3303
3230
'Bazaar-NG Knit Repository Format 1',
3304
3231
'bzrlib.repofmt.knitrepo',
3305
3232
'RepositoryFormatKnit1',
3360
3287
'bzrlib.repofmt.pack_repo',
3361
3288
'RepositoryFormatKnitPack6RichRoot',
3290
format_registry.register_lazy(
3291
'Bazaar repository format 2a (needs bzr 1.16 or later)\n',
3292
'bzrlib.repofmt.groupcompress_repo',
3293
'RepositoryFormat2a',
3364
3296
# Development formats.
3365
# Obsolete but kept pending a CHK based subtree format.
3297
# Check their docstrings to see if/when they are obsolete.
3366
3298
format_registry.register_lazy(
3367
3299
("Bazaar development format 2 with subtree support "
3368
3300
"(needs bzr.dev from before 1.8)\n"),
3369
3301
'bzrlib.repofmt.pack_repo',
3370
3302
'RepositoryFormatPackDevelopment2Subtree',
3373
# 1.14->1.16 go below here
3374
format_registry.register_lazy(
3375
'Bazaar development format - group compression and chk inventory'
3376
' (needs bzr.dev from 1.14)\n',
3377
'bzrlib.repofmt.groupcompress_repo',
3378
'RepositoryFormatCHK1',
3381
format_registry.register_lazy(
3382
'Bazaar development format - chk repository with bencode revision '
3383
'serialization (needs bzr.dev from 1.16)\n',
3384
'bzrlib.repofmt.groupcompress_repo',
3385
'RepositoryFormatCHK2',
3387
format_registry.register_lazy(
3388
'Bazaar repository format 2a (needs bzr 1.16 or later)\n',
3389
'bzrlib.repofmt.groupcompress_repo',
3390
'RepositoryFormat2a',
3392
3304
format_registry.register_lazy(
3393
3305
'Bazaar development format 8\n',
3394
3306
'bzrlib.repofmt.groupcompress_repo',
3506
3421
return searcher.get_result()
3508
3423
@needs_read_lock
3509
def search_missing_revision_ids(self, revision_id=None, find_ghosts=True):
3424
def search_missing_revision_ids(self,
3425
revision_id=symbol_versioning.DEPRECATED_PARAMETER,
3426
find_ghosts=True, revision_ids=None, if_present_ids=None):
3510
3427
"""Return the revision ids that source has that target does not.
3512
3429
:param revision_id: only return revision ids included by this
3431
:param revision_ids: return revision ids included by these
3432
revision_ids. NoSuchRevision will be raised if any of these
3433
revisions are not present.
3434
:param if_present_ids: like revision_ids, but will not cause
3435
NoSuchRevision if any of these are absent, instead they will simply
3436
not be in the result. This is useful for e.g. finding revisions
3437
to fetch for tags, which may reference absent revisions.
3514
3438
:param find_ghosts: If True find missing revisions in deep history
3515
3439
rather than just finding the surface difference.
3516
3440
:return: A bzrlib.graph.SearchResult.
3442
if symbol_versioning.deprecated_passed(revision_id):
3443
symbol_versioning.warn(
3444
'search_missing_revision_ids(revision_id=...) was '
3445
'deprecated in 2.4. Use revision_ids=[...] instead.',
3446
DeprecationWarning, stacklevel=2)
3447
if revision_ids is not None:
3448
raise AssertionError(
3449
'revision_ids is mutually exclusive with revision_id')
3450
if revision_id is not None:
3451
revision_ids = [revision_id]
3518
3453
# stop searching at found target revisions.
3519
if not find_ghosts and revision_id is not None:
3520
return self._walk_to_common_revisions([revision_id])
3454
if not find_ghosts and (revision_ids is not None or if_present_ids is
3456
return self._walk_to_common_revisions(revision_ids,
3457
if_present_ids=if_present_ids)
3521
3458
# generic, possibly worst case, slow code path.
3522
3459
target_ids = set(self.target.all_revision_ids())
3523
if revision_id is not None:
3524
source_ids = self.source.get_ancestry(revision_id)
3525
if source_ids[0] is not None:
3526
raise AssertionError()
3529
source_ids = self.source.all_revision_ids()
3460
source_ids = self._present_source_revisions_for(
3461
revision_ids, if_present_ids)
3530
3462
result_set = set(source_ids).difference(target_ids)
3531
3463
return self.source.revision_ids_to_search_result(result_set)
3465
def _present_source_revisions_for(self, revision_ids, if_present_ids=None):
3466
"""Returns set of all revisions in ancestry of revision_ids present in
3469
:param revision_ids: if None, all revisions in source are returned.
3470
:param if_present_ids: like revision_ids, but if any/all of these are
3471
absent no error is raised.
3473
if revision_ids is not None or if_present_ids is not None:
3474
# First, ensure all specified revisions exist. Callers expect
3475
# NoSuchRevision when they pass absent revision_ids here.
3476
if revision_ids is None:
3477
revision_ids = set()
3478
if if_present_ids is None:
3479
if_present_ids = set()
3480
revision_ids = set(revision_ids)
3481
if_present_ids = set(if_present_ids)
3482
all_wanted_ids = revision_ids.union(if_present_ids)
3483
graph = self.source.get_graph()
3484
present_revs = set(graph.get_parent_map(all_wanted_ids))
3485
missing = revision_ids.difference(present_revs)
3487
raise errors.NoSuchRevision(self.source, missing.pop())
3488
found_ids = all_wanted_ids.intersection(present_revs)
3489
source_ids = [rev_id for (rev_id, parents) in
3490
graph.iter_ancestry(found_ids)
3491
if rev_id != _mod_revision.NULL_REVISION
3492
and parents is not None]
3494
source_ids = self.source.all_revision_ids()
3495
return set(source_ids)
3534
3498
def _same_model(source, target):
3535
3499
"""True if source and target have the same data representation.
3576
3540
return InterRepository._same_model(source, target)
3579
class InterWeaveRepo(InterSameDataRepository):
3580
"""Optimised code paths between Weave based repositories.
3582
This should be in bzrlib/repofmt/weaverepo.py but we have not yet
3583
implemented lazy inter-object optimisation.
3587
def _get_repo_format_to_test(self):
3588
from bzrlib.repofmt import weaverepo
3589
return weaverepo.RepositoryFormat7()
3592
def is_compatible(source, target):
3593
"""Be compatible with known Weave formats.
3595
We don't test for the stores being of specific types because that
3596
could lead to confusing results, and there is no need to be
3599
from bzrlib.repofmt.weaverepo import (
3605
return (isinstance(source._format, (RepositoryFormat5,
3607
RepositoryFormat7)) and
3608
isinstance(target._format, (RepositoryFormat5,
3610
RepositoryFormat7)))
3611
except AttributeError:
3615
def copy_content(self, revision_id=None):
3616
"""See InterRepository.copy_content()."""
3617
# weave specific optimised path:
3619
self.target.set_make_working_trees(self.source.make_working_trees())
3620
except (errors.RepositoryUpgradeRequired, NotImplemented):
3622
# FIXME do not peek!
3623
if self.source._transport.listable():
3624
pb = ui.ui_factory.nested_progress_bar()
3626
self.target.texts.insert_record_stream(
3627
self.source.texts.get_record_stream(
3628
self.source.texts.keys(), 'topological', False))
3629
pb.update('Copying inventory', 0, 1)
3630
self.target.inventories.insert_record_stream(
3631
self.source.inventories.get_record_stream(
3632
self.source.inventories.keys(), 'topological', False))
3633
self.target.signatures.insert_record_stream(
3634
self.source.signatures.get_record_stream(
3635
self.source.signatures.keys(),
3637
self.target.revisions.insert_record_stream(
3638
self.source.revisions.get_record_stream(
3639
self.source.revisions.keys(),
3640
'topological', True))
3644
self.target.fetch(self.source, revision_id=revision_id)
3647
def search_missing_revision_ids(self, revision_id=None, find_ghosts=True):
3648
"""See InterRepository.missing_revision_ids()."""
3649
# we want all revisions to satisfy revision_id in source.
3650
# but we don't want to stat every file here and there.
3651
# we want then, all revisions other needs to satisfy revision_id
3652
# checked, but not those that we have locally.
3653
# so the first thing is to get a subset of the revisions to
3654
# satisfy revision_id in source, and then eliminate those that
3655
# we do already have.
3656
# this is slow on high latency connection to self, but as this
3657
# disk format scales terribly for push anyway due to rewriting
3658
# inventory.weave, this is considered acceptable.
3660
if revision_id is not None:
3661
source_ids = self.source.get_ancestry(revision_id)
3662
if source_ids[0] is not None:
3663
raise AssertionError()
3666
source_ids = self.source._all_possible_ids()
3667
source_ids_set = set(source_ids)
3668
# source_ids is the worst possible case we may need to pull.
3669
# now we want to filter source_ids against what we actually
3670
# have in target, but don't try to check for existence where we know
3671
# we do not have a revision as that would be pointless.
3672
target_ids = set(self.target._all_possible_ids())
3673
possibly_present_revisions = target_ids.intersection(source_ids_set)
3674
actually_present_revisions = set(
3675
self.target._eliminate_revisions_not_present(possibly_present_revisions))
3676
required_revisions = source_ids_set.difference(actually_present_revisions)
3677
if revision_id is not None:
3678
# we used get_ancestry to determine source_ids then we are assured all
3679
# revisions referenced are present as they are installed in topological order.
3680
# and the tip revision was validated by get_ancestry.
3681
result_set = required_revisions
3683
# if we just grabbed the possibly available ids, then
3684
# we only have an estimate of what's available and need to validate
3685
# that against the revision records.
3687
self.source._eliminate_revisions_not_present(required_revisions))
3688
return self.source.revision_ids_to_search_result(result_set)
3691
class InterKnitRepo(InterSameDataRepository):
3692
"""Optimised code paths between Knit based repositories."""
3695
def _get_repo_format_to_test(self):
3696
from bzrlib.repofmt import knitrepo
3697
return knitrepo.RepositoryFormatKnit1()
3700
def is_compatible(source, target):
3701
"""Be compatible with known Knit formats.
3703
We don't test for the stores being of specific types because that
3704
could lead to confusing results, and there is no need to be
3707
from bzrlib.repofmt.knitrepo import RepositoryFormatKnit
3709
are_knits = (isinstance(source._format, RepositoryFormatKnit) and
3710
isinstance(target._format, RepositoryFormatKnit))
3711
except AttributeError:
3713
return are_knits and InterRepository._same_model(source, target)
3716
def search_missing_revision_ids(self, revision_id=None, find_ghosts=True):
3717
"""See InterRepository.missing_revision_ids()."""
3718
if revision_id is not None:
3719
source_ids = self.source.get_ancestry(revision_id)
3720
if source_ids[0] is not None:
3721
raise AssertionError()
3724
source_ids = self.source.all_revision_ids()
3725
source_ids_set = set(source_ids)
3726
# source_ids is the worst possible case we may need to pull.
3727
# now we want to filter source_ids against what we actually
3728
# have in target, but don't try to check for existence where we know
3729
# we do not have a revision as that would be pointless.
3730
target_ids = set(self.target.all_revision_ids())
3731
possibly_present_revisions = target_ids.intersection(source_ids_set)
3732
actually_present_revisions = set(
3733
self.target._eliminate_revisions_not_present(possibly_present_revisions))
3734
required_revisions = source_ids_set.difference(actually_present_revisions)
3735
if revision_id is not None:
3736
# we used get_ancestry to determine source_ids then we are assured all
3737
# revisions referenced are present as they are installed in topological order.
3738
# and the tip revision was validated by get_ancestry.
3739
result_set = required_revisions
3741
# if we just grabbed the possibly available ids, then
3742
# we only have an estimate of what's available and need to validate
3743
# that against the revision records.
3745
self.source._eliminate_revisions_not_present(required_revisions))
3746
return self.source.revision_ids_to_search_result(result_set)
3749
3543
class InterDifferingSerializer(InterRepository):
4289
4036
is_resume = False
4291
4038
# locked_insert_stream performs a commit|suspend.
4292
return self._locked_insert_stream(stream, src_format,
4039
missing_keys = self.insert_stream_without_locking(stream,
4040
src_format, is_resume)
4042
# suspend the write group and tell the caller what we are
4043
# missing. We know we can suspend or else we would not have
4044
# entered this code path. (All repositories that can handle
4045
# missing keys can handle suspending a write group).
4046
write_group_tokens = self.target_repo.suspend_write_group()
4047
return write_group_tokens, missing_keys
4048
hint = self.target_repo.commit_write_group()
4049
to_serializer = self.target_repo._format._serializer
4050
src_serializer = src_format._serializer
4051
if (to_serializer != src_serializer and
4052
self.target_repo._format.pack_compresses):
4053
self.target_repo.pack(hint=hint)
4295
4056
self.target_repo.abort_write_group(suppress_errors=True)
4298
4059
self.target_repo.unlock()
4300
def _locked_insert_stream(self, stream, src_format, is_resume):
4061
def insert_stream_without_locking(self, stream, src_format,
4063
"""Insert a stream's content into the target repository.
4065
This assumes that you already have a locked repository and an active
4068
:param src_format: a bzr repository format.
4069
:param is_resume: Passed down to get_missing_parent_inventories to
4070
indicate if we should be checking for missing texts at the same
4073
:return: A set of keys that are missing.
4075
if not self.target_repo.is_write_locked():
4076
raise errors.ObjectNotLocked(self)
4077
if not self.target_repo.is_in_write_group():
4078
raise errors.BzrError('you must already be in a write group')
4301
4079
to_serializer = self.target_repo._format._serializer
4302
4080
src_serializer = src_format._serializer
4303
4081
new_pack = None