# unchanged, carry over.
ie.reference_revision = parent_entry.reference_revision
ie.revision = parent_entry.revision
return self._get_delta(ie, basis_inv, path), False
return self._get_delta(ie, basis_inv, path), False, None
ie.reference_revision = content_summary[3]
self._add_text_to_weave(ie.file_id, lines, heads, None)
if ie.reference_revision is None:
raise AssertionError("invalid content_summary for nested tree: %r"
% (content_summary,))
self._add_text_to_weave(ie.file_id, '', heads, None)
raise NotImplementedError('unknown kind')
ie.revision = self._new_revision_id
return self._get_delta(ie, basis_inv, path), True
def _add_text_to_weave(self, file_id, new_lines, parents, nostore_sha):
# Note: as we read the content directly from the tree, we know it's not
# been turned into unicode or badly split - but a broken tree
# implementation could give us bad output from readlines() so this is
# not a guarantee of safety. What would be better is always checking
# the content during test suite execution. RBC 20070912
parent_keys = tuple((file_id, parent) for parent in parents)
return self.repository.texts.add_lines(
(file_id, self._new_revision_id), parent_keys, new_lines,
nostore_sha=nostore_sha, random_id=self.random_revid,
check_content=False)[0:2]
self._any_changes = True
return self._get_delta(ie, basis_inv, path), True, fingerprint
def record_iter_changes(self, tree, basis_revision_id, iter_changes,
_entry_factory=entry_factory):
"""Record a new tree via iter_changes.
:param tree: The tree to obtain text contents from for changed objects.
:param basis_revision_id: The revision id of the tree the iter_changes
has been generated against. Currently assumed to be the same
as self.parents[0] - if it is not, errors may occur.
:param iter_changes: An iter_changes iterator with the changes to apply
to basis_revision_id. The iterator must not include any items with
a current kind of None - missing items must be either filtered out
or errored-on before record_iter_changes sees the item.
:param _entry_factory: Private method to bind entry_factory locally for
performance.
:return: A generator of (file_id, relpath, fs_hash) tuples for use with
# Create an inventory delta based on deltas between all the parents and
# deltas between all the parent inventories. We use inventory deltas
# between the inventory objects because iter_changes masks
# last-changed-field only changes.
# file_id -> change map, change is fileid, paths, changed, versioneds,
# parents, names, kinds, executables
# {file_id -> revision_id -> inventory entry, for entries in parent
# trees that are not parents[0]
584
revtrees = list(self.repository.revision_trees(self.parents))
585
except errors.NoSuchRevision:
586
# one or more ghosts, slow path.
588
for revision_id in self.parents:
590
revtrees.append(self.repository.revision_tree(revision_id))
591
except errors.NoSuchRevision:
593
basis_revision_id = _mod_revision.NULL_REVISION
595
revtrees.append(self.repository.revision_tree(
596
_mod_revision.NULL_REVISION))
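# The empty tree stands in for the ghost parent so the code below still
# sees one tree per parent.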
597
# The basis inventory from a repository
599
basis_inv = revtrees[0].inventory
601
basis_inv = self.repository.revision_tree(
602
_mod_revision.NULL_REVISION).inventory
603
if len(self.parents) > 0:
604
if basis_revision_id != self.parents[0] and not ghost_basis:
606
"arbitrary basis parents not yet supported with merges")
607
for revtree in revtrees[1:]:
608
for change in revtree.inventory._make_delta(basis_inv):
609
if change[1] is None:
610
# Not present in this parent.
612
if change[2] not in merged_ids:
613
if change[0] is not None:
614
basis_entry = basis_inv[change[2]]
615
merged_ids[change[2]] = [
617
basis_entry.revision,
620
parent_entries[change[2]] = {
622
basis_entry.revision:basis_entry,
624
change[3].revision:change[3],
627
merged_ids[change[2]] = [change[3].revision]
628
parent_entries[change[2]] = {change[3].revision:change[3]}
630
merged_ids[change[2]].append(change[3].revision)
631
parent_entries[change[2]][change[3].revision] = change[3]
634
# Setup the changes from the tree:
635
# changes maps file_id -> (change, [parent revision_ids])
637
for change in iter_changes:
638
# This probably looks up in basis_inv way too much.
639
if change[1][0] is not None:
640
head_candidate = [basis_inv[change[0]].revision]
643
changes[change[0]] = change, merged_ids.get(change[0],
645
unchanged_merged = set(merged_ids) - set(changes)
646
# Extend the changes dict with synthetic changes to record merges of
648
for file_id in unchanged_merged:
649
# Record a merged version of these items that did not change vs the
650
# basis. This can be either identical parallel changes, or a revert
651
# of a specific file after a merge. The recorded content will be
652
# that of the current tree (which is the same as the basis), but
653
# the per-file graph will reflect a merge.
654
# NB:XXX: We are reconstructing path information we had, this
655
# should be preserved instead.
656
# inv delta change: (file_id, (path_in_source, path_in_target),
657
# changed_content, versioned, parent, name, kind,
660
basis_entry = basis_inv[file_id]
661
except errors.NoSuchId:
662
# a change from basis->some_parents but file_id isn't in basis
663
# so was new in the merge, which means it must have changed
664
# from basis -> current, and as it hasn't, the add was reverted
665
# by the user. So we discard this change.
669
(basis_inv.id2path(file_id), tree.id2path(file_id)),
671
(basis_entry.parent_id, basis_entry.parent_id),
672
(basis_entry.name, basis_entry.name),
673
(basis_entry.kind, basis_entry.kind),
674
(basis_entry.executable, basis_entry.executable))
675
changes[file_id] = (change, merged_ids[file_id])
676
# changes contains tuples with the change and a set of inventory
677
# candidates for the file.
679
# old_path, new_path, file_id, new_inventory_entry
680
seen_root = False # Is the root in the basis delta?
681
inv_delta = self._basis_delta
682
modified_rev = self._new_revision_id
683
for change, head_candidates in changes.values():
684
if change[3][1]: # versioned in target.
685
# Several things may be happening here:
686
# We may have a fork in the per-file graph
687
# - record a change with the content from tree
688
# We may have a change against < all trees
689
# - carry over the tree that hasn't changed
690
# We may have a change against all trees
691
# - record the change with the content from tree
694
entry = _entry_factory[kind](file_id, change[5][1],
696
head_set = self._heads(change[0], set(head_candidates))
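# Keep only the per-file graph heads, preserving the original candidate
# order and dropping duplicates.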
699
for head_candidate in head_candidates:
700
if head_candidate in head_set:
701
heads.append(head_candidate)
702
head_set.remove(head_candidate)
705
# Could be a carry-over situation:
706
parent_entry_revs = parent_entries.get(file_id, None)
707
if parent_entry_revs:
708
parent_entry = parent_entry_revs.get(heads[0], None)
711
if parent_entry is None:
712
# The parent iter_changes was called against is the one
713
# that is the per-file head, so any change is relevant
714
# iter_changes is valid.
715
carry_over_possible = False
717
# could be a carry over situation
718
# A change against the basis may just indicate a merge,
719
# we need to check the content against the source of the
720
# merge to determine if it was changed after the merge
722
if (parent_entry.kind != entry.kind or
723
parent_entry.parent_id != entry.parent_id or
724
parent_entry.name != entry.name):
725
# Metadata common to all entries has changed
726
# against per-file parent
727
carry_over_possible = False
729
carry_over_possible = True
730
# per-type checks for changes against the parent_entry
733
# Cannot be a carry-over situation
734
carry_over_possible = False
735
# Populate the entry in the delta
737
# XXX: There is still a small race here: If someone reverts the content of a file
738
# after iter_changes examines and decides it has changed,
739
# we will unconditionally record a new version even if some
740
# other process reverts it while commit is running (with
741
# the revert happening after iter_changes did its
744
entry.executable = True
746
entry.executable = False
747
if (carry_over_possible and
748
parent_entry.executable == entry.executable):
749
# Check the file length, content hash after reading
751
nostore_sha = parent_entry.text_sha1
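# Supplying the parent text's sha1 as nostore_sha means an identical
# text raises ExistingContent below, so the parent entry can be
# carried over unchanged.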
754
file_obj, stat_value = tree.get_file_with_stat(file_id, change[1][1])
756
text = file_obj.read()
760
entry.text_sha1, entry.text_size = self._add_text_to_weave(
761
file_id, text, heads, nostore_sha)
762
yield file_id, change[1][1], (entry.text_sha1, stat_value)
763
except errors.ExistingContent:
764
# No content change against a carry_over parent
765
# Perhaps this should also yield a fs hash update?
767
entry.text_size = parent_entry.text_size
768
entry.text_sha1 = parent_entry.text_sha1
769
elif kind == 'symlink':
771
entry.symlink_target = tree.get_symlink_target(file_id)
772
if (carry_over_possible and
773
parent_entry.symlink_target == entry.symlink_target):
776
self._add_text_to_weave(change[0], '', heads, None)
777
elif kind == 'directory':
778
if carry_over_possible:
781
# Nothing to set on the entry.
782
# XXX: split into the Root and nonRoot versions.
783
if change[1][1] != '' or self.repository.supports_rich_root():
784
self._add_text_to_weave(change[0], '', heads, None)
785
elif kind == 'tree-reference':
786
if not self.repository._format.supports_tree_reference:
787
# This isn't quite sane as an error, but we shouldn't
788
# ever see this code path in practice: trees don't
789
# permit references when the repo doesn't support tree
791
raise errors.UnsupportedOperation(tree.add_reference,
793
reference_revision = tree.get_reference_revision(change[0])
794
entry.reference_revision = reference_revision
795
if (carry_over_possible and
796
parent_entry.reference_revision == reference_revision):
799
self._add_text_to_weave(change[0], '', heads, None)
801
raise AssertionError('unknown kind %r' % kind)
803
entry.revision = modified_rev
805
entry.revision = parent_entry.revision
808
new_path = change[1][1]
809
inv_delta.append((change[1][0], new_path, change[0], entry))
812
self.new_inventory = None
814
# This should perhaps be guarded by a check that the basis we
815
# commit against is the basis for the commit and if not do a delta
817
self._any_changes = True
819
# housekeeping root entry changes do not affect no-change commits.
820
self._require_root_change(tree)
821
self.basis_delta_revision = basis_revision_id
823
def _add_text_to_weave(self, file_id, new_text, parents, nostore_sha):
824
parent_keys = tuple([(file_id, parent) for parent in parents])
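# Text graph keys are (file_id, revision_id) tuples; the new text is
# stored under the revision being committed.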
825
return self.repository.texts._add_text(
826
(file_id, self._new_revision_id), parent_keys, new_text,
827
nostore_sha=nostore_sha, random_id=self.random_revid)[0:2]
427
830
class RootCommitBuilder(CommitBuilder):
428
831
"""This commitbuilder actually records the root id"""
430
833
# the root entry gets versioned properly by this builder.
431
834
_versioned_root = True
657
1161
# The old API returned a list, should this actually be a set?
658
1162
return parent_map.keys()
1164
def _check_inventories(self, checker):
1165
"""Check the inventories found from the revision scan.
1167
This is responsible for verifying the sha1 of inventories and
1168
creating a pending_keys set that covers data referenced by inventories.
1170
bar = ui.ui_factory.nested_progress_bar()
1172
self._do_check_inventories(checker, bar)
1176
def _do_check_inventories(self, checker, bar):
1177
"""Helper for _check_inventories."""
1179
keys = {'chk_bytes':set(), 'inventories':set(), 'texts':set()}
1180
kinds = ['chk_bytes', 'texts']
1181
count = len(checker.pending_keys)
1182
bar.update("inventories", 0, 2)
1183
current_keys = checker.pending_keys
1184
checker.pending_keys = {}
1185
# Accumulate current checks.
1186
for key in current_keys:
1187
if key[0] != 'inventories' and key[0] not in kinds:
1188
checker._report_items.append('unknown key type %r' % (key,))
1189
keys[key[0]].add(key[1:])
1190
if keys['inventories']:
1191
# NB: output order *should* be roughly sorted - topo or
1192
# inverse topo depending on repository - either way decent
1193
# to just delta against. However, pre-CHK formats didn't
1194
# try to optimise inventory layout on disk. As such the
1195
# pre-CHK code path does not use inventory deltas.
1197
for record in self.inventories.check(keys=keys['inventories']):
1198
if record.storage_kind == 'absent':
1199
checker._report_items.append(
1200
'Missing inventory {%s}' % (record.key,))
1202
last_object = self._check_record('inventories', record,
1203
checker, last_object,
1204
current_keys[('inventories',) + record.key])
1205
del keys['inventories']
1208
bar.update("texts", 1)
1209
while (checker.pending_keys or keys['chk_bytes']
1211
# Something to check.
1212
current_keys = checker.pending_keys
1213
checker.pending_keys = {}
1214
# Accumulate current checks.
1215
for key in current_keys:
1216
if key[0] not in kinds:
1217
checker._report_items.append('unknown key type %r' % (key,))
1218
keys[key[0]].add(key[1:])
1219
# Check the outermost kind only - inventories || chk_bytes || texts
1223
for record in getattr(self, kind).check(keys=keys[kind]):
1224
if record.storage_kind == 'absent':
1225
checker._report_items.append(
1226
'Missing %s {%s}' % (kind, record.key,))
1228
last_object = self._check_record(kind, record,
1229
checker, last_object, current_keys[(kind,) + record.key])
1233
def _check_record(self, kind, record, checker, last_object, item_data):
1234
"""Check a single text from this repository."""
1235
if kind == 'inventories':
1236
rev_id = record.key[0]
1237
inv = self.deserialise_inventory(rev_id,
1238
record.get_bytes_as('fulltext'))
1239
if last_object is not None:
1240
delta = inv._make_delta(last_object)
1241
for old_path, path, file_id, ie in delta:
1244
ie.check(checker, rev_id, inv)
1246
for path, ie in inv.iter_entries():
1247
ie.check(checker, rev_id, inv)
1248
if self._format.fast_deltas:
1250
elif kind == 'chk_bytes':
1251
# No code written to check chk_bytes for this repo format.
1252
checker._report_items.append(
1253
'unsupported key type chk_bytes for %s' % (record.key,))
1254
elif kind == 'texts':
1255
self._check_text(record, checker, item_data)
1257
checker._report_items.append(
1258
'unknown key type %s for %s' % (kind, record.key))
1260
def _check_text(self, record, checker, item_data):
1261
"""Check a single text."""
1262
# Check it is extractable.
1263
# TODO: check length.
1264
if record.storage_kind == 'chunked':
1265
chunks = record.get_bytes_as(record.storage_kind)
1266
sha1 = osutils.sha_strings(chunks)
1267
length = sum(map(len, chunks))
1269
content = record.get_bytes_as('fulltext')
1270
sha1 = osutils.sha_string(content)
1271
length = len(content)
1272
if item_data and sha1 != item_data[1]:
1273
checker._report_items.append(
1274
'sha1 mismatch: %s has sha1 %s expected %s referenced by %s' %
1275
(record.key, sha1, item_data[1], item_data[2]))
661
1278
def create(a_bzrdir):
662
1279
"""Construct the current default format repository in a_bzrdir."""
922
1544
"""Commit the contents accrued within the current write group.
924
1546
:seealso: start_write_group.
1548
:return: it may return an opaque hint that can be passed to 'pack'.
926
1550
if self._write_group is not self.get_transaction():
927
1551
# has an unlock or relock occurred?
928
1552
raise errors.BzrError('mismatched lock context %r and '
929
1553
'write group %r.' %
930
1554
(self.get_transaction(), self._write_group))
931
self._commit_write_group()
1555
result = self._commit_write_group()
932
1556
self._write_group = None
934
1559
def _commit_write_group(self):
935
1560
"""Template method for per-repository write group cleanup.
937
This is called before the write group is considered to be
938
1563
finished and should ensure that all data handed to the repository
939
for writing during the write group is safely committed (to the
940
1565
extent possible considering file system caching etc).
943
def fetch(self, source, revision_id=None, pb=None, find_ghosts=False):
1568
def suspend_write_group(self):
1569
raise errors.UnsuspendableWriteGroup(self)
1571
def get_missing_parent_inventories(self, check_for_missing_texts=True):
1572
"""Return the keys of missing inventory parents for revisions added in
1575
A revision is not complete if the inventory delta for that revision
1576
cannot be calculated. Therefore if the parent inventories of a
1577
revision are not present, the revision is incomplete, and e.g. cannot
1578
be streamed by a smart server. This method finds missing inventory
1579
parents for revisions added in this write group.
1581
if not self._format.supports_external_lookups:
1582
# This is only an issue for stacked repositories
1584
if not self.is_in_write_group():
1585
raise AssertionError('not in a write group')
1587
# XXX: We assume that every added revision already has its
1588
# corresponding inventory, so we only check for parent inventories that
1589
# might be missing, rather than all inventories.
1590
parents = set(self.revisions._index.get_missing_parents())
1591
parents.discard(_mod_revision.NULL_REVISION)
1592
unstacked_inventories = self.inventories._index
1593
present_inventories = unstacked_inventories.get_parent_map(
1594
key[-1:] for key in parents)
1595
parents.difference_update(present_inventories)
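# Anything whose inventory is present locally is not missing after all.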
1596
if len(parents) == 0:
1597
# No missing parent inventories.
1599
if not check_for_missing_texts:
1600
return set(('inventories', rev_id) for (rev_id,) in parents)
1601
# Ok, now we have a list of missing inventories. But these only matter
1602
# if the inventories that reference them are missing some texts they
1603
# appear to introduce.
1604
# XXX: Texts referenced by all added inventories need to be present,
1605
# but at the moment we're only checking for texts referenced by
1606
# inventories at the graph's edge.
1607
key_deps = self.revisions._index._key_dependencies
1608
key_deps.satisfy_refs_for_keys(present_inventories)
1609
referrers = frozenset(r[0] for r in key_deps.get_referrers())
1610
file_ids = self.fileids_altered_by_revision_ids(referrers)
1611
missing_texts = set()
1612
for file_id, version_ids in file_ids.iteritems():
1613
missing_texts.update(
1614
(file_id, version_id) for version_id in version_ids)
1615
present_texts = self.texts.get_parent_map(missing_texts)
1616
missing_texts.difference_update(present_texts)
1617
if not missing_texts:
1618
# No texts are missing, so all revisions and their deltas are
1621
# Alternatively the text versions could be returned as the missing
1622
# keys, but this is likely to be less data.
1623
missing_keys = set(('inventories', rev_id) for (rev_id,) in parents)
1626
def refresh_data(self):
1627
"""Re-read any data needed to to synchronise with disk.
1629
This method is intended to be called after another repository instance
1630
(such as one used by a smart server) has inserted data into the
1631
repository. It may not be called during a write group, but may be
1632
called at any other time.
1634
if self.is_in_write_group():
1635
raise errors.InternalBzrError(
1636
"May not refresh_data while in a write group.")
1637
self._refresh_data()
1639
def resume_write_group(self, tokens):
1640
if not self.is_write_locked():
1641
raise errors.NotWriteLocked(self)
1642
if self._write_group:
1643
raise errors.BzrError('already in a write group')
1644
self._resume_write_group(tokens)
1645
# so we can detect unlock/relock - the write group is now entered.
1646
self._write_group = self.get_transaction()
1648
def _resume_write_group(self, tokens):
1649
raise errors.UnsuspendableWriteGroup(self)
1651
def fetch(self, source, revision_id=None, pb=None, find_ghosts=False,
944
1653
"""Fetch the content required to construct revision_id from source.
946
If revision_id is None all content is copied.
1655
If revision_id is None and fetch_spec is None, then all content is copied.
1658
fetch() may not be used when the repository is in a write group -
1659
either finish the current write group before using fetch, or use
1660
fetch before starting the write group.
947
1662
:param find_ghosts: Find and copy revisions in the source that are
948
1663
ghosts in the target (and not reachable directly by walking out to
949
1664
the first-present revision in target from revision_id).
1665
:param revision_id: If specified, all the content needed for this
1666
revision ID will be copied to the target. Fetch will determine for
1667
itself which content needs to be copied.
1668
:param fetch_spec: If specified, a SearchResult or
1669
PendingAncestryResult that describes which revisions to copy. This
1670
allows copying multiple heads at once. Mutually exclusive with revision_id.
1673
if fetch_spec is not None and revision_id is not None:
1674
raise AssertionError(
1675
"fetch_spec and revision_id are mutually exclusive.")
1676
if self.is_in_write_group():
1677
raise errors.InternalBzrError(
1678
"May not fetch while in a write group.")
951
1679
# fast path same-url fetch operations
952
if self.has_same_location(source):
1680
# TODO: lift out to somewhere common with RemoteRepository
1681
# <https://bugs.edge.launchpad.net/bzr/+bug/401646>
1682
if (self.has_same_location(source)
1683
and fetch_spec is None
1684
and self._has_same_fallbacks(source)):
953
1685
# check that last_revision is in 'from' and then return a
955
1687
if (revision_id is not None and
956
1688
not _mod_revision.is_null(revision_id)):
957
1689
self.get_revision(revision_id)
1691
# if there is no specific appropriate InterRepository, this will get
1692
# the InterRepository base class, which raises an
1693
# IncompatibleRepositories when asked to fetch.
959
1694
inter = InterRepository.get(source, self)
961
return inter.fetch(revision_id=revision_id, pb=pb, find_ghosts=find_ghosts)
962
except NotImplementedError:
963
raise errors.IncompatibleRepositories(source, self)
1695
return inter.fetch(revision_id=revision_id, pb=pb,
1696
find_ghosts=find_ghosts, fetch_spec=fetch_spec)
965
1698
def create_bundle(self, target, base, fileobj, format=None):
966
1699
return serializer.write_bundle(self, target, base, fileobj, format)
1102
1849
@needs_read_lock
1103
1850
def get_revisions(self, revision_ids):
1104
"""Get many revisions at once."""
1851
"""Get many revisions at once.
1853
Repositories that need to check data on every revision read should
1854
subclass this method.
1105
1856
return self._get_revisions(revision_ids)
1107
1858
@needs_read_lock
1108
1859
def _get_revisions(self, revision_ids):
1109
1860
"""Core work logic to get many revisions without sanity checks."""
1110
for rev_id in revision_ids:
1111
if not rev_id or not isinstance(rev_id, basestring):
1112
raise errors.InvalidRevisionId(revision_id=rev_id, branch=self)
1862
for revid, rev in self._iter_revisions(revision_ids):
1864
raise errors.NoSuchRevision(self, revid)
1866
return [revs[revid] for revid in revision_ids]
1868
def _iter_revisions(self, revision_ids):
1869
"""Iterate over revision objects.
1871
:param revision_ids: An iterable of revisions to examine. None may be
1872
passed to request all revisions known to the repository. Note that
1873
not all repositories can find unreferenced revisions; for those
1874
repositories only referenced ones will be returned.
1875
:return: An iterator of (revid, revision) tuples. Absent revisions (
1876
those asked for but not available) are returned as (revid, None).
1878
if revision_ids is None:
1879
revision_ids = self.all_revision_ids()
1881
for rev_id in revision_ids:
1882
if not rev_id or not isinstance(rev_id, basestring):
1883
raise errors.InvalidRevisionId(revision_id=rev_id, branch=self)
1113
1884
keys = [(key,) for key in revision_ids]
1114
1885
stream = self.revisions.get_record_stream(keys, 'unordered', True)
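# 'unordered' lets the store return records in whatever order is
# cheapest; results are re-assembled by key when building the result.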
1116
1886
for record in stream:
1887
revid = record.key[0]
1117
1888
if record.storage_kind == 'absent':
1118
raise errors.NoSuchRevision(self, record.key[0])
1119
text = record.get_bytes_as('fulltext')
1120
rev = self._serializer.read_revision_from_string(text)
1121
revs[record.key[0]] = rev
1122
return [revs[revid] for revid in revision_ids]
1891
text = record.get_bytes_as('fulltext')
1892
rev = self._serializer.read_revision_from_string(text)
1124
1895
@needs_read_lock
1125
1896
def get_revision_xml(self, revision_id):
1126
1897
# TODO: jam 20070210 This shouldn't be necessary since get_revision
1127
1898
# would have already done it.
1128
1899
# TODO: jam 20070210 Just use _serializer.write_revision_to_string()
1900
# TODO: this can't just be replaced by:
1901
# return self._serializer.write_revision_to_string(
1902
# self.get_revision(revision_id))
1903
# as cStringIO preserves the encoding unlike write_revision_to_string
1904
# or some other call down the path.
1129
1905
rev = self.get_revision(revision_id)
1130
rev_tmp = StringIO()
1906
rev_tmp = cStringIO.StringIO()
1131
1907
# the current serializer..
1132
1908
self._serializer.write_revision(rev, rev_tmp)
1133
1909
rev_tmp.seek(0)
1134
1910
return rev_tmp.getvalue()
1136
def get_deltas_for_revisions(self, revisions):
1912
def get_deltas_for_revisions(self, revisions, specific_fileids=None):
1137
1913
"""Produce a generator of revision deltas.
1139
1915
Note that the input is a sequence of REVISIONS, not revision_ids.
1140
1916
Trees will be held in memory until the generator exits.
1141
1917
Each delta is relative to the revision's lefthand predecessor.
1919
:param specific_fileids: if not None, the result is filtered
1920
so that only those file-ids, their parents and their
1921
children are included.
1923
# Get the revision-ids of interest
1143
1924
required_trees = set()
1144
1925
for revision in revisions:
1145
1926
required_trees.add(revision.revision_id)
1146
1927
required_trees.update(revision.parent_ids[:1])
1147
trees = dict((t.get_revision_id(), t) for
1148
t in self.revision_trees(required_trees))
1929
# Get the matching filtered trees. Note that it's more
1930
# efficient to pass filtered trees to changes_from() rather
1931
# than doing the filtering afterwards. changes_from() could
1932
# arguably do the filtering itself but it's path-based, not
1933
# file-id based, so filtering before or afterwards is
1935
if specific_fileids is None:
1936
trees = dict((t.get_revision_id(), t) for
1937
t in self.revision_trees(required_trees))
1939
trees = dict((t.get_revision_id(), t) for
1940
t in self._filtered_revision_trees(required_trees,
1943
# Calculate the deltas
1149
1944
for revision in revisions:
1150
1945
if not revision.parent_ids:
1151
old_tree = self.revision_tree(None)
1946
old_tree = self.revision_tree(_mod_revision.NULL_REVISION)
1153
1948
old_tree = trees[revision.parent_ids[0]]
1154
1949
yield trees[revision.revision_id].changes_from(old_tree)
1156
1951
@needs_read_lock
1157
def get_revision_delta(self, revision_id):
1952
def get_revision_delta(self, revision_id, specific_fileids=None):
1158
1953
"""Return the delta for one revision.
1160
1955
The delta is relative to the left-hand predecessor of the
1958
:param specific_fileids: if not None, the result is filtered
1959
so that only those file-ids, their parents and their
1960
children are included.
1163
1962
r = self.get_revision(revision_id)
1164
return list(self.get_deltas_for_revisions([r]))[0]
1963
return list(self.get_deltas_for_revisions([r],
1964
specific_fileids=specific_fileids))[0]
1166
1966
@needs_write_lock
1167
1967
def store_revision_signature(self, gpg_strategy, plaintext, revision_id):
2284
3305
'bzrlib.repofmt.pack_repo',
2285
3306
'RepositoryFormatKnitPack4',
2287
# Development formats.
2289
# development 0 - stub to introduce development versioning scheme.
2290
format_registry.register_lazy(
2291
"Bazaar development format 0 (needs bzr.dev from before 1.3)\n",
2292
'bzrlib.repofmt.pack_repo',
2293
'RepositoryFormatPackDevelopment0',
2295
format_registry.register_lazy(
2296
("Bazaar development format 0 with subtree support "
2297
"(needs bzr.dev from before 1.3)\n"),
2298
'bzrlib.repofmt.pack_repo',
2299
'RepositoryFormatPackDevelopment0Subtree',
2301
format_registry.register_lazy(
2302
"Bazaar development format 1 (needs bzr.dev from before 1.6)\n",
2303
'bzrlib.repofmt.pack_repo',
2304
'RepositoryFormatPackDevelopment1',
2306
format_registry.register_lazy(
2307
("Bazaar development format 1 with subtree support "
2308
"(needs bzr.dev from before 1.6)\n"),
2309
'bzrlib.repofmt.pack_repo',
2310
'RepositoryFormatPackDevelopment1Subtree',
2312
# 1.3->1.4 go below here
3308
format_registry.register_lazy(
3309
'Bazaar RepositoryFormatKnitPack5 (bzr 1.6)\n',
3310
'bzrlib.repofmt.pack_repo',
3311
'RepositoryFormatKnitPack5',
3313
format_registry.register_lazy(
3314
'Bazaar RepositoryFormatKnitPack5RichRoot (bzr 1.6.1)\n',
3315
'bzrlib.repofmt.pack_repo',
3316
'RepositoryFormatKnitPack5RichRoot',
3318
format_registry.register_lazy(
3319
'Bazaar RepositoryFormatKnitPack5RichRoot (bzr 1.6)\n',
3320
'bzrlib.repofmt.pack_repo',
3321
'RepositoryFormatKnitPack5RichRootBroken',
3323
format_registry.register_lazy(
3324
'Bazaar RepositoryFormatKnitPack6 (bzr 1.9)\n',
3325
'bzrlib.repofmt.pack_repo',
3326
'RepositoryFormatKnitPack6',
3328
format_registry.register_lazy(
3329
'Bazaar RepositoryFormatKnitPack6RichRoot (bzr 1.9)\n',
3330
'bzrlib.repofmt.pack_repo',
3331
'RepositoryFormatKnitPack6RichRoot',
3334
# Development formats.
3335
# Obsolete but kept pending a CHK based subtree format.
3336
format_registry.register_lazy(
3337
("Bazaar development format 2 with subtree support "
3338
"(needs bzr.dev from before 1.8)\n"),
3339
'bzrlib.repofmt.pack_repo',
3340
'RepositoryFormatPackDevelopment2Subtree',
3343
# 1.14->1.16 go below here
3344
format_registry.register_lazy(
3345
'Bazaar development format - group compression and chk inventory'
3346
' (needs bzr.dev from 1.14)\n',
3347
'bzrlib.repofmt.groupcompress_repo',
3348
'RepositoryFormatCHK1',
3351
format_registry.register_lazy(
3352
'Bazaar development format - chk repository with bencode revision '
3353
'serialization (needs bzr.dev from 1.16)\n',
3354
'bzrlib.repofmt.groupcompress_repo',
3355
'RepositoryFormatCHK2',
3357
format_registry.register_lazy(
3358
'Bazaar repository format 2a (needs bzr 1.16 or later)\n',
3359
'bzrlib.repofmt.groupcompress_repo',
3360
'RepositoryFormat2a',
2315
3364
class InterRepository(InterObject):
2316
3365
"""This class represents operations taking place between two repositories.
2318
3367
Its instances have methods like copy_content and fetch, and contain
2319
references to the source and target repositories these operations can be
2320
3369
carried out on.
2322
3371
Often we will provide convenience methods on 'repository' which carry out
2682
3706
return self.source.revision_ids_to_search_result(result_set)
2685
class InterPackRepo(InterSameDataRepository):
2686
"""Optimised code paths between Pack based repositories."""
2689
def _get_repo_format_to_test(self):
2690
from bzrlib.repofmt import pack_repo
2691
return pack_repo.RepositoryFormatKnitPack1()
2694
def is_compatible(source, target):
2695
"""Be compatible with known Pack formats.
2697
We don't test for the stores being of specific types because that
2698
could lead to confusing results, and there is no need to be
2701
from bzrlib.repofmt.pack_repo import RepositoryFormatPack
2703
are_packs = (isinstance(source._format, RepositoryFormatPack) and
2704
isinstance(target._format, RepositoryFormatPack))
2705
except AttributeError:
2707
return are_packs and InterRepository._same_model(source, target)
2710
def fetch(self, revision_id=None, pb=None, find_ghosts=False):
2711
"""See InterRepository.fetch()."""
2712
from bzrlib.repofmt.pack_repo import Packer
2713
mutter("Using fetch logic to copy between %s(%s) and %s(%s)",
2714
self.source, self.source._format, self.target, self.target._format)
2715
self.count_copied = 0
2716
if revision_id is None:
2718
# everything to do - use pack logic
2719
# to fetch from all packs to one without
2720
# inventory parsing etc, IFF nothing to be copied is in the target.
2722
source_revision_ids = frozenset(self.source.all_revision_ids())
2723
revision_ids = source_revision_ids - \
2724
frozenset(self.target.get_parent_map(source_revision_ids))
2725
revision_keys = [(revid,) for revid in revision_ids]
2726
index = self.target._pack_collection.revision_index.combined_index
2727
present_revision_ids = set(item[1][0] for item in
2728
index.iter_entries(revision_keys))
2729
revision_ids = set(revision_ids) - present_revision_ids
2730
# implementing the TODO will involve:
2731
# - detecting when all of a pack is selected
2732
# - avoiding as much as possible pre-selection, so the
2733
# more-core routines such as create_pack_from_packs can filter in
2734
# a just-in-time fashion. (though having a HEADS list on a
2735
# repository might make this a lot easier, because we could
2736
# sensibly detect 'new revisions' without doing a full index scan.
2737
elif _mod_revision.is_null(revision_id):
2742
revision_ids = self.search_missing_revision_ids(revision_id,
2743
find_ghosts=find_ghosts).get_keys()
2744
except errors.NoSuchRevision:
2745
raise errors.InstallFailed([revision_id])
2746
if len(revision_ids) == 0:
2748
packs = self.source._pack_collection.all_packs()
2749
pack = Packer(self.target._pack_collection, packs, '.fetch',
2750
revision_ids).pack()
2751
if pack is not None:
2752
self.target._pack_collection._save_pack_names()
2753
# Trigger an autopack. This may duplicate effort as we've just done
2754
# a pack creation, but for now it is simpler to think about as
2755
# 'upload data, then repack if needed'.
2756
self.target._pack_collection.autopack()
2757
return (pack.get_revision_count(), [])
2762
def search_missing_revision_ids(self, revision_id=None, find_ghosts=True):
2763
"""See InterRepository.missing_revision_ids().
2765
:param find_ghosts: Find ghosts throughout the ancestry of
2768
if not find_ghosts and revision_id is not None:
2769
return self._walk_to_common_revisions([revision_id])
2770
elif revision_id is not None:
2771
# Find ghosts: search for revisions pointing from one repository to
2772
# the other, and vice versa, anywhere in the history of revision_id.
2773
graph = self.target.get_graph(other_repository=self.source)
2774
searcher = graph._make_breadth_first_searcher([revision_id])
2778
next_revs, ghosts = searcher.next_with_ghosts()
2779
except StopIteration:
2781
if revision_id in ghosts:
2782
raise errors.NoSuchRevision(self.source, revision_id)
2783
found_ids.update(next_revs)
2784
found_ids.update(ghosts)
2785
found_ids = frozenset(found_ids)
2786
# Double query here: should be able to avoid this by changing the
2787
# graph api further.
2788
result_set = found_ids - frozenset(
2789
self.target.get_graph().get_parent_map(found_ids))
2791
source_ids = self.source.all_revision_ids()
2792
# source_ids is the worst possible case we may need to pull.
2793
# now we want to filter source_ids against what we actually
2794
# have in target, but don't try to check for existence where we know
2795
# we do not have a revision as that would be pointless.
2796
target_ids = set(self.target.all_revision_ids())
2797
result_set = set(source_ids).difference(target_ids)
2798
return self.source.revision_ids_to_search_result(result_set)
2801
class InterModel1and2(InterRepository):
2804
def _get_repo_format_to_test(self):
2808
def is_compatible(source, target):
2809
if not source.supports_rich_root() and target.supports_rich_root():
2815
def fetch(self, revision_id=None, pb=None, find_ghosts=False):
2816
"""See InterRepository.fetch()."""
2817
from bzrlib.fetch import Model1toKnit2Fetcher
2818
f = Model1toKnit2Fetcher(to_repository=self.target,
2819
from_repository=self.source,
2820
last_revision=revision_id,
2821
pb=pb, find_ghosts=find_ghosts)
2822
return f.count_copied, f.failed_revisions
2825
def copy_content(self, revision_id=None):
2826
"""Make a complete copy of the content in self into destination.
2828
This is a destructive operation! Do not use it on existing
2831
:param revision_id: Only copy the content needed to construct
2832
revision_id and its parents.
2835
self.target.set_make_working_trees(self.source.make_working_trees())
2836
except NotImplementedError:
2838
# but don't bother fetching if we have the needed data now.
2839
if (revision_id not in (None, _mod_revision.NULL_REVISION) and
2840
self.target.has_revision(revision_id)):
2842
self.target.fetch(self.source, revision_id=revision_id)
2845
class InterKnit1and2(InterKnitRepo):
2848
def _get_repo_format_to_test(self):
2852
def is_compatible(source, target):
2853
"""Be compatible with Knit1 source and Knit3 target"""
2854
from bzrlib.repofmt.knitrepo import RepositoryFormatKnit3
2856
from bzrlib.repofmt.knitrepo import (RepositoryFormatKnit1,
2857
RepositoryFormatKnit3)
2858
from bzrlib.repofmt.pack_repo import (
2859
RepositoryFormatKnitPack1,
2860
RepositoryFormatKnitPack3,
2861
RepositoryFormatPackDevelopment0,
2862
RepositoryFormatPackDevelopment0Subtree,
2865
RepositoryFormatKnit1,
2866
RepositoryFormatKnitPack1,
2867
RepositoryFormatPackDevelopment0,
2870
RepositoryFormatKnit3,
2871
RepositoryFormatKnitPack3,
2872
RepositoryFormatPackDevelopment0Subtree,
2874
return (isinstance(source._format, nosubtrees) and
2875
isinstance(target._format, subtrees))
2876
except AttributeError:
2880
def fetch(self, revision_id=None, pb=None, find_ghosts=False):
2881
"""See InterRepository.fetch()."""
2882
from bzrlib.fetch import Knit1to2Fetcher
2883
mutter("Using fetch logic to copy between %s(%s) and %s(%s)",
2884
self.source, self.source._format, self.target,
2885
self.target._format)
2886
f = Knit1to2Fetcher(to_repository=self.target,
2887
from_repository=self.source,
2888
last_revision=revision_id,
2889
pb=pb, find_ghosts=find_ghosts)
2890
return f.count_copied, f.failed_revisions
2893
class InterDifferingSerializer(InterKnitRepo):
3709
class InterDifferingSerializer(InterRepository):
2896
3712
def _get_repo_format_to_test(self):
2900
3716
def is_compatible(source, target):
2901
3717
"""Be compatible with Knit2 source and Knit3 target"""
2902
if source.supports_rich_root() != target.supports_rich_root():
2904
# Ideally, we'd support fetching if the source had no tree references
2905
# even if it supported them...
2906
if (getattr(source, '_format.supports_tree_reference', False) and
2907
not getattr(target, '_format.supports_tree_reference', False)):
3718
# This is redundant with format.check_conversion_target(), however that
3719
# raises an exception, and we just want to say "False" as in we won't
3720
# support converting between these formats.
3721
if 'IDS_never' in debug.debug_flags:
3723
if source.supports_rich_root() and not target.supports_rich_root():
3725
if (source._format.supports_tree_reference
3726
and not target._format.supports_tree_reference):
3728
if target._fallback_repositories and target._format.supports_chks:
3729
# IDS doesn't know how to copy CHKs for the parent inventories it
3730
# adds to stacked repos.
3732
if 'IDS_always' in debug.debug_flags:
3734
# Only use this code path for local source and target. IDS does far
3735
# too much IO (both bandwidth and roundtrips) over a network.
3736
if not source.bzrdir.transport.base.startswith('file:///'):
3738
if not target.bzrdir.transport.base.startswith('file:///'):
3742
def _get_trees(self, revision_ids, cache):
3744
for rev_id in revision_ids:
3746
possible_trees.append((rev_id, cache[rev_id]))
3748
# Not cached, but inventory might be present anyway.
3750
tree = self.source.revision_tree(rev_id)
3751
except errors.NoSuchRevision:
3752
# Nope, parent is ghost.
3755
cache[rev_id] = tree
3756
possible_trees.append((rev_id, tree))
3757
return possible_trees
3759
def _get_delta_for_revision(self, tree, parent_ids, possible_trees):
3760
"""Get the best delta and base for this revision.
3762
:return: (basis_id, delta)
3765
# Generate deltas against each tree, to find the shortest.
3766
texts_possibly_new_in_tree = set()
3767
for basis_id, basis_tree in possible_trees:
3768
delta = tree.inventory._make_delta(basis_tree.inventory)
3769
for old_path, new_path, file_id, new_entry in delta:
3770
if new_path is None:
3771
# This file_id isn't present in the new rev, so we don't
3775
# Rich roots are handled elsewhere...
3777
kind = new_entry.kind
3778
if kind != 'directory' and kind != 'file':
3779
# No text record associated with this inventory entry.
3781
# This is a directory or file that has changed somehow.
3782
texts_possibly_new_in_tree.add((file_id, new_entry.revision))
3783
deltas.append((len(delta), basis_id, delta))
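# Record the delta size first so the smallest delta can be picked as
# the winner.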
3785
return deltas[0][1:]
3787
def _fetch_parent_invs_for_stacking(self, parent_map, cache):
3788
"""Find all parent revisions that are absent, but for which the
3789
inventory is present, and copy those inventories.
3791
This is necessary to preserve correctness when the source is stacked
3792
without fallbacks configured. (Note that in cases like upgrade the
3793
source may not have _fallback_repositories even though it is
3797
for parents in parent_map.values():
3798
parent_revs.update(parents)
3799
present_parents = self.source.get_parent_map(parent_revs)
3800
absent_parents = set(parent_revs).difference(present_parents)
3801
parent_invs_keys_for_stacking = self.source.inventories.get_parent_map(
3802
(rev_id,) for rev_id in absent_parents)
3803
parent_inv_ids = [key[-1] for key in parent_invs_keys_for_stacking]
3804
for parent_tree in self.source.revision_trees(parent_inv_ids):
3805
current_revision_id = parent_tree.get_revision_id()
3806
parents_parents_keys = parent_invs_keys_for_stacking[
3807
(current_revision_id,)]
3808
parents_parents = [key[-1] for key in parents_parents_keys]
3809
basis_id = _mod_revision.NULL_REVISION
3810
basis_tree = self.source.revision_tree(basis_id)
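# Deltaing against the empty tree (NULL_REVISION) sends the parent's
# complete inventory.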
3811
delta = parent_tree.inventory._make_delta(basis_tree.inventory)
3812
self.target.add_inventory_by_delta(
3813
basis_id, delta, current_revision_id, parents_parents)
3814
cache[current_revision_id] = parent_tree
3816
def _fetch_batch(self, revision_ids, basis_id, cache):
3817
"""Fetch across a few revisions.
3819
:param revision_ids: The revisions to copy
3820
:param basis_id: The revision_id of a tree that must be in cache, used
3821
as a basis for delta when no other base is available
3822
:param cache: A cache of RevisionTrees that we can use.
3823
:return: The revision_id of the last converted tree. The RevisionTree
3824
for it will be in cache
3826
# Walk through all revisions; get inventory deltas, copy referenced
3827
# texts that delta references, insert the delta, revision and
3829
root_keys_to_create = set()
3832
pending_revisions = []
3833
parent_map = self.source.get_parent_map(revision_ids)
3834
self._fetch_parent_invs_for_stacking(parent_map, cache)
3835
for tree in self.source.revision_trees(revision_ids):
3836
# Find an inventory delta for this revision.
3837
# Find text entries that need to be copied, too.
3838
current_revision_id = tree.get_revision_id()
3839
parent_ids = parent_map.get(current_revision_id, ())
3840
parent_trees = self._get_trees(parent_ids, cache)
3841
possible_trees = list(parent_trees)
3842
if len(possible_trees) == 0:
3843
# There either aren't any parents, or the parents are ghosts,
3844
# so just use the last converted tree.
3845
possible_trees.append((basis_id, cache[basis_id]))
3846
basis_id, delta = self._get_delta_for_revision(tree, parent_ids,
3848
revision = self.source.get_revision(current_revision_id)
3849
pending_deltas.append((basis_id, delta,
3850
current_revision_id, revision.parent_ids))
3851
if self._converting_to_rich_root:
3852
self._revision_id_to_root_id[current_revision_id] = \
3854
# Determine which texts are present in this revision but not in
3855
# any of the available parents.
3856
texts_possibly_new_in_tree = set()
3857
for old_path, new_path, file_id, entry in delta:
3858
if new_path is None:
3859
# This file_id isn't present in the new rev
3863
if not self.target.supports_rich_root():
3864
# The target doesn't support rich root, so we don't
3867
if self._converting_to_rich_root:
3868
# This can't be copied normally, we have to insert
3870
root_keys_to_create.add((file_id, entry.revision))
3873
texts_possibly_new_in_tree.add((file_id, entry.revision))
3874
for basis_id, basis_tree in possible_trees:
3875
basis_inv = basis_tree.inventory
3876
for file_key in list(texts_possibly_new_in_tree):
3877
file_id, file_revision = file_key
3879
entry = basis_inv[file_id]
3880
except errors.NoSuchId:
3882
if entry.revision == file_revision:
3883
texts_possibly_new_in_tree.remove(file_key)
3884
text_keys.update(texts_possibly_new_in_tree)
3885
pending_revisions.append(revision)
3886
cache[current_revision_id] = tree
3887
basis_id = current_revision_id
3889
from_texts = self.source.texts
3890
to_texts = self.target.texts
3891
if root_keys_to_create:
3892
from bzrlib.fetch import _new_root_data_stream
3893
root_stream = _new_root_data_stream(
3894
root_keys_to_create, self._revision_id_to_root_id, parent_map,
3896
to_texts.insert_record_stream(root_stream)
3897
to_texts.insert_record_stream(from_texts.get_record_stream(
3898
text_keys, self.target._format._fetch_order,
3899
not self.target._format._fetch_uses_deltas))
3900
# insert inventory deltas
3901
for delta in pending_deltas:
3902
self.target.add_inventory_by_delta(*delta)
3903
if self.target._fallback_repositories:
3904
# Make sure this stacked repository has all the parent inventories
3905
# for the new revisions that we are about to insert. We do this
3906
# before adding the revisions so that no revision is added until
3907
# all the inventories it may depend on are added.
3908
# Note that this is overzealous, as we may have fetched these in an
3911
revision_ids = set()
3912
for revision in pending_revisions:
3913
revision_ids.add(revision.revision_id)
3914
parent_ids.update(revision.parent_ids)
3915
parent_ids.difference_update(revision_ids)
3916
parent_ids.discard(_mod_revision.NULL_REVISION)
3917
parent_map = self.source.get_parent_map(parent_ids)
3918
# we iterate over parent_map and not parent_ids because we don't
3919
# want to try copying any revision which is a ghost
3920
for parent_tree in self.source.revision_trees(parent_map):
3921
current_revision_id = parent_tree.get_revision_id()
3922
parents_parents = parent_map[current_revision_id]
3923
possible_trees = self._get_trees(parents_parents, cache)
3924
if len(possible_trees) == 0:
3925
# There either aren't any parents, or the parents are
3926
# ghosts, so just use the last converted tree.
3927
possible_trees.append((basis_id, cache[basis_id]))
3928
basis_id, delta = self._get_delta_for_revision(parent_tree,
3929
parents_parents, possible_trees)
3930
self.target.add_inventory_by_delta(
3931
basis_id, delta, current_revision_id, parents_parents)
3932
# insert signatures and revisions
3933
for revision in pending_revisions:
3935
signature = self.source.get_signature_text(
3936
revision.revision_id)
3937
self.target.add_signature_text(revision.revision_id,
3939
except errors.NoSuchRevision:
3941
self.target.add_revision(revision.revision_id, revision)
3944
def _fetch_all_revisions(self, revision_ids, pb):
3945
"""Fetch everything for the list of revisions.
3947
:param revision_ids: The list of revisions to fetch. Must be in
topological order.
3949
:param pb: A ProgressTask
3952
basis_id, basis_tree = self._get_basis(revision_ids[0])
3954
cache = lru_cache.LRUCache(100)
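# Cache up to 100 recently used RevisionTrees; older entries are
# evicted automatically.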
3955
cache[basis_id] = basis_tree
3956
del basis_tree # We don't want to hang on to it here
3958
for offset in range(0, len(revision_ids), batch_size):
3959
self.target.start_write_group()
3961
pb.update('Transferring revisions', offset,
3963
batch = revision_ids[offset:offset+batch_size]
3964
basis_id = self._fetch_batch(batch, basis_id, cache)
3966
self.target.abort_write_group()
3969
hint = self.target.commit_write_group()
3972
if hints and self.target._format.pack_compresses:
3973
self.target.pack(hint=hints)
3974
pb.update('Transferring revisions', len(revision_ids),
2911
3977
@needs_write_lock
2912
def fetch(self, revision_id=None, pb=None, find_ghosts=False):
3978
def fetch(self, revision_id=None, pb=None, find_ghosts=False,
2913
3980
"""See InterRepository.fetch()."""
3981
if fetch_spec is not None:
3982
raise AssertionError("Not implemented yet...")
3983
# See <https://launchpad.net/bugs/456077> asking for a warning here
3985
# nb this is only active for local-local fetches; other things using
3987
ui.ui_factory.warn_cross_format_fetch(self.source._format,
3988
self.target._format)
3989
if (not self.source.supports_rich_root()
3990
and self.target.supports_rich_root()):
3991
self._converting_to_rich_root = True
3992
self._revision_id_to_root_id = {}
3994
self._converting_to_rich_root = False
2914
3995
revision_ids = self.target.search_missing_revision_ids(self.source,
2915
3996
revision_id, find_ghosts=find_ghosts).get_keys()
3997
if not revision_ids:
2916
3999
revision_ids = tsort.topo_sort(
2917
4000
self.source.get_graph().get_parent_map(revision_ids))
2918
def revisions_iterator():
2919
for current_revision_id in revision_ids:
2920
revision = self.source.get_revision(current_revision_id)
2921
tree = self.source.revision_tree(current_revision_id)
2923
signature = self.source.get_signature_text(
2924
current_revision_id)
2925
except errors.NoSuchRevision:
2927
yield revision, tree, signature
4001
if not revision_ids:
4003
# Walk through all revisions; get inventory deltas, copy referenced
4004
# texts that delta references, insert the delta, revision and
2929
4007
my_pb = ui.ui_factory.nested_progress_bar()
4010
symbol_versioning.warn(
4011
symbol_versioning.deprecated_in((1, 14, 0))
4012
% "pb parameter to fetch()")
2934
install_revisions(self.target, revisions_iterator(),
2935
len(revision_ids), pb)
4015
self._fetch_all_revisions(revision_ids, pb)
2937
4017
if my_pb is not None:
2938
4018
my_pb.finished()
2939
4019
return len(revision_ids), 0
2942
class InterOtherToRemote(InterRepository):
2944
def __init__(self, source, target):
2945
InterRepository.__init__(self, source, target)
2946
self._real_inter = None
2949
def is_compatible(source, target):
2950
if isinstance(target, remote.RemoteRepository):
2954
def _ensure_real_inter(self):
2955
if self._real_inter is None:
2956
self.target._ensure_real()
2957
real_target = self.target._real_repository
2958
self._real_inter = InterRepository.get(self.source, real_target)
2960
def copy_content(self, revision_id=None):
2961
self._ensure_real_inter()
2962
self._real_inter.copy_content(revision_id=revision_id)
2964
def fetch(self, revision_id=None, pb=None, find_ghosts=False):
2965
self._ensure_real_inter()
2966
return self._real_inter.fetch(revision_id=revision_id, pb=pb,
2967
find_ghosts=find_ghosts)
2970
def _get_repo_format_to_test(self):
2974
class InterRemoteToOther(InterRepository):
2976
def __init__(self, source, target):
2977
InterRepository.__init__(self, source, target)
2978
self._real_inter = None
2981
def is_compatible(source, target):
2982
if not isinstance(source, remote.RemoteRepository):
2984
# Is source's model compatible with target's model?
2985
source._ensure_real()
2986
real_source = source._real_repository
2987
if isinstance(real_source, remote.RemoteRepository):
2988
raise NotImplementedError(
2989
"We don't support remote repos backed by remote repos yet.")
2990
return InterRepository._same_model(real_source, target)
2992
def _ensure_real_inter(self):
2993
if self._real_inter is None:
2994
self.source._ensure_real()
2995
real_source = self.source._real_repository
2996
self._real_inter = InterRepository.get(real_source, self.target)
2998
def fetch(self, revision_id=None, pb=None, find_ghosts=False):
2999
self._ensure_real_inter()
3000
return self._real_inter.fetch(revision_id=revision_id, pb=pb,
3001
find_ghosts=find_ghosts)
3003
def copy_content(self, revision_id=None):
3004
self._ensure_real_inter()
3005
self._real_inter.copy_content(revision_id=revision_id)
3008
def _get_repo_format_to_test(self):
4021
def _get_basis(self, first_revision_id):
4022
"""Get a revision and tree which exists in the target.
4024
This assumes that first_revision_id is selected for transmission
4025
because all other ancestors are already present. If we can't find an
4026
ancestor we fall back to NULL_REVISION since we know that is safe.
4028
:return: (basis_id, basis_tree)
4030
first_rev = self.source.get_revision(first_revision_id)
4032
basis_id = first_rev.parent_ids[0]
4033
# only valid as a basis if the target has it
4034
self.target.get_revision(basis_id)
4035
# Try to get a basis tree - if its a ghost it will hit the
4036
# NoSuchRevision case.
4037
basis_tree = self.source.revision_tree(basis_id)
4038
except (IndexError, errors.NoSuchRevision):
4039
basis_id = _mod_revision.NULL_REVISION
4040
basis_tree = self.source.revision_tree(basis_id)
4041
return basis_id, basis_tree
3013
4044
InterRepository.register_optimiser(InterDifferingSerializer)
3014
4045
InterRepository.register_optimiser(InterSameDataRepository)
3015
4046
InterRepository.register_optimiser(InterWeaveRepo)
3016
4047
InterRepository.register_optimiser(InterKnitRepo)
3017
InterRepository.register_optimiser(InterModel1and2)
3018
InterRepository.register_optimiser(InterKnit1and2)
3019
InterRepository.register_optimiser(InterPackRepo)
3020
InterRepository.register_optimiser(InterOtherToRemote)
3021
InterRepository.register_optimiser(InterRemoteToOther)
3024
4050
class CopyConverter(object):
3025
4051
"""A repository conversion tool which just performs a copy of the content.
3027
4053
This is slow but quite reliable.
3175
4210
revision_graph[key] = tuple(parent for parent in parents if parent
3176
4211
in revision_graph)
3177
4212
return revision_graph
4215
class StreamSink(object):
4216
"""An object that can insert a stream into a repository.
4218
This interface handles the complexity of reserialising inventories and
4219
revisions from different formats, and allows unidirectional insertion into
4220
stacked repositories without looking for the missing basis parents
4224
def __init__(self, target_repo):
4225
self.target_repo = target_repo
4227
def insert_stream(self, stream, src_format, resume_tokens):
4228
"""Insert a stream's content into the target repository.
4230
:param src_format: a bzr repository format.
4232
:return: a list of resume tokens and an iterable of keys additional
4233
items required before the insertion can be completed.
4235
self.target_repo.lock_write()
4238
self.target_repo.resume_write_group(resume_tokens)
4241
self.target_repo.start_write_group()
4244
# locked_insert_stream performs a commit|suspend.
4245
return self._locked_insert_stream(stream, src_format, is_resume)
4247
self.target_repo.abort_write_group(suppress_errors=True)
4250
self.target_repo.unlock()

    def _locked_insert_stream(self, stream, src_format, is_resume):
        to_serializer = self.target_repo._format._serializer
        src_serializer = src_format._serializer
        new_pack = None
        if to_serializer == src_serializer:
            # If serializers match and the target is a pack repository, set the
            # write cache size on the new pack. This avoids poor performance
            # on transports where append is unbuffered (such as
            # RemoteTransport). This is safe to do because nothing should read
            # back from the target repository while a stream with matching
            # serialization is being inserted.
            # The exception is that a delta record from the source that should
            # be a fulltext may need to be expanded by the target (see
            # test_fetch_revisions_with_deltas_into_pack); but we take care to
            # explicitly flush any buffered writes first in that rare case.
            try:
                new_pack = self.target_repo._pack_collection._new_pack
            except AttributeError:
                # Not a pack repository
                pass
            else:
                new_pack.set_write_cache_size(1024*1024)
        for substream_type, substream in stream:
            if 'stream' in debug.debug_flags:
                mutter('inserting substream: %s', substream_type)
            if substream_type == 'texts':
                self.target_repo.texts.insert_record_stream(substream)
            elif substream_type == 'inventories':
                if src_serializer == to_serializer:
                    self.target_repo.inventories.insert_record_stream(
                        substream)
                else:
                    self._extract_and_insert_inventories(
                        substream, src_serializer)
            elif substream_type == 'inventory-deltas':
                ui.ui_factory.warn_cross_format_fetch(src_format,
                    self.target_repo._format)
                self._extract_and_insert_inventory_deltas(
                    substream, src_serializer)
            elif substream_type == 'chk_bytes':
                # XXX: This doesn't support conversions, as it assumes the
                # conversion was done in the fetch code.
                self.target_repo.chk_bytes.insert_record_stream(substream)
            elif substream_type == 'revisions':
                # This may fall back to extract-and-insert more often than
                # required if the serializers are different only in terms of
                # the inventory.
                if src_serializer == to_serializer:
                    self.target_repo.revisions.insert_record_stream(
                        substream)
                else:
                    self._extract_and_insert_revisions(substream,
                        src_serializer)
            elif substream_type == 'signatures':
                self.target_repo.signatures.insert_record_stream(substream)
            else:
                raise AssertionError('kaboom! %s' % (substream_type,))
        # Done inserting data, and the missing_keys calculations will try to
        # read back from the inserted data, so flush the writes to the new pack
        # (if this is pack format).
        if new_pack is not None:
            new_pack._write_data('', flush=True)
        # Find all the new revisions (including ones from resume_tokens)
        missing_keys = self.target_repo.get_missing_parent_inventories(
            check_for_missing_texts=is_resume)
        try:
            for prefix, versioned_file in (
                ('texts', self.target_repo.texts),
                ('inventories', self.target_repo.inventories),
                ('revisions', self.target_repo.revisions),
                ('signatures', self.target_repo.signatures),
                ('chk_bytes', self.target_repo.chk_bytes),
                ):
                if versioned_file is None:
                    continue
                missing_keys.update((prefix,) + key for key in
                    versioned_file.get_missing_compression_parent_keys())
        except NotImplementedError:
            # cannot even attempt suspending, and missing would have failed
            # during stream insertion.
            missing_keys = set()
        else:
            if missing_keys:
                # Suspend the write group and tell the caller what is missing.
                # We know we can suspend or else we would not have entered
                # this code path. (All repositories that can handle missing
                # keys can handle suspending a write group.)
                write_group_tokens = self.target_repo.suspend_write_group()
                return write_group_tokens, missing_keys
        hint = self.target_repo.commit_write_group()
        if (to_serializer != src_serializer and
            self.target_repo._format.pack_compresses):
            self.target_repo.pack(hint=hint)
        return [], set()

    def _extract_and_insert_inventory_deltas(self, substream, serializer):
        target_rich_root = self.target_repo._format.rich_root_data
        target_tree_refs = self.target_repo._format.supports_tree_reference
        for record in substream:
            # Insert the delta directly
            inventory_delta_bytes = record.get_bytes_as('fulltext')
            deserialiser = inventory_delta.InventoryDeltaDeserializer()
            try:
                parse_result = deserialiser.parse_text_bytes(
                    inventory_delta_bytes)
            except inventory_delta.IncompatibleInventoryDelta, err:
                trace.mutter("Incompatible delta: %s", err.msg)
                raise errors.IncompatibleRevision(self.target_repo._format)
            basis_id, new_id, rich_root, tree_refs, inv_delta = parse_result
            revision_id = new_id
            parents = [key[0] for key in record.parents]
            self.target_repo.add_inventory_by_delta(
                basis_id, inv_delta, revision_id, parents)

    def _extract_and_insert_inventories(self, substream, serializer,
            parents=None):
        """Generate a new inventory versionedfile in target, converting data.

        The inventory is retrieved from the source, (deserializing it), and
        stored in the target (reserializing it in a different format).
        """
        target_rich_root = self.target_repo._format.rich_root_data
        target_tree_refs = self.target_repo._format.supports_tree_reference
        for record in substream:
            # It's not a delta, so it must be a fulltext in the source
            # serializer's format.
            bytes = record.get_bytes_as('fulltext')
            revision_id = record.key[0]
            inv = serializer.read_inventory_from_string(bytes, revision_id)
            parents = [key[0] for key in record.parents]
            self.target_repo.add_inventory(revision_id, inv, parents)
            # No need to keep holding this full inv in memory when the rest of
            # the substream is likely to be all deltas.
            del inv

    def _extract_and_insert_revisions(self, substream, serializer):
        for record in substream:
            bytes = record.get_bytes_as('fulltext')
            revision_id = record.key[0]
            rev = serializer.read_revision_from_string(bytes)
            if rev.revision_id != revision_id:
                raise AssertionError('wtf: %s != %s' % (rev, revision_id))
            self.target_repo.add_revision(revision_id, rev)

    def finished(self):
        if self.target_repo._format._fetch_reconcile:
            self.target_repo.reconcile()


class StreamSource(object):
    """A source of a stream for fetching between repositories."""

    def __init__(self, from_repository, to_format):
        """Create a StreamSource streaming from from_repository."""
        self.from_repository = from_repository
        self.to_format = to_format
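
    # A StreamSource is typically obtained via
    # Repository._get_source(to_format); the substreams yielded by
    # get_stream() are what StreamSink.insert_stream consumes on the target
    # side.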

    def delta_on_metadata(self):
        """Return True if deltas are permitted on metadata streams.

        That is on revisions and signatures.
        """
        src_serializer = self.from_repository._format._serializer
        target_serializer = self.to_format._serializer
        return (self.to_format._fetch_uses_deltas and
            src_serializer == target_serializer)

    def _fetch_revision_texts(self, revs):
        # fetch signatures first and then the revision texts
        # may need to be an InterRevisionStore call here.
        from_sf = self.from_repository.signatures
        # A missing signature is just skipped.
        keys = [(rev_id,) for rev_id in revs]
        signatures = versionedfile.filter_absent(from_sf.get_record_stream(
            keys,
            self.to_format._fetch_order,
            not self.to_format._fetch_uses_deltas))
        # If a revision has a delta, this is actually expanded inside the
        # insert_record_stream code now, which is an alternate fix for
        from_rf = self.from_repository.revisions
        revisions = from_rf.get_record_stream(
            keys,
            self.to_format._fetch_order,
            not self.delta_on_metadata())
        return [('signatures', signatures), ('revisions', revisions)]

    def _generate_root_texts(self, revs):
        """This will be called by get_stream between fetching weave texts and
        fetching the inventory weave.
        """
        if self._rich_root_upgrade():
            return bzrlib.fetch.Inter1and2Helper(
                self.from_repository).generate_root_texts(revs)
        else:
            return []
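
    # Rough overview of get_stream below: item_keys_introduced_by drives the
    # phases, so the stream yields file texts first, then any generated root
    # texts and the inventories, and finally signatures and revisions, keeping
    # every record ahead of the revisions that reference it.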

    def get_stream(self, search):
        phase = 'file'
        revs = search.get_keys()
        graph = self.from_repository.get_graph()
        revs = tsort.topo_sort(graph.get_parent_map(revs))
        data_to_fetch = self.from_repository.item_keys_introduced_by(revs)
        text_keys = []
        for knit_kind, file_id, revisions in data_to_fetch:
            if knit_kind != phase:
                phase = knit_kind
                # Make a new progress bar for this phase
            if knit_kind == "file":
                # Accumulate file texts
                text_keys.extend([(file_id, revision) for revision in
                    revisions])
            elif knit_kind == "inventory":
                # Now copy the file texts.
                from_texts = self.from_repository.texts
                yield ('texts', from_texts.get_record_stream(
                    text_keys, self.to_format._fetch_order,
                    not self.to_format._fetch_uses_deltas))
                # Cause an error if a text occurs after we have done the
                # copy.
                text_keys = None
                # Before we process the inventory we generate the root
                # texts (if necessary) so that the inventories reference
                # them.
                for _ in self._generate_root_texts(revs):
                    yield _
                # we fetch only the referenced inventories because we do not
                # know for unselected inventories whether all their required
                # texts are present in the other repository - it could be
                # corrupt.
                for info in self._get_inventory_stream(revs):
                    yield info
            elif knit_kind == "signatures":
                # Nothing to do here; this will be taken care of when
                # _fetch_revision_texts happens.
                pass
            elif knit_kind == "revisions":
                for record in self._fetch_revision_texts(revs):
                    yield record
            else:
                raise AssertionError("Unknown knit kind %r" % knit_kind)

    def get_stream_for_missing_keys(self, missing_keys):
        # missing keys can only occur when we are byte copying and not
        # translating (because translation means we don't send
        # unreconstructable deltas ever).
        keys = {}
        keys['texts'] = set()
        keys['revisions'] = set()
        keys['inventories'] = set()
        keys['chk_bytes'] = set()
        keys['signatures'] = set()
        for key in missing_keys:
            keys[key[0]].add(key[1:])
        if len(keys['revisions']):
            # If we allowed copying revisions at this point, we could end up
            # copying a revision without copying its required texts: a
            # violation of the requirements for repository integrity.
            raise AssertionError(
                'cannot copy revisions to fill in missing deltas %s' % (
                    keys['revisions'],))
        for substream_kind, keys in keys.iteritems():
            vf = getattr(self.from_repository, substream_kind)
            if vf is None and keys:
                raise AssertionError(
                    "cannot fill in keys for a versioned file we don't"
                    " have: %s needs %s" % (substream_kind, keys))
            if not keys:
                # No need to stream something we don't have
                continue
            if substream_kind == 'inventories':
                # Some missing keys are genuinely ghosts, filter those out.
                present = self.from_repository.inventories.get_parent_map(keys)
                revs = [key[0] for key in present]
                # Get the inventory stream more-or-less as we do for the
                # original stream; there's no reason to assume that records
                # direct from the source will be suitable for the sink. (Think
                # e.g. 2a -> 1.9-rich-root).
                for info in self._get_inventory_stream(revs, missing=True):
                    yield info
                continue
            # Ask for full texts always so that we don't need more round trips
            # after this stream.
            # Some of the missing keys are genuinely ghosts, so filter absent
            # records. The Sink is responsible for doing another check to
            # ensure that ghosts don't introduce missing data for future
            # fetches.
            stream = versionedfile.filter_absent(vf.get_record_stream(keys,
                self.to_format._fetch_order, True))
            yield substream_kind, stream

    def inventory_fetch_order(self):
        if self._rich_root_upgrade():
            return 'topological'
        else:
            return self.to_format._fetch_order

    def _rich_root_upgrade(self):
        return (not self.from_repository._format.rich_root_data and
            self.to_format.rich_root_data)

    def _get_inventory_stream(self, revision_ids, missing=False):
        from_format = self.from_repository._format
        if (from_format.supports_chks and self.to_format.supports_chks and
            from_format.network_name() == self.to_format.network_name()):
            raise AssertionError(
                "this case should be handled by GroupCHKStreamSource")
        elif 'forceinvdeltas' in debug.debug_flags:
            return self._get_convertable_inventory_stream(revision_ids,
                delta_versus_null=missing)
        elif from_format.network_name() == self.to_format.network_name():
            return self._get_simple_inventory_stream(revision_ids,
                missing=missing)
        elif (not from_format.supports_chks and not self.to_format.supports_chks
            and from_format._serializer == self.to_format._serializer):
            # Essentially the same format.
            return self._get_simple_inventory_stream(revision_ids,
                missing=missing)
        else:
            # Any time we switch serializations, we want to use an
            # inventory-delta based approach.
            return self._get_convertable_inventory_stream(revision_ids,
                delta_versus_null=missing)

    def _get_simple_inventory_stream(self, revision_ids, missing=False):
        # NB: This currently reopens the inventory weave in source;
        # using a single stream interface instead would avoid this.
        from_weave = self.from_repository.inventories
        if missing:
            delta_closure = True
        else:
            delta_closure = not self.delta_on_metadata()
        yield ('inventories', from_weave.get_record_stream(
            [(rev_id,) for rev_id in revision_ids],
            self.inventory_fetch_order(), delta_closure))

    def _get_convertable_inventory_stream(self, revision_ids,
                                          delta_versus_null=False):
        # The two formats are sufficiently different that there is no fast
        # path, so we need to send just inventorydeltas, which any
        # sufficiently modern client can insert into any repository.
        # The StreamSink code expects to be able to
        # convert on the target, so we need to put bytes-on-the-wire that can
        # be converted. That means inventory deltas (if the remote is <1.19,
        # RemoteStreamSink will fallback to VFS to insert the deltas).
        yield ('inventory-deltas',
            self._stream_invs_as_deltas(revision_ids,
                delta_versus_null=delta_versus_null))
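
    # Each record yielded by _stream_invs_as_deltas below is a
    # FulltextContentFactory keyed by (revision_id,) whose bytes are the
    # serialised inventory delta against whichever basis (a parent already
    # sent, or NULL_REVISION) produced the smallest delta.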

    def _stream_invs_as_deltas(self, revision_ids, delta_versus_null=False):
        """Return a stream of inventory-deltas for the given rev ids.

        :param revision_ids: The list of inventories to transmit
        :param delta_versus_null: Don't try to find a minimal delta for this
            entry, instead compute the delta versus the NULL_REVISION. This
            effectively streams a complete inventory. Used for stuff like
            filling in missing parents, etc.
        """
        from_repo = self.from_repository
        revision_keys = [(rev_id,) for rev_id in revision_ids]
        parent_map = from_repo.inventories.get_parent_map(revision_keys)
        # XXX: possibly repos could implement a more efficient iter_inv_deltas
        inventories = self.from_repository.iter_inventories(
            revision_ids, 'topological')
        format = from_repo._format
        invs_sent_so_far = set([_mod_revision.NULL_REVISION])
        inventory_cache = lru_cache.LRUCache(50)
        null_inventory = from_repo.revision_tree(
            _mod_revision.NULL_REVISION).inventory
        # XXX: ideally the rich-root/tree-refs flags would be per-revision, not
        # per-repo (e.g. streaming a non-rich-root revision out of a rich-root
        # repo back into a non-rich-root repo ought to be allowed)
        serializer = inventory_delta.InventoryDeltaSerializer(
            versioned_root=format.rich_root_data,
            tree_references=format.supports_tree_reference)
        for inv in inventories:
            key = (inv.revision_id,)
            parent_keys = parent_map.get(key, ())
            delta = None
            if not delta_versus_null and parent_keys:
                # The caller did not ask for complete inventories and we have
                # some parents that we can delta against. Make a delta against
                # each parent so that we can find the smallest.
                parent_ids = [parent_key[0] for parent_key in parent_keys]
                for parent_id in parent_ids:
                    if parent_id not in invs_sent_so_far:
                        # We don't know that the remote side has this basis, so
                        # we can't use it.
                        continue
                    if parent_id == _mod_revision.NULL_REVISION:
                        parent_inv = null_inventory
                    else:
                        parent_inv = inventory_cache.get(parent_id, None)
                        if parent_inv is None:
                            parent_inv = from_repo.get_inventory(parent_id)
                    candidate_delta = inv._make_delta(parent_inv)
                    if (delta is None or
                        len(delta) > len(candidate_delta)):
                        delta = candidate_delta
                        basis_id = parent_id
            if delta is None:
                # Either none of the parents ended up being suitable, or we
                # were asked to delta against NULL
                basis_id = _mod_revision.NULL_REVISION
                delta = inv._make_delta(null_inventory)
            invs_sent_so_far.add(inv.revision_id)
            inventory_cache[inv.revision_id] = inv
            delta_serialized = ''.join(
                serializer.delta_to_lines(basis_id, key[-1], delta))
            yield versionedfile.FulltextContentFactory(
                key, parent_keys, None, delta_serialized)
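
# Usage sketch for _iter_for_revno below (hypothetical `branch`; illustrative
# only):
#
#   partial_history = [branch.last_revision()]
#   _iter_for_revno(branch.repository, partial_history, stop_index=10)
#   # partial_history now holds the tip plus up to ten of its ancestors,
#   # newest first.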


def _iter_for_revno(repo, partial_history_cache, stop_index=None,
                    stop_revision=None):
    """Extend the partial history to include a given index

    If a stop_index is supplied, stop when that index has been reached.
    If a stop_revision is supplied, stop when that revision is
    encountered. Otherwise, stop when the beginning of history is
    reached.

    :param stop_index: The index which should be present. When it is
        present, history extension will stop.
    :param stop_revision: The revision id which should be present. When
        it is encountered, history extension will stop.
    """
    start_revision = partial_history_cache[-1]
    iterator = repo.iter_reverse_revision_history(start_revision)
    try:
        # skip the last revision in the list
        iterator.next()
        while True:
            if (stop_index is not None and
                len(partial_history_cache) > stop_index):
                break
            if partial_history_cache[-1] == stop_revision:
                break
            revision_id = iterator.next()
            partial_history_cache.append(revision_id)
    except StopIteration: