        commit to be valid, deletes against the basis MUST be recorded via
        builder.record_delete().
        """
        self._recording_deletes = True
        try:
            basis_id = self.parents[0]
        except IndexError:
            basis_id = _mod_revision.NULL_REVISION
        self.basis_delta_revision = basis_id
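
    # A minimal calling sketch (hypothetical driver code, not from bzrlib's
    # tests): a caller streaming deletes must opt in first, e.g.
    #
    #   builder.will_record_deletes()
    #   builder.record_delete('removed/path', 'removed-file-id')
    #
    # after which the accumulated basis delta remains valid for the commit.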

    def record_entry_contents(self, ie, parent_invs, path, tree,
        content_summary):
        """Record the content of ie from tree into the commit if needed.

        Side effect: sets ie.revision when unchanged

        :param ie: An inventory entry present in the commit.
        :param parent_invs: The inventories of the parent revisions of the
            commit.
        :param path: The path the entry is at in the tree.
        :param tree: The tree which contains this entry and should be used to
            obtain content.
        :param content_summary: Summary data from the tree about the paths
            content - stat, length, exec, sha/link target. This is only
            accessed when the entry has a revision of None - that is when it is
            a candidate to commit.
        :return: A tuple (change_delta, version_recorded, fs_hash).
            change_delta is an inventory_delta change for this entry against
            the basis tree of the commit, or None if no change occurred against
            the basis tree.
            version_recorded is True if a new version of the entry has been
            recorded. For instance, committing a merge where a file was only
            changed on the other side will return (delta, False).
            fs_hash is either None, or the hash details for the path (currently
            a tuple of the contents sha1 and the statvalue returned by
            tree.get_file_with_stat()).
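
        A usage sketch (illustrative only; ``builder`` and the surrounding
        names are assumed, not taken from bzrlib's tests)::

            delta, version_recorded, fs_hash = builder.record_entry_contents(
                ie, parent_invs, path, tree, content_summary)
            if version_recorded:
                # a new text/metadata version was written for this entry
                pass
        """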
        if self.new_inventory.root is None:
            if ie.parent_id is not None:
                raise errors.RootMissing()
            self._check_root(ie, parent_invs, tree)
        if ie.revision is None:
            kind = content_summary[0]
        else:
            # ie is carried over from a prior commit
            kind = ie.kind
        # XXX: repository specific check for nested tree support goes here - if
        # the repo doesn't want nested trees we skip it ?
        if (kind == 'tree-reference' and
            not self.repository._format.supports_tree_reference):
            # mismatch between commit builder logic and repository:
            # this needs the entry creation pushed down into the builder.
            raise NotImplementedError('Missing repository subtree support.')
        self.new_inventory.add(ie)
        # TODO: slow, take it out of the inner loop.
        try:
            basis_inv = parent_invs[0]
        except IndexError:
            basis_inv = Inventory(root_id=None)
        # ie.revision is always None if the InventoryEntry is considered
        # for committing. We may record the previous parents revision if the
        # content is actually unchanged against a sole head.
        if ie.revision is not None:
            if not self._versioned_root and path == '':
                # repositories that do not version the root set the root's
                # revision to the new commit even when no change occurs (more
                # specifically, they do not record a revision on the root; and
                # the rev id is assigned to the root during deserialisation -
                # this masks when a change may have occurred against the basis.
                # To match this we always issue a delta, because the revision
                # of the root will always be changing.
                if ie.file_id in basis_inv:
                    delta = (basis_inv.id2path(ie.file_id), path,
                        ie.file_id, ie)
                else:
                    delta = (None, path, ie.file_id, ie)
                self._basis_delta.append(delta)
                return delta, False, None
            else:
                # we don't need to commit this, because the caller already
                # determined that an existing revision of this file is
                # appropriate. If it's not being considered for committing then
                # it and all its parents to the root must be unaltered so
                # no-change against the basis.
                if ie.revision == self._new_revision_id:
                    raise AssertionError("Impossible situation, a skipped "
                        "inventory entry (%r) claims to be modified in this "
                        "commit (%r)." % (ie, self._new_revision_id))
                return None, False, None
        # XXX: Friction: parent_candidates should return a list not a dict
        #      so that we don't have to walk the inventories again.
        parent_candidate_entries = ie.parent_candidates(parent_invs)
        head_set = self._heads(ie.file_id, parent_candidate_entries.keys())
        heads = []
        for inv in parent_invs:
            if ie.file_id in inv:
                old_rev = inv[ie.file_id].revision
                if old_rev in head_set:
                    heads.append(inv[ie.file_id].revision)
                    head_set.remove(inv[ie.file_id].revision)
        store = False
        # now we check to see if we need to write a new record to the
        # file-graph.
        # We write a new entry unless there is one head to the ancestors, and
        # the kind-derived content is unchanged.

        # Cheapest check first: no ancestors, or more than one head in the
        # ancestors, we write a new node.
        if len(heads) != 1:
            store = True
        if not store:
            # There is a single head, look it up for comparison
            parent_entry = parent_candidate_entries[heads[0]]
            # if the non-content specific data has changed, we'll be writing a
            # node:
            if (parent_entry.parent_id != ie.parent_id or
                parent_entry.name != ie.name):
                store = True
        # now we need to do content specific checks:
        if not store:
            # if the kind changed the content obviously has
            if kind != parent_entry.kind:
                store = True
        # Stat cache fingerprint feedback for the caller - None as we usually
        # don't generate one.
        fingerprint = None
        if kind == 'file':
            if content_summary[2] is None:
                raise ValueError("Files must not have executable = None")
            if not store:
                # We can't trust a check of the file length because of content
                # filtering.
                if (# if the exec bit has changed we have to store:
                    parent_entry.executable != content_summary[2]):
                    store = True
                elif parent_entry.text_sha1 == content_summary[3]:
                    # all meta and content is unchanged (using a hash cache
                    # hit to check the sha)
                    ie.revision = parent_entry.revision
                    ie.text_size = parent_entry.text_size
                    ie.text_sha1 = parent_entry.text_sha1
                    ie.executable = parent_entry.executable
                    return self._get_delta(ie, basis_inv, path), False, None
                else:
                    # Either there is only a hash change(no hash cache entry,
                    # or same size content change), or there is no change on
                    # this file at all.
                    # Provide the parent's hash to the store layer, so that if
                    # the content is unchanged we will not store a new node.
                    nostore_sha = parent_entry.text_sha1
            else:
                # We want to record a new node regardless of the presence or
                # absence of a content change in the file.
                nostore_sha = None
            ie.executable = content_summary[2]
            file_obj, stat_value = tree.get_file_with_stat(ie.file_id, path)
            try:
                text = file_obj.read()
            finally:
                file_obj.close()
            try:
                ie.text_sha1, ie.text_size = self._add_text_to_weave(
                    ie.file_id, text, heads, nostore_sha)
                # Let the caller know we generated a stat fingerprint.
                fingerprint = (ie.text_sha1, stat_value)
            except errors.ExistingContent:
                # Turns out that the file content was unchanged, and we were
                # only going to store a new node if it was changed. Carry over
                # the entry.
                ie.revision = parent_entry.revision
                ie.text_size = parent_entry.text_size
                ie.text_sha1 = parent_entry.text_sha1
                ie.executable = parent_entry.executable
                return self._get_delta(ie, basis_inv, path), False, None
        elif kind == 'directory':
            if not store:
                # all data is meta here, nothing specific to directory, so
                # carry over:
                ie.revision = parent_entry.revision
                return self._get_delta(ie, basis_inv, path), False, None
            self._add_text_to_weave(ie.file_id, '', heads, None)
        elif kind == 'symlink':
            current_link_target = content_summary[3]
            if not store:
                # symlink target is not generic metadata, check if it has
                # changed.
                if current_link_target != parent_entry.symlink_target:
                    store = True
            if not store:
                # unchanged, carry over.
                ie.revision = parent_entry.revision
                ie.symlink_target = parent_entry.symlink_target
                return self._get_delta(ie, basis_inv, path), False, None
            ie.symlink_target = current_link_target
            self._add_text_to_weave(ie.file_id, '', heads, None)
        elif kind == 'tree-reference':
            if not store:
                if content_summary[3] != parent_entry.reference_revision:
                    store = True
            if not store:
                # unchanged, carry over.
                ie.reference_revision = parent_entry.reference_revision
                ie.revision = parent_entry.revision
                return self._get_delta(ie, basis_inv, path), False, None
            ie.reference_revision = content_summary[3]
            if ie.reference_revision is None:
                raise AssertionError("invalid content_summary for nested tree: %r"
                    % (content_summary,))
            self._add_text_to_weave(ie.file_id, '', heads, None)
        else:
            raise NotImplementedError('unknown kind')
        ie.revision = self._new_revision_id
        self._any_changes = True
        return self._get_delta(ie, basis_inv, path), True, fingerprint

    def record_iter_changes(self, tree, basis_revision_id, iter_changes,
        _entry_factory=entry_factory):
        """Record a new tree via iter_changes.

        :param tree: The tree to obtain text contents from for changed objects.
        :param basis_revision_id: The revision id of the tree the iter_changes
            is delta'd against.
        :param iter_changes: An iter_changes iterator with the changes to apply
            to basis_revision_id. The iterator must not include any items with
            a current kind of None - missing items must be either filtered out
            or errored-on before record_iter_changes sees the item.
        :param _entry_factory: Private method to bind entry_factory locally for
            performance.
        :return: A generator of (file_id, relpath, fs_hash) tuples for use with
            tree._observed_sha1.
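
        A consuming sketch (illustrative; mirrors how a committer might use
        the generator, with ``tree`` and ``iter_changes`` assumed)::

            for file_id, relpath, fs_hash in builder.record_iter_changes(
                    tree, basis_revision_id, iter_changes):
                tree._observed_sha1(file_id, relpath, fs_hash)
        """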
        # Create an inventory delta based on deltas between all the parents and
        # deltas between all the parent inventories. We use inventory deltas
        # between the inventory objects because iter_changes masks
        # last-changed-field only changes.
        # file_id -> change map, change is fileid, paths, changed, versioneds,
        # parents, names, kinds, executables
        merged_ids = {}
        # {file_id -> revision_id -> inventory entry, for entries in parent
        # trees that are not parents[0]
        parent_entries = {}
        ghost_basis = False
        try:
            revtrees = list(self.repository.revision_trees(self.parents))
        except errors.NoSuchRevision:
            # one or more ghosts, slow path.
            revtrees = []
            for revision_id in self.parents:
                try:
                    revtrees.append(self.repository.revision_tree(revision_id))
                except errors.NoSuchRevision:
                    if not revtrees:
                        basis_revision_id = _mod_revision.NULL_REVISION
                        ghost_basis = True
                    revtrees.append(self.repository.revision_tree(
                        _mod_revision.NULL_REVISION))
        # The basis inventory from a repository
        if revtrees:
            basis_inv = revtrees[0].inventory
        else:
            basis_inv = self.repository.revision_tree(
                _mod_revision.NULL_REVISION).inventory
        if len(self.parents) > 0:
            if basis_revision_id != self.parents[0] and not ghost_basis:
                raise Exception(
                    "arbitrary basis parents not yet supported with merges")
            for revtree in revtrees[1:]:
                for change in revtree.inventory._make_delta(basis_inv):
                    if change[1] is None:
                        # Not present in this parent.
                        continue
                    if change[2] not in merged_ids:
                        if change[0] is not None:
                            basis_entry = basis_inv[change[2]]
                            merged_ids[change[2]] = [
                                basis_entry.revision,
                                change[3].revision]
                            parent_entries[change[2]] = {
                                basis_entry.revision:basis_entry,
                                change[3].revision:change[3],
                                }
                        else:
                            merged_ids[change[2]] = [change[3].revision]
                            parent_entries[change[2]] = {change[3].revision:change[3]}
                    else:
                        merged_ids[change[2]].append(change[3].revision)
                        parent_entries[change[2]][change[3].revision] = change[3]
        # Setup the changes from the tree:
        # changes maps file_id -> (change, [parent revision_ids])
        changes = {}
        for change in iter_changes:
            # This probably looks up in basis_inv way too much.
            if change[1][0] is not None:
                head_candidate = [basis_inv[change[0]].revision]
            else:
                head_candidate = []
            changes[change[0]] = change, merged_ids.get(change[0],
                head_candidate)
        unchanged_merged = set(merged_ids) - set(changes)
        # Extend the changes dict with synthetic changes to record merges of
        # texts.
        for file_id in unchanged_merged:
            # Record a merged version of these items that did not change vs the
            # basis. This can be either identical parallel changes, or a revert
            # of a specific file after a merge. The recorded content will be
            # that of the current tree (which is the same as the basis), but
            # the per-file graph will reflect a merge.
            # NB:XXX: We are reconstructing path information we had, this
            # should be preserved instead.
            # inv delta change: (file_id, (path_in_source, path_in_target),
            #   changed_content, versioned, parent, name, kind,
            #   executable)
            try:
                basis_entry = basis_inv[file_id]
            except errors.NoSuchId:
                # a change from basis->some_parents but file_id isn't in basis
                # so was new in the merge, which means it must have changed
                # from basis -> current, and as it hasn't the add was reverted
                # by the user. So we discard this change.
                pass
            else:
                change = (file_id,
                    (basis_inv.id2path(file_id), tree.id2path(file_id)),
                    False, (True, True),
                    (basis_entry.parent_id, basis_entry.parent_id),
                    (basis_entry.name, basis_entry.name),
                    (basis_entry.kind, basis_entry.kind),
                    (basis_entry.executable, basis_entry.executable))
                changes[file_id] = (change, merged_ids[file_id])
        # changes contains tuples with the change and a set of inventory
        # candidates for the file.
        # inv delta is:
        # old_path, new_path, file_id, new_inventory_entry
        seen_root = False # Is the root in the basis delta?
        inv_delta = self._basis_delta
        modified_rev = self._new_revision_id
        for change, head_candidates in changes.values():
            if change[3][1]: # versioned in target.
                # Several things may be happening here:
                # We may have a fork in the per-file graph
                #  - record a change with the content from tree
                # We may have a change against < all trees
                #  - carry over the tree that hasn't changed
                # We may have a change against all trees
                #  - record the change with the content from tree
                kind = change[6][1]
                file_id = change[0]
                entry = _entry_factory[kind](file_id, change[5][1],
                    change[4][1])
                head_set = self._heads(change[0], set(head_candidates))
                heads = []
                # Preserve ordering.
                for head_candidate in head_candidates:
                    if head_candidate in head_set:
                        heads.append(head_candidate)
                        head_set.remove(head_candidate)
                carried_over = False
                if len(heads) == 1:
                    # Could be a carry-over situation:
                    parent_entry_revs = parent_entries.get(file_id, None)
                    if parent_entry_revs:
                        parent_entry = parent_entry_revs.get(heads[0], None)
                    else:
                        parent_entry = None
                    if parent_entry is None:
                        # The parent that iter_changes was called against is the
                        # one that is the per-file head, so any change is
                        # relevant: iter_changes is valid.
                        carry_over_possible = False
                    else:
                        # could be a carry over situation
                        # A change against the basis may just indicate a merge,
                        # we need to check the content against the source of the
                        # merge to determine if it was changed after the merge
                        # or carried over.
                        if (parent_entry.kind != entry.kind or
                            parent_entry.parent_id != entry.parent_id or
                            parent_entry.name != entry.name):
                            # Metadata common to all entries has changed
                            # against per-file parent
                            carry_over_possible = False
                        else:
                            carry_over_possible = True
                        # per-type checks for changes against the parent_entry
                        # are done below.
                else:
                    # Cannot be a carry-over situation
                    carry_over_possible = False
                # Populate the entry in the delta
                if kind == 'file':
                    # XXX: There is still a small race here: If someone reverts
                    # the content of a file after iter_changes examines and
                    # decides it has changed,
                    # we will unconditionally record a new version even if some
                    # other process reverts it while commit is running (with
                    # the revert happening after iter_changes did its
                    # examination).
                    if change[7][1]:
                        entry.executable = True
                    else:
                        entry.executable = False
                    if (carry_over_possible and
                        parent_entry.executable == entry.executable):
                        # Check the file length, content hash after reading
                        # the file.
                        nostore_sha = parent_entry.text_sha1
                    else:
                        nostore_sha = None
                    file_obj, stat_value = tree.get_file_with_stat(file_id, change[1][1])
                    try:
                        text = file_obj.read()
                    finally:
                        file_obj.close()
                    try:
                        entry.text_sha1, entry.text_size = self._add_text_to_weave(
                            file_id, text, heads, nostore_sha)
                        yield file_id, change[1][1], (entry.text_sha1, stat_value)
                    except errors.ExistingContent:
                        # No content change against a carry_over parent
                        # Perhaps this should also yield a fs hash update?
                        carried_over = True
                        entry.text_size = parent_entry.text_size
                        entry.text_sha1 = parent_entry.text_sha1
                elif kind == 'symlink':
                    entry.symlink_target = tree.get_symlink_target(file_id)
                    if (carry_over_possible and
                        parent_entry.symlink_target == entry.symlink_target):
                        carried_over = True
                    else:
                        self._add_text_to_weave(change[0], '', heads, None)
                elif kind == 'directory':
                    if carry_over_possible:
                        carried_over = True
                    else:
                        # Nothing to set on the entry.
                        # XXX: split into the Root and nonRoot versions.
                        if change[1][1] != '' or self.repository.supports_rich_root():
                            self._add_text_to_weave(change[0], '', heads, None)
                elif kind == 'tree-reference':
                    if not self.repository._format.supports_tree_reference:
                        # This isn't quite sane as an error, but we shouldn't
                        # ever see this code path in practice: trees don't
                        # permit references when the repo doesn't support tree
                        # references.
                        raise errors.UnsupportedOperation(tree.add_reference,
                            self.repository)
                    reference_revision = tree.get_reference_revision(change[0])
                    entry.reference_revision = reference_revision
                    if (carry_over_possible and
                        parent_entry.reference_revision == reference_revision):
                        carried_over = True
                    else:
                        self._add_text_to_weave(change[0], '', heads, None)
                else:
                    raise AssertionError('unknown kind %r' % kind)
                if not carried_over:
                    entry.revision = modified_rev
                else:
                    entry.revision = parent_entry.revision
            else:
                entry = None
            new_path = change[1][1]
            inv_delta.append((change[1][0], new_path, change[0], entry))
            if new_path == '':
                seen_root = True
        self.new_inventory = None
        if len(inv_delta):
            # This should perhaps be guarded by a check that the basis we
            # commit against is the basis for the commit and if not do a delta
            # against the basis.
            self._any_changes = True
        if not seen_root:
            # housekeeping root entry changes do not affect no-change commits.
            self._require_root_change(tree)
        self.basis_delta_revision = basis_revision_id

    def _add_text_to_weave(self, file_id, new_text, parents, nostore_sha):
        parent_keys = tuple([(file_id, parent) for parent in parents])
        return self.repository.texts._add_text(
            (file_id, self._new_revision_id), parent_keys, new_text,
            nostore_sha=nostore_sha, random_id=self.random_revid)[0:2]
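
    # Key-shape sketch (illustrative values, not real revision ids): for
    # file_id 'f-id' with per-file parents ['rev-1', 'rev-2'], the new text
    # is stored under key ('f-id', self._new_revision_id) with parent keys
    # (('f-id', 'rev-1'), ('f-id', 'rev-2')).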


class RootCommitBuilder(CommitBuilder):
    """This commitbuilder actually records the root id"""

    # the root entry gets versioned properly by this builder.
    _versioned_root = True

    def _check_root(self, ie, parent_invs, tree):
        """Helper for record_entry_contents.

        :param ie: An entry being added.
        :param parent_invs: The inventories of the parent revisions of the
            commit.
        :param tree: The tree that is being committed.
        """

    def _require_root_change(self, tree):
        """Enforce an appropriate root object change.

        This is called once when record_iter_changes is called, if and only if
        the root was not in the delta calculated by record_iter_changes.

        :param tree: The tree which is being committed.
        """
        # versioned roots do not change unless the tree found a change.


class RepositoryWriteLockResult(LogicalLockResult):

        return InterRepository._assert_same_model(self, repository)

    def add_inventory(self, revision_id, inv, parents):
        """Add the inventory inv to the repository as revision_id.

        :param parents: The revision ids of the parents that revision_id
            is known to have and are in the repository already.

        :returns: The validator(which is a sha1 digest, though what is sha'd is
            repository format specific) of the serialized inventory.
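
        A usage sketch (illustrative only; error handling elided and names
        assumed)::

            repo.lock_write()
            repo.start_write_group()
            validator = repo.add_inventory(rev_id, inv, parent_rev_ids)
            repo.commit_write_group()
            repo.unlock()
        """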
        if not self.is_in_write_group():
            raise AssertionError("%r not in write group" % (self,))
        _mod_revision.check_not_reserved_id(revision_id)
        if not (inv.revision_id is None or inv.revision_id == revision_id):
            raise AssertionError(
                "Mismatch between inventory revision"
                " id and insertion revid (%r, %r)"
                % (inv.revision_id, revision_id))
        if inv.root is None:
            raise errors.RootMissing()
        return self._add_inventory_checked(revision_id, inv, parents)

    def _add_inventory_checked(self, revision_id, inv, parents):
        """Add inv to the repository after checking the inputs.

        This function can be overridden to allow different inventory styles.

        :seealso: add_inventory, for the contract.
        """
        inv_lines = self._serializer.write_inventory_to_lines(inv)
        return self._inventory_add_lines(revision_id, parents,
            inv_lines, check_content=False)

    def add_inventory_by_delta(self, basis_revision_id, delta, new_revision_id,
                               parents, basis_inv=None, propagate_caches=False):
        """Add a new inventory expressed as a delta against another revision.

        See the inventory developers documentation for the theory behind
        inventory deltas.

        :param basis_revision_id: The inventory id the delta was created
            against. (This does not have to be a direct parent.)
        :param delta: The inventory delta (see Inventory.apply_delta for
            details.)
        :param new_revision_id: The revision id that the inventory is being
            added for.
        :param parents: The revision ids of the parents that revision_id is
            known to have and are in the repository already. These are supplied
            for repositories that depend on the inventory graph for revision
            graph access, as well as for those that pun ancestry with delta
            compression.
        :param basis_inv: The basis inventory if it is already known,
            otherwise None.
        :param propagate_caches: If True, the caches for this inventory are
            copied to and updated for the result if possible.

        :returns: (validator, new_inv)
            The validator(which is a sha1 digest, though what is sha'd is
            repository format specific) of the serialized inventory, and the
            resulting inventory.
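
        A usage sketch (illustrative; ``delta`` built per the inventory
        delta format noted above, other names assumed)::

            validator, new_inv = repo.add_inventory_by_delta(
                basis_rev_id, delta, new_rev_id, [basis_rev_id])
        """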
        if not self.is_in_write_group():
            raise AssertionError("%r not in write group" % (self,))
        _mod_revision.check_not_reserved_id(new_revision_id)
        basis_tree = self.revision_tree(basis_revision_id)
        basis_tree.lock_read()
        try:
            # Note that this mutates the inventory of basis_tree, which not all
            # inventory implementations may support: A better idiom would be to
            # return a new inventory, but as there is no revision tree cache in
            # repository this is safe for now - RBC 20081013
            if basis_inv is None:
                basis_inv = basis_tree.inventory
            basis_inv.apply_delta(delta)
            basis_inv.revision_id = new_revision_id
            return (self.add_inventory(new_revision_id, basis_inv, parents),
                    basis_inv)
        finally:
            basis_tree.unlock()

    def _inventory_add_lines(self, revision_id, parents, lines,
        check_content=True):
        """Store lines in inv_vf and return the sha1 of the inventory."""
        parents = [(parent,) for parent in parents]
        result = self.inventories.add_lines((revision_id,), parents, lines,
            check_content=check_content)[0]
        self.inventories._access.flush()
        return result

    def add_revision(self, revision_id, rev, inv=None, config=None):
        """Add rev to the revision store as revision_id.

        :param revision_id: the revision id to use.
        :param rev: The revision object.
        :param inv: The inventory for the revision. If None, it will be looked
            up in the inventory store.
        :param config: If None no digital signature will be created.
            If supplied its signature_needed method will be used
            to determine if a signature should be made.
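
        A usage sketch (illustrative; assumes an active write group and an
        already-constructed Revision object ``rev``)::

            repo.add_revision(rev.revision_id, rev, inv=inv, config=None)
        """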
        # TODO: jam 20070210 Shouldn't we check rev.revision_id and
        #       rev.parent_ids?
        _mod_revision.check_not_reserved_id(revision_id)
        if config is not None and config.signature_needed():
            if inv is None:
                inv = self.get_inventory(revision_id)
            plaintext = Testament(rev, inv).as_short_text()
            self.store_revision_signature(
                gpg.GPGStrategy(config), plaintext, revision_id)
        # check inventory present
        if not self.inventories.get_parent_map([(revision_id,)]):
            if inv is None:
                raise errors.WeaveRevisionNotPresent(revision_id,
                                                     self.inventories)
            else:
                # yes, this is not suitable for adding with ghosts.
                rev.inventory_sha1 = self.add_inventory(revision_id, inv,
                                                        rev.parent_ids)
        else:
            key = (revision_id,)
            rev.inventory_sha1 = self.inventories.get_sha1s([key])[key]
        self._add_revision(rev)

    def _add_revision(self, revision):
        text = self._serializer.write_revision_to_string(revision)
        key = (revision.revision_id,)
        parents = tuple((parent,) for parent in revision.parent_ids)
        self.revisions.add_lines(key, parents, osutils.split_lines(text))

    def all_revision_ids(self):
        """Returns a list of all the revision ids in the repository.
        """

        # The old API returned a list, should this actually be a set?
        return parent_map.keys()

    def _check_inventories(self, checker):
        """Check the inventories found from the revision scan.

        This is responsible for verifying the sha1 of inventories and
        creating a pending_keys set that covers data referenced by inventories.
        """
        bar = ui.ui_factory.nested_progress_bar()
        try:
            self._do_check_inventories(checker, bar)
        finally:
            bar.finished()

    def _do_check_inventories(self, checker, bar):
        """Helper for _check_inventories."""
        keys = {'chk_bytes':set(), 'inventories':set(), 'texts':set()}
        kinds = ['chk_bytes', 'texts']
        count = len(checker.pending_keys)
        bar.update("inventories", 0, 2)
        current_keys = checker.pending_keys
        checker.pending_keys = {}
        # Accumulate current checks.
        for key in current_keys:
            if key[0] != 'inventories' and key[0] not in kinds:
                checker._report_items.append('unknown key type %r' % (key,))
            keys[key[0]].add(key[1:])
        if keys['inventories']:
            # NB: output order *should* be roughly sorted - topo or
            # inverse topo depending on repository - either way decent
            # to just delta against. However, pre-CHK formats didn't
            # try to optimise inventory layout on disk. As such the
            # pre-CHK code path does not use inventory deltas.
            last_object = None
            for record in self.inventories.check(keys=keys['inventories']):
                if record.storage_kind == 'absent':
                    checker._report_items.append(
                        'Missing inventory {%s}' % (record.key,))
                else:
                    last_object = self._check_record('inventories', record,
                        checker, last_object,
                        current_keys[('inventories',) + record.key])
            del keys['inventories']
        else:
            return
        bar.update("texts", 1)
        while (checker.pending_keys or keys['chk_bytes']
            or keys['texts']):
            # Something to check.
            current_keys = checker.pending_keys
            checker.pending_keys = {}
            # Accumulate current checks.
            for key in current_keys:
                if key[0] not in kinds:
                    checker._report_items.append('unknown key type %r' % (key,))
                keys[key[0]].add(key[1:])
            # Check the outermost kind only - inventories || chk_bytes || texts
            for kind in kinds:
                if keys[kind]:
                    last_object = None
                    for record in getattr(self, kind).check(keys=keys[kind]):
                        if record.storage_kind == 'absent':
                            checker._report_items.append(
                                'Missing %s {%s}' % (kind, record.key,))
                        else:
                            last_object = self._check_record(kind, record,
                                checker, last_object, current_keys[(kind,) + record.key])
                    keys[kind] = set()
                    break

    def _check_record(self, kind, record, checker, last_object, item_data):
        """Check a single text from this repository."""
        if kind == 'inventories':
            rev_id = record.key[0]
            inv = self._deserialise_inventory(rev_id,
                record.get_bytes_as('fulltext'))
            if last_object is not None:
                delta = inv._make_delta(last_object)
                for old_path, path, file_id, ie in delta:
                    if ie is None:
                        continue
                    ie.check(checker, rev_id, inv)
            else:
                for path, ie in inv.iter_entries():
                    ie.check(checker, rev_id, inv)
            if self._format.fast_deltas:
                return inv
        elif kind == 'chk_bytes':
            # No code written to check chk_bytes for this repo format.
            checker._report_items.append(
                'unsupported key type chk_bytes for %s' % (record.key,))
        elif kind == 'texts':
            self._check_text(record, checker, item_data)
        else:
            checker._report_items.append(
                'unknown key type %s for %s' % (kind, record.key))

    def _check_text(self, record, checker, item_data):
        """Check a single text."""
        # Check it is extractable.
        # TODO: check length.
        if record.storage_kind == 'chunked':
            chunks = record.get_bytes_as(record.storage_kind)
            sha1 = osutils.sha_strings(chunks)
            length = sum(map(len, chunks))
        else:
            content = record.get_bytes_as('fulltext')
            sha1 = osutils.sha_string(content)
            length = len(content)
        if item_data and sha1 != item_data[1]:
            checker._report_items.append(
                'sha1 mismatch: %s has sha1 %s expected %s referenced by %s' %
                (record.key, sha1, item_data[1], item_data[2]))

    @staticmethod
    def create(a_bzrdir):
        """Construct the current default format repository in a_bzrdir."""

    def store_revision_signature(self, gpg_strategy, plaintext, revision_id):
        signature = gpg_strategy.sign(plaintext)
        self.add_signature_text(revision_id, signature)

    def add_signature_text(self, revision_id, signature):
        """Store a signature text for a revision.

        :param revision_id: Revision id of the revision
        :param signature: Signature text.
        """
        self.signatures.add_lines((revision_id,), (),
            osutils.split_lines(signature))

    def find_text_key_references(self):
        """Find the text key references within the repository.

        :return: A dictionary mapping text keys ((fileid, revision_id) tuples)
            to whether they were referred to by the inventory of the
            revision_id that they contain. The inventory texts from all present
            revision ids are assessed to generate this report.
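
        An illustrative peek at the result shape (keys and values assumed)::

            refs = repo.find_text_key_references()
            # e.g. refs[('file-id', 'rev-id')] -> True when that revision's
            # inventory refers to that text key.
        """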
        revision_keys = self.revisions.keys()
        w = self.inventories
        pb = ui.ui_factory.nested_progress_bar()
        try:
            return self._find_text_key_references_from_xml_inventory_lines(
                w.iter_lines_added_or_present_in_keys(revision_keys, pb=pb))
        finally:
            pb.finished()

    def _find_text_key_references_from_xml_inventory_lines(self,
        line_iterator):
        """Core routine for extracting references to texts from inventories.

        This performs the translation of xml lines to revision ids.

        :param line_iterator: An iterator of lines, origin_version_id
        :return: A dictionary mapping text keys ((fileid, revision_id) tuples)
            to whether they were referred to by the inventory of the
            revision_id that they contain. Note that if that revision_id was
            not part of the line_iterator's output then False will be given -
            even though it may actually refer to that key.
        """
        if not self._serializer.support_altered_by_hack:
            raise AssertionError(
                "_find_text_key_references_from_xml_inventory_lines only "
                "supported for branches which store inventory as unnested xml"
                ", not on %r" % self)
        result = {}

        # this code needs to read every new line in every inventory for the
        # inventories [revision_ids]. Seeing a line twice is ok. Seeing a line
        # not present in one of those inventories is unnecessary but not
        # harmful because we are filtering by the revision id marker in the
        # inventory lines : we only select file ids altered in one of those
        # revisions. We don't need to see all lines in the inventory because
        # only those added in an inventory in rev X can contain a revision=X
        # line.
        unescape_revid_cache = {}
        unescape_fileid_cache = {}

        # jam 20061218 In a big fetch, this handles hundreds of thousands
        # of lines, so it has had a lot of inlining and optimizing done.
        # Sorry that it is a little bit messy.
        # Move several functions to be local variables, since this is a long
        # running loop.
        search = self._file_ids_altered_regex.search
        unescape = _unescape_xml
        setdefault = result.setdefault
        for line, line_key in line_iterator:
            match = search(line)
            if match is None:
                continue
            # One call to match.group() returning multiple items is quite a
            # bit faster than 2 calls to match.group() each returning 1
            file_id, revision_id = match.group('file_id', 'revision_id')

            # Inlining the cache lookups helps a lot when you make 170,000
            # lines and 350k ids, versus 8.4 unique ids.
            # Using a cache helps in 2 ways:
            #   1) Avoids unnecessary decoding calls
            #   2) Re-uses cached strings, which helps in future set and
            #      equality checks.
            # (2) is enough that removing encoding entirely along with
            # the cache (so we are using plain strings) results in no
            # performance improvement.
            try:
                revision_id = unescape_revid_cache[revision_id]
            except KeyError:
                unescaped = unescape(revision_id)
                unescape_revid_cache[revision_id] = unescaped
                revision_id = unescaped

            # Note that unconditionally unescaping means that we deserialise
            # every fileid, which for general 'pull' is not great, but we don't
            # really want to have so many fulltexts that this matters anyway.
            try:
                file_id = unescape_fileid_cache[file_id]
            except KeyError:
                unescaped = unescape(file_id)
                unescape_fileid_cache[file_id] = unescaped
                file_id = unescaped

            key = (file_id, revision_id)
            setdefault(key, False)
            if revision_id == line_key[-1]:
                result[key] = True
        return result

    def _inventory_xml_lines_for_keys(self, keys):
        """Get a line iterator of the sort needed for finding references.

        Not relevant for non-xml inventory repositories.

        Ghosts in revision_keys are ignored.

        :param revision_keys: The revision keys for the inventories to inspect.
        :return: An iterator over (inventory line, revid) for the fulltexts of
            all of the xml inventories specified by revision_keys.
        """
        stream = self.inventories.get_record_stream(keys, 'unordered', True)
        for record in stream:
            if record.storage_kind != 'absent':
                chunks = record.get_bytes_as('chunked')
                revid = record.key[-1]
                lines = osutils.chunks_to_lines(chunks)
                for line in lines:
                    yield line, revid

    def _find_file_ids_from_xml_inventory_lines(self, line_iterator,
        revision_keys):
        """Helper routine for fileids_altered_by_revision_ids.

        This performs the translation of xml lines to revision ids.

        :param line_iterator: An iterator of lines, origin_version_id
        :param revision_keys: The revision ids to filter for. This should be a
            set or other type which supports efficient __contains__ lookups, as
            the revision key from each parsed line will be looked up in the
            revision_keys filter.
        :return: a dictionary mapping altered file-ids to an iterable of
            revision_ids. Each altered file-id has the exact revision_ids that
            altered it listed explicitly.
        """
        seen = set(self._find_text_key_references_from_xml_inventory_lines(
            line_iterator).iterkeys())
        parent_keys = self._find_parent_keys_of_revisions(revision_keys)
        parent_seen = set(self._find_text_key_references_from_xml_inventory_lines(
            self._inventory_xml_lines_for_keys(parent_keys)))
        new_keys = seen - parent_seen
        result = {}
        setdefault = result.setdefault
        for key in new_keys:
            setdefault(key[0], set()).add(key[-1])
        return result

    def _find_parent_ids_of_revisions(self, revision_ids):
        """Find all parent ids that are mentioned in the revision graph.
        """

    def iter_files_bytes(self, desired_files):
        """Iterate through file versions.

        Yields pairs of identifier, bytes_iterator. identifier should
        uniquely identify the file version in the caller's context. (Examples:
        an index number or a TreeTransform trans_id.)

        bytes_iterator is an iterable of bytestrings for the file. The
        kind of iterable and length of the bytestrings are unspecified, but for
        this implementation, it is a list of bytes produced by
        VersionedFile.get_record_stream().

        :param desired_files: a list of (file_id, revision_id, identifier)
            triples
        """
        text_keys = {}
        for file_id, revision_id, callable_data in desired_files:
            text_keys[(file_id, revision_id)] = callable_data
        for record in self.texts.get_record_stream(text_keys, 'unordered', True):
            if record.storage_kind == 'absent':
                raise errors.RevisionNotPresent(record.key, self)
            yield text_keys[record.key], record.get_bytes_as('chunked')

    def _generate_text_key_index(self, text_key_references=None,
        ancestors=None):
        """Generate a new text key index for the repository.

        This is an expensive function that will take considerable time to run.

        :return: A dict mapping text keys ((file_id, revision_id) tuples) to a
            list of parents, also text keys. When a given key has no parents,
            the parents list will be [NULL_REVISION].
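
        An illustrative result shape (keys and values assumed)::

            index = repo._generate_text_key_index()
            # index[('file-id', 'rev-2')] -> [('file-id', 'rev-1')]
        """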
        # All revisions, to find inventory parents.
        if ancestors is None:
            graph = self.get_graph()
            ancestors = graph.get_parent_map(self.all_revision_ids())
        if text_key_references is None:
            text_key_references = self.find_text_key_references()
        pb = ui.ui_factory.nested_progress_bar()
        try:
            return self._do_generate_text_key_index(ancestors,
                text_key_references, pb)
        finally:
            pb.finished()

    def _do_generate_text_key_index(self, ancestors, text_key_references, pb):
        """Helper for _generate_text_key_index to avoid deep nesting."""
        revision_order = tsort.topo_sort(ancestors)
        invalid_keys = set()
        revision_keys = {}
        for revision_id in revision_order:
            revision_keys[revision_id] = set()
        text_count = len(text_key_references)
        # a cache of the text keys to allow reuse; costs a dict of all the
        # keys, but saves a 2-tuple for every child of a given key.
        text_key_cache = {}
        for text_key, valid in text_key_references.iteritems():
            if not valid:
                invalid_keys.add(text_key)
            else:
                revision_keys[text_key[1]].add(text_key)
            text_key_cache[text_key] = text_key
        del text_key_references
        text_index = {}
        text_graph = graph.Graph(graph.DictParentsProvider(text_index))
        NULL_REVISION = _mod_revision.NULL_REVISION
        # Set a cache with a size of 10 - this suffices for bzr.dev but may be
        # too small for large or very branchy trees. However, for 55K path
        # trees, it would be easy to use too much memory trivially. Ideally we
        # could gauge this by looking at available real memory etc, but this is
        # always a tricky proposition.
        inventory_cache = lru_cache.LRUCache(10)
        batch_size = 10 # should be ~150MB on a 55K path tree
        batch_count = len(revision_order) / batch_size + 1
        processed_texts = 0
        pb.update("Calculating text parents", processed_texts, text_count)
        for offset in xrange(batch_count):
            to_query = revision_order[offset * batch_size:(offset + 1) *
                batch_size]
            if not to_query:
                break
            for revision_id in to_query:
                parent_ids = ancestors[revision_id]
                for text_key in revision_keys[revision_id]:
                    pb.update("Calculating text parents", processed_texts)
                    processed_texts += 1
                    candidate_parents = []
                    for parent_id in parent_ids:
                        parent_text_key = (text_key[0], parent_id)
                        try:
                            check_parent = parent_text_key not in \
                                revision_keys[parent_id]
                        except KeyError:
                            # the parent parent_id is a ghost:
                            check_parent = False
                            # truncate the derived graph against this ghost.
                            parent_text_key = None
                        if check_parent:
                            # look at the parent commit details inventories to
                            # determine possible candidates in the per file graph.
                            try:
                                inv = inventory_cache[parent_id]
                            except KeyError:
                                inv = self.revision_tree(parent_id).inventory
                                inventory_cache[parent_id] = inv
                            try:
                                parent_entry = inv[text_key[0]]
                            except (KeyError, errors.NoSuchId):
                                parent_entry = None
                            if parent_entry is not None:
                                parent_text_key = (
                                    text_key[0], parent_entry.revision)
                            else:
                                parent_text_key = None
                        if parent_text_key is not None:
                            candidate_parents.append(
                                text_key_cache[parent_text_key])
                    parent_heads = text_graph.heads(candidate_parents)
                    new_parents = list(parent_heads)
                    new_parents.sort(key=lambda x:candidate_parents.index(x))
                    if new_parents == []:
                        new_parents = [NULL_REVISION]
                    text_index[text_key] = new_parents
        for text_key in invalid_keys:
            text_index[text_key] = [NULL_REVISION]
        return text_index

    def item_keys_introduced_by(self, revision_ids, _files_pb=None):
        """Get an iterable listing the keys of all the data introduced by a set
        of revision IDs.

        The keys will be ordered so that the corresponding items can be safely
        fetched and inserted in that order.

        :returns: An iterable producing tuples of (knit-kind, file-id,
            versions). knit-kind is one of 'file', 'inventory', 'signatures',
            'revisions'. file-id is None unless knit-kind is 'file'.
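
        A consuming sketch (illustrative only)::

            for knit_kind, file_id, versions in repo.item_keys_introduced_by(
                    revision_ids):
                if knit_kind == 'file':
                    pass  # fetch the texts for file_id in versions
        """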
        for result in self._find_file_keys_to_fetch(revision_ids, _files_pb):
            yield result
        for result in self._find_non_file_keys_to_fetch(revision_ids):
            yield result

    def _find_file_keys_to_fetch(self, revision_ids, pb):
        # XXX: it's a bit weird to control the inventory weave caching in this
        # generator. Ideally the caching would be done in fetch.py I think. Or
        # maybe this generator should explicitly have the contract that it
        # should not be iterated until the previously yielded item has been
        # processed.
        inv_w = self.inventories

        # file ids that changed
        file_ids = self.fileids_altered_by_revision_ids(revision_ids, inv_w)
        count = 0
        num_file_ids = len(file_ids)
        for file_id, altered_versions in file_ids.iteritems():
            if pb is not None:
                pb.update("Fetch texts", count, num_file_ids)
            count += 1
            yield ("file", file_id, altered_versions)

    def _find_non_file_keys_to_fetch(self, revision_ids):
        # inventory
        yield ("inventory", None, revision_ids)

        # signatures
        # XXX: Note ATM no callers actually pay attention to this return
        #      instead they just use the list of revision ids and ignore
        #      missing sigs. Consider removing this work entirely
        revisions_with_signatures = set(self.signatures.get_parent_map(
            [(r,) for r in revision_ids]))
        revisions_with_signatures = set(
            [r for (r,) in revisions_with_signatures])
        revisions_with_signatures.intersection_update(revision_ids)
        yield ("signatures", None, revisions_with_signatures)

        # revisions
        yield ("revisions", None, revision_ids)

    def get_inventory(self, revision_id):
        """Get Inventory object by revision id."""
        return self.iter_inventories([revision_id]).next()

    def iter_inventories(self, revision_ids, ordering=None):
        """Get many inventories by revision_ids.

        This will buffer some or all of the texts used in constructing the
        inventories in memory, but will only parse a single inventory at a
        time.

        :param revision_ids: The expected revision ids of the inventories.
        :param ordering: optional ordering, e.g. 'topological'. If not
            specified, the order of revision_ids will be preserved (by
            buffering if necessary).
        :return: An iterator of inventories.
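
        A usage sketch (illustrative; ``process`` is a hypothetical caller)::

            for inv in repo.iter_inventories(revision_ids,
                    ordering='topological'):
                process(inv)
        """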
        if ((None in revision_ids)
            or (_mod_revision.NULL_REVISION in revision_ids)):
            raise ValueError('cannot get null revision inventory')
        return self._iter_inventories(revision_ids, ordering)

    def _iter_inventories(self, revision_ids, ordering):
        """single-document based inventory iteration."""
        inv_xmls = self._iter_inventory_xmls(revision_ids, ordering)
        for text, revision_id in inv_xmls:
            yield self._deserialise_inventory(revision_id, text)

    def _iter_inventory_xmls(self, revision_ids, ordering):
        if ordering is None:
            order_as_requested = True
            ordering = 'unordered'
        else:
            order_as_requested = False
        keys = [(revision_id,) for revision_id in revision_ids]
        if not keys:
            return
        if order_as_requested:
            key_iter = iter(keys)
            next_key = key_iter.next()
        stream = self.inventories.get_record_stream(keys, ordering, True)
        text_chunks = {}
        for record in stream:
            if record.storage_kind != 'absent':
                chunks = record.get_bytes_as('chunked')
                if order_as_requested:
                    text_chunks[record.key] = chunks
                else:
                    yield ''.join(chunks), record.key[-1]
            else:
                raise errors.NoSuchRevision(self, record.key)
            if order_as_requested:
                # Yield as many results as we can while preserving order.
                while next_key in text_chunks:
                    chunks = text_chunks.pop(next_key)
                    yield ''.join(chunks), next_key[-1]
                    try:
                        next_key = key_iter.next()
                    except StopIteration:
                        # We still want to fully consume the get_record_stream,
                        # just in case it is not actually finished at this point
                        next_key = None
                        break

    def _deserialise_inventory(self, revision_id, xml):
        """Transform the xml into an inventory object.

        :param revision_id: The expected revision id of the inventory.
        :param xml: A serialised inventory.
        """
        result = self._serializer.read_inventory_from_string(xml, revision_id,
                    entry_cache=self._inventory_entry_cache,
                    return_from_cache=self._safe_to_return_from_cache)
        if result.revision_id != revision_id:
            raise AssertionError('revision id mismatch %s != %s' % (
                result.revision_id, revision_id))
        return result

    def get_serializer_format(self):
        return self._serializer.format_num

    def _get_inventory_xml(self, revision_id):
        """Get serialized inventory as a string."""
        texts = self._iter_inventory_xmls([revision_id], 'unordered')
        try:
            text, revision_id = texts.next()
        except StopIteration:
            raise errors.HistoryMissing(self, 'inventory', revision_id)
        return text

    def get_rev_id_for_revno(self, revno, known_pair):
        """Return the revision id of a revno, given a later (revno, revid)
        pair in the same history.
        """

            except UnicodeDecodeError:
                raise errors.NonAsciiRevisionId(method, self)

    def revision_graph_can_have_wrong_parents(self):
        """Is it possible for this repository to have a revision graph with
        incorrect parents?

        If True, then this repository must also implement
        _find_inconsistent_revision_parents so that check and reconcile can
        check for inconsistencies before proceeding with other checks that may
        depend on the revision index being consistent.
        """
        raise NotImplementedError(self.revision_graph_can_have_wrong_parents)


def install_revision(repository, rev, revision_tree):
    """Install all revision data into a repository."""
    install_revisions(repository, [(rev, revision_tree, None)])


def install_revisions(repository, iterable, num_revisions=None, pb=None):
    """Install all revision data into a repository.

    Accepts an iterable of revision, tree, signature tuples. The signature
    may be None.
    """
    repository.start_write_group()
    try:
        inventory_cache = lru_cache.LRUCache(10)
        for n, (revision, revision_tree, signature) in enumerate(iterable):
            _install_revision(repository, revision, revision_tree, signature,
                inventory_cache)
            if pb is not None:
                pb.update('Transferring revisions', n + 1, num_revisions)
    except:
        repository.abort_write_group()
        raise
    else:
        repository.commit_write_group()


def _install_revision(repository, rev, revision_tree, signature,
    inventory_cache):
    """Install all revision data into a repository."""
    present_parents = []
    parent_trees = {}
    for p_id in rev.parent_ids:
        if repository.has_revision(p_id):
            present_parents.append(p_id)
            parent_trees[p_id] = repository.revision_tree(p_id)
        else:
            parent_trees[p_id] = repository.revision_tree(
                _mod_revision.NULL_REVISION)

    inv = revision_tree.inventory
    entries = inv.iter_entries()
    # backwards compatibility hack: skip the root id.
    if not repository.supports_rich_root():
        path, root = entries.next()
        if root.revision != rev.revision_id:
            raise errors.IncompatibleRevision(repr(repository))
    text_keys = {}
    for path, ie in entries:
        text_keys[(ie.file_id, ie.revision)] = ie
    text_parent_map = repository.texts.get_parent_map(text_keys)
    missing_texts = set(text_keys) - set(text_parent_map)
    # Add the texts that are not already present
    for text_key in missing_texts:
        ie = text_keys[text_key]
        text_parents = []
        # FIXME: TODO: The following loop overlaps/duplicates that done by
        # commit to determine parents. There is a latent/real bug here where
        # the parents inserted are not those commit would do - in particular
        # they are not filtered by heads(). RBC, AB
        for revision, tree in parent_trees.iteritems():
            if ie.file_id not in tree:
                continue
            parent_id = tree.inventory[ie.file_id].revision
            if parent_id in text_parents:
                continue
            text_parents.append((ie.file_id, parent_id))
        lines = revision_tree.get_file(ie.file_id).readlines()
        repository.texts.add_lines(text_key, text_parents, lines)
    try:
        # install the inventory
        if repository._format._commit_inv_deltas and len(rev.parent_ids):
            # Cache this inventory
            inventory_cache[rev.revision_id] = inv
            try:
                basis_inv = inventory_cache[rev.parent_ids[0]]
            except KeyError:
                repository.add_inventory(rev.revision_id, inv, present_parents)
            else:
                delta = inv._make_delta(basis_inv)
                repository.add_inventory_by_delta(rev.parent_ids[0], delta,
                    rev.revision_id, present_parents)
        else:
            repository.add_inventory(rev.revision_id, inv, present_parents)
    except errors.RevisionAlreadyPresent:
        pass
    if signature is not None:
        repository.add_signature_text(rev.revision_id, signature)
    repository.add_revision(rev.revision_id, rev, inv)


class MetaDirRepository(Repository):
    """Repositories in the new meta-dir layout.
    """

        :param revision_id: if None all content is copied, if NULL_REVISION no
            content is copied.
        """
        ui.ui_factory.warn_experimental_format_fetch(self)
        from bzrlib.fetch import RepoFetcher
        # See <https://launchpad.net/bugs/456077> asking for a warning here
        if self.source._format.network_name() != self.target._format.network_name():
            ui.ui_factory.show_user_warning('cross_format_fetch',
                from_format=self.source._format,
                to_format=self.target._format)
        f = RepoFetcher(to_repository=self.target,
            from_repository=self.source,
            last_revision=revision_id,
            fetch_spec=fetch_spec,
            find_ghosts=find_ghosts)

    def _walk_to_common_revisions(self, revision_ids):
        """Walk out from revision_ids in source to revisions target has.

        :param revision_ids: The start point for the search.
        :return: A set of revision ids.
        """
        target_graph = self.target.get_graph()
        revision_ids = frozenset(revision_ids)
        missing_revs = set()
        source_graph = self.source.get_graph()
        # ensure we don't pay silly lookup costs.
        searcher = source_graph._make_breadth_first_searcher(revision_ids)
        null_set = frozenset([_mod_revision.NULL_REVISION])
        searcher_exhausted = False
        while True:
            next_revs = set()
            ghosts = set()
            # Iterate the searcher until we have enough next_revs
            while len(next_revs) < self._walk_to_common_revisions_batch_size:
                try:
                    next_revs_part, ghosts_part = searcher.next_with_ghosts()
                    next_revs.update(next_revs_part)
                    ghosts.update(ghosts_part)
                except StopIteration:
                    searcher_exhausted = True
                    break
            # If there are ghosts in the source graph, and the caller asked for
            # them, make sure that they are present in the target.
            # We don't care about other ghosts as we can't fetch them and
            # haven't been asked to.
            ghosts_to_check = set(revision_ids.intersection(ghosts))
            revs_to_get = set(next_revs).union(ghosts_to_check)
            if revs_to_get:
                have_revs = set(target_graph.get_parent_map(revs_to_get))
                # we always have NULL_REVISION present.
                have_revs = have_revs.union(null_set)
                # Check if the target is missing any ghosts we need.
                ghosts_to_check.difference_update(have_revs)
                if ghosts_to_check:
                    # One of the caller's revision_ids is a ghost in both the
                    # source and the target.
                    raise errors.NoSuchRevision(
                        self.source, ghosts_to_check.pop())
                missing_revs.update(next_revs - have_revs)
                # Because we may have walked past the original stop point, make
                # sure everything is stopped
                stop_revs = searcher.find_seen_ancestors(have_revs)
                searcher.stop_searching_any(stop_revs)
            if searcher_exhausted:
                break
        return searcher.get_result()

    @needs_read_lock
    def search_missing_revision_ids(self,
            revision_id=symbol_versioning.DEPRECATED_PARAMETER,
            find_ghosts=True, revision_ids=None, if_present_ids=None,
            limit=None):
        """Return the revision ids that source has that target does not.

        :param revision_id: only return revision ids included by this
            revision_id.
        :param revision_ids: return revision ids included by these
            revision_ids. NoSuchRevision will be raised if any of these
            revisions are not present.
        :param if_present_ids: like revision_ids, but will not cause
            NoSuchRevision if any of these are absent, instead they will simply
            not be in the result. This is useful for e.g. finding revisions
            to fetch for tags, which may reference absent revisions.
        :param find_ghosts: If True find missing revisions in deep history
            rather than just finding the surface difference.
        :param limit: Maximum number of revisions to return, topologically
            sorted.
        :return: A bzrlib.graph.SearchResult.
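
        A usage sketch (illustrative; ``source`` and ``target`` are assumed
        repositories)::

            inter = InterRepository.get(source, target)
            search_result = inter.search_missing_revision_ids(
                revision_ids=[tip_rev_id], find_ghosts=False)
        """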
        # stop searching at found target revisions.
        if not find_ghosts and revision_id is not None:
            return self._walk_to_common_revisions([revision_id])
        # generic, possibly worst case, slow code path.
        target_ids = set(self.target.all_revision_ids())
        if revision_id is not None:
            source_ids = self.source.get_ancestry(revision_id)
            if source_ids[0] is not None:
                raise AssertionError()
            source_ids.pop(0)
        else:
            source_ids = self.source.all_revision_ids()
        result_set = set(source_ids).difference(target_ids)
        return self.source.revision_ids_to_search_result(result_set)

    @staticmethod
    def _same_model(source, target):

                "different serializers")


class InterSameDataRepository(InterRepository):
    """Code for converting between repositories that represent the same data.

    Data format and model must match for this to work.
    """

    @classmethod
    def _get_repo_format_to_test(self):
        """Repository format for testing with.

        InterSameData can pull from subtree to subtree and from non-subtree to
        non-subtree, so we test this with the richest repository format.
        """
        from bzrlib.repofmt import knitrepo
        return knitrepo.RepositoryFormatKnit3()

    @staticmethod
    def is_compatible(source, target):
        return InterRepository._same_model(source, target)


class InterDifferingSerializer(InterRepository):

    @classmethod
    def _get_repo_format_to_test(self):
        return None

    @staticmethod
    def is_compatible(source, target):
        """Be compatible with Knit2 source and Knit3 target"""
        # This is redundant with format.check_conversion_target(), however that
        # raises an exception, and we just want to say "False" as in we won't
        # support converting between these formats.
        if 'IDS_never' in debug.debug_flags:
            return False
        if source.supports_rich_root() and not target.supports_rich_root():
            return False
        if (source._format.supports_tree_reference
            and not target._format.supports_tree_reference):
            return False
        if target._fallback_repositories and target._format.supports_chks:
            # IDS doesn't know how to copy CHKs for the parent inventories it
            # adds to stacked repos.
            return False
        if 'IDS_always' in debug.debug_flags:
            return True
        # Only use this code path for local source and target. IDS does far
        # too much IO (both bandwidth and roundtrips) over a network.
        if not source.bzrdir.transport.base.startswith('file:///'):
            return False
        if not target.bzrdir.transport.base.startswith('file:///'):
            return False
        return True

    def _get_trees(self, revision_ids, cache):
        possible_trees = []
        for rev_id in revision_ids:
            if rev_id in cache:
                possible_trees.append((rev_id, cache[rev_id]))
            else:
                # Not cached, but inventory might be present anyway.
                try:
                    tree = self.source.revision_tree(rev_id)
                except errors.NoSuchRevision:
                    # Nope, parent is ghost.
                    pass
                else:
                    cache[rev_id] = tree
                    possible_trees.append((rev_id, tree))
        return possible_trees

    def _get_delta_for_revision(self, tree, parent_ids, possible_trees):
        """Get the best delta and base for this revision.

        :return: (basis_id, delta)
        """
        deltas = []
        # Generate deltas against each tree, to find the shortest.
        texts_possibly_new_in_tree = set()
        for basis_id, basis_tree in possible_trees:
            delta = tree.inventory._make_delta(basis_tree.inventory)
            for old_path, new_path, file_id, new_entry in delta:
                if new_path is None:
                    # This file_id isn't present in the new rev, so we don't
                    # care about it.
                    continue
                if not new_path:
                    # Rich roots are handled elsewhere...
                    continue
                kind = new_entry.kind
                if kind != 'directory' and kind != 'file':
                    # No text record associated with this inventory entry.
                    continue
                # This is a directory or file that has changed somehow.
                texts_possibly_new_in_tree.add((file_id, new_entry.revision))
            deltas.append((len(delta), basis_id, delta))
        deltas.sort()
        return deltas[0][1:]

    def _fetch_parent_invs_for_stacking(self, parent_map, cache):
        """Find all parent revisions that are absent, but for which the
        inventory is present, and copy those inventories.

        This is necessary to preserve correctness when the source is stacked
        without fallbacks configured. (Note that in cases like upgrade the
        source may not have _fallback_repositories even though it is
        stacked.)
        """
        parent_revs = set()
        for parents in parent_map.values():
            parent_revs.update(parents)
        present_parents = self.source.get_parent_map(parent_revs)
        absent_parents = set(parent_revs).difference(present_parents)
        parent_invs_keys_for_stacking = self.source.inventories.get_parent_map(
            (rev_id,) for rev_id in absent_parents)
        parent_inv_ids = [key[-1] for key in parent_invs_keys_for_stacking]
        for parent_tree in self.source.revision_trees(parent_inv_ids):
            current_revision_id = parent_tree.get_revision_id()
            parents_parents_keys = parent_invs_keys_for_stacking[
                (current_revision_id,)]
            parents_parents = [key[-1] for key in parents_parents_keys]
            basis_id = _mod_revision.NULL_REVISION
            basis_tree = self.source.revision_tree(basis_id)
            delta = parent_tree.inventory._make_delta(basis_tree.inventory)
            self.target.add_inventory_by_delta(
                basis_id, delta, current_revision_id, parents_parents)
            cache[current_revision_id] = parent_tree

    def _fetch_batch(self, revision_ids, basis_id, cache):
        """Fetch across a few revisions.

        :param revision_ids: The revisions to copy
        :param basis_id: The revision_id of a tree that must be in cache, used
            as a basis for delta when no other base is available
        :param cache: A cache of RevisionTrees that we can use.
        :return: The revision_id of the last converted tree. The RevisionTree
            for it will be in cache
        """
3679
# Walk though all revisions; get inventory deltas, copy referenced
3680
# texts that delta references, insert the delta, revision and
3682
root_keys_to_create = set()
3685
pending_revisions = []
3686
parent_map = self.source.get_parent_map(revision_ids)
3687
self._fetch_parent_invs_for_stacking(parent_map, cache)
3688
self.source._safe_to_return_from_cache = True
3689
for tree in self.source.revision_trees(revision_ids):
3690
# Find a inventory delta for this revision.
3691
# Find text entries that need to be copied, too.
3692
current_revision_id = tree.get_revision_id()
3693
parent_ids = parent_map.get(current_revision_id, ())
3694
parent_trees = self._get_trees(parent_ids, cache)
3695
possible_trees = list(parent_trees)
3696
if len(possible_trees) == 0:
3697
# There either aren't any parents, or the parents are ghosts,
3698
# so just use the last converted tree.
3699
possible_trees.append((basis_id, cache[basis_id]))
3700
basis_id, delta = self._get_delta_for_revision(tree, parent_ids,
3702
revision = self.source.get_revision(current_revision_id)
3703
pending_deltas.append((basis_id, delta,
3704
current_revision_id, revision.parent_ids))
3705
if self._converting_to_rich_root:
3706
self._revision_id_to_root_id[current_revision_id] = \
3708
# Determine which texts are in present in this revision but not in
3709
# any of the available parents.
3710
texts_possibly_new_in_tree = set()
3711
for old_path, new_path, file_id, entry in delta:
3712
if new_path is None:
3713
# This file_id isn't present in the new rev
3717
if not self.target.supports_rich_root():
3718
# The target doesn't support rich root, so we don't
3721
if self._converting_to_rich_root:
3722
# This can't be copied normally, we have to insert
3724
root_keys_to_create.add((file_id, entry.revision))
3727
texts_possibly_new_in_tree.add((file_id, entry.revision))
3728
for basis_id, basis_tree in possible_trees:
3729
basis_inv = basis_tree.inventory
3730
for file_key in list(texts_possibly_new_in_tree):
3731
file_id, file_revision = file_key
3733
entry = basis_inv[file_id]
3734
except errors.NoSuchId:
3736
if entry.revision == file_revision:
3737
texts_possibly_new_in_tree.remove(file_key)
3738
text_keys.update(texts_possibly_new_in_tree)
3739
pending_revisions.append(revision)
3740
cache[current_revision_id] = tree
3741
basis_id = current_revision_id
3742
self.source._safe_to_return_from_cache = False
3744
from_texts = self.source.texts
3745
to_texts = self.target.texts
3746
if root_keys_to_create:
3747
root_stream = _mod_fetch._new_root_data_stream(
3748
root_keys_to_create, self._revision_id_to_root_id, parent_map,
3750
to_texts.insert_record_stream(root_stream)
3751
to_texts.insert_record_stream(from_texts.get_record_stream(
3752
text_keys, self.target._format._fetch_order,
3753
not self.target._format._fetch_uses_deltas))
3754
# insert inventory deltas
3755
for delta in pending_deltas:
3756
self.target.add_inventory_by_delta(*delta)
3757
if self.target._fallback_repositories:
3758
# Make sure this stacked repository has all the parent inventories
3759
# for the new revisions that we are about to insert. We do this
3760
# before adding the revisions so that no revision is added until
3761
# all the inventories it may depend on are added.
3762
# Note that this is overzealous, as we may have fetched these in an
3765
revision_ids = set()
3766
for revision in pending_revisions:
3767
revision_ids.add(revision.revision_id)
3768
parent_ids.update(revision.parent_ids)
3769
parent_ids.difference_update(revision_ids)
3770
parent_ids.discard(_mod_revision.NULL_REVISION)
3771
parent_map = self.source.get_parent_map(parent_ids)
3772
# we iterate over parent_map and not parent_ids because we don't
3773
# want to try copying any revision which is a ghost
3774
for parent_tree in self.source.revision_trees(parent_map):
3775
current_revision_id = parent_tree.get_revision_id()
3776
parents_parents = parent_map[current_revision_id]
3777
possible_trees = self._get_trees(parents_parents, cache)
3778
if len(possible_trees) == 0:
3779
# There either aren't any parents, or the parents are
3780
# ghosts, so just use the last converted tree.
3781
possible_trees.append((basis_id, cache[basis_id]))
3782
basis_id, delta = self._get_delta_for_revision(parent_tree,
3783
parents_parents, possible_trees)
3784
self.target.add_inventory_by_delta(
3785
basis_id, delta, current_revision_id, parents_parents)
3786
# insert signatures and revisions
3787
for revision in pending_revisions:
3789
signature = self.source.get_signature_text(
3790
revision.revision_id)
3791
self.target.add_signature_text(revision.revision_id,
3793
except errors.NoSuchRevision:
3795
self.target.add_revision(revision.revision_id, revision)
3798
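
    # Editorial note on ordering: texts are inserted first, then inventory
    # deltas, then (for stacked targets) parent inventories, and revisions
    # last, so an aborted batch never leaves a revision in the target without
    # the data it references. A hypothetical call:
    #
    #   basis_id = inter._fetch_batch(['rev-1', 'rev-2'], basis_id, cache)
    #   # returns 'rev-2'; its RevisionTree is now in cache for the next batch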
    def _fetch_all_revisions(self, revision_ids, pb):
        """Fetch everything for the list of revisions.

        :param revision_ids: The list of revisions to fetch. Must be in
            topological order.
        :param pb: A ProgressTask
        :return: None
        """
        basis_id, basis_tree = self._get_basis(revision_ids[0])
        batch_size = 100
        cache = lru_cache.LRUCache(100)
        cache[basis_id] = basis_tree
        del basis_tree # We don't want to hang on to it here
        hints = []
        for offset in range(0, len(revision_ids), batch_size):
            self.target.start_write_group()
            try:
                pb.update('Transferring revisions', offset,
                          len(revision_ids))
                batch = revision_ids[offset:offset+batch_size]
                basis_id = self._fetch_batch(batch, basis_id, cache)
            except:
                self.source._safe_to_return_from_cache = False
                self.target.abort_write_group()
                raise
            else:
                hint = self.target.commit_write_group()
                if hint:
                    hints.extend(hint)
        if hints and self.target._format.pack_compresses:
            self.target.pack(hint=hints)
        pb.update('Transferring revisions', len(revision_ids),
                  len(revision_ids))
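
    # Editorial sketch: each batch of 100 revisions runs in its own write
    # group - committed on success, aborted on any error - so interruption
    # loses at most one uncommitted batch. Assuming a locked target and a
    # topologically sorted id list:
    #
    #   pb = ui.ui_factory.nested_progress_bar()
    #   try:
    #       inter._fetch_all_revisions(sorted_ids, pb)
    #   finally:
    #       pb.finished()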
    def fetch(self, revision_id=None, pb=None, find_ghosts=False,
            fetch_spec=None):
        """See InterRepository.fetch()."""
        if fetch_spec is not None:
            raise AssertionError("Not implemented yet...")
        ui.ui_factory.warn_experimental_format_fetch(self)
        if (not self.source.supports_rich_root()
            and self.target.supports_rich_root()):
            self._converting_to_rich_root = True
            self._revision_id_to_root_id = {}
        else:
            self._converting_to_rich_root = False
        # See <https://launchpad.net/bugs/456077> asking for a warning here
        if self.source._format.network_name() != self.target._format.network_name():
            ui.ui_factory.show_user_warning('cross_format_fetch',
                from_format=self.source._format,
                to_format=self.target._format)
        revision_ids = self.target.search_missing_revision_ids(self.source,
            revision_id, find_ghosts=find_ghosts).get_keys()
        if not revision_ids:
            return 0, 0
        revision_ids = tsort.topo_sort(
            self.source.get_graph().get_parent_map(revision_ids))
        if not revision_ids:
            return 0, 0
        # Walk through all revisions; get inventory deltas, copy referenced
        # texts that delta references, insert the delta, revision and
        # signature.
        if pb is None:
            my_pb = ui.ui_factory.nested_progress_bar()
            pb = my_pb
        else:
            symbol_versioning.warn(
                symbol_versioning.deprecated_in((1, 14, 0))
                % "pb parameter to fetch()")
            my_pb = None
        try:
            self._fetch_all_revisions(revision_ids, pb)
        finally:
            if my_pb is not None:
                my_pb.finished()
        return len(revision_ids), 0
    def _get_basis(self, first_revision_id):
        """Get a revision and tree which exists in the target.

        This assumes that first_revision_id is selected for transmission
        because all other ancestors are already present. If we can't find an
        ancestor we fall back to NULL_REVISION since we know that is safe.

        :return: (basis_id, basis_tree)
        """
        first_rev = self.source.get_revision(first_revision_id)
        try:
            basis_id = first_rev.parent_ids[0]
            # only valid as a basis if the target has it
            self.target.get_revision(basis_id)
            # Try to get a basis tree - if it's a ghost it will hit the
            # NoSuchRevision case.
            basis_tree = self.source.revision_tree(basis_id)
        except (IndexError, errors.NoSuchRevision):
            basis_id = _mod_revision.NULL_REVISION
            basis_tree = self.source.revision_tree(basis_id)
        return basis_id, basis_tree
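
    # Editorial example: for a root revision, parent_ids[0] raises IndexError
    # and we fall back to NULL_REVISION, so the first batch is streamed as
    # deltas against the empty tree:
    #
    #   basis_id, basis_tree = inter._get_basis('first-missing-rev')
    #   # basis_id is a parent the target already has, or
    #   # _mod_revision.NULL_REVISION when no parent qualifies.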
InterRepository.register_optimiser(InterDifferingSerializer)
InterRepository.register_optimiser(InterSameDataRepository)


class CopyConverter(object):
    """A repository conversion tool which just performs a copy of the content.

    This is slow but quite reliable.
    """
class StreamSink(object):
    """An object that can insert a stream into a repository.

    This interface handles the complexity of reserialising inventories and
    revisions from different formats, and allows unidirectional insertion into
    stacked repositories without looking for the missing basis parents
    beforehand.
    """

    def __init__(self, target_repo):
        self.target_repo = target_repo

    def insert_stream(self, stream, src_format, resume_tokens):
        """Insert a stream's content into the target repository.

        :param src_format: a bzr repository format.
        :return: a list of resume tokens, and an iterable of keys for
            additional items required before the insertion can be completed.
        """
        self.target_repo.lock_write()
        try:
            if resume_tokens:
                self.target_repo.resume_write_group(resume_tokens)
                is_resume = True
            else:
                self.target_repo.start_write_group()
                is_resume = False
            try:
                # locked_insert_stream performs a commit|suspend.
                missing_keys = self.insert_stream_without_locking(stream,
                                    src_format, is_resume)
                if missing_keys:
                    # Suspend the write group and tell the caller what is
                    # missing. We know we can suspend or else we would not
                    # have entered this code path. (All repositories that can
                    # handle missing keys can handle suspending a write
                    # group).
                    write_group_tokens = self.target_repo.suspend_write_group()
                    return write_group_tokens, missing_keys
                hint = self.target_repo.commit_write_group()
                to_serializer = self.target_repo._format._serializer
                src_serializer = src_format._serializer
                if (to_serializer != src_serializer and
                    self.target_repo._format.pack_compresses):
                    self.target_repo.pack(hint=hint)
                return [], set()
            except:
                self.target_repo.abort_write_group(suppress_errors=True)
                raise
        finally:
            self.target_repo.unlock()
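
    # Editorial sketch of the resume protocol: a call that returns missing
    # keys leaves a suspended write group behind; the caller fetches the
    # missing items and calls insert_stream again with the returned tokens:
    #
    #   tokens, missing = sink.insert_stream(stream, src_format, [])
    #   if missing:
    #       more = source.get_stream_for_missing_keys(missing)
    #       tokens, missing = sink.insert_stream(more, src_format, tokens)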
    def insert_stream_without_locking(self, stream, src_format,
                                      is_resume=False):
        """Insert a stream's content into the target repository.

        This assumes that you already have a locked repository and an active
        write group.

        :param src_format: a bzr repository format.
        :param is_resume: Passed down to get_missing_parent_inventories to
            indicate if we should be checking for missing texts at the same
            time.
        :return: A set of keys that are missing.
        """
        if not self.target_repo.is_write_locked():
            raise errors.ObjectNotLocked(self)
        if not self.target_repo.is_in_write_group():
            raise errors.BzrError('you must already be in a write group')
        to_serializer = self.target_repo._format._serializer
        src_serializer = src_format._serializer
        new_pack = None
        if to_serializer == src_serializer:
            # If serializers match and the target is a pack repository, set
            # the write cache size on the new pack. This avoids poor
            # performance on transports where append is unbuffered (such as
            # RemoteTransport). This is safe to do because nothing should
            # read back from the target repository while a stream with
            # matching serialization is being inserted.
            # The exception is that a delta record from the source that
            # should be a fulltext may need to be expanded by the target (see
            # test_fetch_revisions_with_deltas_into_pack); but we take care
            # to explicitly flush any buffered writes first in that rare
            # case.
            try:
                new_pack = self.target_repo._pack_collection._new_pack
            except AttributeError:
                # Not a pack repository
                pass
            else:
                new_pack.set_write_cache_size(1024*1024)
        for substream_type, substream in stream:
            if 'stream' in debug.debug_flags:
                mutter('inserting substream: %s', substream_type)
            if substream_type == 'texts':
                self.target_repo.texts.insert_record_stream(substream)
            elif substream_type == 'inventories':
                if src_serializer == to_serializer:
                    self.target_repo.inventories.insert_record_stream(
                        substream)
                else:
                    self._extract_and_insert_inventories(
                        substream, src_serializer)
            elif substream_type == 'inventory-deltas':
                self._extract_and_insert_inventory_deltas(
                    substream, src_serializer)
            elif substream_type == 'chk_bytes':
                # XXX: This doesn't support conversions, as it assumes the
                # conversion was done in the fetch code.
                self.target_repo.chk_bytes.insert_record_stream(substream)
            elif substream_type == 'revisions':
                # This may fall back to extract-and-insert more often than
                # required if the serializers are different only in terms of
                # the inventory.
                if src_serializer == to_serializer:
                    self.target_repo.revisions.insert_record_stream(
                        substream)
                else:
                    self._extract_and_insert_revisions(substream,
                        src_serializer)
            elif substream_type == 'signatures':
                self.target_repo.signatures.insert_record_stream(substream)
            else:
                raise AssertionError('unknown substream type: %s'
                    % (substream_type,))
        # Done inserting data, and the missing_keys calculations will try to
        # read back from the inserted data, so flush the writes to the new
        # pack (if this is pack format).
        if new_pack is not None:
            new_pack._write_data('', flush=True)
        # Find all the new revisions (including ones from resume_tokens)
        missing_keys = self.target_repo.get_missing_parent_inventories(
            check_for_missing_texts=is_resume)
        try:
            for prefix, versioned_file in (
                ('texts', self.target_repo.texts),
                ('inventories', self.target_repo.inventories),
                ('revisions', self.target_repo.revisions),
                ('signatures', self.target_repo.signatures),
                ('chk_bytes', self.target_repo.chk_bytes),
                ):
                if versioned_file is None:
                    continue
                # TODO: key is often going to be a StaticTuple object.
                #       I don't believe we can define a method by which
                #       (prefix,) + StaticTuple will work, though we could
                #       define a StaticTuple.sq_concat that would allow you
                #       to pass in either a tuple or a StaticTuple as the
                #       second object, so instead we could have:
                #       StaticTuple(prefix) + key here...
                missing_keys.update((prefix,) + key for key in
                    versioned_file.get_missing_compression_parent_keys())
        except NotImplementedError:
            # cannot even attempt suspending, and missing would have failed
            # during stream insertion.
            missing_keys = set()
        return missing_keys
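
    # Editorial summary: substreams are byte-copied when both serializers
    # match and reserialised otherwise. A minimal hand-driven sketch, inside
    # an already locked repository and active write group:
    #
    #   missing = sink.insert_stream_without_locking(stream, src_format)
    #   # `missing` is non-empty when the stream referenced basis
    #   # inventories or compression parents the target does not yet have.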
    def _extract_and_insert_inventory_deltas(self, substream, serializer):
        target_rich_root = self.target_repo._format.rich_root_data
        target_tree_refs = self.target_repo._format.supports_tree_reference
        for record in substream:
            # Insert the delta directly
            inventory_delta_bytes = record.get_bytes_as('fulltext')
            deserialiser = inventory_delta.InventoryDeltaDeserializer()
            try:
                parse_result = deserialiser.parse_text_bytes(
                    inventory_delta_bytes)
            except inventory_delta.IncompatibleInventoryDelta, err:
                trace.mutter("Incompatible delta: %s", err.msg)
                raise errors.IncompatibleRevision(self.target_repo._format)
            basis_id, new_id, rich_root, tree_refs, inv_delta = parse_result
            revision_id = new_id
            parents = [key[0] for key in record.parents]
            self.target_repo.add_inventory_by_delta(
                basis_id, inv_delta, revision_id, parents)
    def _extract_and_insert_inventories(self, substream, serializer,
            parse_delta=None):
        """Generate a new inventory versionedfile in target, converting data.

        The inventory is retrieved from the source (deserializing it), and
        stored in the target (reserializing it in a different format).
        """
        target_rich_root = self.target_repo._format.rich_root_data
        target_tree_refs = self.target_repo._format.supports_tree_reference
        for record in substream:
            # It's not a delta, so it must be a fulltext in the source
            # serializer's format.
            bytes = record.get_bytes_as('fulltext')
            revision_id = record.key[0]
            inv = serializer.read_inventory_from_string(bytes, revision_id)
            parents = [key[0] for key in record.parents]
            self.target_repo.add_inventory(revision_id, inv, parents)
            # No need to keep holding this full inv in memory when the rest of
            # the substream is likely to be all deltas.
            del inv
    def _extract_and_insert_revisions(self, substream, serializer):
        for record in substream:
            bytes = record.get_bytes_as('fulltext')
            revision_id = record.key[0]
            rev = serializer.read_revision_from_string(bytes)
            if rev.revision_id != revision_id:
                raise AssertionError('mismatched revision id: %s != %s'
                    % (rev, revision_id))
            self.target_repo.add_revision(revision_id, rev)

    def finished(self):
        if self.target_repo._format._fetch_reconcile:
            self.target_repo.reconcile()
class StreamSource(object):
    """A source of a stream for fetching between repositories."""

    def __init__(self, from_repository, to_format):
        """Create a StreamSource streaming from from_repository."""
        self.from_repository = from_repository
        self.to_format = to_format
        self._record_counter = RecordCounter()

    def delta_on_metadata(self):
        """Return True if deltas are permitted on metadata streams.

        That is, on revisions and signatures.
        """
        src_serializer = self.from_repository._format._serializer
        target_serializer = self.to_format._serializer
        return (self.to_format._fetch_uses_deltas and
            src_serializer == target_serializer)
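
    # Editorial example: metadata deltas are only safe when no conversion is
    # possible, i.e. both serializers match and the target format fetches
    # deltas at all. With hypothetical repos of differing formats:
    #
    #   source = StreamSource(repo_1_9, format_2a)
    #   source.delta_on_metadata()   # -> False: serializers differ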
    def _fetch_revision_texts(self, revs):
        # fetch signatures first and then the revision texts
        # may need to be an InterRevisionStore call here.
        from_sf = self.from_repository.signatures
        # A missing signature is just skipped.
        keys = [(rev_id,) for rev_id in revs]
        signatures = versionedfile.filter_absent(from_sf.get_record_stream(
            keys,
            self.to_format._fetch_order,
            not self.to_format._fetch_uses_deltas))
        # If a revision has a delta, it is expanded inside the
        # insert_record_stream code now.
        from_rf = self.from_repository.revisions
        revisions = from_rf.get_record_stream(
            keys,
            self.to_format._fetch_order,
            not self.delta_on_metadata())
        return [('signatures', signatures), ('revisions', revisions)]
    def _generate_root_texts(self, revs):
        """This will be called by get_stream between fetching weave texts and
        fetching the inventory weave.
        """
        if self._rich_root_upgrade():
            return _mod_fetch.Inter1and2Helper(
                self.from_repository).generate_root_texts(revs)
        else:
            return []
    def get_stream(self, search):
        phase = 'file'
        revs = search.get_keys()
        graph = self.from_repository.get_graph()
        revs = tsort.topo_sort(graph.get_parent_map(revs))
        data_to_fetch = self.from_repository.item_keys_introduced_by(revs)
        text_keys = []
        for knit_kind, file_id, revisions in data_to_fetch:
            if knit_kind != phase:
                phase = knit_kind
                # Make a new progress bar for this phase
            if knit_kind == "file":
                # Accumulate file texts
                text_keys.extend([(file_id, revision) for revision in
                    revisions])
            elif knit_kind == "inventory":
                # Now copy the file texts.
                from_texts = self.from_repository.texts
                yield ('texts', from_texts.get_record_stream(
                    text_keys, self.to_format._fetch_order,
                    not self.to_format._fetch_uses_deltas))
                # Cause an error if a text occurs after we have done the
                # copy.
                text_keys = None
                # Before we process the inventory we generate the root
                # texts (if necessary) so that the inventories reference
                # them.
                for _ in self._generate_root_texts(revs):
                    yield _
                # we fetch only the referenced inventories because we do not
                # know for unselected inventories whether all their required
                # texts are present in the other repository - it could be
                # corrupt.
                for info in self._get_inventory_stream(revs):
                    yield info
            elif knit_kind == "signatures":
                # Nothing to do here; this will be taken care of when
                # _fetch_revision_texts happens.
                pass
            elif knit_kind == "revisions":
                for record in self._fetch_revision_texts(revs):
                    yield record
            else:
                raise AssertionError("Unknown knit kind %r" % knit_kind)
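
    # Editorial sketch of consumption: get_stream yields (substream_type,
    # records) pairs in insertion order - texts, then root texts and
    # inventories, then signatures and revisions - the shape that
    # StreamSink.insert_stream expects:
    #
    #   stream = source.get_stream(search_result)
    #   tokens, missing = sink.insert_stream(stream, from_format, [])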
    def get_stream_for_missing_keys(self, missing_keys):
        # missing keys can only occur when we are byte copying and not
        # translating (because translation means we don't send
        # unreconstructable deltas ever).
        keys = {}
        keys['texts'] = set()
        keys['revisions'] = set()
        keys['inventories'] = set()
        keys['chk_bytes'] = set()
        keys['signatures'] = set()
        for key in missing_keys:
            keys[key[0]].add(key[1:])
        if len(keys['revisions']):
            # If we allowed copying revisions at this point, we could end up
            # copying a revision without copying its required texts: a
            # violation of the requirements for repository integrity.
            raise AssertionError(
                'cannot copy revisions to fill in missing deltas %s' % (
                    keys['revisions'],))
        for substream_kind, keys in keys.iteritems():
            vf = getattr(self.from_repository, substream_kind)
            if vf is None and keys:
                raise AssertionError(
                    "cannot fill in keys for a versioned file we don't"
                    " have: %s needs %s" % (substream_kind, keys))
            if not keys:
                # No need to stream something we don't have
                continue
            if substream_kind == 'inventories':
                # Some missing keys are genuinely ghosts, filter those out.
                present = self.from_repository.inventories.get_parent_map(keys)
                revs = [key[0] for key in present]
                # Get the inventory stream more-or-less as we do for the
                # original stream; there's no reason to assume that records
                # direct from the source will be suitable for the sink. (Think
                # e.g. 2a -> 1.9-rich-root).
                for info in self._get_inventory_stream(revs, missing=True):
                    yield info
                continue
            # Ask for full texts always so that we don't need more round trips
            # after this stream.
            # Some of the missing keys are genuinely ghosts, so filter absent
            # records. The Sink is responsible for doing another check to
            # ensure that ghosts don't introduce missing data for future
            # fetches.
            stream = versionedfile.filter_absent(vf.get_record_stream(keys,
                self.to_format._fetch_order, True))
            yield substream_kind, stream
    def inventory_fetch_order(self):
        if self._rich_root_upgrade():
            return 'topological'
        else:
            return self.to_format._fetch_order

    def _rich_root_upgrade(self):
        return (not self.from_repository._format.rich_root_data and
            self.to_format.rich_root_data)
    def _get_inventory_stream(self, revision_ids, missing=False):
        from_format = self.from_repository._format
        if (from_format.supports_chks and self.to_format.supports_chks and
            from_format.network_name() == self.to_format.network_name()):
            raise AssertionError(
                "this case should be handled by GroupCHKStreamSource")
        elif 'forceinvdeltas' in debug.debug_flags:
            return self._get_convertable_inventory_stream(revision_ids,
                    delta_versus_null=missing)
        elif from_format.network_name() == self.to_format.network_name():
            # Same format.
            return self._get_simple_inventory_stream(revision_ids,
                    missing=missing)
        elif (not from_format.supports_chks and not self.to_format.supports_chks
                and from_format._serializer == self.to_format._serializer):
            # Essentially the same format.
            return self._get_simple_inventory_stream(revision_ids,
                    missing=missing)
        else:
            # Any time we switch serializations, we want to use an
            # inventory-delta based approach.
            return self._get_convertable_inventory_stream(revision_ids,
                    delta_versus_null=missing)
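
    # Editorial summary of the dispatch above: identical network names (or
    # matching non-chk serializers) take the cheap byte-copy path; anything
    # that crosses a serializer boundary is sent as inventory-deltas, which
    # any modern sink can apply. -Dforceinvdeltas forces the conversion path
    # for testing:
    #
    #   debug.debug_flags.add('forceinvdeltas')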
    def _get_simple_inventory_stream(self, revision_ids, missing=False):
        # NB: This currently reopens the inventory weave in source;
        # using a single stream interface instead would avoid this.
        from_weave = self.from_repository.inventories
        if missing:
            delta_closure = True
        else:
            delta_closure = not self.delta_on_metadata()
        yield ('inventories', from_weave.get_record_stream(
            [(rev_id,) for rev_id in revision_ids],
            self.inventory_fetch_order(), delta_closure))
    def _get_convertable_inventory_stream(self, revision_ids,
                                          delta_versus_null=False):
        # The two formats are sufficiently different that there is no fast
        # path, so we need to send just inventorydeltas, which any
        # sufficiently modern client can insert into any repository.
        # The StreamSink code expects to be able to convert on the target,
        # so we need to put bytes-on-the-wire that can be converted. That
        # means inventory deltas (if the remote is <1.19, RemoteStreamSink
        # will fall back to VFS to insert the deltas).
        yield ('inventory-deltas',
           self._stream_invs_as_deltas(revision_ids,
                                       delta_versus_null=delta_versus_null))
    def _stream_invs_as_deltas(self, revision_ids, delta_versus_null=False):
        """Return a stream of inventory-deltas for the given rev ids.

        :param revision_ids: The list of inventories to transmit
        :param delta_versus_null: Don't try to find a minimal delta for this
            entry, instead compute the delta versus the NULL_REVISION. This
            effectively streams a complete inventory. Used for stuff like
            filling in missing parents, etc.
        """
        from_repo = self.from_repository
        revision_keys = [(rev_id,) for rev_id in revision_ids]
        parent_map = from_repo.inventories.get_parent_map(revision_keys)
        # XXX: possibly repos could implement a more efficient
        # iter_inv_deltas method...
        inventories = self.from_repository.iter_inventories(
            revision_ids, 'topological')
        format = from_repo._format
        invs_sent_so_far = set([_mod_revision.NULL_REVISION])
        inventory_cache = lru_cache.LRUCache(50)
        null_inventory = from_repo.revision_tree(
            _mod_revision.NULL_REVISION).inventory
        # XXX: ideally the rich-root/tree-refs flags would be per-revision,
        # not per-repo (e.g. streaming a non-rich-root revision out of a
        # rich-root repo back into a non-rich-root repo ought to be allowed)
        serializer = inventory_delta.InventoryDeltaSerializer(
            versioned_root=format.rich_root_data,
            tree_references=format.supports_tree_reference)
        for inv in inventories:
            key = (inv.revision_id,)
            parent_keys = parent_map.get(key, ())
            delta = None
            if not delta_versus_null and parent_keys:
                # The caller did not ask for complete inventories and we have
                # some parents that we can delta against. Make a delta
                # against each parent so that we can find the smallest.
                parent_ids = [parent_key[0] for parent_key in parent_keys]
                for parent_id in parent_ids:
                    if parent_id not in invs_sent_so_far:
                        # We don't know that the remote side has this basis,
                        # so we can't use it.
                        continue
                    if parent_id == _mod_revision.NULL_REVISION:
                        parent_inv = null_inventory
                    else:
                        parent_inv = inventory_cache.get(parent_id, None)
                        if parent_inv is None:
                            parent_inv = from_repo.get_inventory(parent_id)
                    candidate_delta = inv._make_delta(parent_inv)
                    if (delta is None or
                        len(delta) > len(candidate_delta)):
                        delta = candidate_delta
                        basis_id = parent_id
            if delta is None:
                # Either none of the parents ended up being suitable, or we
                # were asked to delta against NULL
                basis_id = _mod_revision.NULL_REVISION
                delta = inv._make_delta(null_inventory)
            invs_sent_so_far.add(inv.revision_id)
            inventory_cache[inv.revision_id] = inv
            delta_serialized = ''.join(
                serializer.delta_to_lines(basis_id, key[-1], delta))
            yield versionedfile.FulltextContentFactory(
                key, parent_keys, None, delta_serialized)
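
    # Editorial sketch: each yielded record is a serialized inventory-delta
    # fulltext keyed by revision id, with the basis chosen from parents
    # already sent in this stream (so the receiver can always apply it),
    # falling back to NULL_REVISION otherwise:
    #
    #   for factory in source._stream_invs_as_deltas(['rev-a', 'rev-b']):
    #       lines = factory.get_bytes_as('fulltext')  # inventory-delta text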

def _iter_for_revno(repo, partial_history_cache, stop_index=None,
                    stop_revision=None):
    """Extend the partial history to include a given index