commit to be valid, deletes against the basis MUST be recorded via
builder.record_delete().
211
raise NotImplementedError(self.will_record_deletes)
213
def record_iter_changes(self, tree, basis_revision_id, iter_changes):
395
self._recording_deletes = True
397
basis_id = self.parents[0]
399
basis_id = _mod_revision.NULL_REVISION
400
self.basis_delta_revision = basis_id
402
def record_entry_contents(self, ie, parent_invs, path, tree,
404
"""Record the content of ie from tree into the commit if needed.
406
Side effect: sets ie.revision when unchanged
408
:param ie: An inventory entry present in the commit.
409
:param parent_invs: The inventories of the parent revisions of the
411
:param path: The path the entry is at in the tree.
412
:param tree: The tree which contains this entry and should be used to
414
:param content_summary: Summary data from the tree about the paths
415
content - stat, length, exec, sha/link target. This is only
416
accessed when the entry has a revision of None - that is when it is
417
a candidate to commit.
418
:return: A tuple (change_delta, version_recorded, fs_hash).
419
change_delta is an inventory_delta change for this entry against
420
the basis tree of the commit, or None if no change occurred against
the basis tree of the commit.
version_recorded is True if a new version of the entry has been
423
recorded. For instance, committing a merge where a file was only
424
changed on the other side will return (delta, False).
425
fs_hash is either None, or the hash details for the path (currently
426
a tuple of the contents sha1 and the statvalue returned by
427
tree.get_file_with_stat()).
429
if self.new_inventory.root is None:
430
if ie.parent_id is not None:
431
raise errors.RootMissing()
432
self._check_root(ie, parent_invs, tree)
433
if ie.revision is None:
434
kind = content_summary[0]
436
# ie is carried over from a prior commit
438
# XXX: repository specific check for nested tree support goes here - if
439
# the repo doesn't want nested trees, we skip it?
440
if (kind == 'tree-reference' and
441
not self.repository._format.supports_tree_reference):
442
# mismatch between commit builder logic and repository:
443
# this needs the entry creation pushed down into the builder.
444
raise NotImplementedError('Missing repository subtree support.')
445
self.new_inventory.add(ie)
447
# TODO: slow, take it out of the inner loop.
449
basis_inv = parent_invs[0]
451
basis_inv = Inventory(root_id=None)
453
# ie.revision is always None if the InventoryEntry is considered
454
# for committing. We may record the previous parent's revision if the
455
# content is actually unchanged against a sole head.
456
if ie.revision is not None:
457
if not self._versioned_root and path == '':
458
# repositories that do not version the root set the root's
459
# revision to the new commit even when no change occurs (more
460
# specifically, they do not record a revision on the root; and
461
# the rev id is assigned to the root during deserialisation -
462
# this masks when a change may have occurred against the basis.
463
# To match this we always issue a delta, because the revision
464
# of the root will always be changing.
465
if ie.file_id in basis_inv:
466
delta = (basis_inv.id2path(ie.file_id), path,
470
delta = (None, path, ie.file_id, ie)
471
self._basis_delta.append(delta)
472
return delta, False, None
474
# we don't need to commit this, because the caller already
475
# determined that an existing revision of this file is
476
# appropriate. If it's not being considered for committing then
477
# it and all its parents to the root must be unaltered, so there is
# no change against the basis.
479
if ie.revision == self._new_revision_id:
480
raise AssertionError("Impossible situation, a skipped "
    "inventory entry (%r) claims to be modified in this "
    "commit (%r)." % (ie, self._new_revision_id))
483
return None, False, None
484
# XXX: Friction: parent_candidates should return a list not a dict
485
# so that we don't have to walk the inventories again.
486
parent_candiate_entries = ie.parent_candidates(parent_invs)
487
head_set = self._heads(ie.file_id, parent_candiate_entries.keys())
489
for inv in parent_invs:
490
if ie.file_id in inv:
491
old_rev = inv[ie.file_id].revision
492
if old_rev in head_set:
493
heads.append(inv[ie.file_id].revision)
494
head_set.remove(inv[ie.file_id].revision)
497
# now we check to see if we need to write a new record to the
# repository.
# We write a new entry unless there is one head in the ancestors, and
# the kind-derived content is unchanged.
# Cheapest check first: no ancestors, or more than one head in the
# ancestors, we write a new node.
507
# There is a single head, look it up for comparison
508
parent_entry = parent_candiate_entries[heads[0]]
509
# if the non-content specific data has changed, we'll be writing a
# new node.
if (parent_entry.parent_id != ie.parent_id or
512
parent_entry.name != ie.name):
514
# now we need to do content specific checks:
516
# if the kind changed the content obviously has
517
if kind != parent_entry.kind:
519
# Stat cache fingerprint feedback for the caller - None as we usually
520
# don't generate one.
523
if content_summary[2] is None:
524
raise ValueError("Files must not have executable = None")
526
# We can't trust a check of the file length because of content
# filtering
if (# if the exec bit has changed we have to store:
529
parent_entry.executable != content_summary[2]):
531
elif parent_entry.text_sha1 == content_summary[3]:
532
# all meta and content is unchanged (using a hash cache
533
# hit to check the sha)
534
ie.revision = parent_entry.revision
535
ie.text_size = parent_entry.text_size
536
ie.text_sha1 = parent_entry.text_sha1
537
ie.executable = parent_entry.executable
538
return self._get_delta(ie, basis_inv, path), False, None
540
# Either there is only a hash change (no hash cache entry,
# or same-size content change), or there is no change on
# this file at all.
# Provide the parent's hash to the store layer, so that if the
# content is unchanged we will not store a new node.
545
nostore_sha = parent_entry.text_sha1
547
# We want to record a new node regardless of the presence or
548
# absence of a content change in the file.
550
ie.executable = content_summary[2]
551
file_obj, stat_value = tree.get_file_with_stat(ie.file_id, path)
553
text = file_obj.read()
557
ie.text_sha1, ie.text_size = self._add_text_to_weave(
558
ie.file_id, text, heads, nostore_sha)
559
# Let the caller know we generated a stat fingerprint.
560
fingerprint = (ie.text_sha1, stat_value)
561
except errors.ExistingContent:
562
# Turns out that the file content was unchanged, and we were
563
# only going to store a new node if it was changed. Carry over
565
ie.revision = parent_entry.revision
566
ie.text_size = parent_entry.text_size
567
ie.text_sha1 = parent_entry.text_sha1
568
ie.executable = parent_entry.executable
569
return self._get_delta(ie, basis_inv, path), False, None
570
elif kind == 'directory':
572
# all data is meta here, nothing specific to directory, so
574
ie.revision = parent_entry.revision
575
return self._get_delta(ie, basis_inv, path), False, None
576
self._add_text_to_weave(ie.file_id, '', heads, None)
577
elif kind == 'symlink':
578
current_link_target = content_summary[3]
580
# symlink target is not generic metadata, check if it has
582
if current_link_target != parent_entry.symlink_target:
585
# unchanged, carry over.
586
ie.revision = parent_entry.revision
587
ie.symlink_target = parent_entry.symlink_target
588
return self._get_delta(ie, basis_inv, path), False, None
589
ie.symlink_target = current_link_target
590
self._add_text_to_weave(ie.file_id, '', heads, None)
591
elif kind == 'tree-reference':
593
if content_summary[3] != parent_entry.reference_revision:
596
# unchanged, carry over.
597
ie.reference_revision = parent_entry.reference_revision
598
ie.revision = parent_entry.revision
599
return self._get_delta(ie, basis_inv, path), False, None
600
ie.reference_revision = content_summary[3]
601
if ie.reference_revision is None:
602
raise AssertionError("invalid content_summary for nested tree: %r"
603
% (content_summary,))
604
self._add_text_to_weave(ie.file_id, '', heads, None)
606
raise NotImplementedError('unknown kind')
607
ie.revision = self._new_revision_id
608
self._any_changes = True
609
return self._get_delta(ie, basis_inv, path), True, fingerprint
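# --- Illustrative usage sketch (added for clarity; not part of the original
# code, names are hypothetical). A caller of record_entry_contents might
# consume the (delta, version_recorded, fs_hash) return value roughly like:
#
#   delta, version_recorded, fs_hash = builder.record_entry_contents(
#       ie, parent_invs, path, tree, content_summary)
#   if version_recorded:
#       pass  # a new version of the entry was stored for this commit
#   if fs_hash is not None:
#       sha1, stat_value = fs_hash  # feed back into the tree's stat cache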
611
def record_iter_changes(self, tree, basis_revision_id, iter_changes,
612
_entry_factory=entry_factory):
"""Record a new tree via iter_changes.
:param tree: The tree to obtain text contents from for changed objects.
to basis_revision_id. The iterator must not include any items with
a current kind of None - missing items must be either filtered out
or errored-on before record_iter_changes sees the item.
623
:param _entry_factory: Private method to bind entry_factory locally for
:return: A generator of (file_id, relpath, fs_hash) tuples for use with
tree._observed_sha1.
227
raise NotImplementedError(self.record_iter_changes)
628
# Create an inventory delta based on deltas between all the parent
# inventories. We use inventory deltas between the inventory objects
# because iter_changes masks last-changed-field-only changes.
633
# file_id -> change map, change is fileid, paths, changed, versioneds,
634
# parents, names, kinds, executables
636
# {file_id -> revision_id -> inventory entry, for entries in parent
637
# trees that are not parents[0]
641
revtrees = list(self.repository.revision_trees(self.parents))
642
except errors.NoSuchRevision:
643
# one or more ghosts, slow path.
645
for revision_id in self.parents:
647
revtrees.append(self.repository.revision_tree(revision_id))
648
except errors.NoSuchRevision:
650
basis_revision_id = _mod_revision.NULL_REVISION
652
revtrees.append(self.repository.revision_tree(
653
_mod_revision.NULL_REVISION))
654
# The basis inventory from a repository
656
basis_inv = revtrees[0].inventory
658
basis_inv = self.repository.revision_tree(
659
_mod_revision.NULL_REVISION).inventory
660
if len(self.parents) > 0:
661
if basis_revision_id != self.parents[0] and not ghost_basis:
663
"arbitrary basis parents not yet supported with merges")
664
for revtree in revtrees[1:]:
665
for change in revtree.inventory._make_delta(basis_inv):
666
if change[1] is None:
667
# Not present in this parent.
669
if change[2] not in merged_ids:
670
if change[0] is not None:
671
basis_entry = basis_inv[change[2]]
672
merged_ids[change[2]] = [
674
basis_entry.revision,
677
parent_entries[change[2]] = {
679
basis_entry.revision:basis_entry,
681
change[3].revision:change[3],
684
merged_ids[change[2]] = [change[3].revision]
685
parent_entries[change[2]] = {change[3].revision:change[3]}
687
merged_ids[change[2]].append(change[3].revision)
688
parent_entries[change[2]][change[3].revision] = change[3]
691
# Setup the changes from the tree:
692
# changes maps file_id -> (change, [parent revision_ids])
694
for change in iter_changes:
695
# This probably looks up in basis_inv way too much.
696
if change[1][0] is not None:
697
head_candidate = [basis_inv[change[0]].revision]
700
changes[change[0]] = change, merged_ids.get(change[0],
702
unchanged_merged = set(merged_ids) - set(changes)
703
# Extend the changes dict with synthetic changes to record merges of
705
for file_id in unchanged_merged:
706
# Record a merged version of these items that did not change vs the
707
# basis. This can be either identical parallel changes, or a revert
708
# of a specific file after a merge. The recorded content will be
709
# that of the current tree (which is the same as the basis), but
710
# the per-file graph will reflect a merge.
711
# NB:XXX: We are reconstructing path information we had, this
712
# should be preserved instead.
713
# inv delta change: (file_id, (path_in_source, path_in_target),
714
# changed_content, versioned, parent, name, kind,
717
basis_entry = basis_inv[file_id]
718
except errors.NoSuchId:
719
# a change from basis->some_parents but file_id isn't in basis
720
# so was new in the merge, which means it must have changed
721
# from basis -> current, and as it hasn't the add was reverted
722
# by the user. So we discard this change.
726
(basis_inv.id2path(file_id), tree.id2path(file_id)),
728
(basis_entry.parent_id, basis_entry.parent_id),
729
(basis_entry.name, basis_entry.name),
730
(basis_entry.kind, basis_entry.kind),
731
(basis_entry.executable, basis_entry.executable))
732
changes[file_id] = (change, merged_ids[file_id])
733
# changes contains tuples with the change and a set of inventory
734
# candidates for the file.
736
# old_path, new_path, file_id, new_inventory_entry
737
seen_root = False # Is the root in the basis delta?
738
inv_delta = self._basis_delta
739
modified_rev = self._new_revision_id
740
for change, head_candidates in changes.values():
741
if change[3][1]: # versioned in target.
742
# Several things may be happening here:
743
# We may have a fork in the per-file graph
744
# - record a change with the content from tree
745
# We may have a change against < all trees
746
# - carry over the tree that hasn't changed
747
# We may have a change against all trees
748
# - record the change with the content from tree
751
entry = _entry_factory[kind](file_id, change[5][1],
753
head_set = self._heads(change[0], set(head_candidates))
756
for head_candidate in head_candidates:
757
if head_candidate in head_set:
758
heads.append(head_candidate)
759
head_set.remove(head_candidate)
762
# Could be a carry-over situation:
763
parent_entry_revs = parent_entries.get(file_id, None)
764
if parent_entry_revs:
765
parent_entry = parent_entry_revs.get(heads[0], None)
768
if parent_entry is None:
769
# The parent iter_changes was called against is the one
770
# that is the per-file head, so any change is relevant and
# iter_changes is valid.
772
carry_over_possible = False
774
# could be a carry over situation
775
# A change against the basis may just indicate a merge;
# we need to check the content against the source of the
# merge to determine if it was changed after the merge
# or not.
779
if (parent_entry.kind != entry.kind or
780
parent_entry.parent_id != entry.parent_id or
781
parent_entry.name != entry.name):
782
# Metadata common to all entries has changed
783
# against per-file parent
784
carry_over_possible = False
786
carry_over_possible = True
787
# per-type checks for changes against the parent_entry
790
# Cannot be a carry-over situation
791
carry_over_possible = False
792
# Populate the entry in the delta
794
# XXX: There is still a small race here: If someone reverts the content of a file
795
# after iter_changes examines and decides it has changed,
796
# we will unconditionally record a new version even if some
797
# other process reverts it while commit is running (with
798
# the revert happening after iter_changes did its
801
entry.executable = True
803
entry.executable = False
804
if (carry_over_possible and
805
parent_entry.executable == entry.executable):
806
# Check the file length, content hash after reading
808
nostore_sha = parent_entry.text_sha1
811
file_obj, stat_value = tree.get_file_with_stat(file_id, change[1][1])
813
text = file_obj.read()
817
entry.text_sha1, entry.text_size = self._add_text_to_weave(
818
file_id, text, heads, nostore_sha)
819
yield file_id, change[1][1], (entry.text_sha1, stat_value)
820
except errors.ExistingContent:
821
# No content change against a carry_over parent
822
# Perhaps this should also yield a fs hash update?
824
entry.text_size = parent_entry.text_size
825
entry.text_sha1 = parent_entry.text_sha1
826
elif kind == 'symlink':
828
entry.symlink_target = tree.get_symlink_target(file_id)
829
if (carry_over_possible and
830
parent_entry.symlink_target == entry.symlink_target):
833
self._add_text_to_weave(change[0], '', heads, None)
834
elif kind == 'directory':
835
if carry_over_possible:
838
# Nothing to set on the entry.
839
# XXX: split into the Root and nonRoot versions.
840
if change[1][1] != '' or self.repository.supports_rich_root():
841
self._add_text_to_weave(change[0], '', heads, None)
842
elif kind == 'tree-reference':
843
if not self.repository._format.supports_tree_reference:
844
# This isn't quite sane as an error, but we shouldn't
845
# ever see this code path in practice: trees don't
846
# permit references when the repo doesn't support tree
848
raise errors.UnsupportedOperation(tree.add_reference,
850
reference_revision = tree.get_reference_revision(change[0])
851
entry.reference_revision = reference_revision
852
if (carry_over_possible and
853
parent_entry.reference_revision == reference_revision):
856
self._add_text_to_weave(change[0], '', heads, None)
858
raise AssertionError('unknown kind %r' % kind)
860
entry.revision = modified_rev
862
entry.revision = parent_entry.revision
865
new_path = change[1][1]
866
inv_delta.append((change[1][0], new_path, change[0], entry))
869
self.new_inventory = None
871
# This should perhaps be guarded by a check that the basis we
872
# commit against is the basis for the commit and if not do a delta
874
self._any_changes = True
876
# housekeeping root entry changes do not affect no-change commits.
877
self._require_root_change(tree)
878
self.basis_delta_revision = basis_revision_id
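# Illustrative sketch (assumption, not original code; basis_tree is a
# hypothetical name): record_iter_changes is a generator, so a caller has
# to iterate it to completion, roughly:
#
#   for file_id, relpath, fs_hash in builder.record_iter_changes(
#           tree, basis_revision_id, tree.iter_changes(basis_tree)):
#       tree._observed_sha1(file_id, relpath, fs_hash)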
880
def _add_text_to_weave(self, file_id, new_text, parents, nostore_sha):
881
parent_keys = tuple([(file_id, parent) for parent in parents])
882
return self.repository.texts._add_text(
883
(file_id, self._new_revision_id), parent_keys, new_text,
884
nostore_sha=nostore_sha, random_id=self.random_revid)[0:2]
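# For illustration only (hypothetical values): texts are keyed by
# (file_id, revision_id), so a call with two per-file parent revisions
# looks roughly like:
#
#   sha1, size = self._add_text_to_weave(
#       'file-id', 'new content\n', ['parent-rev-1', 'parent-rev-2'],
#       nostore_sha=None)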
887
class RootCommitBuilder(CommitBuilder):
888
"""This commitbuilder actually records the root id"""
890
# the root entry gets versioned properly by this builder.
891
_versioned_root = True
893
def _check_root(self, ie, parent_invs, tree):
894
"""Helper for record_entry_contents.
896
:param ie: An entry being added.
897
:param parent_invs: The inventories of the parent revisions of the
899
:param tree: The tree that is being committed.
902
def _require_root_change(self, tree):
903
"""Enforce an appropriate root object change.
905
This is called once when record_iter_changes is called, if and only if
906
the root was not in the delta calculated by record_iter_changes.
908
:param tree: The tree which is being committed.
910
# versioned roots do not change unless the tree found a change.
class RepositoryWriteLockResult(LogicalLockResult):
return InterRepository._assert_same_model(self, repository)
1070
def add_inventory(self, revision_id, inv, parents):
1071
"""Add the inventory inv to the repository as revision_id.
1073
:param parents: The revision ids of the parents that revision_id
1074
is known to have and are in the repository already.
1076
:returns: The validator (which is a sha1 digest, though what is sha'd is
1077
repository format specific) of the serialized inventory.
1079
if not self.is_in_write_group():
1080
raise AssertionError("%r not in write group" % (self,))
1081
_mod_revision.check_not_reserved_id(revision_id)
1082
if not (inv.revision_id is None or inv.revision_id == revision_id):
1083
raise AssertionError(
1084
"Mismatch between inventory revision"
1085
" id and insertion revid (%r, %r)"
1086
% (inv.revision_id, revision_id))
1087
if inv.root is None:
1088
raise errors.RootMissing()
1089
return self._add_inventory_checked(revision_id, inv, parents)
1091
def _add_inventory_checked(self, revision_id, inv, parents):
1092
"""Add inv to the repository after checking the inputs.
1094
This function can be overridden to allow different inventory styles.
1096
:seealso: add_inventory, for the contract.
1098
inv_lines = self._serializer.write_inventory_to_lines(inv)
1099
return self._inventory_add_lines(revision_id, parents,
1100
inv_lines, check_content=False)
1102
def add_inventory_by_delta(self, basis_revision_id, delta, new_revision_id,
1103
parents, basis_inv=None, propagate_caches=False):
1104
"""Add a new inventory expressed as a delta against another revision.
1106
See the inventory developers documentation for the theory behind
1109
:param basis_revision_id: The inventory id the delta was created
1110
against. (This does not have to be a direct parent.)
1111
:param delta: The inventory delta (see Inventory.apply_delta for
1113
:param new_revision_id: The revision id that the inventory is being
1115
:param parents: The revision ids of the parents that revision_id is
1116
known to have and are in the repository already. These are supplied
1117
for repositories that depend on the inventory graph for revision
1118
graph access, as well as for those that pun ancestry with delta
1120
:param basis_inv: The basis inventory if it is already known,
1122
:param propagate_caches: If True, the caches for this inventory are
1123
copied to and updated for the result if possible.
1125
:returns: (validator, new_inv)
1126
The validator (which is a sha1 digest, though what is sha'd is
1127
repository format specific) of the serialized inventory, and the
1128
resulting inventory.
1130
if not self.is_in_write_group():
1131
raise AssertionError("%r not in write group" % (self,))
1132
_mod_revision.check_not_reserved_id(new_revision_id)
1133
basis_tree = self.revision_tree(basis_revision_id)
1134
basis_tree.lock_read()
1136
# Note that this mutates the inventory of basis_tree, which not all
1137
# inventory implementations may support: A better idiom would be to
1138
# return a new inventory, but as there is no revision tree cache in
1139
# repository this is safe for now - RBC 20081013
1140
if basis_inv is None:
1141
basis_inv = basis_tree.inventory
1142
basis_inv.apply_delta(delta)
1143
basis_inv.revision_id = new_revision_id
1144
return (self.add_inventory(new_revision_id, basis_inv, parents),
1149
def _inventory_add_lines(self, revision_id, parents, lines,
1150
check_content=True):
1151
"""Store lines in inv_vf and return the sha1 of the inventory."""
1152
parents = [(parent,) for parent in parents]
1153
result = self.inventories.add_lines((revision_id,), parents, lines,
1154
check_content=check_content)[0]
1155
self.inventories._access.flush()
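# Illustrative sketch of add_inventory_by_delta (defined above) with
# hypothetical values; the delta format follows Inventory.apply_delta:
#
#   delta = [(None, 'doc.txt', 'doc-file-id', new_entry)]   # add doc.txt
#   validator, new_inv = repo.add_inventory_by_delta(
#       basis_revision_id, delta, new_revision_id, [basis_revision_id])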
1158
def add_revision(self, revision_id, rev, inv=None, config=None):
1159
"""Add rev to the revision store as revision_id.
1161
:param revision_id: the revision id to use.
1162
:param rev: The revision object.
1163
:param inv: The inventory for the revision. If None, it will be looked
up in the inventory store.
:param config: If None, no digital signature will be created.
If supplied, its signature_needed method will be used
to determine if a signature should be made.
1169
# TODO: jam 20070210 Shouldn't we check rev.revision_id and
1171
_mod_revision.check_not_reserved_id(revision_id)
1172
if config is not None and config.signature_needed():
1174
inv = self.get_inventory(revision_id)
1175
plaintext = Testament(rev, inv).as_short_text()
1176
self.store_revision_signature(
1177
gpg.GPGStrategy(config), plaintext, revision_id)
1178
# check inventory present
1179
if not self.inventories.get_parent_map([(revision_id,)]):
1181
raise errors.WeaveRevisionNotPresent(revision_id,
1184
# yes, this is not suitable for adding with ghosts.
1185
rev.inventory_sha1 = self.add_inventory(revision_id, inv,
1188
key = (revision_id,)
1189
rev.inventory_sha1 = self.inventories.get_sha1s([key])[key]
1190
self._add_revision(rev)
1192
def _add_revision(self, revision):
1193
text = self._serializer.write_revision_to_string(revision)
1194
key = (revision.revision_id,)
1195
parents = tuple((parent,) for parent in revision.parent_ids)
1196
self.revisions.add_lines(key, parents, osutils.split_lines(text))
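# Illustrative sketch (assumption, hypothetical names): revisions are
# added inside a write group on a write-locked repository, roughly:
#
#   repo.lock_write()
#   repo.start_write_group()
#   try:
#       repo.add_inventory(rev_id, inv, parent_ids)
#       repo.add_revision(rev_id, rev, inv=inv)
#   except:
#       repo.abort_write_group()
#       raise
#   else:
#       repo.commit_write_group()
#   repo.unlock()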
def all_revision_ids(self):
"""Returns a list of all the revision ids in the repository.
self.control_files.break_lock()
1227
def _eliminate_revisions_not_present(self, revision_ids):
1228
"""Check every revision id in revision_ids to see if we have it.
1230
Returns a set of the present revisions.
1233
graph = self.get_graph()
1234
parent_map = graph.get_parent_map(revision_ids)
1235
# The old API returned a list, should this actually be a set?
1236
return parent_map.keys()
1238
def _check_inventories(self, checker):
1239
"""Check the inventories found from the revision scan.
1241
This is responsible for verifying the sha1 of inventories and
1242
creating a pending_keys set that covers data referenced by inventories.
1244
bar = ui.ui_factory.nested_progress_bar()
1246
self._do_check_inventories(checker, bar)
1250
def _do_check_inventories(self, checker, bar):
1251
"""Helper for _check_inventories."""
1253
keys = {'chk_bytes':set(), 'inventories':set(), 'texts':set()}
1254
kinds = ['chk_bytes', 'texts']
1255
count = len(checker.pending_keys)
1256
bar.update("inventories", 0, 2)
1257
current_keys = checker.pending_keys
1258
checker.pending_keys = {}
1259
# Accumulate current checks.
1260
for key in current_keys:
1261
if key[0] != 'inventories' and key[0] not in kinds:
1262
checker._report_items.append('unknown key type %r' % (key,))
1263
keys[key[0]].add(key[1:])
1264
if keys['inventories']:
1265
# NB: output order *should* be roughly sorted - topo or
1266
# inverse topo depending on repository - either way decent
1267
# to just delta against. However, pre-CHK formats didn't
1268
# try to optimise inventory layout on disk. As such the
1269
# pre-CHK code path does not use inventory deltas.
1271
for record in self.inventories.check(keys=keys['inventories']):
1272
if record.storage_kind == 'absent':
1273
checker._report_items.append(
1274
'Missing inventory {%s}' % (record.key,))
1276
last_object = self._check_record('inventories', record,
1277
checker, last_object,
1278
current_keys[('inventories',) + record.key])
1279
del keys['inventories']
1282
bar.update("texts", 1)
1283
while (checker.pending_keys or keys['chk_bytes']
1285
# Something to check.
1286
current_keys = checker.pending_keys
1287
checker.pending_keys = {}
1288
# Accumulate current checks.
1289
for key in current_keys:
1290
if key[0] not in kinds:
1291
checker._report_items.append('unknown key type %r' % (key,))
1292
keys[key[0]].add(key[1:])
1293
# Check the outermost kind only - inventories || chk_bytes || texts
1297
for record in getattr(self, kind).check(keys=keys[kind]):
1298
if record.storage_kind == 'absent':
1299
checker._report_items.append(
1300
'Missing %s {%s}' % (kind, record.key,))
1302
last_object = self._check_record(kind, record,
1303
checker, last_object, current_keys[(kind,) + record.key])
1307
def _check_record(self, kind, record, checker, last_object, item_data):
1308
"""Check a single text from this repository."""
1309
if kind == 'inventories':
1310
rev_id = record.key[0]
1311
inv = self._deserialise_inventory(rev_id,
1312
record.get_bytes_as('fulltext'))
1313
if last_object is not None:
1314
delta = inv._make_delta(last_object)
1315
for old_path, path, file_id, ie in delta:
1318
ie.check(checker, rev_id, inv)
1320
for path, ie in inv.iter_entries():
1321
ie.check(checker, rev_id, inv)
1322
if self._format.fast_deltas:
1324
elif kind == 'chk_bytes':
1325
# No code written to check chk_bytes for this repo format.
1326
checker._report_items.append(
1327
'unsupported key type chk_bytes for %s' % (record.key,))
1328
elif kind == 'texts':
1329
self._check_text(record, checker, item_data)
1331
checker._report_items.append(
1332
'unknown key type %s for %s' % (kind, record.key))
1334
def _check_text(self, record, checker, item_data):
1335
"""Check a single text."""
1336
# Check it is extractable.
1337
# TODO: check length.
1338
if record.storage_kind == 'chunked':
1339
chunks = record.get_bytes_as(record.storage_kind)
1340
sha1 = osutils.sha_strings(chunks)
1341
length = sum(map(len, chunks))
1343
content = record.get_bytes_as('fulltext')
1344
sha1 = osutils.sha_string(content)
1345
length = len(content)
1346
if item_data and sha1 != item_data[1]:
1347
checker._report_items.append(
1348
'sha1 mismatch: %s has sha1 %s expected %s referenced by %s' %
1349
(record.key, sha1, item_data[1], item_data[2]))
def create(controldir):
"""Construct the current default format repository in controldir."""
return RepositoryFormat.get_default_format().initialize(controldir)

def __init__(self, _format, controldir, control_files):
"""Instantiate a Repository.

:param _format: The format of the repository on disk.
:param controldir: The ControlDir of the repository.
:param control_files: Control files to use for locking, etc.
# In the future we will have a single api for all stores for
# getting file texts, inventories and revisions, then
signature = gpg_strategy.sign(plaintext)
self.add_signature_text(revision_id, signature)
912
2050
def add_signature_text(self, revision_id, signature):
913
"""Store a signature text for a revision.
915
:param revision_id: Revision id of the revision
916
:param signature: Signature text.
918
raise NotImplementedError(self.add_signature_text)
2051
self.signatures.add_lines((revision_id,), (),
2052
osutils.split_lines(signature))
2054
def find_text_key_references(self):
2055
"""Find the text key references within the repository.
2057
:return: A dictionary mapping text keys ((fileid, revision_id) tuples)
2058
to whether they were referred to by the inventory of the
2059
revision_id that they contain. The inventory texts from all present
2060
revision ids are assessed to generate this report.
2062
revision_keys = self.revisions.keys()
2063
w = self.inventories
2064
pb = ui.ui_factory.nested_progress_bar()
2066
return self._find_text_key_references_from_xml_inventory_lines(
2067
w.iter_lines_added_or_present_in_keys(revision_keys, pb=pb))
2071
def _find_text_key_references_from_xml_inventory_lines(self,
2073
"""Core routine for extracting references to texts from inventories.
2075
This performs the translation of xml lines to revision ids.
2077
:param line_iterator: An iterator of lines, origin_version_id
2078
:return: A dictionary mapping text keys ((fileid, revision_id) tuples)
2079
to whether they were referred to by the inventory of the
2080
revision_id that they contain. Note that if that revision_id was
2081
not part of the line_iterator's output then False will be given -
2082
even though it may actually refer to that key.
2084
if not self._serializer.support_altered_by_hack:
2085
raise AssertionError(
2086
"_find_text_key_references_from_xml_inventory_lines only "
2087
"supported for branches which store inventory as unnested xml"
2088
", not on %r" % self)
2091
# this code needs to read every new line in every inventory for the
2092
# inventories [revision_ids]. Seeing a line twice is ok. Seeing a line
2093
# not present in one of those inventories is unnecessary but not
2094
# harmful because we are filtering by the revision id marker in the
2095
# inventory lines: we only select file ids altered in one of those
2096
# revisions. We don't need to see all lines in the inventory because
2097
# only those added in an inventory in rev X can contain a revision=X
2099
unescape_revid_cache = {}
2100
unescape_fileid_cache = {}
2102
# jam 20061218 In a big fetch, this handles hundreds of thousands
2103
# of lines, so it has had a lot of inlining and optimizing done.
2104
# Sorry that it is a little bit messy.
2105
# Move several functions to be local variables, since this is a long
2107
search = self._file_ids_altered_regex.search
2108
unescape = _unescape_xml
2109
setdefault = result.setdefault
2110
for line, line_key in line_iterator:
2111
match = search(line)
2114
# One call to match.group() returning multiple items is quite a
2115
# bit faster than 2 calls to match.group() each returning 1
2116
file_id, revision_id = match.group('file_id', 'revision_id')
2118
# Inlining the cache lookups helps a lot when you make 170,000
2119
# lines and 350k ids, versus 8.4 unique ids.
2120
# Using a cache helps in 2 ways:
2121
# 1) Avoids unnecessary decoding calls
2122
# 2) Re-uses cached strings, which helps in future set and
2124
# (2) is enough that removing encoding entirely along with
2125
# the cache (so we are using plain strings) results in no
2126
# performance improvement.
2128
revision_id = unescape_revid_cache[revision_id]
2130
unescaped = unescape(revision_id)
2131
unescape_revid_cache[revision_id] = unescaped
2132
revision_id = unescaped
2134
# Note that unconditionally unescaping means that we deserialise
2135
# every fileid, which for general 'pull' is not great, but we don't
2136
# really want to have so many fulltexts that this matters anyway.
2139
file_id = unescape_fileid_cache[file_id]
2141
unescaped = unescape(file_id)
2142
unescape_fileid_cache[file_id] = unescaped
2145
key = (file_id, revision_id)
2146
setdefault(key, False)
2147
if revision_id == line_key[-1]:
2151
def _inventory_xml_lines_for_keys(self, keys):
"""Get a line iterator of the sort needed for finding references.
2154
Not relevant for non-xml inventory repositories.
2156
Ghosts in revision_keys are ignored.
2158
:param revision_keys: The revision keys for the inventories to inspect.
2159
:return: An iterator over (inventory line, revid) for the fulltexts of
2160
all of the xml inventories specified by revision_keys.
2162
stream = self.inventories.get_record_stream(keys, 'unordered', True)
2163
for record in stream:
2164
if record.storage_kind != 'absent':
2165
chunks = record.get_bytes_as('chunked')
2166
revid = record.key[-1]
2167
lines = osutils.chunks_to_lines(chunks)
2171
def _find_file_ids_from_xml_inventory_lines(self, line_iterator,
2173
"""Helper routine for fileids_altered_by_revision_ids.
2175
This performs the translation of xml lines to revision ids.
2177
:param line_iterator: An iterator of lines, origin_version_id
2178
:param revision_keys: The revision ids to filter for. This should be a
2179
set or other type which supports efficient __contains__ lookups, as
2180
the revision key from each parsed line will be looked up in the
2181
revision_keys filter.
2182
:return: a dictionary mapping altered file-ids to an iterable of
2183
revision_ids. Each altered file-ids has the exact revision_ids that
2184
altered it listed explicitly.
2186
seen = set(self._find_text_key_references_from_xml_inventory_lines(
2187
line_iterator).iterkeys())
2188
parent_keys = self._find_parent_keys_of_revisions(revision_keys)
2189
parent_seen = set(self._find_text_key_references_from_xml_inventory_lines(
2190
self._inventory_xml_lines_for_keys(parent_keys)))
2191
new_keys = seen - parent_seen
2193
setdefault = result.setdefault
2194
for key in new_keys:
2195
setdefault(key[0], set()).add(key[-1])
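# For illustration (hypothetical values): the result maps each altered
# file id to the set of revisions that altered it, e.g.
#   {'file-id-1': set(['rev-a', 'rev-b'])}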
def _find_parent_ids_of_revisions(self, revision_ids):
"""Find all parent ids that are mentioned in the revision graph.
uniquely identify the file version in the caller's context. (Examples:
an index number or a TreeTransform trans_id.)
2253
bytes_iterator is an iterable of bytestrings for the file. The
2254
kind of iterable and length of the bytestrings are unspecified, but for
2255
this implementation, it is a list of bytes produced by
2256
VersionedFile.get_record_stream().
:param desired_files: a list of (file_id, revision_id, identifier)
947
raise NotImplementedError(self.iter_files_bytes)
2262
for file_id, revision_id, callable_data in desired_files:
2263
text_keys[(file_id, revision_id)] = callable_data
2264
for record in self.texts.get_record_stream(text_keys, 'unordered', True):
2265
if record.storage_kind == 'absent':
2266
raise errors.RevisionNotPresent(record.key, self)
2267
yield text_keys[record.key], record.get_bytes_as('chunked')
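# Illustrative sketch (hypothetical values): a caller pairing its own
# identifiers with file content, per the iter_files_bytes contract
# documented above:
#
#   desired = [('file-id-1', 'rev-id-1', 'trans-id-1')]
#   for identifier, chunks in repo.iter_files_bytes(desired):
#       data = ''.join(chunks)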
2269
def _generate_text_key_index(self, text_key_references=None,
2271
"""Generate a new text key index for the repository.
2273
This is an expensive function that will take considerable time to run.
2275
:return: A dict mapping text keys ((file_id, revision_id) tuples) to a
2276
list of parents, also text keys. When a given key has no parents,
2277
the parents list will be [NULL_REVISION].
2279
# All revisions, to find inventory parents.
2280
if ancestors is None:
2281
graph = self.get_graph()
2282
ancestors = graph.get_parent_map(self.all_revision_ids())
2283
if text_key_references is None:
2284
text_key_references = self.find_text_key_references()
2285
pb = ui.ui_factory.nested_progress_bar()
2287
return self._do_generate_text_key_index(ancestors,
2288
text_key_references, pb)
2292
def _do_generate_text_key_index(self, ancestors, text_key_references, pb):
2293
"""Helper for _generate_text_key_index to avoid deep nesting."""
2294
revision_order = tsort.topo_sort(ancestors)
2295
invalid_keys = set()
2297
for revision_id in revision_order:
2298
revision_keys[revision_id] = set()
2299
text_count = len(text_key_references)
2300
# a cache of the text keys to allow reuse; costs a dict of all the
2301
# keys, but saves a 2-tuple for every child of a given key.
2303
for text_key, valid in text_key_references.iteritems():
2305
invalid_keys.add(text_key)
2307
revision_keys[text_key[1]].add(text_key)
2308
text_key_cache[text_key] = text_key
2309
del text_key_references
2311
text_graph = graph.Graph(graph.DictParentsProvider(text_index))
2312
NULL_REVISION = _mod_revision.NULL_REVISION
2313
# Set a cache with a size of 10 - this suffices for bzr.dev but may be
2314
# too small for large or very branchy trees. However, for 55K path
2315
# trees, it would be easy to use too much memory trivially. Ideally we
2316
# could gauge this by looking at available real memory etc, but this is
2317
# always a tricky proposition.
2318
inventory_cache = lru_cache.LRUCache(10)
2319
batch_size = 10 # should be ~150MB on a 55K path tree
2320
batch_count = len(revision_order) / batch_size + 1
2322
pb.update("Calculating text parents", processed_texts, text_count)
2323
for offset in xrange(batch_count):
2324
to_query = revision_order[offset * batch_size:(offset + 1) *
2328
for revision_id in to_query:
2329
parent_ids = ancestors[revision_id]
2330
for text_key in revision_keys[revision_id]:
2331
pb.update("Calculating text parents", processed_texts)
2332
processed_texts += 1
2333
candidate_parents = []
2334
for parent_id in parent_ids:
2335
parent_text_key = (text_key[0], parent_id)
2337
check_parent = parent_text_key not in \
2338
revision_keys[parent_id]
2340
# the parent parent_id is a ghost:
2341
check_parent = False
2342
# truncate the derived graph against this ghost.
2343
parent_text_key = None
2345
# look at the parent commit details inventories to
2346
# determine possible candidates in the per file graph.
2349
inv = inventory_cache[parent_id]
2351
inv = self.revision_tree(parent_id).inventory
2352
inventory_cache[parent_id] = inv
2354
parent_entry = inv[text_key[0]]
2355
except (KeyError, errors.NoSuchId):
2357
if parent_entry is not None:
2359
text_key[0], parent_entry.revision)
2361
parent_text_key = None
2362
if parent_text_key is not None:
2363
candidate_parents.append(
2364
text_key_cache[parent_text_key])
2365
parent_heads = text_graph.heads(candidate_parents)
2366
new_parents = list(parent_heads)
2367
new_parents.sort(key=lambda x:candidate_parents.index(x))
2368
if new_parents == []:
2369
new_parents = [NULL_REVISION]
2370
text_index[text_key] = new_parents
2372
for text_key in invalid_keys:
2373
text_index[text_key] = [NULL_REVISION]
2376
def item_keys_introduced_by(self, revision_ids, _files_pb=None):
2377
"""Get an iterable listing the keys of all the data introduced by a set
2380
The keys will be ordered so that the corresponding items can be safely
2381
fetched and inserted in that order.
2383
:returns: An iterable producing tuples of (knit-kind, file-id,
2384
versions). knit-kind is one of 'file', 'inventory', 'signatures',
2385
'revisions'. file-id is None unless knit-kind is 'file'.
2387
for result in self._find_file_keys_to_fetch(revision_ids, _files_pb):
2390
for result in self._find_non_file_keys_to_fetch(revision_ids):
2393
def _find_file_keys_to_fetch(self, revision_ids, pb):
2394
# XXX: it's a bit weird to control the inventory weave caching in this
2395
# generator. Ideally the caching would be done in fetch.py I think. Or
2396
# maybe this generator should explicitly have the contract that it
2397
# should not be iterated until the previously yielded item has been
2399
inv_w = self.inventories
2401
# file ids that changed
2402
file_ids = self.fileids_altered_by_revision_ids(revision_ids, inv_w)
2404
num_file_ids = len(file_ids)
2405
for file_id, altered_versions in file_ids.iteritems():
2407
pb.update("Fetch texts", count, num_file_ids)
2409
yield ("file", file_id, altered_versions)
2411
def _find_non_file_keys_to_fetch(self, revision_ids):
2413
yield ("inventory", None, revision_ids)
2416
# XXX: Note ATM no callers actually pay attention to this return
# value; instead they just use the list of revision ids and ignore
# missing sigs. Consider removing this work entirely.
2419
revisions_with_signatures = set(self.signatures.get_parent_map(
2420
[(r,) for r in revision_ids]))
2421
revisions_with_signatures = set(
2422
[r for (r,) in revisions_with_signatures])
2423
revisions_with_signatures.intersection_update(revision_ids)
2424
yield ("signatures", None, revisions_with_signatures)
2427
yield ("revisions", None, revision_ids)
2430
def get_inventory(self, revision_id):
2431
"""Get Inventory object by revision id."""
2432
return self.iter_inventories([revision_id]).next()
2434
def iter_inventories(self, revision_ids, ordering=None):
2435
"""Get many inventories by revision_ids.
2437
This will buffer some or all of the texts used in constructing the
2438
inventories in memory, but will only parse a single inventory at a
2441
:param revision_ids: The expected revision ids of the inventories.
2442
:param ordering: optional ordering, e.g. 'topological'. If not
2443
specified, the order of revision_ids will be preserved (by
2444
buffering if necessary).
2445
:return: An iterator of inventories.
2447
if ((None in revision_ids)
2448
or (_mod_revision.NULL_REVISION in revision_ids)):
2449
raise ValueError('cannot get null revision inventory')
2450
return self._iter_inventories(revision_ids, ordering)
2452
def _iter_inventories(self, revision_ids, ordering):
2453
"""single-document based inventory iteration."""
2454
inv_xmls = self._iter_inventory_xmls(revision_ids, ordering)
2455
for text, revision_id in inv_xmls:
2456
yield self._deserialise_inventory(revision_id, text)
2458
def _iter_inventory_xmls(self, revision_ids, ordering):
2459
if ordering is None:
2460
order_as_requested = True
2461
ordering = 'unordered'
2463
order_as_requested = False
2464
keys = [(revision_id,) for revision_id in revision_ids]
2467
if order_as_requested:
2468
key_iter = iter(keys)
2469
next_key = key_iter.next()
2470
stream = self.inventories.get_record_stream(keys, ordering, True)
2472
for record in stream:
2473
if record.storage_kind != 'absent':
2474
chunks = record.get_bytes_as('chunked')
2475
if order_as_requested:
2476
text_chunks[record.key] = chunks
2478
yield ''.join(chunks), record.key[-1]
2480
raise errors.NoSuchRevision(self, record.key)
2481
if order_as_requested:
2482
# Yield as many results as we can while preserving order.
2483
while next_key in text_chunks:
2484
chunks = text_chunks.pop(next_key)
2485
yield ''.join(chunks), next_key[-1]
2487
next_key = key_iter.next()
2488
except StopIteration:
2489
# We still want to fully consume the get_record_stream,
2490
# just in case it is not actually finished at this point
2494
def _deserialise_inventory(self, revision_id, xml):
2495
"""Transform the xml into an inventory object.
2497
:param revision_id: The expected revision id of the inventory.
2498
:param xml: A serialised inventory.
2500
result = self._serializer.read_inventory_from_string(xml, revision_id,
2501
entry_cache=self._inventory_entry_cache,
2502
return_from_cache=self._safe_to_return_from_cache)
2503
if result.revision_id != revision_id:
2504
raise AssertionError('revision id mismatch %s != %s' % (
2505
result.revision_id, revision_id))
2508
def get_serializer_format(self):
2509
return self._serializer.format_num
2512
def _get_inventory_xml(self, revision_id):
2513
"""Get serialized inventory as a string."""
2514
texts = self._iter_inventory_xmls([revision_id], 'unordered')
2516
text, revision_id = texts.next()
2517
except StopIteration:
2518
raise errors.HistoryMissing(self, 'inventory', revision_id)
def get_rev_id_for_revno(self, revno, known_pair):
"""Return the revision id of a revno, given a later (revno, revid)
except UnicodeDecodeError:
raise errors.NonAsciiRevisionId(method, self)
2858
def revision_graph_can_have_wrong_parents(self):
2859
"""Is it possible for this repository to have a revision graph with
2862
If True, then this repository must also implement
2863
_find_inconsistent_revision_parents so that check and reconcile can
2864
check for inconsistencies before proceeding with other checks that may
2865
depend on the revision index being consistent.
2867
raise NotImplementedError(self.revision_graph_can_have_wrong_parents)
2870
def install_revision(repository, rev, revision_tree):
2871
"""Install all revision data into a repository."""
2872
install_revisions(repository, [(rev, revision_tree, None)])
2875
def install_revisions(repository, iterable, num_revisions=None, pb=None):
2876
"""Install all revision data into a repository.
2878
Accepts an iterable of revision, tree, signature tuples. The signature
2881
repository.start_write_group()
2883
inventory_cache = lru_cache.LRUCache(10)
2884
for n, (revision, revision_tree, signature) in enumerate(iterable):
2885
_install_revision(repository, revision, revision_tree, signature,
2888
pb.update('Transferring revisions', n + 1, num_revisions)
2890
repository.abort_write_group()
2893
repository.commit_write_group()
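# Illustrative sketch (hypothetical values): copying a single revision,
# its tree and no signature into a repository via install_revisions:
#
#   install_revisions(target_repo, [(rev, rev_tree, None)],
#                     num_revisions=1, pb=progress_bar)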
2896
def _install_revision(repository, rev, revision_tree, signature,
2898
"""Install all revision data into a repository."""
2899
present_parents = []
2901
for p_id in rev.parent_ids:
2902
if repository.has_revision(p_id):
2903
present_parents.append(p_id)
2904
parent_trees[p_id] = repository.revision_tree(p_id)
2906
parent_trees[p_id] = repository.revision_tree(
2907
_mod_revision.NULL_REVISION)
2909
inv = revision_tree.inventory
2910
entries = inv.iter_entries()
2911
# backwards compatibility hack: skip the root id.
2912
if not repository.supports_rich_root():
2913
path, root = entries.next()
2914
if root.revision != rev.revision_id:
2915
raise errors.IncompatibleRevision(repr(repository))
2917
for path, ie in entries:
2918
text_keys[(ie.file_id, ie.revision)] = ie
2919
text_parent_map = repository.texts.get_parent_map(text_keys)
2920
missing_texts = set(text_keys) - set(text_parent_map)
2921
# Add the texts that are not already present
2922
for text_key in missing_texts:
2923
ie = text_keys[text_key]
2925
# FIXME: TODO: The following loop overlaps/duplicates that done by
2926
# commit to determine parents. There is a latent/real bug here where
2927
# the parents inserted are not those commit would do - in particular
2928
# they are not filtered by heads(). RBC, AB
2929
for revision, tree in parent_trees.iteritems():
2930
if ie.file_id not in tree:
2932
parent_id = tree.inventory[ie.file_id].revision
2933
if parent_id in text_parents:
2935
text_parents.append((ie.file_id, parent_id))
2936
lines = revision_tree.get_file(ie.file_id).readlines()
2937
repository.texts.add_lines(text_key, text_parents, lines)
2939
# install the inventory
2940
if repository._format._commit_inv_deltas and len(rev.parent_ids):
2941
# Cache this inventory
2942
inventory_cache[rev.revision_id] = inv
2944
basis_inv = inventory_cache[rev.parent_ids[0]]
2946
repository.add_inventory(rev.revision_id, inv, present_parents)
2948
delta = inv._make_delta(basis_inv)
2949
repository.add_inventory_by_delta(rev.parent_ids[0], delta,
2950
rev.revision_id, present_parents)
2952
repository.add_inventory(rev.revision_id, inv, present_parents)
2953
except errors.RevisionAlreadyPresent:
2955
if signature is not None:
2956
repository.add_signature_text(rev.revision_id, signature)
2957
repository.add_revision(rev.revision_id, rev, inv)
class MetaDirRepository(Repository):
"""Repositories in the new meta-dir layout.
"different serializers")
3603
class InterSameDataRepository(InterRepository):
3604
"""Code for converting between repositories that represent the same data.
3606
Data format and model must match for this to work.
3610
def _get_repo_format_to_test(self):
3611
"""Repository format for testing with.
3613
InterSameData can pull from subtree to subtree and from non-subtree to
3614
non-subtree, so we test this with the richest repository format.
3616
from bzrlib.repofmt import knitrepo
3617
return knitrepo.RepositoryFormatKnit3()
3620
def is_compatible(source, target):
3621
return InterRepository._same_model(source, target)
3624
class InterDifferingSerializer(InterRepository):
3627
def _get_repo_format_to_test(self):
3631
def is_compatible(source, target):
3632
"""Be compatible with Knit2 source and Knit3 target"""
3633
# This is redundant with format.check_conversion_target(), however that
3634
# raises an exception, and we just want to say "False" as in we won't
3635
# support converting between these formats.
3636
if 'IDS_never' in debug.debug_flags:
3638
if source.supports_rich_root() and not target.supports_rich_root():
3640
if (source._format.supports_tree_reference
3641
and not target._format.supports_tree_reference):
3643
if target._fallback_repositories and target._format.supports_chks:
3644
# IDS doesn't know how to copy CHKs for the parent inventories it
3645
# adds to stacked repos.
3647
if 'IDS_always' in debug.debug_flags:
3649
# Only use this code path for local source and target. IDS does far
3650
# too much IO (both bandwidth and roundtrips) over a network.
3651
if not source.bzrdir.transport.base.startswith('file:///'):
3653
if not target.bzrdir.transport.base.startswith('file:///'):
3657
def _get_trees(self, revision_ids, cache):
3659
for rev_id in revision_ids:
3661
possible_trees.append((rev_id, cache[rev_id]))
3663
# Not cached, but inventory might be present anyway.
3665
tree = self.source.revision_tree(rev_id)
3666
except errors.NoSuchRevision:
3667
# Nope, parent is ghost.
3670
cache[rev_id] = tree
3671
possible_trees.append((rev_id, tree))
3672
return possible_trees
3674
def _get_delta_for_revision(self, tree, parent_ids, possible_trees):
3675
"""Get the best delta and base for this revision.
3677
:return: (basis_id, delta)
3680
# Generate deltas against each tree, to find the shortest.
3681
texts_possibly_new_in_tree = set()
3682
for basis_id, basis_tree in possible_trees:
3683
delta = tree.inventory._make_delta(basis_tree.inventory)
3684
for old_path, new_path, file_id, new_entry in delta:
3685
if new_path is None:
3686
# This file_id isn't present in the new rev, so we don't
3690
# Rich roots are handled elsewhere...
3692
kind = new_entry.kind
3693
if kind != 'directory' and kind != 'file':
3694
# No text record associated with this inventory entry.
3696
# This is a directory or file that has changed somehow.
3697
texts_possibly_new_in_tree.add((file_id, new_entry.revision))
3698
deltas.append((len(delta), basis_id, delta))
3700
return deltas[0][1:]
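# Note (assumption based on the docstring above): deltas is expected to be
# ordered shortest-first before this return, e.g. via deltas.sort(), so
# that deltas[0][1:] is the (basis_id, delta) pair with the smallest delta.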
3702
def _fetch_parent_invs_for_stacking(self, parent_map, cache):
3703
"""Find all parent revisions that are absent, but for which the
3704
inventory is present, and copy those inventories.
3706
This is necessary to preserve correctness when the source is stacked
3707
without fallbacks configured. (Note that in cases like upgrade the
source may not have _fallback_repositories even though it is
3712
for parents in parent_map.values():
3713
parent_revs.update(parents)
3714
present_parents = self.source.get_parent_map(parent_revs)
3715
absent_parents = set(parent_revs).difference(present_parents)
3716
parent_invs_keys_for_stacking = self.source.inventories.get_parent_map(
3717
(rev_id,) for rev_id in absent_parents)
3718
parent_inv_ids = [key[-1] for key in parent_invs_keys_for_stacking]
3719
for parent_tree in self.source.revision_trees(parent_inv_ids):
3720
current_revision_id = parent_tree.get_revision_id()
3721
parents_parents_keys = parent_invs_keys_for_stacking[
3722
(current_revision_id,)]
3723
parents_parents = [key[-1] for key in parents_parents_keys]
3724
basis_id = _mod_revision.NULL_REVISION
3725
basis_tree = self.source.revision_tree(basis_id)
3726
delta = parent_tree.inventory._make_delta(basis_tree.inventory)
3727
self.target.add_inventory_by_delta(
3728
basis_id, delta, current_revision_id, parents_parents)
3729
cache[current_revision_id] = parent_tree
3731
def _fetch_batch(self, revision_ids, basis_id, cache):
3732
"""Fetch across a few revisions.
3734
:param revision_ids: The revisions to copy
3735
:param basis_id: The revision_id of a tree that must be in cache, used
3736
as a basis for delta when no other base is available
3737
:param cache: A cache of RevisionTrees that we can use.
3738
:return: The revision_id of the last converted tree. The RevisionTree
3739
for it will be in cache
3741
# Walk through all revisions; get inventory deltas, copy referenced
3742
# texts that delta references, insert the delta, revision and
3744
root_keys_to_create = set()
3747
pending_revisions = []
3748
parent_map = self.source.get_parent_map(revision_ids)
3749
self._fetch_parent_invs_for_stacking(parent_map, cache)
3750
self.source._safe_to_return_from_cache = True
3751
for tree in self.source.revision_trees(revision_ids):
3752
# Find an inventory delta for this revision.
3753
# Find text entries that need to be copied, too.
3754
current_revision_id = tree.get_revision_id()
3755
parent_ids = parent_map.get(current_revision_id, ())
3756
parent_trees = self._get_trees(parent_ids, cache)
3757
possible_trees = list(parent_trees)
3758
if len(possible_trees) == 0:
3759
# There either aren't any parents, or the parents are ghosts,
3760
# so just use the last converted tree.
3761
possible_trees.append((basis_id, cache[basis_id]))
3762
basis_id, delta = self._get_delta_for_revision(tree, parent_ids,
3764
revision = self.source.get_revision(current_revision_id)
3765
pending_deltas.append((basis_id, delta,
3766
current_revision_id, revision.parent_ids))
3767
if self._converting_to_rich_root:
3768
self._revision_id_to_root_id[current_revision_id] = \
3770
# Determine which texts are present in this revision but not in
3771
# any of the available parents.
3772
texts_possibly_new_in_tree = set()
3773
for old_path, new_path, file_id, entry in delta:
3774
if new_path is None:
3775
# This file_id isn't present in the new rev
3779
if not self.target.supports_rich_root():
3780
# The target doesn't support rich root, so we don't
3783
if self._converting_to_rich_root:
3784
# This can't be copied normally, we have to insert
3786
root_keys_to_create.add((file_id, entry.revision))
3789
texts_possibly_new_in_tree.add((file_id, entry.revision))
3790
for basis_id, basis_tree in possible_trees:
3791
basis_inv = basis_tree.inventory
3792
for file_key in list(texts_possibly_new_in_tree):
3793
file_id, file_revision = file_key
3795
entry = basis_inv[file_id]
3796
except errors.NoSuchId:
3798
if entry.revision == file_revision:
3799
texts_possibly_new_in_tree.remove(file_key)
3800
text_keys.update(texts_possibly_new_in_tree)
3801
pending_revisions.append(revision)
3802
cache[current_revision_id] = tree
3803
basis_id = current_revision_id
3804
self.source._safe_to_return_from_cache = False
3806
from_texts = self.source.texts
3807
to_texts = self.target.texts
3808
if root_keys_to_create:
3809
root_stream = _mod_fetch._new_root_data_stream(
3810
root_keys_to_create, self._revision_id_to_root_id, parent_map,
3812
to_texts.insert_record_stream(root_stream)
3813
to_texts.insert_record_stream(from_texts.get_record_stream(
3814
text_keys, self.target._format._fetch_order,
3815
not self.target._format._fetch_uses_deltas))
3816
# insert inventory deltas
3817
for delta in pending_deltas:
3818
self.target.add_inventory_by_delta(*delta)
3819
if self.target._fallback_repositories:
3820
# Make sure this stacked repository has all the parent inventories
3821
# for the new revisions that we are about to insert. We do this
3822
# before adding the revisions so that no revision is added until
3823
# all the inventories it may depend on are added.
3824
# Note that this is overzealous, as we may have fetched these in an
3827
revision_ids = set()
3828
for revision in pending_revisions:
3829
revision_ids.add(revision.revision_id)
3830
parent_ids.update(revision.parent_ids)
3831
parent_ids.difference_update(revision_ids)
3832
parent_ids.discard(_mod_revision.NULL_REVISION)
3833
parent_map = self.source.get_parent_map(parent_ids)
3834
# we iterate over parent_map and not parent_ids because we don't
3835
# want to try copying any revision which is a ghost
3836
for parent_tree in self.source.revision_trees(parent_map):
3837
current_revision_id = parent_tree.get_revision_id()
3838
parents_parents = parent_map[current_revision_id]
3839
possible_trees = self._get_trees(parents_parents, cache)
3840
if len(possible_trees) == 0:
3841
# There either aren't any parents, or the parents are
3842
# ghosts, so just use the last converted tree.
3843
possible_trees.append((basis_id, cache[basis_id]))
3844
basis_id, delta = self._get_delta_for_revision(parent_tree,
3845
parents_parents, possible_trees)
3846
self.target.add_inventory_by_delta(
3847
basis_id, delta, current_revision_id, parents_parents)
3848
# insert signatures and revisions
3849
for revision in pending_revisions:
3851
signature = self.source.get_signature_text(
3852
revision.revision_id)
3853
self.target.add_signature_text(revision.revision_id,
3855
except errors.NoSuchRevision:
3857
self.target.add_revision(revision.revision_id, revision)
3860
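    # Added commentary (not original code): each entry appended to
    # pending_deltas above has the shape
    #   (basis_id, delta, current_revision_id, parent_ids)
    # where delta is an ordinary inventory delta, i.e. a list of
    # (old_path, new_path, file_id, InventoryEntry) tuples, which is why the
    # loop above can simply call self.target.add_inventory_by_delta(*delta)
    # on each entry to apply it against its recorded basis.
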
    def _fetch_all_revisions(self, revision_ids, pb):
        """Fetch everything for the list of revisions.

        :param revision_ids: The list of revisions to fetch. Must be in
            topological order.
        :param pb: A ProgressTask
        :return: None
        """
        basis_id, basis_tree = self._get_basis(revision_ids[0])
        batch_size = 100
        cache = lru_cache.LRUCache(100)
        cache[basis_id] = basis_tree
        del basis_tree # We don't want to hang on to it here
        hints = []
        for offset in range(0, len(revision_ids), batch_size):
            self.target.start_write_group()
            try:
                pb.update('Transferring revisions', offset,
                          len(revision_ids))
                batch = revision_ids[offset:offset+batch_size]
                basis_id = self._fetch_batch(batch, basis_id, cache)
            except:
                self.source._safe_to_return_from_cache = False
                self.target.abort_write_group()
                raise
            else:
                hint = self.target.commit_write_group()
                if hint:
                    hints.extend(hint)
        if hints and self.target._format.pack_compresses:
            self.target.pack(hint=hints)
        pb.update('Transferring revisions', len(revision_ids),
                  len(revision_ids))

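    # Worked example (added commentary, not original code): with the
    # batch_size of 100 used above and 250 revision ids, the loop runs three
    # write groups covering revision_ids[0:100], [100:200] and [200:250];
    # each batch starts from the basis_id returned by the previous
    # _fetch_batch call, so deltas keep chaining across batch boundaries.
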
    def fetch(self, revision_id=None, pb=None, find_ghosts=False,
            fetch_spec=None):
        """See InterRepository.fetch()."""
        if fetch_spec is not None:
            revision_ids = fetch_spec.get_keys()
        else:
            revision_ids = None
        ui.ui_factory.warn_experimental_format_fetch(self)
        if (not self.source.supports_rich_root()
            and self.target.supports_rich_root()):
            self._converting_to_rich_root = True
            self._revision_id_to_root_id = {}
        else:
            self._converting_to_rich_root = False
        # See <https://launchpad.net/bugs/456077> asking for a warning here
        if self.source._format.network_name() != self.target._format.network_name():
            ui.ui_factory.show_user_warning('cross_format_fetch',
                from_format=self.source._format,
                to_format=self.target._format)
        if revision_ids is None:
            if revision_id:
                search_revision_ids = [revision_id]
            else:
                search_revision_ids = None
            revision_ids = self.target.search_missing_revision_ids(self.source,
                revision_ids=search_revision_ids,
                find_ghosts=find_ghosts).get_keys()
        if not revision_ids:
            return 0, 0
        revision_ids = tsort.topo_sort(
            self.source.get_graph().get_parent_map(revision_ids))
        if not revision_ids:
            return 0, 0
        # Walk through all revisions; get inventory deltas, copy referenced
        # texts that delta references, insert the delta, revision and
        # signature.
        if pb is None:
            my_pb = ui.ui_factory.nested_progress_bar()
            pb = my_pb
        else:
            symbol_versioning.warn(
                symbol_versioning.deprecated_in((1, 14, 0))
                % "pb parameter to fetch()")
            my_pb = None
        try:
            self._fetch_all_revisions(revision_ids, pb)
        finally:
            if my_pb is not None:
                my_pb.finished()
        return len(revision_ids), 0

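    # Illustrative sketch (assumed usage, not part of the original source):
    # this optimiser is normally reached through the InterRepository
    # mechanism rather than instantiated directly, roughly:
    #   inter = InterRepository.get(source_repo, target_repo)
    #   copied_count, _ = inter.fetch(revision_id=rev_id)
    # which, for this class, ends up returning (len(revision_ids), 0).
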
    def _get_basis(self, first_revision_id):
        """Get a revision and tree which exists in the target.

        This assumes that first_revision_id is selected for transmission
        because all other ancestors are already present. If we can't find an
        ancestor we fall back to NULL_REVISION since we know that is safe.

        :return: (basis_id, basis_tree)
        """
        first_rev = self.source.get_revision(first_revision_id)
        try:
            basis_id = first_rev.parent_ids[0]
            # only valid as a basis if the target has it
            self.target.get_revision(basis_id)
            # Try to get a basis tree - if it's a ghost it will hit the
            # NoSuchRevision case.
            basis_tree = self.source.revision_tree(basis_id)
        except (IndexError, errors.NoSuchRevision):
            basis_id = _mod_revision.NULL_REVISION
            basis_tree = self.source.revision_tree(basis_id)
        return basis_id, basis_tree

InterRepository.register_optimiser(InterDifferingSerializer)
InterRepository.register_optimiser(InterSameDataRepository)


class CopyConverter(object):
    """A repository conversion tool which just performs a copy of the content.

    This is slow but quite reliable.
    """

    def __init__(self, target_format):
        """Create a CopyConverter.

        :param target_format: The format the resulting repository should be.
        """
        self.target_format = target_format

    def convert(self, repo, pb):
        """Perform the conversion of to_convert, giving feedback via pb."""
        # this is only useful with metadir layouts - separated repo content.
        # trigger an assertion if not such
        repo._format.get_format_string()
        self.repo_dir = repo.bzrdir
        pb.update('Moving repository to repository.backup')
        self.repo_dir.transport.move('repository', 'repository.backup')
        backup_transport = self.repo_dir.transport.clone('repository.backup')
        repo._format.check_conversion_target(self.target_format)
        self.source_repo = repo._format.open(self.repo_dir,
            _found=True,
            _override_transport=backup_transport)
        pb.update('Creating new repository')
        converted = self.target_format.initialize(self.repo_dir,
                                                  self.source_repo.is_shared())
        converted.lock_write()
        try:
            pb.update('Copying content')
            self.source_repo.copy_content_into(converted)
        finally:
            converted.unlock()
        pb.update('Deleting old repository content')
        self.repo_dir.transport.delete_tree('repository.backup')
        ui.ui_factory.note('repository converted')

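# Illustrative sketch (assumed usage, not part of the original source):
# upgrade code would typically drive this converter with an already-open
# repository and a progress bar, roughly:
#   converter = CopyConverter(target_format)
#   converter.convert(repo, pb)
# after which the bzrdir holds a repository in target_format and the old
# content under 'repository.backup' has been removed.
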
_unescape_map = {
    'apos': "'",
    'quot': '"',
    'amp': '&',
    'lt': '<',
    'gt': '>',
}


def _unescaper(match, _map=_unescape_map):
    code = match.group(1)
    try:
        return _map[code]
    except KeyError:
        if not code.startswith('#'):
            raise
        return unichr(int(code[1:])).encode('utf8')


_unescape_re = None


def _unescape_xml(data):
    """Unescape predefined XML entities in a string of data."""
    global _unescape_re
    if _unescape_re is None:
        _unescape_re = re.compile('\&([^;]*);')
    return _unescape_re.sub(_unescaper, data)


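# Illustrative examples (added commentary, not original code), assuming the
# Python 2 environment bzrlib runs under (unichr, byte strings):
#   _unescape_xml('a &amp; b')  => 'a & b'
#   _unescape_xml('&#169;')     => '\xc2\xa9'  (UTF-8 encoding of the entity)

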
class _VersionedFileChecker(object):

    def __init__(self, repository, text_key_references=None, ancestors=None):
        self.repository = repository
        self.text_index = self.repository._generate_text_key_index(
            text_key_references=text_key_references, ancestors=ancestors)

    def calculate_file_version_parents(self, text_key):
        """Calculate the correct parents for a file version according to
        the inventories.
        """
        parent_keys = self.text_index[text_key]
        if parent_keys == [_mod_revision.NULL_REVISION]:
            return ()
        return tuple(parent_keys)

    def check_file_version_parents(self, texts, progress_bar=None):
        """Check the parents stored in a versioned file are correct.

        It also detects file versions that are not referenced by their
        corresponding revision's inventory.

        :returns: A tuple of (wrong_parents, dangling_file_versions).
            wrong_parents is a dict mapping {revision_id: (stored_parents,
            correct_parents)} for each revision_id where the stored parents
            are not correct. dangling_file_versions is a set of (file_id,
            revision_id) tuples for versions that are present in this versioned
            file, but not used by the corresponding inventory.
        """
        local_progress = None
        if progress_bar is None:
            local_progress = ui.ui_factory.nested_progress_bar()
            progress_bar = local_progress
        try:
            return self._check_file_version_parents(texts, progress_bar)
        finally:
            if local_progress is not None:
                local_progress.finished()

    def _check_file_version_parents(self, texts, progress_bar):
        """See check_file_version_parents."""
        wrong_parents = {}
        self.file_ids = set([file_id for file_id, _ in
            self.text_index.iterkeys()])
        # text keys is now grouped by file_id
        n_versions = len(self.text_index)
        progress_bar.update('loading text store', 0, n_versions)
        parent_map = self.repository.texts.get_parent_map(self.text_index)
        # On unlistable transports this could well be empty/error...
        text_keys = self.repository.texts.keys()
        unused_keys = frozenset(text_keys) - set(self.text_index)
        for num, key in enumerate(self.text_index.iterkeys()):
            progress_bar.update('checking text graph', num, n_versions)
            correct_parents = self.calculate_file_version_parents(key)
            try:
                knit_parents = parent_map[key]
            except errors.RevisionNotPresent:
                # Missing text!
                knit_parents = None
            if correct_parents != knit_parents:
                wrong_parents[key] = (knit_parents, correct_parents)
        return wrong_parents, unused_keys


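# Illustrative sketch (assumed usage, not part of the original source):
# repository check code builds one of these against a locked repository and
# inspects the result roughly like this:
#   checker = _VersionedFileChecker(repo)
#   wrong_parents, unused = checker.check_file_version_parents(repo.texts)
#   for text_key, (stored, correct) in wrong_parents.items():
#       ...  # each text_key is a (file_id, revision_id) tuple

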
def _old_get_graph(repository, revision_id):
    """DO NOT USE. That is all. I'm serious."""
    graph = repository.get_graph()
    revision_graph = dict(((key, value) for key, value in
        graph.iter_ancestry([revision_id]) if value is not None))
    return _strip_NULL_ghosts(revision_graph)


def _strip_NULL_ghosts(revision_graph):
    """Also don't use this. more compatibility code for unmigrated clients."""
    # Filter ghosts, and null:
    if _mod_revision.NULL_REVISION in revision_graph:
        del revision_graph[_mod_revision.NULL_REVISION]
    for key, parents in revision_graph.items():
        revision_graph[key] = tuple(parent for parent in parents if parent
            in revision_graph)
    return revision_graph


class StreamSink(object):
    """An object that can insert a stream into a repository.

    This interface handles the complexity of reserialising inventories and
    revisions from different formats, and allows unidirectional insertion into
    stacked repositories without looking for the missing basis parents
    beforehand.
    """

    def __init__(self, target_repo):
        self.target_repo = target_repo

    def insert_stream(self, stream, src_format, resume_tokens):
        """Insert a stream's content into the target repository.

        :param src_format: a bzr repository format.

        :return: a list of resume tokens and an iterable of keys additional
            items required before the insertion can be completed.
        """
        self.target_repo.lock_write()
        try:
            if resume_tokens:
                self.target_repo.resume_write_group(resume_tokens)
                is_resume = True
            else:
                self.target_repo.start_write_group()
                is_resume = False
            try:
                # locked_insert_stream performs a commit|suspend.
                missing_keys = self.insert_stream_without_locking(stream,
                                    src_format, is_resume)
                if missing_keys:
                    # suspend the write group and tell the caller what is
                    # missing. We know we can suspend or else we would not have
                    # entered this code path. (All repositories that can handle
                    # missing keys can handle suspending a write group).
                    write_group_tokens = self.target_repo.suspend_write_group()
                    return write_group_tokens, missing_keys
                hint = self.target_repo.commit_write_group()
                to_serializer = self.target_repo._format._serializer
                src_serializer = src_format._serializer
                if (to_serializer != src_serializer and
                    self.target_repo._format.pack_compresses):
                    self.target_repo.pack(hint=hint)
                return [], set()
            except:
                self.target_repo.abort_write_group(suppress_errors=True)
                raise
        finally:
            self.target_repo.unlock()

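    # Illustrative sketch (assumed wiring, not part of the original source):
    # a fetch driver might pair this sink with a StreamSource roughly like
    # this, given already-locked source_repo and target_repo objects:
    #   source = source_repo._get_source(target_repo._format)
    #   sink = target_repo._get_sink()
    #   tokens, missing = sink.insert_stream(source.get_stream(search),
    #                                        source_repo._format, [])
    #   if missing:
    #       sink.insert_stream(source.get_stream_for_missing_keys(missing),
    #                          source_repo._format, tokens)
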
    def insert_stream_without_locking(self, stream, src_format,
                                      is_resume=False):
        """Insert a stream's content into the target repository.

        This assumes that you already have a locked repository and an active
        write group.

        :param src_format: a bzr repository format.
        :param is_resume: Passed down to get_missing_parent_inventories to
            indicate if we should be checking for missing texts at the same
            time.

        :return: A set of keys that are missing.
        """
        if not self.target_repo.is_write_locked():
            raise errors.ObjectNotLocked(self)
        if not self.target_repo.is_in_write_group():
            raise errors.BzrError('you must already be in a write group')
        to_serializer = self.target_repo._format._serializer
        src_serializer = src_format._serializer
        new_pack = None
        if to_serializer == src_serializer:
            # If serializers match and the target is a pack repository, set the
            # write cache size on the new pack. This avoids poor performance
            # on transports where append is unbuffered (such as
            # RemoteTransport). This is safe to do because nothing should read
            # back from the target repository while a stream with matching
            # serialization is being inserted.
            # The exception is that a delta record from the source that should
            # be a fulltext may need to be expanded by the target (see
            # test_fetch_revisions_with_deltas_into_pack); but we take care to
            # explicitly flush any buffered writes first in that rare case.
            try:
                new_pack = self.target_repo._pack_collection._new_pack
            except AttributeError:
                # Not a pack repository
                pass
            else:
                new_pack.set_write_cache_size(1024*1024)
        for substream_type, substream in stream:
            if 'stream' in debug.debug_flags:
                mutter('inserting substream: %s', substream_type)
            if substream_type == 'texts':
                self.target_repo.texts.insert_record_stream(substream)
            elif substream_type == 'inventories':
                if src_serializer == to_serializer:
                    self.target_repo.inventories.insert_record_stream(
                        substream)
                else:
                    self._extract_and_insert_inventories(
                        substream, src_serializer)
            elif substream_type == 'inventory-deltas':
                self._extract_and_insert_inventory_deltas(
                    substream, src_serializer)
            elif substream_type == 'chk_bytes':
                # XXX: This doesn't support conversions, as it assumes the
                # conversion was done in the fetch code.
                self.target_repo.chk_bytes.insert_record_stream(substream)
            elif substream_type == 'revisions':
                # This may fallback to extract-and-insert more often than
                # required if the serializers are different only in terms of
                # the inventory.
                if src_serializer == to_serializer:
                    self.target_repo.revisions.insert_record_stream(substream)
                else:
                    self._extract_and_insert_revisions(substream,
                        src_serializer)
            elif substream_type == 'signatures':
                self.target_repo.signatures.insert_record_stream(substream)
            else:
                raise AssertionError('kaboom! %s' % (substream_type,))
        # Done inserting data, and the missing_keys calculations will try to
        # read back from the inserted data, so flush the writes to the new pack
        # (if this is pack format).
        if new_pack is not None:
            new_pack._write_data('', flush=True)
        # Find all the new revisions (including ones from resume_tokens)
        missing_keys = self.target_repo.get_missing_parent_inventories(
            check_for_missing_texts=is_resume)
        try:
            for prefix, versioned_file in (
                ('texts', self.target_repo.texts),
                ('inventories', self.target_repo.inventories),
                ('revisions', self.target_repo.revisions),
                ('signatures', self.target_repo.signatures),
                ('chk_bytes', self.target_repo.chk_bytes),
                ):
                if versioned_file is None:
                    continue
                # TODO: key is often going to be a StaticTuple object
                #       I don't believe we can define a method by which
                #       (prefix,) + StaticTuple will work, though we could
                #       define a StaticTuple.sq_concat that would allow you to
                #       pass in either a tuple or a StaticTuple as the second
                #       object, so instead we could have:
                #       StaticTuple(prefix) + key here...
                missing_keys.update((prefix,) + key for key in
                    versioned_file.get_missing_compression_parent_keys())
        except NotImplementedError:
            # cannot even attempt suspending, and missing would have failed
            # during stream insertion.
            missing_keys = set()
        return missing_keys

    def _extract_and_insert_inventory_deltas(self, substream, serializer):
        target_rich_root = self.target_repo._format.rich_root_data
        target_tree_refs = self.target_repo._format.supports_tree_reference
        for record in substream:
            # Insert the delta directly
            inventory_delta_bytes = record.get_bytes_as('fulltext')
            deserialiser = inventory_delta.InventoryDeltaDeserializer()
            try:
                parse_result = deserialiser.parse_text_bytes(
                    inventory_delta_bytes)
            except inventory_delta.IncompatibleInventoryDelta, err:
                trace.mutter("Incompatible delta: %s", err.msg)
                raise errors.IncompatibleRevision(self.target_repo._format)
            basis_id, new_id, rich_root, tree_refs, inv_delta = parse_result
            revision_id = new_id
            parents = [key[0] for key in record.parents]
            self.target_repo.add_inventory_by_delta(
                basis_id, inv_delta, revision_id, parents)

    def _extract_and_insert_inventories(self, substream, serializer,
            parse_delta=None):
        """Generate a new inventory versionedfile in target, converting data.

        The inventory is retrieved from the source, (deserializing it), and
        stored in the target (reserializing it in a different format).
        """
        target_rich_root = self.target_repo._format.rich_root_data
        target_tree_refs = self.target_repo._format.supports_tree_reference
        for record in substream:
            # It's not a delta, so it must be a fulltext in the source
            # serializer's format.
            bytes = record.get_bytes_as('fulltext')
            revision_id = record.key[0]
            inv = serializer.read_inventory_from_string(bytes, revision_id)
            parents = [key[0] for key in record.parents]
            self.target_repo.add_inventory(revision_id, inv, parents)
            # No need to keep holding this full inv in memory when the rest of
            # the substream is likely to be all deltas.
            del inv

    def _extract_and_insert_revisions(self, substream, serializer):
        for record in substream:
            bytes = record.get_bytes_as('fulltext')
            revision_id = record.key[0]
            rev = serializer.read_revision_from_string(bytes)
            if rev.revision_id != revision_id:
                raise AssertionError('wtf: %s != %s' % (rev, revision_id))
            self.target_repo.add_revision(revision_id, rev)

    def finished(self):
        if self.target_repo._format._fetch_reconcile:
            self.target_repo.reconcile()


class StreamSource(object):
    """A source of a stream for fetching between repositories."""

    def __init__(self, from_repository, to_format):
        """Create a StreamSource streaming from from_repository."""
        self.from_repository = from_repository
        self.to_format = to_format
        self._record_counter = RecordCounter()

    def delta_on_metadata(self):
        """Return True if deltas are permitted on metadata streams.

        That is on revisions and signatures.
        """
        src_serializer = self.from_repository._format._serializer
        target_serializer = self.to_format._serializer
        return (self.to_format._fetch_uses_deltas and
            src_serializer == target_serializer)

    def _fetch_revision_texts(self, revs):
        # fetch signatures first and then the revision texts
        # may need to be an InterRevisionStore call here.
        from_sf = self.from_repository.signatures
        # A missing signature is just skipped.
        keys = [(rev_id,) for rev_id in revs]
        signatures = versionedfile.filter_absent(from_sf.get_record_stream(
            keys,
            self.to_format._fetch_order,
            not self.to_format._fetch_uses_deltas))
        # If a revision has a delta, this is actually expanded inside the
        # insert_record_stream code now, which is an alternate fix for
        from_rf = self.from_repository.revisions
        revisions = from_rf.get_record_stream(
            keys,
            self.to_format._fetch_order,
            not self.delta_on_metadata())
        return [('signatures', signatures), ('revisions', revisions)]

    def _generate_root_texts(self, revs):
        """This will be called by get_stream between fetching weave texts and
        fetching the inventory weave.
        """
        if self._rich_root_upgrade():
            return _mod_fetch.Inter1and2Helper(
                self.from_repository).generate_root_texts(revs)
        else:
            return []

    def get_stream(self, search):
        phase = 'file'
        revs = search.get_keys()
        graph = self.from_repository.get_graph()
        revs = tsort.topo_sort(graph.get_parent_map(revs))
        data_to_fetch = self.from_repository.item_keys_introduced_by(revs)
        text_keys = []
        for knit_kind, file_id, revisions in data_to_fetch:
            if knit_kind != phase:
                phase = knit_kind
                # Make a new progress bar for this phase
            if knit_kind == "file":
                # Accumulate file texts
                text_keys.extend([(file_id, revision) for revision in
                    revisions])
            elif knit_kind == "inventory":
                # Now copy the file texts.
                from_texts = self.from_repository.texts
                yield ('texts', from_texts.get_record_stream(
                    text_keys, self.to_format._fetch_order,
                    not self.to_format._fetch_uses_deltas))
                # Cause an error if a text occurs after we have done the
                # copy.
                text_keys = None
                # Before we process the inventory we generate the root
                # texts (if necessary) so that the inventories references
                # them.
                for _ in self._generate_root_texts(revs):
                    yield _
                # we fetch only the referenced inventories because we do not
                # know for unselected inventories whether all their required
                # texts are present in the other repository - it could be
                # corrupt.
                for info in self._get_inventory_stream(revs):
                    yield info
            elif knit_kind == "signatures":
                # Nothing to do here; this will be taken care of when
                # _fetch_revision_texts happens.
                pass
            elif knit_kind == "revisions":
                for record in self._fetch_revision_texts(revs):
                    yield record
            else:
                raise AssertionError("Unknown knit kind %r" % knit_kind)

    def get_stream_for_missing_keys(self, missing_keys):
        # missing keys can only occur when we are byte copying and not
        # translating (because translation means we don't send
        # unreconstructable deltas ever).
        keys = {}
        keys['texts'] = set()
        keys['revisions'] = set()
        keys['inventories'] = set()
        keys['chk_bytes'] = set()
        keys['signatures'] = set()
        for key in missing_keys:
            keys[key[0]].add(key[1:])
        if len(keys['revisions']):
            # If we allowed copying revisions at this point, we could end up
            # copying a revision without copying its required texts: a
            # violation of the requirements for repository integrity.
            raise AssertionError(
                'cannot copy revisions to fill in missing deltas %s' % (
                    keys['revisions'],))
        for substream_kind, keys in keys.iteritems():
            vf = getattr(self.from_repository, substream_kind)
            if vf is None and keys:
                raise AssertionError(
                    "cannot fill in keys for a versioned file we don't"
                    " have: %s needs %s" % (substream_kind, keys))
            if not keys:
                # No need to stream something we don't have
                continue
            if substream_kind == 'inventories':
                # Some missing keys are genuinely ghosts, filter those out.
                present = self.from_repository.inventories.get_parent_map(keys)
                revs = [key[0] for key in present]
                # Get the inventory stream more-or-less as we do for the
                # original stream; there's no reason to assume that records
                # direct from the source will be suitable for the sink. (Think
                # e.g. 2a -> 1.9-rich-root).
                for info in self._get_inventory_stream(revs, missing=True):
                    yield info
                continue
            # Ask for full texts always so that we don't need more round trips
            # after this stream.
            # Some of the missing keys are genuinely ghosts, so filter absent
            # records. The Sink is responsible for doing another check to
            # ensure that ghosts don't introduce missing data for future
            # fetches.
            stream = versionedfile.filter_absent(vf.get_record_stream(keys,
                self.to_format._fetch_order, True))
            yield substream_kind, stream

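    # Added commentary (not original code): the missing keys handled above are
    # prefixed tuples such as ('inventories', 'rev-id') or
    # ('texts', 'file-id', 'rev-id'), so key[0] selects the substream kind and
    # key[1:] is the key within that versioned file.
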
    def inventory_fetch_order(self):
        if self._rich_root_upgrade():
            return 'topological'
        else:
            return self.to_format._fetch_order

    def _rich_root_upgrade(self):
        return (not self.from_repository._format.rich_root_data and
            self.to_format.rich_root_data)

    def _get_inventory_stream(self, revision_ids, missing=False):
        from_format = self.from_repository._format
        if (from_format.supports_chks and self.to_format.supports_chks and
            from_format.network_name() == self.to_format.network_name()):
            raise AssertionError(
                "this case should be handled by GroupCHKStreamSource")
        elif 'forceinvdeltas' in debug.debug_flags:
            return self._get_convertable_inventory_stream(revision_ids,
                    delta_versus_null=missing)
        elif from_format.network_name() == self.to_format.network_name():
            # Same format.
            return self._get_simple_inventory_stream(revision_ids,
                    missing=missing)
        elif (not from_format.supports_chks and not self.to_format.supports_chks
                and from_format._serializer == self.to_format._serializer):
            # Essentially the same format.
            return self._get_simple_inventory_stream(revision_ids,
                    missing=missing)
        else:
            # Any time we switch serializations, we want to use an
            # inventory-delta based approach.
            return self._get_convertable_inventory_stream(revision_ids,
                    delta_versus_null=missing)

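    # Added commentary (not original code) summarising the choices above:
    #   matching CHK formats            -> error (GroupCHKStreamSource's job)
    #   -Dforceinvdeltas                -> inventory-delta stream
    #   identical network names         -> simple byte-copy stream
    #   both non-CHK, same serializer   -> simple byte-copy stream
    #   anything else                   -> inventory-delta stream
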
    def _get_simple_inventory_stream(self, revision_ids, missing=False):
        # NB: This currently reopens the inventory weave in source;
        # using a single stream interface instead would avoid this.
        from_weave = self.from_repository.inventories
        if missing:
            delta_closure = True
        else:
            delta_closure = not self.delta_on_metadata()
        yield ('inventories', from_weave.get_record_stream(
            [(rev_id,) for rev_id in revision_ids],
            self.inventory_fetch_order(), delta_closure))

    def _get_convertable_inventory_stream(self, revision_ids,
                                          delta_versus_null=False):
        # The two formats are sufficiently different that there is no fast
        # path, so we need to send just inventorydeltas, which any
        # sufficiently modern client can insert into any repository.
        # The StreamSink code expects to be able to
        # convert on the target, so we need to put bytes-on-the-wire that can
        # be converted. That means inventory deltas (if the remote is <1.19,
        # RemoteStreamSink will fallback to VFS to insert the deltas).
        yield ('inventory-deltas',
            self._stream_invs_as_deltas(revision_ids,
                delta_versus_null=delta_versus_null))

    def _stream_invs_as_deltas(self, revision_ids, delta_versus_null=False):
        """Return a stream of inventory-deltas for the given rev ids.

        :param revision_ids: The list of inventories to transmit
        :param delta_versus_null: Don't try to find a minimal delta for this
            entry, instead compute the delta versus the NULL_REVISION. This
            effectively streams a complete inventory. Used for stuff like
            filling in missing parents, etc.
        """
        from_repo = self.from_repository
        revision_keys = [(rev_id,) for rev_id in revision_ids]
        parent_map = from_repo.inventories.get_parent_map(revision_keys)
        # XXX: possibly repos could implement a more efficient iter_inv_deltas
        # method...
        inventories = self.from_repository.iter_inventories(
            revision_ids, 'topological')
        format = from_repo._format
        invs_sent_so_far = set([_mod_revision.NULL_REVISION])
        inventory_cache = lru_cache.LRUCache(50)
        null_inventory = from_repo.revision_tree(
            _mod_revision.NULL_REVISION).inventory
        # XXX: ideally the rich-root/tree-refs flags would be per-revision, not
        # per-repo (e.g. streaming a non-rich-root revision out of a rich-root
        # repo back into a non-rich-root repo ought to be allowed)
        serializer = inventory_delta.InventoryDeltaSerializer(
            versioned_root=format.rich_root_data,
            tree_references=format.supports_tree_reference)
        for inv in inventories:
            key = (inv.revision_id,)
            parent_keys = parent_map.get(key, ())
            delta = None
            if not delta_versus_null and parent_keys:
                # The caller did not ask for complete inventories and we have
                # some parents that we can delta against. Make a delta against
                # each parent so that we can find the smallest.
                parent_ids = [parent_key[0] for parent_key in parent_keys]
                for parent_id in parent_ids:
                    if parent_id not in invs_sent_so_far:
                        # We don't know that the remote side has this basis, so
                        # we can't use it.
                        continue
                    if parent_id == _mod_revision.NULL_REVISION:
                        parent_inv = null_inventory
                    else:
                        parent_inv = inventory_cache.get(parent_id, None)
                        if parent_inv is None:
                            parent_inv = from_repo.get_inventory(parent_id)
                    candidate_delta = inv._make_delta(parent_inv)
                    if (delta is None or
                        len(delta) > len(candidate_delta)):
                        delta = candidate_delta
                        basis_id = parent_id
            if delta is None:
                # Either none of the parents ended up being suitable, or we
                # were asked to delta against NULL
                basis_id = _mod_revision.NULL_REVISION
                delta = inv._make_delta(null_inventory)
            invs_sent_so_far.add(inv.revision_id)
            inventory_cache[inv.revision_id] = inv
            delta_serialized = ''.join(
                serializer.delta_to_lines(basis_id, key[-1], delta))
            yield versionedfile.FulltextContentFactory(
                key, parent_keys, None, delta_serialized)


def _iter_for_revno(repo, partial_history_cache, stop_index=None,
                    stop_revision=None):
    """Extend the partial history to include a given index