commit to be valid, deletes against the basis MUST be recorded via
builder.record_delete().
self._recording_deletes = True
basis_id = self.parents[0]
basis_id = _mod_revision.NULL_REVISION
self.basis_delta_revision = basis_id
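# Illustrative sketch (not part of the original source): once
# will_record_deletes() has been called, the caller is expected to report
# every path removed relative to the basis before finishing the commit,
# roughly:
#
#   builder.will_record_deletes()
#   for path, file_id in paths_removed_from_basis:  # caller-side data, assumed
#       builder.record_delete(path, file_id)
#
# record_delete() is the real CommitBuilder API; paths_removed_from_basis is
# only a hypothetical name for whatever the caller computed.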
def record_entry_contents(self, ie, parent_invs, path, tree,
"""Record the content of ie from tree into the commit if needed.
Side effect: sets ie.revision when unchanged
:param ie: An inventory entry present in the commit.
:param parent_invs: The inventories of the parent revisions of the
:param path: The path the entry is at in the tree.
:param tree: The tree which contains this entry and should be used to
:param content_summary: Summary data from the tree about the paths
content - stat, length, exec, sha/link target. This is only
accessed when the entry has a revision of None - that is when it is
a candidate to commit.
:return: A tuple (change_delta, version_recorded, fs_hash).
change_delta is an inventory_delta change for this entry against
the basis tree of the commit, or None if no change occurred against
version_recorded is True if a new version of the entry has been
recorded. For instance, committing a merge where a file was only
changed on the other side will return (delta, False).
fs_hash is either None, or the hash details for the path (currently
a tuple of the contents sha1 and the statvalue returned by
tree.get_file_with_stat()).
394
if self.new_inventory.root is None:
395
if ie.parent_id is not None:
396
raise errors.RootMissing()
397
self._check_root(ie, parent_invs, tree)
398
if ie.revision is None:
399
kind = content_summary[0]
401
# ie is carried over from a prior commit
403
# XXX: repository specific check for nested tree support goes here - if
404
# the repo doesn't want nested trees we skip it ?
405
if (kind == 'tree-reference' and
406
not self.repository._format.supports_tree_reference):
407
# mismatch between commit builder logic and repository:
408
# this needs the entry creation pushed down into the builder.
409
raise NotImplementedError('Missing repository subtree support.')
410
self.new_inventory.add(ie)
412
# TODO: slow, take it out of the inner loop.
414
basis_inv = parent_invs[0]
416
basis_inv = Inventory(root_id=None)
418
# ie.revision is always None if the InventoryEntry is considered
419
# for committing. We may record the previous parents revision if the
420
# content is actually unchanged against a sole head.
421
if ie.revision is not None:
422
if not self._versioned_root and path == '':
423
# repositories that do not version the root set the root's
424
# revision to the new commit even when no change occurs (more
425
# specifically, they do not record a revision on the root; and
426
# the rev id is assigned to the root during deserialisation -
427
# this masks when a change may have occurred against the basis.
428
# To match this we always issue a delta, because the revision
429
# of the root will always be changing.
430
if ie.file_id in basis_inv:
431
delta = (basis_inv.id2path(ie.file_id), path,
435
delta = (None, path, ie.file_id, ie)
436
self._basis_delta.append(delta)
437
return delta, False, None
439
# we don't need to commit this, because the caller already
440
# determined that an existing revision of this file is
441
# appropriate. If it's not being considered for committing then
442
# it and all its parents to the root must be unaltered so
443
# no-change against the basis.
if ie.revision == self._new_revision_id:
raise AssertionError("Impossible situation, a skipped "
"inventory entry (%r) claims to be modified in this "
"commit (%r)." % (ie, self._new_revision_id))
return None, False, None
449
# XXX: Friction: parent_candidates should return a list not a dict
450
# so that we don't have to walk the inventories again.
451
parent_candiate_entries = ie.parent_candidates(parent_invs)
452
head_set = self._heads(ie.file_id, parent_candiate_entries.keys())
454
for inv in parent_invs:
455
if ie.file_id in inv:
456
old_rev = inv[ie.file_id].revision
457
if old_rev in head_set:
458
heads.append(inv[ie.file_id].revision)
459
head_set.remove(inv[ie.file_id].revision)
# now we check to see if we need to write a new record to the
# We write a new entry unless there is one head among the ancestors, and
# the kind-derived content is unchanged.
# Cheapest check first: no ancestors, or more than one head in the
# ancestors, we write a new node.
# There is a single head, look it up for comparison
parent_entry = parent_candiate_entries[heads[0]]
474
# if the non-content specific data has changed, we'll be writing a
476
if (parent_entry.parent_id != ie.parent_id or
477
parent_entry.name != ie.name):
479
# now we need to do content specific checks:
481
# if the kind changed the content obviously has
482
if kind != parent_entry.kind:
484
# Stat cache fingerprint feedback for the caller - None as we usually
485
# don't generate one.
488
if content_summary[2] is None:
489
raise ValueError("Files must not have executable = None")
491
# We can't trust a check of the file length because of content
493
if (# if the exec bit has changed we have to store:
494
parent_entry.executable != content_summary[2]):
496
elif parent_entry.text_sha1 == content_summary[3]:
497
# all meta and content is unchanged (using a hash cache
498
# hit to check the sha)
499
ie.revision = parent_entry.revision
500
ie.text_size = parent_entry.text_size
501
ie.text_sha1 = parent_entry.text_sha1
502
ie.executable = parent_entry.executable
503
return self._get_delta(ie, basis_inv, path), False, None
# Either there is only a hash change (no hash cache entry,
# or same size content change), or there is no change on
# Provide the parent's hash to the store layer, so that if the
# content is unchanged we will not store a new node.
nostore_sha = parent_entry.text_sha1
512
# We want to record a new node regardless of the presence or
513
# absence of a content change in the file.
515
ie.executable = content_summary[2]
516
file_obj, stat_value = tree.get_file_with_stat(ie.file_id, path)
518
text = file_obj.read()
522
ie.text_sha1, ie.text_size = self._add_text_to_weave(
523
ie.file_id, text, heads, nostore_sha)
524
# Let the caller know we generated a stat fingerprint.
525
fingerprint = (ie.text_sha1, stat_value)
526
except errors.ExistingContent:
527
# Turns out that the file content was unchanged, and we were
528
# only going to store a new node if it was changed. Carry over
530
ie.revision = parent_entry.revision
531
ie.text_size = parent_entry.text_size
532
ie.text_sha1 = parent_entry.text_sha1
533
ie.executable = parent_entry.executable
534
return self._get_delta(ie, basis_inv, path), False, None
535
elif kind == 'directory':
537
# all data is meta here, nothing specific to directory, so
539
ie.revision = parent_entry.revision
540
return self._get_delta(ie, basis_inv, path), False, None
541
self._add_text_to_weave(ie.file_id, '', heads, None)
542
elif kind == 'symlink':
543
current_link_target = content_summary[3]
545
# symlink target is not generic metadata, check if it has
547
if current_link_target != parent_entry.symlink_target:
550
# unchanged, carry over.
551
ie.revision = parent_entry.revision
552
ie.symlink_target = parent_entry.symlink_target
553
return self._get_delta(ie, basis_inv, path), False, None
554
ie.symlink_target = current_link_target
555
self._add_text_to_weave(ie.file_id, '', heads, None)
556
elif kind == 'tree-reference':
558
if content_summary[3] != parent_entry.reference_revision:
561
# unchanged, carry over.
562
ie.reference_revision = parent_entry.reference_revision
563
ie.revision = parent_entry.revision
564
return self._get_delta(ie, basis_inv, path), False, None
565
ie.reference_revision = content_summary[3]
566
if ie.reference_revision is None:
567
raise AssertionError("invalid content_summary for nested tree: %r"
568
% (content_summary,))
569
self._add_text_to_weave(ie.file_id, '', heads, None)
raise NotImplementedError('unknown kind')
ie.revision = self._new_revision_id
self._any_changes = True
return self._get_delta(ie, basis_inv, path), True, fingerprint
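# Illustrative sketch (not part of the original source): how a caller might
# interpret the (change_delta, version_recorded, fs_hash) tuple documented
# above. Every name other than record_entry_contents is assumed caller-side.
#
#   delta, recorded, fs_hash = builder.record_entry_contents(
#       ie, parent_invs, path, tree, content_summary)
#   if delta is not None:
#       accumulated_inv_delta.append(delta)   # hypothetical accumulator
#   if recorded:
#       new_text_versions += 1                # hypothetical counter
#   if fs_hash is not None:
#       sha1, stat_value = fs_hash            # feed a caller-side stat cache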
def record_iter_changes(self, tree, basis_revision_id, iter_changes,
_entry_factory=entry_factory):
raise NotImplementedError(self.will_record_deletes)
def record_iter_changes(self, tree, basis_revision_id, iter_changes):
"""Record a new tree via iter_changes.
:param tree: The tree to obtain text contents from for changed objects.
to basis_revision_id. The iterator must not include any items with
a current kind of None - missing items must be either filtered out
or errored-on before record_iter_changes sees the item.
:param _entry_factory: Private method to bind entry_factory locally for
:return: A generator of (file_id, relpath, fs_hash) tuples for use with
tree._observed_sha1.
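# Illustrative sketch (not part of the original source): record_iter_changes
# is a generator, so a caller has to drain it to drive the commit, and may
# feed the yielded hashes back to the tree, roughly:
#
#   for file_id, relpath, fs_hash in builder.record_iter_changes(
#           tree, basis_revision_id, iter_changes):
#       tree._observed_sha1(file_id, relpath, fs_hash)
#
# tree._observed_sha1 is the use suggested by the docstring above; the loop
# itself is only an assumed calling pattern.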
# Create an inventory delta based on deltas between all the parents and
# deltas between all the parent inventories. We use inventory deltas
# between the inventory objects because iter_changes masks
# last-changed-field only changes.
598
# file_id -> change map, change is fileid, paths, changed, versioneds,
599
# parents, names, kinds, executables
601
# {file_id -> revision_id -> inventory entry, for entries in parent
602
# trees that are not parents[0]
606
revtrees = list(self.repository.revision_trees(self.parents))
607
except errors.NoSuchRevision:
608
# one or more ghosts, slow path.
610
for revision_id in self.parents:
612
revtrees.append(self.repository.revision_tree(revision_id))
613
except errors.NoSuchRevision:
615
basis_revision_id = _mod_revision.NULL_REVISION
617
revtrees.append(self.repository.revision_tree(
618
_mod_revision.NULL_REVISION))
619
# The basis inventory from a repository
621
basis_inv = revtrees[0].inventory
623
basis_inv = self.repository.revision_tree(
624
_mod_revision.NULL_REVISION).inventory
625
if len(self.parents) > 0:
626
if basis_revision_id != self.parents[0] and not ghost_basis:
628
"arbitrary basis parents not yet supported with merges")
629
for revtree in revtrees[1:]:
630
for change in revtree.inventory._make_delta(basis_inv):
631
if change[1] is None:
632
# Not present in this parent.
634
if change[2] not in merged_ids:
635
if change[0] is not None:
636
basis_entry = basis_inv[change[2]]
637
merged_ids[change[2]] = [
639
basis_entry.revision,
642
parent_entries[change[2]] = {
644
basis_entry.revision:basis_entry,
646
change[3].revision:change[3],
649
merged_ids[change[2]] = [change[3].revision]
650
parent_entries[change[2]] = {change[3].revision:change[3]}
652
merged_ids[change[2]].append(change[3].revision)
653
parent_entries[change[2]][change[3].revision] = change[3]
656
# Setup the changes from the tree:
657
# changes maps file_id -> (change, [parent revision_ids])
for change in iter_changes:
# This probably looks up in basis_inv way too much.
if change[1][0] is not None:
head_candidate = [basis_inv[change[0]].revision]
changes[change[0]] = change, merged_ids.get(change[0],
667
unchanged_merged = set(merged_ids) - set(changes)
668
# Extend the changes dict with synthetic changes to record merges of
670
for file_id in unchanged_merged:
671
# Record a merged version of these items that did not change vs the
672
# basis. This can be either identical parallel changes, or a revert
673
# of a specific file after a merge. The recorded content will be
674
# that of the current tree (which is the same as the basis), but
675
# the per-file graph will reflect a merge.
676
# NB:XXX: We are reconstructing path information we had, this
677
# should be preserved instead.
678
# inv delta change: (file_id, (path_in_source, path_in_target),
679
# changed_content, versioned, parent, name, kind,
682
basis_entry = basis_inv[file_id]
683
except errors.NoSuchId:
684
# a change from basis->some_parents but file_id isn't in basis
685
# so was new in the merge, which means it must have changed
686
# from basis -> current, and as it hasn't the add was reverted
687
# by the user. So we discard this change.
691
(basis_inv.id2path(file_id), tree.id2path(file_id)),
693
(basis_entry.parent_id, basis_entry.parent_id),
694
(basis_entry.name, basis_entry.name),
695
(basis_entry.kind, basis_entry.kind),
696
(basis_entry.executable, basis_entry.executable))
697
changes[file_id] = (change, merged_ids[file_id])
698
# changes contains tuples with the change and a set of inventory
699
# candidates for the file.
701
# old_path, new_path, file_id, new_inventory_entry
702
seen_root = False # Is the root in the basis delta?
703
inv_delta = self._basis_delta
704
modified_rev = self._new_revision_id
705
for change, head_candidates in changes.values():
706
if change[3][1]: # versioned in target.
707
# Several things may be happening here:
708
# We may have a fork in the per-file graph
709
# - record a change with the content from tree
710
# We may have a change against < all trees
711
# - carry over the tree that hasn't changed
712
# We may have a change against all trees
713
# - record the change with the content from tree
716
entry = _entry_factory[kind](file_id, change[5][1],
718
head_set = self._heads(change[0], set(head_candidates))
721
for head_candidate in head_candidates:
722
if head_candidate in head_set:
723
heads.append(head_candidate)
724
head_set.remove(head_candidate)
727
# Could be a carry-over situation:
728
parent_entry_revs = parent_entries.get(file_id, None)
729
if parent_entry_revs:
730
parent_entry = parent_entry_revs.get(heads[0], None)
733
if parent_entry is None:
734
# The parent iter_changes was called against is the one
735
# that is the per-file head, so any change is relevant
736
# iter_changes is valid.
737
carry_over_possible = False
739
# could be a carry over situation
740
# A change against the basis may just indicate a merge,
741
# we need to check the content against the source of the
742
# merge to determine if it was changed after the merge
744
if (parent_entry.kind != entry.kind or
745
parent_entry.parent_id != entry.parent_id or
746
parent_entry.name != entry.name):
747
# Metadata common to all entries has changed
748
# against per-file parent
749
carry_over_possible = False
751
carry_over_possible = True
752
# per-type checks for changes against the parent_entry
755
# Cannot be a carry-over situation
756
carry_over_possible = False
757
# Populate the entry in the delta
759
# XXX: There is still a small race here: If someone reverts the content of a file
760
# after iter_changes examines and decides it has changed,
761
# we will unconditionally record a new version even if some
762
# other process reverts it while commit is running (with
763
# the revert happening after iter_changes did its
766
entry.executable = True
768
entry.executable = False
769
if (carry_over_possible and
770
parent_entry.executable == entry.executable):
771
# Check the file length, content hash after reading
773
nostore_sha = parent_entry.text_sha1
776
file_obj, stat_value = tree.get_file_with_stat(file_id, change[1][1])
778
text = file_obj.read()
782
entry.text_sha1, entry.text_size = self._add_text_to_weave(
783
file_id, text, heads, nostore_sha)
784
yield file_id, change[1][1], (entry.text_sha1, stat_value)
785
except errors.ExistingContent:
786
# No content change against a carry_over parent
787
# Perhaps this should also yield a fs hash update?
789
entry.text_size = parent_entry.text_size
790
entry.text_sha1 = parent_entry.text_sha1
791
elif kind == 'symlink':
793
entry.symlink_target = tree.get_symlink_target(file_id)
794
if (carry_over_possible and
795
parent_entry.symlink_target == entry.symlink_target):
798
self._add_text_to_weave(change[0], '', heads, None)
799
elif kind == 'directory':
800
if carry_over_possible:
803
# Nothing to set on the entry.
804
# XXX: split into the Root and nonRoot versions.
805
if change[1][1] != '' or self.repository.supports_rich_root():
806
self._add_text_to_weave(change[0], '', heads, None)
807
elif kind == 'tree-reference':
if not self.repository._format.supports_tree_reference:
# This isn't quite sane as an error, but we shouldn't
# ever see this code path in practice: trees don't
# permit references when the repo doesn't support tree
raise errors.UnsupportedOperation(tree.add_reference,
815
reference_revision = tree.get_reference_revision(change[0])
816
entry.reference_revision = reference_revision
817
if (carry_over_possible and
818
parent_entry.reference_revision == reference_revision):
821
self._add_text_to_weave(change[0], '', heads, None)
823
raise AssertionError('unknown kind %r' % kind)
825
entry.revision = modified_rev
827
entry.revision = parent_entry.revision
830
new_path = change[1][1]
831
inv_delta.append((change[1][0], new_path, change[0], entry))
834
self.new_inventory = None
836
# This should perhaps be guarded by a check that the basis we
837
# commit against is the basis for the commit and if not do a delta
839
self._any_changes = True
841
# housekeeping root entry changes do not affect no-change commits.
842
self._require_root_change(tree)
843
self.basis_delta_revision = basis_revision_id
def _add_text_to_weave(self, file_id, new_text, parents, nostore_sha):
parent_keys = tuple([(file_id, parent) for parent in parents])
return self.repository.texts._add_text(
(file_id, self._new_revision_id), parent_keys, new_text,
nostore_sha=nostore_sha, random_id=self.random_revid)[0:2]
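# Illustrative sketch (not part of the original source): the nostore_sha
# parameter is what makes the carry-over logic above cheap. When the text
# about to be stored hashes to nostore_sha, the store raises
# errors.ExistingContent instead of adding a new node, roughly:
#
#   try:
#       sha1, size = self._add_text_to_weave(file_id, text, heads,
#                                            parent_entry.text_sha1)
#   except errors.ExistingContent:
#       pass  # unchanged against the single head; carry the old entry over
#
# This mirrors how record_entry_contents and record_iter_changes use it.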
852
class RootCommitBuilder(CommitBuilder):
853
"""This commitbuilder actually records the root id"""
855
# the root entry gets versioned properly by this builder.
856
_versioned_root = True
858
def _check_root(self, ie, parent_invs, tree):
859
"""Helper for record_entry_contents.
861
:param ie: An entry being added.
862
:param parent_invs: The inventories of the parent revisions of the
864
:param tree: The tree that is being committed.
867
def _require_root_change(self, tree):
868
"""Enforce an appropriate root object change.
870
This is called once when record_iter_changes is called, if and only if
871
the root was not in the delta calculated by record_iter_changes.
873
:param tree: The tree which is being committed.
875
# versioned roots do not change unless the tree found a change.
225
raise NotImplementedError(self.record_iter_changes)
878
228
class RepositoryWriteLockResult(LogicalLockResult):
1033
317
return InterRepository._assert_same_model(self, repository)
def add_inventory(self, revision_id, inv, parents):
"""Add the inventory inv to the repository as revision_id.
:param parents: The revision ids of the parents that revision_id
is known to have and are in the repository already.
:returns: The validator (which is a sha1 digest, though what is sha'd is
repository format specific) of the serialized inventory.
if not self.is_in_write_group():
raise AssertionError("%r not in write group" % (self,))
_mod_revision.check_not_reserved_id(revision_id)
if not (inv.revision_id is None or inv.revision_id == revision_id):
raise AssertionError(
"Mismatch between inventory revision"
" id and insertion revid (%r, %r)"
% (inv.revision_id, revision_id))
if inv.root is None:
raise errors.RootMissing()
return self._add_inventory_checked(revision_id, inv, parents)
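# Illustrative sketch (not part of the original source): add_inventory is
# only valid inside a write group, so a minimal caller looks roughly like:
#
#   repo.lock_write()
#   repo.start_write_group()
#   try:
#       validator = repo.add_inventory(revision_id, inv, parents)
#       repo.commit_write_group()
#   except:
#       repo.abort_write_group()
#       raise
#   finally:
#       repo.unlock()
#
# lock_write/start_write_group/commit_write_group are real Repository APIs;
# the surrounding error handling is just an assumed calling pattern.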
1056
def _add_inventory_checked(self, revision_id, inv, parents):
1057
"""Add inv to the repository after checking the inputs.
1059
This function can be overridden to allow different inventory styles.
1061
:seealso: add_inventory, for the contract.
1063
inv_lines = self._serializer.write_inventory_to_lines(inv)
1064
return self._inventory_add_lines(revision_id, parents,
1065
inv_lines, check_content=False)
def add_inventory_by_delta(self, basis_revision_id, delta, new_revision_id,
parents, basis_inv=None, propagate_caches=False):
"""Add a new inventory expressed as a delta against another revision.
See the inventory developers documentation for the theory behind
:param basis_revision_id: The inventory id the delta was created
against. (This does not have to be a direct parent.)
:param delta: The inventory delta (see Inventory.apply_delta for
:param new_revision_id: The revision id that the inventory is being
:param parents: The revision ids of the parents that revision_id is
known to have and are in the repository already. These are supplied
for repositories that depend on the inventory graph for revision
graph access, as well as for those that pun ancestry with delta
:param basis_inv: The basis inventory if it is already known,
:param propagate_caches: If True, the caches for this inventory are
copied to and updated for the result if possible.
:returns: (validator, new_inv)
The validator (which is a sha1 digest, though what is sha'd is
repository format specific) of the serialized inventory, and the
resulting inventory.
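# Illustrative sketch (not part of the original source): each item of the
# delta documented above is an inventory-delta 4-tuple
# (old_path, new_path, file_id, new_entry), following Inventory.apply_delta:
#
#   delta = [
#       (None, 'doc/readme.txt', 'readme-file-id', new_ie),  # added
#       ('old.txt', 'new.txt', 'moved-file-id', moved_ie),   # renamed
#       ('gone.txt', None, 'gone-file-id', None),            # deleted
#   ]
#   validator, new_inv = repo.add_inventory_by_delta(
#       basis_revision_id, delta, new_revision_id, parents)
#
# The paths, file ids and entry objects are made up for illustration.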
1095
if not self.is_in_write_group():
1096
raise AssertionError("%r not in write group" % (self,))
1097
_mod_revision.check_not_reserved_id(new_revision_id)
1098
basis_tree = self.revision_tree(basis_revision_id)
1099
basis_tree.lock_read()
1101
# Note that this mutates the inventory of basis_tree, which not all
1102
# inventory implementations may support: A better idiom would be to
1103
# return a new inventory, but as there is no revision tree cache in
1104
# repository this is safe for now - RBC 20081013
1105
if basis_inv is None:
1106
basis_inv = basis_tree.inventory
1107
basis_inv.apply_delta(delta)
1108
basis_inv.revision_id = new_revision_id
1109
return (self.add_inventory(new_revision_id, basis_inv, parents),
1114
def _inventory_add_lines(self, revision_id, parents, lines,
1115
check_content=True):
1116
"""Store lines in inv_vf and return the sha1 of the inventory."""
1117
parents = [(parent,) for parent in parents]
1118
result = self.inventories.add_lines((revision_id,), parents, lines,
1119
check_content=check_content)[0]
1120
self.inventories._access.flush()
1123
def add_revision(self, revision_id, rev, inv=None, config=None):
1124
"""Add rev to the revision store as revision_id.
1126
:param revision_id: the revision id to use.
1127
:param rev: The revision object.
1128
:param inv: The inventory for the revision. if None, it will be looked
1129
up in the inventory storer
1130
:param config: If None no digital signature will be created.
1131
If supplied its signature_needed method will be used
1132
to determine if a signature should be made.
1134
# TODO: jam 20070210 Shouldn't we check rev.revision_id and
1136
_mod_revision.check_not_reserved_id(revision_id)
1137
if config is not None and config.signature_needed():
1139
inv = self.get_inventory(revision_id)
1140
plaintext = Testament(rev, inv).as_short_text()
1141
self.store_revision_signature(
1142
gpg.GPGStrategy(config), plaintext, revision_id)
1143
# check inventory present
1144
if not self.inventories.get_parent_map([(revision_id,)]):
1146
raise errors.WeaveRevisionNotPresent(revision_id,
1149
# yes, this is not suitable for adding with ghosts.
1150
rev.inventory_sha1 = self.add_inventory(revision_id, inv,
1153
key = (revision_id,)
1154
rev.inventory_sha1 = self.inventories.get_sha1s([key])[key]
1155
self._add_revision(rev)
1157
def _add_revision(self, revision):
1158
text = self._serializer.write_revision_to_string(revision)
1159
key = (revision.revision_id,)
1160
parents = tuple((parent,) for parent in revision.parent_ids)
1161
self.revisions.add_lines(key, parents, osutils.split_lines(text))
1163
319
def all_revision_ids(self):
1164
320
"""Returns a list of all the revision ids in the repository.
1189
345
self.control_files.break_lock()
1192
def _eliminate_revisions_not_present(self, revision_ids):
1193
"""Check every revision id in revision_ids to see if we have it.
1195
Returns a set of the present revisions.
1198
graph = self.get_graph()
1199
parent_map = graph.get_parent_map(revision_ids)
1200
# The old API returned a list, should this actually be a set?
1201
return parent_map.keys()
1203
def _check_inventories(self, checker):
1204
"""Check the inventories found from the revision scan.
1206
This is responsible for verifying the sha1 of inventories and
1207
creating a pending_keys set that covers data referenced by inventories.
1209
bar = ui.ui_factory.nested_progress_bar()
1211
self._do_check_inventories(checker, bar)
1215
def _do_check_inventories(self, checker, bar):
1216
"""Helper for _check_inventories."""
1218
keys = {'chk_bytes':set(), 'inventories':set(), 'texts':set()}
1219
kinds = ['chk_bytes', 'texts']
1220
count = len(checker.pending_keys)
1221
bar.update("inventories", 0, 2)
1222
current_keys = checker.pending_keys
1223
checker.pending_keys = {}
1224
# Accumulate current checks.
1225
for key in current_keys:
1226
if key[0] != 'inventories' and key[0] not in kinds:
1227
checker._report_items.append('unknown key type %r' % (key,))
1228
keys[key[0]].add(key[1:])
1229
if keys['inventories']:
1230
# NB: output order *should* be roughly sorted - topo or
1231
# inverse topo depending on repository - either way decent
1232
# to just delta against. However, pre-CHK formats didn't
1233
# try to optimise inventory layout on disk. As such the
1234
# pre-CHK code path does not use inventory deltas.
1236
for record in self.inventories.check(keys=keys['inventories']):
1237
if record.storage_kind == 'absent':
1238
checker._report_items.append(
1239
'Missing inventory {%s}' % (record.key,))
1241
last_object = self._check_record('inventories', record,
1242
checker, last_object,
1243
current_keys[('inventories',) + record.key])
1244
del keys['inventories']
1247
bar.update("texts", 1)
1248
while (checker.pending_keys or keys['chk_bytes']
1250
# Something to check.
1251
current_keys = checker.pending_keys
1252
checker.pending_keys = {}
1253
# Accumulate current checks.
1254
for key in current_keys:
1255
if key[0] not in kinds:
1256
checker._report_items.append('unknown key type %r' % (key,))
1257
keys[key[0]].add(key[1:])
1258
# Check the outermost kind only - inventories || chk_bytes || texts
1262
for record in getattr(self, kind).check(keys=keys[kind]):
1263
if record.storage_kind == 'absent':
1264
checker._report_items.append(
1265
'Missing %s {%s}' % (kind, record.key,))
1267
last_object = self._check_record(kind, record,
1268
checker, last_object, current_keys[(kind,) + record.key])
1272
def _check_record(self, kind, record, checker, last_object, item_data):
1273
"""Check a single text from this repository."""
1274
if kind == 'inventories':
1275
rev_id = record.key[0]
1276
inv = self._deserialise_inventory(rev_id,
1277
record.get_bytes_as('fulltext'))
1278
if last_object is not None:
1279
delta = inv._make_delta(last_object)
1280
for old_path, path, file_id, ie in delta:
1283
ie.check(checker, rev_id, inv)
1285
for path, ie in inv.iter_entries():
1286
ie.check(checker, rev_id, inv)
1287
if self._format.fast_deltas:
1289
elif kind == 'chk_bytes':
1290
# No code written to check chk_bytes for this repo format.
1291
checker._report_items.append(
1292
'unsupported key type chk_bytes for %s' % (record.key,))
1293
elif kind == 'texts':
1294
self._check_text(record, checker, item_data)
1296
checker._report_items.append(
1297
'unknown key type %s for %s' % (kind, record.key))
1299
def _check_text(self, record, checker, item_data):
1300
"""Check a single text."""
1301
# Check it is extractable.
1302
# TODO: check length.
1303
if record.storage_kind == 'chunked':
1304
chunks = record.get_bytes_as(record.storage_kind)
1305
sha1 = osutils.sha_strings(chunks)
1306
length = sum(map(len, chunks))
1308
content = record.get_bytes_as('fulltext')
1309
sha1 = osutils.sha_string(content)
1310
length = len(content)
1311
if item_data and sha1 != item_data[1]:
1312
checker._report_items.append(
1313
'sha1 mismatch: %s has sha1 %s expected %s referenced by %s' %
1314
(record.key, sha1, item_data[1], item_data[2]))
1317
def create(a_bzrdir):
1318
"""Construct the current default format repository in a_bzrdir."""
1319
return RepositoryFormat.get_default_format().initialize(a_bzrdir)
348
def create(controldir):
349
"""Construct the current default format repository in controldir."""
350
return RepositoryFormat.get_default_format().initialize(controldir)
1321
def __init__(self, _format, a_bzrdir, control_files):
352
def __init__(self, _format, controldir, control_files):
1322
353
"""instantiate a Repository.
1324
355
:param _format: The format of the repository on disk.
1325
:param a_bzrdir: The BzrDir of the repository.
356
:param controldir: The ControlDir of the repository.
357
:param control_files: Control files to use for locking, etc.
1327
359
# In the future we will have a single api for all stores for
1328
360
# getting file texts, inventories and revisions, then
2001
907
signature = gpg_strategy.sign(plaintext)
2002
908
self.add_signature_text(revision_id, signature)
2005
910
def add_signature_text(self, revision_id, signature):
2006
self.signatures.add_lines((revision_id,), (),
2007
osutils.split_lines(signature))
def find_text_key_references(self):
"""Find the text key references within the repository.
:return: A dictionary mapping text keys ((fileid, revision_id) tuples)
to whether they were referred to by the inventory of the
revision_id that they contain. The inventory texts from all present
revision ids are assessed to generate this report.
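# Illustrative sketch (not part of the original source): the returned mapping
# keys each text key by whether the introducing inventory still refers to it,
# roughly:
#
#   refs = repo.find_text_key_references()
#   # refs might look like:
#   # {('some-file-id', 'rev-1'): True,
#   #  ('other-file-id', 'rev-2'): False}
#   live_keys = [key for key, referenced in refs.iteritems() if referenced]
#
# The concrete ids are made up; iteritems() follows the Python 2 idiom used
# elsewhere in this module.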
2017
revision_keys = self.revisions.keys()
2018
w = self.inventories
2019
pb = ui.ui_factory.nested_progress_bar()
2021
return self._find_text_key_references_from_xml_inventory_lines(
2022
w.iter_lines_added_or_present_in_keys(revision_keys, pb=pb))
2026
def _find_text_key_references_from_xml_inventory_lines(self,
2028
"""Core routine for extracting references to texts from inventories.
2030
This performs the translation of xml lines to revision ids.
2032
:param line_iterator: An iterator of lines, origin_version_id
2033
:return: A dictionary mapping text keys ((fileid, revision_id) tuples)
2034
to whether they were referred to by the inventory of the
2035
revision_id that they contain. Note that if that revision_id was
2036
not part of the line_iterator's output then False will be given -
2037
even though it may actually refer to that key.
2039
if not self._serializer.support_altered_by_hack:
2040
raise AssertionError(
2041
"_find_text_key_references_from_xml_inventory_lines only "
2042
"supported for branches which store inventory as unnested xml"
2043
", not on %r" % self)
2046
# this code needs to read every new line in every inventory for the
2047
# inventories [revision_ids]. Seeing a line twice is ok. Seeing a line
2048
# not present in one of those inventories is unnecessary but not
2049
# harmful because we are filtering by the revision id marker in the
2050
# inventory lines : we only select file ids altered in one of those
2051
# revisions. We don't need to see all lines in the inventory because
2052
# only those added in an inventory in rev X can contain a revision=X
2054
unescape_revid_cache = {}
2055
unescape_fileid_cache = {}
2057
# jam 20061218 In a big fetch, this handles hundreds of thousands
2058
# of lines, so it has had a lot of inlining and optimizing done.
2059
# Sorry that it is a little bit messy.
2060
# Move several functions to be local variables, since this is a long
2062
search = self._file_ids_altered_regex.search
2063
unescape = _unescape_xml
2064
setdefault = result.setdefault
2065
for line, line_key in line_iterator:
2066
match = search(line)
2069
# One call to match.group() returning multiple items is quite a
2070
# bit faster than 2 calls to match.group() each returning 1
2071
file_id, revision_id = match.group('file_id', 'revision_id')
2073
# Inlining the cache lookups helps a lot when you make 170,000
2074
# lines and 350k ids, versus 8.4 unique ids.
2075
# Using a cache helps in 2 ways:
2076
# 1) Avoids unnecessary decoding calls
2077
# 2) Re-uses cached strings, which helps in future set and
2079
# (2) is enough that removing encoding entirely along with
2080
# the cache (so we are using plain strings) results in no
2081
# performance improvement.
2083
revision_id = unescape_revid_cache[revision_id]
2085
unescaped = unescape(revision_id)
2086
unescape_revid_cache[revision_id] = unescaped
2087
revision_id = unescaped
# Note that unconditionally unescaping means that we deserialise
# every fileid, which for general 'pull' is not great, but we don't
# really want to have so many fulltexts that this matters anyway.
2094
file_id = unescape_fileid_cache[file_id]
2096
unescaped = unescape(file_id)
2097
unescape_fileid_cache[file_id] = unescaped
2100
key = (file_id, revision_id)
2101
setdefault(key, False)
2102
if revision_id == line_key[-1]:
def _inventory_xml_lines_for_keys(self, keys):
"""Get a line iterator of the sort needed for finding references.
Not relevant for non-xml inventory repositories.
2111
Ghosts in revision_keys are ignored.
2113
:param revision_keys: The revision keys for the inventories to inspect.
2114
:return: An iterator over (inventory line, revid) for the fulltexts of
2115
all of the xml inventories specified by revision_keys.
2117
stream = self.inventories.get_record_stream(keys, 'unordered', True)
2118
for record in stream:
2119
if record.storage_kind != 'absent':
2120
chunks = record.get_bytes_as('chunked')
2121
revid = record.key[-1]
2122
lines = osutils.chunks_to_lines(chunks)
2126
def _find_file_ids_from_xml_inventory_lines(self, line_iterator,
2128
"""Helper routine for fileids_altered_by_revision_ids.
2130
This performs the translation of xml lines to revision ids.
2132
:param line_iterator: An iterator of lines, origin_version_id
2133
:param revision_keys: The revision ids to filter for. This should be a
2134
set or other type which supports efficient __contains__ lookups, as
2135
the revision key from each parsed line will be looked up in the
2136
revision_keys filter.
2137
:return: a dictionary mapping altered file-ids to an iterable of
2138
revision_ids. Each altered file-ids has the exact revision_ids that
2139
altered it listed explicitly.
2141
seen = set(self._find_text_key_references_from_xml_inventory_lines(
2142
line_iterator).iterkeys())
2143
parent_keys = self._find_parent_keys_of_revisions(revision_keys)
2144
parent_seen = set(self._find_text_key_references_from_xml_inventory_lines(
2145
self._inventory_xml_lines_for_keys(parent_keys)))
2146
new_keys = seen - parent_seen
2148
setdefault = result.setdefault
2149
for key in new_keys:
2150
setdefault(key[0], set()).add(key[-1])
911
"""Store a signature text for a revision.
913
:param revision_id: Revision id of the revision
914
:param signature: Signature text.
916
raise NotImplementedError(self.add_signature_text)
2153
918
def _find_parent_ids_of_revisions(self, revision_ids):
2154
919
"""Find all parent ids that are mentioned in the revision graph.
2205
939
uniquely identify the file version in the caller's context. (Examples:
2206
940
an index number or a TreeTransform trans_id.)
2208
bytes_iterator is an iterable of bytestrings for the file. The
2209
kind of iterable and length of the bytestrings are unspecified, but for
2210
this implementation, it is a list of bytes produced by
2211
VersionedFile.get_record_stream().
2213
942
:param desired_files: a list of (file_id, revision_id, identifier)
2217
for file_id, revision_id, callable_data in desired_files:
2218
text_keys[(file_id, revision_id)] = callable_data
2219
for record in self.texts.get_record_stream(text_keys, 'unordered', True):
2220
if record.storage_kind == 'absent':
2221
raise errors.RevisionNotPresent(record.key, self)
2222
yield text_keys[record.key], record.get_bytes_as('chunked')
def _generate_text_key_index(self, text_key_references=None,
"""Generate a new text key index for the repository.
This is an expensive function that will take considerable time to run.
:return: A dict mapping text keys ((file_id, revision_id) tuples) to a
list of parents, also text keys. When a given key has no parents,
the parents list will be [NULL_REVISION].
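# Illustrative sketch (not part of the original source): the resulting index
# is a per-file parent map keyed by text keys, roughly:
#
#   text_index = repo._generate_text_key_index()
#   # text_index might look like:
#   # {('file-id', 'rev-2'): [('file-id', 'rev-1')],
#   #  ('file-id', 'rev-1'): [NULL_REVISION]}
#
# The ids are made up; only the (file_id, revision_id) key shape and the
# [NULL_REVISION] convention come from the docstring above.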
2234
# All revisions, to find inventory parents.
2235
if ancestors is None:
2236
graph = self.get_graph()
2237
ancestors = graph.get_parent_map(self.all_revision_ids())
2238
if text_key_references is None:
2239
text_key_references = self.find_text_key_references()
2240
pb = ui.ui_factory.nested_progress_bar()
2242
return self._do_generate_text_key_index(ancestors,
2243
text_key_references, pb)
2247
def _do_generate_text_key_index(self, ancestors, text_key_references, pb):
2248
"""Helper for _generate_text_key_index to avoid deep nesting."""
2249
revision_order = tsort.topo_sort(ancestors)
2250
invalid_keys = set()
2252
for revision_id in revision_order:
2253
revision_keys[revision_id] = set()
2254
text_count = len(text_key_references)
2255
# a cache of the text keys to allow reuse; costs a dict of all the
2256
# keys, but saves a 2-tuple for every child of a given key.
2258
for text_key, valid in text_key_references.iteritems():
2260
invalid_keys.add(text_key)
2262
revision_keys[text_key[1]].add(text_key)
2263
text_key_cache[text_key] = text_key
2264
del text_key_references
2266
text_graph = graph.Graph(graph.DictParentsProvider(text_index))
2267
NULL_REVISION = _mod_revision.NULL_REVISION
2268
# Set a cache with a size of 10 - this suffices for bzr.dev but may be
2269
# too small for large or very branchy trees. However, for 55K path
2270
# trees, it would be easy to use too much memory trivially. Ideally we
2271
# could gauge this by looking at available real memory etc, but this is
2272
# always a tricky proposition.
2273
inventory_cache = lru_cache.LRUCache(10)
2274
batch_size = 10 # should be ~150MB on a 55K path tree
2275
batch_count = len(revision_order) / batch_size + 1
2277
pb.update("Calculating text parents", processed_texts, text_count)
2278
for offset in xrange(batch_count):
2279
to_query = revision_order[offset * batch_size:(offset + 1) *
2283
for revision_id in to_query:
2284
parent_ids = ancestors[revision_id]
2285
for text_key in revision_keys[revision_id]:
2286
pb.update("Calculating text parents", processed_texts)
2287
processed_texts += 1
2288
candidate_parents = []
2289
for parent_id in parent_ids:
2290
parent_text_key = (text_key[0], parent_id)
2292
check_parent = parent_text_key not in \
2293
revision_keys[parent_id]
2295
# the parent parent_id is a ghost:
2296
check_parent = False
2297
# truncate the derived graph against this ghost.
2298
parent_text_key = None
2300
# look at the parent commit details inventories to
2301
# determine possible candidates in the per file graph.
2304
inv = inventory_cache[parent_id]
2306
inv = self.revision_tree(parent_id).inventory
2307
inventory_cache[parent_id] = inv
2309
parent_entry = inv[text_key[0]]
2310
except (KeyError, errors.NoSuchId):
2312
if parent_entry is not None:
2314
text_key[0], parent_entry.revision)
2316
parent_text_key = None
2317
if parent_text_key is not None:
2318
candidate_parents.append(
2319
text_key_cache[parent_text_key])
2320
parent_heads = text_graph.heads(candidate_parents)
2321
new_parents = list(parent_heads)
2322
new_parents.sort(key=lambda x:candidate_parents.index(x))
2323
if new_parents == []:
2324
new_parents = [NULL_REVISION]
2325
text_index[text_key] = new_parents
2327
for text_key in invalid_keys:
2328
text_index[text_key] = [NULL_REVISION]
def item_keys_introduced_by(self, revision_ids, _files_pb=None):
"""Get an iterable listing the keys of all the data introduced by a set
The keys will be ordered so that the corresponding items can be safely
fetched and inserted in that order.
:returns: An iterable producing tuples of (knit-kind, file-id,
versions). knit-kind is one of 'file', 'inventory', 'signatures',
'revisions'. file-id is None unless knit-kind is 'file'.
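# Illustrative sketch (not part of the original source): consuming the
# iterable in order, dispatching on knit-kind, roughly:
#
#   for knit_kind, file_id, keys in repo.item_keys_introduced_by(revision_ids):
#       if knit_kind == 'file':
#           copy_file_texts(file_id, keys)      # hypothetical helpers
#       elif knit_kind == 'inventory':
#           copy_inventories(keys)
#       elif knit_kind == 'signatures':
#           copy_signatures(keys)
#       elif knit_kind == 'revisions':
#           copy_revisions(keys)
#
# The copy_* helpers are invented for illustration; the kinds and ordering
# come from the docstring above.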
2342
for result in self._find_file_keys_to_fetch(revision_ids, _files_pb):
2345
for result in self._find_non_file_keys_to_fetch(revision_ids):
2348
def _find_file_keys_to_fetch(self, revision_ids, pb):
2349
# XXX: it's a bit weird to control the inventory weave caching in this
2350
# generator. Ideally the caching would be done in fetch.py I think. Or
2351
# maybe this generator should explicitly have the contract that it
2352
# should not be iterated until the previously yielded item has been
2354
inv_w = self.inventories
2356
# file ids that changed
2357
file_ids = self.fileids_altered_by_revision_ids(revision_ids, inv_w)
2359
num_file_ids = len(file_ids)
2360
for file_id, altered_versions in file_ids.iteritems():
2362
pb.update("Fetch texts", count, num_file_ids)
2364
yield ("file", file_id, altered_versions)
2366
def _find_non_file_keys_to_fetch(self, revision_ids):
2368
yield ("inventory", None, revision_ids)
2371
# XXX: Note ATM no callers actually pay attention to this return
2372
# instead they just use the list of revision ids and ignore
2373
# missing sigs. Consider removing this work entirely
2374
revisions_with_signatures = set(self.signatures.get_parent_map(
2375
[(r,) for r in revision_ids]))
2376
revisions_with_signatures = set(
2377
[r for (r,) in revisions_with_signatures])
2378
revisions_with_signatures.intersection_update(revision_ids)
2379
yield ("signatures", None, revisions_with_signatures)
2382
yield ("revisions", None, revision_ids)
2385
def get_inventory(self, revision_id):
2386
"""Get Inventory object by revision id."""
2387
return self.iter_inventories([revision_id]).next()
def iter_inventories(self, revision_ids, ordering=None):
"""Get many inventories by revision_ids.
This will buffer some or all of the texts used in constructing the
inventories in memory, but will only parse a single inventory at a
:param revision_ids: The expected revision ids of the inventories.
:param ordering: optional ordering, e.g. 'topological'. If not
specified, the order of revision_ids will be preserved (by
buffering if necessary).
:return: An iterator of inventories.
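# Illustrative sketch (not part of the original source): typical use keeps
# only one parsed inventory alive at a time, roughly:
#
#   for inv in repo.iter_inventories(revision_ids, ordering='topological'):
#       process(inv)   # hypothetical per-inventory work
#
# 'topological' is one ordering the docstring mentions; omitting ordering
# preserves the order of revision_ids instead.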
2402
if ((None in revision_ids)
2403
or (_mod_revision.NULL_REVISION in revision_ids)):
2404
raise ValueError('cannot get null revision inventory')
2405
return self._iter_inventories(revision_ids, ordering)
2407
def _iter_inventories(self, revision_ids, ordering):
2408
"""single-document based inventory iteration."""
2409
inv_xmls = self._iter_inventory_xmls(revision_ids, ordering)
2410
for text, revision_id in inv_xmls:
2411
yield self._deserialise_inventory(revision_id, text)
2413
def _iter_inventory_xmls(self, revision_ids, ordering):
2414
if ordering is None:
2415
order_as_requested = True
2416
ordering = 'unordered'
2418
order_as_requested = False
2419
keys = [(revision_id,) for revision_id in revision_ids]
2422
if order_as_requested:
2423
key_iter = iter(keys)
2424
next_key = key_iter.next()
2425
stream = self.inventories.get_record_stream(keys, ordering, True)
2427
for record in stream:
2428
if record.storage_kind != 'absent':
2429
chunks = record.get_bytes_as('chunked')
2430
if order_as_requested:
2431
text_chunks[record.key] = chunks
2433
yield ''.join(chunks), record.key[-1]
2435
raise errors.NoSuchRevision(self, record.key)
2436
if order_as_requested:
2437
# Yield as many results as we can while preserving order.
2438
while next_key in text_chunks:
2439
chunks = text_chunks.pop(next_key)
2440
yield ''.join(chunks), next_key[-1]
2442
next_key = key_iter.next()
2443
except StopIteration:
2444
# We still want to fully consume the get_record_stream,
2445
# just in case it is not actually finished at this point
2449
def _deserialise_inventory(self, revision_id, xml):
2450
"""Transform the xml into an inventory object.
2452
:param revision_id: The expected revision id of the inventory.
2453
:param xml: A serialised inventory.
2455
result = self._serializer.read_inventory_from_string(xml, revision_id,
2456
entry_cache=self._inventory_entry_cache,
2457
return_from_cache=self._safe_to_return_from_cache)
2458
if result.revision_id != revision_id:
2459
raise AssertionError('revision id mismatch %s != %s' % (
2460
result.revision_id, revision_id))
2463
def get_serializer_format(self):
2464
return self._serializer.format_num
2467
def _get_inventory_xml(self, revision_id):
2468
"""Get serialized inventory as a string."""
2469
texts = self._iter_inventory_xmls([revision_id], 'unordered')
2471
text, revision_id = texts.next()
2472
except StopIteration:
2473
raise errors.HistoryMissing(self, 'inventory', revision_id)
945
raise NotImplementedError(self.iter_files_bytes)
2476
947
def get_rev_id_for_revno(self, revno, known_pair):
2477
948
"""Return the revision id of a revno, given a later (revno, revid)
2810
1252
except UnicodeDecodeError:
2811
1253
raise errors.NonAsciiRevisionId(method, self)
2813
def revision_graph_can_have_wrong_parents(self):
2814
"""Is it possible for this repository to have a revision graph with
2817
If True, then this repository must also implement
2818
_find_inconsistent_revision_parents so that check and reconcile can
2819
check for inconsistencies before proceeding with other checks that may
2820
depend on the revision index being consistent.
2822
raise NotImplementedError(self.revision_graph_can_have_wrong_parents)
2825
# remove these delegates a while after bzr 0.15
2826
def __make_delegated(name, from_module):
2827
def _deprecated_repository_forwarder():
2828
symbol_versioning.warn('%s moved to %s in bzr 0.15'
2829
% (name, from_module),
2833
return pyutils.get_named_object(from_module, name)
2834
except AttributeError:
2835
raise AttributeError('module %s has no name %s'
2836
% (sys.modules[from_module], name))
2837
globals()[name] = _deprecated_repository_forwarder
2840
'AllInOneRepository',
2841
'WeaveMetaDirRepository',
2842
'PreSplitOutRepositoryFormat',
2843
'RepositoryFormat4',
2844
'RepositoryFormat5',
2845
'RepositoryFormat6',
2846
'RepositoryFormat7',
2848
__make_delegated(_name, 'bzrlib.repofmt.weaverepo')
2852
'RepositoryFormatKnit',
2853
'RepositoryFormatKnit1',
2855
__make_delegated(_name, 'bzrlib.repofmt.knitrepo')
2858
def install_revision(repository, rev, revision_tree):
2859
"""Install all revision data into a repository."""
2860
install_revisions(repository, [(rev, revision_tree, None)])
def install_revisions(repository, iterable, num_revisions=None, pb=None):
"""Install all revision data into a repository.
Accepts an iterable of revision, tree, signature tuples. The signature
repository.start_write_group()
inventory_cache = lru_cache.LRUCache(10)
for n, (revision, revision_tree, signature) in enumerate(iterable):
_install_revision(repository, revision, revision_tree, signature,
pb.update('Transferring revisions', n + 1, num_revisions)
repository.abort_write_group()
repository.commit_write_group()
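# Illustrative sketch (not part of the original source): install_revisions
# manages its own write group but expects the target repository to already be
# write-locked, so a caller only supplies the triples, roughly:
#
#   repository.lock_write()
#   try:
#       install_revisions(repository,
#           [(rev, rev_tree, None) for rev, rev_tree in pending],  # assumed
#           num_revisions=len(pending))
#   finally:
#       repository.unlock()
#
# 'pending' is a hypothetical list of (revision, revision_tree) pairs; the
# None third element means no signature, matching install_revision() above.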
2884
def _install_revision(repository, rev, revision_tree, signature,
2886
"""Install all revision data into a repository."""
2887
present_parents = []
2889
for p_id in rev.parent_ids:
2890
if repository.has_revision(p_id):
2891
present_parents.append(p_id)
2892
parent_trees[p_id] = repository.revision_tree(p_id)
2894
parent_trees[p_id] = repository.revision_tree(
2895
_mod_revision.NULL_REVISION)
2897
inv = revision_tree.inventory
2898
entries = inv.iter_entries()
2899
# backwards compatibility hack: skip the root id.
2900
if not repository.supports_rich_root():
2901
path, root = entries.next()
2902
if root.revision != rev.revision_id:
2903
raise errors.IncompatibleRevision(repr(repository))
2905
for path, ie in entries:
2906
text_keys[(ie.file_id, ie.revision)] = ie
2907
text_parent_map = repository.texts.get_parent_map(text_keys)
2908
missing_texts = set(text_keys) - set(text_parent_map)
2909
# Add the texts that are not already present
2910
for text_key in missing_texts:
2911
ie = text_keys[text_key]
2913
# FIXME: TODO: The following loop overlaps/duplicates that done by
2914
# commit to determine parents. There is a latent/real bug here where
2915
# the parents inserted are not those commit would do - in particular
2916
# they are not filtered by heads(). RBC, AB
2917
for revision, tree in parent_trees.iteritems():
2918
if ie.file_id not in tree:
2920
parent_id = tree.inventory[ie.file_id].revision
2921
if parent_id in text_parents:
2923
text_parents.append((ie.file_id, parent_id))
2924
lines = revision_tree.get_file(ie.file_id).readlines()
2925
repository.texts.add_lines(text_key, text_parents, lines)
2927
# install the inventory
2928
if repository._format._commit_inv_deltas and len(rev.parent_ids):
2929
# Cache this inventory
2930
inventory_cache[rev.revision_id] = inv
2932
basis_inv = inventory_cache[rev.parent_ids[0]]
2934
repository.add_inventory(rev.revision_id, inv, present_parents)
2936
delta = inv._make_delta(basis_inv)
2937
repository.add_inventory_by_delta(rev.parent_ids[0], delta,
2938
rev.revision_id, present_parents)
2940
repository.add_inventory(rev.revision_id, inv, present_parents)
2941
except errors.RevisionAlreadyPresent:
2943
if signature is not None:
2944
repository.add_signature_text(rev.revision_id, signature)
2945
repository.add_revision(rev.revision_id, rev, inv)
2948
1256
class MetaDirRepository(Repository):
2949
1257
"""Repositories in the new meta-dir layout.
3314
1587
# NOTE: These are experimental in 0.92. Stable in 1.0 and above
3315
1588
format_registry.register_lazy(
3316
1589
'Bazaar pack repository format 1 (needs bzr 0.92)\n',
3317
'bzrlib.repofmt.pack_repo',
1590
'bzrlib.repofmt.knitpack_repo',
3318
1591
'RepositoryFormatKnitPack1',
3320
1593
format_registry.register_lazy(
3321
1594
'Bazaar pack repository format 1 with subtree support (needs bzr 0.92)\n',
3322
'bzrlib.repofmt.pack_repo',
1595
'bzrlib.repofmt.knitpack_repo',
3323
1596
'RepositoryFormatKnitPack3',
3325
1598
format_registry.register_lazy(
3326
1599
'Bazaar pack repository format 1 with rich root (needs bzr 1.0)\n',
3327
'bzrlib.repofmt.pack_repo',
1600
'bzrlib.repofmt.knitpack_repo',
3328
1601
'RepositoryFormatKnitPack4',
3330
1603
format_registry.register_lazy(
3331
1604
'Bazaar RepositoryFormatKnitPack5 (bzr 1.6)\n',
3332
'bzrlib.repofmt.pack_repo',
1605
'bzrlib.repofmt.knitpack_repo',
3333
1606
'RepositoryFormatKnitPack5',
3335
1608
format_registry.register_lazy(
3336
1609
'Bazaar RepositoryFormatKnitPack5RichRoot (bzr 1.6.1)\n',
3337
'bzrlib.repofmt.pack_repo',
1610
'bzrlib.repofmt.knitpack_repo',
3338
1611
'RepositoryFormatKnitPack5RichRoot',
3340
1613
format_registry.register_lazy(
3341
1614
'Bazaar RepositoryFormatKnitPack5RichRoot (bzr 1.6)\n',
3342
'bzrlib.repofmt.pack_repo',
1615
'bzrlib.repofmt.knitpack_repo',
3343
1616
'RepositoryFormatKnitPack5RichRootBroken',
3345
1618
format_registry.register_lazy(
3346
1619
'Bazaar RepositoryFormatKnitPack6 (bzr 1.9)\n',
3347
'bzrlib.repofmt.pack_repo',
1620
'bzrlib.repofmt.knitpack_repo',
3348
1621
'RepositoryFormatKnitPack6',
3350
1623
format_registry.register_lazy(
3351
1624
'Bazaar RepositoryFormatKnitPack6RichRoot (bzr 1.9)\n',
3352
'bzrlib.repofmt.pack_repo',
1625
'bzrlib.repofmt.knitpack_repo',
3353
1626
'RepositoryFormatKnitPack6RichRoot',
1628
format_registry.register_lazy(
1629
'Bazaar repository format 2a (needs bzr 1.16 or later)\n',
1630
'bzrlib.repofmt.groupcompress_repo',
1631
'RepositoryFormat2a',
3356
1634
# Development formats.
3357
# Obsolete but kept pending a CHK based subtree format.
1635
# Check their docstrings to see if/when they are obsolete.
3358
1636
format_registry.register_lazy(
3359
1637
("Bazaar development format 2 with subtree support "
3360
1638
"(needs bzr.dev from before 1.8)\n"),
3361
'bzrlib.repofmt.pack_repo',
1639
'bzrlib.repofmt.knitpack_repo',
3362
1640
'RepositoryFormatPackDevelopment2Subtree',
3365
# 1.14->1.16 go below here
3366
format_registry.register_lazy(
3367
'Bazaar development format - group compression and chk inventory'
3368
' (needs bzr.dev from 1.14)\n',
3369
'bzrlib.repofmt.groupcompress_repo',
3370
'RepositoryFormatCHK1',
3373
format_registry.register_lazy(
3374
'Bazaar development format - chk repository with bencode revision '
3375
'serialization (needs bzr.dev from 1.16)\n',
3376
'bzrlib.repofmt.groupcompress_repo',
3377
'RepositoryFormatCHK2',
3379
format_registry.register_lazy(
3380
'Bazaar repository format 2a (needs bzr 1.16 or later)\n',
3381
'bzrlib.repofmt.groupcompress_repo',
3382
'RepositoryFormat2a',
3384
1642
format_registry.register_lazy(
3385
1643
'Bazaar development format 8\n',
3386
1644
'bzrlib.repofmt.groupcompress_repo',
3421
1678
self.target.fetch(self.source, revision_id=revision_id)
3423
1680
@needs_write_lock
3424
def fetch(self, revision_id=None, pb=None, find_ghosts=False,
1681
def fetch(self, revision_id=None, find_ghosts=False):
3426
1682
"""Fetch the content required to construct revision_id.
3428
1684
The content is copied from self.source to self.target.
3430
1686
:param revision_id: if None all content is copied, if NULL_REVISION no
3431
1687
content is copied.
3435
ui.ui_factory.warn_experimental_format_fetch(self)
3436
from bzrlib.fetch import RepoFetcher
3437
# See <https://launchpad.net/bugs/456077> asking for a warning here
3438
if self.source._format.network_name() != self.target._format.network_name():
3439
ui.ui_factory.show_user_warning('cross_format_fetch',
3440
from_format=self.source._format,
3441
to_format=self.target._format)
3442
f = RepoFetcher(to_repository=self.target,
3443
from_repository=self.source,
3444
last_revision=revision_id,
3445
fetch_spec=fetch_spec,
3446
find_ghosts=find_ghosts)
3448
def _walk_to_common_revisions(self, revision_ids):
3449
"""Walk out from revision_ids in source to revisions target has.
3451
:param revision_ids: The start point for the search.
3452
:return: A set of revision ids.
3454
target_graph = self.target.get_graph()
3455
revision_ids = frozenset(revision_ids)
3456
missing_revs = set()
3457
source_graph = self.source.get_graph()
3458
# ensure we don't pay silly lookup costs.
3459
searcher = source_graph._make_breadth_first_searcher(revision_ids)
3460
null_set = frozenset([_mod_revision.NULL_REVISION])
3461
searcher_exhausted = False
3465
# Iterate the searcher until we have enough next_revs
3466
while len(next_revs) < self._walk_to_common_revisions_batch_size:
3468
next_revs_part, ghosts_part = searcher.next_with_ghosts()
3469
next_revs.update(next_revs_part)
3470
ghosts.update(ghosts_part)
3471
except StopIteration:
3472
searcher_exhausted = True
3474
# If there are ghosts in the source graph, and the caller asked for
3475
# them, make sure that they are present in the target.
3476
# We don't care about other ghosts as we can't fetch them and
3477
# haven't been asked to.
3478
ghosts_to_check = set(revision_ids.intersection(ghosts))
3479
revs_to_get = set(next_revs).union(ghosts_to_check)
3481
have_revs = set(target_graph.get_parent_map(revs_to_get))
3482
# we always have NULL_REVISION present.
3483
have_revs = have_revs.union(null_set)
3484
# Check if the target is missing any ghosts we need.
3485
ghosts_to_check.difference_update(have_revs)
3487
# One of the caller's revision_ids is a ghost in both the
3488
# source and the target.
3489
raise errors.NoSuchRevision(
3490
self.source, ghosts_to_check.pop())
3491
missing_revs.update(next_revs - have_revs)
3492
# Because we may have walked past the original stop point, make
3493
# sure everything is stopped
3494
stop_revs = searcher.find_seen_ancestors(have_revs)
3495
searcher.stop_searching_any(stop_revs)
3496
if searcher_exhausted:
3498
return searcher.get_result()
1690
raise NotImplementedError(self.fetch)
3500
1692
@needs_read_lock
3501
def search_missing_revision_ids(self, revision_id=None, find_ghosts=True):
1693
def search_missing_revision_ids(self,
1694
revision_id=symbol_versioning.DEPRECATED_PARAMETER,
1695
find_ghosts=True, revision_ids=None, if_present_ids=None,
3502
1697
"""Return the revision ids that source has that target does not.
3504
1699
:param revision_id: only return revision ids included by this
1701
:param revision_ids: return revision ids included by these
1702
revision_ids. NoSuchRevision will be raised if any of these
1703
revisions are not present.
1704
:param if_present_ids: like revision_ids, but will not cause
1705
NoSuchRevision if any of these are absent, instead they will simply
1706
not be in the result. This is useful for e.g. finding revisions
1707
to fetch for tags, which may reference absent revisions.
3506
1708
:param find_ghosts: If True find missing revisions in deep history
3507
1709
rather than just finding the surface difference.
1710
:param limit: Maximum number of revisions to return, topologically
3508
1712
:return: A bzrlib.graph.SearchResult.
3510
# stop searching at found target revisions.
3511
if not find_ghosts and revision_id is not None:
3512
return self._walk_to_common_revisions([revision_id])
3513
# generic, possibly worst case, slow code path.
3514
target_ids = set(self.target.all_revision_ids())
3515
if revision_id is not None:
3516
source_ids = self.source.get_ancestry(revision_id)
3517
if source_ids[0] is not None:
3518
raise AssertionError()
3521
source_ids = self.source.all_revision_ids()
3522
result_set = set(source_ids).difference(target_ids)
3523
return self.source.revision_ids_to_search_result(result_set)
1714
raise NotImplementedError(self.search_missing_revision_ids)
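# Hedged usage sketch (not part of this module): given two already-opened
# and read-locked Repository objects, the most specific compatible
# InterRepository optimiser is looked up and its search tells us which
# revisions would have to be fetched to bring `target` up to
# `tip_revision_id` from `source`.
def _sketch_missing_revision_ids(source, target, tip_revision_id):
    inter = InterRepository.get(source, target)
    search = inter.search_missing_revision_ids(revision_id=tip_revision_id,
                                               find_ghosts=False)
    return search.get_keys()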
3526
1717
def _same_model(source, target):
3547
1738
"different serializers")
3550
class InterSameDataRepository(InterRepository):
3551
"""Code for converting between repositories that represent the same data.
3553
Data format and model must match for this to work.
3557
def _get_repo_format_to_test(self):
3558
"""Repository format for testing with.
3560
InterSameData can pull from subtree to subtree and from non-subtree to
3561
non-subtree, so we test this with the richest repository format.
3563
from bzrlib.repofmt import knitrepo
3564
return knitrepo.RepositoryFormatKnit3()
3567
def is_compatible(source, target):
3568
return InterRepository._same_model(source, target)
3571
class InterWeaveRepo(InterSameDataRepository):
3572
"""Optimised code paths between Weave based repositories.
3574
This should be in bzrlib/repofmt/weaverepo.py but we have not yet
3575
implemented lazy inter-object optimisation.
3579
def _get_repo_format_to_test(self):
3580
from bzrlib.repofmt import weaverepo
3581
return weaverepo.RepositoryFormat7()
3584
def is_compatible(source, target):
3585
"""Be compatible with known Weave formats.
3587
We don't test for the stores being of specific types because that
3588
could lead to confusing results, and there is no need to be
3591
from bzrlib.repofmt.weaverepo import (
3597
return (isinstance(source._format, (RepositoryFormat5,
3599
RepositoryFormat7)) and
3600
isinstance(target._format, (RepositoryFormat5,
3602
RepositoryFormat7)))
3603
except AttributeError:
3607
def copy_content(self, revision_id=None):
3608
"""See InterRepository.copy_content()."""
3609
# weave specific optimised path:
3611
self.target.set_make_working_trees(self.source.make_working_trees())
3612
except (errors.RepositoryUpgradeRequired, NotImplementedError):
3614
# FIXME do not peek!
3615
if self.source._transport.listable():
3616
pb = ui.ui_factory.nested_progress_bar()
3618
self.target.texts.insert_record_stream(
3619
self.source.texts.get_record_stream(
3620
self.source.texts.keys(), 'topological', False))
3621
pb.update('Copying inventory', 0, 1)
3622
self.target.inventories.insert_record_stream(
3623
self.source.inventories.get_record_stream(
3624
self.source.inventories.keys(), 'topological', False))
3625
self.target.signatures.insert_record_stream(
3626
self.source.signatures.get_record_stream(
3627
self.source.signatures.keys(),
3629
self.target.revisions.insert_record_stream(
3630
self.source.revisions.get_record_stream(
3631
self.source.revisions.keys(),
3632
'topological', True))
3636
self.target.fetch(self.source, revision_id=revision_id)
3639
def search_missing_revision_ids(self, revision_id=None, find_ghosts=True):
3640
"""See InterRepository.missing_revision_ids()."""
3641
# We want all revisions reachable from revision_id in the source, but we
# don't want to stat every file here and there.  Concretely: every revision
# the target needs in order to satisfy revision_id must be checked, but not
# those we already have locally.  So first get the candidate set of
# revisions that satisfy revision_id in the source, then eliminate those
# already present in the target.
# This is slow on a high-latency connection, but as this disk format
# scales terribly for push anyway (it rewrites inventory.weave), that is
# considered acceptable.
3652
if revision_id is not None:
3653
source_ids = self.source.get_ancestry(revision_id)
3654
if source_ids[0] is not None:
3655
raise AssertionError()
3658
source_ids = self.source._all_possible_ids()
3659
source_ids_set = set(source_ids)
3660
# source_ids is the worst possible case we may need to pull.
3661
# now we want to filter source_ids against what we actually
3662
# have in target, but don't try to check for existence where we know
3663
# we do not have a revision as that would be pointless.
3664
target_ids = set(self.target._all_possible_ids())
3665
possibly_present_revisions = target_ids.intersection(source_ids_set)
3666
actually_present_revisions = set(
3667
self.target._eliminate_revisions_not_present(possibly_present_revisions))
3668
required_revisions = source_ids_set.difference(actually_present_revisions)
3669
if revision_id is not None:
3670
# Since we used get_ancestry to determine source_ids, we are assured that
# all referenced revisions are present: they were installed in topological
# order, and the tip revision was validated by get_ancestry.
3673
result_set = required_revisions
3675
# If we just grabbed the possibly-available ids, then we only have an
# estimate of what is available and need to validate that against the
# revision records.
3679
self.source._eliminate_revisions_not_present(required_revisions))
3680
return self.source.revision_ids_to_search_result(result_set)
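# A small illustrative sketch (not bzrlib code) of the set arithmetic used
# above: intersect the candidate source ids with what the target might
# have, confirm which of those really exist, and the remainder is what
# must be pulled.  `eliminate_absent` stands in for the repository's
# _eliminate_revisions_not_present method.
def _sketch_required_revisions(source_ids, target_ids, eliminate_absent):
    source_ids_set = set(source_ids)
    possibly_present = set(target_ids).intersection(source_ids_set)
    actually_present = set(eliminate_absent(possibly_present))
    return source_ids_set.difference(actually_present)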
3683
class InterKnitRepo(InterSameDataRepository):
3684
"""Optimised code paths between Knit based repositories."""
3687
def _get_repo_format_to_test(self):
3688
from bzrlib.repofmt import knitrepo
3689
return knitrepo.RepositoryFormatKnit1()
3692
def is_compatible(source, target):
3693
"""Be compatible with known Knit formats.
3695
We don't test for the stores being of specific types because that
3696
could lead to confusing results, and there is no need to be
3699
from bzrlib.repofmt.knitrepo import RepositoryFormatKnit
3701
are_knits = (isinstance(source._format, RepositoryFormatKnit) and
3702
isinstance(target._format, RepositoryFormatKnit))
3703
except AttributeError:
3705
return are_knits and InterRepository._same_model(source, target)
3708
def search_missing_revision_ids(self, revision_id=None, find_ghosts=True):
3709
"""See InterRepository.missing_revision_ids()."""
3710
if revision_id is not None:
3711
source_ids = self.source.get_ancestry(revision_id)
3712
if source_ids[0] is not None:
3713
raise AssertionError()
3716
source_ids = self.source.all_revision_ids()
3717
source_ids_set = set(source_ids)
3718
# source_ids is the worst possible case we may need to pull.
3719
# now we want to filter source_ids against what we actually
3720
# have in target, but don't try to check for existence where we know
3721
# we do not have a revision as that would be pointless.
3722
target_ids = set(self.target.all_revision_ids())
3723
possibly_present_revisions = target_ids.intersection(source_ids_set)
3724
actually_present_revisions = set(
3725
self.target._eliminate_revisions_not_present(possibly_present_revisions))
3726
required_revisions = source_ids_set.difference(actually_present_revisions)
3727
if revision_id is not None:
3728
# Since we used get_ancestry to determine source_ids, we are assured that
# all referenced revisions are present: they were installed in topological
# order, and the tip revision was validated by get_ancestry.
3731
result_set = required_revisions
3733
# If we just grabbed the possibly-available ids, then we only have an
# estimate of what is available and need to validate that against the
# revision records.
3737
self.source._eliminate_revisions_not_present(required_revisions))
3738
return self.source.revision_ids_to_search_result(result_set)
3741
class InterDifferingSerializer(InterRepository):
3744
def _get_repo_format_to_test(self):
3748
def is_compatible(source, target):
3749
"""Be compatible with Knit2 source and Knit3 target"""
3750
# This is redundant with format.check_conversion_target(), however that
3751
# raises an exception, and we just want to say "False" as in we won't
3752
# support converting between these formats.
3753
if 'IDS_never' in debug.debug_flags:
3755
if source.supports_rich_root() and not target.supports_rich_root():
3757
if (source._format.supports_tree_reference
3758
and not target._format.supports_tree_reference):
3760
if target._fallback_repositories and target._format.supports_chks:
3761
# IDS doesn't know how to copy CHKs for the parent inventories it
3762
# adds to stacked repos.
3764
if 'IDS_always' in debug.debug_flags:
3766
# Only use this code path for local source and target. IDS does far
3767
# too much IO (both bandwidth and roundtrips) over a network.
3768
if not source.bzrdir.transport.base.startswith('file:///'):
3770
if not target.bzrdir.transport.base.startswith('file:///'):
3774
def _get_trees(self, revision_ids, cache):
3776
for rev_id in revision_ids:
3778
possible_trees.append((rev_id, cache[rev_id]))
3780
# Not cached, but inventory might be present anyway.
3782
tree = self.source.revision_tree(rev_id)
3783
except errors.NoSuchRevision:
3784
# Nope, parent is ghost.
3787
cache[rev_id] = tree
3788
possible_trees.append((rev_id, tree))
3789
return possible_trees
3791
def _get_delta_for_revision(self, tree, parent_ids, possible_trees):
3792
"""Get the best delta and base for this revision.
3794
:return: (basis_id, delta)
3797
# Generate deltas against each tree, to find the shortest.
3798
texts_possibly_new_in_tree = set()
3799
for basis_id, basis_tree in possible_trees:
3800
delta = tree.inventory._make_delta(basis_tree.inventory)
3801
for old_path, new_path, file_id, new_entry in delta:
3802
if new_path is None:
3803
# This file_id isn't present in the new rev, so we don't
3807
# Rich roots are handled elsewhere...
3809
kind = new_entry.kind
3810
if kind != 'directory' and kind != 'file':
3811
# No text record associated with this inventory entry.
3813
# This is a directory or file that has changed somehow.
3814
texts_possibly_new_in_tree.add((file_id, new_entry.revision))
3815
deltas.append((len(delta), basis_id, delta))
3817
return deltas[0][1:]
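# Illustrative-only restatement of the "shortest delta wins" selection
# above: given candidate (basis_id, delta) pairs, keep the pair whose delta
# touches the fewest entries.  The real method also records which texts may
# be new in the tree while it builds each candidate delta.
def _sketch_smallest_delta(candidate_deltas):
    # candidate_deltas: iterable of (basis_id, delta) where delta is a list
    # of inventory-delta tuples.
    scored = sorted((len(delta), basis_id, delta)
                    for basis_id, delta in candidate_deltas)
    _, basis_id, delta = scored[0]
    return basis_id, delta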
3819
def _fetch_parent_invs_for_stacking(self, parent_map, cache):
3820
"""Find all parent revisions that are absent, but for which the
3821
inventory is present, and copy those inventories.
3823
This is necessary to preserve correctness when the source is stacked
without fallbacks configured.  (Note that in cases like upgrade the
source may not have _fallback_repositories even though it is
stacked.)
3829
for parents in parent_map.values():
3830
parent_revs.update(parents)
3831
present_parents = self.source.get_parent_map(parent_revs)
3832
absent_parents = set(parent_revs).difference(present_parents)
3833
parent_invs_keys_for_stacking = self.source.inventories.get_parent_map(
3834
(rev_id,) for rev_id in absent_parents)
3835
parent_inv_ids = [key[-1] for key in parent_invs_keys_for_stacking]
3836
for parent_tree in self.source.revision_trees(parent_inv_ids):
3837
current_revision_id = parent_tree.get_revision_id()
3838
parents_parents_keys = parent_invs_keys_for_stacking[
3839
(current_revision_id,)]
3840
parents_parents = [key[-1] for key in parents_parents_keys]
3841
basis_id = _mod_revision.NULL_REVISION
3842
basis_tree = self.source.revision_tree(basis_id)
3843
delta = parent_tree.inventory._make_delta(basis_tree.inventory)
3844
self.target.add_inventory_by_delta(
3845
basis_id, delta, current_revision_id, parents_parents)
3846
cache[current_revision_id] = parent_tree
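# Illustrative sketch (not bzrlib code) of the first half of the method
# above: collect every parent named in parent_map, then keep only those
# that the source cannot return as revisions.  Those absent parents are
# the ones whose inventories may still need copying for stacking
# correctness.  `source_get_parent_map` stands in for
# self.source.get_parent_map.
def _sketch_absent_parents(parent_map, source_get_parent_map):
    parent_revs = set()
    for parents in parent_map.values():
        parent_revs.update(parents)
    present = set(source_get_parent_map(parent_revs))
    return parent_revs.difference(present)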
3848
def _fetch_batch(self, revision_ids, basis_id, cache):
3849
"""Fetch across a few revisions.
3851
:param revision_ids: The revisions to copy
3852
:param basis_id: The revision_id of a tree that must be in cache, used
3853
as a basis for delta when no other base is available
3854
:param cache: A cache of RevisionTrees that we can use.
3855
:return: The revision_id of the last converted tree. The RevisionTree
3856
for it will be in cache
3858
# Walk through all revisions; get inventory deltas, copy referenced
# texts that the deltas reference, insert the delta, revision and
# signature.
3861
root_keys_to_create = set()
3864
pending_revisions = []
3865
parent_map = self.source.get_parent_map(revision_ids)
3866
self._fetch_parent_invs_for_stacking(parent_map, cache)
3867
self.source._safe_to_return_from_cache = True
3868
for tree in self.source.revision_trees(revision_ids):
3869
# Find an inventory delta for this revision.
3870
# Find text entries that need to be copied, too.
3871
current_revision_id = tree.get_revision_id()
3872
parent_ids = parent_map.get(current_revision_id, ())
3873
parent_trees = self._get_trees(parent_ids, cache)
3874
possible_trees = list(parent_trees)
3875
if len(possible_trees) == 0:
3876
# There either aren't any parents, or the parents are ghosts,
3877
# so just use the last converted tree.
3878
possible_trees.append((basis_id, cache[basis_id]))
3879
basis_id, delta = self._get_delta_for_revision(tree, parent_ids,
3881
revision = self.source.get_revision(current_revision_id)
3882
pending_deltas.append((basis_id, delta,
3883
current_revision_id, revision.parent_ids))
3884
if self._converting_to_rich_root:
3885
self._revision_id_to_root_id[current_revision_id] = \
3887
# Determine which texts are present in this revision but not in
# any of the available parents.
3889
texts_possibly_new_in_tree = set()
3890
for old_path, new_path, file_id, entry in delta:
3891
if new_path is None:
3892
# This file_id isn't present in the new rev
3896
if not self.target.supports_rich_root():
3897
# The target doesn't support rich root, so we don't
3900
if self._converting_to_rich_root:
3901
# This can't be copied normally, we have to insert
3903
root_keys_to_create.add((file_id, entry.revision))
3906
texts_possibly_new_in_tree.add((file_id, entry.revision))
3907
for basis_id, basis_tree in possible_trees:
3908
basis_inv = basis_tree.inventory
3909
for file_key in list(texts_possibly_new_in_tree):
3910
file_id, file_revision = file_key
3912
entry = basis_inv[file_id]
3913
except errors.NoSuchId:
3915
if entry.revision == file_revision:
3916
texts_possibly_new_in_tree.remove(file_key)
3917
text_keys.update(texts_possibly_new_in_tree)
3918
pending_revisions.append(revision)
3919
cache[current_revision_id] = tree
3920
basis_id = current_revision_id
3921
self.source._safe_to_return_from_cache = False
3923
from_texts = self.source.texts
3924
to_texts = self.target.texts
3925
if root_keys_to_create:
3926
root_stream = _mod_fetch._new_root_data_stream(
3927
root_keys_to_create, self._revision_id_to_root_id, parent_map,
3929
to_texts.insert_record_stream(root_stream)
3930
to_texts.insert_record_stream(from_texts.get_record_stream(
3931
text_keys, self.target._format._fetch_order,
3932
not self.target._format._fetch_uses_deltas))
3933
# insert inventory deltas
3934
for delta in pending_deltas:
3935
self.target.add_inventory_by_delta(*delta)
3936
if self.target._fallback_repositories:
3937
# Make sure this stacked repository has all the parent inventories
3938
# for the new revisions that we are about to insert. We do this
3939
# before adding the revisions so that no revision is added until
3940
# all the inventories it may depend on are added.
3941
# Note that this is overzealous, as we may have fetched these in an
# earlier batch.
3944
revision_ids = set()
3945
for revision in pending_revisions:
3946
revision_ids.add(revision.revision_id)
3947
parent_ids.update(revision.parent_ids)
3948
parent_ids.difference_update(revision_ids)
3949
parent_ids.discard(_mod_revision.NULL_REVISION)
3950
parent_map = self.source.get_parent_map(parent_ids)
3951
# we iterate over parent_map and not parent_ids because we don't
3952
# want to try copying any revision which is a ghost
3953
for parent_tree in self.source.revision_trees(parent_map):
3954
current_revision_id = parent_tree.get_revision_id()
3955
parents_parents = parent_map[current_revision_id]
3956
possible_trees = self._get_trees(parents_parents, cache)
3957
if len(possible_trees) == 0:
3958
# There either aren't any parents, or the parents are
3959
# ghosts, so just use the last converted tree.
3960
possible_trees.append((basis_id, cache[basis_id]))
3961
basis_id, delta = self._get_delta_for_revision(parent_tree,
3962
parents_parents, possible_trees)
3963
self.target.add_inventory_by_delta(
3964
basis_id, delta, current_revision_id, parents_parents)
3965
# insert signatures and revisions
3966
for revision in pending_revisions:
3968
signature = self.source.get_signature_text(
3969
revision.revision_id)
3970
self.target.add_signature_text(revision.revision_id,
3972
except errors.NoSuchRevision:
3974
self.target.add_revision(revision.revision_id, revision)
3977
def _fetch_all_revisions(self, revision_ids, pb):
3978
"""Fetch everything for the list of revisions.
3980
:param revision_ids: The list of revisions to fetch. Must be in
3982
:param pb: A ProgressTask
3985
basis_id, basis_tree = self._get_basis(revision_ids[0])
3987
cache = lru_cache.LRUCache(100)
3988
cache[basis_id] = basis_tree
3989
del basis_tree # We don't want to hang on to it here
3993
for offset in range(0, len(revision_ids), batch_size):
3994
self.target.start_write_group()
3996
pb.update('Transferring revisions', offset,
3998
batch = revision_ids[offset:offset+batch_size]
3999
basis_id = self._fetch_batch(batch, basis_id, cache)
4001
self.source._safe_to_return_from_cache = False
4002
self.target.abort_write_group()
4005
hint = self.target.commit_write_group()
4008
if hints and self.target._format.pack_compresses:
4009
self.target.pack(hint=hints)
4010
pb.update('Transferring revisions', len(revision_ids),
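# A minimal sketch of the batching pattern used by _fetch_all_revisions
# above (assumption: `target` supports the write-group protocol and
# `convert_batch` does the per-batch work): each batch is wrapped in its
# own write group, so a failure only aborts the data of the current batch.
def _sketch_batched_copy(target, revision_ids, convert_batch, batch_size=100):
    for offset in range(0, len(revision_ids), batch_size):
        batch = revision_ids[offset:offset + batch_size]
        target.start_write_group()
        try:
            convert_batch(batch)
        except:
            # Abort just this batch's write group, then re-raise.
            target.abort_write_group()
            raise
        else:
            target.commit_write_group()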
4014
def fetch(self, revision_id=None, pb=None, find_ghosts=False,
4016
"""See InterRepository.fetch()."""
4017
if fetch_spec is not None:
4018
raise AssertionError("Not implemented yet...")
4019
ui.ui_factory.warn_experimental_format_fetch(self)
4020
if (not self.source.supports_rich_root()
4021
and self.target.supports_rich_root()):
4022
self._converting_to_rich_root = True
4023
self._revision_id_to_root_id = {}
4025
self._converting_to_rich_root = False
4026
# See <https://launchpad.net/bugs/456077> asking for a warning here
4027
if self.source._format.network_name() != self.target._format.network_name():
4028
ui.ui_factory.show_user_warning('cross_format_fetch',
4029
from_format=self.source._format,
4030
to_format=self.target._format)
4031
revision_ids = self.target.search_missing_revision_ids(self.source,
4032
revision_id, find_ghosts=find_ghosts).get_keys()
4033
if not revision_ids:
4035
revision_ids = tsort.topo_sort(
4036
self.source.get_graph().get_parent_map(revision_ids))
4037
if not revision_ids:
4039
# Walk through all revisions; get inventory deltas, copy referenced
# texts that the deltas reference, insert the delta, revision and
# signature.
4043
my_pb = ui.ui_factory.nested_progress_bar()
4046
symbol_versioning.warn(
4047
symbol_versioning.deprecated_in((1, 14, 0))
4048
% "pb parameter to fetch()")
4051
self._fetch_all_revisions(revision_ids, pb)
4053
if my_pb is not None:
4055
return len(revision_ids), 0
4057
def _get_basis(self, first_revision_id):
4058
"""Get a revision and tree which exists in the target.
4060
This assumes that first_revision_id is selected for transmission
4061
because all other ancestors are already present. If we can't find an
4062
ancestor we fall back to NULL_REVISION since we know that is safe.
4064
:return: (basis_id, basis_tree)
4066
first_rev = self.source.get_revision(first_revision_id)
4068
basis_id = first_rev.parent_ids[0]
4069
# only valid as a basis if the target has it
4070
self.target.get_revision(basis_id)
4071
# Try to get a basis tree - if it's a ghost it will hit the
4072
# NoSuchRevision case.
4073
basis_tree = self.source.revision_tree(basis_id)
4074
except (IndexError, errors.NoSuchRevision):
4075
basis_id = _mod_revision.NULL_REVISION
4076
basis_tree = self.source.revision_tree(basis_id)
4077
return basis_id, basis_tree
4080
InterRepository.register_optimiser(InterDifferingSerializer)
4081
InterRepository.register_optimiser(InterSameDataRepository)
4082
InterRepository.register_optimiser(InterWeaveRepo)
4083
InterRepository.register_optimiser(InterKnitRepo)
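# Illustrative sketch of how the optimisers registered above are used (the
# real lookup lives in InterObject.get): each registered class is asked via
# its is_compatible() classmethod and the first match wins, with a generic
# implementation as the fallback.  The iteration order shown here is an
# assumption, not a statement about the real registry.
def _sketch_pick_optimiser(registered_classes, source, target, generic_class):
    for klass in registered_classes:
        if klass.is_compatible(source, target):
            return klass(source, target)
    return generic_class(source, target)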
4086
1741
class CopyConverter(object):
4087
1742
"""A repository conversion tool which just performs a copy of the content.
4109
1764
# trigger an assertion if not such
4110
1765
repo._format.get_format_string()
4111
1766
self.repo_dir = repo.bzrdir
4112
pb.update(gettext('Moving repository to repository.backup'))
4113
1768
self.repo_dir.transport.move('repository', 'repository.backup')
4114
1769
backup_transport = self.repo_dir.transport.clone('repository.backup')
4115
1770
repo._format.check_conversion_target(self.target_format)
4116
1771
self.source_repo = repo._format.open(self.repo_dir,
4118
1773
_override_transport=backup_transport)
4119
pb.update(gettext('Creating new repository'))
4120
1775
converted = self.target_format.initialize(self.repo_dir,
4121
1776
self.source_repo.is_shared())
4122
1777
converted.lock_write()
4124
pb.update(gettext('Copying content'))
4125
1780
self.source_repo.copy_content_into(converted)
4127
1782
converted.unlock()
4128
pb.update(gettext('Deleting old repository content'))
4129
1784
self.repo_dir.transport.delete_tree('repository.backup')
4130
ui.ui_factory.note(gettext('repository converted'))
4143
def _unescaper(match, _map=_unescape_map):
4144
code = match.group(1)
4148
if not code.startswith('#'):
4150
return unichr(int(code[1:])).encode('utf8')
4156
def _unescape_xml(data):
4157
"""Unescape predefined XML entities in a string of data."""
4159
if _unescape_re is None:
4160
_unescape_re = re.compile('\&([^;]*);')
4161
return _unescape_re.sub(_unescaper, data)
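# Hedged example of the intended behaviour of _unescape_xml above.  The
# exact entity table is _unescape_map, defined earlier in this module; the
# comments assume it contains the predefined XML entities such as 'amp'.
def _sketch_unescape_examples():
    # '&amp;' is looked up in _unescape_map; '&#233;' is a numeric
    # character reference and comes back as UTF-8 encoded bytes.
    return [_unescape_xml('fish &amp; chips'),
            _unescape_xml('caf&#233;')]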
4164
class _VersionedFileChecker(object):
4166
def __init__(self, repository, text_key_references=None, ancestors=None):
4167
self.repository = repository
4168
self.text_index = self.repository._generate_text_key_index(
4169
text_key_references=text_key_references, ancestors=ancestors)
4171
def calculate_file_version_parents(self, text_key):
4172
"""Calculate the correct parents for a file version according to
4175
parent_keys = self.text_index[text_key]
4176
if parent_keys == [_mod_revision.NULL_REVISION]:
4178
return tuple(parent_keys)
4180
def check_file_version_parents(self, texts, progress_bar=None):
4181
"""Check the parents stored in a versioned file are correct.
4183
It also detects file versions that are not referenced by their
4184
corresponding revision's inventory.
4186
:returns: A tuple of (wrong_parents, dangling_file_versions).
4187
wrong_parents is a dict mapping {revision_id: (stored_parents,
4188
correct_parents)} for each revision_id where the stored parents
4189
are not correct. dangling_file_versions is a set of (file_id,
4190
revision_id) tuples for versions that are present in this versioned
4191
file, but not used by the corresponding inventory.
4193
local_progress = None
4194
if progress_bar is None:
4195
local_progress = ui.ui_factory.nested_progress_bar()
4196
progress_bar = local_progress
4198
return self._check_file_version_parents(texts, progress_bar)
4201
local_progress.finished()
4203
def _check_file_version_parents(self, texts, progress_bar):
4204
"""See check_file_version_parents."""
4206
self.file_ids = set([file_id for file_id, _ in
4207
self.text_index.iterkeys()])
4208
# text keys are now grouped by file_id
4209
n_versions = len(self.text_index)
4210
progress_bar.update('loading text store', 0, n_versions)
4211
parent_map = self.repository.texts.get_parent_map(self.text_index)
4212
# On unlistable transports this could well be empty/error...
4213
text_keys = self.repository.texts.keys()
4214
unused_keys = frozenset(text_keys) - set(self.text_index)
4215
for num, key in enumerate(self.text_index.iterkeys()):
4216
progress_bar.update('checking text graph', num, n_versions)
4217
correct_parents = self.calculate_file_version_parents(key)
4219
knit_parents = parent_map[key]
4220
except errors.RevisionNotPresent:
4223
if correct_parents != knit_parents:
4224
wrong_parents[key] = (knit_parents, correct_parents)
4225
return wrong_parents, unused_keys
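# A compact, purely illustrative sketch of the comparison performed above:
# for each text key, the parents actually stored in the text store are
# compared against the parents derived from the revision graph, and any
# mismatch is reported.
def _sketch_find_wrong_parents(stored_parent_map, correct_parent_map):
    wrong = {}
    for key, correct in correct_parent_map.items():
        stored = stored_parent_map.get(key)
        if stored is not None and tuple(stored) != tuple(correct):
            wrong[key] = (tuple(stored), tuple(correct))
    return wrong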
4228
def _old_get_graph(repository, revision_id):
4229
"""DO NOT USE. That is all. I'm serious."""
4230
graph = repository.get_graph()
4231
revision_graph = dict(((key, value) for key, value in
4232
graph.iter_ancestry([revision_id]) if value is not None))
4233
return _strip_NULL_ghosts(revision_graph)
4236
1789
def _strip_NULL_ghosts(revision_graph):
4237
1790
"""Also don't use this. more compatibility code for unmigrated clients."""
4238
1791
# Filter ghosts, and null:
4244
1797
return revision_graph
4247
class StreamSink(object):
4248
"""An object that can insert a stream into a repository.
4250
This interface handles the complexity of reserialising inventories and
4251
revisions from different formats, and allows unidirectional insertion into
4252
stacked repositories without looking for the missing basis parents
beforehand.
4256
def __init__(self, target_repo):
4257
self.target_repo = target_repo
4259
def insert_stream(self, stream, src_format, resume_tokens):
4260
"""Insert a stream's content into the target repository.
4262
:param src_format: a bzr repository format.
4264
:return: a list of resume tokens and an iterable of additional keys
    required before the insertion can be completed.
4267
self.target_repo.lock_write()
4270
self.target_repo.resume_write_group(resume_tokens)
4273
self.target_repo.start_write_group()
4276
# locked_insert_stream performs a commit|suspend.
4277
return self._locked_insert_stream(stream, src_format,
4280
self.target_repo.abort_write_group(suppress_errors=True)
4283
self.target_repo.unlock()
4285
def _locked_insert_stream(self, stream, src_format, is_resume):
4286
to_serializer = self.target_repo._format._serializer
4287
src_serializer = src_format._serializer
4289
if to_serializer == src_serializer:
4290
# If serializers match and the target is a pack repository, set the
4291
# write cache size on the new pack. This avoids poor performance
4292
# on transports where append is unbuffered (such as
4293
# RemoteTransport). This is safe to do because nothing should read
4294
# back from the target repository while a stream with matching
4295
# serialization is being inserted.
4296
# The exception is that a delta record from the source that should
4297
# be a fulltext may need to be expanded by the target (see
4298
# test_fetch_revisions_with_deltas_into_pack); but we take care to
4299
# explicitly flush any buffered writes first in that rare case.
4301
new_pack = self.target_repo._pack_collection._new_pack
4302
except AttributeError:
4303
# Not a pack repository
4306
new_pack.set_write_cache_size(1024*1024)
4307
for substream_type, substream in stream:
4308
if 'stream' in debug.debug_flags:
4309
mutter('inserting substream: %s', substream_type)
4310
if substream_type == 'texts':
4311
self.target_repo.texts.insert_record_stream(substream)
4312
elif substream_type == 'inventories':
4313
if src_serializer == to_serializer:
4314
self.target_repo.inventories.insert_record_stream(
4317
self._extract_and_insert_inventories(
4318
substream, src_serializer)
4319
elif substream_type == 'inventory-deltas':
4320
self._extract_and_insert_inventory_deltas(
4321
substream, src_serializer)
4322
elif substream_type == 'chk_bytes':
4323
# XXX: This doesn't support conversions, as it assumes the
4324
# conversion was done in the fetch code.
4325
self.target_repo.chk_bytes.insert_record_stream(substream)
4326
elif substream_type == 'revisions':
4327
# This may fall back to extract-and-insert more often than
4328
# required if the serializers are different only in terms of
4330
if src_serializer == to_serializer:
4331
self.target_repo.revisions.insert_record_stream(substream)
4333
self._extract_and_insert_revisions(substream,
4335
elif substream_type == 'signatures':
4336
self.target_repo.signatures.insert_record_stream(substream)
4338
raise AssertionError('kaboom! %s' % (substream_type,))
4339
# Done inserting data, and the missing_keys calculations will try to
4340
# read back from the inserted data, so flush the writes to the new pack
4341
# (if this is pack format).
4342
if new_pack is not None:
4343
new_pack._write_data('', flush=True)
4344
# Find all the new revisions (including ones from resume_tokens)
4345
missing_keys = self.target_repo.get_missing_parent_inventories(
4346
check_for_missing_texts=is_resume)
4348
for prefix, versioned_file in (
4349
('texts', self.target_repo.texts),
4350
('inventories', self.target_repo.inventories),
4351
('revisions', self.target_repo.revisions),
4352
('signatures', self.target_repo.signatures),
4353
('chk_bytes', self.target_repo.chk_bytes),
4355
if versioned_file is None:
4357
# TODO: key is often going to be a StaticTuple object
4358
# I don't believe we can define a method by which
4359
# (prefix,) + StaticTuple will work, though we could
4360
# define a StaticTuple.sq_concat that would allow you to
4361
# pass in either a tuple or a StaticTuple as the second
4362
# object, so instead we could have:
4363
# StaticTuple(prefix) + key here...
4364
missing_keys.update((prefix,) + key for key in
4365
versioned_file.get_missing_compression_parent_keys())
4366
except NotImplementedError:
4367
# cannot even attempt suspending, and missing would have failed
4368
# during stream insertion.
4369
missing_keys = set()
4372
# suspend the write group and tell the caller what is
# missing. We know we can suspend or else we would not have
4374
# entered this code path. (All repositories that can handle
4375
# missing keys can handle suspending a write group).
4376
write_group_tokens = self.target_repo.suspend_write_group()
4377
return write_group_tokens, missing_keys
4378
hint = self.target_repo.commit_write_group()
4379
if (to_serializer != src_serializer and
4380
self.target_repo._format.pack_compresses):
4381
self.target_repo.pack(hint=hint)
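# Hedged sketch of the caller-side protocol for insert_stream above: if the
# sink reports missing keys it also returns resume tokens; the caller then
# obtains an extra stream covering just those keys and resumes with the
# tokens.  `get_extra_stream` is a stand-in for something like
# StreamSource.get_stream_for_missing_keys.
def _sketch_insert_with_resume(sink, stream, src_format, get_extra_stream):
    tokens, missing_keys = sink.insert_stream(stream, src_format, [])
    while missing_keys:
        extra_stream = get_extra_stream(missing_keys)
        tokens, missing_keys = sink.insert_stream(extra_stream, src_format,
                                                  tokens)
    return tokens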
4384
def _extract_and_insert_inventory_deltas(self, substream, serializer):
4385
target_rich_root = self.target_repo._format.rich_root_data
4386
target_tree_refs = self.target_repo._format.supports_tree_reference
4387
for record in substream:
4388
# Insert the delta directly
4389
inventory_delta_bytes = record.get_bytes_as('fulltext')
4390
deserialiser = inventory_delta.InventoryDeltaDeserializer()
4392
parse_result = deserialiser.parse_text_bytes(
4393
inventory_delta_bytes)
4394
except inventory_delta.IncompatibleInventoryDelta, err:
4395
trace.mutter("Incompatible delta: %s", err.msg)
4396
raise errors.IncompatibleRevision(self.target_repo._format)
4397
basis_id, new_id, rich_root, tree_refs, inv_delta = parse_result
4398
revision_id = new_id
4399
parents = [key[0] for key in record.parents]
4400
self.target_repo.add_inventory_by_delta(
4401
basis_id, inv_delta, revision_id, parents)
4403
def _extract_and_insert_inventories(self, substream, serializer,
4405
"""Generate a new inventory versionedfile in target, converting data.
4407
The inventory is retrieved from the source, (deserializing it), and
4408
stored in the target (reserializing it in a different format).
4410
target_rich_root = self.target_repo._format.rich_root_data
4411
target_tree_refs = self.target_repo._format.supports_tree_reference
4412
for record in substream:
4413
# It's not a delta, so it must be a fulltext in the source
4414
# serializer's format.
4415
bytes = record.get_bytes_as('fulltext')
4416
revision_id = record.key[0]
4417
inv = serializer.read_inventory_from_string(bytes, revision_id)
4418
parents = [key[0] for key in record.parents]
4419
self.target_repo.add_inventory(revision_id, inv, parents)
4420
# No need to keep holding this full inv in memory when the rest of
4421
# the substream is likely to be all deltas.
4424
def _extract_and_insert_revisions(self, substream, serializer):
4425
for record in substream:
4426
bytes = record.get_bytes_as('fulltext')
4427
revision_id = record.key[0]
4428
rev = serializer.read_revision_from_string(bytes)
4429
if rev.revision_id != revision_id:
4430
raise AssertionError('wtf: %s != %s' % (rev, revision_id))
4431
self.target_repo.add_revision(revision_id, rev)
4434
if self.target_repo._format._fetch_reconcile:
4435
self.target_repo.reconcile()
4438
class StreamSource(object):
4439
"""A source of a stream for fetching between repositories."""
4441
def __init__(self, from_repository, to_format):
4442
"""Create a StreamSource streaming from from_repository."""
4443
self.from_repository = from_repository
4444
self.to_format = to_format
4445
self._record_counter = RecordCounter()
4447
def delta_on_metadata(self):
4448
"""Return True if delta's are permitted on metadata streams.
4450
That is on revisions and signatures.
4452
src_serializer = self.from_repository._format._serializer
4453
target_serializer = self.to_format._serializer
4454
return (self.to_format._fetch_uses_deltas and
4455
src_serializer == target_serializer)
4457
def _fetch_revision_texts(self, revs):
4458
# fetch signatures first and then the revision texts
4459
# may need to be an InterRevisionStore call here.
4460
from_sf = self.from_repository.signatures
4461
# A missing signature is just skipped.
4462
keys = [(rev_id,) for rev_id in revs]
4463
signatures = versionedfile.filter_absent(from_sf.get_record_stream(
4465
self.to_format._fetch_order,
4466
not self.to_format._fetch_uses_deltas))
4467
# If a revision has a delta, this is actually expanded inside the
4468
# insert_record_stream code now, which is an alternate fix for
4470
from_rf = self.from_repository.revisions
4471
revisions = from_rf.get_record_stream(
4473
self.to_format._fetch_order,
4474
not self.delta_on_metadata())
4475
return [('signatures', signatures), ('revisions', revisions)]
4477
def _generate_root_texts(self, revs):
4478
"""This will be called by get_stream between fetching weave texts and
4479
fetching the inventory weave.
4481
if self._rich_root_upgrade():
4482
return _mod_fetch.Inter1and2Helper(
4483
self.from_repository).generate_root_texts(revs)
4487
def get_stream(self, search):
4489
revs = search.get_keys()
4490
graph = self.from_repository.get_graph()
4491
revs = tsort.topo_sort(graph.get_parent_map(revs))
4492
data_to_fetch = self.from_repository.item_keys_introduced_by(revs)
4494
for knit_kind, file_id, revisions in data_to_fetch:
4495
if knit_kind != phase:
4497
# Make a new progress bar for this phase
4498
if knit_kind == "file":
4499
# Accumulate file texts
4500
text_keys.extend([(file_id, revision) for revision in
4502
elif knit_kind == "inventory":
4503
# Now copy the file texts.
4504
from_texts = self.from_repository.texts
4505
yield ('texts', from_texts.get_record_stream(
4506
text_keys, self.to_format._fetch_order,
4507
not self.to_format._fetch_uses_deltas))
4508
# Cause an error if a text occurs after we have done the
# copy.
4511
# Before we process the inventory we generate the root
4512
# texts (if necessary) so that the inventories references
4514
for _ in self._generate_root_texts(revs):
4516
# we fetch only the referenced inventories because we do not
4517
# know for unselected inventories whether all their required
4518
# texts are present in the other repository - it could be
# corrupted.
4520
for info in self._get_inventory_stream(revs):
4522
elif knit_kind == "signatures":
4523
# Nothing to do here; this will be taken care of when
4524
# _fetch_revision_texts happens.
4526
elif knit_kind == "revisions":
4527
for record in self._fetch_revision_texts(revs):
4530
raise AssertionError("Unknown knit kind %r" % knit_kind)
4532
def get_stream_for_missing_keys(self, missing_keys):
4533
# missing keys can only occur when we are byte copying and not
4534
# translating (because translation means we don't send
4535
# unreconstructable deltas ever).
4537
keys['texts'] = set()
4538
keys['revisions'] = set()
4539
keys['inventories'] = set()
4540
keys['chk_bytes'] = set()
4541
keys['signatures'] = set()
4542
for key in missing_keys:
4543
keys[key[0]].add(key[1:])
4544
if len(keys['revisions']):
4545
# If we allowed copying revisions at this point, we could end up
4546
# copying a revision without copying its required texts: a
4547
# violation of the requirements for repository integrity.
4548
raise AssertionError(
4549
'cannot copy revisions to fill in missing deltas %s' % (
4550
keys['revisions'],))
4551
for substream_kind, keys in keys.iteritems():
4552
vf = getattr(self.from_repository, substream_kind)
4553
if vf is None and keys:
4554
raise AssertionError(
4555
"cannot fill in keys for a versioned file we don't"
4556
" have: %s needs %s" % (substream_kind, keys))
4558
# No need to stream something we don't have
4560
if substream_kind == 'inventories':
4561
# Some missing keys are genuinely ghosts, filter those out.
4562
present = self.from_repository.inventories.get_parent_map(keys)
4563
revs = [key[0] for key in present]
4564
# Get the inventory stream more-or-less as we do for the
4565
# original stream; there's no reason to assume that records
4566
# direct from the source will be suitable for the sink. (Think
4567
# e.g. 2a -> 1.9-rich-root).
4568
for info in self._get_inventory_stream(revs, missing=True):
4572
# Ask for full texts always so that we don't need more round trips
4573
# after this stream.
4574
# Some of the missing keys are genuinely ghosts, so filter absent
4575
# records. The Sink is responsible for doing another check to
4576
# ensure that ghosts don't introduce missing data for future
4578
stream = versionedfile.filter_absent(vf.get_record_stream(keys,
4579
self.to_format._fetch_order, True))
4580
yield substream_kind, stream
4582
def inventory_fetch_order(self):
4583
if self._rich_root_upgrade():
4584
return 'topological'
4586
return self.to_format._fetch_order
4588
def _rich_root_upgrade(self):
4589
return (not self.from_repository._format.rich_root_data and
4590
self.to_format.rich_root_data)
4592
def _get_inventory_stream(self, revision_ids, missing=False):
4593
from_format = self.from_repository._format
4594
if (from_format.supports_chks and self.to_format.supports_chks and
4595
from_format.network_name() == self.to_format.network_name()):
4596
raise AssertionError(
4597
"this case should be handled by GroupCHKStreamSource")
4598
elif 'forceinvdeltas' in debug.debug_flags:
4599
return self._get_convertable_inventory_stream(revision_ids,
4600
delta_versus_null=missing)
4601
elif from_format.network_name() == self.to_format.network_name():
4603
return self._get_simple_inventory_stream(revision_ids,
4605
elif (not from_format.supports_chks and not self.to_format.supports_chks
4606
and from_format._serializer == self.to_format._serializer):
4607
# Essentially the same format.
4608
return self._get_simple_inventory_stream(revision_ids,
4611
# Any time we switch serializations, we want to use an
4612
# inventory-delta based approach.
4613
return self._get_convertable_inventory_stream(revision_ids,
4614
delta_versus_null=missing)
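# Simplified, illustrative restatement of the dispatch above.  The
# chk-to-chk same-format case (handled by GroupCHKStreamSource in the real
# code) is left out, and `force_deltas` stands in for the 'forceinvdeltas'
# debug flag.
def _sketch_inventory_stream_kind(from_format, to_format, force_deltas=False):
    if force_deltas:
        return 'convertable'
    if from_format.network_name() == to_format.network_name():
        return 'simple'
    if (not from_format.supports_chks and not to_format.supports_chks
            and from_format._serializer == to_format._serializer):
        # Essentially the same format.
        return 'simple'
    # Any change of serialization goes via inventory-deltas.
    return 'convertable'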
4616
def _get_simple_inventory_stream(self, revision_ids, missing=False):
4617
# NB: This currently reopens the inventory weave in source;
4618
# using a single stream interface instead would avoid this.
4619
from_weave = self.from_repository.inventories
4621
delta_closure = True
4623
delta_closure = not self.delta_on_metadata()
4624
yield ('inventories', from_weave.get_record_stream(
4625
[(rev_id,) for rev_id in revision_ids],
4626
self.inventory_fetch_order(), delta_closure))
4628
def _get_convertable_inventory_stream(self, revision_ids,
4629
delta_versus_null=False):
4630
# The two formats are sufficiently different that there is no fast
4631
# path, so we need to send just inventorydeltas, which any
4632
# sufficiently modern client can insert into any repository.
4633
# The StreamSink code expects to be able to
4634
# convert on the target, so we need to put bytes-on-the-wire that can
4635
# be converted. That means inventory deltas (if the remote is <1.19,
4636
# RemoteStreamSink will fallback to VFS to insert the deltas).
4637
yield ('inventory-deltas',
4638
self._stream_invs_as_deltas(revision_ids,
4639
delta_versus_null=delta_versus_null))
4641
def _stream_invs_as_deltas(self, revision_ids, delta_versus_null=False):
4642
"""Return a stream of inventory-deltas for the given rev ids.
4644
:param revision_ids: The list of inventories to transmit
4645
:param delta_versus_null: Don't try to find a minimal delta for this
4646
entry, instead compute the delta versus the NULL_REVISION. This
4647
effectively streams a complete inventory. Used for stuff like
4648
filling in missing parents, etc.
4650
from_repo = self.from_repository
4651
revision_keys = [(rev_id,) for rev_id in revision_ids]
4652
parent_map = from_repo.inventories.get_parent_map(revision_keys)
4653
# XXX: possibly repos could implement a more efficient iter_inv_deltas
4655
inventories = self.from_repository.iter_inventories(
4656
revision_ids, 'topological')
4657
format = from_repo._format
4658
invs_sent_so_far = set([_mod_revision.NULL_REVISION])
4659
inventory_cache = lru_cache.LRUCache(50)
4660
null_inventory = from_repo.revision_tree(
4661
_mod_revision.NULL_REVISION).inventory
4662
# XXX: ideally the rich-root/tree-refs flags would be per-revision, not
4663
# per-repo (e.g. streaming a non-rich-root revision out of a rich-root
4664
# repo back into a non-rich-root repo ought to be allowed)
4665
serializer = inventory_delta.InventoryDeltaSerializer(
4666
versioned_root=format.rich_root_data,
4667
tree_references=format.supports_tree_reference)
4668
for inv in inventories:
4669
key = (inv.revision_id,)
4670
parent_keys = parent_map.get(key, ())
4672
if not delta_versus_null and parent_keys:
4673
# The caller did not ask for complete inventories and we have
4674
# some parents that we can delta against. Make a delta against
4675
# each parent so that we can find the smallest.
4676
parent_ids = [parent_key[0] for parent_key in parent_keys]
4677
for parent_id in parent_ids:
4678
if parent_id not in invs_sent_so_far:
4679
# We don't know that the remote side has this basis, so
4682
if parent_id == _mod_revision.NULL_REVISION:
4683
parent_inv = null_inventory
4685
parent_inv = inventory_cache.get(parent_id, None)
4686
if parent_inv is None:
4687
parent_inv = from_repo.get_inventory(parent_id)
4688
candidate_delta = inv._make_delta(parent_inv)
4689
if (delta is None or
4690
len(delta) > len(candidate_delta)):
4691
delta = candidate_delta
4692
basis_id = parent_id
4694
# Either none of the parents ended up being suitable, or we
4695
# were asked to delta against NULL
4696
basis_id = _mod_revision.NULL_REVISION
4697
delta = inv._make_delta(null_inventory)
4698
invs_sent_so_far.add(inv.revision_id)
4699
inventory_cache[inv.revision_id] = inv
4700
delta_serialized = ''.join(
4701
serializer.delta_to_lines(basis_id, key[-1], delta))
4702
yield versionedfile.FulltextContentFactory(
4703
key, parent_keys, None, delta_serialized)
4706
1800
def _iter_for_revno(repo, partial_history_cache, stop_index=None,
4707
1801
stop_revision=None):
4708
1802
"""Extend the partial history to include a given index