345
208
commit to be valid, deletes against the basis MUST be recorded via
346
209
builder.record_delete().
348
self._recording_deletes = True
350
basis_id = self.parents[0]
352
basis_id = _mod_revision.NULL_REVISION
353
self.basis_delta_revision = basis_id
355
def record_entry_contents(self, ie, parent_invs, path, tree,
content_summary):
"""Record the content of ie from tree into the commit if needed.

Side effect: sets ie.revision when unchanged

:param ie: An inventory entry present in the commit.
:param parent_invs: The inventories of the parent revisions of the
commit.
:param path: The path the entry is at in the tree.
:param tree: The tree which contains this entry and should be used to
obtain content.
:param content_summary: Summary data from the tree about the paths
content - stat, length, exec, sha/link target. This is only
accessed when the entry has a revision of None - that is when it is
a candidate to commit.
:return: A tuple (change_delta, version_recorded, fs_hash).
change_delta is an inventory_delta change for this entry against
the basis tree of the commit, or None if no change occurred against
the basis.
version_recorded is True if a new version of the entry has been
recorded. For instance, committing a merge where a file was only
changed on the other side will return (delta, False).
fs_hash is either None, or the hash details for the path (currently
a tuple of the contents sha1 and the statvalue returned by
tree.get_file_with_stat()).
382
if self.new_inventory.root is None:
383
if ie.parent_id is not None:
384
raise errors.RootMissing()
385
self._check_root(ie, parent_invs, tree)
386
if ie.revision is None:
387
kind = content_summary[0]
389
# ie is carried over from a prior commit
391
# XXX: repository specific check for nested tree support goes here - if
392
# the repo doesn't want nested trees we skip it ?
393
if (kind == 'tree-reference' and
394
not self.repository._format.supports_tree_reference):
395
# mismatch between commit builder logic and repository:
396
# this needs the entry creation pushed down into the builder.
397
raise NotImplementedError('Missing repository subtree support.')
398
self.new_inventory.add(ie)
400
# TODO: slow, take it out of the inner loop.
402
basis_inv = parent_invs[0]
404
basis_inv = Inventory(root_id=None)
406
# ie.revision is always None if the InventoryEntry is considered
407
# for committing. We may record the previous parent's revision if the
408
# content is actually unchanged against a sole head.
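# Illustrative summary (added; not part of the original source): the logic
# below roughly amounts to
#     heads = self._heads(ie.file_id, candidate parent revisions)
#     if len(heads) == 1 and the content matches that sole head's entry:
#         carry over parent_entry.revision, text_sha1, text_size, executable
#     else:
#         record a new text and set ie.revision = self._new_revision_id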
409
if ie.revision is not None:
410
if not self._versioned_root and path == '':
411
# repositories that do not version the root set the root's
412
# revision to the new commit even when no change occurs (more
413
# specifically, they do not record a revision on the root; and
414
# the rev id is assigned to the root during deserialisation -
415
# this masks when a change may have occurred against the basis.
416
# To match this we always issue a delta, because the revision
417
# of the root will always be changing.
418
if ie.file_id in basis_inv:
419
delta = (basis_inv.id2path(ie.file_id), path,
423
delta = (None, path, ie.file_id, ie)
424
self._basis_delta.append(delta)
425
return delta, False, None
427
# we don't need to commit this, because the caller already
428
# determined that an existing revision of this file is
429
appropriate. If it's not being considered for committing then
430
# it and all its parents to the root must be unaltered so
431
# no-change against the basis.
432
if ie.revision == self._new_revision_id:
433
raise AssertionError("Impossible situation, a skipped "
434
"inventory entry (%r) claims to be modified in this "
435
"commit (%r).", (ie, self._new_revision_id))
436
return None, False, None
437
# XXX: Friction: parent_candidates should return a list not a dict
438
# so that we don't have to walk the inventories again.
439
parent_candiate_entries = ie.parent_candidates(parent_invs)
440
head_set = self._heads(ie.file_id, parent_candiate_entries.keys())
442
for inv in parent_invs:
443
if ie.file_id in inv:
444
old_rev = inv[ie.file_id].revision
445
if old_rev in head_set:
446
heads.append(inv[ie.file_id].revision)
447
head_set.remove(inv[ie.file_id].revision)
450
# now we check to see if we need to write a new record to the
452
# We write a new entry unless there is one head to the ancestors, and
453
# the kind-derived content is unchanged.
455
# Cheapest check first: no ancestors, or more than one head in the
456
# ancestors, we write a new node.
460
# There is a single head, look it up for comparison
461
parent_entry = parent_candiate_entries[heads[0]]
462
# if the non-content specific data has changed, we'll be writing a
464
if (parent_entry.parent_id != ie.parent_id or
465
parent_entry.name != ie.name):
467
# now we need to do content specific checks:
469
# if the kind changed the content obviously has
470
if kind != parent_entry.kind:
472
# Stat cache fingerprint feedback for the caller - None as we usually
473
# don't generate one.
476
if content_summary[2] is None:
477
raise ValueError("Files must not have executable = None")
479
if (# if the file length changed we have to store:
480
parent_entry.text_size != content_summary[1] or
481
# if the exec bit has changed we have to store:
482
parent_entry.executable != content_summary[2]):
484
elif parent_entry.text_sha1 == content_summary[3]:
485
# all meta and content is unchanged (using a hash cache
486
# hit to check the sha)
487
ie.revision = parent_entry.revision
488
ie.text_size = parent_entry.text_size
489
ie.text_sha1 = parent_entry.text_sha1
490
ie.executable = parent_entry.executable
491
return self._get_delta(ie, basis_inv, path), False, None
493
# Either there is only a hash change (no hash cache entry,
494
# or same size content change), or there is no change on
496
# Provide the parent's hash to the store layer, so that if the
# content is unchanged we will not store a new node.
498
nostore_sha = parent_entry.text_sha1
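# Note (added; assumption based on the try/except below and on
# _add_text_to_weave): passing nostore_sha down to the text store's
# add_lines() makes it raise errors.ExistingContent instead of storing a
# new node when the incoming text's sha1 equals nostore_sha, and the
# handler below then carries the parent entry over unchanged.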
500
# We want to record a new node regardless of the presence or
501
# absence of a content change in the file.
503
ie.executable = content_summary[2]
504
file_obj, stat_value = tree.get_file_with_stat(ie.file_id, path)
506
lines = file_obj.readlines()
510
ie.text_sha1, ie.text_size = self._add_text_to_weave(
511
ie.file_id, lines, heads, nostore_sha)
512
# Let the caller know we generated a stat fingerprint.
513
fingerprint = (ie.text_sha1, stat_value)
514
except errors.ExistingContent:
515
# Turns out that the file content was unchanged, and we were
516
# only going to store a new node if it was changed. Carry over
518
ie.revision = parent_entry.revision
519
ie.text_size = parent_entry.text_size
520
ie.text_sha1 = parent_entry.text_sha1
521
ie.executable = parent_entry.executable
522
return self._get_delta(ie, basis_inv, path), False, None
523
elif kind == 'directory':
525
# all data is meta here, nothing specific to directory, so
527
ie.revision = parent_entry.revision
528
return self._get_delta(ie, basis_inv, path), False, None
530
self._add_text_to_weave(ie.file_id, lines, heads, None)
531
elif kind == 'symlink':
532
current_link_target = content_summary[3]
534
# symlink target is not generic metadata, check if it has
536
if current_link_target != parent_entry.symlink_target:
539
# unchanged, carry over.
540
ie.revision = parent_entry.revision
541
ie.symlink_target = parent_entry.symlink_target
542
return self._get_delta(ie, basis_inv, path), False, None
543
ie.symlink_target = current_link_target
545
self._add_text_to_weave(ie.file_id, lines, heads, None)
546
elif kind == 'tree-reference':
548
if content_summary[3] != parent_entry.reference_revision:
551
# unchanged, carry over.
552
ie.reference_revision = parent_entry.reference_revision
553
ie.revision = parent_entry.revision
554
return self._get_delta(ie, basis_inv, path), False, None
555
ie.reference_revision = content_summary[3]
557
self._add_text_to_weave(ie.file_id, lines, heads, None)
559
raise NotImplementedError('unknown kind')
560
ie.revision = self._new_revision_id
561
self._any_changes = True
562
return self._get_delta(ie, basis_inv, path), True, fingerprint
564
def record_iter_changes(self, tree, basis_revision_id, iter_changes,
565
_entry_factory=entry_factory):
211
raise NotImplementedError(self.will_record_deletes)
213
def record_iter_changes(self, tree, basis_revision_id, iter_changes):
566
214
"""Record a new tree via iter_changes.
568
216
:param tree: The tree to obtain text contents from for changed objects.
570
218
has been generated against. Currently assumed to be the same
571
219
as self.parents[0] - if it is not, errors may occur.
572
220
:param iter_changes: An iter_changes iterator with the changes to apply
573
to basis_revision_id.
574
:param _entry_factory: Private method to bind entry_factory locally for
578
# Create an inventory delta based on deltas between all the parents and
579
# deltas between all the parent inventories. We use inventory delta's
580
# between the inventory objects because iter_changes masks
581
# last-changed-field only changes.
583
# file_id -> change map, change is fileid, paths, changed, versioneds,
584
# parents, names, kinds, executables
586
# {file_id -> revision_id -> inventory entry, for entries in parent
587
# trees that are not parents[0]
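# Illustrative shapes (added, inferred from the comments above):
#   changes        ~ {file_id: (iter_changes tuple, [parent revision ids])}
#   merged_ids     ~ {file_id: [revision_id, ...]}
#   parent_entries ~ {file_id: {revision_id: InventoryEntry}}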
589
revtrees = list(self.repository.revision_trees(self.parents))
590
# The basis inventory from a repository
592
basis_inv = revtrees[0].inventory
594
basis_inv = self.repository.revision_tree(
595
_mod_revision.NULL_REVISION).inventory
596
if len(self.parents) > 0:
597
if basis_revision_id != self.parents[0]:
599
"arbitrary basis parents not yet supported with merges")
600
for revtree in revtrees[1:]:
601
for change in revtree.inventory._make_delta(basis_inv):
602
if change[1] is None:
603
# Not present in this parent.
605
if change[2] not in merged_ids:
606
if change[0] is not None:
607
merged_ids[change[2]] = [
608
basis_inv[change[2]].revision,
611
merged_ids[change[2]] = [change[3].revision]
612
parent_entries[change[2]] = {change[3].revision:change[3]}
614
merged_ids[change[2]].append(change[3].revision)
615
parent_entries[change[2]][change[3].revision] = change[3]
618
# Setup the changes from the tree:
619
# changes maps file_id -> (change, [parent revision_ids])
621
for change in iter_changes:
622
# This probably looks up in basis_inv way too much.
623
if change[1][0] is not None:
624
head_candidate = [basis_inv[change[0]].revision]
627
changes[change[0]] = change, merged_ids.get(change[0],
629
unchanged_merged = set(merged_ids) - set(changes)
630
# Extend the changes dict with synthetic changes to record merges of
632
for file_id in unchanged_merged:
633
# Record a merged version of these items that did not change vs the
634
# basis. This can be either identical parallel changes, or a revert
635
# of a specific file after a merge. The recorded content will be
636
# that of the current tree (which is the same as the basis), but
637
# the per-file graph will reflect a merge.
638
# NB:XXX: We are reconstructing path information we had, this
639
# should be preserved instead.
640
# inv delta change: (file_id, (path_in_source, path_in_target),
641
# changed_content, versioned, parent, name, kind,
643
basis_entry = basis_inv[file_id]
645
(basis_inv.id2path(file_id), tree.id2path(file_id)),
647
(basis_entry.parent_id, basis_entry.parent_id),
648
(basis_entry.name, basis_entry.name),
649
(basis_entry.kind, basis_entry.kind),
650
(basis_entry.executable, basis_entry.executable))
651
changes[file_id] = (change, merged_ids[file_id])
652
# changes contains tuples with the change and a set of inventory
653
# candidates for the file.
655
# old_path, new_path, file_id, new_inventory_entry
656
seen_root = False # Is the root in the basis delta?
657
inv_delta = self._basis_delta
658
modified_rev = self._new_revision_id
659
for change, head_candidates in changes.values():
660
if change[3][1]: # versioned in target.
661
# Several things may be happening here:
662
# We may have a fork in the per-file graph
663
# - record a change with the content from tree
664
# We may have a change against < all trees
665
# - carry over the tree that hasn't changed
666
# We may have a change against all trees
667
# - record the change with the content from tree
670
entry = _entry_factory[kind](file_id, change[5][1],
672
head_set = self._heads(change[0], set(head_candidates))
675
for head_candidate in head_candidates:
676
if head_candidate in head_set:
677
heads.append(head_candidate)
678
head_set.remove(head_candidate)
681
# Could be a carry-over situation:
682
parent_entry_revs = parent_entries.get(file_id, None)
683
if parent_entry_revs:
684
parent_entry = parent_entry_revs.get(heads[0], None)
687
if parent_entry is None:
688
# The parent iter_changes was called against is the one
689
# that is the per-file head, so any change is relevant
690
# iter_changes is valid.
691
carry_over_possible = False
693
# could be a carry over situation
694
# A change against the basis may just indicate a merge,
695
# we need to check the content against the source of the
696
# merge to determine if it was changed after the merge
698
if (parent_entry.kind != entry.kind or
699
parent_entry.parent_id != entry.parent_id or
700
parent_entry.name != entry.name):
701
# Metadata common to all entries has changed
702
# against per-file parent
703
carry_over_possible = False
705
carry_over_possible = True
706
# per-type checks for changes against the parent_entry
709
# Cannot be a carry-over situation
710
carry_over_possible = False
711
# Populate the entry in the delta
713
# XXX: There is still a small race here: If someone reverts the content of a file
714
# after iter_changes examines and decides it has changed,
715
# we will unconditionally record a new version even if some
716
# other process reverts it while commit is running (with
717
# the revert happening after iter_changes did its
720
entry.executable = True
722
entry.executable = False
723
if (carry_over_possible and
724
parent_entry.executable == entry.executable):
725
# Check the file length, content hash after reading
727
nostore_sha = parent_entry.text_sha1
730
file_obj, stat_value = tree.get_file_with_stat(file_id, change[1][1])
732
lines = file_obj.readlines()
736
entry.text_sha1, entry.text_size = self._add_text_to_weave(
737
file_id, lines, heads, nostore_sha)
738
except errors.ExistingContent:
739
# No content change against a carry_over parent
741
entry.text_size = parent_entry.text_size
742
entry.text_sha1 = parent_entry.text_sha1
743
elif kind == 'symlink':
745
entry.symlink_target = tree.get_symlink_target(file_id)
746
if (carry_over_possible and
747
parent_entry.symlink_target == entry.symlink_target):
750
self._add_text_to_weave(change[0], [], heads, None)
751
elif kind == 'directory':
752
if carry_over_possible:
755
# Nothing to set on the entry.
756
# XXX: split into the Root and nonRoot versions.
757
if change[1][1] != '' or self.repository.supports_rich_root():
758
self._add_text_to_weave(change[0], [], heads, None)
759
elif kind == 'tree-reference':
760
raise AssertionError('unknown kind %r' % kind)
762
raise AssertionError('unknown kind %r' % kind)
764
entry.revision = modified_rev
766
entry.revision = parent_entry.revision
769
new_path = change[1][1]
770
inv_delta.append((change[1][0], new_path, change[0], entry))
773
self.new_inventory = None
775
self._any_changes = True
777
# housekeeping root entry changes do not affect no-change commits.
778
self._require_root_change(tree)
779
self.basis_delta_revision = basis_revision_id
781
def _add_text_to_weave(self, file_id, new_lines, parents, nostore_sha):
782
# Note: as we read the content directly from the tree, we know it's not
783
# been turned into unicode or badly split - but a broken tree
784
# implementation could give us bad output from readlines() so this is
785
# not a guarantee of safety. What would be better is always checking
786
# the content during test suite execution. RBC 20070912
787
parent_keys = tuple((file_id, parent) for parent in parents)
788
return self.repository.texts.add_lines(
789
(file_id, self._new_revision_id), parent_keys, new_lines,
790
nostore_sha=nostore_sha, random_id=self.random_revid,
791
check_content=False)[0:2]
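# Note (added for clarity): add_lines() returns a tuple whose first two
# items are the stored text's sha1 and its length, so the [0:2] slice above
# is what callers unpack into (text_sha1, text_size).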
794
class RootCommitBuilder(CommitBuilder):
795
"""This commitbuilder actually records the root id"""
797
# the root entry gets versioned properly by this builder.
798
_versioned_root = True
800
def _check_root(self, ie, parent_invs, tree):
801
"""Helper for record_entry_contents.
803
:param ie: An entry being added.
804
:param parent_invs: The inventories of the parent revisions of the
806
:param tree: The tree that is being committed.
809
def _require_root_change(self, tree):
810
"""Enforce an appropriate root object change.
812
This is called once when record_iter_changes is called, if and only if
813
the root was not in the delta calculated by record_iter_changes.
815
:param tree: The tree which is being committed.
817
# versioned roots do not change unless the tree found a change.
221
to basis_revision_id. The iterator must not include any items with
222
a current kind of None - missing items must be either filtered out
223
or errored-on before record_iter_changes sees the item.
224
:return: A generator of (file_id, relpath, fs_hash) tuples for use with
227
raise NotImplementedError(self.record_iter_changes)
230
class RepositoryWriteLockResult(LogicalLockResult):
231
"""The result of write locking a repository.
233
:ivar repository_token: The token obtained from the underlying lock, or
235
:ivar unlock: A callable which will unlock the lock.
238
def __init__(self, unlock, repository_token):
239
LogicalLockResult.__init__(self, unlock)
240
self.repository_token = repository_token
243
return "RepositoryWriteLockResult(%s, %s)" % (self.repository_token,
820
247
######################################################################
823
class Repository(object):
251
class Repository(_RelockDebugMixin, controldir.ControlComponent):
824
252
"""Repository holding history for one or more branches.
826
254
The repository holds and retrieves historical information including
827
255
revisions and file history. It's normally accessed only by the Branch,
828
256
which views a particular line of development through that history.
830
The Repository builds on top of some byte storage facilies (the revisions,
831
signatures, inventories, texts and chk_bytes attributes) and a Transport,
832
which respectively provide byte storage and a means to access the (possibly
835
The byte storage facilities are addressed via tuples, which we refer to
836
as 'keys' throughout the code base. Revision_keys, inventory_keys and
837
signature_keys are all 1-tuples: (revision_id,). text_keys are two-tuples:
838
(file_id, revision_id). chk_bytes uses CHK keys - a 1-tuple with a single
839
byte string made up of a hash identifier and a hash value.
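For example (illustrative only): a revision key looks like ('rev-id',), a
text key like ('file-id', 'rev-id'), and a CHK key is a 1-tuple holding a
string such as 'sha1:...' naming the hash algorithm and value.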
We use this interface because it allows low friction with the underlying
code that implements disk indices, network encoding and other parts of
bzrlib.

:ivar revisions: A bzrlib.versionedfile.VersionedFiles instance containing
the serialised revisions for the repository. This can be used to obtain
revision graph information or to access raw serialised revisions.
The result of trying to insert data into the repository via this store
is undefined: it should be considered read-only except for implementors
of repositories.
:ivar signatures: A bzrlib.versionedfile.VersionedFiles instance containing
the serialised signatures for the repository. This can be used to
obtain access to raw serialised signatures. The result of trying to
insert data into the repository via this store is undefined: it should
be considered read-only except for implementors of repositories.
:ivar inventories: A bzrlib.versionedfile.VersionedFiles instance containing
the serialised inventories for the repository. This can be used to
obtain unserialised inventories. The result of trying to insert data
into the repository via this store is undefined: it should be
considered read-only except for implementors of repositories.
:ivar texts: A bzrlib.versionedfile.VersionedFiles instance containing the
texts of files and directories for the repository. This can be used to
obtain file texts or file graphs. Note that Repository.iter_file_bytes
is usually a better interface for accessing file texts.
The result of trying to insert data into the repository via this store
is undefined: it should be considered read-only except for implementors
of repositories.
:ivar chk_bytes: A bzrlib.versionedfile.VersionedFiles instance containing
any data the repository chooses to store or have indexed by its hash.
The result of trying to insert data into the repository via this store
is undefined: it should be considered read-only except for implementors
of repositories.
:ivar _transport: Transport for file access to repository, typically
pointing to .bzr/repository.
258
See VersionedFileRepository in bzrlib.vf_repository for the
259
base class for most Bazaar repositories.
876
# What class to use for a CommitBuilder. Often its simpler to change this
877
# in a Repository class subclass rather than to override
878
# get_commit_builder.
879
_commit_builder_class = CommitBuilder
880
# The search regex used by xml based repositories to determine what things
881
# were changed in a single commit.
882
_file_ids_altered_regex = lazy_regex.lazy_compile(
883
r'file_id="(?P<file_id>[^"]+)"'
884
r'.* revision="(?P<revision_id>[^"]+)"'
887
262
def abort_write_group(self, suppress_errors=False):
888
263
"""Commit the contents accrued within the current write group.
949
319
return InterRepository._assert_same_model(self, repository)
951
def add_inventory(self, revision_id, inv, parents):
952
"""Add the inventory inv to the repository as revision_id.
954
:param parents: The revision ids of the parents that revision_id
955
is known to have and are in the repository already.
957
:returns: The validator (which is a sha1 digest, though what is sha'd is
958
repository format specific) of the serialized inventory.
960
if not self.is_in_write_group():
961
raise AssertionError("%r not in write group" % (self,))
962
_mod_revision.check_not_reserved_id(revision_id)
963
if not (inv.revision_id is None or inv.revision_id == revision_id):
964
raise AssertionError(
965
"Mismatch between inventory revision"
966
" id and insertion revid (%r, %r)"
967
% (inv.revision_id, revision_id))
969
raise AssertionError()
970
return self._add_inventory_checked(revision_id, inv, parents)
972
def _add_inventory_checked(self, revision_id, inv, parents):
973
"""Add inv to the repository after checking the inputs.
975
This function can be overridden to allow different inventory styles.
977
:seealso: add_inventory, for the contract.
979
inv_lines = self._serialise_inventory_to_lines(inv)
980
return self._inventory_add_lines(revision_id, parents,
981
inv_lines, check_content=False)
983
def add_inventory_by_delta(self, basis_revision_id, delta, new_revision_id,
984
parents, basis_inv=None, propagate_caches=False):
985
"""Add a new inventory expressed as a delta against another revision.
987
:param basis_revision_id: The inventory id the delta was created
988
against. (This does not have to be a direct parent.)
989
:param delta: The inventory delta (see Inventory.apply_delta for
991
:param new_revision_id: The revision id that the inventory is being
993
:param parents: The revision ids of the parents that revision_id is
994
known to have and are in the repository already. These are supplied
995
for repositories that depend on the inventory graph for revision
996
graph access, as well as for those that pun ancestry with delta
998
:param basis_inv: The basis inventory if it is already known,
1000
:param propagate_caches: If True, the caches for this inventory are
1001
copied to and updated for the result if possible.
1003
:returns: (validator, new_inv)
1004
The validator (which is a sha1 digest, though what is sha'd is
1005
repository format specific) of the serialized inventory, and the
1006
resulting inventory.
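A rough usage sketch (illustrative; the names are placeholders):

validator, new_inv = repo.add_inventory_by_delta(
basis_revision_id, delta, new_revision_id, parents)

where delta follows the form accepted by Inventory.apply_delta.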
1008
if not self.is_in_write_group():
1009
raise AssertionError("%r not in write group" % (self,))
1010
_mod_revision.check_not_reserved_id(new_revision_id)
1011
basis_tree = self.revision_tree(basis_revision_id)
1012
basis_tree.lock_read()
1014
# Note that this mutates the inventory of basis_tree, which not all
1015
# inventory implementations may support: A better idiom would be to
1016
# return a new inventory, but as there is no revision tree cache in
1017
# repository this is safe for now - RBC 20081013
1018
if basis_inv is None:
1019
basis_inv = basis_tree.inventory
1020
basis_inv.apply_delta(delta)
1021
basis_inv.revision_id = new_revision_id
1022
return (self.add_inventory(new_revision_id, basis_inv, parents),
1027
def _inventory_add_lines(self, revision_id, parents, lines,
1028
check_content=True):
1029
"""Store lines in inv_vf and return the sha1 of the inventory."""
1030
parents = [(parent,) for parent in parents]
1031
return self.inventories.add_lines((revision_id,), parents, lines,
1032
check_content=check_content)[0]
1034
def add_revision(self, revision_id, rev, inv=None, config=None):
1035
"""Add rev to the revision store as revision_id.
1037
:param revision_id: the revision id to use.
1038
:param rev: The revision object.
1039
:param inv: The inventory for the revision. If None, it will be looked
up in the inventory store.
1041
:param config: If None no digital signature will be created.
1042
If supplied its signature_needed method will be used
1043
to determine if a signature should be made.
1045
# TODO: jam 20070210 Shouldn't we check rev.revision_id and
1047
_mod_revision.check_not_reserved_id(revision_id)
1048
if config is not None and config.signature_needed():
1050
inv = self.get_inventory(revision_id)
1051
plaintext = Testament(rev, inv).as_short_text()
1052
self.store_revision_signature(
1053
gpg.GPGStrategy(config), plaintext, revision_id)
1054
# check inventory present
1055
if not self.inventories.get_parent_map([(revision_id,)]):
1057
raise errors.WeaveRevisionNotPresent(revision_id,
1060
# yes, this is not suitable for adding with ghosts.
1061
rev.inventory_sha1 = self.add_inventory(revision_id, inv,
1064
key = (revision_id,)
1065
rev.inventory_sha1 = self.inventories.get_sha1s([key])[key]
1066
self._add_revision(rev)
1068
def _add_revision(self, revision):
1069
text = self._serializer.write_revision_to_string(revision)
1070
key = (revision.revision_id,)
1071
parents = tuple((parent,) for parent in revision.parent_ids)
1072
self.revisions.add_lines(key, parents, osutils.split_lines(text))
1074
321
def all_revision_ids(self):
1075
322
"""Returns a list of all the revision ids in the repository.
1693
909
signature = gpg_strategy.sign(plaintext)
1694
910
self.add_signature_text(revision_id, signature)
1697
912
def add_signature_text(self, revision_id, signature):
1698
self.signatures.add_lines((revision_id,), (),
1699
osutils.split_lines(signature))
1701
def find_text_key_references(self):
1702
"""Find the text key references within the repository.
1704
:return: A dictionary mapping text keys ((fileid, revision_id) tuples)
1705
to whether they were referred to by the inventory of the
1706
revision_id that they contain. The inventory texts from all present
1707
revision ids are assessed to generate this report.
1709
revision_keys = self.revisions.keys()
1710
w = self.inventories
1711
pb = ui.ui_factory.nested_progress_bar()
1713
return self._find_text_key_references_from_xml_inventory_lines(
1714
w.iter_lines_added_or_present_in_keys(revision_keys, pb=pb))
1718
def _find_text_key_references_from_xml_inventory_lines(self,
1720
"""Core routine for extracting references to texts from inventories.
1722
This performs the translation of xml lines to revision ids.
1724
:param line_iterator: An iterator of lines, origin_version_id
1725
:return: A dictionary mapping text keys ((fileid, revision_id) tuples)
1726
to whether they were referred to by the inventory of the
1727
revision_id that they contain. Note that if that revision_id was
1728
not part of the line_iterator's output then False will be given -
1729
even though it may actually refer to that key.
1731
if not self._serializer.support_altered_by_hack:
1732
raise AssertionError(
1733
"_find_text_key_references_from_xml_inventory_lines only "
1734
"supported for branches which store inventory as unnested xml"
1735
", not on %r" % self)
1738
# this code needs to read every new line in every inventory for the
1739
# inventories [revision_ids]. Seeing a line twice is ok. Seeing a line
1740
# not present in one of those inventories is unnecessary but not
1741
# harmful because we are filtering by the revision id marker in the
1742
# inventory lines : we only select file ids altered in one of those
1743
# revisions. We don't need to see all lines in the inventory because
1744
# only those added in an inventory in rev X can contain a revision=X
1746
unescape_revid_cache = {}
1747
unescape_fileid_cache = {}
1749
# jam 20061218 In a big fetch, this handles hundreds of thousands
1750
# of lines, so it has had a lot of inlining and optimizing done.
1751
# Sorry that it is a little bit messy.
1752
# Move several functions to be local variables, since this is a long
1754
search = self._file_ids_altered_regex.search
1755
unescape = _unescape_xml
1756
setdefault = result.setdefault
1757
for line, line_key in line_iterator:
1758
match = search(line)
1761
# One call to match.group() returning multiple items is quite a
1762
# bit faster than 2 calls to match.group() each returning 1
1763
file_id, revision_id = match.group('file_id', 'revision_id')
1765
# Inlining the cache lookups helps a lot when you make 170,000
1766
# lines and 350k ids, versus 8.4 unique ids.
1767
# Using a cache helps in 2 ways:
1768
# 1) Avoids unnecessary decoding calls
1769
# 2) Re-uses cached strings, which helps in future set and
1771
# (2) is enough that removing encoding entirely along with
1772
# the cache (so we are using plain strings) results in no
1773
# performance improvement.
1775
revision_id = unescape_revid_cache[revision_id]
1777
unescaped = unescape(revision_id)
1778
unescape_revid_cache[revision_id] = unescaped
1779
revision_id = unescaped
1781
# Note that unconditionally unescaping means that we deserialise
1782
# every fileid, which for general 'pull' is not great, but we don't
1783
# really want to have so many fulltexts that this matters anyway.
1786
file_id = unescape_fileid_cache[file_id]
1788
unescaped = unescape(file_id)
1789
unescape_fileid_cache[file_id] = unescaped
1792
key = (file_id, revision_id)
1793
setdefault(key, False)
1794
if revision_id == line_key[-1]:
1798
def _inventory_xml_lines_for_keys(self, keys):
1799
"""Get a line iterator of the sort needed for findind references.
1801
Not relevant for non-xml inventory repositories.
1803
Ghosts in revision_keys are ignored.
1805
:param revision_keys: The revision keys for the inventories to inspect.
1806
:return: An iterator over (inventory line, revid) for the fulltexts of
1807
all of the xml inventories specified by revision_keys.
1809
stream = self.inventories.get_record_stream(keys, 'unordered', True)
1810
for record in stream:
1811
if record.storage_kind != 'absent':
1812
chunks = record.get_bytes_as('chunked')
1813
revid = record.key[-1]
1814
lines = osutils.chunks_to_lines(chunks)
1818
def _find_file_ids_from_xml_inventory_lines(self, line_iterator,
1820
"""Helper routine for fileids_altered_by_revision_ids.
1822
This performs the translation of xml lines to revision ids.
1824
:param line_iterator: An iterator of lines, origin_version_id
1825
:param revision_ids: The revision ids to filter for. This should be a
1826
set or other type which supports efficient __contains__ lookups, as
1827
the revision id from each parsed line will be looked up in the
1828
revision_ids filter.
1829
:return: a dictionary mapping altered file-ids to an iterable of
1830
revision_ids. Each altered file-id has the exact revision_ids that
altered it listed explicitly.
1833
seen = set(self._find_text_key_references_from_xml_inventory_lines(
1834
line_iterator).iterkeys())
1835
# Note that revision_ids are revision keys.
1836
parent_maps = self.revisions.get_parent_map(revision_ids)
1838
map(parents.update, parent_maps.itervalues())
1839
parents.difference_update(revision_ids)
1840
parent_seen = set(self._find_text_key_references_from_xml_inventory_lines(
1841
self._inventory_xml_lines_for_keys(parents)))
1842
new_keys = seen - parent_seen
1844
setdefault = result.setdefault
1845
for key in new_keys:
1846
setdefault(key[0], set()).add(key[-1])
1849
def fileids_altered_by_revision_ids(self, revision_ids, _inv_weave=None):
1850
"""Find the file ids and versions affected by revisions.
1852
:param revision_ids: an iterable containing revision ids.
1853
:param _inv_weave: The inventory weave from this repository or None.
1854
If None, the inventory weave will be opened automatically.
1855
:return: a dictionary mapping altered file-ids to an iterable of
1856
revision_ids. Each altered file-id has the exact revision_ids that
altered it listed explicitly.
1859
selected_keys = set((revid,) for revid in revision_ids)
1860
w = _inv_weave or self.inventories
1861
pb = ui.ui_factory.nested_progress_bar()
1863
return self._find_file_ids_from_xml_inventory_lines(
1864
w.iter_lines_added_or_present_in_keys(
1865
selected_keys, pb=pb),
913
"""Store a signature text for a revision.
915
:param revision_id: Revision id of the revision
916
:param signature: Signature text.
918
raise NotImplementedError(self.add_signature_text)
920
def _find_parent_ids_of_revisions(self, revision_ids):
921
"""Find all parent ids that are mentioned in the revision graph.
923
:return: set of revisions that are parents of revision_ids which are
924
not part of revision_ids themselves
926
parent_map = self.get_parent_map(revision_ids)
928
map(parent_ids.update, parent_map.itervalues())
929
parent_ids.difference_update(revision_ids)
930
parent_ids.discard(_mod_revision.NULL_REVISION)
1870
933
def iter_files_bytes(self, desired_files):
1871
934
"""Iterate through file versions.
1878
941
uniquely identify the file version in the caller's context. (Examples:
1879
942
an index number or a TreeTransform trans_id.)
1881
bytes_iterator is an iterable of bytestrings for the file. The
1882
kind of iterable and length of the bytestrings are unspecified, but for
1883
this implementation, it is a list of bytes produced by
1884
VersionedFile.get_record_stream().
1886
944
:param desired_files: a list of (file_id, revision_id, identifier)
1890
for file_id, revision_id, callable_data in desired_files:
1891
text_keys[(file_id, revision_id)] = callable_data
1892
for record in self.texts.get_record_stream(text_keys, 'unordered', True):
1893
if record.storage_kind == 'absent':
1894
raise errors.RevisionNotPresent(record.key, self)
1895
yield text_keys[record.key], record.get_bytes_as('chunked')
1897
def _generate_text_key_index(self, text_key_references=None,
1899
"""Generate a new text key index for the repository.
1901
This is an expensive function that will take considerable time to run.
1903
:return: A dict mapping text keys ((file_id, revision_id) tuples) to a
1904
list of parents, also text keys. When a given key has no parents,
1905
the parents list will be [NULL_REVISION].
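Illustratively, the result is shaped like
{('file-id', 'rev-id'): [('file-id', 'parent-rev-id'), ...]}, with the
list collapsing to [NULL_REVISION] when a text has no parents.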
1907
# All revisions, to find inventory parents.
1908
if ancestors is None:
1909
graph = self.get_graph()
1910
ancestors = graph.get_parent_map(self.all_revision_ids())
1911
if text_key_references is None:
1912
text_key_references = self.find_text_key_references()
1913
pb = ui.ui_factory.nested_progress_bar()
1915
return self._do_generate_text_key_index(ancestors,
1916
text_key_references, pb)
1920
def _do_generate_text_key_index(self, ancestors, text_key_references, pb):
1921
"""Helper for _generate_text_key_index to avoid deep nesting."""
1922
revision_order = tsort.topo_sort(ancestors)
1923
invalid_keys = set()
1925
for revision_id in revision_order:
1926
revision_keys[revision_id] = set()
1927
text_count = len(text_key_references)
1928
# a cache of the text keys to allow reuse; costs a dict of all the
1929
# keys, but saves a 2-tuple for every child of a given key.
1931
for text_key, valid in text_key_references.iteritems():
1933
invalid_keys.add(text_key)
1935
revision_keys[text_key[1]].add(text_key)
1936
text_key_cache[text_key] = text_key
1937
del text_key_references
1939
text_graph = graph.Graph(graph.DictParentsProvider(text_index))
1940
NULL_REVISION = _mod_revision.NULL_REVISION
1941
# Set a cache with a size of 10 - this suffices for bzr.dev but may be
1942
# too small for large or very branchy trees. However, for 55K path
1943
# trees, it would be easy to use too much memory trivially. Ideally we
1944
# could gauge this by looking at available real memory etc, but this is
1945
# always a tricky proposition.
1946
inventory_cache = lru_cache.LRUCache(10)
1947
batch_size = 10 # should be ~150MB on a 55K path tree
1948
batch_count = len(revision_order) / batch_size + 1
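# Note (added): this arithmetic relies on Python 2 floor division of ints,
# consistent with the xrange/iteritems usage nearby; under Python 3 it
# would need to be written len(revision_order) // batch_size + 1.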
1950
pb.update("Calculating text parents", processed_texts, text_count)
1951
for offset in xrange(batch_count):
1952
to_query = revision_order[offset * batch_size:(offset + 1) *
1956
for rev_tree in self.revision_trees(to_query):
1957
revision_id = rev_tree.get_revision_id()
1958
parent_ids = ancestors[revision_id]
1959
for text_key in revision_keys[revision_id]:
1960
pb.update("Calculating text parents", processed_texts)
1961
processed_texts += 1
1962
candidate_parents = []
1963
for parent_id in parent_ids:
1964
parent_text_key = (text_key[0], parent_id)
1966
check_parent = parent_text_key not in \
1967
revision_keys[parent_id]
1969
# the parent parent_id is a ghost:
1970
check_parent = False
1971
# truncate the derived graph against this ghost.
1972
parent_text_key = None
1974
# look at the parent commit details inventories to
1975
# determine possible candidates in the per file graph.
1978
inv = inventory_cache[parent_id]
1980
inv = self.revision_tree(parent_id).inventory
1981
inventory_cache[parent_id] = inv
1983
parent_entry = inv[text_key[0]]
1984
except (KeyError, errors.NoSuchId):
1986
if parent_entry is not None:
1988
text_key[0], parent_entry.revision)
1990
parent_text_key = None
1991
if parent_text_key is not None:
1992
candidate_parents.append(
1993
text_key_cache[parent_text_key])
1994
parent_heads = text_graph.heads(candidate_parents)
1995
new_parents = list(parent_heads)
1996
new_parents.sort(key=lambda x:candidate_parents.index(x))
1997
if new_parents == []:
1998
new_parents = [NULL_REVISION]
1999
text_index[text_key] = new_parents
2001
for text_key in invalid_keys:
2002
text_index[text_key] = [NULL_REVISION]
2005
def item_keys_introduced_by(self, revision_ids, _files_pb=None):
2006
"""Get an iterable listing the keys of all the data introduced by a set
2009
The keys will be ordered so that the corresponding items can be safely
2010
fetched and inserted in that order.
2012
:returns: An iterable producing tuples of (knit-kind, file-id,
2013
versions). knit-kind is one of 'file', 'inventory', 'signatures',
2014
'revisions'. file-id is None unless knit-kind is 'file'.
2016
for result in self._find_file_keys_to_fetch(revision_ids, _files_pb):
2019
for result in self._find_non_file_keys_to_fetch(revision_ids):
2022
def _find_file_keys_to_fetch(self, revision_ids, pb):
2023
# XXX: it's a bit weird to control the inventory weave caching in this
2024
# generator. Ideally the caching would be done in fetch.py I think. Or
2025
# maybe this generator should explicitly have the contract that it
2026
# should not be iterated until the previously yielded item has been
2028
inv_w = self.inventories
2030
# file ids that changed
2031
file_ids = self.fileids_altered_by_revision_ids(revision_ids, inv_w)
2033
num_file_ids = len(file_ids)
2034
for file_id, altered_versions in file_ids.iteritems():
2036
pb.update("fetch texts", count, num_file_ids)
2038
yield ("file", file_id, altered_versions)
2040
def _find_non_file_keys_to_fetch(self, revision_ids):
2042
yield ("inventory", None, revision_ids)
2045
# XXX: Note ATM no callers actually pay attention to this return
2046
# instead they just use the list of revision ids and ignore
2047
# missing sigs. Consider removing this work entirely
2048
revisions_with_signatures = set(self.signatures.get_parent_map(
2049
[(r,) for r in revision_ids]))
2050
revisions_with_signatures = set(
2051
[r for (r,) in revisions_with_signatures])
2052
revisions_with_signatures.intersection_update(revision_ids)
2053
yield ("signatures", None, revisions_with_signatures)
2056
yield ("revisions", None, revision_ids)
2059
def get_inventory(self, revision_id):
2060
"""Get Inventory object by revision id."""
2061
return self.iter_inventories([revision_id]).next()
2063
def iter_inventories(self, revision_ids):
2064
"""Get many inventories by revision_ids.
2066
This will buffer some or all of the texts used in constructing the
2067
inventories in memory, but will only parse a single inventory at a
2070
:param revision_ids: The expected revision ids of the inventories.
2071
:return: An iterator of inventories.
2073
if ((None in revision_ids)
2074
or (_mod_revision.NULL_REVISION in revision_ids)):
2075
raise ValueError('cannot get null revision inventory')
2076
return self._iter_inventories(revision_ids)
2078
def _iter_inventories(self, revision_ids):
2079
"""single-document based inventory iteration."""
2080
for text, revision_id in self._iter_inventory_xmls(revision_ids):
2081
yield self.deserialise_inventory(revision_id, text)
2083
def _iter_inventory_xmls(self, revision_ids):
2084
keys = [(revision_id,) for revision_id in revision_ids]
2085
stream = self.inventories.get_record_stream(keys, 'unordered', True)
2087
for record in stream:
2088
if record.storage_kind != 'absent':
2089
text_chunks[record.key] = record.get_bytes_as('chunked')
2091
raise errors.NoSuchRevision(self, record.key)
2093
chunks = text_chunks.pop(key)
2094
yield ''.join(chunks), key[-1]
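# Note (added): the keys built above are 1-tuples of (revision_id,), so
# key[-1] recovers the revision id paired with the reassembled xml text.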
2096
def deserialise_inventory(self, revision_id, xml):
2097
"""Transform the xml into an inventory object.
2099
:param revision_id: The expected revision id of the inventory.
2100
:param xml: A serialised inventory.
2102
result = self._serializer.read_inventory_from_string(xml, revision_id,
2103
entry_cache=self._inventory_entry_cache)
2104
if result.revision_id != revision_id:
2105
raise AssertionError('revision id mismatch %s != %s' % (
2106
result.revision_id, revision_id))
2109
def serialise_inventory(self, inv):
2110
return self._serializer.write_inventory_to_string(inv)
2112
def _serialise_inventory_to_lines(self, inv):
2113
return self._serializer.write_inventory_to_lines(inv)
2115
def get_serializer_format(self):
2116
return self._serializer.format_num
2119
def get_inventory_xml(self, revision_id):
2120
"""Get inventory XML as a file object."""
2121
texts = self._iter_inventory_xmls([revision_id])
2123
text, revision_id = texts.next()
2124
except StopIteration:
2125
raise errors.HistoryMissing(self, 'inventory', revision_id)
2129
def get_inventory_sha1(self, revision_id):
2130
"""Return the sha1 hash of the inventory entry
2132
return self.get_revision(revision_id).inventory_sha1
2134
def iter_reverse_revision_history(self, revision_id):
2135
"""Iterate backwards through revision ids in the lefthand history
2137
:param revision_id: The revision id to start with. All its lefthand
2138
ancestors will be traversed.
2140
graph = self.get_graph()
2141
next_id = revision_id
2143
if next_id in (None, _mod_revision.NULL_REVISION):
2146
# Note: The following line may raise KeyError in the event of
2147
# truncated history. We decided not to have a try:except:raise
2148
# RevisionNotPresent here until we see a use for it, because of the
2149
# cost in an inner loop that is by its very nature O(history).
2150
# Robert Collins 20080326
2151
parents = graph.get_parent_map([next_id])[next_id]
2152
if len(parents) == 0:
2155
next_id = parents[0]
2158
def get_revision_inventory(self, revision_id):
2159
"""Return inventory of a past revision."""
2160
# TODO: Unify this with get_inventory()
2161
# bzr 0.0.6 and later imposes the constraint that the inventory_id
2162
# must be the same as its revision, so this is trivial.
2163
if revision_id is None:
2164
# This does not make sense: if there is no revision,
2165
# then it is the current tree inventory surely ?!
2166
# and thus get_root_id() is something that looks at the last
2167
# commit on the branch, and the get_root_id is an inventory check.
2168
raise NotImplementedError
2169
# return Inventory(self.get_root_id())
2171
return self.get_inventory(revision_id)
947
raise NotImplementedError(self.iter_files_bytes)
949
def get_rev_id_for_revno(self, revno, known_pair):
950
"""Return the revision id of a revno, given a later (revno, revid)
951
pair in the same history.
953
:return: if found (True, revid). If the available history ran out
954
before reaching the revno, then this returns
955
(False, (closest_revno, closest_revid)).
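For example (illustrative): get_rev_id_for_revno(10, (15, 'rev-15-id'))
walks five steps back along the left-hand history from 'rev-15-id' and
returns (True, <revid of revno 10>) when that much history is present.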
957
known_revno, known_revid = known_pair
958
partial_history = [known_revid]
959
distance_from_known = known_revno - revno
960
if distance_from_known < 0:
962
'requested revno (%d) is later than given known revno (%d)'
963
% (revno, known_revno))
966
self, partial_history, stop_index=distance_from_known)
967
except errors.RevisionNotPresent, err:
968
if err.revision_id == known_revid:
969
# The start revision (known_revid) wasn't found.
971
# left-hand ghost. Either way, even though the revision named in
972
# left-hand ghost. Either way, even though the revision named in
973
# the error isn't in this repo, we know it's the next step in this
975
partial_history.append(err.revision_id)
976
if len(partial_history) <= distance_from_known:
977
# Didn't find enough history to get a revid for the revno.
978
earliest_revno = known_revno - len(partial_history) + 1
979
return (False, (earliest_revno, partial_history[-1]))
980
if len(partial_history) - 1 > distance_from_known:
981
raise AssertionError('_iter_for_revno returned too much history')
982
return (True, partial_history[-1])
2173
984
def is_shared(self):
2174
985
"""Return True if this repository is flagged as a shared repository."""
2360
1123
@needs_write_lock
2361
1124
def sign_revision(self, revision_id, gpg_strategy):
2362
plaintext = Testament.from_revision(self, revision_id).as_short_text()
1125
testament = _mod_testament.Testament.from_revision(self, revision_id)
1126
plaintext = testament.as_short_text()
2363
1127
self.store_revision_signature(gpg_strategy, plaintext, revision_id)
2365
1129
@needs_read_lock
1130
def verify_revision_signature(self, revision_id, gpg_strategy):
1131
"""Verify the signature on a revision.
1133
:param revision_id: the revision to verify
1134
:gpg_strategy: the GPGStrategy object to use
1136
:return: gpg.SIGNATURE_VALID or a failed SIGNATURE_ value
1138
if not self.has_signature_for_revision_id(revision_id):
1139
return gpg.SIGNATURE_NOT_SIGNED, None
1140
signature = self.get_signature_text(revision_id)
1142
testament = _mod_testament.Testament.from_revision(self, revision_id)
1143
plaintext = testament.as_short_text()
1145
return gpg_strategy.verify(signature, plaintext)
1148
def verify_revision_signatures(self, revision_ids, gpg_strategy):
1149
"""Verify revision signatures for a number of revisions.
1151
:param revision_ids: the revisions to verify
1152
:gpg_strategy: the GPGStrategy object to use
1153
:return: Iterator over tuples with revision id, result and keys
1155
for revid in revision_ids:
1156
(result, key) = self.verify_revision_signature(revid, gpg_strategy)
1157
yield revid, result, key
2366
1159
def has_signature_for_revision_id(self, revision_id):
2367
1160
"""Query for a revision signature for revision_id in the repository."""
2368
if not self.has_revision(revision_id):
2369
raise errors.NoSuchRevision(self, revision_id)
2370
sig_present = (1 == len(
2371
self.signatures.get_parent_map([(revision_id,)])))
1161
raise NotImplementedError(self.has_signature_for_revision_id)
2375
1163
def get_signature_text(self, revision_id):
2376
1164
"""Return the text for a signature."""
2377
stream = self.signatures.get_record_stream([(revision_id,)],
2379
record = stream.next()
2380
if record.storage_kind == 'absent':
2381
raise errors.NoSuchRevision(self, revision_id)
2382
return record.get_bytes_as('fulltext')
1165
raise NotImplementedError(self.get_signature_text)
2385
def check(self, revision_ids=None):
1167
def check(self, revision_ids=None, callback_refs=None, check_repo=True):
2386
1168
"""Check consistency of all history of given revision_ids.
2388
1170
Different repository implementations should override _check().
2390
1172
:param revision_ids: A non-empty list of revision_ids whose ancestry
2391
1173
will be checked. Typically the last revision_id of a branch.
1174
:param callback_refs: A dict of check-refs to resolve and callback
1175
the check/_check method on the items listed as wanting the ref.
1177
:param check_repo: If False do not check the repository contents, just
1178
calculate the data callback_refs requires and call them back.
2393
return self._check(revision_ids)
2395
def _check(self, revision_ids):
2396
result = check.Check(self)
2400
def _warn_if_deprecated(self):
1180
return self._check(revision_ids=revision_ids, callback_refs=callback_refs,
1181
check_repo=check_repo)
1183
def _check(self, revision_ids=None, callback_refs=None, check_repo=True):
1184
raise NotImplementedError(self.check)
1186
def _warn_if_deprecated(self, branch=None):
1187
if not self._format.is_deprecated():
2401
1189
global _deprecation_warning_done
2402
1190
if _deprecation_warning_done:
2404
_deprecation_warning_done = True
2405
warning("Format %s for %s is deprecated - please use 'bzr upgrade' to get better performance"
2406
% (self._format, self.bzrdir.transport.base))
1194
conf = config.GlobalStack()
1196
conf = branch.get_config_stack()
1197
if 'format_deprecation' in conf.get('suppress_warnings'):
1199
warning("Format %s for %s is deprecated -"
1200
" please use 'bzr upgrade' to get better performance"
1201
% (self._format, self.bzrdir.transport.base))
1203
_deprecation_warning_done = True
2408
1205
def supports_rich_root(self):
2409
1206
return self._format.rich_root_data
2424
1221
except UnicodeDecodeError:
2425
1222
raise errors.NonAsciiRevisionId(method, self)
2427
def revision_graph_can_have_wrong_parents(self):
2428
"""Is it possible for this repository to have a revision graph with
2431
If True, then this repository must also implement
2432
_find_inconsistent_revision_parents so that check and reconcile can
2433
check for inconsistencies before proceeding with other checks that may
2434
depend on the revision index being consistent.
2436
raise NotImplementedError(self.revision_graph_can_have_wrong_parents)
2439
# remove these delegates a while after bzr 0.15
2440
def __make_delegated(name, from_module):
2441
def _deprecated_repository_forwarder():
2442
symbol_versioning.warn('%s moved to %s in bzr 0.15'
2443
% (name, from_module),
2446
m = __import__(from_module, globals(), locals(), [name])
2448
return getattr(m, name)
2449
except AttributeError:
2450
raise AttributeError('module %s has no name %s'
2452
globals()[name] = _deprecated_repository_forwarder
2455
'AllInOneRepository',
2456
'WeaveMetaDirRepository',
2457
'PreSplitOutRepositoryFormat',
2458
'RepositoryFormat4',
2459
'RepositoryFormat5',
2460
'RepositoryFormat6',
2461
'RepositoryFormat7',
2463
__make_delegated(_name, 'bzrlib.repofmt.weaverepo')
2467
'RepositoryFormatKnit',
2468
'RepositoryFormatKnit1',
2470
__make_delegated(_name, 'bzrlib.repofmt.knitrepo')
2473
def install_revision(repository, rev, revision_tree):
2474
"""Install all revision data into a repository."""
2475
install_revisions(repository, [(rev, revision_tree, None)])
2478
def install_revisions(repository, iterable, num_revisions=None, pb=None):
2479
"""Install all revision data into a repository.
2481
Accepts an iterable of revision, tree, signature tuples. The signature
2484
repository.start_write_group()
2486
inventory_cache = lru_cache.LRUCache(10)
2487
for n, (revision, revision_tree, signature) in enumerate(iterable):
2488
_install_revision(repository, revision, revision_tree, signature,
2491
pb.update('Transferring revisions', n + 1, num_revisions)
2493
repository.abort_write_group()
2496
repository.commit_write_group()
2499
def _install_revision(repository, rev, revision_tree, signature,
2501
"""Install all revision data into a repository."""
2502
present_parents = []
2504
for p_id in rev.parent_ids:
2505
if repository.has_revision(p_id):
2506
present_parents.append(p_id)
2507
parent_trees[p_id] = repository.revision_tree(p_id)
2509
parent_trees[p_id] = repository.revision_tree(
2510
_mod_revision.NULL_REVISION)
2512
inv = revision_tree.inventory
2513
entries = inv.iter_entries()
2514
# backwards compatibility hack: skip the root id.
2515
if not repository.supports_rich_root():
2516
path, root = entries.next()
2517
if root.revision != rev.revision_id:
2518
raise errors.IncompatibleRevision(repr(repository))
2520
for path, ie in entries:
2521
text_keys[(ie.file_id, ie.revision)] = ie
2522
text_parent_map = repository.texts.get_parent_map(text_keys)
2523
missing_texts = set(text_keys) - set(text_parent_map)
2524
# Add the texts that are not already present
2525
for text_key in missing_texts:
2526
ie = text_keys[text_key]
2528
# FIXME: TODO: The following loop overlaps/duplicates that done by
2529
# commit to determine parents. There is a latent/real bug here where
2530
# the parents inserted are not those commit would do - in particular
2531
# they are not filtered by heads(). RBC, AB
2532
for revision, tree in parent_trees.iteritems():
2533
if ie.file_id not in tree:
2535
parent_id = tree.inventory[ie.file_id].revision
2536
if parent_id in text_parents:
2538
text_parents.append((ie.file_id, parent_id))
2539
lines = revision_tree.get_file(ie.file_id).readlines()
2540
repository.texts.add_lines(text_key, text_parents, lines)
2542
# install the inventory
2543
if repository._format._commit_inv_deltas and len(rev.parent_ids):
2544
# Cache this inventory
2545
inventory_cache[rev.revision_id] = inv
2547
basis_inv = inventory_cache[rev.parent_ids[0]]
2549
repository.add_inventory(rev.revision_id, inv, present_parents)
2551
delta = inv._make_delta(basis_inv)
2552
repository.add_inventory_by_delta(rev.parent_ids[0], delta,
2553
rev.revision_id, present_parents)
2555
repository.add_inventory(rev.revision_id, inv, present_parents)
2556
except errors.RevisionAlreadyPresent:
2558
if signature is not None:
2559
repository.add_signature_text(rev.revision_id, signature)
2560
repository.add_revision(rev.revision_id, rev, inv)
2563
1225
class MetaDirRepository(Repository):
2564
1226
"""Repositories in the new meta-dir layout.
2903
1558
# NOTE: These are experimental in 0.92. Stable in 1.0 and above
2904
1559
format_registry.register_lazy(
2905
1560
'Bazaar pack repository format 1 (needs bzr 0.92)\n',
2906
'bzrlib.repofmt.pack_repo',
1561
'bzrlib.repofmt.knitpack_repo',
2907
1562
'RepositoryFormatKnitPack1',
2909
1564
format_registry.register_lazy(
2910
1565
'Bazaar pack repository format 1 with subtree support (needs bzr 0.92)\n',
2911
'bzrlib.repofmt.pack_repo',
1566
'bzrlib.repofmt.knitpack_repo',
2912
1567
'RepositoryFormatKnitPack3',
2914
1569
format_registry.register_lazy(
2915
1570
'Bazaar pack repository format 1 with rich root (needs bzr 1.0)\n',
2916
'bzrlib.repofmt.pack_repo',
1571
'bzrlib.repofmt.knitpack_repo',
2917
1572
'RepositoryFormatKnitPack4',
2919
1574
format_registry.register_lazy(
2920
1575
'Bazaar RepositoryFormatKnitPack5 (bzr 1.6)\n',
2921
'bzrlib.repofmt.pack_repo',
1576
'bzrlib.repofmt.knitpack_repo',
2922
1577
'RepositoryFormatKnitPack5',
2924
1579
format_registry.register_lazy(
2925
1580
'Bazaar RepositoryFormatKnitPack5RichRoot (bzr 1.6.1)\n',
2926
'bzrlib.repofmt.pack_repo',
1581
'bzrlib.repofmt.knitpack_repo',
2927
1582
'RepositoryFormatKnitPack5RichRoot',
2929
1584
format_registry.register_lazy(
2930
1585
'Bazaar RepositoryFormatKnitPack5RichRoot (bzr 1.6)\n',
2931
'bzrlib.repofmt.pack_repo',
1586
'bzrlib.repofmt.knitpack_repo',
2932
1587
'RepositoryFormatKnitPack5RichRootBroken',
2934
1589
format_registry.register_lazy(
2935
1590
'Bazaar RepositoryFormatKnitPack6 (bzr 1.9)\n',
2936
'bzrlib.repofmt.pack_repo',
1591
'bzrlib.repofmt.knitpack_repo',
2937
1592
'RepositoryFormatKnitPack6',
2939
1594
format_registry.register_lazy(
2940
1595
'Bazaar RepositoryFormatKnitPack6RichRoot (bzr 1.9)\n',
2941
'bzrlib.repofmt.pack_repo',
1596
'bzrlib.repofmt.knitpack_repo',
2942
1597
'RepositoryFormatKnitPack6RichRoot',
1599
format_registry.register_lazy(
1600
'Bazaar repository format 2a (needs bzr 1.16 or later)\n',
1601
'bzrlib.repofmt.groupcompress_repo',
1602
'RepositoryFormat2a',
2945
1605
# Development formats.
2946
# 1.7->1.8 go below here
2947
format_registry.register_lazy(
2948
"Bazaar development format 2 (needs bzr.dev from before 1.8)\n",
2949
'bzrlib.repofmt.pack_repo',
2950
'RepositoryFormatPackDevelopment2',
1606
# Check their docstrings to see if/when they are obsolete.
2952
1607
format_registry.register_lazy(
2953
1608
("Bazaar development format 2 with subtree support "
2954
1609
"(needs bzr.dev from before 1.8)\n"),
2955
'bzrlib.repofmt.pack_repo',
1610
'bzrlib.repofmt.knitpack_repo',
2956
1611
'RepositoryFormatPackDevelopment2Subtree',
2958
# 1.9->1.10 go below here
2959
format_registry.register_lazy(
2960
# merge-bbc-dev4-to-bzr.dev
2961
"Bazaar development format 5 (needs bzr.dev from before 1.13)\n",
2962
'bzrlib.repofmt.pack_repo',
2963
'RepositoryFormatPackDevelopment5',
2965
format_registry.register_lazy(
2966
# merge-bbc-dev4-to-bzr.dev
2967
("Bazaar development format 5 with subtree support"
2968
" (needs bzr.dev from before 1.13)\n"),
2969
'bzrlib.repofmt.pack_repo',
2970
'RepositoryFormatPackDevelopment5Subtree',
2972
format_registry.register_lazy(
2973
# merge-bbc-dev4-to-bzr.dev
2974
('Bazaar development format 5 hash 16'
2975
' (needs bzr.dev from before 1.13)\n'),
2976
'bzrlib.repofmt.pack_repo',
2977
'RepositoryFormatPackDevelopment5Hash16',
2979
format_registry.register_lazy(
2980
# merge-bbc-dev4-to-bzr.dev
2981
('Bazaar development format 5 hash 255'
2982
' (needs bzr.dev from before 1.13)\n'),
2983
'bzrlib.repofmt.pack_repo',
2984
'RepositoryFormatPackDevelopment5Hash255',
2986
format_registry.register_lazy(
2987
'Bazaar development format - hash16chk+gc rich-root (needs bzr.dev from 1.14)\n',
2988
'bzrlib.repofmt.groupcompress_repo',
2989
'RepositoryFormatPackGCCHK16',
2991
format_registry.register_lazy(
2992
'Bazaar development format - hash255chk+gc rich-root (needs bzr.dev from 1.14)\n',
2993
'bzrlib.repofmt.groupcompress_repo',
2994
'RepositoryFormatPackGCCHK255',
2996
format_registry.register_lazy(
2997
'Bazaar development format - hash255chk+gc rich-root bigpage (needs bzr.dev from 1.14)\n',
2998
'bzrlib.repofmt.groupcompress_repo',
2999
'RepositoryFormatPackGCCHK255Big',
1613
format_registry.register_lazy(
1614
'Bazaar development format 8\n',
1615
'bzrlib.repofmt.groupcompress_repo',
1616
'RepositoryFormat2aSubtree',
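# Illustrative note (a sketch, not part of the registration table above): a
# format registered here can later be looked up by its format string, e.g.
#   fmt = format_registry.get(
#       'Bazaar repository format 2a (needs bzr 1.16 or later)\n')
# register_lazy() stores only the module and class names, so the implementing
# module is not imported until such a lookup actually happens.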
3033
1649
self.target.fetch(self.source, revision_id=revision_id)
3035
1651
@needs_write_lock
3036
def fetch(self, revision_id=None, pb=None, find_ghosts=False,
1652
def fetch(self, revision_id=None, find_ghosts=False):
3038
1653
"""Fetch the content required to construct revision_id.
3040
1655
The content is copied from self.source to self.target.
3042
1657
:param revision_id: if None all content is copied, if NULL_REVISION no
3043
1658
content is copied.
3044
:param pb: optional progress bar to use for progress reports. If not
3045
provided a default one will be created.
3048
from bzrlib.fetch import RepoFetcher
3049
f = RepoFetcher(to_repository=self.target,
3050
from_repository=self.source,
3051
last_revision=revision_id,
3052
fetch_spec=fetch_spec,
3053
pb=pb, find_ghosts=find_ghosts)
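# Typical use, as a sketch (assumes 'source' and 'target' are already-open
# Repository objects; the names are illustrative only):
#   InterRepository.get(source, target).fetch(revision_id=rev_id,
#                                             find_ghosts=False)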
3055
def _walk_to_common_revisions(self, revision_ids):
3056
"""Walk out from revision_ids in source to revisions target has.
3058
:param revision_ids: The start point for the search.
3059
:return: A graph.SearchResult of revision ids.
3061
target_graph = self.target.get_graph()
3062
revision_ids = frozenset(revision_ids)
3063
# Fast path for the case where all the revisions are already in the
3065
# (Although this does incur an extra round trip for the
3066
# fairly common case where the target doesn't already have the revision
3068
if set(target_graph.get_parent_map(revision_ids)) == revision_ids:
3069
return graph.SearchResult(revision_ids, set(), 0, set())
3070
missing_revs = set()
3071
source_graph = self.source.get_graph()
3072
# ensure we don't pay silly lookup costs.
3073
searcher = source_graph._make_breadth_first_searcher(revision_ids)
3074
null_set = frozenset([_mod_revision.NULL_REVISION])
3075
searcher_exhausted = False
3079
# Iterate the searcher until we have enough next_revs
3080
while len(next_revs) < self._walk_to_common_revisions_batch_size:
3082
next_revs_part, ghosts_part = searcher.next_with_ghosts()
3083
next_revs.update(next_revs_part)
3084
ghosts.update(ghosts_part)
3085
except StopIteration:
3086
searcher_exhausted = True
3088
# If there are ghosts in the source graph, and the caller asked for
3089
# them, make sure that they are present in the target.
3090
# We don't care about other ghosts as we can't fetch them and
3091
# haven't been asked to.
3092
ghosts_to_check = set(revision_ids.intersection(ghosts))
3093
revs_to_get = set(next_revs).union(ghosts_to_check)
3095
have_revs = set(target_graph.get_parent_map(revs_to_get))
3096
# we always have NULL_REVISION present.
3097
have_revs = have_revs.union(null_set)
3098
# Check if the target is missing any ghosts we need.
3099
ghosts_to_check.difference_update(have_revs)
3101
# One of the caller's revision_ids is a ghost in both the
3102
# source and the target.
3103
raise errors.NoSuchRevision(
3104
self.source, ghosts_to_check.pop())
3105
missing_revs.update(next_revs - have_revs)
3106
# Because we may have walked past the original stop point, make
3107
# sure everything is stopped
3108
stop_revs = searcher.find_seen_ancestors(have_revs)
3109
searcher.stop_searching_any(stop_revs)
3110
if searcher_exhausted:
3112
return searcher.get_result()
1661
raise NotImplementedError(self.fetch)
3114
1663
@needs_read_lock
3115
def search_missing_revision_ids(self, revision_id=None, find_ghosts=True):
1664
def search_missing_revision_ids(self,
1665
revision_id=symbol_versioning.DEPRECATED_PARAMETER,
1666
find_ghosts=True, revision_ids=None, if_present_ids=None,
3116
1668
"""Return the revision ids that source has that target does not.
3118
1670
:param revision_id: only return revision ids included by this
1672
:param revision_ids: return revision ids included by these
1673
revision_ids. NoSuchRevision will be raised if any of these
1674
revisions are not present.
1675
:param if_present_ids: like revision_ids, but will not cause
1676
NoSuchRevision if any of these are absent, instead they will simply
1677
not be in the result. This is useful for e.g. finding revisions
1678
to fetch for tags, which may reference absent revisions.
3120
1679
:param find_ghosts: If True find missing revisions in deep history
3121
1680
rather than just finding the surface difference.
1681
:param limit: Maximum number of revisions to return, topologically
3122
1683
:return: A bzrlib.graph.SearchResult.
3124
# stop searching at found target revisions.
3125
if not find_ghosts and revision_id is not None:
3126
return self._walk_to_common_revisions([revision_id])
3127
# generic, possibly worst case, slow code path.
3128
target_ids = set(self.target.all_revision_ids())
3129
if revision_id is not None:
3130
source_ids = self.source.get_ancestry(revision_id)
3131
if source_ids[0] is not None:
3132
raise AssertionError()
3135
source_ids = self.source.all_revision_ids()
3136
result_set = set(source_ids).difference(target_ids)
3137
return self.source.revision_ids_to_search_result(result_set)
1685
raise NotImplementedError(self.search_missing_revision_ids)
3140
1688
def _same_model(source, target):
3161
1709
"different serializers")
3164
class InterSameDataRepository(InterRepository):
3165
"""Code for converting between repositories that represent the same data.
3167
Data format and model must match for this to work.
3171
def _get_repo_format_to_test(self):
3172
"""Repository format for testing with.
3174
InterSameData can pull from subtree to subtree and from non-subtree to
3175
non-subtree, so we test this with the richest repository format.
3177
from bzrlib.repofmt import knitrepo
3178
return knitrepo.RepositoryFormatKnit3()
3181
def is_compatible(source, target):
3182
return InterRepository._same_model(source, target)
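# How this is used (a sketch; 'repo_a'/'repo_b' are hypothetical): calling
# InterRepository.get(repo_a, repo_b) walks the registered optimisers and
# returns an InterSameDataRepository when is_compatible() above is True;
# otherwise the generic InterRepository code path is used.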
3185
class InterWeaveRepo(InterSameDataRepository):
3186
"""Optimised code paths between Weave based repositories.
3188
This should be in bzrlib/repofmt/weaverepo.py but we have not yet
3189
implemented lazy inter-object optimisation.
3193
def _get_repo_format_to_test(self):
3194
from bzrlib.repofmt import weaverepo
3195
return weaverepo.RepositoryFormat7()
3198
def is_compatible(source, target):
3199
"""Be compatible with known Weave formats.
3201
We don't test for the stores being of specific types because that
3202
could lead to confusing results, and there is no need to be
3205
from bzrlib.repofmt.weaverepo import (
3211
return (isinstance(source._format, (RepositoryFormat5,
3213
RepositoryFormat7)) and
3214
isinstance(target._format, (RepositoryFormat5,
3216
RepositoryFormat7)))
3217
except AttributeError:
3221
def copy_content(self, revision_id=None):
3222
"""See InterRepository.copy_content()."""
3223
# weave specific optimised path:
3225
self.target.set_make_working_trees(self.source.make_working_trees())
3226
except (errors.RepositoryUpgradeRequired, NotImplementedError):
3228
# FIXME do not peek!
3229
if self.source._transport.listable():
3230
pb = ui.ui_factory.nested_progress_bar()
3232
self.target.texts.insert_record_stream(
3233
self.source.texts.get_record_stream(
3234
self.source.texts.keys(), 'topological', False))
3235
pb.update('copying inventory', 0, 1)
3236
self.target.inventories.insert_record_stream(
3237
self.source.inventories.get_record_stream(
3238
self.source.inventories.keys(), 'topological', False))
3239
self.target.signatures.insert_record_stream(
3240
self.source.signatures.get_record_stream(
3241
self.source.signatures.keys(),
3243
self.target.revisions.insert_record_stream(
3244
self.source.revisions.get_record_stream(
3245
self.source.revisions.keys(),
3246
'topological', True))
3250
self.target.fetch(self.source, revision_id=revision_id)
3253
def search_missing_revision_ids(self, revision_id=None, find_ghosts=True):
3254
"""See InterRepository.missing_revision_ids()."""
3255
# we want all revisions to satisfy revision_id in source.
3256
# but we don't want to stat every file here and there.
3257
# we want, then, to check all revisions the other repository needs to
3258
# satisfy revision_id, but not those that we have locally.
3259
# so the first thing is to get a subset of the revisions to
3260
# satisfy revision_id in source, and then eliminate those that
3261
# we do already have.
3262
# this is slow on a high latency connection to self, but as this
3263
# disk format scales terribly for push anyway due to rewriting
3264
# inventory.weave, this is considered acceptable.
3266
if revision_id is not None:
3267
source_ids = self.source.get_ancestry(revision_id)
3268
if source_ids[0] is not None:
3269
raise AssertionError()
3272
source_ids = self.source._all_possible_ids()
3273
source_ids_set = set(source_ids)
3274
# source_ids is the worst possible case we may need to pull.
3275
# now we want to filter source_ids against what we actually
3276
# have in target, but don't try to check for existence where we know
3277
# we do not have a revision as that would be pointless.
3278
target_ids = set(self.target._all_possible_ids())
3279
possibly_present_revisions = target_ids.intersection(source_ids_set)
3280
actually_present_revisions = set(
3281
self.target._eliminate_revisions_not_present(possibly_present_revisions))
3282
required_revisions = source_ids_set.difference(actually_present_revisions)
3283
if revision_id is not None:
3284
# we used get_ancestry to determine source_ids, so we are assured all
3285
# revisions referenced are present as they are installed in topological order,
3286
# and the tip revision was validated by get_ancestry.
3287
result_set = required_revisions
3289
# if we just grabbed the possibly available ids, then
3290
# we only have an estimate of what's available and need to validate
3291
# that against the revision records.
3293
self.source._eliminate_revisions_not_present(required_revisions))
3294
return self.source.revision_ids_to_search_result(result_set)
3297
class InterKnitRepo(InterSameDataRepository):
3298
"""Optimised code paths between Knit based repositories."""
3301
def _get_repo_format_to_test(self):
3302
from bzrlib.repofmt import knitrepo
3303
return knitrepo.RepositoryFormatKnit1()
3306
def is_compatible(source, target):
3307
"""Be compatible with known Knit formats.
3309
We don't test for the stores being of specific types because that
3310
could lead to confusing results, and there is no need to be
3313
from bzrlib.repofmt.knitrepo import RepositoryFormatKnit
3315
are_knits = (isinstance(source._format, RepositoryFormatKnit) and
3316
isinstance(target._format, RepositoryFormatKnit))
3317
except AttributeError:
3319
return are_knits and InterRepository._same_model(source, target)
3322
def search_missing_revision_ids(self, revision_id=None, find_ghosts=True):
3323
"""See InterRepository.missing_revision_ids()."""
3324
if revision_id is not None:
3325
source_ids = self.source.get_ancestry(revision_id)
3326
if source_ids[0] is not None:
3327
raise AssertionError()
3330
source_ids = self.source.all_revision_ids()
3331
source_ids_set = set(source_ids)
3332
# source_ids is the worst possible case we may need to pull.
3333
# now we want to filter source_ids against what we actually
3334
# have in target, but don't try to check for existence where we know
3335
# we do not have a revision as that would be pointless.
3336
target_ids = set(self.target.all_revision_ids())
3337
possibly_present_revisions = target_ids.intersection(source_ids_set)
3338
actually_present_revisions = set(
3339
self.target._eliminate_revisions_not_present(possibly_present_revisions))
3340
required_revisions = source_ids_set.difference(actually_present_revisions)
3341
if revision_id is not None:
3342
# we used get_ancestry to determine source_ids, so we are assured all
3343
# revisions referenced are present as they are installed in topological order,
3344
# and the tip revision was validated by get_ancestry.
3345
result_set = required_revisions
3347
# if we just grabbed the possibly available ids, then
3348
# we only have an estimate of what's available and need to validate
3349
# that against the revision records.
3351
self.source._eliminate_revisions_not_present(required_revisions))
3352
return self.source.revision_ids_to_search_result(result_set)
3355
class InterPackRepo(InterSameDataRepository):
3356
"""Optimised code paths between Pack based repositories."""
3359
def _get_repo_format_to_test(self):
3360
from bzrlib.repofmt import pack_repo
3361
return pack_repo.RepositoryFormatKnitPack1()
3364
def is_compatible(source, target):
3365
"""Be compatible with known Pack formats.
3367
We don't test for the stores being of specific types because that
3368
could lead to confusing results, and there is no need to be
3371
Do not support CHK based repositories at this point.
3373
from bzrlib.repofmt.pack_repo import RepositoryFormatPack
3374
# XXX: This format is scheduled for termination
3375
# from bzrlib.repofmt.groupcompress_repo import (
3376
# RepositoryFormatPackGCPlain,
3379
are_packs = (isinstance(source._format, RepositoryFormatPack) and
3380
isinstance(target._format, RepositoryFormatPack))
3381
except AttributeError:
3385
# if (isinstance(source._format, RepositoryFormatPackGCPlain)
3386
# or isinstance(target._format, RepositoryFormatPackGCPlain)):
3388
return (InterRepository._same_model(source, target) and
3389
not source._format.supports_chks)
3392
def fetch(self, revision_id=None, pb=None, find_ghosts=False,
3394
"""See InterRepository.fetch()."""
3395
if (len(self.source._fallback_repositories) > 0 or
3396
len(self.target._fallback_repositories) > 0):
3397
# The pack layer is not aware of fallback repositories, so when
3398
# fetching from a stacked repository or into a stacked repository
3399
# we use the generic fetch logic which uses the VersionedFiles
3400
# attributes on repository.
3401
from bzrlib.fetch import RepoFetcher
3402
fetcher = RepoFetcher(self.target, self.source, revision_id,
3403
pb, find_ghosts, fetch_spec=fetch_spec)
3404
if fetch_spec is not None:
3405
if len(list(fetch_spec.heads)) != 1:
3406
raise AssertionError(
3407
"InterPackRepo.fetch doesn't support "
3408
"fetching multiple heads yet.")
3409
revision_id = list(fetch_spec.heads)[0]
3411
if revision_id is None:
3413
# everything to do - use pack logic
3414
# to fetch from all packs to one without
3415
# inventory parsing etc, IFF nothing to be copied is in the target.
3417
source_revision_ids = frozenset(self.source.all_revision_ids())
3418
revision_ids = source_revision_ids - \
3419
frozenset(self.target.get_parent_map(source_revision_ids))
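# Only revisions that the target's parent map does not already answer for
# remain candidates for fetching.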
3420
revision_keys = [(revid,) for revid in revision_ids]
3421
index = self.target._pack_collection.revision_index.combined_index
3422
present_revision_ids = set(item[1][0] for item in
3423
index.iter_entries(revision_keys))
3424
revision_ids = set(revision_ids) - present_revision_ids
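# Drop whatever the target's revision index already contains; what is left
# is the set that actually has to be copied.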
3425
# implementing the TODO will involve:
3426
# - detecting when all of a pack is selected
3427
# - avoiding as much as possible pre-selection, so the
3428
# more-core routines such as create_pack_from_packs can filter in
3429
# a just-in-time fashion. (though having a HEADS list on a
3430
# repository might make this a lot easier, because we could
3431
# sensibly detect 'new revisions' without doing a full index scan.)
3432
elif _mod_revision.is_null(revision_id):
3437
revision_ids = self.search_missing_revision_ids(revision_id,
3438
find_ghosts=find_ghosts).get_keys()
3439
except errors.NoSuchRevision:
3440
raise errors.InstallFailed([revision_id])
3441
if len(revision_ids) == 0:
3443
return self._pack(self.source, self.target, revision_ids)
3445
def _pack(self, source, target, revision_ids):
3446
from bzrlib.repofmt.pack_repo import Packer
3447
packs = source._pack_collection.all_packs()
3448
pack = Packer(self.target._pack_collection, packs, '.fetch',
3449
revision_ids).pack()
3450
if pack is not None:
3451
self.target._pack_collection._save_pack_names()
3452
copied_revs = pack.get_revision_count()
3453
# Trigger an autopack. This may duplicate effort as we've just done
3454
# a pack creation, but for now it is simpler to think about as
3455
# 'upload data, then repack if needed'.
3456
self.target._pack_collection.autopack()
3457
return (copied_revs, [])
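# Returns the number of copied revisions plus an (always empty) failure
# list; this pair is what fetch() above hands back to its caller.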
3462
def search_missing_revision_ids(self, revision_id=None, find_ghosts=True):
3463
"""See InterRepository.missing_revision_ids().
3465
:param find_ghosts: Find ghosts throughout the ancestry of
3468
if not find_ghosts and revision_id is not None:
3469
return self._walk_to_common_revisions([revision_id])
3470
elif revision_id is not None:
3471
# Find ghosts: search for revisions pointing from one repository to
3472
# the other, and vice versa, anywhere in the history of revision_id.
3473
graph = self.target.get_graph(other_repository=self.source)
3474
searcher = graph._make_breadth_first_searcher([revision_id])
3478
next_revs, ghosts = searcher.next_with_ghosts()
3479
except StopIteration:
3481
if revision_id in ghosts:
3482
raise errors.NoSuchRevision(self.source, revision_id)
3483
found_ids.update(next_revs)
3484
found_ids.update(ghosts)
3485
found_ids = frozenset(found_ids)
3486
# Double query here: should be able to avoid this by changing the
3487
# graph api further.
3488
result_set = found_ids - frozenset(
3489
self.target.get_parent_map(found_ids))
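# Whatever the target's parent map can already answer for does not need to
# be fetched.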
3491
source_ids = self.source.all_revision_ids()
3492
# source_ids is the worst possible case we may need to pull.
3493
# now we want to filter source_ids against what we actually
3494
# have in target, but don't try to check for existence where we know
3495
# we do not have a revision as that would be pointless.
3496
target_ids = set(self.target.all_revision_ids())
3497
result_set = set(source_ids).difference(target_ids)
3498
return self.source.revision_ids_to_search_result(result_set)
3501
class InterDifferingSerializer(InterKnitRepo):
3504
def _get_repo_format_to_test(self):
3508
def is_compatible(source, target):
3509
"""Be compatible with Knit2 source and Knit3 target"""
3510
# XXX: What do we need to do to support fetching them?
3511
# if source.supports_rich_root() != target.supports_rich_root():
3513
# Ideally, we'd support fetching if the source had no tree references
3514
# even if it supported them...
3515
# XXX: What do we need to do to support fetching them?
3516
# if (getattr(source._format, 'supports_tree_reference', False) and
3517
# not getattr(target._format, 'supports_tree_reference', False)):
3521
def _get_delta_for_revision(self, tree, parent_ids, basis_id, cache):
3522
"""Get the best delta and base for this revision.
3524
:return: (basis_id, delta)
3526
possible_trees = [(parent_id, cache[parent_id])
3527
for parent_id in parent_ids
3528
if parent_id in cache]
3529
if len(possible_trees) == 0:
3530
# There either aren't any parents, or the parents aren't in the
3531
# cache, so just use the last converted tree
3532
possible_trees.append((basis_id, cache[basis_id]))
3534
for basis_id, basis_tree in possible_trees:
3535
delta = tree.inventory._make_delta(basis_tree.inventory)
3536
deltas.append((len(delta), basis_id, delta))
3538
return deltas[0][1:]
3540
def _fetch_batch(self, revision_ids, basis_id, cache):
3541
"""Fetch across a few revisions.
3543
:param revision_ids: The revisions to copy
3544
:param basis_id: The revision_id of a tree that must be in cache, used
3545
as a basis for delta when no other base is available
3546
:param cache: A cache of RevisionTrees that we can use.
3547
:return: The revision_id of the last converted tree. The RevisionTree
3548
for it will be in cache
3550
# Walk through all revisions; get inventory deltas, copy referenced
3551
# texts that delta references, insert the delta, revision and
3553
root_keys_to_create = set()
3556
pending_revisions = []
3557
parent_map = self.source.get_parent_map(revision_ids)
3558
# NB: This fails with dubious inventory data (when inv A has rev OLD
3559
# for file F, and in B, after A, has rev A for file F) when A and B are
3560
# in different groups.
3561
for tree in self.source.revision_trees(revision_ids):
3562
current_revision_id = tree.get_revision_id()
3563
parent_ids = parent_map.get(current_revision_id, ())
3564
basis_id, delta = self._get_delta_for_revision(tree, parent_ids,
3566
if self._converting_to_rich_root:
3567
self._revision_id_to_root_id[current_revision_id] = \
3569
# Find text entries that need to be copied
3570
for old_path, new_path, file_id, entry in delta:
3571
if new_path is not None:
3574
if not self.target.supports_rich_root():
3575
# The target doesn't support rich root, so we don't
3578
if self._converting_to_rich_root:
3579
# This can't be copied normally, we have to insert
3581
root_keys_to_create.add((file_id, entry.revision))
3583
text_keys.add((file_id, entry.revision))
3584
revision = self.source.get_revision(current_revision_id)
3585
pending_deltas.append((basis_id, delta,
3586
current_revision_id, revision.parent_ids))
3587
pending_revisions.append(revision)
3588
cache[current_revision_id] = tree
3589
basis_id = current_revision_id
3591
from_texts = self.source.texts
3592
to_texts = self.target.texts
3593
if root_keys_to_create:
3594
NULL_REVISION = _mod_revision.NULL_REVISION
3595
def _get_parent_keys(root_key):
3596
root_id, rev_id = root_key
3597
# Include direct parents of the revision, but only if they used
3599
parent_keys = tuple([(root_id, parent_id)
3600
for parent_id in parent_map[rev_id]
3601
if parent_id != NULL_REVISION and
3602
self._revision_id_to_root_id.get(parent_id, root_id) == root_id])
3604
def new_root_data_stream():
3605
for root_key in root_keys_to_create:
3606
parent_keys = _get_parent_keys(root_key)
3607
yield versionedfile.FulltextContentFactory(root_key,
3608
parent_keys, None, '')
3609
to_texts.insert_record_stream(new_root_data_stream())
3610
to_texts.insert_record_stream(from_texts.get_record_stream(
3611
text_keys, self.target._format._fetch_order,
3612
not self.target._format._fetch_uses_deltas))
3614
for delta in pending_deltas:
3615
self.target.add_inventory_by_delta(*delta)
3616
# insert signatures and revisions
3617
for revision in pending_revisions:
3619
signature = self.source.get_signature_text(
3620
revision.revision_id)
3621
self.target.add_signature_text(revision.revision_id,
3623
except errors.NoSuchRevision:
3625
self.target.add_revision(revision.revision_id, revision)
3628
def _fetch_all_revisions(self, revision_ids, pb):
3629
"""Fetch everything for the list of revisions.
3631
:param revision_ids: The list of revisions to fetch. Must be in
3633
:param pb: A ProgressBar
3636
basis_id, basis_tree = self._get_basis(revision_ids[0])
3638
cache = lru_cache.LRUCache(100)
3639
cache[basis_id] = basis_tree
3640
del basis_tree # We don't want to hang on to it here
3641
for offset in range(0, len(revision_ids), batch_size):
3642
self.target.start_write_group()
3644
pb.update('Transferring revisions', offset,
3646
batch = revision_ids[offset:offset+batch_size]
3647
basis_id = self._fetch_batch(batch, basis_id, cache)
3649
self.target.abort_write_group()
3652
self.target.commit_write_group()
3653
pb.update('Transferring revisions', len(revision_ids),
3657
def fetch(self, revision_id=None, pb=None, find_ghosts=False,
3659
"""See InterRepository.fetch()."""
3660
if fetch_spec is not None:
3661
raise AssertionError("Not implemented yet...")
3662
if (not self.source.supports_rich_root()
3663
and self.target.supports_rich_root()):
3664
self._converting_to_rich_root = True
3665
self._revision_id_to_root_id = {}
3667
self._converting_to_rich_root = False
3668
revision_ids = self.target.search_missing_revision_ids(self.source,
3669
revision_id, find_ghosts=find_ghosts).get_keys()
3670
if not revision_ids:
3672
revision_ids = tsort.topo_sort(
3673
self.source.get_graph().get_parent_map(revision_ids))
3674
if not revision_ids:
3676
# Walk through all revisions; get inventory deltas, copy referenced
3677
# texts that delta references, insert the delta, revision and
3679
first_rev = self.source.get_revision(revision_ids[0])
3681
my_pb = ui.ui_factory.nested_progress_bar()
3684
symbol_versioning.warn(
3685
symbol_versioning.deprecated_in((1, 14, 0))
3686
% "pb parameter to fetch()")
3689
self._fetch_all_revisions(revision_ids, pb)
3691
if my_pb is not None:
3693
return len(revision_ids), 0
3695
def _get_basis(self, first_revision_id):
3696
"""Get a revision and tree which exists in the target.
3698
This assumes that first_revision_id is selected for transmission
3699
because all other ancestors are already present. If we can't find an
3700
ancestor we fall back to NULL_REVISION since we know that is safe.
3702
:return: (basis_id, basis_tree)
3704
first_rev = self.source.get_revision(first_revision_id)
3706
basis_id = first_rev.parent_ids[0]
3707
# only valid as a basis if the target has it
3708
self.target.get_revision(basis_id)
3709
# Try to get a basis tree - if it's a ghost it will hit the
3710
# NoSuchRevision case.
3711
basis_tree = self.source.revision_tree(basis_id)
3712
except (IndexError, errors.NoSuchRevision):
3713
basis_id = _mod_revision.NULL_REVISION
3714
basis_tree = self.source.revision_tree(basis_id)
3715
return basis_id, basis_tree
3718
InterRepository.register_optimiser(InterDifferingSerializer)
3719
InterRepository.register_optimiser(InterSameDataRepository)
3720
InterRepository.register_optimiser(InterWeaveRepo)
3721
InterRepository.register_optimiser(InterKnitRepo)
3722
InterRepository.register_optimiser(InterPackRepo)
3725
1712
class CopyConverter(object):
3726
1713
"""A repository conversion tool which just performs a copy of the content.
3741
1728
:param to_convert: The disk object to convert.
3742
1729
:param pb: a progress bar to use for progress information.
1731
pb = ui.ui_factory.nested_progress_bar()
3747
1734
# this is only useful with metadir layouts - separated repo content.
3748
1735
# trigger an assertion if not such
3749
1736
repo._format.get_format_string()
3750
1737
self.repo_dir = repo.bzrdir
3751
self.step('Moving repository to repository.backup')
1738
pb.update(gettext('Moving repository to repository.backup'))
3752
1739
self.repo_dir.transport.move('repository', 'repository.backup')
3753
1740
backup_transport = self.repo_dir.transport.clone('repository.backup')
3754
1741
repo._format.check_conversion_target(self.target_format)
3755
1742
self.source_repo = repo._format.open(self.repo_dir,
3757
1744
_override_transport=backup_transport)
3758
self.step('Creating new repository')
1745
pb.update(gettext('Creating new repository'))
3759
1746
converted = self.target_format.initialize(self.repo_dir,
3760
1747
self.source_repo.is_shared())
3761
1748
converted.lock_write()
3763
self.step('Copying content into repository.')
1750
pb.update(gettext('Copying content'))
3764
1751
self.source_repo.copy_content_into(converted)
3766
1753
converted.unlock()
3767
self.step('Deleting old repository content.')
1754
pb.update(gettext('Deleting old repository content'))
3768
1755
self.repo_dir.transport.delete_tree('repository.backup')
3769
self.pb.note('repository converted')
3771
def step(self, message):
3772
"""Update the pb by a step."""
3774
self.pb.update(message, self.count, self.total)
3786
def _unescaper(match, _map=_unescape_map):
3787
code = match.group(1)
3791
if not code.startswith('#'):
3793
return unichr(int(code[1:])).encode('utf8')
3799
def _unescape_xml(data):
3800
"""Unescape predefined XML entities in a string of data."""
3802
if _unescape_re is None:
3803
_unescape_re = re.compile('\&([^;]*);')
3804
return _unescape_re.sub(_unescaper, data)
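# For example (assuming _unescape_map carries the predefined XML entities),
# _unescape_xml('a &amp; b &#65;') yields 'a & b A'.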
3807
class _VersionedFileChecker(object):
3809
def __init__(self, repository, text_key_references=None):
3810
self.repository = repository
3811
self.text_index = self.repository._generate_text_key_index(
3812
text_key_references=text_key_references)
3814
def calculate_file_version_parents(self, text_key):
3815
"""Calculate the correct parents for a file version according to
3818
parent_keys = self.text_index[text_key]
3819
if parent_keys == [_mod_revision.NULL_REVISION]:
3821
return tuple(parent_keys)
3823
def check_file_version_parents(self, texts, progress_bar=None):
3824
"""Check the parents stored in a versioned file are correct.
3826
It also detects file versions that are not referenced by their
3827
corresponding revision's inventory.
3829
:returns: A tuple of (wrong_parents, dangling_file_versions).
3830
wrong_parents is a dict mapping {revision_id: (stored_parents,
3831
correct_parents)} for each revision_id where the stored parents
3832
are not correct. dangling_file_versions is a set of (file_id,
3833
revision_id) tuples for versions that are present in this versioned
3834
file, but not used by the corresponding inventory.
3837
self.file_ids = set([file_id for file_id, _ in
3838
self.text_index.iterkeys()])
3839
# text keys are now grouped by file_id
3840
n_weaves = len(self.file_ids)
3841
files_in_revisions = {}
3842
revisions_of_files = {}
3843
n_versions = len(self.text_index)
3844
progress_bar.update('loading text store', 0, n_versions)
3845
parent_map = self.repository.texts.get_parent_map(self.text_index)
3846
# On unlistable transports this could well be empty/error...
3847
text_keys = self.repository.texts.keys()
3848
unused_keys = frozenset(text_keys) - set(self.text_index)
3849
for num, key in enumerate(self.text_index.iterkeys()):
3850
if progress_bar is not None:
3851
progress_bar.update('checking text graph', num, n_versions)
3852
correct_parents = self.calculate_file_version_parents(key)
3854
knit_parents = parent_map[key]
3855
except errors.RevisionNotPresent:
3858
if correct_parents != knit_parents:
3859
wrong_parents[key] = (knit_parents, correct_parents)
3860
return wrong_parents, unused_keys
3863
def _old_get_graph(repository, revision_id):
3864
"""DO NOT USE. That is all. I'm serious."""
3865
graph = repository.get_graph()
3866
revision_graph = dict(((key, value) for key, value in
3867
graph.iter_ancestry([revision_id]) if value is not None))
3868
return _strip_NULL_ghosts(revision_graph)
1756
ui.ui_factory.note(gettext('repository converted'))
3871
1760
def _strip_NULL_ghosts(revision_graph):
3879
1768
return revision_graph
3882
class StreamSink(object):
3883
"""An object that can insert a stream into a repository.
3885
This interface handles the complexity of reserialising inventories and
3886
revisions from different formats, and allows unidirectional insertion into
3887
stacked repositories without looking for the missing basis parents
3891
def __init__(self, target_repo):
3892
self.target_repo = target_repo
3894
def insert_stream(self, stream, src_format, resume_tokens):
3895
"""Insert a stream's content into the target repository.
3897
:param src_format: a bzr repository format.
3899
:return: a list of resume tokens and an iterable of keys additional
3900
items required before the insertion can be completed.
3902
self.target_repo.lock_write()
3905
self.target_repo.resume_write_group(resume_tokens)
3907
self.target_repo.start_write_group()
3909
# locked_insert_stream performs a commit|suspend.
3910
return self._locked_insert_stream(stream, src_format)
3912
self.target_repo.abort_write_group(suppress_errors=True)
3915
self.target_repo.unlock()
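# Illustrative caller loop (a sketch; 'sink', 'source' and 'stream' are
# hypothetical names, not code from this module):
#   tokens, missing = sink.insert_stream(stream, src_format, [])
#   if missing:
#       tokens, missing = sink.insert_stream(
#           source.get_stream_for_missing_keys(missing), src_format, tokens)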
3917
def _locked_insert_stream(self, stream, src_format):
3918
to_serializer = self.target_repo._format._serializer
3919
src_serializer = src_format._serializer
3920
if to_serializer == src_serializer:
3921
# If serializers match and the target is a pack repository, set the
3922
# write cache size on the new pack. This avoids poor performance
3923
# on transports where append is unbuffered (such as
3924
# RemoteTransport). This is safe to do because nothing should read
3925
# back from the target repository while a stream with matching
3926
# serialization is being inserted.
3927
# The exception is that a delta record from the source that should
3928
# be a fulltext may need to be expanded by the target (see
3929
# test_fetch_revisions_with_deltas_into_pack); but we take care to
3930
# explicitly flush any buffered writes first in that rare case.
3932
new_pack = self.target_repo._pack_collection._new_pack
3933
except AttributeError:
3934
# Not a pack repository
3937
new_pack.set_write_cache_size(1024*1024)
3938
for substream_type, substream in stream:
3939
if substream_type == 'texts':
3940
self.target_repo.texts.insert_record_stream(substream)
3941
elif substream_type == 'inventories':
3942
if src_serializer == to_serializer:
3943
self.target_repo.inventories.insert_record_stream(
3946
self._extract_and_insert_inventories(
3947
substream, src_serializer)
3948
elif substream_type == 'chk_bytes':
3949
# XXX: This doesn't support conversions, as it assumes the
3950
# conversion was done in the fetch code.
3951
self.target_repo.chk_bytes.insert_record_stream(substream)
3952
elif substream_type == 'revisions':
3953
# This may fallback to extract-and-insert more often than
3954
# required if the serializers are different only in terms of
3956
if src_serializer == to_serializer:
3957
self.target_repo.revisions.insert_record_stream(
3960
self._extract_and_insert_revisions(substream,
3962
elif substream_type == 'signatures':
3963
self.target_repo.signatures.insert_record_stream(substream)
3965
raise AssertionError('kaboom! %s' % (substream_type,))
3967
missing_keys = set()
3968
for prefix, versioned_file in (
3969
('texts', self.target_repo.texts),
3970
('inventories', self.target_repo.inventories),
3971
('revisions', self.target_repo.revisions),
3972
('signatures', self.target_repo.signatures),
3974
missing_keys.update((prefix,) + key for key in
3975
versioned_file.get_missing_compression_parent_keys())
3976
except NotImplementedError:
3977
# cannot even attempt suspending, and missing would have failed
3978
# during stream insertion.
3979
missing_keys = set()
3982
# suspend the write group and tell the caller what is
3983
# missing. We know we can suspend or else we would not have
3984
# entered this code path. (All repositories that can handle
3985
# missing keys can handle suspending a write group).
3986
write_group_tokens = self.target_repo.suspend_write_group()
3987
return write_group_tokens, missing_keys
3988
self.target_repo.commit_write_group()
3991
def _extract_and_insert_inventories(self, substream, serializer):
3992
"""Generate a new inventory versionedfile in target, converting data.
3994
The inventory is retrieved from the source, (deserializing it), and
3995
stored in the target (reserializing it in a different format).
3997
for record in substream:
3998
bytes = record.get_bytes_as('fulltext')
3999
revision_id = record.key[0]
4000
inv = serializer.read_inventory_from_string(bytes, revision_id)
4001
parents = [key[0] for key in record.parents]
4002
self.target_repo.add_inventory(revision_id, inv, parents)
4004
def _extract_and_insert_revisions(self, substream, serializer):
4005
for record in substream:
4006
bytes = record.get_bytes_as('fulltext')
4007
revision_id = record.key[0]
4008
rev = serializer.read_revision_from_string(bytes)
4009
if rev.revision_id != revision_id:
4010
raise AssertionError('wtf: %s != %s' % (rev, revision_id))
4011
self.target_repo.add_revision(revision_id, rev)
4014
if self.target_repo._format._fetch_reconcile:
4015
self.target_repo.reconcile()
4018
class StreamSource(object):
4019
"""A source of a stream for fetching between repositories."""
4021
def __init__(self, from_repository, to_format):
4022
"""Create a StreamSource streaming from from_repository."""
4023
self.from_repository = from_repository
4024
self.to_format = to_format
4026
def delta_on_metadata(self):
4027
"""Return True if delta's are permitted on metadata streams.
4029
That is on revisions and signatures.
4031
src_serializer = self.from_repository._format._serializer
4032
target_serializer = self.to_format._serializer
4033
return (self.to_format._fetch_uses_deltas and
4034
src_serializer == target_serializer)
4036
def _fetch_revision_texts(self, revs):
4037
# fetch signatures first and then the revision texts
4038
# may need to be a InterRevisionStore call here.
4039
from_sf = self.from_repository.signatures
4040
# A missing signature is just skipped.
4041
keys = [(rev_id,) for rev_id in revs]
4042
signatures = versionedfile.filter_absent(from_sf.get_record_stream(
4044
self.to_format._fetch_order,
4045
not self.to_format._fetch_uses_deltas))
4046
# If a revision has a delta, this is actually expanded inside the
4047
# insert_record_stream code now, which is an alternate fix for
4049
from_rf = self.from_repository.revisions
4050
revisions = from_rf.get_record_stream(
4052
self.to_format._fetch_order,
4053
not self.delta_on_metadata())
4054
return [('signatures', signatures), ('revisions', revisions)]
4056
def _generate_root_texts(self, revs):
4057
"""This will be called by __fetch between fetching weave texts and
4058
fetching the inventory weave.
4060
Subclasses should override this if they need to generate root texts
4061
after fetching weave texts.
4063
if self._rich_root_upgrade():
4065
return bzrlib.fetch.Inter1and2Helper(
4066
self.from_repository).generate_root_texts(revs)
4070
def get_stream(self, search):
4072
revs = search.get_keys()
4073
graph = self.from_repository.get_graph()
4074
revs = list(graph.iter_topo_order(revs))
4075
data_to_fetch = self.from_repository.item_keys_introduced_by(revs)
4077
for knit_kind, file_id, revisions in data_to_fetch:
4078
if knit_kind != phase:
4080
# Make a new progress bar for this phase
4081
if knit_kind == "file":
4082
# Accumulate file texts
4083
text_keys.extend([(file_id, revision) for revision in
4085
elif knit_kind == "inventory":
4086
# Now copy the file texts.
4087
from_texts = self.from_repository.texts
4088
yield ('texts', from_texts.get_record_stream(
4089
text_keys, self.to_format._fetch_order,
4090
not self.to_format._fetch_uses_deltas))
4091
# Cause an error if a text occurs after we have done the
4094
# Before we process the inventory we generate the root
4095
# texts (if necessary) so that the inventories references
4097
for _ in self._generate_root_texts(revs):
4099
# NB: This currently reopens the inventory weave in source;
4100
# using a single stream interface instead would avoid this.
4101
from_weave = self.from_repository.inventories
4102
# we fetch only the referenced inventories because we do not
4103
# know for unselected inventories whether all their required
4104
# texts are present in the other repository - it could be
4106
for info in self._get_inventory_stream(revs):
4108
elif knit_kind == "signatures":
4109
# Nothing to do here; this will be taken care of when
4110
# _fetch_revision_texts happens.
4112
elif knit_kind == "revisions":
4113
for record in self._fetch_revision_texts(revs):
4116
raise AssertionError("Unknown knit kind %r" % knit_kind)
4118
def get_stream_for_missing_keys(self, missing_keys):
4119
# missing keys can only occur when we are byte copying and not
4120
# translating (because translation means we don't send
4121
# unreconstructable deltas ever).
4123
keys['texts'] = set()
4124
keys['revisions'] = set()
4125
keys['inventories'] = set()
4126
keys['signatures'] = set()
4127
for key in missing_keys:
4128
keys[key[0]].add(key[1:])
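# e.g. a missing key ('texts', file_id, revision_id) is filed under
# keys['texts'] as (file_id, revision_id).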
4129
if len(keys['revisions']):
4130
# If we allowed copying revisions at this point, we could end up
4131
# copying a revision without copying its required texts: a
4132
# violation of the requirements for repository integrity.
4133
raise AssertionError(
4134
'cannot copy revisions to fill in missing deltas %s' % (
4135
keys['revisions'],))
4136
for substream_kind, keys in keys.iteritems():
4137
vf = getattr(self.from_repository, substream_kind)
4138
# Ask for full texts always so that we don't need more round trips
4139
# after this stream.
4140
stream = vf.get_record_stream(keys,
4141
self.to_format._fetch_order, True)
4142
yield substream_kind, stream
4144
def inventory_fetch_order(self):
4145
if self._rich_root_upgrade():
4146
return 'topological'
4148
return self.to_format._fetch_order
4150
def _rich_root_upgrade(self):
4151
return (not self.from_repository._format.rich_root_data and
4152
self.to_format.rich_root_data)
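# e.g. fetching from a pack-0.92 (non-rich-root) repository into a rich-root
# or 2a repository counts as a rich-root upgrade.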
4154
def _get_inventory_stream(self, revision_ids):
4155
from_format = self.from_repository._format
4156
if (from_format.supports_chks and self.to_format.supports_chks
4157
and (from_format._serializer == self.to_format._serializer)):
4158
# Both sides support chks, and they use the same serializer, so it
4159
# is safe to transmit the chk pages and inventory pages across
4161
return self._get_chk_inventory_stream(revision_ids)
4162
elif (not from_format.supports_chks):
4163
# Source repository doesn't support chks. So we can transmit the
4164
# inventories 'as-is' and either they are just accepted on the
4165
# target, or the Sink will properly convert it.
4166
return self._get_simple_inventory_stream(revision_ids)
4168
# XXX: Hack to make not-chk->chk fetch: copy the inventories as
4169
# inventories. Note that this should probably be done somehow
4170
# as part of bzrlib.repository.StreamSink. Except JAM couldn't
4171
# figure out how a non-chk repository could possibly handle
4172
# deserializing an inventory stream from a chk repo, as it
4173
# doesn't have a way to understand individual pages.
4174
return self._get_convertable_inventory_stream(revision_ids)
4176
def _get_simple_inventory_stream(self, revision_ids):
4177
from_weave = self.from_repository.inventories
4178
yield ('inventories', from_weave.get_record_stream(
4179
[(rev_id,) for rev_id in revision_ids],
4180
self.inventory_fetch_order(),
4181
not self.delta_on_metadata()))
4183
def _get_chk_inventory_stream(self, revision_ids):
4184
"""Fetch the inventory texts, along with the associated chk maps."""
4185
# We want an inventory outside of the search set, so that we can filter
4186
# out uninteresting chk pages. For now we use
4187
# _find_revision_outside_set, but if we had a Search with cut_revs, we
4188
# could use that instead.
4189
start_rev_id = self.from_repository._find_revision_outside_set(
4191
start_rev_key = (start_rev_id,)
4192
inv_keys_to_fetch = [(rev_id,) for rev_id in revision_ids]
4193
if start_rev_id != _mod_revision.NULL_REVISION:
4194
inv_keys_to_fetch.append((start_rev_id,))
4195
# Any repo that supports chk_bytes must also support out-of-order
4196
# insertion. At least, that is how we expect it to work
4197
# We use get_record_stream instead of iter_inventories because we want
4198
# to be able to insert the stream as well. We could instead fetch
4199
# allowing deltas, and then iter_inventories, but we don't know whether
4200
# source or target is more 'local' anyway.
4201
inv_stream = self.from_repository.inventories.get_record_stream(
4202
inv_keys_to_fetch, 'unordered',
4203
True) # We need them as full-texts so we can find their references
4204
uninteresting_chk_roots = set()
4205
interesting_chk_roots = set()
4206
def filter_inv_stream(inv_stream):
4207
for idx, record in enumerate(inv_stream):
4208
### child_pb.update('fetch inv', idx, len(inv_keys_to_fetch))
4209
bytes = record.get_bytes_as('fulltext')
4210
chk_inv = inventory.CHKInventory.deserialise(
4211
self.from_repository.chk_bytes, bytes, record.key)
4212
if record.key == start_rev_key:
4213
uninteresting_chk_roots.add(chk_inv.id_to_entry.key())
4214
p_id_map = chk_inv.parent_id_basename_to_file_id
4215
if p_id_map is not None:
4216
uninteresting_chk_roots.add(p_id_map.key())
4219
interesting_chk_roots.add(chk_inv.id_to_entry.key())
4220
p_id_map = chk_inv.parent_id_basename_to_file_id
4221
if p_id_map is not None:
4222
interesting_chk_roots.add(p_id_map.key())
4223
### pb.update('fetch inventory', 0, 2)
4224
yield ('inventories', filter_inv_stream(inv_stream))
4225
# Now that we have worked out all of the interesting root nodes, grab
4226
# all of the interesting pages and insert them
4227
### pb.update('fetch inventory', 1, 2)
4228
interesting = chk_map.iter_interesting_nodes(
4229
self.from_repository.chk_bytes, interesting_chk_roots,
4230
uninteresting_chk_roots)
4231
def to_stream_adapter():
4232
"""Adapt the iter_interesting_nodes result to a single stream.
4234
iter_interesting_nodes returns records as it processes them, which
4235
can be in batches. But we only want a single stream to be inserted.
4237
for record, items in interesting:
4238
for value in record.itervalues():
4240
# XXX: We could instead call get_record_stream(records.keys())
4241
# ATM, this will always insert the records as fulltexts, and
4242
# requires that you can hang on to records once you have gone
4243
# on to the next one. Further, it causes the target to
4244
# recompress the data. Testing shows it to be faster than
4245
# requesting the records again, though.
4246
yield ('chk_bytes', to_stream_adapter())
4247
### pb.update('fetch inventory', 2, 2)
4249
def _get_convertable_inventory_stream(self, revision_ids):
4250
# XXX: One of source or target is using chks, and they don't have
4251
# compatible serializations. The StreamSink code expects to be
4252
# able to convert on the target, so we need to put
4253
# bytes-on-the-wire that can be converted
4254
yield ('inventories', self._stream_invs_as_fulltexts(revision_ids))
4256
def _stream_invs_as_fulltexts(self, revision_ids):
4257
from_repo = self.from_repository
4258
from_serializer = from_repo._format._serializer
4259
revision_keys = [(rev_id,) for rev_id in revision_ids]
4260
parent_map = from_repo.inventories.get_parent_map(revision_keys)
4261
for inv in self.from_repository.iter_inventories(revision_ids):
4262
# XXX: This is a bit hackish, but it works. Basically,
4263
# CHKSerializer 'accidentally' supports
4264
# read/write_inventory_to_string, even though that is never
4265
# the format that is stored on disk. It *does* give us a
4266
# single string representation for an inventory, so live with
4268
# This would be far better if we had a 'serialized inventory
4269
# delta' form. Then we could use 'inventory._make_delta', and
4270
# transmit that. This would both be faster to generate, and
4271
# result in fewer bytes-on-the-wire.
4272
as_bytes = from_serializer.write_inventory_to_string(inv)
4273
key = (inv.revision_id,)
4274
parent_keys = parent_map.get(key, ())
4275
yield versionedfile.FulltextContentFactory(
4276
key, parent_keys, None, as_bytes)
1771
def _iter_for_revno(repo, partial_history_cache, stop_index=None,
1772
stop_revision=None):
1773
"""Extend the partial history to include a given index
1775
If a stop_index is supplied, stop when that index has been reached.
1776
If a stop_revision is supplied, stop when that revision is
1777
encountered. Otherwise, stop when the beginning of history is
1780
:param stop_index: The index which should be present. When it is
1781
present, history extension will stop.
1782
:param stop_revision: The revision id which should be present. When
1783
it is encountered, history extension will stop.
1785
start_revision = partial_history_cache[-1]
1786
graph = repo.get_graph()
1787
iterator = graph.iter_lefthand_ancestry(start_revision,
1788
(_mod_revision.NULL_REVISION,))
1790
# skip the last revision in the list
1793
if (stop_index is not None and
1794
len(partial_history_cache) > stop_index):
1796
if partial_history_cache[-1] == stop_revision:
1798
revision_id = iterator.next()
1799
partial_history_cache.append(revision_id)
1800
except StopIteration:
1805
class _LazyListJoin(object):
1806
"""An iterable yielding the contents of many lists as one list.
1808
Each iterator made from this will reflect the current contents of the lists
1809
at the time the iterator is made.
1811
This is used by Repository's _make_parents_provider implementation so that
1814
pp = repo._make_parents_provider() # uses a list of fallback repos
1815
pp.add_fallback_repository(other_repo) # appends to that list
1816
result = pp.get_parent_map(...)
1817
# The result will include revs from other_repo
1820
def __init__(self, *list_parts):
1821
self.list_parts = list_parts
1825
for list_part in self.list_parts:
1826
full_list.extend(list_part)
1827
return iter(full_list)
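# Minimal illustration (hypothetical values): _LazyListJoin([1, 2], [3])
# iterates as 1, 2, 3; if the second list object later grows to [3, 4], a
# fresh iterator reflects 1, 2, 3, 4.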
1830
return "%s.%s(%s)" % (self.__module__, self.__class__.__name__,