ie.revision = parent_entry.revision
return self._get_delta(ie, basis_inv, path), False, None
ie.reference_revision = content_summary[3]
self._add_text_to_weave(ie.file_id, lines, heads, None)
self._add_text_to_weave(ie.file_id, '', heads, None)
raise NotImplementedError('unknown kind')
ie.revision = self._new_revision_id
self._any_changes = True
return self._get_delta(ie, basis_inv, path), True, fingerprint
def _add_text_to_weave(self, file_id, new_lines, parents, nostore_sha):
# Note: as we read the content directly from the tree, we know it's not
# been turned into unicode or badly split - but a broken tree
# implementation could give us bad output from readlines() so this is
# not a guarantee of safety. What would be better is always checking
# the content during test suite execution. RBC 20070912
parent_keys = tuple((file_id, parent) for parent in parents)
return self.repository.texts.add_lines(
(file_id, self._new_revision_id), parent_keys, new_lines,
nostore_sha=nostore_sha, random_id=self.random_revid,
check_content=False)[0:2]
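# Note: add_lines() returns the text's sha1 and byte count as its first two
# items; the [0:2] slice keeps just those so the caller can record them on
# the inventory entry.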
def record_iter_changes(self, tree, basis_revision_id, iter_changes,
_entry_factory=entry_factory):
"""Record a new tree via iter_changes.
:param tree: The tree to obtain text contents from for changed objects.
:param basis_revision_id: The revision id of the tree the iter_changes
has been generated against. Currently assumed to be the same
as self.parents[0] - if it is not, errors may occur.
:param iter_changes: An iter_changes iterator with the changes to apply
to basis_revision_id. The iterator must not include any items with
a current kind of None - missing items must be either filtered out
or errored-on before record_iter_changes sees the item.
:param _entry_factory: Private method to bind entry_factory locally for
:return: A generator of (file_id, relpath, fs_hash) tuples for use with
# Create an inventory delta based on deltas between all the parents and
# deltas between all the parent inventories. We use inventory deltas
# between the inventory objects because iter_changes masks
# last-changed-field only changes.
# file_id -> change map, change is fileid, paths, changed, versioneds,
# parents, names, kinds, executables
# {file_id -> revision_id -> inventory entry, for entries in parent
# trees that are not parents[0]
revtrees = list(self.repository.revision_trees(self.parents))
except errors.NoSuchRevision:
# one or more ghosts, slow path.
for revision_id in self.parents:
revtrees.append(self.repository.revision_tree(revision_id))
except errors.NoSuchRevision:
basis_revision_id = _mod_revision.NULL_REVISION
revtrees.append(self.repository.revision_tree(
_mod_revision.NULL_REVISION))
# The basis inventory from a repository
basis_inv = revtrees[0].inventory
basis_inv = self.repository.revision_tree(
_mod_revision.NULL_REVISION).inventory
if len(self.parents) > 0:
if basis_revision_id != self.parents[0] and not ghost_basis:
"arbitrary basis parents not yet supported with merges")
for revtree in revtrees[1:]:
for change in revtree.inventory._make_delta(basis_inv):
if change[1] is None:
# Not present in this parent.
if change[2] not in merged_ids:
if change[0] is not None:
basis_entry = basis_inv[change[2]]
merged_ids[change[2]] = [
basis_entry.revision,
parent_entries[change[2]] = {
basis_entry.revision:basis_entry,
change[3].revision:change[3],
merged_ids[change[2]] = [change[3].revision]
parent_entries[change[2]] = {change[3].revision:change[3]}
merged_ids[change[2]].append(change[3].revision)
parent_entries[change[2]][change[3].revision] = change[3]
# Setup the changes from the tree:
# changes maps file_id -> (change, [parent revision_ids])
for change in iter_changes:
# This probably looks up in basis_inv way too much.
if change[1][0] is not None:
head_candidate = [basis_inv[change[0]].revision]
changes[change[0]] = change, merged_ids.get(change[0],
unchanged_merged = set(merged_ids) - set(changes)
# Extend the changes dict with synthetic changes to record merges of
for file_id in unchanged_merged:
# Record a merged version of these items that did not change vs the
# basis. This can be either identical parallel changes, or a revert
# of a specific file after a merge. The recorded content will be
# that of the current tree (which is the same as the basis), but
# the per-file graph will reflect a merge.
# NB:XXX: We are reconstructing path information we had, this
# should be preserved instead.
# inv delta change: (file_id, (path_in_source, path_in_target),
# changed_content, versioned, parent, name, kind,
basis_entry = basis_inv[file_id]
except errors.NoSuchId:
# a change from basis->some_parents but file_id isn't in basis
# so was new in the merge, which means it must have changed
# from basis -> current, and as it hasn't the add was reverted
# by the user. So we discard this change.
(basis_inv.id2path(file_id), tree.id2path(file_id)),
(basis_entry.parent_id, basis_entry.parent_id),
(basis_entry.name, basis_entry.name),
(basis_entry.kind, basis_entry.kind),
(basis_entry.executable, basis_entry.executable))
changes[file_id] = (change, merged_ids[file_id])
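# Illustrative sketch (not in the original source): for an unchanged merged
# file the synthetic change tuple built above has identical old/new values,
# roughly
#   (file_id, ('dir/name', 'dir/name'), False, (True, True),
#    (parent_id, parent_id), ('name', 'name'), ('file', 'file'),
#    (False, False))
# so only the per-file graph gains the merge parents listed in merged_ids.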
# changes contains tuples with the change and a set of inventory
# candidates for the file.
# old_path, new_path, file_id, new_inventory_entry
seen_root = False # Is the root in the basis delta?
inv_delta = self._basis_delta
modified_rev = self._new_revision_id
for change, head_candidates in changes.values():
if change[3][1]: # versioned in target.
# Several things may be happening here:
# We may have a fork in the per-file graph
#  - record a change with the content from tree
# We may have a change against < all trees
#  - carry over the tree that hasn't changed
# We may have a change against all trees
#  - record the change with the content from tree
entry = _entry_factory[kind](file_id, change[5][1],
head_set = self._heads(change[0], set(head_candidates))
for head_candidate in head_candidates:
if head_candidate in head_set:
heads.append(head_candidate)
head_set.remove(head_candidate)
# Could be a carry-over situation:
parent_entry_revs = parent_entries.get(file_id, None)
if parent_entry_revs:
parent_entry = parent_entry_revs.get(heads[0], None)
if parent_entry is None:
# The parent that iter_changes was called against is the
# per-file head, so any change is relevant and
# iter_changes is valid.
carry_over_possible = False
# could be a carry over situation
# A change against the basis may just indicate a merge,
# we need to check the content against the source of the
# merge to determine if it was changed after the merge
if (parent_entry.kind != entry.kind or
parent_entry.parent_id != entry.parent_id or
parent_entry.name != entry.name):
# Metadata common to all entries has changed
# against per-file parent
carry_over_possible = False
carry_over_possible = True
# per-type checks for changes against the parent_entry
# Cannot be a carry-over situation
carry_over_possible = False
# Populate the entry in the delta
# XXX: There is still a small race here: If someone reverts the content of a file
# after iter_changes examines and decides it has changed,
# we will unconditionally record a new version even if some
# other process reverts it while commit is running (with
# the revert happening after iter_changes did its
entry.executable = True
entry.executable = False
if (carry_over_possible and
parent_entry.executable == entry.executable):
# Check the file length, content hash after reading
nostore_sha = parent_entry.text_sha1
file_obj, stat_value = tree.get_file_with_stat(file_id, change[1][1])
text = file_obj.read()
entry.text_sha1, entry.text_size = self._add_text_to_weave(
file_id, text, heads, nostore_sha)
yield file_id, change[1][1], (entry.text_sha1, stat_value)
except errors.ExistingContent:
# No content change against a carry_over parent
# Perhaps this should also yield a fs hash update?
entry.text_size = parent_entry.text_size
entry.text_sha1 = parent_entry.text_sha1
elif kind == 'symlink':
entry.symlink_target = tree.get_symlink_target(file_id)
if (carry_over_possible and
parent_entry.symlink_target == entry.symlink_target):
self._add_text_to_weave(change[0], '', heads, None)
elif kind == 'directory':
if carry_over_possible:
# Nothing to set on the entry.
# XXX: split into the Root and nonRoot versions.
if change[1][1] != '' or self.repository.supports_rich_root():
self._add_text_to_weave(change[0], '', heads, None)
elif kind == 'tree-reference':
if not self.repository._format.supports_tree_reference:
# This isn't quite sane as an error, but we shouldn't
# ever see this code path in practice: trees don't
# permit references when the repo doesn't support tree
raise errors.UnsupportedOperation(tree.add_reference,
reference_revision = tree.get_reference_revision(change[0])
entry.reference_revision = reference_revision
if (carry_over_possible and
parent_entry.reference_revision == reference_revision):
self._add_text_to_weave(change[0], '', heads, None)
raise AssertionError('unknown kind %r' % kind)
entry.revision = modified_rev
entry.revision = parent_entry.revision
new_path = change[1][1]
inv_delta.append((change[1][0], new_path, change[0], entry))
self.new_inventory = None
self._any_changes = True
# housekeeping root entry changes do not affect no-change commits.
self._require_root_change(tree)
self.basis_delta_revision = basis_revision_id
def _add_text_to_weave(self, file_id, new_text, parents, nostore_sha):
parent_keys = tuple([(file_id, parent) for parent in parents])
return self.repository.texts._add_text(
(file_id, self._new_revision_id), parent_keys, new_text,
nostore_sha=nostore_sha, random_id=self.random_revid)[0:2]
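# Note: unlike the add_lines()-based version above, this variant hands the
# whole text to the lower-level texts._add_text() as a single string,
# avoiding the cost of splitting it into lines; the [0:2] slice again keeps
# the returned sha1 and byte count.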
class RootCommitBuilder(CommitBuilder):
def suspend_write_group(self):
raise errors.UnsuspendableWriteGroup(self)
def get_missing_parent_inventories(self, check_for_missing_texts=True):
"""Return the keys of missing inventory parents for revisions added in
A revision is not complete if the inventory delta for that revision
cannot be calculated. Therefore if the parent inventories of a
revision are not present, the revision is incomplete, and e.g. cannot
be streamed by a smart server. This method finds missing inventory
parents for revisions added in this write group.
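# Illustrative use (sketch, not from the original source): code streaming
# revisions into a stacked repository can call
#   missing = repo.get_missing_parent_inventories()
# before committing the write group and fetch the returned
# ('inventories', revid) keys from another source so the newly added
# revisions become complete.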
if not self._format.supports_external_lookups:
# This is only an issue for stacked repositories
if not self.is_in_write_group():
raise AssertionError('not in a write group')
# XXX: We assume that every added revision already has its
# corresponding inventory, so we only check for parent inventories that
# might be missing, rather than all inventories.
parents = set(self.revisions._index.get_missing_parents())
parents.discard(_mod_revision.NULL_REVISION)
unstacked_inventories = self.inventories._index
present_inventories = unstacked_inventories.get_parent_map(
key[-1:] for key in parents)
parents.difference_update(present_inventories)
if len(parents) == 0:
# No missing parent inventories.
if not check_for_missing_texts:
return set(('inventories', rev_id) for (rev_id,) in parents)
# Ok, now we have a list of missing inventories. But these only matter
# if the inventories that reference them are missing some texts they
# appear to introduce.
# XXX: Texts referenced by all added inventories need to be present,
# but at the moment we're only checking for texts referenced by
# inventories at the graph's edge.
key_deps = self.revisions._index._key_dependencies
key_deps.add_keys(present_inventories)
referrers = frozenset(r[0] for r in key_deps.get_referrers())
file_ids = self.fileids_altered_by_revision_ids(referrers)
missing_texts = set()
for file_id, version_ids in file_ids.iteritems():
missing_texts.update(
(file_id, version_id) for version_id in version_ids)
present_texts = self.texts.get_parent_map(missing_texts)
missing_texts.difference_update(present_texts)
if not missing_texts:
# No texts are missing, so all revisions and their deltas are
# Alternatively the text versions could be returned as the missing
# keys, but this is likely to be less data.
missing_keys = set(('inventories', rev_id) for (rev_id,) in parents)
def refresh_data(self):
"""Re-read any data needed to synchronise with disk.
This method is intended to be called after another repository instance
(such as one used by a smart server) has inserted data into the
repository. It may not be called during a write group, but may be
called at any other time.
if self.is_in_write_group():
raise errors.InternalBzrError(
"May not refresh_data while in a write group.")
self._refresh_data()
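# Typical use (sketch): after a smart-server request has added data through
# a different Repository instance, call repo.refresh_data() outside any
# write group so this instance notices the newly inserted data.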
def resume_write_group(self, tokens):
if not self.is_write_locked():
raise errors.NotWriteLocked(self)
result[key] = True
def _inventory_xml_lines_for_keys(self, keys):
"""Get a line iterator of the sort needed for finding references.
Not relevant for non-xml inventory repositories.
Ghosts in revision_keys are ignored.
:param revision_keys: The revision keys for the inventories to inspect.
:return: An iterator over (inventory line, revid) for the fulltexts of
all of the xml inventories specified by revision_keys.
stream = self.inventories.get_record_stream(keys, 'unordered', True)
for record in stream:
if record.storage_kind != 'absent':
chunks = record.get_bytes_as('chunked')
revid = record.key[-1]
lines = osutils.chunks_to_lines(chunks)
def _find_file_ids_from_xml_inventory_lines(self, line_iterator,
"""Helper routine for fileids_altered_by_revision_ids.
This performs the translation of xml lines to revision ids.
:param line_iterator: An iterator of lines, origin_version_id
:param revision_ids: The revision ids to filter for. This should be a
:param revision_keys: The revision keys to filter for. This should be a
set or other type which supports efficient __contains__ lookups, as
the revision id from each parsed line will be looked up in the
revision_ids filter.
the revision key from each parsed line will be looked up in the
revision_keys filter.
:return: a dictionary mapping altered file-ids to an iterable of
revision_ids. Each altered file-id has the exact revision_ids that
altered it listed explicitly.
seen = set(self._find_text_key_references_from_xml_inventory_lines(
line_iterator).iterkeys())
parent_keys = self._find_parent_keys_of_revisions(revision_keys)
parent_seen = set(self._find_text_key_references_from_xml_inventory_lines(
self._inventory_xml_lines_for_keys(parent_keys)))
new_keys = seen - parent_seen
setdefault = result.setdefault
self._find_text_key_references_from_xml_inventory_lines(
line_iterator).iterkeys():
# once data is all ensured-consistent; then this is
# if revision_id == version_id
if key[-1:] in revision_ids:
setdefault(key[0], set()).add(key[-1])
for key in new_keys:
setdefault(key[0], set()).add(key[-1])
def _find_parent_ids_of_revisions(self, revision_ids):
"""Find all parent ids that are mentioned in the revision graph.
:return: set of revisions that are parents of revision_ids which are
not part of revision_ids themselves
parent_map = self.get_parent_map(revision_ids)
map(parent_ids.update, parent_map.itervalues())
parent_ids.difference_update(revision_ids)
parent_ids.discard(_mod_revision.NULL_REVISION)
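# Illustrative example (sketch): for a linear graph A -> B -> C,
# _find_parent_ids_of_revisions(['B', 'C']) returns set(['A']); B is a
# parent of C but is dropped because it is itself in revision_ids, and
# NULL_REVISION is always discarded.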
def _find_parent_keys_of_revisions(self, revision_keys):
"""Similar to _find_parent_ids_of_revisions, but used with keys.
:param revision_keys: An iterable of revision_keys.
:return: The parents of all revision_keys that are not already in
parent_map = self.revisions.get_parent_map(revision_keys)
map(parent_keys.update, parent_map.itervalues())
parent_keys.difference_update(revision_keys)
parent_keys.discard(_mod_revision.NULL_REVISION)
def fileids_altered_by_revision_ids(self, revision_ids, _inv_weave=None):
"""Find the file ids and versions affected by revisions.
return self.source.revision_ids_to_search_result(result_set)
class InterPackRepo(InterSameDataRepository):
"""Optimised code paths between Pack based repositories."""
def _get_repo_format_to_test(self):
from bzrlib.repofmt import pack_repo
return pack_repo.RepositoryFormatKnitPack1()
def is_compatible(source, target):
"""Be compatible with known Pack formats.
We don't test for the stores being of specific types because that
could lead to confusing results, and there is no need to be
from bzrlib.repofmt.pack_repo import RepositoryFormatPack
are_packs = (isinstance(source._format, RepositoryFormatPack) and
isinstance(target._format, RepositoryFormatPack))
except AttributeError:
return are_packs and InterRepository._same_model(source, target)
def fetch(self, revision_id=None, pb=None, find_ghosts=False):
"""See InterRepository.fetch()."""
if (len(self.source._fallback_repositories) > 0 or
len(self.target._fallback_repositories) > 0):
# The pack layer is not aware of fallback repositories, so when
# fetching from a stacked repository or into a stacked repository
# we use the generic fetch logic which uses the VersionedFiles
# attributes on repository.
from bzrlib.fetch import RepoFetcher
fetcher = RepoFetcher(self.target, self.source, revision_id,
mutter("Using fetch logic to copy between %s(%s) and %s(%s)",
self.source, self.source._format, self.target, self.target._format)
if revision_id is None:
# everything to do - use pack logic
# to fetch from all packs to one without
# inventory parsing etc, IFF nothing to be copied is in the target.
source_revision_ids = frozenset(self.source.all_revision_ids())
revision_ids = source_revision_ids - \
frozenset(self.target_get_parent_map(source_revision_ids))
revision_keys = [(revid,) for revid in revision_ids]
target_pack_collection = self._get_target_pack_collection()
index = target_pack_collection.revision_index.combined_index
present_revision_ids = set(item[1][0] for item in
index.iter_entries(revision_keys))
revision_ids = set(revision_ids) - present_revision_ids
# implementing the TODO will involve:
# - detecting when all of a pack is selected
# - avoiding as much as possible pre-selection, so the
#   more-core routines such as create_pack_from_packs can filter in
#   a just-in-time fashion. (though having a HEADS list on a
#   repository might make this a lot easier, because we could
#   sensibly detect 'new revisions' without doing a full index scan.
elif _mod_revision.is_null(revision_id):
revision_ids = self.search_missing_revision_ids(revision_id,
find_ghosts=find_ghosts).get_keys()
except errors.NoSuchRevision:
raise errors.InstallFailed([revision_id])
if len(revision_ids) == 0:
return self._pack(self.source, self.target, revision_ids)
def _pack(self, source, target, revision_ids):
from bzrlib.repofmt.pack_repo import Packer
target_pack_collection = self._get_target_pack_collection()
packs = source._pack_collection.all_packs()
pack = Packer(target_pack_collection, packs, '.fetch',
revision_ids).pack()
if pack is not None:
target_pack_collection._save_pack_names()
copied_revs = pack.get_revision_count()
# Trigger an autopack. This may duplicate effort as we've just done
# a pack creation, but for now it is simpler to think about as
# 'upload data, then repack if needed'.
return (copied_revs, [])
def _autopack(self):
self.target._pack_collection.autopack()
def _get_target_pack_collection(self):
return self.target._pack_collection
def search_missing_revision_ids(self, revision_id=None, find_ghosts=True):
"""See InterRepository.missing_revision_ids().
:param find_ghosts: Find ghosts throughout the ancestry of
if not find_ghosts and revision_id is not None:
return self._walk_to_common_revisions([revision_id])
elif revision_id is not None:
# Find ghosts: search for revisions pointing from one repository to
# the other, and vice versa, anywhere in the history of revision_id.
graph = self.target_get_graph(other_repository=self.source)
searcher = graph._make_breadth_first_searcher([revision_id])
next_revs, ghosts = searcher.next_with_ghosts()
except StopIteration:
if revision_id in ghosts:
raise errors.NoSuchRevision(self.source, revision_id)
found_ids.update(next_revs)
found_ids.update(ghosts)
found_ids = frozenset(found_ids)
# Double query here: should be able to avoid this by changing the
# graph api further.
result_set = found_ids - frozenset(
self.target_get_parent_map(found_ids))
source_ids = self.source.all_revision_ids()
# source_ids is the worst possible case we may need to pull.
# now we want to filter source_ids against what we actually
# have in target, but don't try to check for existence where we know
# we do not have a revision as that would be pointless.
target_ids = set(self.target.all_revision_ids())
result_set = set(source_ids).difference(target_ids)
return self.source.revision_ids_to_search_result(result_set)
class InterDifferingSerializer(InterKnitRepo):
class InterDifferingSerializer(InterRepository):
def _get_repo_format_to_test(self):
return deltas[0][1:]
def _get_parent_keys(self, root_key, parent_map):
"""Get the parent keys for a given root id."""
root_id, rev_id = root_key
# Include direct parents of the revision, but only if they used
# the same root_id and are heads.
for parent_id in parent_map[rev_id]:
if parent_id == _mod_revision.NULL_REVISION:
if parent_id not in self._revision_id_to_root_id:
# We probably didn't read this revision, go spend the
# extra effort to actually check
tree = self.source.revision_tree(parent_id)
except errors.NoSuchRevision:
# Ghost, fill out _revision_id_to_root_id in case we
# encounter this again.
# But set parent_root_id to None since we don't really know
parent_root_id = None
parent_root_id = tree.get_root_id()
self._revision_id_to_root_id[parent_id] = None
parent_root_id = self._revision_id_to_root_id[parent_id]
if root_id == parent_root_id:
# With stacking we _might_ want to refer to a non-local
# revision, but this code path only applies when we have the
# full content available, so ghosts really are ghosts, not just
# the edge of local data.
parent_keys.append((parent_id,))
# root_id may be in the parent anyway.
tree = self.source.revision_tree(parent_id)
except errors.NoSuchRevision:
# ghost, can't refer to it.
parent_keys.append((tree.inventory[root_id].revision,))
except errors.NoSuchId:
g = graph.Graph(self.source.revisions)
heads = g.heads(parent_keys)
for key in parent_keys:
if key in heads and key not in selected_keys:
selected_keys.append(key)
return tuple([(root_id,)+ key for key in selected_keys])
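# Illustrative example (sketch): if root_key is ('TREE_ROOT', 'rev-2') and
# the only surviving head among the candidate parents is ('rev-1',), the
# method returns (('TREE_ROOT', 'rev-1'),) - the text key of the root in
# the parent revision that used the same root_id.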
def _new_root_data_stream(self, root_keys_to_create, parent_map):
for root_key in root_keys_to_create:
parent_keys = self._get_parent_keys(root_key, parent_map)
yield versionedfile.FulltextContentFactory(root_key,
parent_keys, None, '')
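# Note: each synthesised root text is an empty fulltext; what matters are
# the parent keys computed by _get_parent_keys(), which give the target a
# per-file graph for the tree root without transmitting any content.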
def _fetch_batch(self, revision_ids, basis_id, cache):
"""Fetch across a few revisions.
return basis_id, basis_tree
class InterOtherToRemote(InterRepository):
"""An InterRepository that simply delegates to the 'real' InterRepository
calculated for (source, target._real_repository).
_walk_to_common_revisions_batch_size = 50
def __init__(self, source, target):
InterRepository.__init__(self, source, target)
self._real_inter = None
def is_compatible(source, target):
if isinstance(target, remote.RemoteRepository):
def _ensure_real_inter(self):
if self._real_inter is None:
self.target._ensure_real()
real_target = self.target._real_repository
self._real_inter = InterRepository.get(self.source, real_target)
# Make _real_inter use the RemoteRepository for get_parent_map
self._real_inter.target_get_graph = self.target.get_graph
self._real_inter.target_get_parent_map = self.target.get_parent_map
def copy_content(self, revision_id=None):
self._ensure_real_inter()
self._real_inter.copy_content(revision_id=revision_id)
def fetch(self, revision_id=None, pb=None, find_ghosts=False):
self._ensure_real_inter()
return self._real_inter.fetch(revision_id=revision_id, pb=pb,
find_ghosts=find_ghosts)
def _get_repo_format_to_test(self):
class InterRemoteToOther(InterRepository):
def __init__(self, source, target):
InterRepository.__init__(self, source, target)
self._real_inter = None
def is_compatible(source, target):
if not isinstance(source, remote.RemoteRepository):
return InterRepository._same_model(source, target)
def _ensure_real_inter(self):
if self._real_inter is None:
self.source._ensure_real()
real_source = self.source._real_repository
self._real_inter = InterRepository.get(real_source, self.target)
def fetch(self, revision_id=None, pb=None, find_ghosts=False):
"""See InterRepository.fetch()."""
# Always fetch using the generic streaming fetch code, to allow
# streaming fetching from remote servers.
from bzrlib.fetch import RepoFetcher
fetcher = RepoFetcher(self.target, self.source, revision_id,
def copy_content(self, revision_id=None):
self._ensure_real_inter()
self._real_inter.copy_content(revision_id=revision_id)
def _get_repo_format_to_test(self):
class InterPackToRemotePack(InterPackRepo):
"""A specialisation of InterPackRepo for a target that is a
This will use the get_parent_map RPC rather than plain readvs, and also
uses an RPC for autopacking.
_walk_to_common_revisions_batch_size = 50
def is_compatible(source, target):
from bzrlib.repofmt.pack_repo import RepositoryFormatPack
if isinstance(source._format, RepositoryFormatPack):
if isinstance(target, remote.RemoteRepository):
target._format._ensure_real()
if isinstance(target._format._custom_format,
RepositoryFormatPack):
if InterRepository._same_model(source, target):
def _autopack(self):
self.target.autopack()
def fetch(self, revision_id=None, pb=None, find_ghosts=False):
"""See InterRepository.fetch()."""
# Always fetch using the generic streaming fetch code, to allow
# streaming fetching into remote servers.
from bzrlib.fetch import RepoFetcher
fetcher = RepoFetcher(self.target, self.source, revision_id,
def _get_target_pack_collection(self):
return self.target._real_repository._pack_collection
def _get_repo_format_to_test(self):
InterRepository.register_optimiser(InterDifferingSerializer)
InterRepository.register_optimiser(InterSameDataRepository)
InterRepository.register_optimiser(InterWeaveRepo)
InterRepository.register_optimiser(InterKnitRepo)
InterRepository.register_optimiser(InterPackRepo)
InterRepository.register_optimiser(InterOtherToRemote)
InterRepository.register_optimiser(InterRemoteToOther)
InterRepository.register_optimiser(InterPackToRemotePack)
class CopyConverter(object):
return (not self.from_repository._format.rich_root_data and
self.to_format.rich_root_data)
def _get_inventory_stream(self, revision_ids):
from_format = self.from_repository._format
if (from_format.supports_chks and self.to_format.supports_chks
and (from_format._serializer == self.to_format._serializer)):
# Both sides support chks, and they use the same serializer, so it
# is safe to transmit the chk pages and inventory pages across
return self._get_chk_inventory_stream(revision_ids)
elif (not from_format.supports_chks):
# Source repository doesn't support chks. So we can transmit the
# inventories 'as-is' and either they are just accepted on the
# target, or the Sink will properly convert it.
return self._get_simple_inventory_stream(revision_ids)
# XXX: Hack to make not-chk->chk fetch: copy the inventories as
# inventories. Note that this should probably be done somehow
# as part of bzrlib.repository.StreamSink. Except JAM couldn't
# figure out how a non-chk repository could possibly handle
# deserializing an inventory stream from a chk repo, as it
# doesn't have a way to understand individual pages.
return self._get_convertable_inventory_stream(revision_ids)
def _get_simple_inventory_stream(self, revision_ids):
from_weave = self.from_repository.inventories
yield ('inventories', from_weave.get_record_stream(
[(rev_id,) for rev_id in revision_ids],
self.inventory_fetch_order(),
not self.delta_on_metadata()))
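# Note: the yielded value is an ('inventories', record_stream) substream
# tuple, the same shape produced by the other _get_*_inventory_stream
# helpers below, so callers need not care which path built it.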
def _get_chk_inventory_stream(self, revision_ids):
"""Fetch the inventory texts, along with the associated chk maps."""
# We want an inventory outside of the search set, so that we can filter
# out uninteresting chk pages. For now we use
# _find_revision_outside_set, but if we had a Search with cut_revs, we
# could use that instead.
start_rev_id = self.from_repository._find_revision_outside_set(
start_rev_key = (start_rev_id,)
inv_keys_to_fetch = [(rev_id,) for rev_id in revision_ids]
if start_rev_id != _mod_revision.NULL_REVISION:
inv_keys_to_fetch.append((start_rev_id,))
# Any repo that supports chk_bytes must also support out-of-order
# insertion. At least, that is how we expect it to work
# We use get_record_stream instead of iter_inventories because we want
# to be able to insert the stream as well. We could instead fetch
# allowing deltas, and then iter_inventories, but we don't know whether
# source or target is more 'local' anyway.
inv_stream = self.from_repository.inventories.get_record_stream(
inv_keys_to_fetch, 'unordered',
True) # We need them as full-texts so we can find their references
uninteresting_chk_roots = set()
interesting_chk_roots = set()
def filter_inv_stream(inv_stream):
for idx, record in enumerate(inv_stream):
### child_pb.update('fetch inv', idx, len(inv_keys_to_fetch))
bytes = record.get_bytes_as('fulltext')
chk_inv = inventory.CHKInventory.deserialise(
self.from_repository.chk_bytes, bytes, record.key)
if record.key == start_rev_key:
uninteresting_chk_roots.add(chk_inv.id_to_entry.key())
p_id_map = chk_inv.parent_id_basename_to_file_id
if p_id_map is not None:
uninteresting_chk_roots.add(p_id_map.key())
interesting_chk_roots.add(chk_inv.id_to_entry.key())
p_id_map = chk_inv.parent_id_basename_to_file_id
if p_id_map is not None:
interesting_chk_roots.add(p_id_map.key())
### pb.update('fetch inventory', 0, 2)
yield ('inventories', filter_inv_stream(inv_stream))
# Now that we have worked out all of the interesting root nodes, grab
# all of the interesting pages and insert them
### pb.update('fetch inventory', 1, 2)
interesting = chk_map.iter_interesting_nodes(
self.from_repository.chk_bytes, interesting_chk_roots,
uninteresting_chk_roots)
def to_stream_adapter():
"""Adapt the iter_interesting_nodes result to a single stream.
iter_interesting_nodes returns records as it processes them, along
with keys. However, we only want to return the records themselves.
for record, items in interesting:
if record is not None:
# XXX: We could instead call get_record_stream(records.keys())
#      ATM, this will always insert the records as fulltexts, and
#      requires that you can hang on to records once you have gone
#      on to the next one. Further, it causes the target to
#      recompress the data. Testing shows it to be faster than
#      requesting the records again, though.
yield ('chk_bytes', to_stream_adapter())
### pb.update('fetch inventory', 2, 2)
def _get_convertable_inventory_stream(self, revision_ids):
# XXX: One of source or target is using chks, and they don't have
#      compatible serializations. The StreamSink code expects to be
#      able to convert on the target, so we need to put
#      bytes-on-the-wire that can be converted
yield ('inventories', self._stream_invs_as_fulltexts(revision_ids))
def _stream_invs_as_fulltexts(self, revision_ids):
from_repo = self.from_repository
from_serializer = from_repo._format._serializer
revision_keys = [(rev_id,) for rev_id in revision_ids]
parent_map = from_repo.inventories.get_parent_map(revision_keys)
for inv in self.from_repository.iter_inventories(revision_ids):
# XXX: This is a bit hackish, but it works. Basically,
#      CHKSerializer 'accidentally' supports
#      read/write_inventory_to_string, even though that is never
#      the format that is stored on disk. It *does* give us a
#      single string representation for an inventory, so live with
# This would be far better if we had a 'serialized inventory
# delta' form. Then we could use 'inventory._make_delta', and
# transmit that. This would both be faster to generate, and
# result in fewer bytes-on-the-wire.
as_bytes = from_serializer.write_inventory_to_string(inv)
key = (inv.revision_id,)
parent_keys = parent_map.get(key, ())
yield versionedfile.FulltextContentFactory(
key, parent_keys, None, as_bytes)
def _iter_for_revno(repo, partial_history_cache, stop_index=None,
stop_revision=None):
"""Extend the partial history to include a given index
If a stop_index is supplied, stop when that index has been reached.
If a stop_revision is supplied, stop when that revision is
encountered. Otherwise, stop when the beginning of history is
:param stop_index: The index which should be present. When it is
present, history extension will stop.
:param stop_revision: The revision id which should be present. When
it is encountered, history extension will stop.
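# Illustrative usage (sketch, not from the original source): with a cache
# seeded from the branch tip,
#   cache = [tip_revision_id]
#   _iter_for_revno(repo, cache, stop_index=10)
# walks left-hand history backwards until cache[10] exists (11 entries) or
# the start of history is reached.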
4388
start_revision = partial_history_cache[-1]
4389
iterator = repo.iter_reverse_revision_history(start_revision)
4391
#skip the last revision in the list
4394
if (stop_index is not None and
4395
len(partial_history_cache) > stop_index):
4397
if partial_history_cache[-1] == stop_revision:
4399
revision_id = iterator.next()
4400
partial_history_cache.append(revision_id)
4401
except StopIteration: