674
675
return self._inventory_add_lines(revision_id, parents,
675
676
inv_lines, check_content=False)
678
def _create_inv_from_null(self, delta, revision_id):
679
"""This will mutate new_inv directly.
681
This is a simplified form of create_by_apply_delta which knows that all
682
the old values must be None, so everything is a create.
684
serializer = self._format._serializer
685
new_inv = inventory.CHKInventory(serializer.search_key_name)
686
new_inv.revision_id = revision_id
687
entry_to_bytes = new_inv._entry_to_bytes
688
id_to_entry_dict = {}
689
parent_id_basename_dict = {}
690
for old_path, new_path, file_id, entry in delta:
691
if old_path is not None:
692
raise ValueError('Invalid delta, somebody tried to delete %r'
693
' from the NULL_REVISION'
694
% ((old_path, file_id),))
696
raise ValueError('Invalid delta, delta from NULL_REVISION has'
697
' no new_path %r' % (file_id,))
699
new_inv.root_id = file_id
700
parent_id_basename_key = ('', '')
702
utf8_entry_name = entry.name.encode('utf-8')
703
parent_id_basename_key = (entry.parent_id, utf8_entry_name)
704
new_value = entry_to_bytes(entry)
706
# new_inv._path_to_fileid_cache[new_path] = file_id
707
id_to_entry_dict[(file_id,)] = new_value
708
parent_id_basename_dict[parent_id_basename_key] = file_id
710
new_inv._populate_from_dicts(self.chk_bytes, id_to_entry_dict,
711
parent_id_basename_dict, maximum_size=serializer.maximum_size)
677
714
def add_inventory_by_delta(self, basis_revision_id, delta, new_revision_id,
678
715
parents, basis_inv=None, propagate_caches=False):
679
716
"""Add a new inventory expressed as a delta against another revision.
699
736
repository format specific) of the serialized inventory, and the
700
737
resulting inventory.
702
if basis_revision_id == _mod_revision.NULL_REVISION:
703
return KnitPackRepository.add_inventory_by_delta(self,
704
basis_revision_id, delta, new_revision_id, parents)
705
739
if not self.is_in_write_group():
706
740
raise AssertionError("%r not in write group" % (self,))
707
741
_mod_revision.check_not_reserved_id(new_revision_id)
708
basis_tree = self.revision_tree(basis_revision_id)
709
basis_tree.lock_read()
711
if basis_inv is None:
743
if basis_inv is None:
744
if basis_revision_id == _mod_revision.NULL_REVISION:
745
new_inv = self._create_inv_from_null(delta, new_revision_id)
746
inv_lines = new_inv.to_lines()
747
return self._inventory_add_lines(new_revision_id, parents,
748
inv_lines, check_content=False), new_inv
750
basis_tree = self.revision_tree(basis_revision_id)
751
basis_tree.lock_read()
712
752
basis_inv = basis_tree.inventory
713
754
result = basis_inv.create_by_apply_delta(delta, new_revision_id,
714
755
propagate_caches=propagate_caches)
715
756
inv_lines = result.to_lines()
716
757
return self._inventory_add_lines(new_revision_id, parents,
717
758
inv_lines, check_content=False), result
760
if basis_tree is not None:
721
763
def _iter_inventories(self, revision_ids):
722
764
"""Iterate over many inventory objects."""
736
778
# make it raise to trap naughty direct users.
737
779
raise NotImplementedError(self._iter_inventory_xmls)
739
def _find_parent_ids_of_revisions(self, revision_ids):
740
# TODO: we probably want to make this a helper that other code can get
742
parent_map = self.get_parent_map(revision_ids)
744
map(parents.update, parent_map.itervalues())
745
parents.difference_update(revision_ids)
746
parents.discard(_mod_revision.NULL_REVISION)
749
def _find_present_inventory_ids(self, revision_ids):
750
keys = [(r,) for r in revision_ids]
751
parent_map = self.inventories.get_parent_map(keys)
752
present_inventory_ids = set(k[-1] for k in parent_map)
753
return present_inventory_ids
781
def _find_present_inventory_keys(self, revision_keys):
782
parent_map = self.inventories.get_parent_map(revision_keys)
783
present_inventory_keys = set(k for k in parent_map)
784
return present_inventory_keys
755
786
def fileids_altered_by_revision_ids(self, revision_ids, _inv_weave=None):
756
787
"""Find the file ids and versions affected by revisions.
767
798
file_id_revisions = {}
768
799
pb = ui.ui_factory.nested_progress_bar()
770
parent_ids = self._find_parent_ids_of_revisions(revision_ids)
771
present_parent_inv_ids = self._find_present_inventory_ids(parent_ids)
801
revision_keys = [(r,) for r in revision_ids]
802
parent_keys = self._find_parent_keys_of_revisions(revision_keys)
803
# TODO: instead of using _find_present_inventory_keys, change the
804
# code paths to allow missing inventories to be tolerated.
805
# However, we only want to tolerate missing parent
806
# inventories, not missing inventories for revision_ids
807
present_parent_inv_keys = self._find_present_inventory_keys(
809
present_parent_inv_ids = set(
810
[k[-1] for k in present_parent_inv_keys])
772
811
uninteresting_root_keys = set()
773
812
interesting_root_keys = set()
774
inventories_to_read = set(present_parent_inv_ids)
775
inventories_to_read.update(revision_ids)
813
inventories_to_read = set(revision_ids)
814
inventories_to_read.update(present_parent_inv_ids)
776
815
for inv in self.iter_inventories(inventories_to_read):
777
816
entry_chk_root_key = inv.id_to_entry.key()
778
817
if inv.revision_id in present_parent_inv_ids:
846
885
return super(CHKInventoryRepository, self)._get_source(to_format)
849
class GroupCHKStreamSource(repository.StreamSource):
888
class GroupCHKStreamSource(KnitPackStreamSource):
850
889
"""Used when both the source and target repo are GroupCHK repos."""
852
891
def __init__(self, from_repository, to_format):
854
893
super(GroupCHKStreamSource, self).__init__(from_repository, to_format)
855
894
self._revision_keys = None
856
895
self._text_keys = None
896
self._text_fetch_order = 'groupcompress'
857
897
self._chk_id_roots = None
858
898
self._chk_p_id_roots = None
898
938
p_id_roots_set.clear()
899
939
return ('inventories', _filtered_inv_stream())
901
def _find_present_inventories(self, revision_ids):
902
revision_keys = [(r,) for r in revision_ids]
903
inventories = self.from_repository.inventories
904
present_inventories = inventories.get_parent_map(revision_keys)
905
return [p[-1] for p in present_inventories]
907
def _get_filtered_chk_streams(self, excluded_revision_ids):
941
def _get_filtered_chk_streams(self, excluded_revision_keys):
908
942
self._text_keys = set()
909
excluded_revision_ids.discard(_mod_revision.NULL_REVISION)
910
if not excluded_revision_ids:
943
excluded_revision_keys.discard(_mod_revision.NULL_REVISION)
944
if not excluded_revision_keys:
911
945
uninteresting_root_keys = set()
912
946
uninteresting_pid_root_keys = set()
915
949
# actually present
916
950
# TODO: Update Repository.iter_inventories() to add
917
951
# ignore_missing=True
918
present_ids = self.from_repository._find_present_inventory_ids(
919
excluded_revision_ids)
920
present_ids = self._find_present_inventories(excluded_revision_ids)
952
present_keys = self.from_repository._find_present_inventory_keys(
953
excluded_revision_keys)
954
present_ids = [k[-1] for k in present_keys]
921
955
uninteresting_root_keys = set()
922
956
uninteresting_pid_root_keys = set()
923
957
for inv in self.from_repository.iter_inventories(present_ids):
948
982
self._chk_p_id_roots = None
949
983
yield 'chk_bytes', _get_parent_id_basename_to_file_id_pages()
951
def _get_text_stream(self):
952
# Note: We know we don't have to handle adding root keys, because both
953
# the source and target are GCCHK, and those always support rich-roots
954
# We may want to request as 'unordered', in case the source has done a
956
return ('texts', self.from_repository.texts.get_record_stream(
957
self._text_keys, 'groupcompress', False))
959
985
def get_stream(self, search):
960
986
revision_ids = search.get_keys()
961
987
for stream_info in self._fetch_revision_texts(revision_ids):
966
992
# For now, exclude all parents that are at the edge of ancestry, for
967
993
# which we have inventories
968
994
from_repo = self.from_repository
969
parent_ids = from_repo._find_parent_ids_of_revisions(revision_ids)
970
for stream_info in self._get_filtered_chk_streams(parent_ids):
995
parent_keys = from_repo._find_parent_keys_of_revisions(
997
for stream_info in self._get_filtered_chk_streams(parent_keys):
971
998
yield stream_info
972
999
yield self._get_text_stream()
991
1018
# no unavailable texts when the ghost inventories are not filled in.
992
1019
yield self._get_inventory_stream(missing_inventory_keys,
993
1020
allow_absent=True)
994
# We use the empty set for excluded_revision_ids, to make it clear that
995
# we want to transmit all referenced chk pages.
1021
# We use the empty set for excluded_revision_keys, to make it clear
1022
# that we want to transmit all referenced chk pages.
996
1023
for stream_info in self._get_filtered_chk_streams(set()):
997
1024
yield stream_info
1066
1093
def get_format_string(self):
1067
1094
"""See RepositoryFormat.get_format_string()."""
1068
1095
return ('Bazaar development format - chk repository with bencode '
1069
'revision serialization (needs bzr.dev from 1.15)\n')
1096
'revision serialization (needs bzr.dev from 1.16)\n')
1099
class RepositoryFormat2a(RepositoryFormatCHK2):
1100
"""A CHK repository that uses the bencode revision serializer.
1102
This is the same as RepositoryFormatCHK2 but with a public name.
1105
_serializer = chk_serializer.chk_bencode_serializer
1107
def _get_matching_bzrdir(self):
1108
return bzrdir.format_registry.make_bzrdir('2a')
1110
def _ignore_setting_bzrdir(self, format):
1113
_matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)
1115
def get_format_string(self):
1116
return ('Bazaar repository format 2a (needs bzr 1.16 or later)\n')