@@ -217 +216 @@
             p_id_roots_set = set()
             stream = source_vf.get_record_stream(keys, 'groupcompress', True)
             for idx, record in enumerate(stream):
+                # Inventories should always be with revisions; assume success.
                 bytes = record.get_bytes_as('fulltext')
                 chk_inv = inventory.CHKInventory.deserialise(None, bytes,
@@ -293 +293 @@
                 stream = source_vf.get_record_stream(cur_keys,
                                                      'as-requested', True)
                 for record in stream:
+                    if record.storage_kind == 'absent':
+                        # An absent CHK record: we assume that the missing
+                        # record is in a different pack - e.g. a page not
+                        # altered by the commit we're packing.
+                        continue
                     bytes = record.get_bytes_as('fulltext')
                     # We don't care about search_key_func for this code,
                     # because we only care about external references.
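The hunk above makes the CHK copy loop tolerate 'absent' records instead of failing on them. A minimal sketch of that consumer pattern, using plain Python stand-ins for bzrlib's record stream (FakeRecord is hypothetical, not a bzrlib class):

```python
# Illustrative sketch only: stand-ins for bzrlib's record stream, showing
# why the pack loop can skip 'absent' records instead of raising.
class FakeRecord(object):
    def __init__(self, key, storage_kind, text=None):
        self.key = key
        self.storage_kind = storage_kind
        self._text = text

    def get_bytes_as(self, kind):
        assert kind == 'fulltext'
        return self._text

stream = [
    FakeRecord(('sha1:aaa',), 'fulltext', 'chk page 1'),
    # A page stored in another pack: the source only reports it as absent.
    FakeRecord(('sha1:bbb',), 'absent'),
    FakeRecord(('sha1:ccc',), 'fulltext', 'chk page 2'),
]

copied = []
for record in stream:
    if record.storage_kind == 'absent':
        continue  # assume the missing page lives in a different pack
    copied.append(record.get_bytes_as('fulltext'))

assert copied == ['chk page 1', 'chk page 2']
```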
@@ -438 +443 @@
         #      is grabbing too many keys...
         text_keys = source_vf.keys()
         self._copy_stream(source_vf, target_vf, text_keys,
-                          'text', self._get_progress_stream, 4)
+                          'texts', self._get_progress_stream, 4)

     def _copy_signature_texts(self):
         source_vf, target_vf = self._build_vfs('signature', False, False)
@@ -557 +562 @@
     pack_factory = GCPack
     resumed_pack_factory = ResumedGCPack

-    def _already_packed(self):
-        """Is the collection already packed?"""
-        # Always repack GC repositories for now
-        return False
-
     def _execute_pack_operations(self, pack_operations,
                                  _packer_class=GCCHKPacker,
                                  reload_func=None):
@@ -620 +620 @@
         self.inventories = GroupCompressVersionedFiles(
             _GCGraphIndex(self._pack_collection.inventory_index.combined_index,
                 add_callback=self._pack_collection.inventory_index.add_callback,
-                parents=True, is_locked=self.is_locked),
+                parents=True, is_locked=self.is_locked,
+                inconsistency_fatal=False),
             access=self._pack_collection.inventory_index.data_access)
         self.revisions = GroupCompressVersionedFiles(
             _GCGraphIndex(self._pack_collection.revision_index.combined_index,
@@ -632 +633 @@
         self.signatures = GroupCompressVersionedFiles(
             _GCGraphIndex(self._pack_collection.signature_index.combined_index,
                 add_callback=self._pack_collection.signature_index.add_callback,
-                parents=False, is_locked=self.is_locked),
+                parents=False, is_locked=self.is_locked,
+                inconsistency_fatal=False),
             access=self._pack_collection.signature_index.data_access,
             delta=False)
         self.texts = GroupCompressVersionedFiles(
             _GCGraphIndex(self._pack_collection.text_index.combined_index,
                 add_callback=self._pack_collection.text_index.add_callback,
-                parents=True, is_locked=self.is_locked),
+                parents=True, is_locked=self.is_locked,
+                inconsistency_fatal=False),
             access=self._pack_collection.text_index.data_access)
         # No parents, individual CHK pages don't have specific ancestry
         self.chk_bytes = GroupCompressVersionedFiles(
             _GCGraphIndex(self._pack_collection.chk_index.combined_index,
                 add_callback=self._pack_collection.chk_index.add_callback,
-                parents=False, is_locked=self.is_locked),
+                parents=False, is_locked=self.is_locked,
+                inconsistency_fatal=False),
             access=self._pack_collection.chk_index.data_access)
+        search_key_name = self._format._serializer.search_key_name
+        search_key_func = chk_map.search_key_registry.get(search_key_name)
+        self.chk_bytes._search_key_func = search_key_func
         # True when the repository object is 'write locked' (as opposed to the
         # physical lock only taken out around changes to the pack-names list.)
         # Another way to represent this would be a decorator around the control
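Each index here gains inconsistency_fatal=False, so re-adding a key whose details disagree with what is already recorded produces a warning rather than aborting the operation. A hedged sketch of the flag's effect, with SketchIndex standing in for bzrlib's _GCGraphIndex (the real class's internals differ):

```python
# Hedged sketch of the inconsistency_fatal idea (not bzrlib's actual
# _GCGraphIndex): re-adding a key with different details either raises or
# just records a warning, depending on the flag.
class SketchIndex(object):
    def __init__(self, inconsistency_fatal=True):
        self._nodes = {}
        self._inconsistency_fatal = inconsistency_fatal
        self.warnings = []

    def add_node(self, key, value):
        existing = self._nodes.get(key)
        if existing is not None and existing != value:
            if self._inconsistency_fatal:
                raise ValueError('inconsistent details for %r' % (key,))
            self.warnings.append(key)
            return
        self._nodes[key] = value

idx = SketchIndex(inconsistency_fatal=False)
idx.add_node(('rev-1',), 'details-a')
idx.add_node(('rev-1',), 'details-b')   # tolerated, only warned about
assert idx.warnings == [('rev-1',)]
```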
@@ -674 +681 @@
         return self._inventory_add_lines(revision_id, parents,
             inv_lines, check_content=False)
+
+    def _create_inv_from_null(self, delta, revision_id):
+        """This will mutate new_inv directly.
+
+        This is a simplified form of create_by_apply_delta which knows that all
+        the old values must be None, so everything is a create.
+        """
+        serializer = self._format._serializer
+        new_inv = inventory.CHKInventory(serializer.search_key_name)
+        new_inv.revision_id = revision_id
+        entry_to_bytes = new_inv._entry_to_bytes
+        id_to_entry_dict = {}
+        parent_id_basename_dict = {}
+        for old_path, new_path, file_id, entry in delta:
+            if old_path is not None:
+                raise ValueError('Invalid delta, somebody tried to delete %r'
+                                 ' from the NULL_REVISION'
+                                 % ((old_path, file_id),))
+            if new_path is None:
+                raise ValueError('Invalid delta, delta from NULL_REVISION has'
+                                 ' no new_path %r' % (file_id,))
+            if new_path == '':
+                new_inv.root_id = file_id
+                parent_id_basename_key = ('', '')
+            else:
+                utf8_entry_name = entry.name.encode('utf-8')
+                parent_id_basename_key = (entry.parent_id, utf8_entry_name)
+            new_value = entry_to_bytes(entry)
+            # Populate Caches?
+            # new_inv._path_to_fileid_cache[new_path] = file_id
+            id_to_entry_dict[(file_id,)] = new_value
+            parent_id_basename_dict[parent_id_basename_key] = file_id
+
+        new_inv._populate_from_dicts(self.chk_bytes, id_to_entry_dict,
+            parent_id_basename_dict, maximum_size=serializer.maximum_size)
+        return new_inv
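The new _create_inv_from_null only accepts pure creates: every delta row must have old_path None and a real new_path. A standalone sketch of that validation, with plain tuples in place of bzrlib InventoryEntry objects (check_null_delta is illustrative, not part of bzrlib):

```python
# A minimal stand-alone version of the validation in _create_inv_from_null,
# using plain tuples instead of bzrlib InventoryEntry objects.
def check_null_delta(delta):
    for old_path, new_path, file_id, entry in delta:
        if old_path is not None:
            raise ValueError('Invalid delta, somebody tried to delete %r'
                             ' from the NULL_REVISION' % ((old_path, file_id),))
        if new_path is None:
            raise ValueError('Invalid delta, delta from NULL_REVISION has'
                             ' no new_path %r' % (file_id,))

# Every entry in a delta against NULL_REVISION is a create:
delta = [
    (None, '', 'tree-root-id', object()),
    (None, 'hello.txt', 'hello-id', object()),
]
check_null_delta(delta)   # passes
```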
@@ -677 +720 @@
     def add_inventory_by_delta(self, basis_revision_id, delta, new_revision_id,
                                parents, basis_inv=None, propagate_caches=False):
         """Add a new inventory expressed as a delta against another revision.
@@ -699 +742 @@
         repository format specific) of the serialized inventory, and the
         resulting inventory.
         """
-        if basis_revision_id == _mod_revision.NULL_REVISION:
-            return KnitPackRepository.add_inventory_by_delta(self,
-                basis_revision_id, delta, new_revision_id, parents)
         if not self.is_in_write_group():
             raise AssertionError("%r not in write group" % (self,))
         _mod_revision.check_not_reserved_id(new_revision_id)
-        basis_tree = self.revision_tree(basis_revision_id)
-        basis_tree.lock_read()
-        try:
-            if basis_inv is None:
+        basis_tree = None
+        if basis_inv is None:
+            if basis_revision_id == _mod_revision.NULL_REVISION:
+                new_inv = self._create_inv_from_null(delta, new_revision_id)
+                inv_lines = new_inv.to_lines()
+                return self._inventory_add_lines(new_revision_id, parents,
+                    inv_lines, check_content=False), new_inv
+            else:
+                basis_tree = self.revision_tree(basis_revision_id)
+                basis_tree.lock_read()
                 basis_inv = basis_tree.inventory
+        try:
             result = basis_inv.create_by_apply_delta(delta, new_revision_id,
                 propagate_caches=propagate_caches)
             inv_lines = result.to_lines()
             return self._inventory_add_lines(new_revision_id, parents,
                 inv_lines, check_content=False), result
         finally:
-            basis_tree.unlock()
+            if basis_tree is not None:
+                basis_tree.unlock()
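The rewrite replaces "always lock a basis tree" with "lock one only when a basis actually exists", which is what lets the NULL_REVISION case skip tree creation entirely. A sketch of that lock-only-if-needed shape (Tree and apply_delta are stand-ins, not bzrlib API):

```python
# The rewritten method only takes a basis tree lock when there really is a
# basis; a sketch of that shape with a dummy lock.
class Tree(object):
    def __init__(self):
        self.locked = False
    def lock_read(self):
        self.locked = True
    def unlock(self):
        self.locked = False

def apply_delta(basis_tree):
    # basis_tree may be None when building from scratch (NULL_REVISION)
    if basis_tree is not None:
        basis_tree.lock_read()
    try:
        return 'result'
    finally:
        if basis_tree is not None:
            basis_tree.unlock()

assert apply_delta(None) == 'result'
t = Tree()
assert apply_delta(t) == 'result' and not t.locked
```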
@@ -721 +769 @@
     def _iter_inventories(self, revision_ids):
         """Iterate over many inventory objects."""
@@ -736 +784 @@
         # make it raise to trap naughty direct users.
         raise NotImplementedError(self._iter_inventory_xmls)
@@ -739 +787 @@
-    def _find_parent_ids_of_revisions(self, revision_ids):
-        # TODO: we probably want to make this a helper that other code can get
-        #       at
-        parent_map = self.get_parent_map(revision_ids)
-        parents = set()
-        map(parents.update, parent_map.itervalues())
-        parents.difference_update(revision_ids)
-        parents.discard(_mod_revision.NULL_REVISION)
-        return parents
-
-    def _find_present_inventory_ids(self, revision_ids):
-        keys = [(r,) for r in revision_ids]
-        parent_map = self.inventories.get_parent_map(keys)
-        present_inventory_ids = set(k[-1] for k in parent_map)
-        return present_inventory_ids
+    def _find_present_inventory_keys(self, revision_keys):
+        parent_map = self.inventories.get_parent_map(revision_keys)
+        present_inventory_keys = set(k for k in parent_map)
+        return present_inventory_keys
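The replaced helpers show the id-to-key shift: the index layer addresses inventories by tuple keys like ('rev-1',) rather than plain revision-id strings, and k[-1] recovers the id when one is needed. A small sketch with a hand-rolled parent_map (the dict literal is fabricated example data):

```python
# The pack-index layer addresses everything by tuple keys; the helpers were
# switched from string revision ids to those keys. k[-1] maps back to an id.
revision_ids = ['rev-1', 'rev-2']
revision_keys = [(r,) for r in revision_ids]

# get_parent_map returns a dict keyed only by the keys that are present;
# a fake result in which 'rev-2' is missing from the inventory index:
parent_map = {('rev-1',): (('rev-0',),)}

present_inventory_keys = set(k for k in parent_map)
present_inventory_ids = set(k[-1] for k in present_inventory_keys)
assert present_inventory_ids == set(['rev-1'])
```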
@@ -755 +792 @@
     def fileids_altered_by_revision_ids(self, revision_ids, _inv_weave=None):
         """Find the file ids and versions affected by revisions.
@@ -767 +804 @@
         file_id_revisions = {}
         pb = ui.ui_factory.nested_progress_bar()
         try:
-            parent_ids = self._find_parent_ids_of_revisions(revision_ids)
-            present_parent_inv_ids = self._find_present_inventory_ids(parent_ids)
+            revision_keys = [(r,) for r in revision_ids]
+            parent_keys = self._find_parent_keys_of_revisions(revision_keys)
+            # TODO: instead of using _find_present_inventory_keys, change the
+            #       code paths to allow missing inventories to be tolerated.
+            #       However, we only want to tolerate missing parent
+            #       inventories, not missing inventories for revision_ids
+            present_parent_inv_keys = self._find_present_inventory_keys(
+                                        parent_keys)
+            present_parent_inv_ids = set(
+                [k[-1] for k in present_parent_inv_keys])
             uninteresting_root_keys = set()
             interesting_root_keys = set()
-            inventories_to_read = set(present_parent_inv_ids)
-            inventories_to_read.update(revision_ids)
+            inventories_to_read = set(revision_ids)
+            inventories_to_read.update(present_parent_inv_ids)
             for inv in self.iter_inventories(inventories_to_read):
                 entry_chk_root_key = inv.id_to_entry.key()
                 if inv.revision_id in present_parent_inv_ids:
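_find_parent_keys_of_revisions (used above in its keys-based form) computes the parents sitting just outside the requested set, minus the set itself and the null revision. A sketch of that set arithmetic, assuming parent_map maps each key to a tuple of parent keys (find_parent_keys and the sample graph are illustrative):

```python
# What _find_parent_keys_of_revisions computes, sketched with plain sets:
# the keys of all parents at the edge of the set, excluding the set itself
# and the null revision.
NULL_REVISION_KEY = ('null:',)

def find_parent_keys(parent_map, revision_keys):
    parent_keys = set()
    for parents in parent_map.values():
        parent_keys.update(parents)
    parent_keys.difference_update(revision_keys)
    parent_keys.discard(NULL_REVISION_KEY)
    return parent_keys

parent_map = {('B',): (('A',),), ('C',): (('B',),)}
assert find_parent_keys(parent_map, set([('B',), ('C',)])) == set([('A',)])
```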
@@ -846 +891 @@
         return super(CHKInventoryRepository, self)._get_source(to_format)
@@ -849 +894 @@
-class GroupCHKStreamSource(repository.StreamSource):
+class GroupCHKStreamSource(KnitPackStreamSource):
     """Used when both the source and target repo are GroupCHK repos."""

     def __init__(self, from_repository, to_format):
         """Create a StreamSource streaming from from_repository."""
         super(GroupCHKStreamSource, self).__init__(from_repository, to_format)
         self._revision_keys = None
         self._text_keys = None
+        self._text_fetch_order = 'groupcompress'
         self._chk_id_roots = None
         self._chk_p_id_roots = None
@@ -898 +944 @@
             p_id_roots_set.clear()
         return ('inventories', _filtered_inv_stream())

-    def _find_present_inventories(self, revision_ids):
-        revision_keys = [(r,) for r in revision_ids]
-        inventories = self.from_repository.inventories
-        present_inventories = inventories.get_parent_map(revision_keys)
-        return [p[-1] for p in present_inventories]
-
-    def _get_filtered_chk_streams(self, excluded_revision_ids):
+    def _get_filtered_chk_streams(self, excluded_revision_keys):
         self._text_keys = set()
-        excluded_revision_ids.discard(_mod_revision.NULL_REVISION)
-        if not excluded_revision_ids:
+        excluded_revision_keys.discard(_mod_revision.NULL_REVISION)
+        if not excluded_revision_keys:
             uninteresting_root_keys = set()
             uninteresting_pid_root_keys = set()
         else:
             # filter out any excluded revisions whose inventories are not
             # actually present
             # TODO: Update Repository.iter_inventories() to add
             #       ignore_missing=True
-            present_ids = self.from_repository._find_present_inventory_ids(
-                excluded_revision_ids)
-            present_ids = self._find_present_inventories(excluded_revision_ids)
+            present_keys = self.from_repository._find_present_inventory_keys(
+                excluded_revision_keys)
+            present_ids = [k[-1] for k in present_keys]
             uninteresting_root_keys = set()
             uninteresting_pid_root_keys = set()
             for inv in self.from_repository.iter_inventories(present_ids):
@@ -948 +988 @@
         self._chk_p_id_roots = None
         yield 'chk_bytes', _get_parent_id_basename_to_file_id_pages()

-    def _get_text_stream(self):
-        # Note: We know we don't have to handle adding root keys, because both
-        # the source and target are GCCHK, and those always support rich-roots
-        # We may want to request as 'unordered', in case the source has done a
@@ -956 @@
-        return ('texts', self.from_repository.texts.get_record_stream(
-            self._text_keys, 'groupcompress', False))
@@ -959 +991 @@
     def get_stream(self, search):
         revision_ids = search.get_keys()
         for stream_info in self._fetch_revision_texts(revision_ids):
@@ -966 +998 @@
         # For now, exclude all parents that are at the edge of ancestry, for
         # which we have inventories
         from_repo = self.from_repository
-        parent_ids = from_repo._find_parent_ids_of_revisions(revision_ids)
-        for stream_info in self._get_filtered_chk_streams(parent_ids):
+        parent_keys = from_repo._find_parent_keys_of_revisions(
+            self._revision_keys)
+        for stream_info in self._get_filtered_chk_streams(parent_keys):
             yield stream_info
         yield self._get_text_stream()
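get_stream yields named substreams in order (revisions, then inventories, chk pages, and texts), and a sink dispatches on the name. A toy sketch of that contract with plain lists standing in for record streams (not bzrlib's real stream objects):

```python
# A stream source yields (substream_name, record_iterable) pairs; a sink
# dispatches on the name. Lists stand in for real record streams here.
def get_stream():
    yield 'revisions', ['rev-record-1', 'rev-record-2']
    yield 'inventories', ['inv-record-1']
    yield 'texts', ['text-record-1']

received = {}
for name, stream in get_stream():
    received.setdefault(name, []).extend(stream)
assert sorted(received) == ['inventories', 'revisions', 'texts']
```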
@@ -991 +1024 @@
         # no unavailable texts when the ghost inventories are not filled in.
         yield self._get_inventory_stream(missing_inventory_keys,
                                          allow_absent=True)
-        # We use the empty set for excluded_revision_ids, to make it clear that
-        # we want to transmit all referenced chk pages.
+        # We use the empty set for excluded_revision_keys, to make it clear
+        # that we want to transmit all referenced chk pages.
         for stream_info in self._get_filtered_chk_streams(set()):
             yield stream_info
@@ -1044 +1078 @@
         if not target_format.rich_root_data:
             raise errors.BadConversionTarget(
                 'Does not support rich root data.', target_format)
-        if not getattr(target_format, 'supports_tree_reference', False):
+        if (self.supports_tree_reference and
+            not getattr(target_format, 'supports_tree_reference', False)):
             raise errors.BadConversionTarget(
                 'Does not support nested trees', target_format)
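The tightened guard only rejects a conversion target lacking tree-reference support when the source format itself supports tree references; previously, targets without the attribute were rejected regardless of the source. The decision table as a plain function (conversion_allowed is illustrative):

```python
# The new guard only rejects the target when the source actually uses tree
# references; sketched as a plain function.
def conversion_allowed(source_supports_tree_refs, target_supports_tree_refs):
    if source_supports_tree_refs and not target_supports_tree_refs:
        return False
    return True

assert conversion_allowed(False, False)   # previously this case was rejected
assert not conversion_allowed(True, False)
```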
@@ -1066 +1101 @@
     def get_format_string(self):
         """See RepositoryFormat.get_format_string()."""
         return ('Bazaar development format - chk repository with bencode '
-                'revision serialization (needs bzr.dev from 1.15)\n')
+                'revision serialization (needs bzr.dev from 1.16)\n')
@@ +1107 @@
+class RepositoryFormat2a(RepositoryFormatCHK2):
+    """A CHK repository that uses the bencode revision serializer.
+
+    This is the same as RepositoryFormatCHK2 but with a public name.
+    """
+
+    _serializer = chk_serializer.chk_bencode_serializer
+
+    def _get_matching_bzrdir(self):
+        return bzrdir.format_registry.make_bzrdir('2a')
+
+    def _ignore_setting_bzrdir(self, format):
+        pass
+
+    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)
+
+    def get_format_string(self):
+        return ('Bazaar repository format 2a (needs bzr 1.16 or later)\n')
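Once RepositoryFormat2a is registered under the public '2a' name, it resolves through the bzrdir format registry like any other format. A hedged usage sketch, assuming a bzr 1.16+ tree with this change applied and bzrlib importable:

```python
# Assumes a bzrlib (bzr >= 1.16) with the '2a' format registered.
from bzrlib import bzrdir

fmt = bzrdir.format_registry.make_bzrdir('2a')
repo_format = fmt.repository_format
print(repo_format.get_format_string())
# -> Bazaar repository format 2a (needs bzr 1.16 or later)
```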