533
583
return new_pack.data_inserted() and self._data_changed
586
class GCCHKCanonicalizingPacker(GCCHKPacker):
587
"""A packer that ensures inventories have canonical-form CHK maps.
589
Ideally this would be part of reconcile, but it's very slow and rarely
590
needed. (It repairs repositories affected by
591
https://bugs.launchpad.net/bzr/+bug/522637).
594
def __init__(self, *args, **kwargs):
    """Set up the packer with no canonicalisation changes recorded yet.

    Every positional and keyword argument is handed on untouched to the
    parent packer's constructor.
    """
    super(GCCHKCanonicalizingPacker, self).__init__(*args, **kwargs)
    # Becomes True once the canonicalizing inventory stream sees a CHK map
    # whose root key differs from the regenerated one; _use_pack reads it.
    self._data_changed = False
598
def _exhaust_stream(self, source_vf, keys, message, vf_to_stream, pb_offset):
599
"""Create and exhaust a stream, but don't insert it.
601
This is useful to get the side-effects of generating a stream.
603
self.pb.update('scanning %s' % (message,), pb_offset)
604
child_pb = ui.ui_factory.nested_progress_bar()
606
list(vf_to_stream(source_vf, keys, message, child_pb))
610
def _copy_inventory_texts(self):
    """Copy inventories into the new pack, canonicalizing their CHK maps.

    Works in two passes: the existing CHK pages are copied across first,
    then every inventory is streamed through
    _get_filtered_canonicalizing_inv_stream, which regenerates the CHK maps
    and inserts any pages that were not already copied.
    """
    inv_source, inv_target = self._build_vfs('inventory', True, True)
    chk_source, chk_target = self._get_chk_vfs_for_copy()
    all_inv_keys = inv_source.keys()

    # Pass 1: bulk-copy the CHK pages that already exist, betting that the
    # majority are already canonical so they need not be reinserted (and
    # recompressed) afterwards.
    # The filtered inventory stream is generated and thrown away purely for
    # its side effect of filling in the state GCCHKPacker._copy_chk_texts
    # reads.
    self._exhaust_stream(inv_source, all_inv_keys, 'inventories',
                         self._get_filtered_inv_stream, 2)
    GCCHKPacker._copy_chk_texts(self)

    # Pass 2: copy the inventories themselves, repairing them and adding any
    # regenerated CHK pages on the way.
    def canonical_stream(source_vf, keys, message, pb=None):
        return self._get_filtered_canonicalizing_inv_stream(
            source_vf, keys, message, pb, chk_source, chk_target)

    self._copy_stream(inv_source, inv_target, all_inv_keys,
                      'inventories', canonical_stream, 4)
630
def _copy_chk_texts(self):
631
# No-op; in this class this happens during _copy_inventory_texts.
634
def _get_filtered_canonicalizing_inv_stream(self, source_vf, keys, message,
635
pb=None, source_chk_vf=None, target_chk_vf=None):
636
"""Filter the texts of inventories, regenerating CHKs to make sure they
639
total_keys = len(keys)
640
target_chk_vf = versionedfile.NoDupeAddLinesDecorator(target_chk_vf)
641
def _filtered_inv_stream():
642
stream = source_vf.get_record_stream(keys, 'groupcompress', True)
643
search_key_name = None
644
for idx, record in enumerate(stream):
645
# Inventories should always be with revisions; assume success.
646
bytes = record.get_bytes_as('fulltext')
647
chk_inv = inventory.CHKInventory.deserialise(
648
source_chk_vf, bytes, record.key)
650
pb.update('inv', idx, total_keys)
651
chk_inv.id_to_entry._ensure_root()
652
if search_key_name is None:
653
# Find the name corresponding to the search_key_func
654
search_key_reg = chk_map.search_key_registry
655
for search_key_name, func in search_key_reg.iteritems():
656
if func == chk_inv.id_to_entry._search_key_func:
658
canonical_inv = inventory.CHKInventory.from_inventory(
659
target_chk_vf, chk_inv,
660
maximum_size=chk_inv.id_to_entry._root_node._maximum_size,
661
search_key_name=search_key_name)
662
if chk_inv.id_to_entry.key() != canonical_inv.id_to_entry.key():
664
'Non-canonical CHK map for id_to_entry of inv: %s '
665
'(root is %s, should be %s)' % (chk_inv.revision_id,
666
chk_inv.id_to_entry.key()[0],
667
canonical_inv.id_to_entry.key()[0]))
668
self._data_changed = True
669
p_id_map = chk_inv.parent_id_basename_to_file_id
670
p_id_map._ensure_root()
671
canon_p_id_map = canonical_inv.parent_id_basename_to_file_id
672
if p_id_map.key() != canon_p_id_map.key():
674
'Non-canonical CHK map for parent_id_to_basename of '
675
'inv: %s (root is %s, should be %s)'
676
% (chk_inv.revision_id, p_id_map.key()[0],
677
canon_p_id_map.key()[0]))
678
self._data_changed = True
679
yield versionedfile.ChunkedContentFactory(record.key,
680
record.parents, record.sha1,
681
canonical_inv.to_lines())
682
# We have finished processing all of the inventory records, we
683
# don't need these sets anymore
684
return _filtered_inv_stream()
686
def _use_pack(self, new_pack):
687
"""Override _use_pack to check for reconcile having changed content."""
688
return new_pack.data_inserted() and self._data_changed
536
691
class GCRepositoryPackCollection(RepositoryPackCollection):
538
693
pack_factory = GCPack
540
def _already_packed(self):
541
"""Is the collection already packed?"""
542
# Always repack GC repositories for now
694
resumed_pack_factory = ResumedGCPack
696
def _check_new_inventories(self):
697
"""Detect missing inventories or chk root entries for the new revisions
700
:returns: list of strs, summarising any problems found. If the list is
701
empty no problems were found.
703
# Ensure that all revisions added in this write group have:
704
# - corresponding inventories,
705
# - chk root entries for those inventories,
706
# - and any present parent inventories have their chk root
708
# And all this should be independent of any fallback repository.
710
key_deps = self.repo.revisions._index._key_dependencies
711
new_revisions_keys = key_deps.get_new_keys()
712
no_fallback_inv_index = self.repo.inventories._index
713
no_fallback_chk_bytes_index = self.repo.chk_bytes._index
714
no_fallback_texts_index = self.repo.texts._index
715
inv_parent_map = no_fallback_inv_index.get_parent_map(
717
# Are any inventories corresponding to the new revisions missing?
718
corresponding_invs = set(inv_parent_map)
719
missing_corresponding = set(new_revisions_keys)
720
missing_corresponding.difference_update(corresponding_invs)
721
if missing_corresponding:
722
problems.append("inventories missing for revisions %s" %
723
(sorted(missing_corresponding),))
725
# Are any chk root entries missing for any inventories? This includes
726
# any present parent inventories, which may be used when calculating
727
# deltas for streaming.
728
all_inv_keys = set(corresponding_invs)
729
for parent_inv_keys in inv_parent_map.itervalues():
730
all_inv_keys.update(parent_inv_keys)
731
# Filter out ghost parents.
732
all_inv_keys.intersection_update(
733
no_fallback_inv_index.get_parent_map(all_inv_keys))
734
parent_invs_only_keys = all_inv_keys.symmetric_difference(
737
inv_ids = [key[-1] for key in all_inv_keys]
738
parent_invs_only_ids = [key[-1] for key in parent_invs_only_keys]
739
root_key_info = _build_interesting_key_sets(
740
self.repo, inv_ids, parent_invs_only_ids)
741
expected_chk_roots = root_key_info.all_keys()
742
present_chk_roots = no_fallback_chk_bytes_index.get_parent_map(
744
missing_chk_roots = expected_chk_roots.difference(present_chk_roots)
745
if missing_chk_roots:
746
problems.append("missing referenced chk root keys: %s"
747
% (sorted(missing_chk_roots),))
748
# Don't bother checking any further.
750
# Find all interesting chk_bytes records, and make sure they are
751
# present, as well as the text keys they reference.
752
chk_bytes_no_fallbacks = self.repo.chk_bytes.without_fallbacks()
753
chk_bytes_no_fallbacks._search_key_func = \
754
self.repo.chk_bytes._search_key_func
755
chk_diff = chk_map.iter_interesting_nodes(
756
chk_bytes_no_fallbacks, root_key_info.interesting_root_keys,
757
root_key_info.uninteresting_root_keys)
760
for record in _filter_text_keys(chk_diff, text_keys,
761
chk_map._bytes_to_text_key):
763
except errors.NoSuchRevision, e:
764
# XXX: It would be nice if we could give a more precise error here.
765
problems.append("missing chk node(s) for id_to_entry maps")
766
chk_diff = chk_map.iter_interesting_nodes(
767
chk_bytes_no_fallbacks, root_key_info.interesting_pid_root_keys,
768
root_key_info.uninteresting_pid_root_keys)
770
for interesting_rec, interesting_map in chk_diff:
772
except errors.NoSuchRevision, e:
774
"missing chk node(s) for parent_id_basename_to_file_id maps")
775
present_text_keys = no_fallback_texts_index.get_parent_map(text_keys)
776
missing_text_keys = text_keys.difference(present_text_keys)
777
if missing_text_keys:
778
problems.append("missing text keys: %r"
779
% (sorted(missing_text_keys),))
545
782
def _execute_pack_operations(self, pack_operations,
546
783
_packer_class=GCCHKPacker,
569
806
if packer.new_pack is not None:
570
807
packer.new_pack.abort()
572
811
for pack in packs:
573
812
self._remove_pack_from_memory(pack)
574
813
# record the newly available packs and stop advertising the old
576
self._save_pack_names(clear_obsolete_packs=True)
577
# Move the old packs out of the way now they are no longer referenced.
578
for revision_count, packs in pack_operations:
579
self._obsolete_packs(packs)
582
# XXX: This format is scheduled for termination
584
# class GCPackRepository(KnitPackRepository):
585
# """GC customisation of KnitPackRepository."""
587
# def __init__(self, _format, a_bzrdir, control_files, _commit_builder_class,
589
# """Overridden to change pack collection class."""
590
# KnitPackRepository.__init__(self, _format, a_bzrdir, control_files,
591
# _commit_builder_class, _serializer)
592
# # and now replace everything it did :)
593
# index_transport = self._transport.clone('indices')
594
# self._pack_collection = GCRepositoryPackCollection(self,
595
# self._transport, index_transport,
596
# self._transport.clone('upload'),
597
# self._transport.clone('packs'),
598
# _format.index_builder_class,
599
# _format.index_class,
600
# use_chk_index=self._format.supports_chks,
602
# self.inventories = GroupCompressVersionedFiles(
603
# _GCGraphIndex(self._pack_collection.inventory_index.combined_index,
604
# add_callback=self._pack_collection.inventory_index.add_callback,
605
# parents=True, is_locked=self.is_locked),
606
# access=self._pack_collection.inventory_index.data_access)
607
# self.revisions = GroupCompressVersionedFiles(
608
# _GCGraphIndex(self._pack_collection.revision_index.combined_index,
609
# add_callback=self._pack_collection.revision_index.add_callback,
610
# parents=True, is_locked=self.is_locked),
611
# access=self._pack_collection.revision_index.data_access,
613
# self.signatures = GroupCompressVersionedFiles(
614
# _GCGraphIndex(self._pack_collection.signature_index.combined_index,
615
# add_callback=self._pack_collection.signature_index.add_callback,
616
# parents=False, is_locked=self.is_locked),
617
# access=self._pack_collection.signature_index.data_access,
619
# self.texts = GroupCompressVersionedFiles(
620
# _GCGraphIndex(self._pack_collection.text_index.combined_index,
621
# add_callback=self._pack_collection.text_index.add_callback,
622
# parents=True, is_locked=self.is_locked),
623
# access=self._pack_collection.text_index.data_access)
624
# if _format.supports_chks:
625
# # No graph, no compression:- references from chks are between
626
# # different objects not temporal versions of the same; and without
627
# # some sort of temporal structure knit compression will just fail.
628
# self.chk_bytes = GroupCompressVersionedFiles(
629
# _GCGraphIndex(self._pack_collection.chk_index.combined_index,
630
# add_callback=self._pack_collection.chk_index.add_callback,
631
# parents=False, is_locked=self.is_locked),
632
# access=self._pack_collection.chk_index.data_access)
634
# self.chk_bytes = None
635
# # True when the repository object is 'write locked' (as opposed to the
636
# # physical lock only taken out around changes to the pack-names list.)
637
# # Another way to represent this would be a decorator around the control
638
# # files object that presents logical locks as physical ones - if this
639
# # gets ugly consider that alternative design. RBC 20071011
640
# self._write_lock_count = 0
641
# self._transaction = None
643
# self._reconcile_does_inventory_gc = True
644
# self._reconcile_fixes_text_parents = True
645
# self._reconcile_backsup_inventory = False
647
# def suspend_write_group(self):
648
# raise errors.UnsuspendableWriteGroup(self)
650
# def _resume_write_group(self, tokens):
651
# raise errors.UnsuspendableWriteGroup(self)
653
# def _reconcile_pack(self, collection, packs, extension, revs, pb):
655
# return packer.pack(pb)
658
class GCCHKPackRepository(CHKInventoryRepository):
659
"""GC customisation of CHKInventoryRepository."""
816
for _, packs in pack_operations:
817
to_be_obsoleted.extend(packs)
818
result = self._save_pack_names(clear_obsolete_packs=True,
819
obsolete_packs=to_be_obsoleted)
823
class CHKInventoryRepository(KnitPackRepository):
824
"""subclass of KnitPackRepository that uses CHK based inventories."""
661
826
def __init__(self, _format, a_bzrdir, control_files, _commit_builder_class,
713
886
self._reconcile_fixes_text_parents = True
714
887
self._reconcile_backsup_inventory = False
716
def suspend_write_group(self):
    """Always refuse: raises errors.UnsuspendableWriteGroup for this repo."""
    refusal = errors.UnsuspendableWriteGroup(self)
    raise refusal
719
def _resume_write_group(self, tokens):
    """Always refuse: raises errors.UnsuspendableWriteGroup.

    :param tokens: accepted for interface compatibility; never inspected.
    """
    refusal = errors.UnsuspendableWriteGroup(self)
    raise refusal
889
def _add_inventory_checked(self, revision_id, inv, parents):
    """Store inv as a CHK inventory once the inputs have been checked.

    Subclasses may override this to store a different inventory style.

    :seealso: add_inventory, for the contract.
    :return: whatever _inventory_add_lines returns for the serialised lines.
    """
    fmt_serializer = self._format._serializer
    # Convert to CHK form first; this writes the map pages into chk_bytes
    # using the page size and search key configured on the format.
    chk_inv = inventory.CHKInventory.from_inventory(
        self.chk_bytes, inv,
        maximum_size=fmt_serializer.maximum_size,
        search_key_name=fmt_serializer.search_key_name)
    serialised_lines = chk_inv.to_lines()
    return self._inventory_add_lines(
        revision_id, parents, serialised_lines, check_content=False)
905
def _create_inv_from_null(self, delta, revision_id):
906
"""This will mutate new_inv directly.
908
This is a simplified form of create_by_apply_delta which knows that all
909
the old values must be None, so everything is a create.
911
serializer = self._format._serializer
912
new_inv = inventory.CHKInventory(serializer.search_key_name)
913
new_inv.revision_id = revision_id
914
entry_to_bytes = new_inv._entry_to_bytes
915
id_to_entry_dict = {}
916
parent_id_basename_dict = {}
917
for old_path, new_path, file_id, entry in delta:
918
if old_path is not None:
919
raise ValueError('Invalid delta, somebody tried to delete %r'
920
' from the NULL_REVISION'
921
% ((old_path, file_id),))
923
raise ValueError('Invalid delta, delta from NULL_REVISION has'
924
' no new_path %r' % (file_id,))
926
new_inv.root_id = file_id
927
parent_id_basename_key = StaticTuple('', '').intern()
929
utf8_entry_name = entry.name.encode('utf-8')
930
parent_id_basename_key = StaticTuple(entry.parent_id,
931
utf8_entry_name).intern()
932
new_value = entry_to_bytes(entry)
934
# new_inv._path_to_fileid_cache[new_path] = file_id
935
key = StaticTuple(file_id).intern()
936
id_to_entry_dict[key] = new_value
937
parent_id_basename_dict[parent_id_basename_key] = file_id
939
new_inv._populate_from_dicts(self.chk_bytes, id_to_entry_dict,
940
parent_id_basename_dict, maximum_size=serializer.maximum_size)
943
def add_inventory_by_delta(self, basis_revision_id, delta, new_revision_id,
944
parents, basis_inv=None, propagate_caches=False):
945
"""Add a new inventory expressed as a delta against another revision.
947
:param basis_revision_id: The inventory id the delta was created
949
:param delta: The inventory delta (see Inventory.apply_delta for
951
:param new_revision_id: The revision id that the inventory is being
953
:param parents: The revision ids of the parents that revision_id is
954
known to have and are in the repository already. These are supplied
955
for repositories that depend on the inventory graph for revision
956
graph access, as well as for those that pun ancestry with delta
958
:param basis_inv: The basis inventory if it is already known,
960
:param propagate_caches: If True, the caches for this inventory are
961
copied to and updated for the result if possible.
963
:returns: (validator, new_inv)
964
The validator(which is a sha1 digest, though what is sha'd is
965
repository format specific) of the serialized inventory, and the
968
if not self.is_in_write_group():
969
raise AssertionError("%r not in write group" % (self,))
970
_mod_revision.check_not_reserved_id(new_revision_id)
972
if basis_inv is None:
973
if basis_revision_id == _mod_revision.NULL_REVISION:
974
new_inv = self._create_inv_from_null(delta, new_revision_id)
975
if new_inv.root_id is None:
976
raise errors.RootMissing()
977
inv_lines = new_inv.to_lines()
978
return self._inventory_add_lines(new_revision_id, parents,
979
inv_lines, check_content=False), new_inv
981
basis_tree = self.revision_tree(basis_revision_id)
982
basis_tree.lock_read()
983
basis_inv = basis_tree.inventory
985
result = basis_inv.create_by_apply_delta(delta, new_revision_id,
986
propagate_caches=propagate_caches)
987
inv_lines = result.to_lines()
988
return self._inventory_add_lines(new_revision_id, parents,
989
inv_lines, check_content=False), result
991
if basis_tree is not None:
994
def _deserialise_inventory(self, revision_id, bytes):
995
return inventory.CHKInventory.deserialise(self.chk_bytes, bytes,
998
def _iter_inventories(self, revision_ids, ordering):
999
"""Iterate over many inventory objects."""
1000
if ordering is None:
1001
ordering = 'unordered'
1002
keys = [(revision_id,) for revision_id in revision_ids]
1003
stream = self.inventories.get_record_stream(keys, ordering, True)
1005
for record in stream:
1006
if record.storage_kind != 'absent':
1007
texts[record.key] = record.get_bytes_as('fulltext')
1009
raise errors.NoSuchRevision(self, record.key)
1011
yield inventory.CHKInventory.deserialise(self.chk_bytes, texts[key], key)
1013
def _iter_inventory_xmls(self, revision_ids, ordering):
1014
# Without a native 'xml' inventory, this method doesn't make sense.
1015
# However older working trees, and older bundles want it - so we supply
1016
# it allowing _get_inventory_xml to work. Bundles currently use the
1017
# serializer directly; this also isn't ideal, but there isn't an xml
1018
# iteration interface offered at all for repositories. We could make
1019
# _iter_inventory_xmls be part of the contract, even if kept private.
1020
inv_to_str = self._serializer.write_inventory_to_string
1021
for inv in self.iter_inventories(revision_ids, ordering=ordering):
1022
yield inv_to_str(inv), inv.revision_id
1024
def _find_present_inventory_keys(self, revision_keys):
1025
parent_map = self.inventories.get_parent_map(revision_keys)
1026
present_inventory_keys = set(k for k in parent_map)
1027
return present_inventory_keys
1029
def fileids_altered_by_revision_ids(self, revision_ids, _inv_weave=None):
1030
"""Find the file ids and versions affected by revisions.
1032
:param revisions: an iterable containing revision ids.
1033
:param _inv_weave: The inventory weave from this repository or None.
1034
If None, the inventory weave will be opened automatically.
1035
:return: a dictionary mapping altered file-ids to an iterable of
1036
revision_ids. Each altered file-ids has the exact revision_ids that
1037
altered it listed explicitly.
1039
rich_root = self.supports_rich_root()
1040
bytes_to_info = inventory.CHKInventory._bytes_to_utf8name_key
1041
file_id_revisions = {}
1042
pb = ui.ui_factory.nested_progress_bar()
1044
revision_keys = [(r,) for r in revision_ids]
1045
parent_keys = self._find_parent_keys_of_revisions(revision_keys)
1046
# TODO: instead of using _find_present_inventory_keys, change the
1047
# code paths to allow missing inventories to be tolerated.
1048
# However, we only want to tolerate missing parent
1049
# inventories, not missing inventories for revision_ids
1050
present_parent_inv_keys = self._find_present_inventory_keys(
1052
present_parent_inv_ids = set(
1053
[k[-1] for k in present_parent_inv_keys])
1054
inventories_to_read = set(revision_ids)
1055
inventories_to_read.update(present_parent_inv_ids)
1056
root_key_info = _build_interesting_key_sets(
1057
self, inventories_to_read, present_parent_inv_ids)
1058
interesting_root_keys = root_key_info.interesting_root_keys
1059
uninteresting_root_keys = root_key_info.uninteresting_root_keys
1060
chk_bytes = self.chk_bytes
1061
for record, items in chk_map.iter_interesting_nodes(chk_bytes,
1062
interesting_root_keys, uninteresting_root_keys,
1064
for name, bytes in items:
1065
(name_utf8, file_id, revision_id) = bytes_to_info(bytes)
1066
# TODO: consider interning file_id, revision_id here, or
1067
# pushing that intern() into bytes_to_info()
1068
# TODO: rich_root should always be True here, for all
1069
# repositories that support chk_bytes
1070
if not rich_root and name_utf8 == '':
1073
file_id_revisions[file_id].add(revision_id)
1075
file_id_revisions[file_id] = set([revision_id])
1078
return file_id_revisions
1080
def find_text_key_references(self):
1081
"""Find the text key references within the repository.
1083
:return: A dictionary mapping text keys ((fileid, revision_id) tuples)
1084
to whether they were referred to by the inventory of the
1085
revision_id that they contain. The inventory texts from all present
1086
revision ids are assessed to generate this report.
1088
# XXX: Slow version but correct: rewrite as a series of delta
1089
# examinations/direct tree traversal. Note that that will require care
1090
# as a common node is reachable both from the inventory that added it,
1091
# and others afterwards.
1092
revision_keys = self.revisions.keys()
1094
rich_roots = self.supports_rich_root()
1095
pb = ui.ui_factory.nested_progress_bar()
1097
all_revs = self.all_revision_ids()
1098
total = len(all_revs)
1099
for pos, inv in enumerate(self.iter_inventories(all_revs)):
1100
pb.update("Finding text references", pos, total)
1101
for _, entry in inv.iter_entries():
1102
if not rich_roots and entry.file_id == inv.root_id:
1104
key = (entry.file_id, entry.revision)
1105
result.setdefault(key, False)
1106
if entry.revision == inv.revision_id:
1113
def reconcile_canonicalize_chks(self):
    """Reconcile this repository so that all CHKs are in canonical form.

    Runs a thorough PackReconciler pass with CHK canonicalization enabled.
    """
    # Imported here rather than at module level, as in the original code.
    from bzrlib.reconcile import PackReconciler
    PackReconciler(
        self, thorough=True, canonicalize_chks=True).reconcile()
722
1122
def _reconcile_pack(self, collection, packs, extension, revs, pb):
    """Repack the given packs with a GCCHKReconcilePacker.

    :param revs: not forwarded to the packer (the original code carried a
        disabled check that it is None).
    :param pb: progress bar handed to the packer's pack() call.
    :return: the result of the packer's pack() call.
    """
    return GCCHKReconcilePacker(collection, packs, extension).pack(pb)
728
class RepositoryFormatPackGCCHK16(RepositoryFormatPackDevelopment5Hash16):
1126
def _canonicalize_chks_pack(self, collection, packs, extension, revs, pb):
    """Repack the given packs with a GCCHKCanonicalizingPacker.

    :param pb: progress bar handed to the packer's pack() call.
    :return: the result of the packer's pack() call.
    """
    canonicalizer = GCCHKCanonicalizingPacker(
        collection, packs, extension, revs)
    outcome = canonicalizer.pack(pb)
    return outcome
1130
def _get_source(self, to_format):
    """Return a source for streaming from this repository."""
    same_serializer = (self._format._serializer == to_format._serializer)
    if not same_serializer:
        # Different serializers may mean a different chk page layout, so
        # fall back to the generic stream source.
        return super(CHKInventoryRepository, self)._get_source(to_format)
    # Matching on the serializer is deliberately a little looser than
    # requiring the exact same format, so CHK2 <-> 2a transfers still take
    # the fast path.
    return GroupCHKStreamSource(self, to_format)
1141
class GroupCHKStreamSource(KnitPackStreamSource):
1142
"""Used when both the source and target repo are GroupCHK repos."""
1144
def __init__(self, from_repository, to_format):
    """Create a StreamSource streaming from from_repository."""
    super(GroupCHKStreamSource, self).__init__(from_repository, to_format)
    # Filled in while a stream is being produced.
    self._revision_keys = None
    self._text_keys = None
    # CHK root keys collected by the inventory stream and consumed (then
    # reset to None) by the chk streams.
    self._chk_id_roots = None
    self._chk_p_id_roots = None
    self._text_fetch_order = 'groupcompress'
1153
def _get_inventory_stream(self, inventory_keys, allow_absent=False):
1154
"""Get a stream of inventory texts.
1156
When this function returns, self._chk_id_roots and self._chk_p_id_roots
1157
should be populated.
1159
self._chk_id_roots = []
1160
self._chk_p_id_roots = []
1161
def _filtered_inv_stream():
1162
id_roots_set = set()
1163
p_id_roots_set = set()
1164
source_vf = self.from_repository.inventories
1165
stream = source_vf.get_record_stream(inventory_keys,
1166
'groupcompress', True)
1167
for record in stream:
1168
if record.storage_kind == 'absent':
1172
raise errors.NoSuchRevision(self, record.key)
1173
bytes = record.get_bytes_as('fulltext')
1174
chk_inv = inventory.CHKInventory.deserialise(None, bytes,
1176
key = chk_inv.id_to_entry.key()
1177
if key not in id_roots_set:
1178
self._chk_id_roots.append(key)
1179
id_roots_set.add(key)
1180
p_id_map = chk_inv.parent_id_basename_to_file_id
1181
if p_id_map is None:
1182
raise AssertionError('Parent id -> file_id map not set')
1183
key = p_id_map.key()
1184
if key not in p_id_roots_set:
1185
p_id_roots_set.add(key)
1186
self._chk_p_id_roots.append(key)
1188
# We have finished processing all of the inventory records, we
1189
# don't need these sets anymore
1190
id_roots_set.clear()
1191
p_id_roots_set.clear()
1192
return ('inventories', _filtered_inv_stream())
1194
def _get_filtered_chk_streams(self, excluded_revision_keys):
1195
self._text_keys = set()
1196
excluded_revision_keys.discard(_mod_revision.NULL_REVISION)
1197
if not excluded_revision_keys:
1198
uninteresting_root_keys = set()
1199
uninteresting_pid_root_keys = set()
1201
# filter out any excluded revisions whose inventories are not
1203
# TODO: Update Repository.iter_inventories() to add
1204
# ignore_missing=True
1205
present_keys = self.from_repository._find_present_inventory_keys(
1206
excluded_revision_keys)
1207
present_ids = [k[-1] for k in present_keys]
1208
uninteresting_root_keys = set()
1209
uninteresting_pid_root_keys = set()
1210
for inv in self.from_repository.iter_inventories(present_ids):
1211
uninteresting_root_keys.add(inv.id_to_entry.key())
1212
uninteresting_pid_root_keys.add(
1213
inv.parent_id_basename_to_file_id.key())
1214
chk_bytes = self.from_repository.chk_bytes
1215
def _filter_id_to_entry():
1216
interesting_nodes = chk_map.iter_interesting_nodes(chk_bytes,
1217
self._chk_id_roots, uninteresting_root_keys)
1218
for record in _filter_text_keys(interesting_nodes, self._text_keys,
1219
chk_map._bytes_to_text_key):
1220
if record is not None:
1223
self._chk_id_roots = None
1224
yield 'chk_bytes', _filter_id_to_entry()
1225
def _get_parent_id_basename_to_file_id_pages():
1226
for record, items in chk_map.iter_interesting_nodes(chk_bytes,
1227
self._chk_p_id_roots, uninteresting_pid_root_keys):
1228
if record is not None:
1231
self._chk_p_id_roots = None
1232
yield 'chk_bytes', _get_parent_id_basename_to_file_id_pages()
1234
def get_stream(self, search):
1235
def wrap_and_count(pb, rc, stream):
1236
"""Yield records from stream while showing progress."""
1238
for record in stream:
1239
if count == rc.STEP:
1241
pb.update('Estimate', rc.current, rc.max)
1246
revision_ids = search.get_keys()
1247
pb = ui.ui_factory.nested_progress_bar()
1248
rc = self._record_counter
1249
self._record_counter.setup(len(revision_ids))
1250
for stream_info in self._fetch_revision_texts(revision_ids):
1251
yield (stream_info[0],
1252
wrap_and_count(pb, rc, stream_info[1]))
1253
self._revision_keys = [(rev_id,) for rev_id in revision_ids]
1254
self.from_repository.revisions.clear_cache()
1255
self.from_repository.signatures.clear_cache()
1256
s = self._get_inventory_stream(self._revision_keys)
1257
yield (s[0], wrap_and_count(pb, rc, s[1]))
1258
self.from_repository.inventories.clear_cache()
1259
# TODO: The keys to exclude might be part of the search recipe
1260
# For now, exclude all parents that are at the edge of ancestry, for
1261
# which we have inventories
1262
from_repo = self.from_repository
1263
parent_keys = from_repo._find_parent_keys_of_revisions(
1264
self._revision_keys)
1265
for stream_info in self._get_filtered_chk_streams(parent_keys):
1266
yield (stream_info[0], wrap_and_count(pb, rc, stream_info[1]))
1267
self.from_repository.chk_bytes.clear_cache()
1268
s = self._get_text_stream()
1269
yield (s[0], wrap_and_count(pb, rc, s[1]))
1270
self.from_repository.texts.clear_cache()
1271
pb.update('Done', rc.max, rc.max)
1274
def get_stream_for_missing_keys(self, missing_keys):
1275
# missing keys can only occur when we are byte copying and not
1276
# translating (because translation means we don't send
1277
# unreconstructable deltas ever).
1278
missing_inventory_keys = set()
1279
for key in missing_keys:
1280
if key[0] != 'inventories':
1281
raise AssertionError('The only missing keys we should'
1282
' be filling in are inventory keys, not %s'
1284
missing_inventory_keys.add(key[1:])
1285
if self._chk_id_roots or self._chk_p_id_roots:
1286
raise AssertionError('Cannot call get_stream_for_missing_keys'
1287
' until all of get_stream() has been consumed.')
1288
# Yield the inventory stream, so we can find the chk stream
1289
# Some of the missing_keys will be missing because they are ghosts.
1290
# As such, we can ignore them. The Sink is required to verify there are
1291
# no unavailable texts when the ghost inventories are not filled in.
1292
yield self._get_inventory_stream(missing_inventory_keys,
1294
# We use the empty set for excluded_revision_keys, to make it clear
1295
# that we want to transmit all referenced chk pages.
1296
for stream_info in self._get_filtered_chk_streams(set()):
1300
class _InterestingKeyInfo(object):
1302
self.interesting_root_keys = set()
1303
self.interesting_pid_root_keys = set()
1304
self.uninteresting_root_keys = set()
1305
self.uninteresting_pid_root_keys = set()
1307
def all_interesting(self):
1308
return self.interesting_root_keys.union(self.interesting_pid_root_keys)
1310
def all_uninteresting(self):
1311
return self.uninteresting_root_keys.union(
1312
self.uninteresting_pid_root_keys)
1315
return self.all_interesting().union(self.all_uninteresting())
1318
def _build_interesting_key_sets(repo, inventory_ids, parent_only_inv_ids):
    """Sort the CHK root keys of some inventories into (un)interesting sets.

    :param repo: repository whose iter_inventories() supplies the inventories.
    :param inventory_ids: revision ids of every inventory to examine.
    :param parent_only_inv_ids: the ids among inventory_ids that are present
        only as parents; their root keys are recorded as uninteresting.
    :return: an _InterestingKeyInfo holding the collected root keys.
    """
    result = _InterestingKeyInfo()
    # Callers pass either a list or a set; freeze it once so the membership
    # test below is a hash lookup instead of a scan per inventory.
    parent_only = frozenset(parent_only_inv_ids)
    for inv in repo.iter_inventories(inventory_ids, 'unordered'):
        root_key = inv.id_to_entry.key()
        pid_root_key = inv.parent_id_basename_to_file_id.key()
        if inv.revision_id in parent_only:
            result.uninteresting_root_keys.add(root_key)
            result.uninteresting_pid_root_keys.add(pid_root_key)
        else:
            result.interesting_root_keys.add(root_key)
            result.interesting_pid_root_keys.add(pid_root_key)
    return result
1332
def _filter_text_keys(interesting_nodes_iterable, text_keys, bytes_to_text_key):
1333
"""Iterate the result of iter_interesting_nodes, yielding the records
1334
and adding to text_keys.
1336
text_keys_update = text_keys.update
1337
for record, items in interesting_nodes_iterable:
1338
text_keys_update([bytes_to_text_key(b) for n,b in items])
1344
class RepositoryFormatCHK1(RepositoryFormatPack):
729
1345
"""A hashed CHK+group compress pack repository."""
731
repository_class = GCCHKPackRepository
732
_commit_builder_class = PackRootCommitBuilder
733
rich_root_data = True
1347
repository_class = CHKInventoryRepository
734
1348
supports_external_lookups = True
735
supports_tree_reference = True
737
# Note: We cannot unpack a delta that references a text we haven't
738
# seen yet. There are 2 options, work in fulltexts, or require
739
# topological sorting. Using fulltexts is more optimal for local
740
# operations, because the source can be smart about extracting
741
# multiple in-a-row (and sharing strings). Topological is better
742
# for remote, because we access less data.
743
_fetch_order = 'unordered'
744
_fetch_uses_deltas = False
746
def _get_matching_bzrdir(self):
    """Return the bzrdir the format registry makes for 'gc-chk16'."""
    registry = bzrdir.format_registry
    return registry.make_bzrdir('gc-chk16')
749
def _ignore_setting_bzrdir(self, format):
752
_matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)
754
def get_format_string(self):
    """See RepositoryFormat.get_format_string()."""
    format_name = 'Bazaar development format - hash16chk+gc rich-root'
    requirement = ' (needs bzr.dev from 1.14)\n'
    return format_name + requirement
759
def get_format_description(self):
    """See RepositoryFormat.get_format_description()."""
    description = "Development repository format - hash16chk+groupcompress"
    return description
763
def check_conversion_target(self, target_format):
    """Check that target_format can hold what this format stores.

    :raises errors.BadConversionTarget: if the target lacks rich root data
        or does not declare support for tree references.
    """
    has_rich_root = target_format.rich_root_data
    if not has_rich_root:
        raise errors.BadConversionTarget(
            'Does not support rich root data.', target_format)
    # A target without the attribute at all counts as not supporting it.
    has_tree_refs = getattr(target_format, 'supports_tree_reference', False)
    if not has_tree_refs:
        raise errors.BadConversionTarget(
            'Does not support nested trees', target_format)
772
class RepositoryFormatPackGCCHK255(RepositoryFormatPackDevelopment5Hash255):
773
"""A hashed CHK+group compress pack repository."""
775
repository_class = GCCHKPackRepository
777
# Setting this to True causes us to use InterModel1And2, so for now set
778
# it to False which uses InterDifferingSerializer. When IM1&2 is
779
# removed (as it is in bzr.dev) we can set this back to True.
780
_commit_builder_class = PackRootCommitBuilder
781
rich_root_data = True
783
def _get_matching_bzrdir(self):
    """Return the bzrdir the format registry makes for 'gc-chk255'."""
    registry = bzrdir.format_registry
    return registry.make_bzrdir('gc-chk255')
786
def _ignore_setting_bzrdir(self, format):
789
_matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)
791
def get_format_string(self):
    """See RepositoryFormat.get_format_string()."""
    format_name = 'Bazaar development format - hash255chk+gc rich-root'
    requirement = ' (needs bzr.dev from 1.14)\n'
    return format_name + requirement
796
def get_format_description(self):
    """See RepositoryFormat.get_format_description()."""
    description = "Development repository format - hash255chk+groupcompress"
    return description
800
def check_conversion_target(self, target_format):
    """Check that target_format can hold what this format stores.

    :raises errors.BadConversionTarget: if the target lacks rich root data
        or does not declare support for tree references.
    """
    has_rich_root = target_format.rich_root_data
    if not has_rich_root:
        raise errors.BadConversionTarget(
            'Does not support rich root data.', target_format)
    # A target without the attribute at all counts as not supporting it.
    has_tree_refs = getattr(target_format, 'supports_tree_reference', False)
    if not has_tree_refs:
        raise errors.BadConversionTarget(
            'Does not support nested trees', target_format)
809
class RepositoryFormatPackGCCHK255Big(RepositoryFormatPackGCCHK255):
810
"""A hashed CHK+group compress pack repository."""
812
repository_class = GCCHKPackRepository
813
1349
supports_chks = True
814
1350
# For right now, setting this to True gives us InterModel1And2 rather
815
1351
# than InterDifferingSerializer
816
1352
_commit_builder_class = PackRootCommitBuilder
817
1353
rich_root_data = True
818
1354
_serializer = chk_serializer.chk_serializer_255_bigpage
1355
_commit_inv_deltas = True
1356
# What index classes to use
1357
index_builder_class = BTreeBuilder
1358
index_class = BTreeGraphIndex
819
1359
# Note: We cannot unpack a delta that references a text we haven't
820
1360
# seen yet. There are 2 options, work in fulltexts, or require
821
1361
# topological sorting. Using fulltexts is more optimal for local