~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/repofmt/groupcompress_repo.py

  • Committer: Canonical.com Patch Queue Manager
  • Date: 2009-12-03 05:08:05 UTC
  • mfrom: (4854.1.1 merge-2.0-into-devel)
  • Revision ID: pqm@pqm.ubuntu.com-20091203050805-s70sybyrsrbbs10c
(andrew) Merge lp:bzr/2.0 into lp:bzr.

Show diffs side-by-side

added added

removed removed

Lines of Context:
53
53
    ResumedPack,
54
54
    Packer,
55
55
    )
 
56
from bzrlib.static_tuple import StaticTuple
56
57
 
57
58
 
58
59
class GCPack(NewPack):
584
585
    pack_factory = GCPack
585
586
    resumed_pack_factory = ResumedGCPack
586
587
 
 
588
    def _check_new_inventories(self):
 
589
        """Detect missing inventories or chk root entries for the new revisions
 
590
        in this write group.
 
591
 
 
592
        :returns: list of strs, summarising any problems found.  If the list is
 
593
            empty no problems were found.
 
594
        """
 
595
        # Ensure that all revisions added in this write group have:
 
596
        #   - corresponding inventories,
 
597
        #   - chk root entries for those inventories,
 
598
        #   - and any present parent inventories have their chk root
 
599
        #     entries too.
 
600
        # And all this should be independent of any fallback repository.
 
601
        problems = []
 
602
        key_deps = self.repo.revisions._index._key_dependencies
 
603
        new_revisions_keys = key_deps.get_new_keys()
 
604
        no_fallback_inv_index = self.repo.inventories._index
 
605
        no_fallback_chk_bytes_index = self.repo.chk_bytes._index
 
606
        no_fallback_texts_index = self.repo.texts._index
 
607
        inv_parent_map = no_fallback_inv_index.get_parent_map(
 
608
            new_revisions_keys)
 
609
        # Are any inventories for corresponding to the new revisions missing?
 
610
        corresponding_invs = set(inv_parent_map)
 
611
        missing_corresponding = set(new_revisions_keys)
 
612
        missing_corresponding.difference_update(corresponding_invs)
 
613
        if missing_corresponding:
 
614
            problems.append("inventories missing for revisions %s" %
 
615
                (sorted(missing_corresponding),))
 
616
            return problems
 
617
        # Are any chk root entries missing for any inventories?  This includes
 
618
        # any present parent inventories, which may be used when calculating
 
619
        # deltas for streaming.
 
620
        all_inv_keys = set(corresponding_invs)
 
621
        for parent_inv_keys in inv_parent_map.itervalues():
 
622
            all_inv_keys.update(parent_inv_keys)
 
623
        # Filter out ghost parents.
 
624
        all_inv_keys.intersection_update(
 
625
            no_fallback_inv_index.get_parent_map(all_inv_keys))
 
626
        parent_invs_only_keys = all_inv_keys.symmetric_difference(
 
627
            corresponding_invs)
 
628
        all_missing = set()
 
629
        inv_ids = [key[-1] for key in all_inv_keys]
 
630
        parent_invs_only_ids = [key[-1] for key in parent_invs_only_keys]
 
631
        root_key_info = _build_interesting_key_sets(
 
632
            self.repo, inv_ids, parent_invs_only_ids)
 
633
        expected_chk_roots = root_key_info.all_keys()
 
634
        present_chk_roots = no_fallback_chk_bytes_index.get_parent_map(
 
635
            expected_chk_roots)
 
636
        missing_chk_roots = expected_chk_roots.difference(present_chk_roots)
 
637
        if missing_chk_roots:
 
638
            problems.append("missing referenced chk root keys: %s"
 
639
                % (sorted(missing_chk_roots),))
 
640
            # Don't bother checking any further.
 
641
            return problems
 
642
        # Find all interesting chk_bytes records, and make sure they are
 
643
        # present, as well as the text keys they reference.
 
644
        chk_bytes_no_fallbacks = self.repo.chk_bytes.without_fallbacks()
 
645
        chk_bytes_no_fallbacks._search_key_func = \
 
646
            self.repo.chk_bytes._search_key_func
 
647
        chk_diff = chk_map.iter_interesting_nodes(
 
648
            chk_bytes_no_fallbacks, root_key_info.interesting_root_keys,
 
649
            root_key_info.uninteresting_root_keys)
 
650
        bytes_to_info = inventory.CHKInventory._bytes_to_utf8name_key
 
651
        text_keys = set()
 
652
        try:
 
653
            for record in _filter_text_keys(chk_diff, text_keys, bytes_to_info):
 
654
                pass
 
655
        except errors.NoSuchRevision, e:
 
656
            # XXX: It would be nice if we could give a more precise error here.
 
657
            problems.append("missing chk node(s) for id_to_entry maps")
 
658
        chk_diff = chk_map.iter_interesting_nodes(
 
659
            chk_bytes_no_fallbacks, root_key_info.interesting_pid_root_keys,
 
660
            root_key_info.uninteresting_pid_root_keys)
 
661
        try:
 
662
            for interesting_rec, interesting_map in chk_diff:
 
663
                pass
 
664
        except errors.NoSuchRevision, e:
 
665
            problems.append(
 
666
                "missing chk node(s) for parent_id_basename_to_file_id maps")
 
667
        present_text_keys = no_fallback_texts_index.get_parent_map(text_keys)
 
668
        missing_text_keys = text_keys.difference(present_text_keys)
 
669
        if missing_text_keys:
 
670
            problems.append("missing text keys: %r"
 
671
                % (sorted(missing_text_keys),))
 
672
        return problems
 
673
 
587
674
    def _execute_pack_operations(self, pack_operations,
588
675
                                 _packer_class=GCCHKPacker,
589
676
                                 reload_func=None):
617
704
                self._remove_pack_from_memory(pack)
618
705
        # record the newly available packs and stop advertising the old
619
706
        # packs
620
 
        self._save_pack_names(clear_obsolete_packs=True)
 
707
        result = self._save_pack_names(clear_obsolete_packs=True)
621
708
        # Move the old packs out of the way now they are no longer referenced.
622
709
        for revision_count, packs in pack_operations:
623
710
            self._obsolete_packs(packs)
 
711
        return result
624
712
 
625
713
 
626
714
class CHKInventoryRepository(KnitPackRepository):
651
739
            _GCGraphIndex(self._pack_collection.revision_index.combined_index,
652
740
                add_callback=self._pack_collection.revision_index.add_callback,
653
741
                parents=True, is_locked=self.is_locked,
654
 
                track_external_parent_refs=True),
 
742
                track_external_parent_refs=True, track_new_keys=True),
655
743
            access=self._pack_collection.revision_index.data_access,
656
744
            delta=False)
657
745
        self.signatures = GroupCompressVersionedFiles(
727
815
                                 ' no new_path %r' % (file_id,))
728
816
            if new_path == '':
729
817
                new_inv.root_id = file_id
730
 
                parent_id_basename_key = ('', '')
 
818
                parent_id_basename_key = StaticTuple('', '').intern()
731
819
            else:
732
820
                utf8_entry_name = entry.name.encode('utf-8')
733
 
                parent_id_basename_key = (entry.parent_id, utf8_entry_name)
 
821
                parent_id_basename_key = StaticTuple(entry.parent_id,
 
822
                                                     utf8_entry_name).intern()
734
823
            new_value = entry_to_bytes(entry)
735
824
            # Populate Caches?
736
825
            # new_inv._path_to_fileid_cache[new_path] = file_id
737
 
            id_to_entry_dict[(file_id,)] = new_value
 
826
            key = StaticTuple(file_id).intern()
 
827
            id_to_entry_dict[key] = new_value
738
828
            parent_id_basename_dict[parent_id_basename_key] = file_id
739
829
 
740
830
        new_inv._populate_from_dicts(self.chk_bytes, id_to_entry_dict,
850
940
                                        parent_keys)
851
941
            present_parent_inv_ids = set(
852
942
                [k[-1] for k in present_parent_inv_keys])
853
 
            uninteresting_root_keys = set()
854
 
            interesting_root_keys = set()
855
943
            inventories_to_read = set(revision_ids)
856
944
            inventories_to_read.update(present_parent_inv_ids)
857
 
            for inv in self.iter_inventories(inventories_to_read):
858
 
                entry_chk_root_key = inv.id_to_entry.key()
859
 
                if inv.revision_id in present_parent_inv_ids:
860
 
                    uninteresting_root_keys.add(entry_chk_root_key)
861
 
                else:
862
 
                    interesting_root_keys.add(entry_chk_root_key)
863
 
 
 
945
            root_key_info = _build_interesting_key_sets(
 
946
                self, inventories_to_read, present_parent_inv_ids)
 
947
            interesting_root_keys = root_key_info.interesting_root_keys
 
948
            uninteresting_root_keys = root_key_info.uninteresting_root_keys
864
949
            chk_bytes = self.chk_bytes
865
950
            for record, items in chk_map.iter_interesting_nodes(chk_bytes,
866
951
                        interesting_root_keys, uninteresting_root_keys,
867
952
                        pb=pb):
868
953
                for name, bytes in items:
869
954
                    (name_utf8, file_id, revision_id) = bytes_to_info(bytes)
 
955
                    # TODO: consider interning file_id, revision_id here, or
 
956
                    #       pushing that intern() into bytes_to_info()
 
957
                    # TODO: rich_root should always be True here, for all
 
958
                    #       repositories that support chk_bytes
870
959
                    if not rich_root and name_utf8 == '':
871
960
                        continue
872
961
                    try:
932
1021
        super(GroupCHKStreamSource, self).__init__(from_repository, to_format)
933
1022
        self._revision_keys = None
934
1023
        self._text_keys = None
935
 
        # self._text_fetch_order = 'unordered'
 
1024
        self._text_fetch_order = 'groupcompress'
936
1025
        self._chk_id_roots = None
937
1026
        self._chk_p_id_roots = None
938
1027
 
949
1038
            p_id_roots_set = set()
950
1039
            source_vf = self.from_repository.inventories
951
1040
            stream = source_vf.get_record_stream(inventory_keys,
952
 
                                                 'unordered', True)
 
1041
                                                 'groupcompress', True)
953
1042
            for record in stream:
954
1043
                if record.storage_kind == 'absent':
955
1044
                    if allow_absent:
1000
1089
        bytes_to_info = inventory.CHKInventory._bytes_to_utf8name_key
1001
1090
        chk_bytes = self.from_repository.chk_bytes
1002
1091
        def _filter_id_to_entry():
1003
 
            for record, items in chk_map.iter_interesting_nodes(chk_bytes,
1004
 
                        self._chk_id_roots, uninteresting_root_keys):
1005
 
                for name, bytes in items:
1006
 
                    # Note: we don't care about name_utf8, because we are always
1007
 
                    # rich-root = True
1008
 
                    _, file_id, revision_id = bytes_to_info(bytes)
1009
 
                    self._text_keys.add((file_id, revision_id))
 
1092
            interesting_nodes = chk_map.iter_interesting_nodes(chk_bytes,
 
1093
                        self._chk_id_roots, uninteresting_root_keys)
 
1094
            for record in _filter_text_keys(interesting_nodes, self._text_keys,
 
1095
                    bytes_to_info):
1010
1096
                if record is not None:
1011
1097
                    yield record
1012
1098
            # Consumed
1026
1112
        for stream_info in self._fetch_revision_texts(revision_ids):
1027
1113
            yield stream_info
1028
1114
        self._revision_keys = [(rev_id,) for rev_id in revision_ids]
 
1115
        self.from_repository.revisions.clear_cache()
 
1116
        self.from_repository.signatures.clear_cache()
1029
1117
        yield self._get_inventory_stream(self._revision_keys)
 
1118
        self.from_repository.inventories.clear_cache()
1030
1119
        # TODO: The keys to exclude might be part of the search recipe
1031
1120
        # For now, exclude all parents that are at the edge of ancestry, for
1032
1121
        # which we have inventories
1035
1124
                        self._revision_keys)
1036
1125
        for stream_info in self._get_filtered_chk_streams(parent_keys):
1037
1126
            yield stream_info
 
1127
        self.from_repository.chk_bytes.clear_cache()
1038
1128
        yield self._get_text_stream()
 
1129
        self.from_repository.texts.clear_cache()
1039
1130
 
1040
1131
    def get_stream_for_missing_keys(self, missing_keys):
1041
1132
        # missing keys can only occur when we are byte copying and not
1050
1141
            missing_inventory_keys.add(key[1:])
1051
1142
        if self._chk_id_roots or self._chk_p_id_roots:
1052
1143
            raise AssertionError('Cannot call get_stream_for_missing_keys'
1053
 
                ' untill all of get_stream() has been consumed.')
 
1144
                ' until all of get_stream() has been consumed.')
1054
1145
        # Yield the inventory stream, so we can find the chk stream
1055
1146
        # Some of the missing_keys will be missing because they are ghosts.
1056
1147
        # As such, we can ignore them. The Sink is required to verify there are
1063
1154
            yield stream_info
1064
1155
 
1065
1156
 
 
1157
class _InterestingKeyInfo(object):
 
1158
    def __init__(self):
 
1159
        self.interesting_root_keys = set()
 
1160
        self.interesting_pid_root_keys = set()
 
1161
        self.uninteresting_root_keys = set()
 
1162
        self.uninteresting_pid_root_keys = set()
 
1163
 
 
1164
    def all_interesting(self):
 
1165
        return self.interesting_root_keys.union(self.interesting_pid_root_keys)
 
1166
 
 
1167
    def all_uninteresting(self):
 
1168
        return self.uninteresting_root_keys.union(
 
1169
            self.uninteresting_pid_root_keys)
 
1170
 
 
1171
    def all_keys(self):
 
1172
        return self.all_interesting().union(self.all_uninteresting())
 
1173
 
 
1174
 
 
1175
def _build_interesting_key_sets(repo, inventory_ids, parent_only_inv_ids):
    """Sort the chk root keys of some inventories into interesting and
    uninteresting sets.

    For every inventory read, the key of its ``id_to_entry`` map and the key
    of its ``parent_id_basename_to_file_id`` map are recorded.

    :param repo: object whose ``iter_inventories(ids, 'unordered')`` yields
        the inventories to examine.
    :param inventory_ids: revision ids of all inventories to read.
    :param parent_only_inv_ids: revision ids whose inventories' keys are to
        be recorded as uninteresting; every other inventory read is recorded
        as interesting.
    :returns: an _InterestingKeyInfo holding the four key sets.
    """
    # Callers may pass a list here.  Membership is tested once per inventory,
    # so snapshot the ids into a set once to keep each test constant-time.
    parent_only = frozenset(parent_only_inv_ids)
    result = _InterestingKeyInfo()
    for inv in repo.iter_inventories(inventory_ids, 'unordered'):
        root_key = inv.id_to_entry.key()
        pid_root_key = inv.parent_id_basename_to_file_id.key()
        if inv.revision_id in parent_only:
            result.uninteresting_root_keys.add(root_key)
            result.uninteresting_pid_root_keys.add(pid_root_key)
        else:
            result.interesting_root_keys.add(root_key)
            result.interesting_pid_root_keys.add(pid_root_key)
    return result
 
1187
 
 
1188
 
 
1189
def _filter_text_keys(interesting_nodes_iterable, text_keys, bytes_to_info):
    """Pass through the records of an iter_interesting_nodes result while
    collecting the text keys its items refer to.

    :param interesting_nodes_iterable: iterable of (record, items) pairs,
        where items is an iterable of (name, bytes) pairs.
    :param text_keys: set-like object; an interned
        StaticTuple(file_id, revision_id) is added for every item seen.
    :param bytes_to_info: callable turning an item's bytes into a 3-tuple
        whose last two elements are the file id and revision id.
    :returns: generator yielding each record, after that record's items
        have been added to text_keys.
    """
    for record, items in interesting_nodes_iterable:
        for _item_name, entry_bytes in items:
            # The utf8 name (first element) is not needed: groupcompress
            # repos are always rich-root, so there are no synthesised root
            # records to ignore.
            _utf8_name, raw_file_id, raw_revision_id = bytes_to_info(
                entry_bytes)
            text_key = StaticTuple(intern(raw_file_id),
                                   intern(raw_revision_id))
            text_keys.add(text_key.intern())
        yield record
 
1203
 
 
1204
 
 
1205
 
 
1206
 
1066
1207
class RepositoryFormatCHK1(RepositoryFormatPack):
1067
1208
    """A hashed CHK+group compress pack repository."""
1068
1209
 
1145
1286
 
1146
1287
    def get_format_string(self):
1147
1288
        return ('Bazaar repository format 2a (needs bzr 1.16 or later)\n')
 
1289
 
 
1290
    def get_format_description(self):
 
1291
        """See RepositoryFormat.get_format_description()."""
 
1292
        return ("Repository format 2a - rich roots, group compression"
 
1293
            " and chk inventories")