~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/repofmt/groupcompress_repo.py

  • Committer: Canonical.com Patch Queue Manager
  • Date: 2009-09-09 13:49:07 UTC
  • mfrom: (4679.1.1 merge-2.0-into-bzr.dev)
  • Revision ID: pqm@pqm.ubuntu.com-20090909134907-nnrr06jw5o011doh
(andrew) Merge 2.0 tip, including fixes for 408841, 423506, 406687,
        418931.

Show diffs side-by-side

added added

removed removed

Lines of Context:
588
588
        """Detect missing inventories or chk root entries for the new revisions
589
589
        in this write group.
590
590
 
591
 
        :returns: set of missing keys.  Note that not every missing key is
592
 
            guaranteed to be reported.
 
591
        :returns: list of strs, summarising any problems found.  If the list is
 
592
            empty no problems were found.
593
593
        """
594
 
        if getattr(self.repo, 'chk_bytes', None) is None:
595
 
            return set()
596
594
        # Ensure that all revisions added in this write group have:
597
595
        #   - corresponding inventories,
598
596
        #   - chk root entries for those inventories,
599
597
        #   - and any present parent inventories have their chk root
600
598
        #     entries too.
601
599
        # And all this should be independent of any fallback repository.
 
600
        problems = []
602
601
        key_deps = self.repo.revisions._index._key_dependencies
603
602
        new_revisions_keys = key_deps.get_new_keys()
604
603
        no_fallback_inv_index = self.repo.inventories._index
605
604
        no_fallback_chk_bytes_index = self.repo.chk_bytes._index
 
605
        no_fallback_texts_index = self.repo.texts._index
606
606
        inv_parent_map = no_fallback_inv_index.get_parent_map(
607
607
            new_revisions_keys)
608
608
        # Are any inventories corresponding to the new revisions missing?
610
610
        missing_corresponding = set(new_revisions_keys)
611
611
        missing_corresponding.difference_update(corresponding_invs)
612
612
        if missing_corresponding:
613
 
            return [('inventories', key) for key in missing_corresponding]
 
613
            problems.append("inventories missing for revisions %s" %
 
614
                (sorted(missing_corresponding),))
 
615
            return problems
614
616
        # Are any chk root entries missing for any inventories?  This includes
615
617
        # any present parent inventories, which may be used when calculating
616
618
        # deltas for streaming.
620
622
        # Filter out ghost parents.
621
623
        all_inv_keys.intersection_update(
622
624
            no_fallback_inv_index.get_parent_map(all_inv_keys))
 
625
        parent_invs_only_keys = all_inv_keys.symmetric_difference(
 
626
            corresponding_invs)
623
627
        all_missing = set()
624
628
        inv_ids = [key[-1] for key in all_inv_keys]
625
 
        for inv in self.repo.iter_inventories(inv_ids, 'unordered'):
626
 
            root_keys = set([inv.id_to_entry.key()])
627
 
            if inv.parent_id_basename_to_file_id is not None:
628
 
                root_keys.add(inv.parent_id_basename_to_file_id.key())
629
 
            present = no_fallback_chk_bytes_index.get_parent_map(root_keys)
630
 
            missing = root_keys.difference(present)
631
 
            all_missing.update([('chk_bytes',) + key for key in missing])
632
 
        return all_missing
633
 
        
 
629
        parent_invs_only_ids = [key[-1] for key in parent_invs_only_keys]
 
630
        root_key_info = _build_interesting_key_sets(
 
631
            self.repo, inv_ids, parent_invs_only_ids)
 
632
        expected_chk_roots = root_key_info.all_keys()
 
633
        present_chk_roots = no_fallback_chk_bytes_index.get_parent_map(
 
634
            expected_chk_roots)
 
635
        missing_chk_roots = expected_chk_roots.difference(present_chk_roots)
 
636
        if missing_chk_roots:
 
637
            problems.append("missing referenced chk root keys: %s"
 
638
                % (sorted(missing_chk_roots),))
 
639
            # Don't bother checking any further.
 
640
            return problems
 
641
        # Find all interesting chk_bytes records, and make sure they are
 
642
        # present, as well as the text keys they reference.
 
643
        chk_bytes_no_fallbacks = self.repo.chk_bytes.without_fallbacks()
 
644
        chk_bytes_no_fallbacks._search_key_func = \
 
645
            self.repo.chk_bytes._search_key_func
 
646
        chk_diff = chk_map.iter_interesting_nodes(
 
647
            chk_bytes_no_fallbacks, root_key_info.interesting_root_keys,
 
648
            root_key_info.uninteresting_root_keys)
 
649
        bytes_to_info = inventory.CHKInventory._bytes_to_utf8name_key
 
650
        text_keys = set()
 
651
        try:
 
652
            for record in _filter_text_keys(chk_diff, text_keys, bytes_to_info):
 
653
                pass
 
654
        except errors.NoSuchRevision, e:
 
655
            # XXX: It would be nice if we could give a more precise error here.
 
656
            problems.append("missing chk node(s) for id_to_entry maps")
 
657
        chk_diff = chk_map.iter_interesting_nodes(
 
658
            chk_bytes_no_fallbacks, root_key_info.interesting_pid_root_keys,
 
659
            root_key_info.uninteresting_pid_root_keys)
 
660
        try:
 
661
            for interesting_rec, interesting_map in chk_diff:
 
662
                pass
 
663
        except errors.NoSuchRevision, e:
 
664
            problems.append(
 
665
                "missing chk node(s) for parent_id_basename_to_file_id maps")
 
666
        present_text_keys = no_fallback_texts_index.get_parent_map(text_keys)
 
667
        missing_text_keys = text_keys.difference(present_text_keys)
 
668
        if missing_text_keys:
 
669
            problems.append("missing text keys: %r"
 
670
                % (sorted(missing_text_keys),))
 
671
        return problems
 
672
 
634
673
    def _execute_pack_operations(self, pack_operations,
635
674
                                 _packer_class=GCCHKPacker,
636
675
                                 reload_func=None):
898
937
                                        parent_keys)
899
938
            present_parent_inv_ids = set(
900
939
                [k[-1] for k in present_parent_inv_keys])
901
 
            uninteresting_root_keys = set()
902
 
            interesting_root_keys = set()
903
940
            inventories_to_read = set(revision_ids)
904
941
            inventories_to_read.update(present_parent_inv_ids)
905
 
            for inv in self.iter_inventories(inventories_to_read):
906
 
                entry_chk_root_key = inv.id_to_entry.key()
907
 
                if inv.revision_id in present_parent_inv_ids:
908
 
                    uninteresting_root_keys.add(entry_chk_root_key)
909
 
                else:
910
 
                    interesting_root_keys.add(entry_chk_root_key)
911
 
 
 
942
            root_key_info = _build_interesting_key_sets(
 
943
                self, inventories_to_read, present_parent_inv_ids)
 
944
            interesting_root_keys = root_key_info.interesting_root_keys
 
945
            uninteresting_root_keys = root_key_info.uninteresting_root_keys
912
946
            chk_bytes = self.chk_bytes
913
947
            for record, items in chk_map.iter_interesting_nodes(chk_bytes,
914
948
                        interesting_root_keys, uninteresting_root_keys,
1048
1082
        bytes_to_info = inventory.CHKInventory._bytes_to_utf8name_key
1049
1083
        chk_bytes = self.from_repository.chk_bytes
1050
1084
        def _filter_id_to_entry():
1051
 
            for record, items in chk_map.iter_interesting_nodes(chk_bytes,
1052
 
                        self._chk_id_roots, uninteresting_root_keys):
1053
 
                for name, bytes in items:
1054
 
                    # Note: we don't care about name_utf8, because we are always
1055
 
                    # rich-root = True
1056
 
                    _, file_id, revision_id = bytes_to_info(bytes)
1057
 
                    self._text_keys.add((file_id, revision_id))
 
1085
            interesting_nodes = chk_map.iter_interesting_nodes(chk_bytes,
 
1086
                        self._chk_id_roots, uninteresting_root_keys)
 
1087
            for record in _filter_text_keys(interesting_nodes, self._text_keys,
 
1088
                    bytes_to_info):
1058
1089
                if record is not None:
1059
1090
                    yield record
1060
1091
            # Consumed
1098
1129
            missing_inventory_keys.add(key[1:])
1099
1130
        if self._chk_id_roots or self._chk_p_id_roots:
1100
1131
            raise AssertionError('Cannot call get_stream_for_missing_keys'
1101
 
                ' untill all of get_stream() has been consumed.')
 
1132
                ' until all of get_stream() has been consumed.')
1102
1133
        # Yield the inventory stream, so we can find the chk stream
1103
1134
        # Some of the missing_keys will be missing because they are ghosts.
1104
1135
        # As such, we can ignore them. The Sink is required to verify there are
1111
1142
            yield stream_info
1112
1143
 
1113
1144
 
 
1145
class _InterestingKeyInfo(object):
 
1146
    def __init__(self):
 
1147
        self.interesting_root_keys = set()
 
1148
        self.interesting_pid_root_keys = set()
 
1149
        self.uninteresting_root_keys = set()
 
1150
        self.uninteresting_pid_root_keys = set()
 
1151
 
 
1152
    def all_interesting(self):
 
1153
        return self.interesting_root_keys.union(self.interesting_pid_root_keys)
 
1154
 
 
1155
    def all_uninteresting(self):
 
1156
        return self.uninteresting_root_keys.union(
 
1157
            self.uninteresting_pid_root_keys)
 
1158
 
 
1159
    def all_keys(self):
 
1160
        return self.all_interesting().union(self.all_uninteresting())
 
1161
 
 
1162
 
 
1163
def _build_interesting_key_sets(repo, inventory_ids, parent_only_inv_ids):
 
1164
    result = _InterestingKeyInfo()
 
1165
    for inv in repo.iter_inventories(inventory_ids, 'unordered'):
 
1166
        root_key = inv.id_to_entry.key()
 
1167
        pid_root_key = inv.parent_id_basename_to_file_id.key()
 
1168
        if inv.revision_id in parent_only_inv_ids:
 
1169
            result.uninteresting_root_keys.add(root_key)
 
1170
            result.uninteresting_pid_root_keys.add(pid_root_key)
 
1171
        else:
 
1172
            result.interesting_root_keys.add(root_key)
 
1173
            result.interesting_pid_root_keys.add(pid_root_key)
 
1174
    return result
 
1175
 
 
1176
 
 
1177
def _filter_text_keys(interesting_nodes_iterable, text_keys, bytes_to_info):
 
1178
    """Iterate the result of iter_interesting_nodes, yielding the records
 
1179
    and adding to text_keys.
 
1180
    """
 
1181
    for record, items in interesting_nodes_iterable:
 
1182
        for name, bytes in items:
 
1183
            # Note: we don't care about name_utf8, because groupcompress repos
 
1184
            # are always rich-root, so there are no synthesised root records to
 
1185
            # ignore.
 
1186
            _, file_id, revision_id = bytes_to_info(bytes)
 
1187
            text_keys.add((file_id, revision_id))
 
1188
        yield record
 
1189
 
 
1190
 
 
1191
 
 
1192
 
1114
1193
class RepositoryFormatCHK1(RepositoryFormatPack):
1115
1194
    """A hashed CHK+group compress pack repository."""
1116
1195