~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/repository.py

  • Committer: Robert Collins
  • Date: 2009-06-16 00:37:55 UTC
  • mto: This revision was merged to the branch mainline in revision 4593.
  • Revision ID: robertc@robertcollins.net-20090616003755-pmlsfdnx8e5obnwm
Start checking file texts in a single pass.

Show diffs side-by-side

added added

removed removed

Lines of Context:
1153
1153
    def _check_inventories(self, checker):
1154
1154
        """Check the inventories found from the revision scan.
1155
1155
        
1156
 
        This checks all data that is tree-shape and not file-content.
 
1156
        This is responsible for verifying the sha1 of inventories and
 
1157
        creating a pending_keys set that covers data referenced by inventories.
1157
1158
        """
1158
 
        revbar = ui.ui_factory.nested_progress_bar()
 
1159
        bar = ui.ui_factory.nested_progress_bar()
 
1160
        try:
 
1161
            self._do_check_inventories(checker, bar)
 
1162
        finally:
 
1163
            bar.finished()
 
1164
 
 
1165
    def _do_check_inventories(self, checker, bar):
 
1166
        """Helper for _check_inventories."""
1159
1167
        revno = 0
 
1168
        keys = {'chk_bytes':set(), 'inventories':set(), 'texts':set()}
 
1169
        kinds = ['chk_bytes', 'texts']
1160
1170
        count = len(checker.pending_keys)
 
1171
        bar.update("inventories", 0, 2)
1161
1172
        current_keys = checker.pending_keys
1162
1173
        checker.pending_keys = {}
1163
 
        keys = set()
 
1174
        # Accumulate current checks.
1164
1175
        for key in current_keys:
1165
 
            if key[0] != 'inventories':
1166
 
                checker._report_items.append('unknown key type %r' % key)
1167
 
            keys.add(key[1:])
1168
 
        # XXX: below is to-go code that accesses texts one at a time.
1169
 
        try:
1170
 
            while revno < len(checker.planned_revisions):
1171
 
                rev_id = checker.planned_revisions[revno]
1172
 
                revbar.update('checking revision', revno,
1173
 
                    len(checker.planned_revisions))
1174
 
                revno += 1
1175
 
                try:
1176
 
                    tree = self.revision_tree(rev_id)
1177
 
                except errors.NoSuchRevision:
1178
 
                    self._report_items.append(
1179
 
                        "Missing inventory for revision {%s}" % rev_id)
1180
 
                inv = tree.inventory
 
1176
            if key[0] != 'inventories' and key[0] not in kinds:
 
1177
                checker._report_items.append('unknown key type %r' % (key,))
 
1178
            keys[key[0]].add(key[1:])
 
1179
        if keys['inventories']:
 
1180
            # NB: output order *should* be roughly sorted - topo or
 
1181
            # inverse topo depending on repository - either way decent
 
1182
            # to just delta against. However, pre-CHK formats didn't
 
1183
            # try to optimise inventory layout on disk. As such the
 
1184
            # pre-CHK code path does not use inventory deltas.
 
1185
            last_object = None
 
1186
            for record in self.inventories.check(keys=keys['inventories']):
 
1187
                if record.storage_kind == 'absent':
 
1188
                    checker._report_items.append(
 
1189
                        'Missing inventory {%s}' % (record.key,))
 
1190
                else:
 
1191
                    last_object = self._check_record('inventories', record,
 
1192
                        checker, last_object,
 
1193
                        current_keys[('inventories',) + record.key])
 
1194
            del keys['inventories']
 
1195
        else:
 
1196
            return
 
1197
        bar.update("texts", 1)
 
1198
        while (checker.pending_keys or keys['chk_bytes']
 
1199
            or keys['texts']):
 
1200
            # Something to check.
 
1201
            current_keys = checker.pending_keys
 
1202
            checker.pending_keys = {}
 
1203
            # Accumulate current checks.
 
1204
            for key in current_keys:
 
1205
                if key[0] not in kinds:
 
1206
                    checker._report_items.append('unknown key type %r' % (key,))
 
1207
                keys[key[0]].add(key[1:])
 
1208
            # Check the outermost kind only - inventories || chk_bytes || texts
 
1209
            for kind in kinds:
 
1210
                if keys[kind]:
 
1211
                    last_object = None
 
1212
                    for record in getattr(self, kind).check(keys=keys[kind]):
 
1213
                        if record.storage_kind == 'absent':
 
1214
                            checker._report_items.append(
 
1215
                                'Missing inventory {%s}' % (record.key,))
 
1216
                        else:
 
1217
                            last_object = self._check_record(kind, record,
 
1218
                                checker, last_object, current_keys[(kind,) + record.key])
 
1219
                    keys[kind] = set()
 
1220
                    break
 
1221
 
 
1222
    def _check_record(self, kind, record, checker, last_object, item_data):
 
1223
        """Check a single text from this repository."""
 
1224
        if kind == 'inventories':
 
1225
            rev_id = record.key[0]
 
1226
            inv = self.deserialise_inventory(rev_id,
 
1227
                record.get_bytes_as('fulltext'))
 
1228
            if last_object is not None:
 
1229
                delta = inv._make_delta(last_object)
 
1230
                for old_path, path, file_id, ie in delta:
 
1231
                    if ie is None:
 
1232
                        continue
 
1233
                    ie.check(checker, rev_id, inv)
 
1234
            else:
1181
1235
                for path, ie in inv.iter_entries():
1182
 
                    checker._add_entry_to_text_key_references(inv, ie)
1183
 
                    file_id = ie.file_id
1184
 
                    ie.check(checker, rev_id, inv, tree)
1185
 
        finally:
1186
 
            revbar.finished()
 
1236
                    ie.check(checker, rev_id, inv)
 
1237
            if self._format.fast_deltas:
 
1238
                return inv
 
1239
        elif kind == 'chk_bytes':
 
1240
            # No code written to check chk_bytes for this repo format.
 
1241
            checker._report_items.append(
 
1242
                'unsupported key type chk_bytes for %s' % (record.key,))
 
1243
        elif kind == 'texts':
 
1244
            self._check_text(record, checker, item_data)
 
1245
        else:
 
1246
            checker._report_items.append(
 
1247
                'unknown key type %s for %s' % (kind, record.key))
 
1248
 
 
1249
    def _check_text(self, record, checker, item_data):
 
1250
        """Check a single text."""
 
1251
        # Check it is extractable.
 
1252
        # TODO: check length.
 
1253
        if record.storage_kind == 'chunked':
 
1254
            chunks = record.get_bytes_as(record.storage_kind)
 
1255
            sha1 = osutils.sha_strings(chunks)
 
1256
            length = sum(map(len, chunks))
 
1257
        else:
 
1258
            content = record.get_bytes_as('fulltext')
 
1259
            sha1 = osutils.sha_string(content)
 
1260
            length = len(content)
 
1261
        if item_data and sha1 != item_data[1]:
 
1262
            checker._report_items.append(
 
1263
                'sha1 mismatch: %s has sha1 %s expected %s referenced by %s' %
 
1264
                (record.key, sha1, item_data[1], item_data[2]))
1187
1265
 
1188
1266
    @staticmethod
1189
1267
    def create(a_bzrdir):