65
50
# The Check object interacts with InventoryEntry.check, etc.
67
def __init__(self, repository, check_repo=True):
52
def __init__(self, repository):
68
53
self.repository = repository
54
self.checked_text_cnt = 0
69
55
self.checked_rev_cnt = 0
57
self.repeated_text_cnt = 0
71
58
self.missing_parent_links = {}
72
59
self.missing_inventory_sha_cnt = 0
73
60
self.missing_revision_cnt = 0
61
# maps (file-id, version) -> sha1; used by InventoryFile._check
62
self.checked_texts = {}
74
63
self.checked_weaves = set()
75
64
self.unreferenced_versions = set()
76
65
self.inconsistent_parents = []
77
self.rich_roots = repository.supports_rich_root()
78
self.text_key_references = {}
79
self.check_repo = check_repo
80
self.other_results = []
81
# Plain text lines to include in the report
82
self._report_items = []
83
# Keys we are looking for; may be large and need spilling to disk.
84
# key->(type(revision/inventory/text/signature/map), sha1, first-referer)
85
self.pending_keys = {}
86
# Ancestors map for all of revisions being checked; while large helper
87
# functions we call would create it anyway, so better to have once and
91
def check(self, callback_refs=None, check_repo=True):
92
if callback_refs is None:
94
68
self.repository.lock_read()
95
69
self.progress = bzrlib.ui.ui_factory.nested_progress_bar()
97
self.progress.update('check', 0, 4)
99
self.progress.update('checking revisions', 0)
100
self.check_revisions()
101
self.progress.update('checking commit contents', 1)
102
self.repository._check_inventories(self)
103
self.progress.update('checking file graphs', 2)
104
# check_weaves is done after the revision scan so that
105
# revision index is known to be valid.
107
self.progress.update('checking branches and trees', 3)
109
repo = self.repository
110
# calculate all refs, and callback the objects requesting them.
112
wanting_items = set()
113
# Current crude version calculates everything and calls
114
# everything at once. Doing a queue and popping as things are
115
# satisfied would be cheaper on memory [but few people have
116
# huge numbers of working trees today. TODO: fix before
120
for ref, wantlist in callback_refs.iteritems():
121
wanting_items.update(wantlist)
124
refs[ref] = repo.revision_tree(value)
125
elif kind == 'lefthand-distance':
127
elif kind == 'revision-existence':
128
existences.add(value)
130
raise AssertionError(
131
'unknown ref kind for ref %s' % ref)
132
node_distances = repo.get_graph().find_lefthand_distances(distances)
133
for key, distance in node_distances.iteritems():
134
refs[('lefthand-distance', key)] = distance
135
if key in existences and distance > 0:
136
refs[('revision-existence', key)] = True
137
existences.remove(key)
138
parent_map = repo.get_graph().get_parent_map(existences)
139
for key in parent_map:
140
refs[('revision-existence', key)] = True
141
existences.remove(key)
142
for key in existences:
143
refs[('revision-existence', key)] = False
144
for item in wanting_items:
145
if isinstance(item, WorkingTree):
147
if isinstance(item, Branch):
148
self.other_results.append(item.check(refs))
71
self.progress.update('retrieving inventory', 0, 2)
72
# do not put in init, as it should be done with progess,
73
# and inside the lock.
74
self.inventory_weave = self.repository.inventories
75
self.progress.update('checking revision graph', 1)
76
self.check_revision_graph()
79
while revno < len(self.planned_revisions):
80
rev_id = self.planned_revisions[revno]
81
self.progress.update('checking revision', revno,
82
len(self.planned_revisions))
84
self.check_one_rev(rev_id)
85
# check_weaves is done after the revision scan so that
86
# revision index is known to be valid.
150
89
self.progress.finished()
151
90
self.repository.unlock()
153
def _check_revisions(self, revisions_iterator):
154
"""Check revision objects by decorating a generator.
156
:param revisions_iterator: An iterator of(revid, Revision-or-None).
157
:return: A generator of the contents of revisions_iterator.
159
self.planned_revisions = set()
160
for revid, revision in revisions_iterator:
161
yield revid, revision
162
self._check_one_rev(revid, revision)
163
# Flatten the revisions we found to guarantee consistent later
165
self.planned_revisions = list(self.planned_revisions)
166
# TODO: extract digital signatures as items to callback on too.
168
def check_revisions(self):
169
"""Scan revisions, checking data directly available as we go."""
170
revision_iterator = self.repository._iter_revisions(None)
171
revision_iterator = self._check_revisions(revision_iterator)
172
# We read the all revisions here:
173
# - doing this allows later code to depend on the revision index.
174
# - we can fill out existence flags at this point
175
# - we can read the revision inventory sha at this point
176
# - we can check properties and serialisers etc.
92
def check_revision_graph(self):
177
93
if not self.repository.revision_graph_can_have_wrong_parents():
178
# The check against the index isn't needed.
94
# This check is not necessary.
179
95
self.revs_with_bad_parents_in_index = None
180
for thing in revision_iterator:
183
bad_revisions = self.repository._find_inconsistent_revision_parents(
185
self.revs_with_bad_parents_in_index = list(bad_revisions)
97
bad_revisions = self.repository._find_inconsistent_revision_parents()
98
self.revs_with_bad_parents_in_index = list(bad_revisions)
100
def plan_revisions(self):
101
repository = self.repository
102
self.planned_revisions = repository.all_revision_ids()
103
self.progress.clear()
104
inventoried = set(key[-1] for key in self.inventory_weave.keys())
105
awol = set(self.planned_revisions) - inventoried
107
raise BzrCheckError('Stored revisions missing from inventory'
108
'{%s}' % ','.join([f for f in awol]))
187
110
def report_results(self, verbose):
189
self._report_repo_results(verbose)
190
for result in self.other_results:
191
result.report_results(verbose)
193
def _report_repo_results(self, verbose):
194
111
note('checked repository %s format %s',
195
112
self.repository.bzrdir.root_transport,
196
113
self.repository._format)
197
114
note('%6d revisions', self.checked_rev_cnt)
198
115
note('%6d file-ids', len(self.checked_weaves))
200
note('%6d unreferenced text versions',
201
len(self.unreferenced_versions))
202
if verbose and len(self.unreferenced_versions):
203
for file_id, revision_id in self.unreferenced_versions:
204
note('unreferenced version: {%s} in %s', revision_id,
116
note('%6d unique file texts', self.checked_text_cnt)
117
note('%6d repeated file texts', self.repeated_text_cnt)
118
note('%6d unreferenced text versions',
119
len(self.unreferenced_versions))
206
120
if self.missing_inventory_sha_cnt:
207
121
note('%6d revisions are missing inventory_sha1',
208
122
self.missing_inventory_sha_cnt)
241
159
' %s has wrong parents in index: '
242
160
'%r should be %r',
243
161
revision_id, index_parents, actual_parents)
244
for item in self._report_items:
247
def _check_one_rev(self, rev_id, rev):
248
"""Cross-check one revision.
250
:param rev_id: A revision id to check.
251
:param rev: A revision or None to indicate a missing revision.
163
def check_one_rev(self, rev_id):
164
"""Check one revision.
166
rev_id - the one to check
168
rev = self.repository.get_revision(rev_id)
253
170
if rev.revision_id != rev_id:
254
self._report_items.append(
255
'Mismatched internal revid {%s} and index revid {%s}' % (
256
rev.revision_id, rev_id))
257
rev_id = rev.revision_id
258
# Check this revision tree etc, and count as seen when we encounter a
260
self.planned_revisions.add(rev_id)
262
self.ghosts.discard(rev_id)
263
# Count all parents as ghosts if we haven't seen them yet.
171
raise BzrCheckError('wrong internal revision id in revision {%s}'
264
174
for parent in rev.parent_ids:
265
175
if not parent in self.planned_revisions:
266
self.ghosts.add(parent)
268
self.ancestors[rev_id] = tuple(rev.parent_ids) or (NULL_REVISION,)
269
self.add_pending_item(rev_id, ('inventories', rev_id), 'inventory',
176
missing_links = self.missing_parent_links.get(parent, [])
177
missing_links.append(rev_id)
178
self.missing_parent_links[parent] = missing_links
179
# list based so somewhat slow,
180
# TODO have a planned_revisions list and set.
181
if self.repository.has_revision(parent):
182
missing_ancestry = self.repository.get_ancestry(parent)
183
for missing in missing_ancestry:
184
if (missing is not None
185
and missing not in self.planned_revisions):
186
self.planned_revisions.append(missing)
188
self.ghosts.append(rev_id)
190
if rev.inventory_sha1:
191
inv_sha1 = self.repository.get_inventory_sha1(rev_id)
192
if inv_sha1 != rev.inventory_sha1:
193
raise BzrCheckError('Inventory sha1 hash doesn\'t match'
194
' value in revision {%s}' % rev_id)
195
self._check_revision_tree(rev_id)
271
196
self.checked_rev_cnt += 1
273
def add_pending_item(self, referer, key, kind, sha1):
274
"""Add a reference to a sha1 to be cross checked against a key.
276
:param referer: The referer that expects key to have sha1.
277
:param key: A storage key e.g. ('texts', 'foo@bar-20040504-1234')
278
:param kind: revision/inventory/text/map/signature
279
:param sha1: A hex sha1 or None if no sha1 is known.
281
existing = self.pending_keys.get(key)
283
if sha1 != existing[1]:
284
self._report_items.append('Multiple expected sha1s for %s. {%s}'
285
' expects {%s}, {%s} expects {%s}', (
286
key, referer, sha1, existing[1], existing[0]))
288
self.pending_keys[key] = (kind, sha1, referer)
290
198
def check_weaves(self):
291
199
"""Check all the weaves we can get our hands on.
294
storebar = bzrlib.ui.ui_factory.nested_progress_bar()
296
self._check_weaves(storebar)
300
def _check_weaves(self, storebar):
301
storebar.update('text-index', 0, 2)
302
if self.repository._format.fast_deltas:
303
# We haven't considered every fileid instance so far.
304
weave_checker = self.repository._get_versioned_file_checker(
305
ancestors=self.ancestors)
307
weave_checker = self.repository._get_versioned_file_checker(
308
text_key_references=self.text_key_references,
309
ancestors=self.ancestors)
310
storebar.update('file-graph', 1)
202
self.progress.update('checking inventory', 0, 2)
203
self.inventory_weave.check(progress_bar=self.progress)
204
self.progress.update('checking text storage', 1, 2)
205
self.repository.texts.check(progress_bar=self.progress)
206
weave_checker = self.repository._get_versioned_file_checker()
311
207
result = weave_checker.check_file_version_parents(
312
self.repository.texts)
208
self.repository.texts, progress_bar=self.progress)
313
209
self.checked_weaves = weave_checker.file_ids
314
210
bad_parents, unused_versions = result
315
211
bad_parents = bad_parents.items()
323
219
(revision_id, weave_id, weave_parents, correct_parents))
324
220
self.unreferenced_versions.update(unused_versions)
326
def _add_entry_to_text_key_references(self, inv, entry):
327
if not self.rich_roots and entry.name == '':
329
key = (entry.file_id, entry.revision)
330
self.text_key_references.setdefault(key, False)
331
if entry.revision == inv.revision_id:
332
self.text_key_references[key] = True
222
def _check_revision_tree(self, rev_id):
223
tree = self.repository.revision_tree(rev_id)
227
if file_id in seen_ids:
228
raise BzrCheckError('duplicated file_id {%s} '
229
'in inventory for revision {%s}'
231
seen_ids[file_id] = True
234
ie.check(self, rev_id, inv, tree)
236
for path, ie in inv.iter_entries():
237
if path in seen_names:
238
raise BzrCheckError('duplicated path %s '
239
'in inventory for revision {%s}'
241
seen_names[path] = True
335
244
@deprecated_function(deprecated_in((1,6,0)))
336
245
def check(branch, verbose):
337
246
"""Run consistency checks on a branch.
339
248
Results are reported through logging.
341
Deprecated in 1.6. Please use check_dwim instead.
250
Deprecated in 1.6. Please use check_branch instead.
343
252
:raise BzrCheckError: if there's a consistency error.
345
254
check_branch(branch, verbose)
348
@deprecated_function(deprecated_in((1,16,0)))
349
257
def check_branch(branch, verbose):
350
258
"""Run consistency checks on a branch.
356
264
branch.lock_read()
359
for ref in branch._get_check_refs():
360
needed_refs.setdefault(ref, []).append(branch)
361
result = branch.repository.check([branch.last_revision()], needed_refs)
362
branch_result = result.other_results[0]
266
branch_result = branch.check()
365
269
branch_result.report_results(verbose)
368
def scan_branch(branch, needed_refs, to_unlock):
369
"""Scan a branch for refs.
371
:param branch: The branch to schedule for checking.
372
:param needed_refs: Refs we are accumulating.
373
:param to_unlock: The unlock list accumulating.
375
note("Checking branch at '%s'." % (branch.base,))
377
to_unlock.append(branch)
378
branch_refs = branch._get_check_refs()
379
for ref in branch_refs:
380
reflist = needed_refs.setdefault(ref, [])
381
reflist.append(branch)
384
def scan_tree(base_tree, tree, needed_refs, to_unlock):
385
"""Scan a tree for refs.
387
:param base_tree: The original tree check opened, used to detect duplicate
389
:param tree: The tree to schedule for checking.
390
:param needed_refs: Refs we are accumulating.
391
:param to_unlock: The unlock list accumulating.
393
if base_tree is not None and tree.basedir == base_tree.basedir:
395
note("Checking working tree at '%s'." % (tree.basedir,))
397
to_unlock.append(tree)
398
tree_refs = tree._get_check_refs()
399
for ref in tree_refs:
400
reflist = needed_refs.setdefault(ref, [])
404
272
def check_dwim(path, verbose, do_branch=False, do_repo=False, do_tree=False):
405
"""Check multiple objects.
407
If errors occur they are accumulated and reported as far as possible, and
408
an exception raised at the end of the process.
411
base_tree, branch, repo, relpath = \
274
tree, branch, repo, relpath = \
412
275
BzrDir.open_containing_tree_branch_or_repository(path)
413
276
except errors.NotBranchError:
414
base_tree = branch = repo = None
419
if base_tree is not None:
420
# If the tree is a lightweight checkout we won't see it in
421
# repo.find_branches - add now.
423
scan_tree(None, base_tree, needed_refs, to_unlock)
424
branch = base_tree.branch
425
if branch is not None:
428
# The branch is in a shared repository
429
repo = branch.repository
432
to_unlock.append(repo)
433
branches = repo.find_branches(using=True)
435
if do_branch or do_tree:
436
for branch in branches:
439
tree = branch.bzrdir.open_workingtree()
441
except (errors.NotLocalUrl, errors.NoWorkingTree):
444
scan_tree(base_tree, tree, needed_refs, to_unlock)
446
scan_branch(branch, needed_refs, to_unlock)
447
if do_branch and not branches:
448
note("No branch found at specified location.")
449
if do_tree and base_tree is None and not saw_tree:
450
note("No working tree found at specified location.")
451
if do_repo or do_branch or do_tree:
453
note("Checking repository at '%s'."
454
% (repo.bzrdir.root_transport.base,))
455
result = repo.check(None, callback_refs=needed_refs,
277
tree = branch = repo = None
281
note("Checking working tree at '%s'."
282
% (tree.bzrdir.root_transport.base,))
285
log_error("No working tree found at specified location.")
287
if branch is not None:
290
# The branch is in a shared repository
291
repo = branch.repository
293
elif repo is not None:
294
branches = repo.find_branches(using=True)
300
note("Checking repository at '%s'."
301
% (repo.bzrdir.root_transport.base,))
302
result = repo.check()
457
303
result.report_results(verbose)
460
note("No working tree found at specified location.")
462
note("No branch found at specified location.")
464
note("No repository found at specified location.")
466
for thing in to_unlock:
306
log_error("No branch found at specified location.")
308
for branch in branches:
309
note("Checking branch at '%s'."
310
% (branch.bzrdir.root_transport.base,))
311
check_branch(branch, verbose)
316
log_error("No branch found at specified location.")
318
log_error("No repository found at specified location.")