1
# Copyright (C) 2004, 2005 by Martin Pool
2
# Copyright (C) 2005 by Canonical Ltd
1
# Copyright (C) 2005, 2006 Canonical Ltd
4
3
# This program is free software; you can redistribute it and/or modify
5
4
# it under the terms of the GNU General Public License as published by
6
5
# the Free Software Foundation; either version 2 of the License, or
7
6
# (at your option) any later version.
9
8
# This program is distributed in the hope that it will be useful,
10
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
11
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
11
# GNU General Public License for more details.
14
13
# You should have received a copy of the GNU General Public License
15
14
# along with this program; if not, write to the Free Software
16
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
# TODO: Check ancestries are correct for every revision: includes
18
# every committed so far, and in a reasonable order.
20
# TODO: Also check non-mainline revisions mentioned as parents.
22
# TODO: Check for extra files in the control directory.
24
# TODO: Check revision, inventory and entry objects have all
27
# TODO: Get every revision in the revision-store even if they're not
28
# referenced by history and make sure they're all valid.
30
# TODO: Perhaps have a way to record errors other than by raising exceptions;
31
# would perhaps be enough to accumulate exception objects in a list without
32
# raising them. If there's more than one exception it'd be good to see them
35
"""Checking of bzr objects.
37
check_refs is a concept used for optimising check. Objects that depend on other
38
objects (e.g. tree on repository) can list the objects they would be requesting
39
so that when the dependent object is checked, matches can be pulled out and
40
evaluated in-line rather than re-reading the same data many times.
41
check_refs are tuples (kind, value). Currently defined kinds are:
42
* 'trees', where value is a revid and the looked up objects are revision trees.
43
* 'lefthand-distance', where value is a revid and the looked up objects are the
44
distance along the lefthand path to NULL for that revid.
45
* 'revision-existence', where value is a revid, and the result is True or False
46
indicating that the revision was found/not found.
49
from bzrlib import errors, osutils
50
from bzrlib import repository as _mod_repository
51
from bzrlib import revision
52
from bzrlib.branch import Branch
53
from bzrlib.bzrdir import BzrDir
54
from bzrlib.errors import BzrCheckError
55
from bzrlib.repository import Repository
56
from bzrlib.revision import NULL_REVISION
57
from bzrlib.symbol_versioning import deprecated_function, deprecated_in
58
from bzrlib.trace import note
19
from bzrlib.trace import note, warning
21
def _update_store_entry(obj, obj_id, branch, store_name, store):
22
"""This is just a meta-function, which handles both revision entries
23
and inventory entries.
25
from bzrlib.trace import mutter
26
import tempfile, os, errno
27
from osutils import rename
28
obj_tmp = tempfile.TemporaryFile()
29
obj.write_xml(obj_tmp)
32
tmpfd, tmp_path = tempfile.mkstemp(prefix=obj_id, suffix='.gz',
33
dir=branch.controlfilename(store_name))
36
orig_obj_path = branch.controlfilename([store_name, obj_id+'.gz'])
37
# Remove the old entry out of the way
38
rename(orig_obj_path, tmp_path)
60
from bzrlib.workingtree import WorkingTree
63
"""Check a repository"""
65
# The Check object interacts with InventoryEntry.check, etc.
67
def __init__(self, repository, check_repo=True):
68
self.repository = repository
69
self.checked_rev_cnt = 0
71
self.missing_parent_links = {}
72
self.missing_inventory_sha_cnt = 0
73
self.missing_revision_cnt = 0
74
self.checked_weaves = set()
75
self.unreferenced_versions = set()
76
self.inconsistent_parents = []
77
self.rich_roots = repository.supports_rich_root()
78
self.text_key_references = {}
79
self.check_repo = check_repo
80
self.other_results = []
81
# Plain text lines to include in the report
82
self._report_items = []
83
# Keys we are looking for; may be large and need spilling to disk.
84
# key->(type(revision/inventory/text/signature/map), sha1, first-referer)
85
self.pending_keys = {}
86
# Ancestors map for all of revisions being checked; while large helper
87
# functions we call would create it anyway, so better to have once and
91
def check(self, callback_refs=None, check_repo=True):
92
if callback_refs is None:
94
self.repository.lock_read()
95
self.progress = bzrlib.ui.ui_factory.nested_progress_bar()
40
# TODO: We may need to handle the case where the old
41
# entry was not compressed (and thus did not end with .gz)
43
store.add(obj_tmp, obj_id) # Add the new one
44
os.remove(tmp_path) # Remove the old name
45
mutter(' Updated %s entry {%s}' % (store_name, obj_id))
47
# On any exception, restore the old entry
48
rename(tmp_path, orig_obj_path)
51
if os.path.exists(tmp_path):
52
# Unfortunately, the next command might throw
53
# an exception, which will mask a previous exception.
57
def _update_revision_entry(rev, branch):
58
"""After updating the values in a revision, make sure to
59
write out the data, but try to do it in an atomic manner.
61
:param rev: The Revision object to store
62
:param branch: The Branch object where this Revision is to be stored.
64
_update_store_entry(rev, rev.revision_id, branch,
65
'revision-store', branch.revision_store)
67
def _update_inventory_entry(inv, inv_id, branch):
68
"""When an inventory has been modified (such as by adding a unique tree root)
69
this atomically re-generates the file.
71
:param inv: The Inventory
72
:param inv_id: The inventory id for this inventory
73
:param branch: The Branch where this entry will be stored.
75
raise NotImplementedError("can't update existing inventory entry")
79
"""Run consistency checks on a branch.
81
TODO: Also check non-mainline revisions mentioned as parents.
83
TODO: Check for extra files in the control directory.
85
from bzrlib.trace import mutter
86
from bzrlib.errors import BzrCheckError, NoSuchRevision
87
from bzrlib.osutils import fingerprint_file
88
from bzrlib.inventory import ROOT_ID
89
from bzrlib.branch import gen_root_id
96
missing_inventory_sha_cnt = 0
97
missing_revision_sha_cnt = 0
98
missing_revision_cnt = 0
100
history = branch.revision_history()
102
revcount = len(history)
105
# for all texts checked, text_id -> sha1
108
progress = bzrlib.ui.ui_factory.progress_bar()
110
for rev_id in history:
112
progress.update('checking revision', revno, revcount)
113
# mutter(' revision {%s}' % rev_id)
114
rev = branch.get_revision(rev_id)
115
if rev.revision_id != rev_id:
116
raise BzrCheckError('wrong internal revision id in revision {%s}'
119
# check the previous history entry is a parent of this entry
121
if last_rev_id is None:
122
raise BzrCheckError("revision {%s} has %d parents, but is the "
123
"start of the branch"
124
% (rev_id, len(rev.parents)))
125
for prr in rev.parents:
126
if prr.revision_id == last_rev_id:
129
raise BzrCheckError("previous revision {%s} not listed among "
131
% (last_rev_id, rev_id))
133
for prr in rev.parents:
134
if prr.revision_sha1 is None:
135
missing_revision_sha_cnt += 1
137
prid = prr.revision_id
140
actual_sha = branch.get_revision_sha1(prid)
141
except NoSuchRevision:
142
missing_revision_cnt += 1
143
mutter("parent {%s} of {%s} not present in store",
147
if prr.revision_sha1 != actual_sha:
148
raise BzrCheckError("mismatched revision sha1 for "
149
"parent {%s} of {%s}: %s vs %s"
151
prr.revision_sha1, actual_sha))
153
raise BzrCheckError("revision {%s} has no parents listed but preceded "
155
% (rev_id, last_rev_id))
157
if hasattr(rev, 'inventory_id') and rev.inventory_id != rev_id:
158
mismatch_inv_id.append(rev_id)
160
## TODO: Check all the required fields are present on the revision.
162
if rev.inventory_sha1:
163
inv_sha1 = branch.get_inventory_sha1(rev_id)
164
if inv_sha1 != rev.inventory_sha1:
165
raise BzrCheckError('Inventory sha1 hash doesn\'t match'
166
' value in revision {%s}' % rev_id)
168
missing_inventory_sha_cnt += 1
169
mutter("no inventory_sha1 on revision {%s}" % rev_id)
171
inv = branch.get_inventory(rev_id)
175
## p('revision %d/%d file ids' % (revno, revcount))
177
if file_id in seen_ids:
178
raise BzrCheckError('duplicated file_id {%s} '
179
'in inventory for revision {%s}'
181
seen_ids[file_id] = True
191
if ie.parent_id != None:
192
if ie.parent_id not in seen_ids:
193
raise BzrCheckError('missing parent {%s} in inventory for revision {%s}'
194
% (ie.parent_id, rev_id))
196
if ie.kind == 'file':
197
if ie.text_id in checked_texts:
198
fp = checked_texts[ie.text_id]
97
self.progress.update('check', 0, 4)
99
self.progress.update('checking revisions', 0)
100
self.check_revisions()
101
self.progress.update('checking commit contents', 1)
102
self.repository._check_inventories(self)
103
self.progress.update('checking file graphs', 2)
104
# check_weaves is done after the revision scan so that
105
# revision index is known to be valid.
107
self.progress.update('checking branches and trees', 3)
109
repo = self.repository
110
# calculate all refs, and callback the objects requesting them.
112
wanting_items = set()
113
# Current crude version calculates everything and calls
114
# everything at once. Doing a queue and popping as things are
115
# satisfied would be cheaper on memory [but few people have
116
# huge numbers of working trees today. TODO: fix before
120
for ref, wantlist in callback_refs.iteritems():
121
wanting_items.update(wantlist)
124
refs[ref] = repo.revision_tree(value)
125
elif kind == 'lefthand-distance':
127
elif kind == 'revision-existence':
128
existences.add(value)
200
if not ie.text_id in branch.text_store:
201
raise BzrCheckError('text {%s} not in text_store' % ie.text_id)
203
tf = branch.text_store[ie.text_id]
204
fp = fingerprint_file(tf)
205
checked_texts[ie.text_id] = fp
207
if ie.text_size != fp['size']:
208
raise BzrCheckError('text {%s} wrong size' % ie.text_id)
209
if ie.text_sha1 != fp['sha1']:
210
raise BzrCheckError('text {%s} wrong sha1' % ie.text_id)
211
elif ie.kind == 'directory':
212
if ie.text_sha1 != None or ie.text_size != None or ie.text_id != None:
213
raise BzrCheckError('directory {%s} has text in revision {%s}'
217
for path, ie in inv.iter_entries():
218
if path in seen_names:
219
raise BzrCheckError('duplicated path %s '
220
'in inventory for revision {%s}'
222
seen_names[path] = True
130
raise AssertionError(
131
'unknown ref kind for ref %s' % ref)
132
node_distances = repo.get_graph().find_lefthand_distances(distances)
133
for key, distance in node_distances.iteritems():
134
refs[('lefthand-distance', key)] = distance
135
if key in existences and distance > 0:
136
refs[('revision-existence', key)] = True
137
existences.remove(key)
138
parent_map = repo.get_graph().get_parent_map(existences)
139
for key in parent_map:
140
refs[('revision-existence', key)] = True
141
existences.remove(key)
142
for key in existences:
143
refs[('revision-existence', key)] = False
144
for item in wanting_items:
145
if isinstance(item, WorkingTree):
147
if isinstance(item, Branch):
148
self.other_results.append(item.check(refs))
150
self.progress.finished()
151
self.repository.unlock()
153
def _check_revisions(self, revisions_iterator):
154
"""Check revision objects by decorating a generator.
156
:param revisions_iterator: An iterator of(revid, Revision-or-None).
157
:return: A generator of the contents of revisions_iterator.
159
self.planned_revisions = set()
160
for revid, revision in revisions_iterator:
161
yield revid, revision
162
self._check_one_rev(revid, revision)
163
# Flatten the revisions we found to guarantee consistent later
165
self.planned_revisions = list(self.planned_revisions)
166
# TODO: extract digital signatures as items to callback on too.
168
def check_revisions(self):
169
"""Scan revisions, checking data directly available as we go."""
170
revision_iterator = self.repository._iter_revisions(None)
171
revision_iterator = self._check_revisions(revision_iterator)
172
# We read the all revisions here:
173
# - doing this allows later code to depend on the revision index.
174
# - we can fill out existence flags at this point
175
# - we can read the revision inventory sha at this point
176
# - we can check properties and serialisers etc.
177
if not self.repository.revision_graph_can_have_wrong_parents():
178
# The check against the index isn't needed.
179
self.revs_with_bad_parents_in_index = None
180
for thing in revision_iterator:
183
bad_revisions = self.repository._find_inconsistent_revision_parents(
185
self.revs_with_bad_parents_in_index = list(bad_revisions)
187
def report_results(self, verbose):
189
self._report_repo_results(verbose)
190
for result in self.other_results:
191
result.report_results(verbose)
193
def _report_repo_results(self, verbose):
194
note('checked repository %s format %s',
195
self.repository.bzrdir.root_transport,
196
self.repository._format)
197
note('%6d revisions', self.checked_rev_cnt)
198
note('%6d file-ids', len(self.checked_weaves))
200
note('%6d unreferenced text versions',
201
len(self.unreferenced_versions))
202
if verbose and len(self.unreferenced_versions):
203
for file_id, revision_id in self.unreferenced_versions:
204
note('unreferenced version: {%s} in %s', revision_id,
206
if self.missing_inventory_sha_cnt:
207
note('%6d revisions are missing inventory_sha1',
208
self.missing_inventory_sha_cnt)
209
if self.missing_revision_cnt:
210
note('%6d revisions are mentioned but not present',
211
self.missing_revision_cnt)
213
note('%6d ghost revisions', len(self.ghosts))
215
for ghost in self.ghosts:
217
if len(self.missing_parent_links):
218
note('%6d revisions missing parents in ancestry',
219
len(self.missing_parent_links))
221
for link, linkers in self.missing_parent_links.items():
222
note(' %s should be in the ancestry for:', link)
223
for linker in linkers:
224
note(' * %s', linker)
225
if len(self.inconsistent_parents):
226
note('%6d inconsistent parents', len(self.inconsistent_parents))
228
for info in self.inconsistent_parents:
229
revision_id, file_id, found_parents, correct_parents = info
230
note(' * %s version %s has parents %r '
232
% (file_id, revision_id, found_parents,
234
if self.revs_with_bad_parents_in_index:
235
note('%6d revisions have incorrect parents in the revision index',
236
len(self.revs_with_bad_parents_in_index))
238
for item in self.revs_with_bad_parents_in_index:
239
revision_id, index_parents, actual_parents = item
241
' %s has wrong parents in index: '
243
revision_id, index_parents, actual_parents)
244
for item in self._report_items:
247
def _check_one_rev(self, rev_id, rev):
248
"""Cross-check one revision.
250
:param rev_id: A revision id to check.
251
:param rev: A revision or None to indicate a missing revision.
253
if rev.revision_id != rev_id:
254
self._report_items.append(
255
'Mismatched internal revid {%s} and index revid {%s}' % (
256
rev.revision_id, rev_id))
257
rev_id = rev.revision_id
258
# Check this revision tree etc, and count as seen when we encounter a
260
self.planned_revisions.add(rev_id)
262
self.ghosts.discard(rev_id)
263
# Count all parents as ghosts if we haven't seen them yet.
264
for parent in rev.parent_ids:
265
if not parent in self.planned_revisions:
266
self.ghosts.add(parent)
268
self.ancestors[rev_id] = tuple(rev.parent_ids) or (NULL_REVISION,)
269
self.add_pending_item(rev_id, ('inventories', rev_id), 'inventory',
271
self.checked_rev_cnt += 1
273
def add_pending_item(self, referer, key, kind, sha1):
274
"""Add a reference to a sha1 to be cross checked against a key.
276
:param referer: The referer that expects key to have sha1.
277
:param key: A storage key e.g. ('texts', 'foo@bar-20040504-1234')
278
:param kind: revision/inventory/text/map/signature
279
:param sha1: A hex sha1 or None if no sha1 is known.
281
existing = self.pending_keys.get(key)
283
if sha1 != existing[1]:
284
self._report_items.append('Multiple expected sha1s for %s. {%s}'
285
' expects {%s}, {%s} expects {%s}', (
286
key, referer, sha1, existing[1], existing[0]))
288
self.pending_keys[key] = (kind, sha1, referer)
290
def check_weaves(self):
291
"""Check all the weaves we can get our hands on.
294
storebar = bzrlib.ui.ui_factory.nested_progress_bar()
296
self._check_weaves(storebar)
300
def _check_weaves(self, storebar):
301
storebar.update('text-index', 0, 2)
302
if self.repository._format.fast_deltas:
303
# We haven't considered every fileid instance so far.
304
weave_checker = self.repository._get_versioned_file_checker(
305
ancestors=self.ancestors)
307
weave_checker = self.repository._get_versioned_file_checker(
308
text_key_references=self.text_key_references,
309
ancestors=self.ancestors)
310
storebar.update('file-graph', 1)
311
result = weave_checker.check_file_version_parents(
312
self.repository.texts)
313
self.checked_weaves = weave_checker.file_ids
314
bad_parents, unused_versions = result
315
bad_parents = bad_parents.items()
316
for text_key, (stored_parents, correct_parents) in bad_parents:
317
# XXX not ready for id join/split operations.
318
weave_id = text_key[0]
319
revision_id = text_key[-1]
320
weave_parents = tuple([parent[-1] for parent in stored_parents])
321
correct_parents = tuple([parent[-1] for parent in correct_parents])
322
self.inconsistent_parents.append(
323
(revision_id, weave_id, weave_parents, correct_parents))
324
self.unreferenced_versions.update(unused_versions)
326
def _add_entry_to_text_key_references(self, inv, entry):
327
if not self.rich_roots and entry.name == '':
329
key = (entry.file_id, entry.revision)
330
self.text_key_references.setdefault(key, False)
331
if entry.revision == inv.revision_id:
332
self.text_key_references[key] = True
335
@deprecated_function(deprecated_in((1,6,0)))
336
def check(branch, verbose):
337
"""Run consistency checks on a branch.
339
Results are reported through logging.
341
Deprecated in 1.6. Please use check_dwim instead.
343
:raise BzrCheckError: if there's a consistency error.
345
check_branch(branch, verbose)
348
@deprecated_function(deprecated_in((1,16,0)))
349
def check_branch(branch, verbose):
350
"""Run consistency checks on a branch.
352
Results are reported through logging.
354
:raise BzrCheckError: if there's a consistency error.
359
for ref in branch._get_check_refs():
360
needed_refs.setdefault(ref, []).append(branch)
361
result = branch.repository.check([branch.last_revision()], needed_refs)
362
branch_result = result.other_results[0]
230
note('checked %d revisions, %d file texts' % (revcount, len(checked_texts)))
232
if missing_inventory_sha_cnt:
233
note('%d revisions are missing inventory_sha1' % missing_inventory_sha_cnt)
235
if missing_revision_sha_cnt:
236
note('%d parent links are missing revision_sha1' % missing_revision_sha_cnt)
238
if missing_revision_cnt:
239
note('%d revisions are mentioned but not present' % missing_revision_cnt)
241
if missing_revision_cnt:
242
print '%d revisions are mentioned but not present' % missing_revision_cnt
244
# stub this out for now because the main bzr branch has references
245
# to revisions that aren't present in the store -- mbp 20050804
246
# if (missing_inventory_sha_cnt
247
# or missing_revision_sha_cnt):
248
# print ' (use "bzr upgrade" to fix them)'
251
warning('%d revisions have mismatched inventory ids:' % len(mismatch_inv_id))
252
for rev_id in mismatch_inv_id:
253
warning(' %s', rev_id)
365
branch_result.report_results(verbose)
368
def scan_branch(branch, needed_refs, to_unlock):
369
"""Scan a branch for refs.
371
:param branch: The branch to schedule for checking.
372
:param needed_refs: Refs we are accumulating.
373
:param to_unlock: The unlock list accumulating.
375
note("Checking branch at '%s'." % (branch.base,))
377
to_unlock.append(branch)
378
branch_refs = branch._get_check_refs()
379
for ref in branch_refs:
380
reflist = needed_refs.setdefault(ref, [])
381
reflist.append(branch)
384
def scan_tree(base_tree, tree, needed_refs, to_unlock):
385
"""Scan a tree for refs.
387
:param base_tree: The original tree check opened, used to detect duplicate
389
:param tree: The tree to schedule for checking.
390
:param needed_refs: Refs we are accumulating.
391
:param to_unlock: The unlock list accumulating.
393
if base_tree is not None and tree.basedir == base_tree.basedir:
395
note("Checking working tree at '%s'." % (tree.basedir,))
397
to_unlock.append(tree)
398
tree_refs = tree._get_check_refs()
399
for ref in tree_refs:
400
reflist = needed_refs.setdefault(ref, [])
404
def check_dwim(path, verbose, do_branch=False, do_repo=False, do_tree=False):
405
"""Check multiple objects.
407
If errors occur they are accumulated and reported as far as possible, and
408
an exception raised at the end of the process.
411
base_tree, branch, repo, relpath = \
412
BzrDir.open_containing_tree_branch_or_repository(path)
413
except errors.NotBranchError:
414
base_tree = branch = repo = None
419
if base_tree is not None:
420
# If the tree is a lightweight checkout we won't see it in
421
# repo.find_branches - add now.
423
scan_tree(None, base_tree, needed_refs, to_unlock)
424
branch = base_tree.branch
425
if branch is not None:
428
# The branch is in a shared repository
429
repo = branch.repository
432
to_unlock.append(repo)
433
branches = repo.find_branches(using=True)
435
if do_branch or do_tree:
436
for branch in branches:
439
tree = branch.bzrdir.open_workingtree()
441
except (errors.NotLocalUrl, errors.NoWorkingTree):
444
scan_tree(base_tree, tree, needed_refs, to_unlock)
446
scan_branch(branch, needed_refs, to_unlock)
447
if do_branch and not branches:
448
note("No branch found at specified location.")
449
if do_tree and base_tree is None and not saw_tree:
450
note("No working tree found at specified location.")
451
if do_repo or do_branch or do_tree:
453
note("Checking repository at '%s'."
454
% (repo.bzrdir.root_transport.base,))
455
result = repo.check(None, callback_refs=needed_refs,
457
result.report_results(verbose)
460
note("No working tree found at specified location.")
462
note("No branch found at specified location.")
464
note("No repository found at specified location.")
466
for thing in to_unlock: