1
# Copyright (C) 2004, 2005 by Martin Pool
2
# Copyright (C) 2005 by Canonical Ltd
1
# Copyright (C) 2005, 2006 Canonical Ltd
4
3
# This program is free software; you can redistribute it and/or modify
5
4
# it under the terms of the GNU General Public License as published by
6
5
# the Free Software Foundation; either version 2 of the License, or
7
6
# (at your option) any later version.
9
8
# This program is distributed in the hope that it will be useful,
10
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
11
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
11
# GNU General Public License for more details.
14
13
# You should have received a copy of the GNU General Public License
15
14
# along with this program; if not, write to the Free Software
16
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
def _update_store_entry(obj, obj_id, branch, store_name, store):
20
"""This is just a meta-function, which handles both revision entries
21
and inventory entries.
23
from bzrlib.trace import mutter
24
import tempfile, os, errno
25
from osutils import rename
26
obj_tmp = tempfile.TemporaryFile()
27
obj.write_xml(obj_tmp)
30
tmpfd, tmp_path = tempfile.mkstemp(prefix=obj_id, suffix='.gz',
31
dir=branch.controlfilename(store_name))
34
orig_obj_path = branch.controlfilename([store_name, obj_id+'.gz'])
35
# Remove the old entry out of the way
36
rename(orig_obj_path, tmp_path)
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
# TODO: Check ancestries are correct for every revision: includes
18
# every revision committed so far, and in a reasonable order.
20
# TODO: Also check non-mainline revisions mentioned as parents.
22
# TODO: Check for extra files in the control directory.
24
# TODO: Check revision, inventory and entry objects have all
27
# TODO: Get every revision in the revision-store even if they're not
28
# referenced by history and make sure they're all valid.
30
# TODO: Perhaps have a way to record errors other than by raising exceptions;
31
# would perhaps be enough to accumulate exception objects in a list without
32
# raising them. If there's more than one exception it'd be good to see them
35
"""Checking of bzr objects.
37
check_refs is a concept used for optimising check. Objects that depend on other
38
objects (e.g. tree on repository) can list the objects they would be requesting
39
so that when the dependent object is checked, matches can be pulled out and
40
evaluated in-line rather than re-reading the same data many times.
41
check_refs are tuples (kind, value). Currently defined kinds are:
42
* 'trees', where value is a revid and the looked up objects are revision trees.
43
* 'lefthand-distance', where value is a revid and the looked up objects are the
44
distance along the lefthand path to NULL for that revid.
45
* 'revision-existence', where value is a revid, and the result is True or False
46
indicating that the revision was found/not found.
49
from bzrlib import errors, osutils
50
from bzrlib import repository as _mod_repository
51
from bzrlib import revision
52
from bzrlib.branch import Branch
53
from bzrlib.bzrdir import BzrDir
54
from bzrlib.errors import BzrCheckError
55
from bzrlib.repository import Repository
56
from bzrlib.revision import NULL_REVISION
57
from bzrlib.symbol_versioning import deprecated_function, deprecated_in
58
from bzrlib.trace import log_error, note
60
from bzrlib.workingtree import WorkingTree
63
"""Check a repository"""
65
# The Check object interacts with InventoryEntry.check, etc.
67
def __init__(self, repository, check_repo=True):
    """Set up the counters and collections that a check run fills in.

    :param repository: The repository to check. It is asked once, here,
        whether it supports rich roots.
    :param check_repo: Stored unchanged as ``self.check_repo``.
    """
    self.repository = repository
    self.checked_rev_cnt = 0
    # Revision ids referenced as parents but not (yet) seen as revisions;
    # _check_one_rev adds to and discards from this set.
    self.ghosts = set()
    self.missing_parent_links = {}
    self.missing_inventory_sha_cnt = 0
    self.missing_revision_cnt = 0
    self.checked_weaves = set()
    self.unreferenced_versions = set()
    self.inconsistent_parents = []
    self.rich_roots = repository.supports_rich_root()
    self.text_key_references = {}
    self.check_repo = check_repo
    self.other_results = []
    # Plain text lines to include in the report
    self._report_items = []
    # Keys we are looking for; may be large and need spilling to disk.
    # key->(type(revision/inventory/text/signature/map), sha1, first-referer)
    self.pending_keys = {}
    # Ancestors map for all of revisions being checked; while large helper
    # functions we call would create it anyway, so better to have once and
    # keep.  Filled in by _check_one_rev as revid -> tuple of parent ids.
    self.ancestors = {}
91
def check(self, callback_refs=None, check_repo=True):
92
if callback_refs is None:
94
self.repository.lock_read()
95
self.progress = bzrlib.ui.ui_factory.nested_progress_bar()
38
# TODO: We may need to handle the case where the old
39
# entry was not compressed (and thus did not end with .gz)
41
store.add(obj_tmp, obj_id) # Add the new one
42
os.remove(tmp_path) # Remove the old name
43
mutter(' Updated %s entry {%s}' % (store_name, obj_id))
45
# On any exception, restore the old entry
46
rename(tmp_path, orig_obj_path)
49
if os.path.exists(tmp_path):
50
# Unfortunately, the next command might throw
51
# an exception, which will mask a previous exception.
55
def _update_revision_entry(rev, branch):
    """After updating the values in a revision, make sure to
    write out the data, but try to do it in an atomic manner.

    The work is delegated to _update_store_entry, using the revision's own
    id as the store key.

    :param rev: The Revision object to store
    :param branch: The Branch object where this Revision is to be stored.
    """
    _update_store_entry(rev, rev.revision_id, branch,
                        'revision-store', branch.revision_store)
65
def _update_inventory_entry(inv, inv_id, branch):
    """When an inventory has been modified (such as by adding a unique tree root)
    this atomically re-generates the file.

    The work is delegated to _update_store_entry.

    :param inv: The Inventory
    :param inv_id: The inventory id for this inventory
    :param branch: The Branch where this entry will be stored.
    """
    _update_store_entry(inv, inv_id, branch,
                        'inventory-store', branch.inventory_store)
77
"""Run consistency checks on a branch.
79
TODO: Also check non-mainline revisions mentioned as parents.
81
TODO: Check for extra files in the control directory.
83
from bzrlib.trace import mutter
84
from bzrlib.errors import BzrCheckError
85
from bzrlib.osutils import fingerprint_file
86
from bzrlib.progress import ProgressBar
87
from bzrlib.inventory import ROOT_ID
88
from bzrlib.branch import gen_root_id
93
pb = ProgressBar(show_spinner=True)
96
missing_inventory_sha_cnt = 0
97
missing_revision_sha_cnt = 0
99
history = branch.revision_history()
101
revcount = len(history)
104
# for all texts checked, text_id -> sha1
107
for rev_id in history:
109
pb.update('checking revision', revno, revcount)
110
mutter(' revision {%s}' % rev_id)
111
rev = branch.get_revision(rev_id)
112
if rev.revision_id != rev_id:
113
raise BzrCheckError('wrong internal revision id in revision {%s}'
116
# check the previous history entry is a parent of this entry
118
if last_rev_id is None:
119
raise BzrCheckError("revision {%s} has %d parents, but is the "
120
"start of the branch"
121
% (rev_id, len(rev.parents)))
122
for prr in rev.parents:
123
if prr.revision_id == last_rev_id:
126
raise BzrCheckError("previous revision {%s} not listed among "
128
% (last_rev_id, rev_id))
130
for prr in rev.parents:
131
if prr.revision_sha1 is None:
132
missing_revision_sha_cnt += 1
134
prid = prr.revision_id
135
actual_sha = branch.get_revision_sha1(prid)
136
if prr.revision_sha1 != actual_sha:
137
raise BzrCheckError("mismatched revision sha1 for "
138
"parent {%s} of {%s}: %s vs %s"
140
prr.revision_sha1, actual_sha))
142
raise BzrCheckError("revision {%s} has no parents listed but preceded "
144
% (rev_id, last_rev_id))
146
if rev.inventory_id != rev_id:
147
mismatch_inv_id.append(rev_id)
149
## TODO: Check all the required fields are present on the revision.
151
if rev.inventory_sha1:
152
inv_sha1 = branch.get_inventory_sha1(rev.inventory_id)
153
if inv_sha1 != rev.inventory_sha1:
154
raise BzrCheckError('Inventory sha1 hash doesn\'t match'
155
' value in revision {%s}' % rev_id)
157
missing_inventory_sha_cnt += 1
158
mutter("no inventory_sha1 on revision {%s}" % rev_id)
160
inv = branch.get_inventory(rev.inventory_id)
164
## p('revision %d/%d file ids' % (revno, revcount))
166
if file_id in seen_ids:
167
raise BzrCheckError('duplicated file_id {%s} '
168
'in inventory for revision {%s}'
170
seen_ids[file_id] = True
180
if ie.parent_id != None:
181
if ie.parent_id not in seen_ids:
182
raise BzrCheckError('missing parent {%s} in inventory for revision {%s}'
183
% (ie.parent_id, rev_id))
185
if ie.kind == 'file':
186
if ie.text_id in checked_texts:
187
fp = checked_texts[ie.text_id]
97
self.progress.update('check', 0, 4)
99
self.progress.update('checking revisions', 0)
100
self.check_revisions()
101
self.progress.update('checking commit contents', 1)
102
self.repository._check_inventories(self)
103
self.progress.update('checking file graphs', 2)
104
# check_weaves is done after the revision scan so that
105
# revision index is known to be valid.
107
self.progress.update('checking branches and trees', 3)
109
repo = self.repository
110
# calculate all refs, and callback the objects requesting them.
112
wanting_items = set()
113
# Current crude version calculates everything and calls
114
# everything at once. Doing a queue and popping as things are
115
# satisfied would be cheaper on memory [but few people have
116
# huge numbers of working trees today. TODO: fix before
120
for ref, wantlist in callback_refs.iteritems():
121
wanting_items.update(wantlist)
124
refs[ref] = repo.revision_tree(value)
125
elif kind == 'lefthand-distance':
127
elif kind == 'revision-existence':
128
existences.add(value)
189
if not ie.text_id in branch.text_store:
190
raise BzrCheckError('text {%s} not in text_store' % ie.text_id)
192
tf = branch.text_store[ie.text_id]
193
fp = fingerprint_file(tf)
194
checked_texts[ie.text_id] = fp
196
if ie.text_size != fp['size']:
197
raise BzrCheckError('text {%s} wrong size' % ie.text_id)
198
if ie.text_sha1 != fp['sha1']:
199
raise BzrCheckError('text {%s} wrong sha1' % ie.text_id)
200
elif ie.kind == 'directory':
201
if ie.text_sha1 != None or ie.text_size != None or ie.text_id != None:
202
raise BzrCheckError('directory {%s} has text in revision {%s}'
206
for path, ie in inv.iter_entries():
207
if path in seen_names:
208
raise BzrCheckError('duplicated path %s '
209
'in inventory for revision {%s}'
211
seen_names[path] = True
130
raise AssertionError(
131
'unknown ref kind for ref %s' % ref)
132
node_distances = repo.get_graph().find_lefthand_distances(distances)
133
for key, distance in node_distances.iteritems():
134
refs[('lefthand-distance', key)] = distance
135
if key in existences and distance > 0:
136
refs[('revision-existence', key)] = True
137
existences.remove(key)
138
parent_map = repo.get_graph().get_parent_map(existences)
139
for key in parent_map:
140
refs[('revision-existence', key)] = True
141
existences.remove(key)
142
for key in existences:
143
refs[('revision-existence', key)] = False
144
for item in wanting_items:
145
if isinstance(item, WorkingTree):
147
if isinstance(item, Branch):
148
self.other_results.append(item.check(refs))
150
self.progress.finished()
151
self.repository.unlock()
153
def _check_revisions(self, revisions_iterator):
    """Check revision objects by decorating a generator.

    Each pair is handed to the consumer first; it is cross-checked with
    ``_check_one_rev`` only when the consumer asks for the next pair.

    :param revisions_iterator: An iterator of (revid, Revision-or-None).
    :return: A generator of the contents of revisions_iterator.
    """
    self.planned_revisions = set()
    for revid, revision in revisions_iterator:
        yield revid, revision
        self._check_one_rev(revid, revision)
    # Flatten the revisions we found into a list so that later passes
    # iterate over them in one consistent order.
    self.planned_revisions = list(self.planned_revisions)
    # TODO: extract digital signatures as items to callback on too.
168
def check_revisions(self):
    """Scan revisions, checking data directly available as we go.

    Sets ``self.revs_with_bad_parents_in_index`` to None when the
    repository reports that its revision graph cannot have wrong parents,
    otherwise to the list of inconsistencies the repository finds.
    """
    revision_iterator = self._check_revisions(
        self.repository._iter_revisions(None))
    # We read all the revisions here:
    # - doing this allows later code to depend on the revision index.
    # - we can fill out existence flags at this point
    # - we can read the revision inventory sha at this point
    # - we can check properties and serialisers etc.
    if self.repository.revision_graph_can_have_wrong_parents():
        bad_revisions = self.repository._find_inconsistent_revision_parents(
            revision_iterator)
        self.revs_with_bad_parents_in_index = list(bad_revisions)
    else:
        # The check against the index isn't needed.
        self.revs_with_bad_parents_in_index = None
        # Drain the generator purely for the per-revision checks it runs.
        for _ in revision_iterator:
            pass
187
def report_results(self, verbose):
    """Report this checker's findings, then those of every other result.

    :param verbose: Passed through unchanged to each report method.
    """
    # The repository report reads revs_with_bad_parents_in_index, which only
    # check_revisions sets, so it is skipped when the repository itself was
    # not requested.  NOTE(review): presumes check() consults the same
    # check_repo flag before running check_revisions -- confirm.
    if self.check_repo:
        self._report_repo_results(verbose)
    for result in self.other_results:
        result.report_results(verbose)
193
def _report_repo_results(self, verbose):
194
note('checked repository %s format %s',
195
self.repository.bzrdir.root_transport,
196
self.repository._format)
197
note('%6d revisions', self.checked_rev_cnt)
198
note('%6d file-ids', len(self.checked_weaves))
200
note('%6d unreferenced text versions',
201
len(self.unreferenced_versions))
202
if verbose and len(self.unreferenced_versions):
203
for file_id, revision_id in self.unreferenced_versions:
204
log_error('unreferenced version: {%s} in %s', revision_id,
206
if self.missing_inventory_sha_cnt:
207
note('%6d revisions are missing inventory_sha1',
208
self.missing_inventory_sha_cnt)
209
if self.missing_revision_cnt:
210
note('%6d revisions are mentioned but not present',
211
self.missing_revision_cnt)
213
note('%6d ghost revisions', len(self.ghosts))
215
for ghost in self.ghosts:
217
if len(self.missing_parent_links):
218
note('%6d revisions missing parents in ancestry',
219
len(self.missing_parent_links))
221
for link, linkers in self.missing_parent_links.items():
222
note(' %s should be in the ancestry for:', link)
223
for linker in linkers:
224
note(' * %s', linker)
225
if len(self.inconsistent_parents):
226
note('%6d inconsistent parents', len(self.inconsistent_parents))
228
for info in self.inconsistent_parents:
229
revision_id, file_id, found_parents, correct_parents = info
230
note(' * %s version %s has parents %r '
232
% (file_id, revision_id, found_parents,
234
if self.revs_with_bad_parents_in_index:
235
note('%6d revisions have incorrect parents in the revision index',
236
len(self.revs_with_bad_parents_in_index))
238
for item in self.revs_with_bad_parents_in_index:
239
revision_id, index_parents, actual_parents = item
241
' %s has wrong parents in index: '
243
revision_id, index_parents, actual_parents)
244
for item in self._report_items:
247
def _check_one_rev(self, rev_id, rev):
    """Cross-check one revision.

    :param rev_id: A revision id to check.
    :param rev: A revision or None to indicate a missing revision.
        NOTE(review): a None value is not handled below (the first
        statement dereferences ``rev``) -- confirm callers never pass it.
    """
    if rev.revision_id != rev_id:
        self._report_items.append(
            'Mismatched internal revid {%s} and index revid {%s}' % (
                rev.revision_id, rev_id))
        # Carry on under the id the revision itself claims.
        rev_id = rev.revision_id
    # Check this revision tree etc, and count as seen when we encounter a
    # reference to it.
    self.planned_revisions.add(rev_id)
    # We have the revision itself now, so it cannot be a ghost.
    self.ghosts.discard(rev_id)
    # Count all parents as ghosts if we haven't seen them yet.
    for parent in rev.parent_ids:
        if parent not in self.planned_revisions:
            self.ghosts.add(parent)
    # A revision without parents is recorded as a child of NULL_REVISION.
    self.ancestors[rev_id] = tuple(rev.parent_ids) or (NULL_REVISION,)
    # Queue the revision's inventory so its sha1 can be cross-checked later.
    self.add_pending_item(rev_id, ('inventories', rev_id), 'inventory',
                          rev.inventory_sha1)
    self.checked_rev_cnt += 1
273
def add_pending_item(self, referer, key, kind, sha1):
    """Add a reference to a sha1 to be cross checked against a key.

    The first reference to a key is remembered in ``self.pending_keys`` as
    ``(kind, sha1, referer)``.  A later reference that expects a different
    sha1 leaves that entry alone and adds a line to ``self._report_items``.

    :param referer: The referer that expects key to have sha1.
    :param key: A storage key e.g. ('texts', 'foo@bar-20040504-1234')
    :param kind: revision/inventory/text/map/signature
    :param sha1: A hex sha1 or None if no sha1 is known.
    """
    existing = self.pending_keys.get(key)
    if existing is None:
        self.pending_keys[key] = (kind, sha1, referer)
        return
    first_sha1, first_referer = existing[1], existing[2]
    if sha1 != first_sha1:
        # The message is formatted here: list.append takes one argument, and
        # the second pair names the first referer and the sha1 it expected.
        self._report_items.append(
            'Multiple expected sha1s for %s. {%s}'
            ' expects {%s}, {%s} expects {%s}' % (
                key, referer, sha1, first_referer, first_sha1))
290
def check_weaves(self):
    """Check all the weaves we can get our hands on.

    Opens a nested progress bar, hands it to ``_check_weaves`` and always
    finishes the bar afterwards, even when the check raises.
    """
    storebar = bzrlib.ui.ui_factory.nested_progress_bar()
    try:
        self._check_weaves(storebar)
    finally:
        storebar.finished()
300
def _check_weaves(self, storebar):
    """Check file-version parents and record what is wrong or unused.

    Updates ``self.checked_weaves``, appends one
    ``(revision_id, file_id, stored_parents, correct_parents)`` tuple to
    ``self.inconsistent_parents`` per bad entry, and adds the unused
    versions to ``self.unreferenced_versions``.

    :param storebar: Progress bar; only its ``update`` method is used.
    """
    storebar.update('text-index', 0, 2)
    repository = self.repository
    if repository._format.fast_deltas:
        # We haven't considered every fileid instance so far.
        weave_checker = repository._get_versioned_file_checker(
            ancestors=self.ancestors)
    else:
        weave_checker = repository._get_versioned_file_checker(
            text_key_references=self.text_key_references,
            ancestors=self.ancestors)
    storebar.update('file-graph', 1)
    bad_parents, unused_versions = weave_checker.check_file_version_parents(
        repository.texts)
    self.checked_weaves = weave_checker.file_ids
    for text_key, (stored_parents, correct_parents) in bad_parents.items():
        # XXX not ready for id join/split operations.
        weave_id = text_key[0]
        revision_id = text_key[-1]
        # Keep only the revision-id part (last element) of each parent key.
        weave_parents = tuple([parent[-1] for parent in stored_parents])
        correct_parents = tuple([parent[-1] for parent in correct_parents])
        self.inconsistent_parents.append(
            (revision_id, weave_id, weave_parents, correct_parents))
    self.unreferenced_versions.update(unused_versions)
326
def _add_entry_to_text_key_references(self, inv, entry):
    """Record whether an inventory entry's text key is referenced in place.

    The key ``(entry.file_id, entry.revision)`` is added to
    ``self.text_key_references`` as False if unseen, and set to True when
    the entry's revision is the revision of ``inv`` itself.

    :param inv: The inventory the entry came from; only its
        ``revision_id`` is read.
    :param entry: The inventory entry to record.
    """
    if not self.rich_roots and entry.name == '':
        # Without rich roots, the entry with the empty name is not tracked.
        return
    key = (entry.file_id, entry.revision)
    if entry.revision == inv.revision_id:
        self.text_key_references[key] = True
    else:
        # Never downgrade a key that was already marked True.
        self.text_key_references.setdefault(key, False)
335
@deprecated_function(deprecated_in((1,6,0)))
def check(branch, verbose):
    """Run consistency checks on a branch.

    Results are reported through logging.

    Deprecated in 1.6.  Please use check_dwim instead.  This wrapper only
    forwards its arguments to check_branch.

    :param branch: The branch to check.
    :param verbose: Passed through to check_branch.
    :raise BzrCheckError: if there's a consistency error.
    """
    check_branch(branch, verbose)
348
@deprecated_function(deprecated_in((1,16,0)))
349
def check_branch(branch, verbose):
350
"""Run consistency checks on a branch.
352
Results are reported through logging.
354
:raise BzrCheckError: if there's a consistency error.
359
for ref in branch._get_check_refs():
360
needed_refs.setdefault(ref, []).append(branch)
361
result = branch.repository.check([branch.last_revision()], needed_refs)
362
branch_result = result.other_results[0]
219
print 'checked %d revisions, %d file texts' % (revcount, len(checked_texts))
221
if missing_inventory_sha_cnt:
222
print '%d revisions are missing inventory_sha1' % missing_inventory_sha_cnt
224
if missing_revision_sha_cnt:
225
print '%d parent links are missing revision_sha1' % missing_revision_sha_cnt
227
# stub this out for now because the main bzr branch has references
228
# to revisions that aren't present in the store -- mbp 20050804
229
# if (missing_inventory_sha_cnt
230
# or missing_revision_sha_cnt):
231
# print ' (use "bzr upgrade" to fix them)'
234
print '%d revisions have mismatched inventory ids:' % len(mismatch_inv_id)
235
for rev_id in mismatch_inv_id:
365
branch_result.report_results(verbose)
368
def scan_branch(branch, needed_refs, to_unlock):
    """Scan a branch for refs.

    :param branch: The branch to schedule for checking.
    :param needed_refs: Refs we are accumulating.  Maps each ref to the list
        of objects that asked for it; the branch is appended to the list of
        every ref it reports.
    :param to_unlock: The unlock list accumulating.
    """
    note("Checking branch at '%s'." % (branch.base,))
    # Take the lock that the caller releases through to_unlock; queue the
    # branch only once the lock is actually held.
    branch.lock_read()
    to_unlock.append(branch)
    for ref in branch._get_check_refs():
        needed_refs.setdefault(ref, []).append(branch)
384
def scan_tree(base_tree, tree, needed_refs, to_unlock):
    """Scan a tree for refs.

    :param base_tree: The original tree check opened, used to detect duplicate
        tree checks.  May be None.
    :param tree: The tree to schedule for checking.
    :param needed_refs: Refs we are accumulating.  Maps each ref to the list
        of objects that asked for it; the tree is appended to the list of
        every ref it reports.
    :param to_unlock: The unlock list accumulating.
    """
    if base_tree is not None and tree.basedir == base_tree.basedir:
        # Same directory as the tree opened first: nothing new to scan.
        return
    note("Checking working tree at '%s'." % (tree.basedir,))
    # Take the lock that the caller releases through to_unlock; queue the
    # tree only once the lock is actually held.
    tree.lock_read()
    to_unlock.append(tree)
    for ref in tree._get_check_refs():
        needed_refs.setdefault(ref, []).append(tree)
404
def check_dwim(path, verbose, do_branch=False, do_repo=False, do_tree=False):
405
"""Check multiple objects.
407
If errors occur they are accumulated and reported as far as possible, and
408
an exception raised at the end of the process.
411
base_tree, branch, repo, relpath = \
412
BzrDir.open_containing_tree_branch_or_repository(path)
413
except errors.NotBranchError:
414
base_tree = branch = repo = None
419
if base_tree is not None:
420
# If the tree is a lightweight checkout we won't see it in
421
# repo.find_branches - add now.
423
scan_tree(None, base_tree, needed_refs, to_unlock)
424
branch = base_tree.branch
425
if branch is not None:
428
# The branch is in a shared repository
429
repo = branch.repository
432
to_unlock.append(repo)
433
branches = repo.find_branches(using=True)
435
if do_branch or do_tree:
436
for branch in branches:
439
tree = branch.bzrdir.open_workingtree()
441
except (errors.NotLocalUrl, errors.NoWorkingTree):
444
scan_tree(base_tree, tree, needed_refs, to_unlock)
446
scan_branch(branch, needed_refs, to_unlock)
447
if do_branch and not branches:
448
log_error("No branch found at specified location.")
449
if do_tree and base_tree is None and not saw_tree:
450
log_error("No working tree found at specified location.")
451
if do_repo or do_branch or do_tree:
453
note("Checking repository at '%s'."
454
% (repo.bzrdir.root_transport.base,))
455
result = repo.check(None, callback_refs=needed_refs,
457
result.report_results(verbose)
460
log_error("No working tree found at specified location.")
462
log_error("No branch found at specified location.")
464
log_error("No repository found at specified location.")
466
for thing in to_unlock: