1
# Copyright (C) 2004, 2005 by Martin Pool
2
# Copyright (C) 2005 by Canonical Ltd
1
# Copyright (C) 2005, 2006 Canonical Ltd
4
3
# This program is free software; you can redistribute it and/or modify
5
4
# it under the terms of the GNU General Public License as published by
6
5
# the Free Software Foundation; either version 2 of the License, or
7
6
# (at your option) any later version.
9
8
# This program is distributed in the hope that it will be useful,
10
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
11
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
11
# GNU General Public License for more details.
14
13
# You should have received a copy of the GNU General Public License
15
14
# along with this program; if not, write to the Free Software
16
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
def _update_store_entry(obj, obj_id, branch, store_name, store):
20
"""This is just a meta-function, which handles both revision entries
21
and inventory entries.
23
from bzrlib.trace import mutter
24
import tempfile, os, errno
25
from osutils import rename
26
obj_tmp = tempfile.TemporaryFile()
27
obj.write_xml(obj_tmp)
30
tmpfd, tmp_path = tempfile.mkstemp(prefix=obj_id, suffix='.gz',
31
dir=branch.controlfilename(store_name))
34
orig_obj_path = branch.controlfilename([store_name, obj_id+'.gz'])
35
# Remove the old entry out of the way
36
rename(orig_obj_path, tmp_path)
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
# TODO: Check ancestries are correct for every revision: includes
18
# every revision committed so far, and in a reasonable order.
20
# TODO: Also check non-mainline revisions mentioned as parents.
22
# TODO: Check for extra files in the control directory.
24
# TODO: Check revision, inventory and entry objects have all
27
# TODO: Get every revision in the revision-store even if they're not
28
# referenced by history and make sure they're all valid.
30
# TODO: Perhaps have a way to record errors other than by raising exceptions;
31
# would perhaps be enough to accumulate exception objects in a list without
32
# raising them. If there's more than one exception it'd be good to see them
35
"""Checking of bzr objects.
37
check_refs is a concept used for optimising check. Objects that depend on other
38
objects (e.g. tree on repository) can list the objects they would be requesting
39
so that when the dependent object is checked, matches can be pulled out and
40
evaluated in-line rather than re-reading the same data many times.
41
check_refs are tuples (kind, value). Currently defined kinds are:
42
* 'trees', where value is a revid and the looked up objects are revision trees.
43
* 'lefthand-distance', where value is a revid and the looked up objects are the
44
distance along the lefthand path to NULL for that revid.
45
* 'revision-existence', where value is a revid, and the result is True or False
46
indicating that the revision was found/not found.
49
from bzrlib import errors, osutils
50
from bzrlib import repository as _mod_repository
51
from bzrlib import revision
52
from bzrlib.branch import Branch
53
from bzrlib.bzrdir import BzrDir
54
from bzrlib.errors import BzrCheckError
55
from bzrlib.repository import Repository
56
from bzrlib.revision import NULL_REVISION
57
from bzrlib.symbol_versioning import deprecated_function, deprecated_in
58
from bzrlib.trace import log_error, note
60
from bzrlib.workingtree import WorkingTree
63
"""Check a repository"""
65
# The Check object interacts with InventoryEntry.check, etc.
67
def __init__(self, repository, check_repo=True):
68
self.repository = repository
69
self.checked_text_cnt = 0
70
self.checked_rev_cnt = 0
72
self.repeated_text_cnt = 0
73
self.missing_parent_links = {}
74
self.missing_inventory_sha_cnt = 0
75
self.missing_revision_cnt = 0
76
# maps (file-id, version) -> sha1; used by InventoryFile._check
77
self.checked_texts = {}
78
self.checked_weaves = set()
79
self.unreferenced_versions = set()
80
self.inconsistent_parents = []
81
self.rich_roots = repository.supports_rich_root()
82
self.text_key_references = {}
83
self.check_repo = check_repo
84
self.other_results = []
85
# Plain text lines to include in the report
86
self._report_items = []
87
# Sha1 expectations; may be large and need spilling to disk.
88
# key->(sha1, first-referer)
89
self.expected_sha1 = {}
90
# Ancestors map for all of revisions being checked; while large helper
91
# functions we call would create it anyway, so better to have once and
95
def check(self, callback_refs=None, check_repo=True):
96
if callback_refs is None:
98
self.repository.lock_read()
99
self.progress = bzrlib.ui.ui_factory.nested_progress_bar()
38
# TODO: We may need to handle the case where the old
39
# entry was not compressed (and thus did not end with .gz)
41
store.add(obj_tmp, obj_id) # Add the new one
42
os.remove(tmp_path) # Remove the old name
43
mutter(' Updated %s entry {%s}' % (store_name, obj_id))
45
# On any exception, restore the old entry
46
rename(tmp_path, orig_obj_path)
49
if os.path.exists(tmp_path):
50
# Unfortunately, the next command might throw
51
# an exception, which will mask a previous exception.
55
def _update_revision_entry(rev, branch):
    """Write a modified revision back to the branch's revision store.

    After updating the values in a revision, make sure to
    write out the data, but try to do it in an atomic manner.
    The work is delegated to _update_store_entry, keyed by the
    revision's own revision_id.

    :param rev: The Revision object to store
    :param branch: The Branch object where this Revision is to be stored.
    """
    _update_store_entry(rev, rev.revision_id, branch,
                        'revision-store', branch.revision_store)
65
def _update_inventory_entry(inv, inv_id, branch):
    """Write a modified inventory back to the branch's inventory store.

    When an inventory has been modified (such as by adding a unique tree
    root) this atomically re-generates the file.  The work is delegated
    to _update_store_entry.

    :param inv: The Inventory
    :param inv_id: The inventory id for this inventory
    :param branch: The Branch where this entry will be stored.
    """
    _update_store_entry(inv, inv_id, branch,
                        'inventory-store', branch.inventory_store)
77
"""Run consistency checks on a branch.
79
TODO: Also check non-mainline revisions mentioned as parents.
81
TODO: Check for extra files in the control directory.
83
from bzrlib.trace import mutter
84
from bzrlib.errors import BzrCheckError
85
from bzrlib.osutils import fingerprint_file
86
from bzrlib.progress import ProgressBar
87
from bzrlib.inventory import ROOT_ID
88
from bzrlib.branch import gen_root_id
93
pb = ProgressBar(show_spinner=True)
96
missing_inventory_sha_cnt = 0
97
missing_revision_sha_cnt = 0
99
history = branch.revision_history()
101
revcount = len(history)
104
# for all texts checked, text_id -> sha1
107
for rev_id in history:
109
pb.update('checking revision', revno, revcount)
110
mutter(' revision {%s}' % rev_id)
111
rev = branch.get_revision(rev_id)
112
if rev.revision_id != rev_id:
113
raise BzrCheckError('wrong internal revision id in revision {%s}'
116
# check the previous history entry is a parent of this entry
118
if last_rev_id is None:
119
raise BzrCheckError("revision {%s} has %d parents, but is the "
120
"start of the branch"
121
% (rev_id, len(rev.parents)))
122
for prr in rev.parents:
123
if prr.revision_id == last_rev_id:
126
raise BzrCheckError("previous revision {%s} not listed among "
128
% (last_rev_id, rev_id))
130
for prr in rev.parents:
131
if prr.revision_sha1 is None:
132
missing_revision_sha_cnt += 1
134
prid = prr.revision_id
135
actual_sha = branch.get_revision_sha1(prid)
136
if prr.revision_sha1 != actual_sha:
137
raise BzrCheckError("mismatched revision sha1 for "
138
"parent {%s} of {%s}: %s vs %s"
140
prr.revision_sha1, actual_sha))
142
raise BzrCheckError("revision {%s} has no parents listed but preceded "
144
% (rev_id, last_rev_id))
146
if rev.inventory_id != rev_id:
147
mismatch_inv_id.append(rev_id)
149
## TODO: Check all the required fields are present on the revision.
151
if rev.inventory_sha1:
152
inv_sha1 = branch.get_inventory_sha1(rev.inventory_id)
153
if inv_sha1 != rev.inventory_sha1:
154
raise BzrCheckError('Inventory sha1 hash doesn\'t match'
155
' value in revision {%s}' % rev_id)
157
missing_inventory_sha_cnt += 1
158
mutter("no inventory_sha1 on revision {%s}" % rev_id)
160
inv = branch.get_inventory(rev.inventory_id)
164
## p('revision %d/%d file ids' % (revno, revcount))
166
if file_id in seen_ids:
167
raise BzrCheckError('duplicated file_id {%s} '
168
'in inventory for revision {%s}'
170
seen_ids[file_id] = True
180
if ie.parent_id != None:
181
if ie.parent_id not in seen_ids:
182
raise BzrCheckError('missing parent {%s} in inventory for revision {%s}'
183
% (ie.parent_id, rev_id))
185
if ie.kind == 'file':
186
if ie.text_id in checked_texts:
187
fp = checked_texts[ie.text_id]
102
self.progress.update('retrieving inventory', 0, 2)
103
# do not put in init, as it should be done with progress,
104
# and inside the lock.
105
self.inventory_weave = self.repository.inventories
106
self.progress.update('checking revision graph', 1)
107
self.check_revision_graph()
108
self.plan_revisions()
110
while revno < len(self.planned_revisions):
111
rev_id = self.planned_revisions[revno]
112
self.progress.update('checking revision', revno,
113
len(self.planned_revisions))
115
self._check_revision_tree(rev_id)
116
# check_weaves is done after the revision scan so that
117
# revision index is known to be valid.
120
repo = self.repository
121
# calculate all refs, and callback the objects requesting them.
123
wanting_items = set()
124
# Current crude version calculates everything and calls
125
# everything at once. Doing a queue and popping as things are
126
# satisfied would be cheaper on memory [but few people have
127
# huge numbers of working trees today. TODO: fix before
131
for ref, wantlist in callback_refs.iteritems():
132
wanting_items.update(wantlist)
135
refs[ref] = repo.revision_tree(value)
136
elif kind == 'lefthand-distance':
138
elif kind == 'revision-existence':
139
existences.add(value)
189
if not ie.text_id in branch.text_store:
190
raise BzrCheckError('text {%s} not in text_store' % ie.text_id)
192
tf = branch.text_store[ie.text_id]
193
fp = fingerprint_file(tf)
194
checked_texts[ie.text_id] = fp
196
if ie.text_size != fp['size']:
197
raise BzrCheckError('text {%s} wrong size' % ie.text_id)
198
if ie.text_sha1 != fp['sha1']:
199
raise BzrCheckError('text {%s} wrong sha1' % ie.text_id)
200
elif ie.kind == 'directory':
201
if ie.text_sha1 != None or ie.text_size != None or ie.text_id != None:
202
raise BzrCheckError('directory {%s} has text in revision {%s}'
206
for path, ie in inv.iter_entries():
207
if path in seen_names:
208
raise BzrCheckError('duplicated path %s '
209
'in inventory for revision {%s}'
211
seen_names[path] = True
141
raise AssertionError(
142
'unknown ref kind for ref %s' % ref)
143
node_distances = repo.get_graph().find_lefthand_distances(distances)
144
for key, distance in node_distances.iteritems():
145
refs[('lefthand-distance', key)] = distance
146
if key in existences and distance > 0:
147
refs[('revision-existence', key)] = True
148
existences.remove(key)
149
parent_map = repo.get_graph().get_parent_map(existences)
150
for key in parent_map:
151
refs[('revision-existence', key)] = True
152
existences.remove(key)
153
for key in existences:
154
refs[('revision-existence', key)] = False
155
for item in wanting_items:
156
if isinstance(item, WorkingTree):
158
if isinstance(item, Branch):
159
self.other_results.append(item.check(refs))
161
self.progress.finished()
162
self.repository.unlock()
164
def check_revisions(self, revisions_iterator):
    """Check revision objects by decorating a generator.

    When iteration starts, self.planned_revisions is reset to an empty
    set.  Each item is passed through unchanged; self._check_one_rev is
    called for an item only after it has been yielded, i.e. when the
    consumer asks for the next one (or exhausts the generator).

    :param revisions_iterator: An iterator of (revid, Revision-or-None).
    :return: A generator of the contents of revisions_iterator.
    """
    self.planned_revisions = set()
    for revid, revision in revisions_iterator:
        yield revid, revision
        self._check_one_rev(revid, revision)
175
def check_revision_graph(self):
176
revision_iterator = self.repository._iter_revisions(None)
177
revision_iterator = self.check_revisions(revision_iterator)
178
# We read all the revisions here:
179
# - doing this allows later code to depend on the revision index.
180
# - we can fill out existence flags at this point
181
# - we can read the revision inventory sha at this point
182
# - we can check properties and serialisers etc.
183
if not self.repository.revision_graph_can_have_wrong_parents():
184
# The check against the index isn't needed.
185
self.revs_with_bad_parents_in_index = None
186
for thing in revision_iterator:
189
bad_revisions = self.repository._find_inconsistent_revision_parents(
191
self.revs_with_bad_parents_in_index = list(bad_revisions)
193
def plan_revisions(self):
    """Record the revision ids to check and confirm each has an inventory.

    Stores repository.all_revision_ids() in self.planned_revisions and
    clears the progress display.

    :raise BzrCheckError: if any planned revision id is not the last
        element of some key in self.inventory_weave.
    """
    repository = self.repository
    self.planned_revisions = repository.all_revision_ids()
    self.progress.clear()
    # The last element of each inventory key is compared against the
    # revision ids.
    inventoried = set(key[-1] for key in self.inventory_weave.keys())
    awol = set(self.planned_revisions) - inventoried
    if awol:
        # Sorted so the error text does not depend on set ordering.
        raise BzrCheckError('Stored revisions missing from inventory'
            '{%s}' % ','.join(sorted(awol)))
203
def report_results(self, verbose):
205
self._report_repo_results(verbose)
206
for result in self.other_results:
207
result.report_results(verbose)
209
def _report_repo_results(self, verbose):
210
note('checked repository %s format %s',
211
self.repository.bzrdir.root_transport,
212
self.repository._format)
213
note('%6d revisions', self.checked_rev_cnt)
214
note('%6d file-ids', len(self.checked_weaves))
215
note('%6d unique file texts', self.checked_text_cnt)
216
note('%6d repeated file texts', self.repeated_text_cnt)
218
note('%6d unreferenced text versions',
219
len(self.unreferenced_versions))
220
if verbose and len(self.unreferenced_versions):
221
for file_id, revision_id in self.unreferenced_versions:
222
log_error('unreferenced version: {%s} in %s', revision_id,
224
if self.missing_inventory_sha_cnt:
225
note('%6d revisions are missing inventory_sha1',
226
self.missing_inventory_sha_cnt)
227
if self.missing_revision_cnt:
228
note('%6d revisions are mentioned but not present',
229
self.missing_revision_cnt)
231
note('%6d ghost revisions', len(self.ghosts))
233
for ghost in self.ghosts:
235
if len(self.missing_parent_links):
236
note('%6d revisions missing parents in ancestry',
237
len(self.missing_parent_links))
239
for link, linkers in self.missing_parent_links.items():
240
note(' %s should be in the ancestry for:', link)
241
for linker in linkers:
242
note(' * %s', linker)
243
if len(self.inconsistent_parents):
244
note('%6d inconsistent parents', len(self.inconsistent_parents))
246
for info in self.inconsistent_parents:
247
revision_id, file_id, found_parents, correct_parents = info
248
note(' * %s version %s has parents %r '
250
% (file_id, revision_id, found_parents,
252
if self.revs_with_bad_parents_in_index:
253
note('%6d revisions have incorrect parents in the revision index',
254
len(self.revs_with_bad_parents_in_index))
256
for item in self.revs_with_bad_parents_in_index:
257
revision_id, index_parents, actual_parents = item
259
' %s has wrong parents in index: '
261
revision_id, index_parents, actual_parents)
262
for item in self._report_items:
265
def _check_one_rev(self, rev_id, rev):
266
"""Cross-check one revision.
268
:param rev_id: A revision id to check.
269
:param rev: A revision or None to indicate a missing revision.
271
if rev.revision_id != rev_id:
272
self._report_items.append(
273
'Mismatched internal revid {%s} and index revid {%s}' % (
274
rev.revision_id, rev_id))
275
rev_id = rev.revision_id
276
# Check this revision tree etc, and count as seen when we encounter a
278
self.planned_revisions.add(rev_id)
280
self.ghosts.discard(rev_id)
281
# Count all parents as ghosts if we haven't seen them yet.
282
for parent in rev.parent_ids:
283
if not parent in self.planned_revisions:
284
self.ghosts.add(parent)
286
self.ancestors[rev_id] = tuple(rev.parent_ids) or (NULL_REVISION,)
287
# If the revision has an inventory sha, we want to cross check it later.
288
if rev.inventory_sha1:
289
self.add_sha_check(rev_id, ('inventories', rev_id),
291
self.checked_rev_cnt += 1
293
def add_sha_check(self, referer, key, sha1):
    """Add a reference to a sha1 to be cross checked against a key.

    The first expectation recorded for a key is kept.  A later call that
    expects a different sha1 for the same key appends a line to
    self._report_items and leaves the stored expectation untouched.

    :param referer: The referer that expects key to have sha1.
    :param key: A storage key e.g. ('texts', 'foo@bar-20040504-1234')
    :param sha1: A hex sha1.
    """
    existing = self.expected_sha1.get(key)
    if existing is None:
        self.expected_sha1[key] = (sha1, referer)
        return
    expected_sha1, first_referer = existing
    if sha1 != expected_sha1:
        # Format the message before appending: list.append() accepts
        # exactly one argument, so the values cannot be passed alongside
        # the format string.
        self._report_items.append(
            'Multiple expected sha1s for %s. {%s}'
            ' expects {%s}, {%s} expects {%s}' % (
            key, referer, sha1, first_referer, expected_sha1))
309
def check_weaves(self):
310
"""Check all the weaves we can get our hands on.
313
self.progress.update('checking inventory', 0, 2)
314
self.inventory_weave.check(progress_bar=self.progress)
315
self.progress.update('checking text storage', 1, 2)
316
self.repository.texts.check(progress_bar=self.progress)
317
weave_checker = self.repository._get_versioned_file_checker(
318
text_key_references=self.text_key_references,
319
ancestors=self.ancestors)
320
result = weave_checker.check_file_version_parents(
321
self.repository.texts, progress_bar=self.progress)
322
self.checked_weaves = weave_checker.file_ids
323
bad_parents, unused_versions = result
324
bad_parents = bad_parents.items()
325
for text_key, (stored_parents, correct_parents) in bad_parents:
326
# XXX not ready for id join/split operations.
327
weave_id = text_key[0]
328
revision_id = text_key[-1]
329
weave_parents = tuple([parent[-1] for parent in stored_parents])
330
correct_parents = tuple([parent[-1] for parent in correct_parents])
331
self.inconsistent_parents.append(
332
(revision_id, weave_id, weave_parents, correct_parents))
333
self.unreferenced_versions.update(unused_versions)
335
def _check_revision_tree(self, rev_id):
336
tree = self.repository.revision_tree(rev_id)
340
for path, ie in inv.iter_entries():
341
self._add_entry_to_text_key_references(inv, ie)
343
if file_id in seen_ids:
344
raise BzrCheckError('duplicated file_id {%s} '
345
'in inventory for revision {%s}'
347
seen_ids.add(file_id)
348
ie.check(self, rev_id, inv, tree)
349
if path in seen_names:
350
raise BzrCheckError('duplicated path %s '
351
'in inventory for revision {%s}'
355
def _add_entry_to_text_key_references(self, inv, entry):
356
if not self.rich_roots and entry == inv.root:
358
key = (entry.file_id, entry.revision)
359
self.text_key_references.setdefault(key, False)
360
if entry.revision == inv.revision_id:
361
self.text_key_references[key] = True
364
@deprecated_function(deprecated_in((1,6,0)))
def check(branch, verbose):
    """Run consistency checks on a branch.

    Results are reported through logging.

    Deprecated in 1.6.  Please use check_dwim instead.

    :param branch: The branch to check; passed through to check_branch.
    :param verbose: Passed through to check_branch.
    :raise BzrCheckError: if there's a consistency error.
    """
    check_branch(branch, verbose)
377
@deprecated_function(deprecated_in((1,16,0)))
378
def check_branch(branch, verbose):
379
"""Run consistency checks on a branch.
381
Results are reported through logging.
383
:raise BzrCheckError: if there's a consistency error.
388
for ref in branch._get_check_refs():
389
needed_refs.setdefault(ref, []).append(branch)
390
result = branch.repository.check([branch.last_revision()], needed_refs)
391
branch_result = result.other_results[0]
219
print 'checked %d revisions, %d file texts' % (revcount, len(checked_texts))
221
if missing_inventory_sha_cnt:
222
print '%d revisions are missing inventory_sha1' % missing_inventory_sha_cnt
224
if missing_revision_sha_cnt:
225
print '%d parent links are missing revision_sha1' % missing_revision_sha_cnt
227
# stub this out for now because the main bzr branch has references
228
# to revisions that aren't present in the store -- mbp 20050804
229
# if (missing_inventory_sha_cnt
230
# or missing_revision_sha_cnt):
231
# print ' (use "bzr upgrade" to fix them)'
234
print '%d revisions have mismatched inventory ids:' % len(mismatch_inv_id)
235
for rev_id in mismatch_inv_id:
394
branch_result.report_results(verbose)
397
def scan_branch(branch, needed_refs, to_unlock):
398
"""Scan a branch for refs.
400
:param branch: The branch to schedule for checking.
401
:param needed_refs: Refs we are accumulating.
402
:param to_unlock: The unlock list accumulating.
404
note("Checking branch at '%s'." % (branch.base,))
406
to_unlock.append(branch)
407
branch_refs = branch._get_check_refs()
408
for ref in branch_refs:
409
reflist = needed_refs.setdefault(ref, [])
410
reflist.append(branch)
413
def scan_tree(base_tree, tree, needed_refs, to_unlock):
414
"""Scan a tree for refs.
416
:param base_tree: The original tree check opened, used to detect duplicate
418
:param tree: The tree to schedule for checking.
419
:param needed_refs: Refs we are accumulating.
420
:param to_unlock: The unlock list accumulating.
422
if base_tree is not None and tree.basedir == base_tree.basedir:
424
note("Checking working tree at '%s'." % (tree.basedir,))
426
to_unlock.append(tree)
427
tree_refs = tree._get_check_refs()
428
for ref in tree_refs:
429
reflist = needed_refs.setdefault(ref, [])
433
def check_dwim(path, verbose, do_branch=False, do_repo=False, do_tree=False):
435
base_tree, branch, repo, relpath = \
436
BzrDir.open_containing_tree_branch_or_repository(path)
437
except errors.NotBranchError:
438
base_tree = branch = repo = None
443
if base_tree is not None:
444
# If the tree is a lightweight checkout we won't see it in
445
# repo.find_branches - add now.
447
scan_tree(None, base_tree, needed_refs, to_unlock)
448
branch = base_tree.branch
449
if branch is not None:
452
# The branch is in a shared repository
453
repo = branch.repository
456
to_unlock.append(repo)
457
branches = repo.find_branches(using=True)
459
if do_branch or do_tree:
460
for branch in branches:
463
tree = branch.bzrdir.open_workingtree()
465
except (errors.NotLocalUrl, errors.NoWorkingTree):
468
scan_tree(base_tree, tree, needed_refs, to_unlock)
470
scan_branch(branch, needed_refs, to_unlock)
471
if do_branch and not branches:
472
log_error("No branch found at specified location.")
473
if do_tree and base_tree is None and not saw_tree:
474
log_error("No working tree found at specified location.")
475
if do_repo or do_branch or do_tree:
477
note("Checking repository at '%s'."
478
% (repo.bzrdir.root_transport.base,))
479
result = repo.check(None, callback_refs=needed_refs,
481
result.report_results(verbose)
484
log_error("No working tree found at specified location.")
486
log_error("No branch found at specified location.")
488
log_error("No repository found at specified location.")
490
for thing in to_unlock: