1
# Copyright (C) 2005, 2006 Canonical Ltd
4
# Copyright (C) 2004, 2005 by Martin Pool
5
# Copyright (C) 2005 by Canonical Ltd
3
8
# This program is free software; you can redistribute it and/or modify
4
9
# it under the terms of the GNU General Public License as published by
5
10
# the Free Software Foundation; either version 2 of the License, or
6
11
# (at your option) any later version.
8
13
# This program is distributed in the hope that it will be useful,
9
14
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
15
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
16
# GNU General Public License for more details.
13
18
# You should have received a copy of the GNU General Public License
14
19
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
# TODO: Check ancestries are correct for every revision: includes
18
# every revision committed so far, and in a reasonable order.
20
# TODO: Also check non-mainline revisions mentioned as parents.
22
# TODO: Check for extra files in the control directory.
24
# TODO: Check revision, inventory and entry objects have all
27
# TODO: Get every revision in the revision-store even if they're not
28
# referenced by history and make sure they're all valid.
30
# TODO: Perhaps have a way to record errors other than by raising exceptions;
31
# would perhaps be enough to accumulate exception objects in a list without
32
# raising them. If there's more than one exception it'd be good to see them
35
from bzrlib import errors, osutils
36
from bzrlib import repository as _mod_repository
37
from bzrlib import revision
38
from bzrlib.branch import Branch
39
from bzrlib.bzrdir import BzrDir
40
from bzrlib.errors import BzrCheckError
41
from bzrlib.repository import Repository
42
from bzrlib.symbol_versioning import deprecated_function, deprecated_in
43
from bzrlib.trace import log_error, note
45
from bzrlib.workingtree import WorkingTree
48
"""Check a repository"""
50
# The Check object interacts with InventoryEntry.check, etc.
52
def __init__(self, repository):
53
self.repository = repository
54
self.checked_text_cnt = 0
55
self.checked_rev_cnt = 0
57
self.repeated_text_cnt = 0
58
self.missing_parent_links = {}
59
self.missing_inventory_sha_cnt = 0
60
self.missing_revision_cnt = 0
61
# maps (file-id, version) -> sha1; used by InventoryFile._check
62
self.checked_texts = {}
63
self.checked_weaves = set()
64
self.unreferenced_versions = set()
65
self.inconsistent_parents = []
66
self.rich_roots = repository.supports_rich_root()
67
self.text_key_references = {}
70
self.repository.lock_read()
71
self.progress = bzrlib.ui.ui_factory.nested_progress_bar()
73
self.progress.update('retrieving inventory', 0, 2)
74
# do not put in init, as it should be done with progress,
75
# and inside the lock.
76
self.inventory_weave = self.repository.inventories
77
self.progress.update('checking revision graph', 1)
78
self.check_revision_graph()
81
while revno < len(self.planned_revisions):
82
rev_id = self.planned_revisions[revno]
83
self.progress.update('checking revision', revno,
84
len(self.planned_revisions))
86
self.check_one_rev(rev_id)
87
# check_weaves is done after the revision scan so that
88
# revision index is known to be valid.
91
self.progress.finished()
92
self.repository.unlock()
94
def check_revision_graph(self):
95
if not self.repository.revision_graph_can_have_wrong_parents():
96
# This check is not necessary.
97
self.revs_with_bad_parents_in_index = None
99
bad_revisions = self.repository._find_inconsistent_revision_parents()
100
self.revs_with_bad_parents_in_index = list(bad_revisions)
102
def plan_revisions(self):
103
repository = self.repository
104
self.planned_revisions = repository.all_revision_ids()
105
self.progress.clear()
106
inventoried = set(key[-1] for key in self.inventory_weave.keys())
107
awol = set(self.planned_revisions) - inventoried
109
raise BzrCheckError('Stored revisions missing from inventory'
110
'{%s}' % ','.join([f for f in awol]))
112
def report_results(self, verbose):
113
note('checked repository %s format %s',
114
self.repository.bzrdir.root_transport,
115
self.repository._format)
116
note('%6d revisions', self.checked_rev_cnt)
117
note('%6d file-ids', len(self.checked_weaves))
118
note('%6d unique file texts', self.checked_text_cnt)
119
note('%6d repeated file texts', self.repeated_text_cnt)
120
note('%6d unreferenced text versions',
121
len(self.unreferenced_versions))
122
if self.missing_inventory_sha_cnt:
123
note('%6d revisions are missing inventory_sha1',
124
self.missing_inventory_sha_cnt)
125
if self.missing_revision_cnt:
126
note('%6d revisions are mentioned but not present',
127
self.missing_revision_cnt)
129
note('%6d ghost revisions', len(self.ghosts))
131
for ghost in self.ghosts:
133
if len(self.missing_parent_links):
134
note('%6d revisions missing parents in ancestry',
135
len(self.missing_parent_links))
137
for link, linkers in self.missing_parent_links.items():
138
note(' %s should be in the ancestry for:', link)
139
for linker in linkers:
140
note(' * %s', linker)
142
for file_id, revision_id in self.unreferenced_versions:
143
log_error('unreferenced version: {%s} in %s', revision_id,
145
if len(self.inconsistent_parents):
146
note('%6d inconsistent parents', len(self.inconsistent_parents))
148
for info in self.inconsistent_parents:
149
revision_id, file_id, found_parents, correct_parents = info
150
note(' * %s version %s has parents %r '
152
% (file_id, revision_id, found_parents,
154
if self.revs_with_bad_parents_in_index:
155
note('%6d revisions have incorrect parents in the revision index',
156
len(self.revs_with_bad_parents_in_index))
158
for item in self.revs_with_bad_parents_in_index:
159
revision_id, index_parents, actual_parents = item
161
' %s has wrong parents in index: '
163
revision_id, index_parents, actual_parents)
165
def check_one_rev(self, rev_id):
166
"""Check one revision.
168
rev_id - the one to check
170
rev = self.repository.get_revision(rev_id)
172
if rev.revision_id != rev_id:
173
raise BzrCheckError('wrong internal revision id in revision {%s}'
176
for parent in rev.parent_ids:
177
if not parent in self.planned_revisions:
178
# rev has a parent we didn't know about.
179
missing_links = self.missing_parent_links.get(parent, [])
180
missing_links.append(rev_id)
181
self.missing_parent_links[parent] = missing_links
182
# list based so somewhat slow,
183
# TODO have a planned_revisions list and set.
184
if self.repository.has_revision(parent):
185
missing_ancestry = self.repository.get_ancestry(parent)
186
for missing in missing_ancestry:
187
if (missing is not None
188
and missing not in self.planned_revisions):
189
self.planned_revisions.append(missing)
191
self.ghosts.append(rev_id)
193
if rev.inventory_sha1:
194
# Loopback - this is currently circular logic as the
195
# knit get_inventory_sha1 call returns rev.inventory_sha1.
196
# Repository.py's get_inventory_sha1 should instead return
197
# inventories.get_record_stream([(revid,)]).next().sha1 or
199
inv_sha1 = self.repository.get_inventory_sha1(rev_id)
200
if inv_sha1 != rev.inventory_sha1:
201
raise BzrCheckError('Inventory sha1 hash doesn\'t match'
202
' value in revision {%s}' % rev_id)
203
self._check_revision_tree(rev_id)
204
self.checked_rev_cnt += 1
206
def check_weaves(self):
    """Check all the weaves we can get our hands on.

    Runs the ``check`` method of the inventory store and of the
    repository's text store, then asks the repository's versioned-file
    checker to compare the stored parents of every text version with the
    parents it considers correct.

    Side effects on ``self``:

    * ``checked_weaves`` is replaced by the checker's ``file_ids``.
    * ``inconsistent_parents`` gains one ``(revision_id, file_id,
      stored_parents, correct_parents)`` tuple per text whose parents are
      wrong; each parent key is reduced to its last element.
    * ``unreferenced_versions`` is updated with the unused versions
      reported by the checker.
    """
    self.progress.update('checking inventory', 0, 2)
    self.inventory_weave.check(progress_bar=self.progress)
    self.progress.update('checking text storage', 1, 2)
    self.repository.texts.check(progress_bar=self.progress)
    weave_checker = self.repository._get_versioned_file_checker(
        text_key_references=self.text_key_references)
    result = weave_checker.check_file_version_parents(
        self.repository.texts, progress_bar=self.progress)
    self.checked_weaves = weave_checker.file_ids
    bad_parents, unused_versions = result
    for text_key, (stored_parents, correct_parents) in bad_parents.items():
        # XXX not ready for id join/split operations.
        # Text keys are indexed as (file_id, ..., revision_id): the first
        # element names the weave, the last names the revision.
        weave_id = text_key[0]
        revision_id = text_key[-1]
        weave_parents = tuple(parent[-1] for parent in stored_parents)
        correct_parents = tuple(parent[-1] for parent in correct_parents)
        self.inconsistent_parents.append(
            (revision_id, weave_id, weave_parents, correct_parents))
    self.unreferenced_versions.update(unused_versions)
231
def _check_revision_tree(self, rev_id):
232
tree = self.repository.revision_tree(rev_id)
236
for path, ie in inv.iter_entries():
237
self._add_entry_to_text_key_references(inv, ie)
239
if file_id in seen_ids:
240
raise BzrCheckError('duplicated file_id {%s} '
241
'in inventory for revision {%s}'
243
seen_ids.add(file_id)
244
ie.check(self, rev_id, inv, tree)
245
if path in seen_names:
246
raise BzrCheckError('duplicated path %s '
247
'in inventory for revision {%s}'
251
def _add_entry_to_text_key_references(self, inv, entry):
252
if not self.rich_roots and entry == inv.root:
254
key = (entry.file_id, entry.revision)
255
self.text_key_references.setdefault(key, False)
256
if entry.revision == inv.revision_id:
257
self.text_key_references[key] = True
260
@deprecated_function(deprecated_in((1, 6, 0)))
def check(branch, verbose):
    """Run consistency checks on a branch.

    Results are reported through logging.

    Deprecated in 1.6.  Please use check_branch instead.

    :param branch: The branch to check; passed straight to check_branch.
    :param verbose: Verbosity flag; passed straight to check_branch.
    :raise BzrCheckError: if there's a consistency error.
    """
    # Thin compatibility shim: all the work happens in check_branch.
    check_branch(branch, verbose)
273
def check_branch(branch, verbose):
274
"""Run consistency checks on a branch.
276
Results are reported through logging.
278
:raise BzrCheckError: if there's a consistency error.
282
branch_result = branch.check()
285
branch_result.report_results(verbose)
288
def check_dwim(path, verbose, do_branch=False, do_repo=False, do_tree=False):
290
tree, branch, repo, relpath = \
291
BzrDir.open_containing_tree_branch_or_repository(path)
292
except errors.NotBranchError:
293
tree = branch = repo = None
297
note("Checking working tree at '%s'."
298
% (tree.bzrdir.root_transport.base,))
301
log_error("No working tree found at specified location.")
303
if branch is not None:
306
# The branch is in a shared repository
307
repo = branch.repository
309
elif repo is not None:
310
branches = repo.find_branches(using=True)
316
note("Checking repository at '%s'."
317
% (repo.bzrdir.root_transport.base,))
318
result = repo.check()
319
result.report_results(verbose)
322
log_error("No branch found at specified location.")
324
for branch in branches:
325
note("Checking branch at '%s'."
326
% (branch.bzrdir.root_transport.base,))
327
check_branch(branch, verbose)
332
log_error("No branch found at specified location.")
334
log_error("No repository found at specified location.")
20
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24
######################################################################
28
"""Consistency check of tree."""
30
mutter("checking tree")
32
check_patch_chaining()
33
check_patch_uniqueness()
35
mutter("tree looks OK")
36
## TODO: Check that previous-inventory and previous-manifest
37
## are the same as those stored in the previous changeset.
39
## TODO: Check all patches present in patch directory are
40
## mentioned in patch history; having an orphaned patch only gives
43
## TODO: Check cached data is consistent with data reconstructed
46
## TODO: Check no control files are versioned.
48
## TODO: Check that the before-hash of each file in a later
49
## revision matches the after-hash in the previous revision to
53
def check_inventory():
54
mutter("checking inventory file and ids...")
58
for l in controlfile('inventory').readlines():
61
bailout("malformed inventory line: " + `l`)
64
if file_id in seen_ids:
65
bailout("duplicated file id " + file_id)
68
if name in seen_names:
69
bailout("duplicated file name in inventory: " + quotefn(name))
72
if is_control_file(name):
73
raise BzrError("control file %s present in inventory" % quotefn(name))
76
def check_patches_exist():
    """Check constraint of current version: all patches exist.

    Reads the patch header of every patch id listed by revision_history().
    The headers themselves are discarded.
    """
    mutter("checking all patches are present...")
    for pid in revision_history():
        # NOTE(review): presumably read_patch_header fails loudly when the
        # patch is missing; that is the only way this loop can detect a
        # problem -- confirm against its definition.
        read_patch_header(pid)
83
def check_patch_chaining():
84
"""Check ancestry of patches and history file is consistent"""
85
mutter("checking patch chaining...")
87
for pid in revision_history():
88
log_prev = read_patch_header(pid).precursor
90
bailout("inconsistent precursor links on " + pid)
94
def check_patch_uniqueness():
95
"""Make sure no patch is listed twice in the history.
97
This should be implied by having correct ancestry but I'll check it
99
mutter("checking history for duplicates...")
101
for pid in revision_history():
103
bailout("patch " + pid + " appears twice in history")