1
# Copyright (C) 2004, 2005 by Martin Pool
2
# Copyright (C) 2005 by Canonical Ltd
1
# Copyright (C) 2005, 2006 Canonical Ltd
4
3
# This program is free software; you can redistribute it and/or modify
5
4
# it under the terms of the GNU General Public License as published by
6
5
# the Free Software Foundation; either version 2 of the License, or
7
6
# (at your option) any later version.
9
8
# This program is distributed in the hope that it will be useful,
10
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
11
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
11
# GNU General Public License for more details.
14
13
# You should have received a copy of the GNU General Public License
15
14
# along with this program; if not, write to the Free Software
16
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25
24
# TODO: Check revision, inventory and entry objects have all
27
# TODO: Get every revision in the revision-store even if they're not
28
# referenced by history and make sure they're all valid.
30
# TODO: Perhaps have a way to record errors other than by raising exceptions;
31
# would perhaps be enough to accumulate exception objects in a list without
32
# raising them. If there's more than one exception it'd be good to see them
35
from bzrlib.errors import BzrCheckError
30
from bzrlib.trace import note, warning
31
from bzrlib.osutils import rename, sha_string, fingerprint_file
32
from bzrlib.trace import mutter
33
from bzrlib.errors import BzrCheckError, NoSuchRevision
34
from bzrlib.inventory import ROOT_ID
35
from bzrlib.branch import gen_root_id
37
from bzrlib.trace import note
38
39
class Check(object):
41
def __init__(self, branch):
40
"""Check a repository"""
42
# The Check object interacts with InventoryEntry.check, etc.
44
def __init__(self, repository):
45
self.repository = repository
43
46
self.checked_text_cnt = 0
44
47
self.checked_rev_cnt = 0
47
50
self.missing_parent_links = {}
48
51
self.missing_inventory_sha_cnt = 0
49
52
self.missing_revision_cnt = 0
50
# maps (file-id, version) -> sha1
53
# maps (file-id, version) -> sha1; used by InventoryFile._check
51
54
self.checked_texts = {}
55
self.checked_weaves = {}
54
self.branch.lock_read()
58
self.repository.lock_read()
59
self.progress = bzrlib.ui.ui_factory.nested_progress_bar()
56
self.history = self.branch.revision_history()
57
if not len(self.history):
60
self.planned_revisions = self.branch.get_ancestry(self.history[-1])
61
self.planned_revisions.remove(None)
61
self.progress.update('retrieving inventory', 0, 0)
62
# do not put in init, as it should be done with progress,
63
# and inside the lock.
64
self.inventory_weave = self.repository.get_inventory_weave()
64
self.progress = bzrlib.ui.ui_factory.progress_bar()
65
68
while revno < len(self.planned_revisions):
66
69
rev_id = self.planned_revisions[revno]
67
70
self.progress.update('checking revision', revno,
68
71
len(self.planned_revisions))
70
73
self.check_one_rev(rev_id)
75
self.progress.finished()
76
self.repository.unlock()
78
def plan_revisions(self):
79
repository = self.repository
80
self.planned_revisions = set(repository.all_revision_ids())
82
inventoried = set(self.inventory_weave.versions())
83
awol = self.planned_revisions - inventoried
85
raise BzrCheckError('Stored revisions missing from inventory'
86
'{%s}' % ','.join([f for f in awol]))
87
self.planned_revisions = list(self.planned_revisions)
75
89
def report_results(self, verbose):
76
note('checked branch %s format %d',
78
self.branch._branch_format)
90
note('checked repository %s format %s',
91
self.repository.bzrdir.root_transport,
92
self.repository._format)
80
93
note('%6d revisions', self.checked_rev_cnt)
81
94
note('%6d unique file texts', self.checked_text_cnt)
82
95
note('%6d repeated file texts', self.repeated_text_cnt)
96
note('%6d weaves', len(self.checked_weaves))
83
97
if self.missing_inventory_sha_cnt:
84
98
note('%6d revisions are missing inventory_sha1',
85
99
self.missing_inventory_sha_cnt)
104
118
"""Check one revision.
106
120
rev_id - the one to check
108
last_rev_id - the previous one on the mainline, if any.
111
# mutter(' revision {%s}' % rev_id)
114
rev_history_position = self.history.index(rev_id)
116
rev_history_position = None
118
if rev_history_position:
119
rev = branch.get_revision(rev_id)
120
if rev_history_position > 0:
121
last_rev_id = self.history[rev_history_position - 1]
123
rev = branch.get_revision(rev_id)
122
rev = self.repository.get_revision(rev_id)
125
124
if rev.revision_id != rev_id:
126
125
raise BzrCheckError('wrong internal revision id in revision {%s}'
129
# check the previous history entry is a parent of this entry
131
if last_rev_id is None and rev_history_position is not None:
132
# what if the start is a ghost ? i.e. conceptually the
134
raise BzrCheckError("revision {%s} has %d parents, but is the "
135
"start of the branch"
136
% (rev_id, len(rev.parent_ids)))
137
if last_rev_id is not None:
138
for parent_id in rev.parent_ids:
139
if parent_id == last_rev_id:
128
for parent in rev.parent_ids:
129
if not parent in self.planned_revisions:
130
missing_links = self.missing_parent_links.get(parent, [])
131
missing_links.append(rev_id)
132
self.missing_parent_links[parent] = missing_links
133
# list based so somewhat slow,
134
# TODO have a planned_revisions list and set.
135
if self.repository.has_revision(parent):
136
missing_ancestry = self.repository.get_ancestry(parent)
137
for missing in missing_ancestry:
138
if (missing is not None
139
and missing not in self.planned_revisions):
140
self.planned_revisions.append(missing)
142
raise BzrCheckError("previous revision {%s} not listed among "
144
% (last_rev_id, rev_id))
145
for parent in rev.parent_ids:
146
if not parent in self.planned_revisions:
147
missing_links = self.missing_parent_links.get(parent, [])
148
missing_links.append(rev_id)
149
self.missing_parent_links[parent] = missing_links
150
# list based so slow, TODO have a planned_revisions list and set.
151
if self.branch.has_revision(parent):
152
missing_ancestry = self.branch.get_ancestry(parent)
153
for missing in missing_ancestry:
154
if (missing is not None
155
and missing not in self.planned_revisions):
156
self.planned_revisions.append(missing)
158
self.ghosts.append(rev_id)
160
raise BzrCheckError("revision {%s} has no parents listed "
161
"but preceded by {%s}"
162
% (rev_id, last_rev_id))
142
self.ghosts.append(rev_id)
164
144
if rev.inventory_sha1:
165
inv_sha1 = branch.get_inventory_sha1(rev_id)
145
inv_sha1 = self.repository.get_inventory_sha1(rev_id)
166
146
if inv_sha1 != rev.inventory_sha1:
167
147
raise BzrCheckError('Inventory sha1 hash doesn\'t match'
168
148
' value in revision {%s}' % rev_id)
170
missing_inventory_sha_cnt += 1
171
mutter("no inventory_sha1 on revision {%s}" % rev_id)
172
149
self._check_revision_tree(rev_id)
173
150
self.checked_rev_cnt += 1
152
def check_weaves(self):
153
"""Check all the weaves we can get our hands on.
157
if self.repository.weave_store.listable():
158
weave_ids = list(self.repository.weave_store)
159
n_weaves = len(weave_ids)
160
self.progress.update('checking weave', 0, n_weaves)
161
self.inventory_weave.check(progress_bar=self.progress)
162
for i, weave_id in enumerate(weave_ids):
163
self.progress.update('checking weave', i, n_weaves)
164
w = self.repository.weave_store.get_weave(weave_id,
165
self.repository.get_transaction())
166
# No progress here, because it looks ugly.
168
self.checked_weaves[weave_id] = True
175
170
def _check_revision_tree(self, rev_id):
176
tree = self.branch.revision_tree(rev_id)
171
tree = self.repository.revision_tree(rev_id)
177
172
inv = tree.inventory
179
174
for file_id in inv: