25
24
# TODO: Check revision, inventory and entry objects have all
27
# TODO: Get every revision in the revision-store even if they're not
28
# referenced by history and make sure they're all valid.
30
# TODO: Perhaps have a way to record errors other than by raising exceptions;
31
# would perhaps be enough to accumulate exception objects in a list without
32
# raising them. If there's more than one exception it'd be good to see them
35
from bzrlib.errors import BzrCheckError
30
from bzrlib.trace import note, warning
31
from bzrlib.osutils import rename, sha_string, fingerprint_file
32
from bzrlib.trace import mutter
33
from bzrlib.errors import BzrCheckError, NoSuchRevision
34
from bzrlib.inventory import ROOT_ID
35
from bzrlib.branch import gen_root_id
37
from bzrlib.trace import note
38
39
class Check(object):
40
def __init__(self, branch):
44
branch.weave_store.enable_cache = True
45
branch.control_weaves.enable_cache = True
49
branch.weave_store.enable_cache = False
50
branch.control_weaves.enable_cache = False
40
"""Check a repository"""
42
# The Check object interacts with InventoryEntry.check, etc.
44
def __init__(self, repository):
45
self.repository = repository
57
46
self.checked_text_cnt = 0
58
47
self.checked_rev_cnt = 0
59
49
self.repeated_text_cnt = 0
50
self.missing_parent_links = {}
60
51
self.missing_inventory_sha_cnt = 0
61
52
self.missing_revision_cnt = 0
62
# maps (file-id, version) -> sha1
53
# maps (file-id, version) -> sha1; used by InventoryFile._check
63
54
self.checked_texts = {}
65
history = branch.revision_history()
67
revcount = len(history)
70
self.progress = bzrlib.ui.ui_factory.progress_bar()
71
for rev_id in history:
72
self.progress.update('checking revision', revno, revcount)
74
self.check_one_rev(rev_id, last_rev_id)
55
self.checked_weaves = {}
58
self.repository.lock_read()
59
self.progress = bzrlib.ui.ui_factory.nested_progress_bar()
61
self.progress.update('retrieving inventory', 0, 0)
62
# do not put in init, as it should be done with progess,
63
# and inside the lock.
64
self.inventory_weave = self.repository.get_inventory_weave()
68
while revno < len(self.planned_revisions):
69
rev_id = self.planned_revisions[revno]
70
self.progress.update('checking revision', revno,
71
len(self.planned_revisions))
73
self.check_one_rev(rev_id)
75
self.progress.finished()
76
self.repository.unlock()
78
def plan_revisions(self):
79
repository = self.repository
80
self.planned_revisions = set(repository.all_revision_ids())
76
81
self.progress.clear()
80
def report_results(self):
81
note('checked branch %s format %d',
83
self.branch._branch_format)
82
inventoried = set(self.inventory_weave.versions())
83
awol = self.planned_revisions - inventoried
85
raise BzrCheckError('Stored revisions missing from inventory'
86
'{%s}' % ','.join([f for f in awol]))
87
self.planned_revisions = list(self.planned_revisions)
89
def report_results(self, verbose):
90
note('checked repository %s format %s',
91
self.repository.bzrdir.root_transport,
92
self.repository._format)
85
93
note('%6d revisions', self.checked_rev_cnt)
86
94
note('%6d unique file texts', self.checked_text_cnt)
87
95
note('%6d repeated file texts', self.repeated_text_cnt)
96
note('%6d weaves', len(self.checked_weaves))
88
97
if self.missing_inventory_sha_cnt:
89
note('%d revisions are missing inventory_sha1',
98
note('%6d revisions are missing inventory_sha1',
90
99
self.missing_inventory_sha_cnt)
91
100
if self.missing_revision_cnt:
92
note('%d revisions are mentioned but not present',
101
note('%6d revisions are mentioned but not present',
93
102
self.missing_revision_cnt)
96
def check_one_rev(self, rev_id, last_rev_id):
104
note('%6d ghost revisions', len(self.ghosts))
106
for ghost in self.ghosts:
108
if len(self.missing_parent_links):
109
note('%6d revisions missing parents in ancestry',
110
len(self.missing_parent_links))
112
for link, linkers in self.missing_parent_links.items():
113
note(' %s should be in the ancestry for:', link)
114
for linker in linkers:
115
note(' * %s', linker)
117
def check_one_rev(self, rev_id):
97
118
"""Check one revision.
99
120
rev_id - the one to check
101
last_rev_id - the previous one on the mainline, if any.
104
# mutter(' revision {%s}' % rev_id)
106
rev = branch.get_revision(rev_id)
122
rev = self.repository.get_revision(rev_id)
107
124
if rev.revision_id != rev_id:
108
125
raise BzrCheckError('wrong internal revision id in revision {%s}'
111
# check the previous history entry is a parent of this entry
113
if last_rev_id is None:
114
raise BzrCheckError("revision {%s} has %d parents, but is the "
115
"start of the branch"
116
% (rev_id, len(rev.parent_ids)))
117
for parent_id in rev.parent_ids:
118
if parent_id == last_rev_id:
121
raise BzrCheckError("previous revision {%s} not listed among "
123
% (last_rev_id, rev_id))
125
raise BzrCheckError("revision {%s} has no parents listed "
126
"but preceded by {%s}"
127
% (rev_id, last_rev_id))
128
for parent in rev.parent_ids:
129
if not parent in self.planned_revisions:
130
missing_links = self.missing_parent_links.get(parent, [])
131
missing_links.append(rev_id)
132
self.missing_parent_links[parent] = missing_links
133
# list based so somewhat slow,
134
# TODO have a planned_revisions list and set.
135
if self.repository.has_revision(parent):
136
missing_ancestry = self.repository.get_ancestry(parent)
137
for missing in missing_ancestry:
138
if (missing is not None
139
and missing not in self.planned_revisions):
140
self.planned_revisions.append(missing)
142
self.ghosts.append(rev_id)
129
144
if rev.inventory_sha1:
130
inv_sha1 = branch.get_inventory_sha1(rev_id)
145
inv_sha1 = self.repository.get_inventory_sha1(rev_id)
131
146
if inv_sha1 != rev.inventory_sha1:
132
147
raise BzrCheckError('Inventory sha1 hash doesn\'t match'
133
148
' value in revision {%s}' % rev_id)
135
missing_inventory_sha_cnt += 1
136
mutter("no inventory_sha1 on revision {%s}" % rev_id)
137
149
self._check_revision_tree(rev_id)
138
150
self.checked_rev_cnt += 1
152
def check_weaves(self):
153
"""Check all the weaves we can get our hands on.
157
if self.repository.weave_store.listable():
158
weave_ids = list(self.repository.weave_store)
159
n_weaves = len(weave_ids)
160
self.progress.update('checking weave', 0, n_weaves)
161
self.inventory_weave.check(progress_bar=self.progress)
162
for i, weave_id in enumerate(weave_ids):
163
self.progress.update('checking weave', i, n_weaves)
164
w = self.repository.weave_store.get_weave(weave_id,
165
self.repository.get_transaction())
166
# No progress here, because it looks ugly.
168
self.checked_weaves[weave_id] = True
140
170
def _check_revision_tree(self, rev_id):
141
tree = self.branch.revision_tree(rev_id)
171
tree = self.repository.revision_tree(rev_id)
142
172
inv = tree.inventory
144
174
for file_id in inv: