~bzr-pqm/bzr/bzr.dev

2052.3.2 by John Arbash Meinel
Change Copyright .. by Canonical to Copyright ... Canonical
1
# Copyright (C) 2005, 2006 Canonical Ltd
1887.1.1 by Adeodato Simó
Do not separate paragraphs in the copyright statement with blank lines,
2
#
1 by mbp at sourcefrog
import from baz patch-364
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
1887.1.1 by Adeodato Simó
Do not separate paragraphs in the copyright statement with blank lines,
7
#
1 by mbp at sourcefrog
import from baz patch-364
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
1887.1.1 by Adeodato Simó
Do not separate paragraphs in the copyright statement with blank lines,
12
#
1 by mbp at sourcefrog
import from baz patch-364
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
1335 by Martin Pool
doc
17
# TODO: Check ancestries are correct for every revision: includes
18
# every revision committed so far, and in a reasonable order.
19
1347 by Martin Pool
- refactor check code into method object
20
# TODO: Also check non-mainline revisions mentioned as parents.
21
22
# TODO: Check for extra files in the control directory.
23
1348 by Martin Pool
- more refactoring of check code
24
# TODO: Check revision, inventory and entry objects have all 
25
# required fields.
26
1185.16.101 by mbp at sourcefrog
todo
27
# TODO: Get every revision in the revision-store even if they're not
28
# referenced by history and make sure they're all valid.
1347 by Martin Pool
- refactor check code into method object
29
1616.1.5 by Martin Pool
Cleanup and document some check code
30
# TODO: Perhaps have a way to record errors other than by raising exceptions;
31
# would perhaps be enough to accumulate exception objects in a list without
32
# raising them.  If there's more than one exception it'd be good to see them
33
# all.
34
3015.3.8 by Daniel Watkins
Added _scan_for_branches.
35
from bzrlib import errors, osutils
2745.6.16 by Aaron Bentley
Update from review
36
from bzrlib import repository as _mod_repository
2745.6.47 by Andrew Bennetts
Move check_parents out of VersionedFile.
37
from bzrlib import revision
3015.3.2 by Daniel Watkins
Check.check now takes a path rather than a branch.
38
from bzrlib.branch import Branch
3015.3.40 by Daniel Watkins
Modified bzrlib.check.check_dwim to use bzrlib.bzrdir.BzrDir.open_containing_tree_branch_or_repository.
39
from bzrlib.bzrdir import BzrDir
1773.4.1 by Martin Pool
Add pyflakes makefile target; fix many warnings
40
from bzrlib.errors import BzrCheckError
3015.3.3 by Daniel Watkins
Added _check_repository.
41
from bzrlib.repository import Repository
3015.3.59 by Daniel Watkins
Further tweaks as requested on-list.
42
from bzrlib.symbol_versioning import deprecated_function, deprecated_in
3015.3.35 by Daniel Watkins
Reintroduced bzrlib.check.check() with a deprecation warning.
43
from bzrlib.trace import log_error, note
1104 by Martin Pool
- Add a simple UIFactory
44
import bzrlib.ui
3015.3.11 by Daniel Watkins
Move WT checking from builtins to check.
45
from bzrlib.workingtree import WorkingTree
1104 by Martin Pool
- Add a simple UIFactory
46
1347 by Martin Pool
- refactor check code into method object
47
class Check(object):
    """Check a repository.

    Typical use is to construct a Check for a repository, call check() to
    run the checks, and then call report_results() to log what was found.

    Results are accumulated in public attributes (counters, lists and sets)
    which are initialised in __init__.  Some of them are not updated by this
    class itself; presumably they are filled in by the entry-level checks
    that receive this object (see the note below) -- confirm against
    InventoryEntry.check.
    """

    # The Check object interacts with InventoryEntry.check, etc.

    def __init__(self, repository):
        """Create a checker with empty results for ``repository``.

        :param repository: the repository to check.  Nothing is read from it
            until check() is called.
        """
        self.repository = repository
        self.checked_text_cnt = 0
        self.checked_rev_cnt = 0
        # Ids of parent revisions that are referenced but not present.
        self.ghosts = []
        self.repeated_text_cnt = 0
        # maps missing parent id -> list of revision ids that reference it
        self.missing_parent_links = {}
        self.missing_inventory_sha_cnt = 0
        self.missing_revision_cnt = 0
        # maps (file-id, version) -> sha1; used by InventoryFile._check
        self.checked_texts = {}
        self.checked_weaves = set()
        self.unreferenced_versions = set()
        # (revision_id, file_id, found_parents, correct_parents) tuples
        self.inconsistent_parents = []
        # Set by check_revision_graph(); initialised here so that
        # report_results() does not fail if check() did not get that far.
        self.revs_with_bad_parents_in_index = None

    def check(self):
        """Run all the checks, holding a repository read lock throughout.

        The revision graph is checked first, then every planned revision,
        and finally the versioned files.
        """
        self.repository.lock_read()
        try:
            # Created inside the try block so that the repository is still
            # unlocked if the progress bar cannot be obtained.
            self.progress = bzrlib.ui.ui_factory.nested_progress_bar()
            try:
                self.progress.update('retrieving inventory', 0, 2)
                # do not put in init, as it should be done with progress,
                # and inside the lock.
                self.inventory_weave = self.repository.inventories
                self.progress.update('checking revision graph', 1)
                self.check_revision_graph()
                self.plan_revisions()
                # An index based loop is used on purpose: check_one_rev may
                # append further revisions to planned_revisions while we
                # are walking it.
                revno = 0
                while revno < len(self.planned_revisions):
                    rev_id = self.planned_revisions[revno]
                    self.progress.update('checking revision', revno,
                                         len(self.planned_revisions))
                    revno += 1
                    self.check_one_rev(rev_id)
                # check_weaves is done after the revision scan so that
                # revision index is known to be valid.
                self.check_weaves()
            finally:
                self.progress.finished()
        finally:
            self.repository.unlock()

    def check_revision_graph(self):
        """Record revisions whose parents in the revision index are wrong.

        Sets self.revs_with_bad_parents_in_index to None when the repository
        reports that its revision graph cannot have wrong parents, otherwise
        to a list of whatever _find_inconsistent_revision_parents yields.
        """
        if not self.repository.revision_graph_can_have_wrong_parents():
            # This check is not necessary.
            self.revs_with_bad_parents_in_index = None
            return
        bad_revisions = self.repository._find_inconsistent_revision_parents()
        self.revs_with_bad_parents_in_index = list(bad_revisions)

    def plan_revisions(self):
        """Work out which revisions to check.

        Sets self.planned_revisions to the ids reported by
        repository.all_revision_ids().

        :raise BzrCheckError: if any of those revisions has no key in the
            inventories.
        """
        repository = self.repository
        # Take a private copy: check() indexes this sequence and
        # check_one_rev() appends to it, neither of which should touch an
        # object owned by the repository.
        self.planned_revisions = list(repository.all_revision_ids())
        self.progress.clear()
        inventoried = set(key[-1] for key in self.inventory_weave.keys())
        awol = set(self.planned_revisions) - inventoried
        if awol:
            # Sorted so that the message is the same from run to run.
            raise BzrCheckError('Stored revisions missing from inventory'
                '{%s}' % ','.join(sorted(awol)))

    def report_results(self, verbose):
        """Log the accumulated results.

        :param verbose: if true, also list the individual ghosts, missing
            parent links, unreferenced versions, inconsistent parents and
            revisions with wrong parents in the index.
        """
        note('checked repository %s format %s',
             self.repository.bzrdir.root_transport,
             self.repository._format)
        note('%6d revisions', self.checked_rev_cnt)
        note('%6d file-ids', len(self.checked_weaves))
        note('%6d unique file texts', self.checked_text_cnt)
        note('%6d repeated file texts', self.repeated_text_cnt)
        note('%6d unreferenced text versions',
             len(self.unreferenced_versions))
        if self.missing_inventory_sha_cnt:
            note('%6d revisions are missing inventory_sha1',
                 self.missing_inventory_sha_cnt)
        if self.missing_revision_cnt:
            note('%6d revisions are mentioned but not present',
                 self.missing_revision_cnt)
        if self.ghosts:
            note('%6d ghost revisions', len(self.ghosts))
            if verbose:
                for ghost in self.ghosts:
                    note('      %s', ghost)
        if self.missing_parent_links:
            note('%6d revisions missing parents in ancestry',
                 len(self.missing_parent_links))
            if verbose:
                for link, linkers in self.missing_parent_links.items():
                    note('      %s should be in the ancestry for:', link)
                    for linker in linkers:
                        note('       * %s', linker)
        # Listed independently of missing_parent_links: unreferenced
        # versions are unrelated to missing ancestry and must not be hidden
        # just because the ancestry happens to be complete.
        if verbose:
            for file_id, revision_id in self.unreferenced_versions:
                log_error('unreferenced version: {%s} in %s', revision_id,
                    file_id)
        if self.inconsistent_parents:
            note('%6d inconsistent parents', len(self.inconsistent_parents))
            if verbose:
                for info in self.inconsistent_parents:
                    revision_id, file_id, found_parents, correct_parents = info
                    note('      * %s version %s has parents %r '
                         'but should have %r'
                         % (file_id, revision_id, found_parents,
                             correct_parents))
        if self.revs_with_bad_parents_in_index:
            note('%6d revisions have incorrect parents in the revision index',
                 len(self.revs_with_bad_parents_in_index))
            if verbose:
                for item in self.revs_with_bad_parents_in_index:
                    revision_id, index_parents, actual_parents = item
                    note(
                        '       %s has wrong parents in index: '
                        '%r should be %r',
                        revision_id, index_parents, actual_parents)

    def check_one_rev(self, rev_id):
        """Check one revision.

        rev_id - the one to check

        Parents that are not already planned are recorded in
        missing_parent_links.  If such a parent is present in the repository
        its ancestry is appended to planned_revisions; otherwise the parent
        is recorded as a ghost.

        :raise BzrCheckError: if the stored revision carries a different
            revision id, or its inventory sha1 does not match the one the
            repository reports.
        """
        rev = self.repository.get_revision(rev_id)

        if rev.revision_id != rev_id:
            raise BzrCheckError('wrong internal revision id in revision {%s}'
                                % rev_id)

        for parent in rev.parent_ids:
            if parent not in self.planned_revisions:
                self.missing_parent_links.setdefault(parent, []).append(rev_id)
                # list based so somewhat slow,
                # TODO have a planned_revisions list and set.
                if self.repository.has_revision(parent):
                    missing_ancestry = self.repository.get_ancestry(parent)
                    for missing in missing_ancestry:
                        if (missing is not None
                            and missing not in self.planned_revisions):
                            self.planned_revisions.append(missing)
                elif parent not in self.ghosts:
                    # The ghost is the absent parent, not the revision that
                    # refers to it; record each ghost only once.
                    self.ghosts.append(parent)

        if rev.inventory_sha1:
            inv_sha1 = self.repository.get_inventory_sha1(rev_id)
            if inv_sha1 != rev.inventory_sha1:
                raise BzrCheckError('Inventory sha1 hash doesn\'t match'
                    ' value in revision {%s}' % rev_id)
        self._check_revision_tree(rev_id)
        self.checked_rev_cnt += 1

    def check_weaves(self):
        """Check all the weaves we can get our hands on.

        Checks the inventories and the texts, then asks the repository's
        versioned file checker for text versions with wrong parents and for
        unused versions.  The findings are stored in inconsistent_parents,
        unreferenced_versions and checked_weaves.
        """
        self.progress.update('checking inventory', 0, 2)
        self.inventory_weave.check(progress_bar=self.progress)
        self.progress.update('checking text storage', 1, 2)
        self.repository.texts.check(progress_bar=self.progress)
        weave_checker = self.repository._get_versioned_file_checker()
        result = weave_checker.check_file_version_parents(
            self.repository.texts, progress_bar=self.progress)
        self.checked_weaves = weave_checker.file_ids
        bad_parents, unused_versions = result
        for text_key, parents in bad_parents.items():
            stored_parents, correct_parents = parents
            # XXX not ready for id join/split operations.
            weave_id = text_key[0]
            revision_id = text_key[-1]
            # Only the last element of each parent key is reported.
            weave_parents = tuple([parent[-1] for parent in stored_parents])
            correct_parents = tuple([parent[-1] for parent in correct_parents])
            self.inconsistent_parents.append(
                (revision_id, weave_id, weave_parents, correct_parents))
        self.unreferenced_versions.update(unused_versions)

    def _check_revision_tree(self, rev_id):
        """Check the inventory of the tree for revision ``rev_id``.

        Every entry is asked to check itself, being handed this Check
        object, the revision id, the inventory and the tree.

        :raise BzrCheckError: if a file id or a path occurs more than once
            in the inventory.
        """
        tree = self.repository.revision_tree(rev_id)
        inv = tree.inventory
        seen_ids = set()
        for file_id in inv:
            if file_id in seen_ids:
                raise BzrCheckError('duplicated file_id {%s} '
                                    'in inventory for revision {%s}'
                                    % (file_id, rev_id))
            seen_ids.add(file_id)
        for file_id in inv:
            ie = inv[file_id]
            ie.check(self, rev_id, inv, tree)
        seen_names = set()
        for path, ie in inv.iter_entries():
            if path in seen_names:
                raise BzrCheckError('duplicated path %s '
                                    'in inventory for revision {%s}'
                                    % (path, rev_id))
            seen_names.add(path)
1349 by Martin Pool
- more refactoring of check code
242
1347 by Martin Pool
- refactor check code into method object
243
3015.3.59 by Daniel Watkins
Further tweaks as requested on-list.
244
@deprecated_function(deprecated_in((1, 6, 0)))
def check(branch, verbose):
    """Deprecated alias for check_branch.

    Deprecated in 1.6.  Please use check_branch instead.

    The arguments are handed straight to check_branch, so results are
    reported through logging in the same way.

    :param branch: the branch to check.
    :param verbose: passed on to check_branch unchanged.
    :raise BzrCheckError: if there's a consistency error.
    """
    check_branch(branch=branch, verbose=verbose)
255
256
3015.3.7 by Daniel Watkins
Fixed failing tests.
257
def check_branch(branch, verbose):
    """Check one branch for consistency and log the outcome.

    The branch is read-locked only while branch.check() runs; the result
    object it returns is asked to report after the lock has been released.

    :param branch: the branch to check.
    :param verbose: passed to the result's report_results().
    :raise BzrCheckError: if there's a consistency error.
    """
    branch.lock_read()
    try:
        outcome = branch.check()
    finally:
        # Always release the lock, even when the check itself fails.
        branch.unlock()
    outcome.report_results(verbose)
2745.6.47 by Andrew Bennetts
Move check_parents out of VersionedFile.
270
271
3015.4.5 by Daniel Watkins
Each option selects only the specific thing to be checked.
272
def check_dwim(path, verbose, do_branch=False, do_repo=False, do_tree=False):
    """Check the working tree, branch and/or repository found at ``path``.

    Only the kinds of object selected by the do_* flags are checked.  When a
    selected kind cannot be found an error is logged instead.

    :param path: location handed to
        BzrDir.open_containing_tree_branch_or_repository.
    :param verbose: passed on to the repository and branch result reports.
    :param do_branch: check the branch found at path, or, when only a
        repository is found there, the branches returned by
        repo.find_branches(using=True).
    :param do_repo: check the repository.
    :param do_tree: check the working tree.
    """
    try:
        (tree, branch, repo,
         _relpath) = BzrDir.open_containing_tree_branch_or_repository(path)
    except errors.NotBranchError:
        # Nothing could be opened at this location.
        tree = branch = repo = None

    if do_tree:
        if tree is None:
            log_error("No working tree found at specified location.")
        else:
            note("Checking working tree at '%s'." 
                 % (tree.bzrdir.root_transport.base,))
            tree._check()

    if branch is not None:
        # We have a branch
        if repo is None:
            # The branch is in a shared repository
            repo = branch.repository
        branches = [branch]
    elif repo is not None:
        branches = repo.find_branches(using=True)

    if repo is None:
        # No repository means there was no branch either.
        if do_branch:
            log_error("No branch found at specified location.")
        if do_repo:
            log_error("No repository found at specified location.")
        return

    # The repository stays read-locked across both the repository check and
    # all of the branch checks.
    repo.lock_read()
    try:
        if do_repo:
            note("Checking repository at '%s'."
                 % (repo.bzrdir.root_transport.base,))
            repo_result = repo.check()
            repo_result.report_results(verbose)
        if do_branch:
            if branches == []:
                log_error("No branch found at specified location.")
            else:
                for found in branches:
                    note("Checking branch at '%s'."
                         % (found.bzrdir.root_transport.base,))
                    check_branch(found, verbose)
    finally:
        repo.unlock()