~bzr-pqm/bzr/bzr.dev

2052.3.2 by John Arbash Meinel
Change Copyright .. by Canonical to Copyright ... Canonical
1
# Copyright (C) 2005, 2006 Canonical Ltd
1887.1.1 by Adeodato Simó
Do not separate paragraphs in the copyright statement with blank lines,
2
#
1 by mbp at sourcefrog
import from baz patch-364
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
1887.1.1 by Adeodato Simó
Do not separate paragraphs in the copyright statement with blank lines,
7
#
1 by mbp at sourcefrog
import from baz patch-364
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
1887.1.1 by Adeodato Simó
Do not separate paragraphs in the copyright statement with blank lines,
12
#
1 by mbp at sourcefrog
import from baz patch-364
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
1335 by Martin Pool
doc
17
# TODO: Check ancestries are correct for every revision: each includes
# every revision committed so far, and in a reasonable order.
19
1347 by Martin Pool
- refactor check code into method object
20
# TODO: Also check non-mainline revisions mentioned as parents.
21
22
# TODO: Check for extra files in the control directory.
23
1348 by Martin Pool
- more refactoring of check code
24
# TODO: Check revision, inventory and entry objects have all 
25
# required fields.
26
1185.16.101 by mbp at sourcefrog
todo
27
# TODO: Get every revision in the revision-store even if they're not
28
# referenced by history and make sure they're all valid.
1347 by Martin Pool
- refactor check code into method object
29
1616.1.5 by Martin Pool
Cleanup and document some check code
30
# TODO: Perhaps have a way to record errors other than by raising exceptions;
31
# would perhaps be enough to accumulate exception objects in a list without
32
# raising them.  If there's more than one exception it'd be good to see them
33
# all.
34
2745.6.47 by Andrew Bennetts
Move check_parents out of VersionedFile.
35
from bzrlib import errors
2745.6.16 by Aaron Bentley
Update from review
36
from bzrlib import repository as _mod_repository
2745.6.47 by Andrew Bennetts
Move check_parents out of VersionedFile.
37
from bzrlib import revision
1773.4.1 by Martin Pool
Add pyflakes makefile target; fix many warnings
38
from bzrlib.errors import BzrCheckError
1104 by Martin Pool
- Add a simple UIFactory
39
import bzrlib.ui
2745.6.61 by Andrew Bennetts
Remove some useless mutters, and cope with a file_id that isn't present in a revision's inventory.
40
from bzrlib.trace import log_error, note
1104 by Martin Pool
- Add a simple UIFactory
41
1347 by Martin Pool
- refactor check code into method object
42
class Check(object):
    """Check a repository.

    Construct with a repository, call check() to run the checks, then
    report_results() to log what was found.  The counters and collections
    created in __init__ hold the findings in between.
    """

    # The Check object interacts with InventoryEntry.check, etc.

    def __init__(self, repository):
        """Prepare a checker for repository; no checking happens here."""
        self.repository = repository
        # NOTE(review): checked_text_cnt, repeated_text_cnt,
        # missing_inventory_sha_cnt and missing_revision_cnt are only read in
        # this class; presumably they are updated by the inventory entry
        # check code, which receives this object -- confirm.
        self.checked_text_cnt = 0
        self.checked_rev_cnt = 0
        self.ghosts = []
        self.repeated_text_cnt = 0
        # maps missing parent revision id -> list of revision ids naming it
        self.missing_parent_links = {}
        self.missing_inventory_sha_cnt = 0
        self.missing_revision_cnt = 0
        # maps (file-id, version) -> sha1; used by InventoryFile._check
        self.checked_texts = {}
        self.checked_weaves = set()
        self.unreferenced_versions = set()
        # list of (revision_id, file_id, found_parents, correct_parents)
        self.inconsistent_parents = []
        # Filled in by check_revision_graph().  Initialised here so that
        # report_results() does not fail with AttributeError when it is
        # called on a checker whose revision graph check has not run.
        self.revs_with_bad_parents_in_index = None

    def check(self):
        """Run every check against the repository under a read lock.

        Findings are accumulated on this object; use report_results() to
        log them.  The progress bar is always finished and the repository
        always unlocked, even if a check raises.
        """
        self.repository.lock_read()
        try:
            # Created inside the outer try so that a failure to obtain the
            # progress bar cannot leak the read lock.
            self.progress = bzrlib.ui.ui_factory.nested_progress_bar()
            try:
                self.progress.update('retrieving inventory', 0, 2)
                # do not put in init, as it should be done with progress,
                # and inside the lock.
                self.inventory_weave = self.repository.inventories
                self.progress.update('checking revision graph', 1)
                self.check_revision_graph()
                self.plan_revisions()
                revno = 0
                # check_one_rev may append to planned_revisions, so the
                # length is re-read on every pass rather than cached.
                while revno < len(self.planned_revisions):
                    rev_id = self.planned_revisions[revno]
                    self.progress.update('checking revision', revno,
                                         len(self.planned_revisions))
                    revno += 1
                    self.check_one_rev(rev_id)
                # check_weaves is done after the revision scan so that
                # revision index is known to be valid.
                self.check_weaves()
            finally:
                self.progress.finished()
        finally:
            self.repository.unlock()

    def check_revision_graph(self):
        """Record revisions whose indexed parents are inconsistent.

        Sets self.revs_with_bad_parents_in_index to None when the repository
        says its revision graph cannot have wrong parents, otherwise to a
        list of the items the repository reports.
        """
        if not self.repository.revision_graph_can_have_wrong_parents():
            # This check is not necessary.
            self.revs_with_bad_parents_in_index = None
            return
        bad_revisions = self.repository._find_inconsistent_revision_parents()
        self.revs_with_bad_parents_in_index = list(bad_revisions)

    def plan_revisions(self):
        """Work out which revisions to check.

        Sets self.planned_revisions to all revision ids in the repository.

        :raise BzrCheckError: if any of those revisions has no entry in the
            inventory store.
        """
        repository = self.repository
        self.planned_revisions = repository.all_revision_ids()
        self.progress.clear()
        inventoried = set(key[-1] for key in self.inventory_weave.keys())
        awol = set(self.planned_revisions) - inventoried
        if awol:
            # Sorted so the message is the same from run to run.
            raise BzrCheckError('Stored revisions missing from inventory'
                '{%s}' % ','.join(sorted(awol)))

    def report_results(self, verbose):
        """Log the findings accumulated on this object.

        Counts are reported through note(); unreferenced versions are
        reported through log_error().

        :param verbose: if true, also list the individual items behind
            each non-zero count.
        """
        note('checked repository %s format %s',
             self.repository.bzrdir.root_transport,
             self.repository._format)
        note('%6d revisions', self.checked_rev_cnt)
        note('%6d file-ids', len(self.checked_weaves))
        note('%6d unique file texts', self.checked_text_cnt)
        note('%6d repeated file texts', self.repeated_text_cnt)
        note('%6d unreferenced text versions',
             len(self.unreferenced_versions))
        if self.missing_inventory_sha_cnt:
            note('%6d revisions are missing inventory_sha1',
                 self.missing_inventory_sha_cnt)
        if self.missing_revision_cnt:
            note('%6d revisions are mentioned but not present',
                 self.missing_revision_cnt)
        if self.ghosts:
            note('%6d ghost revisions', len(self.ghosts))
            if verbose:
                for ghost in self.ghosts:
                    note('      %s', ghost)
        if self.missing_parent_links:
            note('%6d revisions missing parents in ancestry',
                 len(self.missing_parent_links))
            if verbose:
                for link, linkers in self.missing_parent_links.items():
                    note('      %s should be in the ancestry for:', link)
                    for linker in linkers:
                        note('       * %s', linker)
        # This listing used to be nested under the missing_parent_links
        # branch above, so unreferenced versions were only listed when
        # there also happened to be missing parent links.
        if verbose:
            for file_id, revision_id in self.unreferenced_versions:
                log_error('unreferenced version: {%s} in %s', revision_id,
                    file_id)
        if self.inconsistent_parents:
            note('%6d inconsistent parents', len(self.inconsistent_parents))
            if verbose:
                for info in self.inconsistent_parents:
                    revision_id, file_id, found_parents, correct_parents = info
                    note('      * %s version %s has parents %r '
                         'but should have %r',
                         file_id, revision_id, found_parents,
                         correct_parents)
        if self.revs_with_bad_parents_in_index:
            note('%6d revisions have incorrect parents in the revision index',
                 len(self.revs_with_bad_parents_in_index))
            if verbose:
                for item in self.revs_with_bad_parents_in_index:
                    revision_id, index_parents, actual_parents = item
                    note(
                        '       %s has wrong parents in index: '
                        '%r should be %r',
                        revision_id, index_parents, actual_parents)

    def check_one_rev(self, rev_id):
        """Check one revision.

        rev_id - the one to check

        Parents that are not in planned_revisions are recorded in
        missing_parent_links; if such a parent is present in the repository
        its ancestry is queued for checking, otherwise a ghost is recorded.

        :raise BzrCheckError: if the stored revision carries a different
            revision id, or its inventory sha1 does not match the
            repository's.
        """
        rev = self.repository.get_revision(rev_id)

        if rev.revision_id != rev_id:
            raise BzrCheckError('wrong internal revision id in revision {%s}'
                                % rev_id)

        for parent in rev.parent_ids:
            if parent not in self.planned_revisions:
                missing_links = self.missing_parent_links.get(parent, [])
                missing_links.append(rev_id)
                self.missing_parent_links[parent] = missing_links
                # list based so somewhat slow,
                # TODO have a planned_revisions list and set.
                if self.repository.has_revision(parent):
                    missing_ancestry = self.repository.get_ancestry(parent)
                    for missing in missing_ancestry:
                        if (missing is not None
                            and missing not in self.planned_revisions):
                            self.planned_revisions.append(missing)
                else:
                    # NOTE(review): this records the revision that refers to
                    # the absent parent, not the absent parent itself, yet
                    # report_results labels these 'ghost revisions'.
                    # Behaviour kept as-is; confirm which id is intended.
                    self.ghosts.append(rev_id)

        if rev.inventory_sha1:
            inv_sha1 = self.repository.get_inventory_sha1(rev_id)
            if inv_sha1 != rev.inventory_sha1:
                raise BzrCheckError('Inventory sha1 hash doesn\'t match'
                    ' value in revision {%s}' % rev_id)
        self._check_revision_tree(rev_id)
        self.checked_rev_cnt += 1

    def check_weaves(self):
        """Check all the weaves we can get our hands on.

        Runs the inventory and text store checks, then uses the
        repository's versioned file checker to fill in checked_weaves,
        inconsistent_parents and unreferenced_versions.
        """
        self.progress.update('checking inventory', 0, 2)
        self.inventory_weave.check(progress_bar=self.progress)
        self.progress.update('checking text storage', 1, 2)
        self.repository.texts.check(progress_bar=self.progress)
        weave_checker = self.repository._get_versioned_file_checker()
        result = weave_checker.check_file_version_parents(
            self.repository.texts, progress_bar=self.progress)
        self.checked_weaves = weave_checker.file_ids
        bad_parents, unused_versions = result
        for text_key, (stored_parents, correct_parents) in bad_parents.items():
            # XXX not ready for id join/split operations.
            weave_id = text_key[0]
            revision_id = text_key[-1]
            # Keep only the last element of each parent key.
            weave_parents = tuple([parent[-1] for parent in stored_parents])
            correct_parents = tuple([parent[-1] for parent in correct_parents])
            self.inconsistent_parents.append(
                (revision_id, weave_id, weave_parents, correct_parents))
        self.unreferenced_versions.update(unused_versions)

    def _check_revision_tree(self, rev_id):
        """Check the inventory of the tree for revision rev_id.

        Each inventory entry is asked to check itself, being given this
        checker, the revision id, the inventory and the tree.

        :raise BzrCheckError: if a file id or a path occurs more than once
            in the inventory.
        """
        tree = self.repository.revision_tree(rev_id)
        inv = tree.inventory
        seen_ids = set()
        for file_id in inv:
            if file_id in seen_ids:
                raise BzrCheckError('duplicated file_id {%s} '
                                    'in inventory for revision {%s}'
                                    % (file_id, rev_id))
            seen_ids.add(file_id)
        for file_id in inv:
            ie = inv[file_id]
            ie.check(self, rev_id, inv, tree)
        seen_names = set()
        for path, ie in inv.iter_entries():
            if path in seen_names:
                raise BzrCheckError('duplicated path %s '
                                    'in inventory for revision {%s}'
                                    % (path, rev_id))
            seen_names.add(path)
1349 by Martin Pool
- more refactoring of check code
237
1347 by Martin Pool
- refactor check code into method object
238
1449 by Robert Collins
teach check about ghosts
239
def check(branch, verbose):
    """Run consistency checks on a branch and its repository.

    The branch is read-locked while the branch check and the repository
    check (given the branch's last revision) run; the lock is released
    before anything is reported.  Results are reported through logging,
    branch first and then repository.

    :param branch: the branch to check.
    :param verbose: passed through to each result's report_results().
    :raise BzrCheckError: if there's a consistency error.
    """
    branch.lock_read()
    try:
        # Tuple elements are evaluated left to right, so the branch is
        # checked before the repository.
        outcomes = (
            branch.check(),
            branch.repository.check([branch.last_revision()]),
        )
    finally:
        branch.unlock()
    for outcome in outcomes:
        outcome.report_results(verbose)
2745.6.47 by Andrew Bennetts
Move check_parents out of VersionedFile.
254
255
256