~bzr-pqm/bzr/bzr.dev

1616.1.5 by Martin Pool
Cleanup and document some check code
1
# Copyright (C) 2005, 2006 by Canonical Ltd
1887.1.1 by Adeodato Simó
Do not separate paragraphs in the copyright statement with blank lines,
2
#
1 by mbp at sourcefrog
import from baz patch-364
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
1887.1.1 by Adeodato Simó
Do not separate paragraphs in the copyright statement with blank lines,
7
#
1 by mbp at sourcefrog
import from baz patch-364
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
1887.1.1 by Adeodato Simó
Do not separate paragraphs in the copyright statement with blank lines,
12
#
1 by mbp at sourcefrog
import from baz patch-364
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
1335 by Martin Pool
doc
17
# TODO: Check ancestries are correct for every revision: includes
18
# every revision committed so far, and in a reasonable order.
19
1347 by Martin Pool
- refactor check code into method object
20
# TODO: Also check non-mainline revisions mentioned as parents.
21
22
# TODO: Check for extra files in the control directory.
23
1348 by Martin Pool
- more refactoring of check code
24
# TODO: Check revision, inventory and entry objects have all 
25
# required fields.
26
1185.16.101 by mbp at sourcefrog
todo
27
# TODO: Get every revision in the revision-store even if they're not
28
# referenced by history and make sure they're all valid.
1347 by Martin Pool
- refactor check code into method object
29
1616.1.5 by Martin Pool
Cleanup and document some check code
30
# TODO: Perhaps have a way to record errors other than by raising exceptions;
31
# would perhaps be enough to accumulate exception objects in a list without
32
# raising them.  If there's more than one exception it'd be good to see them
33
# all.
34
1773.4.1 by Martin Pool
Add pyflakes makefile target; fix many warnings
35
from bzrlib.errors import BzrCheckError
1104 by Martin Pool
- Add a simple UIFactory
36
import bzrlib.ui
1773.4.1 by Martin Pool
Add pyflakes makefile target; fix many warnings
37
from bzrlib.trace import note
1104 by Martin Pool
- Add a simple UIFactory
38
1347 by Martin Pool
- refactor check code into method object
39
class Check(object):
    """Check a repository.

    Construct with the repository to examine, call check() to run the
    checks, then report_results() to log a summary of the counters that
    were accumulated.
    """

    # The Check object interacts with InventoryEntry.check, etc.

    def __init__(self, repository):
        """Prepare a checker for ``repository``; nothing is read yet.

        :param repository: the repository object that check() will lock
            and examine.
        """
        self.repository = repository
        self.checked_text_cnt = 0
        self.checked_rev_cnt = 0
        # revision ids that are named as parents but are not present in
        # the repository
        self.ghosts = []
        self.repeated_text_cnt = 0
        # maps parent revision id -> list of revision ids that name it as
        # a parent although it was not among the planned revisions
        self.missing_parent_links = {}
        self.missing_inventory_sha_cnt = 0
        self.missing_revision_cnt = 0
        # maps (file-id, version) -> sha1; used by InventoryFile._check
        self.checked_texts = {}
        # maps weave id -> True for every weave that passed its check
        self.checked_weaves = {}

    def check(self):
        """Run every check while holding a read lock on the repository.

        The progress bar is finished and the lock released even when a
        check raises.

        :raise BzrCheckError: if there's a consistency error.
        """
        self.repository.lock_read()
        try:
            # Created inside the lock's try block so that a failure while
            # building (or finishing) the progress bar still unlocks.
            self.progress = bzrlib.ui.ui_factory.nested_progress_bar()
            try:
                self.progress.update('retrieving inventory', 0, 0)
                # do not put in init, as it should be done with progress,
                # and inside the lock.
                self.inventory_weave = self.repository.get_inventory_weave()
                self.plan_revisions()
                self.check_weaves()
                # check_one_rev may append to planned_revisions, so the
                # length has to be re-read on every iteration.
                revno = 0
                while revno < len(self.planned_revisions):
                    rev_id = self.planned_revisions[revno]
                    self.progress.update('checking revision', revno,
                                         len(self.planned_revisions))
                    revno += 1
                    self.check_one_rev(rev_id)
            finally:
                self.progress.finished()
        finally:
            self.repository.unlock()

    def plan_revisions(self):
        """Work out which revisions to check.

        Sets ``self.planned_revisions`` to a list of every revision id the
        repository reports, after verifying each of them is also a version
        of the inventory weave.

        :raise BzrCheckError: if a stored revision is missing from the
            inventory weave.
        """
        repository = self.repository
        planned = set(repository.all_revision_ids())
        self.planned_revisions = planned
        self.progress.clear()
        inventoried = set(self.inventory_weave.versions())
        awol = planned - inventoried
        if awol:
            # sorted() keeps the message stable from run to run.
            raise BzrCheckError('Stored revisions missing from inventory'
                '{%s}' % ','.join(sorted(awol)))
        self.planned_revisions = list(planned)
        # Set twin of planned_revisions, for fast membership tests.
        self._planned_revision_set = planned

    def _get_planned_set(self):
        """Return a set holding the same ids as ``self.planned_revisions``.

        The set is rebuilt whenever its size disagrees with the list, which
        covers callers that assign or extend planned_revisions directly.
        """
        planned_set = getattr(self, '_planned_revision_set', None)
        if (planned_set is None
            or len(planned_set) != len(self.planned_revisions)):
            planned_set = set(self.planned_revisions)
            self._planned_revision_set = planned_set
        return planned_set

    def report_results(self, verbose):
        """Log a summary of the counters gathered so far, using note().

        :param verbose: if true, also list every ghost and, for every
            missing parent link, the revisions that refer to it.
        """
        note('checked repository %s format %s',
             self.repository.bzrdir.root_transport,
             self.repository._format)
        note('%6d revisions', self.checked_rev_cnt)
        note('%6d unique file texts', self.checked_text_cnt)
        note('%6d repeated file texts', self.repeated_text_cnt)
        note('%6d weaves', len(self.checked_weaves))
        if self.missing_inventory_sha_cnt:
            note('%6d revisions are missing inventory_sha1',
                 self.missing_inventory_sha_cnt)
        if self.missing_revision_cnt:
            note('%6d revisions are mentioned but not present',
                 self.missing_revision_cnt)
        if self.ghosts:
            note('%6d ghost revisions', len(self.ghosts))
            if verbose:
                for ghost in self.ghosts:
                    note('      %s', ghost)
        if self.missing_parent_links:
            note('%6d revisions missing parents in ancestry',
                 len(self.missing_parent_links))
            if verbose:
                for link, linkers in self.missing_parent_links.items():
                    note('      %s should be in the ancestry for:', link)
                    for linker in linkers:
                        note('       * %s', linker)

    def check_one_rev(self, rev_id):
        """Check one revision.

        Verifies the stored revision id, records parents that were not
        planned (queueing their ancestry when they exist, or noting them as
        ghosts when they do not), compares the inventory sha1 when the
        revision records one, and checks the revision tree.

        rev_id - the one to check

        :raise BzrCheckError: if the revision's internal id or its
            inventory sha1 does not match.
        """
        rev = self.repository.get_revision(rev_id)

        if rev.revision_id != rev_id:
            raise BzrCheckError('wrong internal revision id in revision {%s}'
                                % rev_id)

        planned_set = self._get_planned_set()
        for parent in rev.parent_ids:
            if parent in planned_set:
                continue
            self.missing_parent_links.setdefault(parent, []).append(rev_id)
            if self.repository.has_revision(parent):
                # The parent exists but was not planned: queue it and any
                # of its ancestors that are not planned yet.
                for missing in self.repository.get_ancestry(parent):
                    if missing is not None and missing not in planned_set:
                        planned_set.add(missing)
                        self.planned_revisions.append(missing)
            elif parent not in self.ghosts:
                # The absent parent is the ghost, not the revision that
                # refers to it; record each ghost once so the reported
                # count is the number of distinct ghosts.
                self.ghosts.append(parent)

        if rev.inventory_sha1:
            inv_sha1 = self.repository.get_inventory_sha1(rev_id)
            if inv_sha1 != rev.inventory_sha1:
                raise BzrCheckError('Inventory sha1 hash doesn\'t match'
                    ' value in revision {%s}' % rev_id)
        else:
            # Feed the counter that report_results() prints.
            self.missing_inventory_sha_cnt += 1
        self._check_revision_tree(rev_id)
        self.checked_rev_cnt += 1

    def check_weaves(self):
        """Check all the weaves we can get our hands on.

        The inventory weave is always checked; the weaves in the
        repository's weave store are checked only when the store is
        listable.  Each store weave that passes is recorded in
        ``self.checked_weaves``.
        """
        n_weaves = 1
        weave_ids = []
        if self.repository.weave_store.listable():
            weave_ids = list(self.repository.weave_store)
            n_weaves = len(weave_ids)
        self.progress.update('checking weave', 0, n_weaves)
        self.inventory_weave.check(progress_bar=self.progress)
        for i, weave_id in enumerate(weave_ids):
            self.progress.update('checking weave', i, n_weaves)
            w = self.repository.weave_store.get_weave(weave_id,
                    self.repository.get_transaction())
            # No progress here, because it looks ugly.
            w.check()
            self.checked_weaves[weave_id] = True

    def _check_revision_tree(self, rev_id):
        """Check the inventory of the tree for ``rev_id``.

        Looks for duplicated file ids, asks every inventory entry to check
        itself against this Check object, then looks for duplicated paths.

        :raise BzrCheckError: if a file id or a path occurs twice.
        """
        tree = self.repository.revision_tree(rev_id)
        inv = tree.inventory
        seen_ids = set()
        for file_id in inv:
            if file_id in seen_ids:
                raise BzrCheckError('duplicated file_id {%s} '
                                    'in inventory for revision {%s}'
                                    % (file_id, rev_id))
            seen_ids.add(file_id)
        # Entries are only checked once all file ids are known unique.
        for file_id in inv:
            ie = inv[file_id]
            ie.check(self, rev_id, inv, tree)
        seen_names = set()
        for path, ie in inv.iter_entries():
            if path in seen_names:
                raise BzrCheckError('duplicated path %s '
                                    'in inventory for revision {%s}'
                                    % (path, rev_id))
            seen_names.add(path)
1349 by Martin Pool
- more refactoring of check code
190
1347 by Martin Pool
- refactor check code into method object
191
1449 by Robert Collins
teach check about ghosts
192
def check(branch, verbose):
    """Run consistency checks on a branch.

    The branch is read-locked while branch.check() and then
    branch.repository.check() are run, the latter being passed a list
    holding the branch's last revision.  The lock is released before either
    result is reported.

    Results are reported through logging.

    :param branch: the branch to check; its repository is checked too.
    :param verbose: passed on to report_results() of both results.
    :raise BzrCheckError: if there's a consistency error.
    """
    branch.lock_read()
    try:
        branch_result = branch.check()
        repo_result = branch.repository.check([branch.last_revision()])
    finally:
        branch.unlock()
    # Reporting happens after the lock has been released.
    branch_result.report_results(verbose)
    repo_result.report_results(verbose)