~bzr-pqm/bzr/bzr.dev

1570.1.2 by Robert Collins
Import bzrtools' 'fix' command as 'bzr reconcile.'
1
# (C) 2005, 2006 Canonical Limited.
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
1570.1.7 by Robert Collins
Replace the slow topo_sort routine with a much faster one for non trivial datasets.
17
"""Reconcilers are able to fix some potential data errors in a branch."""
1570.1.2 by Robert Collins
Import bzrtools' 'fix' command as 'bzr reconcile.'
18
19
1570.1.14 by Robert Collins
Enforce repository consistency during 'fetch' operations.
20
__all__ = ['reconcile', 'Reconciler', 'RepoReconciler']
21
22
1570.1.2 by Robert Collins
Import bzrtools' 'fix' command as 'bzr reconcile.'
23
import bzrlib.branch
1570.1.6 by Robert Collins
Update fast topological_sort to be a function and to have the topo_sort tests run against it.
24
import bzrlib.errors as errors
1570.1.2 by Robert Collins
Import bzrtools' 'fix' command as 'bzr reconcile.'
25
import bzrlib.progress
26
from bzrlib.trace import mutter
1570.1.7 by Robert Collins
Replace the slow topo_sort routine with a much faster one for non trivial datasets.
27
from bzrlib.tsort import TopoSorter
1570.1.2 by Robert Collins
Import bzrtools' 'fix' command as 'bzr reconcile.'
28
import bzrlib.ui as ui
29
30
31
def reconcile(dir):
32
    """Reconcile the data in dir.
33
34
    Currently this is limited to a inventory 'reweave'.
35
1570.1.8 by Robert Collins
Only reconcile if doing so will perform gc or correct ancestry.
36
    This is a convenience method, for using a Reconciler object.
37
38
    Directly using Reconciler is recommended for library users that
39
    desire fine grained control or analysis of the found issues.
1570.1.2 by Robert Collins
Import bzrtools' 'fix' command as 'bzr reconcile.'
40
    """
41
    reconciler = Reconciler(dir)
42
    reconciler.reconcile()
43
44
1570.1.6 by Robert Collins
Update fast topological_sort to be a function and to have the topo_sort tests run against it.
45
class Reconciler(object):
1570.1.14 by Robert Collins
Enforce repository consistency during 'fetch' operations.
46
    """Reconcilers are used to reconcile existing data."""
1570.1.6 by Robert Collins
Update fast topological_sort to be a function and to have the topo_sort tests run against it.
47
48
    def __init__(self, dir):
49
        self.bzrdir = dir
50
51
    def reconcile(self):
1570.1.8 by Robert Collins
Only reconcile if doing so will perform gc or correct ancestry.
52
        """Perform reconciliation.
53
        
54
        After reconciliation the following attributes document found issues:
55
        inconsistent_parents: The number of revisions in the repository whose
56
                              ancestry was being reported incorrectly.
57
        garbage_inventories: The number of inventory objects without revisions
58
                             that were garbage collected.
59
        """
1594.1.3 by Robert Collins
Fixup pb usage to use nested_progress_bar.
60
        self.pb = ui.ui_factory.nested_progress_bar()
61
        try:
62
            self._reconcile()
63
        finally:
64
            self.pb.finished()
65
66
    def _reconcile(self):
67
        """Helper function for performing reconciliation."""
1570.1.11 by Robert Collins
Make reconcile work with shared repositories.
68
        self.repo = self.bzrdir.find_repository()
1570.1.14 by Robert Collins
Enforce repository consistency during 'fetch' operations.
69
        self.pb.note('Reconciling repository %s',
70
                     self.repo.bzrdir.root_transport.base)
71
        repo_reconciler = RepoReconciler(self.repo)
72
        repo_reconciler.reconcile()
73
        self.inconsistent_parents = repo_reconciler.inconsistent_parents
74
        self.garbage_inventories = repo_reconciler.garbage_inventories
75
        self.pb.note('Reconciliation complete.')
76
77
78
class RepoReconciler(object):
79
    """Reconciler that reconciles a repository.
80
81
    Currently this consists of an inventory reweave with revision cross-checks.
82
    """
83
84
    def __init__(self, repo):
85
        self.repo = repo
86
87
    def reconcile(self):
88
        """Perform reconciliation.
89
        
90
        After reconciliation the following attributes document found issues:
91
        inconsistent_parents: The number of revisions in the repository whose
92
                              ancestry was being reported incorrectly.
93
        garbage_inventories: The number of inventory objects without revisions
94
                             that were garbage collected.
95
        """
1570.1.6 by Robert Collins
Update fast topological_sort to be a function and to have the topo_sort tests run against it.
96
        self.repo.lock_write()
97
        try:
1594.1.3 by Robert Collins
Fixup pb usage to use nested_progress_bar.
98
            self.pb = ui.ui_factory.nested_progress_bar()
99
            try:
100
                self._reweave_inventory()
101
            finally:
102
                self.pb.finished()
1570.1.6 by Robert Collins
Update fast topological_sort to be a function and to have the topo_sort tests run against it.
103
        finally:
104
            self.repo.unlock()
105
106
    def _reweave_inventory(self):
107
        """Regenerate the inventory weave for the repository from scratch."""
1563.2.42 by Robert Collins
Stop reconcile on weaves being quadratic.
108
        # local because its really a wart we want to hide
109
        from bzrlib.weave import WeaveFile, Weave
1563.2.29 by Robert Collins
Remove all but fetch references to repository.revision_store.
110
        transaction = self.repo.get_transaction()
1570.1.6 by Robert Collins
Update fast topological_sort to be a function and to have the topo_sort tests run against it.
111
        self.pb.update('Reading inventory data.')
112
        self.inventory = self.repo.get_inventory_weave()
113
        # the total set of revisions to process
1563.2.29 by Robert Collins
Remove all but fetch references to repository.revision_store.
114
        self.pending = set([rev_id for rev_id in self.repo._revision_store.all_revision_ids(transaction)])
1570.1.6 by Robert Collins
Update fast topological_sort to be a function and to have the topo_sort tests run against it.
115
116
        # mapping from revision_id to parents
117
        self._rev_graph = {}
1570.1.8 by Robert Collins
Only reconcile if doing so will perform gc or correct ancestry.
118
        # errors that we detect
119
        self.inconsistent_parents = 0
1570.1.6 by Robert Collins
Update fast topological_sort to be a function and to have the topo_sort tests run against it.
120
        # we need the revision id of each revision and its available parents list
1570.1.10 by Robert Collins
UI tweaks to reconcile - show progress for inventory backup.
121
        self._setup_steps(len(self.pending))
1570.1.6 by Robert Collins
Update fast topological_sort to be a function and to have the topo_sort tests run against it.
122
        for rev_id in self.pending:
123
            # put a revision into the graph.
124
            self._graph_revision(rev_id)
1594.2.2 by Robert Collins
Trivial change to reconcile to mutter the cause of reconciliation to bzr.log
125
        self._check_garbage_inventories()
1570.1.8 by Robert Collins
Only reconcile if doing so will perform gc or correct ancestry.
126
        if not self.inconsistent_parents and not self.garbage_inventories:
127
            self.pb.note('Inventory ok.')
128
            return
1570.1.10 by Robert Collins
UI tweaks to reconcile - show progress for inventory backup.
129
        self.pb.update('Backing up inventory...', 0, 0)
1563.2.25 by Robert Collins
Merge in upstream.
130
        self.repo.control_weaves.copy(self.inventory, 'inventory.backup', self.repo.get_transaction())
1570.1.8 by Robert Collins
Only reconcile if doing so will perform gc or correct ancestry.
131
        self.pb.note('Backup Inventory created.')
132
        # asking for '' should never return a non-empty weave
1563.2.25 by Robert Collins
Merge in upstream.
133
        new_inventory = self.repo.control_weaves.get_empty('inventory.new',
1570.1.8 by Robert Collins
Only reconcile if doing so will perform gc or correct ancestry.
134
            self.repo.get_transaction())
1570.1.6 by Robert Collins
Update fast topological_sort to be a function and to have the topo_sort tests run against it.
135
1570.1.4 by Robert Collins
Somewhat optimised version of reconciler.
136
        # we have topological order of revisions and non ghost parents ready.
1570.1.10 by Robert Collins
UI tweaks to reconcile - show progress for inventory backup.
137
        self._setup_steps(len(self._rev_graph))
1570.1.7 by Robert Collins
Replace the slow topo_sort routine with a much faster one for non trivial datasets.
138
        for rev_id in TopoSorter(self._rev_graph.items()).iter_topo_order():
139
            parents = self._rev_graph[rev_id]
1570.1.4 by Robert Collins
Somewhat optimised version of reconciler.
140
            # double check this really is in topological order.
141
            unavailable = [p for p in parents if p not in new_inventory]
142
            assert len(unavailable) == 0
143
            # this entry has all the non ghost parents in the inventory
144
            # file already.
145
            self._reweave_step('adding inventories')
1563.2.42 by Robert Collins
Stop reconcile on weaves being quadratic.
146
            # ugly but needed, weaves are just way tooooo slow else.
147
            if isinstance(new_inventory, WeaveFile):
148
                Weave.add_lines(new_inventory, rev_id, parents, self.inventory.get_lines(rev_id))
149
            else:
150
                new_inventory.add_lines(rev_id, parents, self.inventory.get_lines(rev_id))
1570.1.4 by Robert Collins
Somewhat optimised version of reconciler.
151
1563.2.42 by Robert Collins
Stop reconcile on weaves being quadratic.
152
        if isinstance(new_inventory, WeaveFile):
153
            new_inventory._save()
1570.1.4 by Robert Collins
Somewhat optimised version of reconciler.
154
        # if this worked, the set of new_inventory.names should equal
155
        # self.pending
1563.2.25 by Robert Collins
Merge in upstream.
156
        assert set(new_inventory.versions()) == self.pending
1570.1.2 by Robert Collins
Import bzrtools' 'fix' command as 'bzr reconcile.'
157
        self.pb.update('Writing weave')
1563.2.25 by Robert Collins
Merge in upstream.
158
        self.repo.control_weaves.copy(new_inventory, 'inventory', self.repo.get_transaction())
159
        self.repo.control_weaves.delete('inventory.new', self.repo.get_transaction())
1570.1.3 by Robert Collins
Optimise reconcilation to only hit each revision once.
160
        self.inventory = None
1570.1.2 by Robert Collins
Import bzrtools' 'fix' command as 'bzr reconcile.'
161
        self.pb.note('Inventory regenerated.')
1570.1.3 by Robert Collins
Optimise reconcilation to only hit each revision once.
162
1570.1.10 by Robert Collins
UI tweaks to reconcile - show progress for inventory backup.
163
    def _setup_steps(self, new_total):
164
        """Setup the markers we need to control the progress bar."""
165
        self.total = new_total
166
        self.count = 0
167
1570.1.4 by Robert Collins
Somewhat optimised version of reconciler.
168
    def _graph_revision(self, rev_id):
169
        """Load a revision into the revision graph."""
170
        # pick a random revision
171
        # analyse revision id rev_id and put it in the stack.
172
        self._reweave_step('loading revisions')
1570.1.13 by Robert Collins
Check for incorrect revision parentage in the weave during revision access.
173
        rev = self.repo.get_revision_reconcile(rev_id)
1570.1.3 by Robert Collins
Optimise reconcilation to only hit each revision once.
174
        assert rev.revision_id == rev_id
175
        parents = []
176
        for parent in rev.parent_ids:
1570.1.14 by Robert Collins
Enforce repository consistency during 'fetch' operations.
177
            if self._parent_is_available(parent):
1570.1.3 by Robert Collins
Optimise reconcilation to only hit each revision once.
178
                parents.append(parent)
179
            else:
180
                mutter('found ghost %s', parent)
1570.1.4 by Robert Collins
Somewhat optimised version of reconciler.
181
        self._rev_graph[rev_id] = parents   
1563.2.25 by Robert Collins
Merge in upstream.
182
        if set(self.inventory.get_parents(rev_id)) != set(parents):
1570.1.8 by Robert Collins
Only reconcile if doing so will perform gc or correct ancestry.
183
            self.inconsistent_parents += 1
1594.2.2 by Robert Collins
Trivial change to reconcile to mutter the cause of reconciliation to bzr.log
184
            mutter('Inconsistent inventory parents: id {%s} '
185
                   'inventory claims %r, '
186
                   'available parents are %r, '
187
                   'unavailable parents are %r',
188
                   rev_id, 
1563.2.39 by Robert Collins
Merge from integration.
189
                   set(self.inventory.get_parents(rev_id)),
1594.2.2 by Robert Collins
Trivial change to reconcile to mutter the cause of reconciliation to bzr.log
190
                   set(parents),
191
                   set(rev.parent_ids).difference(set(parents)))
192
193
    def _check_garbage_inventories(self):
194
        """Check for garbage inventories which we cannot trust
195
196
        We cant trust them because their pre-requisite file data may not
197
        be present - all we know is that their revision was not installed.
198
        """
1563.2.39 by Robert Collins
Merge from integration.
199
        inventories = set(self.inventory.versions())
1594.2.2 by Robert Collins
Trivial change to reconcile to mutter the cause of reconciliation to bzr.log
200
        revisions = set(self._rev_graph.keys())
201
        garbage = inventories.difference(revisions)
202
        self.garbage_inventories = len(garbage)
203
        for revision_id in garbage:
204
            mutter('Garbage inventory {%s} found.', revision_id)
1570.1.4 by Robert Collins
Somewhat optimised version of reconciler.
205
1570.1.14 by Robert Collins
Enforce repository consistency during 'fetch' operations.
206
    def _parent_is_available(self, parent):
207
        """True if parent is a fully available revision
208
209
        A fully available revision has a inventory and a revision object in the
210
        repository.
211
        """
212
        return (parent in self._rev_graph or 
213
                (parent in self.inventory and self.repo.has_revision(parent)))
214
1570.1.4 by Robert Collins
Somewhat optimised version of reconciler.
215
    def _reweave_step(self, message):
216
        """Mark a single step of regeneration complete."""
217
        self.pb.update(message, self.count, self.total)
218
        self.count += 1