~bzr-pqm/bzr/bzr.dev

1570.1.2 by Robert Collins
Import bzrtools' 'fix' command as 'bzr reconcile.'
1
# (C) 2005, 2006 Canonical Limited.
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
1570.1.7 by Robert Collins
Replace the slow topo_sort routine with a much faster one for non trivial datasets.
17
"""Reconcilers are able to fix some potential data errors in a branch."""
1570.1.2 by Robert Collins
Import bzrtools' 'fix' command as 'bzr reconcile.'
18
19
1570.1.14 by Robert Collins
Enforce repository consistency during 'fetch' operations.
20
__all__ = ['reconcile', 'Reconciler', 'RepoReconciler']
21
22
1570.1.2 by Robert Collins
Import bzrtools' 'fix' command as 'bzr reconcile.'
23
import bzrlib.branch
1570.1.6 by Robert Collins
Update fast topological_sort to be a function and to have the topo_sort tests run against it.
24
import bzrlib.errors as errors
1570.1.2 by Robert Collins
Import bzrtools' 'fix' command as 'bzr reconcile.'
25
import bzrlib.progress
26
from bzrlib.trace import mutter
1570.1.7 by Robert Collins
Replace the slow topo_sort routine with a much faster one for non trivial datasets.
27
from bzrlib.tsort import TopoSorter
1570.1.2 by Robert Collins
Import bzrtools' 'fix' command as 'bzr reconcile.'
28
import bzrlib.ui as ui
29
30
31
def reconcile(dir):
32
    """Reconcile the data in dir.
33
34
    Currently this is limited to a inventory 'reweave'.
35
1570.1.8 by Robert Collins
Only reconcile if doing so will perform gc or correct ancestry.
36
    This is a convenience method, for using a Reconciler object.
37
38
    Directly using Reconciler is recommended for library users that
39
    desire fine grained control or analysis of the found issues.
1570.1.2 by Robert Collins
Import bzrtools' 'fix' command as 'bzr reconcile.'
40
    """
41
    reconciler = Reconciler(dir)
42
    reconciler.reconcile()
43
44
1570.1.6 by Robert Collins
Update fast topological_sort to be a function and to have the topo_sort tests run against it.
45
class Reconciler(object):
1570.1.14 by Robert Collins
Enforce repository consistency during 'fetch' operations.
46
    """Reconcilers are used to reconcile existing data."""
1570.1.6 by Robert Collins
Update fast topological_sort to be a function and to have the topo_sort tests run against it.
47
48
    def __init__(self, dir):
49
        self.bzrdir = dir
50
51
    def reconcile(self):
1570.1.8 by Robert Collins
Only reconcile if doing so will perform gc or correct ancestry.
52
        """Perform reconciliation.
53
        
54
        After reconciliation the following attributes document found issues:
55
        inconsistent_parents: The number of revisions in the repository whose
56
                              ancestry was being reported incorrectly.
57
        garbage_inventories: The number of inventory objects without revisions
58
                             that were garbage collected.
59
        """
1594.1.3 by Robert Collins
Fixup pb usage to use nested_progress_bar.
60
        self.pb = ui.ui_factory.nested_progress_bar()
61
        try:
62
            self._reconcile()
63
        finally:
64
            self.pb.finished()
65
66
    def _reconcile(self):
67
        """Helper function for performing reconciliation."""
1570.1.11 by Robert Collins
Make reconcile work with shared repositories.
68
        self.repo = self.bzrdir.find_repository()
1570.1.14 by Robert Collins
Enforce repository consistency during 'fetch' operations.
69
        self.pb.note('Reconciling repository %s',
70
                     self.repo.bzrdir.root_transport.base)
71
        repo_reconciler = RepoReconciler(self.repo)
72
        repo_reconciler.reconcile()
73
        self.inconsistent_parents = repo_reconciler.inconsistent_parents
74
        self.garbage_inventories = repo_reconciler.garbage_inventories
75
        self.pb.note('Reconciliation complete.')
76
77
78
class RepoReconciler(object):
79
    """Reconciler that reconciles a repository.
80
81
    Currently this consists of an inventory reweave with revision cross-checks.
82
    """
83
84
    def __init__(self, repo):
85
        self.repo = repo
86
87
    def reconcile(self):
88
        """Perform reconciliation.
89
        
90
        After reconciliation the following attributes document found issues:
91
        inconsistent_parents: The number of revisions in the repository whose
92
                              ancestry was being reported incorrectly.
93
        garbage_inventories: The number of inventory objects without revisions
94
                             that were garbage collected.
95
        """
1570.1.6 by Robert Collins
Update fast topological_sort to be a function and to have the topo_sort tests run against it.
96
        self.repo.lock_write()
97
        try:
1594.1.3 by Robert Collins
Fixup pb usage to use nested_progress_bar.
98
            self.pb = ui.ui_factory.nested_progress_bar()
99
            try:
1594.2.7 by Robert Collins
Add versionedfile.fix_parents api for correcting data post hoc.
100
                self._reconcile_steps()
1594.1.3 by Robert Collins
Fixup pb usage to use nested_progress_bar.
101
            finally:
102
                self.pb.finished()
1570.1.6 by Robert Collins
Update fast topological_sort to be a function and to have the topo_sort tests run against it.
103
        finally:
104
            self.repo.unlock()
105
1594.2.7 by Robert Collins
Add versionedfile.fix_parents api for correcting data post hoc.
106
    def _reconcile_steps(self):
107
        """Perform the steps to reconcile this repository."""
108
        self._reweave_inventory()
109
1570.1.6 by Robert Collins
Update fast topological_sort to be a function and to have the topo_sort tests run against it.
110
    def _reweave_inventory(self):
111
        """Regenerate the inventory weave for the repository from scratch."""
1563.2.42 by Robert Collins
Stop reconcile on weaves being quadratic.
112
        # local because its really a wart we want to hide
113
        from bzrlib.weave import WeaveFile, Weave
1563.2.29 by Robert Collins
Remove all but fetch references to repository.revision_store.
114
        transaction = self.repo.get_transaction()
1570.1.6 by Robert Collins
Update fast topological_sort to be a function and to have the topo_sort tests run against it.
115
        self.pb.update('Reading inventory data.')
116
        self.inventory = self.repo.get_inventory_weave()
117
        # the total set of revisions to process
1563.2.29 by Robert Collins
Remove all but fetch references to repository.revision_store.
118
        self.pending = set([rev_id for rev_id in self.repo._revision_store.all_revision_ids(transaction)])
1570.1.6 by Robert Collins
Update fast topological_sort to be a function and to have the topo_sort tests run against it.
119
120
        # mapping from revision_id to parents
121
        self._rev_graph = {}
1570.1.8 by Robert Collins
Only reconcile if doing so will perform gc or correct ancestry.
122
        # errors that we detect
123
        self.inconsistent_parents = 0
1570.1.6 by Robert Collins
Update fast topological_sort to be a function and to have the topo_sort tests run against it.
124
        # we need the revision id of each revision and its available parents list
1570.1.10 by Robert Collins
UI tweaks to reconcile - show progress for inventory backup.
125
        self._setup_steps(len(self.pending))
1570.1.6 by Robert Collins
Update fast topological_sort to be a function and to have the topo_sort tests run against it.
126
        for rev_id in self.pending:
127
            # put a revision into the graph.
128
            self._graph_revision(rev_id)
1594.2.2 by Robert Collins
Trivial change to reconcile to mutter the cause of reconciliation to bzr.log
129
        self._check_garbage_inventories()
1570.1.8 by Robert Collins
Only reconcile if doing so will perform gc or correct ancestry.
130
        if not self.inconsistent_parents and not self.garbage_inventories:
131
            self.pb.note('Inventory ok.')
132
            return
1570.1.10 by Robert Collins
UI tweaks to reconcile - show progress for inventory backup.
133
        self.pb.update('Backing up inventory...', 0, 0)
1563.2.25 by Robert Collins
Merge in upstream.
134
        self.repo.control_weaves.copy(self.inventory, 'inventory.backup', self.repo.get_transaction())
1570.1.8 by Robert Collins
Only reconcile if doing so will perform gc or correct ancestry.
135
        self.pb.note('Backup Inventory created.')
136
        # asking for '' should never return a non-empty weave
1616.1.1 by Martin Pool
[merge] robertc
137
        new_inventory_vf = self.repo.control_weaves.get_empty('inventory.new',
1570.1.8 by Robert Collins
Only reconcile if doing so will perform gc or correct ancestry.
138
            self.repo.get_transaction())
1570.1.6 by Robert Collins
Update fast topological_sort to be a function and to have the topo_sort tests run against it.
139
1570.1.4 by Robert Collins
Somewhat optimised version of reconciler.
140
        # we have topological order of revisions and non ghost parents ready.
1570.1.10 by Robert Collins
UI tweaks to reconcile - show progress for inventory backup.
141
        self._setup_steps(len(self._rev_graph))
1570.1.7 by Robert Collins
Replace the slow topo_sort routine with a much faster one for non trivial datasets.
142
        for rev_id in TopoSorter(self._rev_graph.items()).iter_topo_order():
143
            parents = self._rev_graph[rev_id]
1570.1.4 by Robert Collins
Somewhat optimised version of reconciler.
144
            # double check this really is in topological order.
1616.1.1 by Martin Pool
[merge] robertc
145
            unavailable = [p for p in parents if p not in new_inventory_vf]
1570.1.4 by Robert Collins
Somewhat optimised version of reconciler.
146
            assert len(unavailable) == 0
147
            # this entry has all the non ghost parents in the inventory
148
            # file already.
149
            self._reweave_step('adding inventories')
1616.1.1 by Martin Pool
[merge] robertc
150
            if isinstance(new_inventory_vf, WeaveFile):
151
                # It's really a WeaveFile, but we call straight into the
152
                # Weave's add method to disable the auto-write-out behaviour.
1607.1.11 by Robert Collins
Merge from bzr.dev
153
                # This is done to avoid a revision_count * time-to-write additional overhead on 
154
                # reconcile.
1616.1.1 by Martin Pool
[merge] robertc
155
                new_inventory_vf._check_write_ok()
156
                Weave._add_lines(new_inventory_vf, rev_id, parents, self.inventory.get_lines(rev_id),
157
                                 None)
1563.2.42 by Robert Collins
Stop reconcile on weaves being quadratic.
158
            else:
1616.1.1 by Martin Pool
[merge] robertc
159
                new_inventory_vf.add_lines(rev_id, parents, self.inventory.get_lines(rev_id))
1570.1.4 by Robert Collins
Somewhat optimised version of reconciler.
160
1616.1.1 by Martin Pool
[merge] robertc
161
        if isinstance(new_inventory_vf, WeaveFile):
162
            new_inventory_vf._save()
163
        # if this worked, the set of new_inventory_vf.names should equal
1570.1.4 by Robert Collins
Somewhat optimised version of reconciler.
164
        # self.pending
1616.1.1 by Martin Pool
[merge] robertc
165
        assert set(new_inventory_vf.versions()) == self.pending
1570.1.2 by Robert Collins
Import bzrtools' 'fix' command as 'bzr reconcile.'
166
        self.pb.update('Writing weave')
1616.1.1 by Martin Pool
[merge] robertc
167
        self.repo.control_weaves.copy(new_inventory_vf, 'inventory', self.repo.get_transaction())
1563.2.25 by Robert Collins
Merge in upstream.
168
        self.repo.control_weaves.delete('inventory.new', self.repo.get_transaction())
1570.1.3 by Robert Collins
Optimise reconcilation to only hit each revision once.
169
        self.inventory = None
1570.1.2 by Robert Collins
Import bzrtools' 'fix' command as 'bzr reconcile.'
170
        self.pb.note('Inventory regenerated.')
1570.1.3 by Robert Collins
Optimise reconcilation to only hit each revision once.
171
1570.1.10 by Robert Collins
UI tweaks to reconcile - show progress for inventory backup.
172
    def _setup_steps(self, new_total):
173
        """Setup the markers we need to control the progress bar."""
174
        self.total = new_total
175
        self.count = 0
176
1570.1.4 by Robert Collins
Somewhat optimised version of reconciler.
177
    def _graph_revision(self, rev_id):
178
        """Load a revision into the revision graph."""
179
        # pick a random revision
180
        # analyse revision id rev_id and put it in the stack.
181
        self._reweave_step('loading revisions')
1570.1.13 by Robert Collins
Check for incorrect revision parentage in the weave during revision access.
182
        rev = self.repo.get_revision_reconcile(rev_id)
1570.1.3 by Robert Collins
Optimise reconcilation to only hit each revision once.
183
        assert rev.revision_id == rev_id
184
        parents = []
185
        for parent in rev.parent_ids:
1570.1.14 by Robert Collins
Enforce repository consistency during 'fetch' operations.
186
            if self._parent_is_available(parent):
1570.1.3 by Robert Collins
Optimise reconcilation to only hit each revision once.
187
                parents.append(parent)
188
            else:
189
                mutter('found ghost %s', parent)
1570.1.4 by Robert Collins
Somewhat optimised version of reconciler.
190
        self._rev_graph[rev_id] = parents   
1563.2.25 by Robert Collins
Merge in upstream.
191
        if set(self.inventory.get_parents(rev_id)) != set(parents):
1570.1.8 by Robert Collins
Only reconcile if doing so will perform gc or correct ancestry.
192
            self.inconsistent_parents += 1
1594.2.2 by Robert Collins
Trivial change to reconcile to mutter the cause of reconciliation to bzr.log
193
            mutter('Inconsistent inventory parents: id {%s} '
194
                   'inventory claims %r, '
195
                   'available parents are %r, '
196
                   'unavailable parents are %r',
197
                   rev_id, 
1563.2.39 by Robert Collins
Merge from integration.
198
                   set(self.inventory.get_parents(rev_id)),
1594.2.2 by Robert Collins
Trivial change to reconcile to mutter the cause of reconciliation to bzr.log
199
                   set(parents),
200
                   set(rev.parent_ids).difference(set(parents)))
201
202
    def _check_garbage_inventories(self):
203
        """Check for garbage inventories which we cannot trust
204
205
        We cant trust them because their pre-requisite file data may not
206
        be present - all we know is that their revision was not installed.
207
        """
1563.2.39 by Robert Collins
Merge from integration.
208
        inventories = set(self.inventory.versions())
1594.2.2 by Robert Collins
Trivial change to reconcile to mutter the cause of reconciliation to bzr.log
209
        revisions = set(self._rev_graph.keys())
210
        garbage = inventories.difference(revisions)
211
        self.garbage_inventories = len(garbage)
212
        for revision_id in garbage:
213
            mutter('Garbage inventory {%s} found.', revision_id)
1570.1.4 by Robert Collins
Somewhat optimised version of reconciler.
214
1570.1.14 by Robert Collins
Enforce repository consistency during 'fetch' operations.
215
    def _parent_is_available(self, parent):
216
        """True if parent is a fully available revision
217
218
        A fully available revision has a inventory and a revision object in the
219
        repository.
220
        """
221
        return (parent in self._rev_graph or 
222
                (parent in self.inventory and self.repo.has_revision(parent)))
223
1570.1.4 by Robert Collins
Somewhat optimised version of reconciler.
224
    def _reweave_step(self, message):
225
        """Mark a single step of regeneration complete."""
226
        self.pb.update(message, self.count, self.total)
227
        self.count += 1
1594.2.7 by Robert Collins
Add versionedfile.fix_parents api for correcting data post hoc.
228
229
230
class KnitReconciler(RepoReconciler):
231
    """Reconciler that reconciles a knit format repository.
232
233
    This will detect garbage inventories and remove them.
234
235
    Inconsistent parentage is checked for in the revision weave.
236
    """
237
238
    def _reconcile_steps(self):
239
        """Perform the steps to reconcile this repository."""
240
        self._load_indexes()
1594.2.9 by Robert Collins
Teach Knit repositories how to handle ghosts without corrupting at all.
241
        # knits never suffer this
242
        self.inconsistent_parents = 0
1594.2.7 by Robert Collins
Add versionedfile.fix_parents api for correcting data post hoc.
243
        self._gc_inventory()
244
245
    def _load_indexes(self):
246
        """Load indexes for the reconciliation."""
247
        self.transaction = self.repo.get_transaction()
248
        self.pb.update('Reading indexes.', 0, 2)
249
        self.inventory = self.repo.get_inventory_weave()
250
        self.pb.update('Reading indexes.', 1, 2)
251
        self.revisions = self.repo._revision_store.get_revision_file(self.transaction)
252
        self.pb.update('Reading indexes.', 2, 2)
253
254
    def _gc_inventory(self):
255
        """Remove inventories that are not referenced from the revision store."""
256
        self.pb.update('Checking unused inventories.', 0, 1)
257
        self._check_garbage_inventories()
258
        self.pb.update('Checking unused inventories.', 1, 3)
259
        if not self.garbage_inventories:
260
            self.pb.note('Inventory ok.')
261
            return
262
        self.pb.update('Backing up inventory...', 0, 0)
263
        self.repo.control_weaves.copy(self.inventory, 'inventory.backup', self.transaction)
264
        self.pb.note('Backup Inventory created.')
265
        # asking for '' should never return a non-empty weave
1616.1.1 by Martin Pool
[merge] robertc
266
        new_inventory_vf = self.repo.control_weaves.get_empty('inventory.new',
1594.2.7 by Robert Collins
Add versionedfile.fix_parents api for correcting data post hoc.
267
            self.transaction)
268
269
        # we have topological order of revisions and non ghost parents ready.
1594.2.9 by Robert Collins
Teach Knit repositories how to handle ghosts without corrupting at all.
270
        self._setup_steps(len(self.revisions))
271
        for rev_id in TopoSorter(self.revisions.get_graph().items()).iter_topo_order():
272
            parents = self.revisions.get_parents(rev_id)
1594.2.7 by Robert Collins
Add versionedfile.fix_parents api for correcting data post hoc.
273
            # double check this really is in topological order.
1616.1.1 by Martin Pool
[merge] robertc
274
            unavailable = [p for p in parents if p not in new_inventory_vf]
1594.2.7 by Robert Collins
Add versionedfile.fix_parents api for correcting data post hoc.
275
            assert len(unavailable) == 0
276
            # this entry has all the non ghost parents in the inventory
277
            # file already.
278
            self._reweave_step('adding inventories')
279
            # ugly but needed, weaves are just way tooooo slow else.
1616.1.1 by Martin Pool
[merge] robertc
280
            new_inventory_vf.add_lines(rev_id, parents, self.inventory.get_lines(rev_id))
1594.2.7 by Robert Collins
Add versionedfile.fix_parents api for correcting data post hoc.
281
1616.1.1 by Martin Pool
[merge] robertc
282
        # if this worked, the set of new_inventory_vf.names should equal
1594.2.7 by Robert Collins
Add versionedfile.fix_parents api for correcting data post hoc.
283
        # self.pending
1616.1.1 by Martin Pool
[merge] robertc
284
        assert set(new_inventory_vf.versions()) == set(self.revisions.versions())
1594.2.7 by Robert Collins
Add versionedfile.fix_parents api for correcting data post hoc.
285
        self.pb.update('Writing weave')
1616.1.1 by Martin Pool
[merge] robertc
286
        self.repo.control_weaves.copy(new_inventory_vf, 'inventory', self.transaction)
1594.2.7 by Robert Collins
Add versionedfile.fix_parents api for correcting data post hoc.
287
        self.repo.control_weaves.delete('inventory.new', self.transaction)
288
        self.inventory = None
289
        self.pb.note('Inventory regenerated.')
290
291
    def _reinsert_revisions(self):
292
        """Correct the revision history for revisions in the revision knit."""
293
        # the total set of revisions to process
294
        self.pending = set(self.revisions.versions())
295
296
        # mapping from revision_id to parents
297
        self._rev_graph = {}
298
        # errors that we detect
299
        self.inconsistent_parents = 0
300
        # we need the revision id of each revision and its available parents list
301
        self._setup_steps(len(self.pending))
302
        for rev_id in self.pending:
303
            # put a revision into the graph.
304
            self._graph_revision(rev_id)
305
306
        if not self.inconsistent_parents:
307
            self.pb.note('Revision history accurate.')
308
            return
309
        self._setup_steps(len(self._rev_graph))
310
        for rev_id, parents in self._rev_graph.items():
311
            if parents != self.revisions.get_parents(rev_id):
312
                self.revisions.fix_parents(rev_id, parents)
313
            self._reweave_step('Fixing parents')
314
        self.pb.note('Ancestry corrected.')
315
316
    def _graph_revision(self, rev_id):
317
        """Load a revision into the revision graph."""
318
        # pick a random revision
319
        # analyse revision id rev_id and put it in the stack.
320
        self._reweave_step('loading revisions')
321
        rev = self.repo._revision_store.get_revision(rev_id, self.transaction)
322
        assert rev.revision_id == rev_id
323
        parents = []
324
        for parent in rev.parent_ids:
325
            if self.revisions.has_version(parent):
326
                parents.append(parent)
327
            else:
328
                mutter('found ghost %s', parent)
329
        self._rev_graph[rev_id] = parents   
330
        if set(self.inventory.get_parents(rev_id)) != set(parents):
331
            self.inconsistent_parents += 1
332
            mutter('Inconsistent inventory parents: id {%s} '
333
                   'inventory claims %r, '
334
                   'available parents are %r, '
335
                   'unavailable parents are %r',
336
                   rev_id, 
337
                   set(self.inventory.get_parents(rev_id)),
338
                   set(parents),
339
                   set(rev.parent_ids).difference(set(parents)))
340
341
    def _check_garbage_inventories(self):
342
        """Check for garbage inventories which we cannot trust
343
344
        We cant trust them because their pre-requisite file data may not
345
        be present - all we know is that their revision was not installed.
346
        """
347
        inventories = set(self.inventory.versions())
348
        revisions = set(self.revisions.versions())
349
        garbage = inventories.difference(revisions)
350
        self.garbage_inventories = len(garbage)
351
        for revision_id in garbage:
352
            mutter('Garbage inventory {%s} found.', revision_id)