~bzr-pqm/bzr/bzr.dev

1570.1.2 by Robert Collins
Import bzrtools' 'fix' command as 'bzr reconcile.'
1
# (C) 2005, 2006 Canonical Limited.
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
1570.1.7 by Robert Collins
Replace the slow topo_sort routine with a much faster one for non trivial datasets.
17
"""Reconcilers are able to fix some potential data errors in a branch."""
1570.1.2 by Robert Collins
Import bzrtools' 'fix' command as 'bzr reconcile.'
18
19
1692.1.1 by Robert Collins
* Repository.reconcile now takes a thorough keyword parameter to allow
20
__all__ = ['reconcile', 'Reconciler', 'RepoReconciler', 'KnitReconciler']
1570.1.14 by Robert Collins
Enforce repository consistency during 'fetch' operations.
21
22
1570.1.2 by Robert Collins
Import bzrtools' 'fix' command as 'bzr reconcile.'
23
import bzrlib.branch
1570.1.6 by Robert Collins
Update fast topological_sort to be a function and to have the topo_sort tests run against it.
24
import bzrlib.errors as errors
1570.1.2 by Robert Collins
Import bzrtools' 'fix' command as 'bzr reconcile.'
25
import bzrlib.progress
26
from bzrlib.trace import mutter
1570.1.7 by Robert Collins
Replace the slow topo_sort routine with a much faster one for non trivial datasets.
27
from bzrlib.tsort import TopoSorter
1570.1.2 by Robert Collins
Import bzrtools' 'fix' command as 'bzr reconcile.'
28
import bzrlib.ui as ui
29
30
1692.1.1 by Robert Collins
* Repository.reconcile now takes a thorough keyword parameter to allow
31
def reconcile(dir, other=None):
1570.1.2 by Robert Collins
Import bzrtools' 'fix' command as 'bzr reconcile.'
32
    """Reconcile the data in dir.
33
34
    Currently this is limited to a inventory 'reweave'.
35
1570.1.8 by Robert Collins
Only reconcile if doing so will perform gc or correct ancestry.
36
    This is a convenience method, for using a Reconciler object.
37
38
    Directly using Reconciler is recommended for library users that
39
    desire fine grained control or analysis of the found issues.
1692.1.1 by Robert Collins
* Repository.reconcile now takes a thorough keyword parameter to allow
40
41
    :param other: another bzrdir to reconcile against.
1570.1.2 by Robert Collins
Import bzrtools' 'fix' command as 'bzr reconcile.'
42
    """
1692.1.1 by Robert Collins
* Repository.reconcile now takes a thorough keyword parameter to allow
43
    reconciler = Reconciler(dir, other=other)
1570.1.2 by Robert Collins
Import bzrtools' 'fix' command as 'bzr reconcile.'
44
    reconciler.reconcile()
45
46
1570.1.6 by Robert Collins
Update fast topological_sort to be a function and to have the topo_sort tests run against it.
47
class Reconciler(object):
1570.1.14 by Robert Collins
Enforce repository consistency during 'fetch' operations.
48
    """Reconcilers are used to reconcile existing data."""
1570.1.6 by Robert Collins
Update fast topological_sort to be a function and to have the topo_sort tests run against it.
49
1692.1.1 by Robert Collins
* Repository.reconcile now takes a thorough keyword parameter to allow
50
    def __init__(self, dir, other=None):
51
        """Create a Reconciler."""
1570.1.6 by Robert Collins
Update fast topological_sort to be a function and to have the topo_sort tests run against it.
52
        self.bzrdir = dir
53
54
    def reconcile(self):
1570.1.8 by Robert Collins
Only reconcile if doing so will perform gc or correct ancestry.
55
        """Perform reconciliation.
56
        
57
        After reconciliation the following attributes document found issues:
58
        inconsistent_parents: The number of revisions in the repository whose
59
                              ancestry was being reported incorrectly.
60
        garbage_inventories: The number of inventory objects without revisions
61
                             that were garbage collected.
62
        """
1594.1.3 by Robert Collins
Fixup pb usage to use nested_progress_bar.
63
        self.pb = ui.ui_factory.nested_progress_bar()
64
        try:
65
            self._reconcile()
66
        finally:
67
            self.pb.finished()
68
69
    def _reconcile(self):
70
        """Helper function for performing reconciliation."""
1570.1.11 by Robert Collins
Make reconcile work with shared repositories.
71
        self.repo = self.bzrdir.find_repository()
1570.1.14 by Robert Collins
Enforce repository consistency during 'fetch' operations.
72
        self.pb.note('Reconciling repository %s',
73
                     self.repo.bzrdir.root_transport.base)
1692.1.1 by Robert Collins
* Repository.reconcile now takes a thorough keyword parameter to allow
74
        repo_reconciler = self.repo.reconcile(thorough=True)
1570.1.14 by Robert Collins
Enforce repository consistency during 'fetch' operations.
75
        self.inconsistent_parents = repo_reconciler.inconsistent_parents
76
        self.garbage_inventories = repo_reconciler.garbage_inventories
77
        self.pb.note('Reconciliation complete.')
78
79
80
class RepoReconciler(object):
81
    """Reconciler that reconciles a repository.
82
83
    Currently this consists of an inventory reweave with revision cross-checks.
84
    """
85
1692.1.1 by Robert Collins
* Repository.reconcile now takes a thorough keyword parameter to allow
86
    def __init__(self, repo, other=None, thorough=False):
87
        """Construct a RepoReconciler.
88
89
        :param thorough: perform a thorough check which may take longer but
90
                         will correct non-data loss issues such as incorrect
91
                         cached data.
92
        """
93
        self.garbage_inventories = 0
94
        self.inconsistent_parents = 0
1570.1.14 by Robert Collins
Enforce repository consistency during 'fetch' operations.
95
        self.repo = repo
1692.1.1 by Robert Collins
* Repository.reconcile now takes a thorough keyword parameter to allow
96
        self.thorough = thorough
1570.1.14 by Robert Collins
Enforce repository consistency during 'fetch' operations.
97
98
    def reconcile(self):
99
        """Perform reconciliation.
100
        
101
        After reconciliation the following attributes document found issues:
102
        inconsistent_parents: The number of revisions in the repository whose
103
                              ancestry was being reported incorrectly.
104
        garbage_inventories: The number of inventory objects without revisions
105
                             that were garbage collected.
106
        """
1570.1.6 by Robert Collins
Update fast topological_sort to be a function and to have the topo_sort tests run against it.
107
        self.repo.lock_write()
108
        try:
1594.1.3 by Robert Collins
Fixup pb usage to use nested_progress_bar.
109
            self.pb = ui.ui_factory.nested_progress_bar()
110
            try:
1594.2.7 by Robert Collins
Add versionedfile.fix_parents api for correcting data post hoc.
111
                self._reconcile_steps()
1594.1.3 by Robert Collins
Fixup pb usage to use nested_progress_bar.
112
            finally:
113
                self.pb.finished()
1570.1.6 by Robert Collins
Update fast topological_sort to be a function and to have the topo_sort tests run against it.
114
        finally:
115
            self.repo.unlock()
116
1594.2.7 by Robert Collins
Add versionedfile.fix_parents api for correcting data post hoc.
117
    def _reconcile_steps(self):
118
        """Perform the steps to reconcile this repository."""
1692.1.3 by Robert Collins
Finish the reconcile tweak: filled in ghosts are a data loss issue and need to be checked during fast reconciles.
119
        self._reweave_inventory()
1594.2.7 by Robert Collins
Add versionedfile.fix_parents api for correcting data post hoc.
120
1570.1.6 by Robert Collins
Update fast topological_sort to be a function and to have the topo_sort tests run against it.
121
    def _reweave_inventory(self):
1692.1.3 by Robert Collins
Finish the reconcile tweak: filled in ghosts are a data loss issue and need to be checked during fast reconciles.
122
        """Regenerate the inventory weave for the repository from scratch.
123
        
124
        This is a smart function: it will only do the reweave if doing it 
125
        will correct data issues. The self.thorough flag controls whether
126
        only data-loss causing issues (!self.thorough) or all issues
127
        (self.thorough) are treated as requiring the reweave.
128
        """
129
        # local because needing to know about WeaveFile is a wart we want to hide
1563.2.42 by Robert Collins
Stop reconcile on weaves being quadratic.
130
        from bzrlib.weave import WeaveFile, Weave
1563.2.29 by Robert Collins
Remove all but fetch references to repository.revision_store.
131
        transaction = self.repo.get_transaction()
1570.1.6 by Robert Collins
Update fast topological_sort to be a function and to have the topo_sort tests run against it.
132
        self.pb.update('Reading inventory data.')
133
        self.inventory = self.repo.get_inventory_weave()
134
        # the total set of revisions to process
1563.2.29 by Robert Collins
Remove all but fetch references to repository.revision_store.
135
        self.pending = set([rev_id for rev_id in self.repo._revision_store.all_revision_ids(transaction)])
1570.1.6 by Robert Collins
Update fast topological_sort to be a function and to have the topo_sort tests run against it.
136
137
        # mapping from revision_id to parents
138
        self._rev_graph = {}
1570.1.8 by Robert Collins
Only reconcile if doing so will perform gc or correct ancestry.
139
        # errors that we detect
140
        self.inconsistent_parents = 0
1570.1.6 by Robert Collins
Update fast topological_sort to be a function and to have the topo_sort tests run against it.
141
        # we need the revision id of each revision and its available parents list
1570.1.10 by Robert Collins
UI tweaks to reconcile - show progress for inventory backup.
142
        self._setup_steps(len(self.pending))
1570.1.6 by Robert Collins
Update fast topological_sort to be a function and to have the topo_sort tests run against it.
143
        for rev_id in self.pending:
144
            # put a revision into the graph.
145
            self._graph_revision(rev_id)
1594.2.2 by Robert Collins
Trivial change to reconcile to mutter the cause of reconciliation to bzr.log
146
        self._check_garbage_inventories()
1692.1.3 by Robert Collins
Finish the reconcile tweak: filled in ghosts are a data loss issue and need to be checked during fast reconciles.
147
        # if there are no inconsistent_parents and 
148
        # (no garbage inventories or we are not doing a thorough check)
149
        if (not self.inconsistent_parents and 
150
            (not self.garbage_inventories or not self.thorough)):
1570.1.8 by Robert Collins
Only reconcile if doing so will perform gc or correct ancestry.
151
            self.pb.note('Inventory ok.')
152
            return
1570.1.10 by Robert Collins
UI tweaks to reconcile - show progress for inventory backup.
153
        self.pb.update('Backing up inventory...', 0, 0)
1563.2.25 by Robert Collins
Merge in upstream.
154
        self.repo.control_weaves.copy(self.inventory, 'inventory.backup', self.repo.get_transaction())
1570.1.8 by Robert Collins
Only reconcile if doing so will perform gc or correct ancestry.
155
        self.pb.note('Backup Inventory created.')
156
        # asking for '' should never return a non-empty weave
1616.1.1 by Martin Pool
[merge] robertc
157
        new_inventory_vf = self.repo.control_weaves.get_empty('inventory.new',
1570.1.8 by Robert Collins
Only reconcile if doing so will perform gc or correct ancestry.
158
            self.repo.get_transaction())
1570.1.6 by Robert Collins
Update fast topological_sort to be a function and to have the topo_sort tests run against it.
159
1570.1.4 by Robert Collins
Somewhat optimised version of reconciler.
160
        # we have topological order of revisions and non ghost parents ready.
1570.1.10 by Robert Collins
UI tweaks to reconcile - show progress for inventory backup.
161
        self._setup_steps(len(self._rev_graph))
1570.1.7 by Robert Collins
Replace the slow topo_sort routine with a much faster one for non trivial datasets.
162
        for rev_id in TopoSorter(self._rev_graph.items()).iter_topo_order():
163
            parents = self._rev_graph[rev_id]
1570.1.4 by Robert Collins
Somewhat optimised version of reconciler.
164
            # double check this really is in topological order.
1616.1.1 by Martin Pool
[merge] robertc
165
            unavailable = [p for p in parents if p not in new_inventory_vf]
1570.1.4 by Robert Collins
Somewhat optimised version of reconciler.
166
            assert len(unavailable) == 0
167
            # this entry has all the non ghost parents in the inventory
168
            # file already.
169
            self._reweave_step('adding inventories')
1616.1.1 by Martin Pool
[merge] robertc
170
            if isinstance(new_inventory_vf, WeaveFile):
171
                # It's really a WeaveFile, but we call straight into the
172
                # Weave's add method to disable the auto-write-out behaviour.
1607.1.11 by Robert Collins
Merge from bzr.dev
173
                # This is done to avoid a revision_count * time-to-write additional overhead on 
174
                # reconcile.
1616.1.1 by Martin Pool
[merge] robertc
175
                new_inventory_vf._check_write_ok()
176
                Weave._add_lines(new_inventory_vf, rev_id, parents, self.inventory.get_lines(rev_id),
177
                                 None)
1563.2.42 by Robert Collins
Stop reconcile on weaves being quadratic.
178
            else:
1616.1.1 by Martin Pool
[merge] robertc
179
                new_inventory_vf.add_lines(rev_id, parents, self.inventory.get_lines(rev_id))
1570.1.4 by Robert Collins
Somewhat optimised version of reconciler.
180
1616.1.1 by Martin Pool
[merge] robertc
181
        if isinstance(new_inventory_vf, WeaveFile):
182
            new_inventory_vf._save()
183
        # if this worked, the set of new_inventory_vf.names should equal
1570.1.4 by Robert Collins
Somewhat optimised version of reconciler.
184
        # self.pending
1616.1.1 by Martin Pool
[merge] robertc
185
        assert set(new_inventory_vf.versions()) == self.pending
1570.1.2 by Robert Collins
Import bzrtools' 'fix' command as 'bzr reconcile.'
186
        self.pb.update('Writing weave')
1616.1.1 by Martin Pool
[merge] robertc
187
        self.repo.control_weaves.copy(new_inventory_vf, 'inventory', self.repo.get_transaction())
1563.2.25 by Robert Collins
Merge in upstream.
188
        self.repo.control_weaves.delete('inventory.new', self.repo.get_transaction())
1570.1.3 by Robert Collins
Optimise reconcilation to only hit each revision once.
189
        self.inventory = None
1570.1.2 by Robert Collins
Import bzrtools' 'fix' command as 'bzr reconcile.'
190
        self.pb.note('Inventory regenerated.')
1570.1.3 by Robert Collins
Optimise reconcilation to only hit each revision once.
191
1570.1.10 by Robert Collins
UI tweaks to reconcile - show progress for inventory backup.
192
    def _setup_steps(self, new_total):
193
        """Setup the markers we need to control the progress bar."""
194
        self.total = new_total
195
        self.count = 0
196
1570.1.4 by Robert Collins
Somewhat optimised version of reconciler.
197
    def _graph_revision(self, rev_id):
198
        """Load a revision into the revision graph."""
199
        # pick a random revision
200
        # analyse revision id rev_id and put it in the stack.
201
        self._reweave_step('loading revisions')
1570.1.13 by Robert Collins
Check for incorrect revision parentage in the weave during revision access.
202
        rev = self.repo.get_revision_reconcile(rev_id)
1570.1.3 by Robert Collins
Optimise reconcilation to only hit each revision once.
203
        assert rev.revision_id == rev_id
204
        parents = []
205
        for parent in rev.parent_ids:
1570.1.14 by Robert Collins
Enforce repository consistency during 'fetch' operations.
206
            if self._parent_is_available(parent):
1570.1.3 by Robert Collins
Optimise reconcilation to only hit each revision once.
207
                parents.append(parent)
208
            else:
209
                mutter('found ghost %s', parent)
1570.1.4 by Robert Collins
Somewhat optimised version of reconciler.
210
        self._rev_graph[rev_id] = parents   
1692.1.3 by Robert Collins
Finish the reconcile tweak: filled in ghosts are a data loss issue and need to be checked during fast reconciles.
211
        if self._parents_are_inconsistent(rev_id, parents):
1570.1.8 by Robert Collins
Only reconcile if doing so will perform gc or correct ancestry.
212
            self.inconsistent_parents += 1
1594.2.2 by Robert Collins
Trivial change to reconcile to mutter the cause of reconciliation to bzr.log
213
            mutter('Inconsistent inventory parents: id {%s} '
214
                   'inventory claims %r, '
215
                   'available parents are %r, '
216
                   'unavailable parents are %r',
217
                   rev_id, 
1563.2.39 by Robert Collins
Merge from integration.
218
                   set(self.inventory.get_parents(rev_id)),
1594.2.2 by Robert Collins
Trivial change to reconcile to mutter the cause of reconciliation to bzr.log
219
                   set(parents),
220
                   set(rev.parent_ids).difference(set(parents)))
221
1692.1.3 by Robert Collins
Finish the reconcile tweak: filled in ghosts are a data loss issue and need to be checked during fast reconciles.
222
    def _parents_are_inconsistent(self, rev_id, parents):
223
        """Return True if the parents list of rev_id does not match the weave.
224
225
        This detect inconsistences based on the self.thorough value:
226
        if thorough is on, the first parent value is checked as well as ghost
227
        differences.
228
        Otherwise only the ghost differences are evaluated.
229
        """
230
        weave_parents = self.inventory.get_parents(rev_id)
231
        weave_missing_old_ghosts = set(weave_parents) != set(parents)
232
        first_parent_is_wrong = (
233
            len(weave_parents) and len(parents) and
234
            parents[0] != weave_parents[0])
235
        if self.thorough:
236
            return weave_missing_old_ghosts or first_parent_is_wrong
237
        else:
238
            return weave_missing_old_ghosts
239
1594.2.2 by Robert Collins
Trivial change to reconcile to mutter the cause of reconciliation to bzr.log
240
    def _check_garbage_inventories(self):
241
        """Check for garbage inventories which we cannot trust
242
243
        We cant trust them because their pre-requisite file data may not
244
        be present - all we know is that their revision was not installed.
245
        """
1692.1.3 by Robert Collins
Finish the reconcile tweak: filled in ghosts are a data loss issue and need to be checked during fast reconciles.
246
        if not self.thorough:
247
            return
1563.2.39 by Robert Collins
Merge from integration.
248
        inventories = set(self.inventory.versions())
1594.2.2 by Robert Collins
Trivial change to reconcile to mutter the cause of reconciliation to bzr.log
249
        revisions = set(self._rev_graph.keys())
250
        garbage = inventories.difference(revisions)
251
        self.garbage_inventories = len(garbage)
252
        for revision_id in garbage:
253
            mutter('Garbage inventory {%s} found.', revision_id)
1570.1.4 by Robert Collins
Somewhat optimised version of reconciler.
254
1570.1.14 by Robert Collins
Enforce repository consistency during 'fetch' operations.
255
    def _parent_is_available(self, parent):
256
        """True if parent is a fully available revision
257
258
        A fully available revision has a inventory and a revision object in the
259
        repository.
260
        """
261
        return (parent in self._rev_graph or 
262
                (parent in self.inventory and self.repo.has_revision(parent)))
263
1570.1.4 by Robert Collins
Somewhat optimised version of reconciler.
264
    def _reweave_step(self, message):
265
        """Mark a single step of regeneration complete."""
266
        self.pb.update(message, self.count, self.total)
267
        self.count += 1
1594.2.7 by Robert Collins
Add versionedfile.fix_parents api for correcting data post hoc.
268
269
270
class KnitReconciler(RepoReconciler):
271
    """Reconciler that reconciles a knit format repository.
272
273
    This will detect garbage inventories and remove them.
274
275
    Inconsistent parentage is checked for in the revision weave.
276
    """
277
278
    def _reconcile_steps(self):
279
        """Perform the steps to reconcile this repository."""
1692.1.1 by Robert Collins
* Repository.reconcile now takes a thorough keyword parameter to allow
280
        if self.thorough:
281
            self._load_indexes()
282
            # knits never suffer this
283
            self._gc_inventory()
1594.2.7 by Robert Collins
Add versionedfile.fix_parents api for correcting data post hoc.
284
285
    def _load_indexes(self):
286
        """Load indexes for the reconciliation."""
287
        self.transaction = self.repo.get_transaction()
288
        self.pb.update('Reading indexes.', 0, 2)
289
        self.inventory = self.repo.get_inventory_weave()
290
        self.pb.update('Reading indexes.', 1, 2)
291
        self.revisions = self.repo._revision_store.get_revision_file(self.transaction)
292
        self.pb.update('Reading indexes.', 2, 2)
293
294
    def _gc_inventory(self):
295
        """Remove inventories that are not referenced from the revision store."""
296
        self.pb.update('Checking unused inventories.', 0, 1)
297
        self._check_garbage_inventories()
298
        self.pb.update('Checking unused inventories.', 1, 3)
299
        if not self.garbage_inventories:
300
            self.pb.note('Inventory ok.')
301
            return
302
        self.pb.update('Backing up inventory...', 0, 0)
303
        self.repo.control_weaves.copy(self.inventory, 'inventory.backup', self.transaction)
304
        self.pb.note('Backup Inventory created.')
305
        # asking for '' should never return a non-empty weave
1616.1.1 by Martin Pool
[merge] robertc
306
        new_inventory_vf = self.repo.control_weaves.get_empty('inventory.new',
1594.2.7 by Robert Collins
Add versionedfile.fix_parents api for correcting data post hoc.
307
            self.transaction)
308
309
        # we have topological order of revisions and non ghost parents ready.
1594.2.9 by Robert Collins
Teach Knit repositories how to handle ghosts without corrupting at all.
310
        self._setup_steps(len(self.revisions))
311
        for rev_id in TopoSorter(self.revisions.get_graph().items()).iter_topo_order():
312
            parents = self.revisions.get_parents(rev_id)
1594.2.7 by Robert Collins
Add versionedfile.fix_parents api for correcting data post hoc.
313
            # double check this really is in topological order.
1616.1.1 by Martin Pool
[merge] robertc
314
            unavailable = [p for p in parents if p not in new_inventory_vf]
1594.2.7 by Robert Collins
Add versionedfile.fix_parents api for correcting data post hoc.
315
            assert len(unavailable) == 0
316
            # this entry has all the non ghost parents in the inventory
317
            # file already.
318
            self._reweave_step('adding inventories')
319
            # ugly but needed, weaves are just way tooooo slow else.
1616.1.1 by Martin Pool
[merge] robertc
320
            new_inventory_vf.add_lines(rev_id, parents, self.inventory.get_lines(rev_id))
1594.2.7 by Robert Collins
Add versionedfile.fix_parents api for correcting data post hoc.
321
1616.1.1 by Martin Pool
[merge] robertc
322
        # if this worked, the set of new_inventory_vf.names should equal
1594.2.7 by Robert Collins
Add versionedfile.fix_parents api for correcting data post hoc.
323
        # self.pending
1616.1.1 by Martin Pool
[merge] robertc
324
        assert set(new_inventory_vf.versions()) == set(self.revisions.versions())
1594.2.7 by Robert Collins
Add versionedfile.fix_parents api for correcting data post hoc.
325
        self.pb.update('Writing weave')
1616.1.1 by Martin Pool
[merge] robertc
326
        self.repo.control_weaves.copy(new_inventory_vf, 'inventory', self.transaction)
1594.2.7 by Robert Collins
Add versionedfile.fix_parents api for correcting data post hoc.
327
        self.repo.control_weaves.delete('inventory.new', self.transaction)
328
        self.inventory = None
329
        self.pb.note('Inventory regenerated.')
330
331
    def _check_garbage_inventories(self):
332
        """Check for garbage inventories which we cannot trust
333
334
        We cant trust them because their pre-requisite file data may not
335
        be present - all we know is that their revision was not installed.
336
        """
337
        inventories = set(self.inventory.versions())
338
        revisions = set(self.revisions.versions())
339
        garbage = inventories.difference(revisions)
340
        self.garbage_inventories = len(garbage)
341
        for revision_id in garbage:
342
            mutter('Garbage inventory {%s} found.', revision_id)