17
17
"""Reconcilers are able to fix some potential data errors in a branch."""
20
__all__ = ['reconcile', 'Reconciler', 'RepoReconciler', 'KnitReconciler']
20
__all__ = ['reconcile', 'Reconciler', 'RepoReconciler']
24
import bzrlib.errors as errors
25
import bzrlib.progress
24
26
from bzrlib.trace import mutter
25
27
from bzrlib.tsort import TopoSorter
28
def reconcile(dir, other=None):
28
import bzrlib.ui as ui
29
32
"""Reconcile the data in dir.
31
34
Currently this is limited to an inventory 'reweave'.
35
38
Directly using Reconciler is recommended for library users that
36
39
desire fine-grained control or analysis of the found issues.
38
:param other: another bzrdir to reconcile against.
40
reconciler = Reconciler(dir, other=other)
41
reconciler = Reconciler(dir)
41
42
reconciler.reconcile()
44
45
class Reconciler(object):
45
46
"""Reconcilers are used to reconcile existing data."""
47
def __init__(self, dir, other=None):
48
"""Create a Reconciler."""
48
def __init__(self, dir):
51
51
def reconcile(self):
68
68
self.repo = self.bzrdir.find_repository()
69
69
self.pb.note('Reconciling repository %s',
70
70
self.repo.bzrdir.root_transport.base)
71
repo_reconciler = self.repo.reconcile(thorough=True)
71
repo_reconciler = RepoReconciler(self.repo)
72
repo_reconciler.reconcile()
72
73
self.inconsistent_parents = repo_reconciler.inconsistent_parents
73
74
self.garbage_inventories = repo_reconciler.garbage_inventories
74
75
self.pb.note('Reconciliation complete.')
80
81
Currently this consists of an inventory reweave with revision cross-checks.
83
def __init__(self, repo, other=None, thorough=False):
84
"""Construct a RepoReconciler.
86
:param thorough: perform a thorough check which may take longer but
87
will correct non-data loss issues such as incorrect
90
self.garbage_inventories = 0
91
self.inconsistent_parents = 0
84
def __init__(self, repo):
93
self.thorough = thorough
95
87
def reconcile(self):
96
88
"""Perform reconciliation.
116
108
self._reweave_inventory()
118
110
def _reweave_inventory(self):
119
"""Regenerate the inventory weave for the repository from scratch.
121
This is a smart function: it will only do the reweave if doing it
122
will correct data issues. The self.thorough flag controls whether
123
only data-loss causing issues (!self.thorough) or all issues
124
(self.thorough) are treated as requiring the reweave.
126
# local because needing to know about WeaveFile is a wart we want to hide
111
"""Regenerate the inventory weave for the repository from scratch."""
112
# local because it's really a wart we want to hide
127
113
from bzrlib.weave import WeaveFile, Weave
128
114
transaction = self.repo.get_transaction()
129
115
self.pb.update('Reading inventory data.')
141
127
# put a revision into the graph.
142
128
self._graph_revision(rev_id)
143
129
self._check_garbage_inventories()
144
# if there are no inconsistent_parents and
145
# (no garbage inventories or we are not doing a thorough check)
146
if (not self.inconsistent_parents and
147
(not self.garbage_inventories or not self.thorough)):
130
if not self.inconsistent_parents and not self.garbage_inventories:
148
131
self.pb.note('Inventory ok.')
150
133
self.pb.update('Backing up inventory...', 0, 0)
167
150
if isinstance(new_inventory_vf, WeaveFile):
168
151
# It's really a WeaveFile, but we call straight into the
169
152
# Weave's add method to disable the auto-write-out behaviour.
170
# This is done to avoid a revision_count * time-to-write additional overhead on
172
153
new_inventory_vf._check_write_ok()
173
154
Weave._add_lines(new_inventory_vf, rev_id, parents, self.inventory.get_lines(rev_id),
206
187
mutter('found ghost %s', parent)
207
188
self._rev_graph[rev_id] = parents
208
if self._parents_are_inconsistent(rev_id, parents):
189
if set(self.inventory.get_parents(rev_id)) != set(parents):
209
190
self.inconsistent_parents += 1
210
191
mutter('Inconsistent inventory parents: id {%s} '
211
192
'inventory claims %r, '
217
198
set(rev.parent_ids).difference(set(parents)))
219
def _parents_are_inconsistent(self, rev_id, parents):
220
"""Return True if the parents list of rev_id does not match the weave.
222
This detects inconsistencies based on the self.thorough value:
223
if thorough is on, the first parent value is checked as well as ghost
225
Otherwise only the ghost differences are evaluated.
227
weave_parents = self.inventory.get_parents(rev_id)
228
weave_missing_old_ghosts = set(weave_parents) != set(parents)
229
first_parent_is_wrong = (
230
len(weave_parents) and len(parents) and
231
parents[0] != weave_parents[0])
233
return weave_missing_old_ghosts or first_parent_is_wrong
235
return weave_missing_old_ghosts
237
200
def _check_garbage_inventories(self):
238
201
"""Check for garbage inventories which we cannot trust
240
203
We can't trust them because their prerequisite file data may not
241
204
be present - all we know is that their revision was not installed.
243
if not self.thorough:
245
206
inventories = set(self.inventory.versions())
246
207
revisions = set(self._rev_graph.keys())
247
208
garbage = inventories.difference(revisions)
275
236
def _reconcile_steps(self):
276
237
"""Perform the steps to reconcile this repository."""
279
# knits never suffer this
239
# knits never suffer this
240
self.inconsistent_parents = 0
282
243
def _load_indexes(self):
283
244
"""Load indexes for the reconciliation."""
325
286
self.inventory = None
326
287
self.pb.note('Inventory regenerated.')
289
def _reinsert_revisions(self):
290
"""Correct the revision history for revisions in the revision knit."""
291
# the total set of revisions to process
292
self.pending = set(self.revisions.versions())
294
# mapping from revision_id to parents
296
# errors that we detect
297
self.inconsistent_parents = 0
298
# we need the revision id of each revision and its available parents list
299
self._setup_steps(len(self.pending))
300
for rev_id in self.pending:
301
# put a revision into the graph.
302
self._graph_revision(rev_id)
304
if not self.inconsistent_parents:
305
self.pb.note('Revision history accurate.')
307
self._setup_steps(len(self._rev_graph))
308
for rev_id, parents in self._rev_graph.items():
309
if parents != self.revisions.get_parents(rev_id):
310
self.revisions.fix_parents(rev_id, parents)
311
self._reweave_step('Fixing parents')
312
self.pb.note('Ancestry corrected.')
314
def _graph_revision(self, rev_id):
315
"""Load a revision into the revision graph."""
316
# pick a random revision
317
# analyse revision id rev_id and put it in the stack.
318
self._reweave_step('loading revisions')
319
rev = self.repo._revision_store.get_revision(rev_id, self.transaction)
320
assert rev.revision_id == rev_id
322
for parent in rev.parent_ids:
323
if self.revisions.has_version(parent):
324
parents.append(parent)
326
mutter('found ghost %s', parent)
327
self._rev_graph[rev_id] = parents
328
if set(self.inventory.get_parents(rev_id)) != set(parents):
329
self.inconsistent_parents += 1
330
mutter('Inconsistent inventory parents: id {%s} '
331
'inventory claims %r, '
332
'available parents are %r, '
333
'unavailable parents are %r',
335
set(self.inventory.get_parents(rev_id)),
337
set(rev.parent_ids).difference(set(parents)))
328
339
def _check_garbage_inventories(self):
329
340
"""Check for garbage inventories which we cannot trust