1
# Copyright (C) 2005, 2006 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
"""Reconcilers are able to fix some potential data errors in a branch."""
30
from bzrlib.trace import mutter
31
from bzrlib.tsort import TopoSorter
34
def reconcile(dir, other=None):
    """Reconcile the data in dir.

    Currently this is limited to an inventory 'reweave'.

    This is a convenience wrapper: it builds a Reconciler for dir and runs
    it. Library users that desire fine grained control, or that want to
    analyse the issues that were found, should use Reconciler directly.

    :param dir: the bzrdir whose data is reconciled.
    :param other: another bzrdir to reconcile against.
    """
    # Nothing is returned; the Reconciler instance is discarded after the run.
    Reconciler(dir, other=other).reconcile()
50
class Reconciler(object):
51
"""Reconcilers are used to reconcile existing data."""
53
def __init__(self, dir, other=None):
54
"""Create a Reconciler."""
58
"""Perform reconciliation.
60
After reconciliation the following attributes document found issues:
61
inconsistent_parents: The number of revisions in the repository whose
62
ancestry was being reported incorrectly.
63
garbage_inventories: The number of inventory objects without revisions
64
that were garbage collected.
66
self.pb = ui.ui_factory.nested_progress_bar()
73
"""Helper function for performing reconciliation."""
74
self.repo = self.bzrdir.find_repository()
75
self.pb.note('Reconciling repository %s',
76
self.repo.bzrdir.root_transport.base)
77
repo_reconciler = self.repo.reconcile(thorough=True)
78
self.inconsistent_parents = repo_reconciler.inconsistent_parents
79
self.garbage_inventories = repo_reconciler.garbage_inventories
80
self.pb.note('Reconciliation complete.')
83
class RepoReconciler(object):
84
"""Reconciler that reconciles a repository.
86
The goal of repository reconciliation is to make any derived data
87
consistent with the core data committed by a user. This can involve
88
reindexing, or removing unreferenced data if that can interfere with
89
queries in a given repository.
91
Currently this consists of an inventory reweave with revision cross-checks.
94
def __init__(self, repo, other=None, thorough=False):
95
"""Construct a RepoReconciler.
97
:param thorough: perform a thorough check which may take longer but
98
will correct non-data loss issues such as incorrect
101
self.garbage_inventories = 0
102
self.inconsistent_parents = 0
104
self.thorough = thorough
107
"""Perform reconciliation.
109
After reconciliation the following attributes document found issues:
110
inconsistent_parents: The number of revisions in the repository whose
111
ancestry was being reported incorrectly.
112
garbage_inventories: The number of inventory objects without revisions
113
that were garbage collected.
115
self.repo.lock_write()
117
self.pb = ui.ui_factory.nested_progress_bar()
119
self._reconcile_steps()
125
def _reconcile_steps(self):
126
"""Perform the steps to reconcile this repository."""
127
self._reweave_inventory()
129
def _reweave_inventory(self):
130
"""Regenerate the inventory weave for the repository from scratch.
132
This is a smart function: it will only do the reweave if doing it
133
will correct data issues. The self.thorough flag controls whether
134
only data-loss causing issues (!self.thorough) or all issues
135
(self.thorough) are treated as requiring the reweave.
137
# local because needing to know about WeaveFile is a wart we want to hide
138
from bzrlib.weave import WeaveFile, Weave
139
transaction = self.repo.get_transaction()
140
self.pb.update('Reading inventory data.')
141
self.inventory = self.repo.get_inventory_weave()
142
# the total set of revisions to process
143
self.pending = set([rev_id for rev_id in self.repo._revision_store.all_revision_ids(transaction)])
145
# mapping from revision_id to parents
147
# errors that we detect
148
self.inconsistent_parents = 0
149
# we need the revision id of each revision and its available parents list
150
self._setup_steps(len(self.pending))
151
for rev_id in self.pending:
152
# put a revision into the graph.
153
self._graph_revision(rev_id)
154
self._check_garbage_inventories()
155
# if there are no inconsistent_parents and
156
# (no garbage inventories or we are not doing a thorough check)
157
if (not self.inconsistent_parents and
158
(not self.garbage_inventories or not self.thorough)):
159
self.pb.note('Inventory ok.')
161
self.pb.update('Backing up inventory...', 0, 0)
162
self.repo.control_weaves.copy(self.inventory, 'inventory.backup', self.repo.get_transaction())
163
self.pb.note('Backup Inventory created.')
164
# asking for '' should never return a non-empty weave
165
new_inventory_vf = self.repo.control_weaves.get_empty('inventory.new',
166
self.repo.get_transaction())
168
# we have topological order of revisions and non ghost parents ready.
169
self._setup_steps(len(self._rev_graph))
170
for rev_id in TopoSorter(self._rev_graph.items()).iter_topo_order():
171
parents = self._rev_graph[rev_id]
172
# double check this really is in topological order.
173
unavailable = [p for p in parents if p not in new_inventory_vf]
174
assert len(unavailable) == 0
175
# this entry has all the non ghost parents in the inventory
177
self._reweave_step('adding inventories')
178
if isinstance(new_inventory_vf, WeaveFile):
179
# It's really a WeaveFile, but we call straight into the
180
# Weave's add method to disable the auto-write-out behaviour.
181
# This is done to avoid a revision_count * time-to-write additional overhead on
183
new_inventory_vf._check_write_ok()
184
Weave._add_lines(new_inventory_vf, rev_id, parents,
185
self.inventory.get_lines(rev_id), None, None, None, False, True)
187
new_inventory_vf.add_lines(rev_id, parents, self.inventory.get_lines(rev_id))
189
if isinstance(new_inventory_vf, WeaveFile):
190
new_inventory_vf._save()
191
# if this worked, the set of new_inventory_vf.names should equal
193
assert set(new_inventory_vf.versions()) == self.pending
194
self.pb.update('Writing weave')
195
self.repo.control_weaves.copy(new_inventory_vf, 'inventory', self.repo.get_transaction())
196
self.repo.control_weaves.delete('inventory.new', self.repo.get_transaction())
197
self.inventory = None
198
self.pb.note('Inventory regenerated.')
200
def _setup_steps(self, new_total):
201
"""Setup the markers we need to control the progress bar."""
202
self.total = new_total
205
def _graph_revision(self, rev_id):
206
"""Load a revision into the revision graph."""
207
# pick a random revision
208
# analyse revision id rev_id and put it in the stack.
209
self._reweave_step('loading revisions')
210
rev = self.repo.get_revision_reconcile(rev_id)
211
assert rev.revision_id == rev_id
213
for parent in rev.parent_ids:
214
if self._parent_is_available(parent):
215
parents.append(parent)
217
mutter('found ghost %s', parent)
218
self._rev_graph[rev_id] = parents
219
if self._parents_are_inconsistent(rev_id, parents):
220
self.inconsistent_parents += 1
221
mutter('Inconsistent inventory parents: id {%s} '
222
'inventory claims %r, '
223
'available parents are %r, '
224
'unavailable parents are %r',
226
set(self.inventory.get_parents(rev_id)),
228
set(rev.parent_ids).difference(set(parents)))
230
def _parents_are_inconsistent(self, rev_id, parents):
231
"""Return True if the parents list of rev_id does not match the weave.
233
This detects inconsistencies based on the self.thorough value:
234
if thorough is on, the first parent value is checked as well as ghost
236
Otherwise only the ghost differences are evaluated.
238
weave_parents = self.inventory.get_parents(rev_id)
239
weave_missing_old_ghosts = set(weave_parents) != set(parents)
240
first_parent_is_wrong = (
241
len(weave_parents) and len(parents) and
242
parents[0] != weave_parents[0])
244
return weave_missing_old_ghosts or first_parent_is_wrong
246
return weave_missing_old_ghosts
248
def _check_garbage_inventories(self):
249
"""Check for garbage inventories which we cannot trust
251
We can't trust them because their pre-requisite file data may not
252
be present - all we know is that their revision was not installed.
254
if not self.thorough:
256
inventories = set(self.inventory.versions())
257
revisions = set(self._rev_graph.keys())
258
garbage = inventories.difference(revisions)
259
self.garbage_inventories = len(garbage)
260
for revision_id in garbage:
261
mutter('Garbage inventory {%s} found.', revision_id)
263
def _parent_is_available(self, parent):
264
"""True if parent is a fully available revision
266
A fully available revision has a inventory and a revision object in the
269
return (parent in self._rev_graph or
270
(parent in self.inventory and self.repo.has_revision(parent)))
272
def _reweave_step(self, message):
273
"""Mark a single step of regeneration complete."""
274
self.pb.update(message, self.count, self.total)
278
class KnitReconciler(RepoReconciler):
279
"""Reconciler that reconciles a knit format repository.
281
This will detect garbage inventories and remove them in thorough mode.
284
def _reconcile_steps(self):
285
"""Perform the steps to reconcile this repository."""
288
# knits never suffer this
291
def _load_indexes(self):
    """Load the indexes used by the reconciliation.

    Stores the repository transaction in self.transaction, the inventory
    weave in self.inventory and the revision file in self.revisions,
    updating the progress bar before, between and after the two loads.
    """
    message = 'Reading indexes.'
    total_steps = 2
    repo = self.repo

    self.transaction = repo.get_transaction()
    self.pb.update(message, 0, total_steps)

    self.inventory = repo.get_inventory_weave()
    self.pb.update(message, 1, total_steps)

    # The revision file is fetched under the transaction obtained above.
    revision_store = repo._revision_store
    self.revisions = revision_store.get_revision_file(self.transaction)
    self.pb.update(message, 2, total_steps)
300
def _gc_inventory(self):
301
"""Remove inventories that are not referenced from the revision store."""
302
self.pb.update('Checking unused inventories.', 0, 1)
303
self._check_garbage_inventories()
304
self.pb.update('Checking unused inventories.', 1, 3)
305
if not self.garbage_inventories:
306
self.pb.note('Inventory ok.')
308
self.pb.update('Backing up inventory...', 0, 0)
309
self.repo.control_weaves.copy(self.inventory, 'inventory.backup', self.transaction)
310
self.pb.note('Backup Inventory created.')
311
# asking for '' should never return a non-empty weave
312
new_inventory_vf = self.repo.control_weaves.get_empty('inventory.new',
315
# we have topological order of revisions and non ghost parents ready.
316
self._setup_steps(len(self.revisions))
317
for rev_id in TopoSorter(self.revisions.get_graph().items()).iter_topo_order():
318
parents = self.revisions.get_parents(rev_id)
319
# double check this really is in topological order.
320
unavailable = [p for p in parents if p not in new_inventory_vf]
321
assert len(unavailable) == 0
322
# this entry has all the non ghost parents in the inventory
324
self._reweave_step('adding inventories')
325
# ugly but needed, weaves are just way tooooo slow else.
326
new_inventory_vf.add_lines(rev_id, parents, self.inventory.get_lines(rev_id))
328
# if this worked, the set of new_inventory_vf.names should equal
330
assert set(new_inventory_vf.versions()) == set(self.revisions.versions())
331
self.pb.update('Writing weave')
332
self.repo.control_weaves.copy(new_inventory_vf, 'inventory', self.transaction)
333
self.repo.control_weaves.delete('inventory.new', self.transaction)
334
self.inventory = None
335
self.pb.note('Inventory regenerated.')
337
def _check_garbage_inventories(self):
    """Check for garbage inventories which we cannot trust.

    We can't trust them because their pre-requisite file data may not
    be present - all we know is that their revision was not installed.

    An inventory is garbage when its version is in self.inventory but not
    in self.revisions. The number found is stored in
    self.garbage_inventories and each one is reported through mutter().
    """
    inventory_ids = set(self.inventory.versions())
    revision_ids = set(self.revisions.versions())
    # Inventory versions with no matching revision version.
    orphans = inventory_ids - revision_ids
    self.garbage_inventories = len(orphans)
    for orphan_id in orphans:
        mutter('Garbage inventory {%s} found.', orphan_id)
351
class PackReconciler(RepoReconciler):
352
"""Reconciler that reconciles a pack based repository.
354
Garbage inventories do not affect ancestry queries, and removal is
355
considerably more expensive as there is no separate versioned file for
356
them, so they are not cleaned. In short it is currently a no-op.
358
In future this may be a good place to hook in annotation cache checking,
359
index recreation etc.
362
def _reconcile_steps(self):
363
"""Perform the steps to reconcile this repository."""