1
# Copyright (C) 2005, 2006 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
"""Reconcilers are able to fix some potential data errors in a branch."""
35
from bzrlib.trace import mutter, note
36
from bzrlib.tsort import TopoSorter
39
def reconcile(dir, other=None):
40
"""Reconcile the data in dir.
42
Currently this is limited to a inventory 'reweave'.
44
This is a convenience method, for using a Reconciler object.
46
Directly using Reconciler is recommended for library users that
47
desire fine grained control or analysis of the found issues.
49
:param other: another bzrdir to reconcile against.
51
reconciler = Reconciler(dir, other=other)
52
reconciler.reconcile()
55
class Reconciler(object):
56
"""Reconcilers are used to reconcile existing data."""
58
def __init__(self, dir, other=None):
59
"""Create a Reconciler."""
63
"""Perform reconciliation.
65
After reconciliation the following attributes document found issues:
66
inconsistent_parents: The number of revisions in the repository whose
67
ancestry was being reported incorrectly.
68
garbage_inventories: The number of inventory objects without revisions
69
that were garbage collected.
70
fixed_branch_history: None if there was no branch, False if the branch
71
history was correct, True if the branch history
72
needed to be re-normalized.
74
self.pb = ui.ui_factory.nested_progress_bar()
81
"""Helper function for performing reconciliation."""
82
self._reconcile_branch()
83
self._reconcile_repository()
85
def _reconcile_branch(self):
87
self.branch = self.bzrdir.open_branch()
88
except errors.NotBranchError:
89
# Nothing to check here
90
self.fixed_branch_history = None
92
self.pb.note('Reconciling branch %s',
94
branch_reconciler = self.branch.reconcile(thorough=True)
95
self.fixed_branch_history = branch_reconciler.fixed_history
97
def _reconcile_repository(self):
98
self.repo = self.bzrdir.find_repository()
99
self.pb.note('Reconciling repository %s',
100
self.repo.bzrdir.root_transport.base)
101
self.pb.update("Reconciling repository", 0, 1)
102
repo_reconciler = self.repo.reconcile(thorough=True)
103
self.inconsistent_parents = repo_reconciler.inconsistent_parents
104
self.garbage_inventories = repo_reconciler.garbage_inventories
105
if repo_reconciler.aborted:
107
'Reconcile aborted: revision index has inconsistent parents.')
109
'Run "bzr check" for more details.')
111
self.pb.note('Reconciliation complete.')
114
class BranchReconciler(object):
115
"""Reconciler that works on a branch."""
117
def __init__(self, a_branch, thorough=False):
118
self.fixed_history = None
119
self.thorough = thorough
120
self.branch = a_branch
123
self.branch.lock_write()
125
self.pb = ui.ui_factory.nested_progress_bar()
127
self._reconcile_steps()
133
def _reconcile_steps(self):
134
self._reconcile_revision_history()
136
def _reconcile_revision_history(self):
137
repo = self.branch.repository
138
last_revno, last_revision_id = self.branch.last_revision_info()
139
real_history = list(repo.iter_reverse_revision_history(
141
real_history.reverse()
142
if last_revno != len(real_history):
143
self.fixed_history = True
144
# Technically for Branch5 formats, it is more efficient to use
145
# set_revision_history, as this will regenerate it again.
146
# Not really worth a whole BranchReconciler class just for this,
148
self.pb.note('Fixing last revision info %s => %s',
149
last_revno, len(real_history))
150
self.branch.set_last_revision_info(len(real_history),
153
self.fixed_history = False
154
self.pb.note('revision_history ok.')
157
class RepoReconciler(object):
158
"""Reconciler that reconciles a repository.
160
The goal of repository reconciliation is to make any derived data
161
consistent with the core data committed by a user. This can involve
162
reindexing, or removing unreferenced data if that can interfere with
163
queries in a given repository.
165
Currently this consists of an inventory reweave with revision cross-checks.
168
def __init__(self, repo, other=None, thorough=False):
169
"""Construct a RepoReconciler.
171
:param thorough: perform a thorough check which may take longer but
172
will correct non-data loss issues such as incorrect
175
self.garbage_inventories = 0
176
self.inconsistent_parents = 0
179
self.thorough = thorough
182
"""Perform reconciliation.
184
After reconciliation the following attributes document found issues:
185
inconsistent_parents: The number of revisions in the repository whose
186
ancestry was being reported incorrectly.
187
garbage_inventories: The number of inventory objects without revisions
188
that were garbage collected.
190
self.repo.lock_write()
192
self.pb = ui.ui_factory.nested_progress_bar()
194
self._reconcile_steps()
200
def _reconcile_steps(self):
201
"""Perform the steps to reconcile this repository."""
202
self._reweave_inventory()
204
def _reweave_inventory(self):
205
"""Regenerate the inventory weave for the repository from scratch.
207
This is a smart function: it will only do the reweave if doing it
208
will correct data issues. The self.thorough flag controls whether
209
only data-loss causing issues (!self.thorough) or all issues
210
(self.thorough) are treated as requiring the reweave.
212
# local because needing to know about WeaveFile is a wart we want to hide
213
from bzrlib.weave import WeaveFile, Weave
214
transaction = self.repo.get_transaction()
215
self.pb.update('Reading inventory data.')
216
self.inventory = self.repo.get_inventory_weave()
217
# the total set of revisions to process
218
self.pending = set([rev_id for rev_id in self.repo._revision_store.all_revision_ids(transaction)])
220
# mapping from revision_id to parents
222
# errors that we detect
223
self.inconsistent_parents = 0
224
# we need the revision id of each revision and its available parents list
225
self._setup_steps(len(self.pending))
226
for rev_id in self.pending:
227
# put a revision into the graph.
228
self._graph_revision(rev_id)
229
self._check_garbage_inventories()
230
# if there are no inconsistent_parents and
231
# (no garbage inventories or we are not doing a thorough check)
232
if (not self.inconsistent_parents and
233
(not self.garbage_inventories or not self.thorough)):
234
self.pb.note('Inventory ok.')
236
self.pb.update('Backing up inventory...', 0, 0)
237
self.repo.control_weaves.copy(self.inventory, 'inventory.backup', self.repo.get_transaction())
238
self.pb.note('Backup Inventory created.')
239
# asking for '' should never return a non-empty weave
240
new_inventory_vf = self.repo.control_weaves.get_empty('inventory.new',
241
self.repo.get_transaction())
243
# we have topological order of revisions and non ghost parents ready.
244
self._setup_steps(len(self._rev_graph))
245
for rev_id in TopoSorter(self._rev_graph.items()).iter_topo_order():
246
parents = self._rev_graph[rev_id]
247
# double check this really is in topological order.
248
unavailable = [p for p in parents if p not in new_inventory_vf]
250
raise AssertionError('unavailable parents: %r'
252
# this entry has all the non ghost parents in the inventory
254
self._reweave_step('adding inventories')
255
if isinstance(new_inventory_vf, WeaveFile):
256
# It's really a WeaveFile, but we call straight into the
257
# Weave's add method to disable the auto-write-out behaviour.
258
# This is done to avoid a revision_count * time-to-write additional overhead on
260
new_inventory_vf._check_write_ok()
261
Weave._add_lines(new_inventory_vf, rev_id, parents,
262
self.inventory.get_lines(rev_id), None, None, None, False, True)
264
new_inventory_vf.add_lines(rev_id, parents, self.inventory.get_lines(rev_id))
266
if isinstance(new_inventory_vf, WeaveFile):
267
new_inventory_vf._save()
268
# if this worked, the set of new_inventory_vf.names should equal
270
if not (set(new_inventory_vf.versions()) == self.pending):
271
raise AssertionError()
272
self.pb.update('Writing weave')
273
self.repo.control_weaves.copy(new_inventory_vf, 'inventory', self.repo.get_transaction())
274
self.repo.control_weaves.delete('inventory.new', self.repo.get_transaction())
275
self.inventory = None
276
self.pb.note('Inventory regenerated.')
278
def _setup_steps(self, new_total):
279
"""Setup the markers we need to control the progress bar."""
280
self.total = new_total
283
def _graph_revision(self, rev_id):
284
"""Load a revision into the revision graph."""
285
# pick a random revision
286
# analyse revision id rev_id and put it in the stack.
287
self._reweave_step('loading revisions')
288
rev = self.repo.get_revision_reconcile(rev_id)
290
for parent in rev.parent_ids:
291
if self._parent_is_available(parent):
292
parents.append(parent)
294
mutter('found ghost %s', parent)
295
self._rev_graph[rev_id] = parents
296
if self._parents_are_inconsistent(rev_id, parents):
297
self.inconsistent_parents += 1
298
mutter('Inconsistent inventory parents: id {%s} '
299
'inventory claims %r, '
300
'available parents are %r, '
301
'unavailable parents are %r',
303
set(self.inventory.get_parent_map([rev_id])[rev_id]),
305
set(rev.parent_ids).difference(set(parents)))
307
def _parents_are_inconsistent(self, rev_id, parents):
308
"""Return True if the parents list of rev_id does not match the weave.
310
This detects inconsistencies based on the self.thorough value:
311
if thorough is on, the first parent value is checked as well as ghost
313
Otherwise only the ghost differences are evaluated.
315
weave_parents = self.inventory.get_parent_map([rev_id])[rev_id]
316
weave_missing_old_ghosts = set(weave_parents) != set(parents)
317
first_parent_is_wrong = (
318
len(weave_parents) and len(parents) and
319
parents[0] != weave_parents[0])
321
return weave_missing_old_ghosts or first_parent_is_wrong
323
return weave_missing_old_ghosts
325
def _check_garbage_inventories(self):
326
"""Check for garbage inventories which we cannot trust
328
We cant trust them because their pre-requisite file data may not
329
be present - all we know is that their revision was not installed.
331
if not self.thorough:
333
inventories = set(self.inventory.versions())
334
revisions = set(self._rev_graph.keys())
335
garbage = inventories.difference(revisions)
336
self.garbage_inventories = len(garbage)
337
for revision_id in garbage:
338
mutter('Garbage inventory {%s} found.', revision_id)
340
def _parent_is_available(self, parent):
341
"""True if parent is a fully available revision
343
A fully available revision has a inventory and a revision object in the
346
return (parent in self._rev_graph or
347
(parent in self.inventory and self.repo.has_revision(parent)))
349
def _reweave_step(self, message):
350
"""Mark a single step of regeneration complete."""
351
self.pb.update(message, self.count, self.total)
355
class KnitReconciler(RepoReconciler):
356
"""Reconciler that reconciles a knit format repository.
358
This will detect garbage inventories and remove them in thorough mode.
361
def _reconcile_steps(self):
362
"""Perform the steps to reconcile this repository."""
366
except errors.BzrCheckError:
369
# knits never suffer this
371
self._fix_text_parents()
373
def _load_indexes(self):
374
"""Load indexes for the reconciliation."""
375
self.transaction = self.repo.get_transaction()
376
self.pb.update('Reading indexes.', 0, 2)
377
self.inventory = self.repo.get_inventory_weave()
378
self.pb.update('Reading indexes.', 1, 2)
379
self.repo._check_for_inconsistent_revision_parents()
380
self.revisions = self.repo._revision_store.get_revision_file(self.transaction)
381
self.pb.update('Reading indexes.', 2, 2)
383
def _gc_inventory(self):
384
"""Remove inventories that are not referenced from the revision store."""
385
self.pb.update('Checking unused inventories.', 0, 1)
386
self._check_garbage_inventories()
387
self.pb.update('Checking unused inventories.', 1, 3)
388
if not self.garbage_inventories:
389
self.pb.note('Inventory ok.')
391
self.pb.update('Backing up inventory...', 0, 0)
392
self.repo.control_weaves.copy(self.inventory, 'inventory.backup', self.transaction)
393
self.pb.note('Backup Inventory created.')
394
# asking for '' should never return a non-empty weave
395
new_inventory_vf = self.repo.control_weaves.get_empty('inventory.new',
398
# we have topological order of revisions and non ghost parents ready.
399
self._setup_steps(len(self.revisions))
400
revision_ids = self.revisions.versions()
401
graph = self.revisions.get_parent_map(revision_ids)
402
for rev_id in TopoSorter(graph.items()).iter_topo_order():
403
parents = graph[rev_id]
404
# double check this really is in topological order, ignoring existing ghosts.
405
unavailable = [p for p in parents if p not in new_inventory_vf and
408
raise AssertionError(
409
'unavailable parents: %r' % (unavailable,))
410
# this entry has all the non ghost parents in the inventory
412
self._reweave_step('adding inventories')
413
# ugly but needed, weaves are just way tooooo slow else.
414
new_inventory_vf.add_lines_with_ghosts(rev_id, parents,
415
self.inventory.get_lines(rev_id))
417
# if this worked, the set of new_inventory_vf.names should equal
419
if not(set(new_inventory_vf.versions()) == set(self.revisions.versions())):
420
raise AssertionError()
421
self.pb.update('Writing weave')
422
self.repo.control_weaves.copy(new_inventory_vf, 'inventory', self.transaction)
423
self.repo.control_weaves.delete('inventory.new', self.transaction)
424
self.inventory = None
425
self.pb.note('Inventory regenerated.')
427
def _check_garbage_inventories(self):
428
"""Check for garbage inventories which we cannot trust
430
We cant trust them because their pre-requisite file data may not
431
be present - all we know is that their revision was not installed.
433
inventories = set(self.inventory.versions())
434
revisions = set(self.revisions.versions())
435
garbage = inventories.difference(revisions)
436
self.garbage_inventories = len(garbage)
437
for revision_id in garbage:
438
mutter('Garbage inventory {%s} found.', revision_id)
440
def _fix_text_parents(self):
441
"""Fix bad versionedfile parent entries.
443
It is possible for the parents entry in a versionedfile entry to be
444
inconsistent with the values in the revision and inventory.
446
This method finds entries with such inconsistencies, corrects their
447
parent lists, and replaces the versionedfile with a corrected version.
449
transaction = self.repo.get_transaction()
450
versions = self.revisions.versions()
451
mutter('Prepopulating revision text cache with %d revisions',
453
vf_checker = self.repo._get_versioned_file_checker()
454
# List all weaves before altering, to avoid race conditions when we
455
# delete unused weaves.
456
weaves = list(enumerate(self.repo.weave_store))
457
for num, file_id in weaves:
458
self.pb.update('Fixing text parents', num,
459
len(self.repo.weave_store))
460
vf = self.repo.weave_store.get_weave(file_id, transaction)
461
versions_with_bad_parents, unused_versions = \
462
vf_checker.check_file_version_parents(vf, file_id)
463
if (len(versions_with_bad_parents) == 0 and
464
len(unused_versions) == 0):
466
full_text_versions = set()
467
self._fix_text_parent(file_id, vf, versions_with_bad_parents,
468
full_text_versions, unused_versions)
470
def _fix_text_parent(self, file_id, vf, versions_with_bad_parents,
471
full_text_versions, unused_versions):
472
"""Fix bad versionedfile entries in a single versioned file."""
473
mutter('fixing text parent: %r (%d versions)', file_id,
474
len(versions_with_bad_parents))
475
mutter('(%d need to be full texts, %d are unused)',
476
len(full_text_versions), len(unused_versions))
477
new_vf = self.repo.weave_store.get_empty('temp:%s' % file_id,
480
for version in vf.versions():
481
if version in unused_versions:
483
elif version in versions_with_bad_parents:
484
parents = versions_with_bad_parents[version][1]
486
parents = vf.get_parent_map([version])[version]
487
new_parents[version] = parents
488
if not len(new_parents):
489
# No used versions, remove the VF.
490
self.repo.weave_store.delete(file_id, self.transaction)
492
for version in TopoSorter(new_parents.items()).iter_topo_order():
493
lines = vf.get_lines(version)
494
parents = new_parents[version]
495
if parents and (parents[0] in full_text_versions):
496
# Force this record to be a fulltext, not a delta.
497
new_vf._add(version, lines, parents, False,
498
None, None, None, False)
500
new_vf.add_lines(version, parents, lines)
501
self.repo.weave_store.copy(new_vf, file_id, self.transaction)
502
self.repo.weave_store.delete('temp:%s' % file_id, self.transaction)
505
class PackReconciler(RepoReconciler):
506
"""Reconciler that reconciles a pack based repository.
508
Garbage inventories do not affect ancestry queries, and removal is
509
considerably more expensive as there is no separate versioned file for
510
them, so they are not cleaned. In short it is currently a no-op.
512
In future this may be a good place to hook in annotation cache checking,
513
index recreation etc.
516
# XXX: The index corruption that _fix_text_parents performs is needed for
517
# packs, but not yet implemented. The basic approach is to:
518
# - lock the names list
519
# - perform a customised pack() that regenerates data as needed
520
# - unlock the names list
521
# https://bugs.edge.launchpad.net/bzr/+bug/154173
523
def _reconcile_steps(self):
524
"""Perform the steps to reconcile this repository."""
525
if not self.thorough:
527
collection = self.repo._pack_collection
528
collection.ensure_loaded()
529
collection.lock_names()
531
packs = collection.all_packs()
532
all_revisions = self.repo.all_revision_ids()
533
total_inventories = len(list(
534
collection.inventory_index.combined_index.iter_all_entries()))
535
if len(all_revisions):
536
self._packer = repofmt.pack_repo.ReconcilePacker(
537
collection, packs, ".reconcile", all_revisions)
538
new_pack = self._packer.pack(pb=self.pb)
539
if new_pack is not None:
540
self._discard_and_save(packs)
542
# only make a new pack when there is data to copy.
543
self._discard_and_save(packs)
544
self.garbage_inventories = total_inventories - len(list(
545
collection.inventory_index.combined_index.iter_all_entries()))
547
collection._unlock_names()
549
def _discard_and_save(self, packs):
550
"""Discard some packs from the repository.
552
This removes them from the memory index, saves the in-memory index
553
which makes the newly reconciled pack visible and hides the packs to be
554
discarded, and finally renames the packs being discarded into the
555
obsolete packs directory.
557
:param packs: The packs to discard.
560
self.repo._pack_collection._remove_pack_from_memory(pack)
561
self.repo._pack_collection._save_pack_names()
562
self.repo._pack_collection._obsolete_packs(packs)