66
80
def _reconcile(self):
    """Helper function for performing reconciliation.

    Runs the two reconciliation passes in a fixed order: the branch first
    (``_reconcile_branch``) and then the repository
    (``_reconcile_repository``).  Returns nothing; each pass records its
    own results on ``self``.
    """
    self._reconcile_branch()
    self._reconcile_repository()
85
def _reconcile_branch(self):
87
self.branch = self.bzrdir.open_branch()
88
except errors.NotBranchError:
89
# Nothing to check here
90
self.fixed_branch_history = None
92
self.pb.note('Reconciling branch %s',
94
branch_reconciler = self.branch.reconcile(thorough=True)
95
self.fixed_branch_history = branch_reconciler.fixed_history
97
def _reconcile_repository(self):
    """Reconcile the repository located through ``self.bzrdir``.

    Stores the repository on ``self.repo``, runs one thorough reconcile via
    ``self.repo.reconcile(thorough=True)`` and copies the reconciler's
    ``inconsistent_parents`` and ``garbage_inventories`` counters onto
    ``self``.  Progress and the final outcome are reported through
    ``self.pb``.
    """
    self.repo = self.bzrdir.find_repository()
    self.pb.note('Reconciling repository %s',
                 self.repo.bzrdir.root_transport.base)
    self.pb.update("Reconciling repository", 0, 1)
    # Reconcile exactly once, through the repository's own reconcile().
    # The superseded direct RepoReconciler(self.repo).reconcile() call and
    # its unconditional completion note are intentionally not kept: running
    # both would do the work twice and announce completion before the
    # aborted flag is examined.
    repo_reconciler = self.repo.reconcile(thorough=True)
    self.inconsistent_parents = repo_reconciler.inconsistent_parents
    self.garbage_inventories = repo_reconciler.garbage_inventories
    # NOTE(review): the call heads for the two abort messages and the
    # `else:` were missing from the listing this was recovered from;
    # self.pb.note is assumed because it is the reporting call used by the
    # rest of this method -- confirm against the canonical file.
    if repo_reconciler.aborted:
        self.pb.note(
            'Reconcile aborted: revision index has inconsistent parents.')
        self.pb.note(
            'Run "bzr check" for more details.')
    else:
        self.pb.note('Reconciliation complete.')
114
class BranchReconciler(object):
115
"""Reconciler that works on a branch."""
117
def __init__(self, a_branch, thorough=False):
118
self.fixed_history = None
119
self.thorough = thorough
120
self.branch = a_branch
123
self.branch.lock_write()
125
self.pb = ui.ui_factory.nested_progress_bar()
127
self._reconcile_steps()
133
def _reconcile_steps(self):
134
self._reconcile_revision_history()
136
def _reconcile_revision_history(self):
137
repo = self.branch.repository
138
last_revno, last_revision_id = self.branch.last_revision_info()
139
real_history = list(repo.iter_reverse_revision_history(
141
real_history.reverse()
142
if last_revno != len(real_history):
143
self.fixed_history = True
144
# Technically for Branch5 formats, it is more efficient to use
145
# set_revision_history, as this will regenerate it again.
146
# Not really worth a whole BranchReconciler class just for this,
148
self.pb.note('Fixing last revision info %s => %s',
149
last_revno, len(real_history))
150
self.branch.set_last_revision_info(len(real_history),
153
self.fixed_history = False
154
self.pb.note('revision_history ok.')
78
157
class RepoReconciler(object):
79
158
"""Reconciler that reconciles a repository.
160
The goal of repository reconciliation is to make any derived data
161
consistent with the core data committed by a user. This can involve
162
reindexing, or removing unreferenced data if that can interfere with
163
queries in a given repository.
81
165
Currently this consists of an inventory reweave with revision cross-checks.
84
def __init__(self, repo):
168
def __init__(self, repo, other=None, thorough=False):
169
"""Construct a RepoReconciler.
171
:param thorough: perform a thorough check which may take longer but
172
will correct non-data loss issues such as incorrect
175
self.garbage_inventories = 0
176
self.inconsistent_parents = 0
179
self.thorough = thorough
87
181
def reconcile(self):
88
182
"""Perform reconciliation.
180
286
# analyse revision id rev_id and put it in the stack.
181
287
self._reweave_step('loading revisions')
182
288
rev = self.repo.get_revision_reconcile(rev_id)
183
assert rev.revision_id == rev_id
185
290
for parent in rev.parent_ids:
186
291
if self._parent_is_available(parent):
187
292
parents.append(parent)
189
294
mutter('found ghost %s', parent)
190
self._rev_graph[rev_id] = parents
191
if set(self.inventory.get_parents(rev_id)) != set(parents):
295
self._rev_graph[rev_id] = parents
296
if self._parents_are_inconsistent(rev_id, parents):
192
297
self.inconsistent_parents += 1
193
298
mutter('Inconsistent inventory parents: id {%s} '
194
299
'inventory claims %r, '
195
300
'available parents are %r, '
196
301
'unavailable parents are %r',
198
set(self.inventory.get_parents(rev_id)),
303
set(self.inventory.get_parent_map([rev_id])[rev_id]),
200
305
set(rev.parent_ids).difference(set(parents)))
307
def _parents_are_inconsistent(self, rev_id, parents):
    """Return True if the parents list of rev_id does not match the weave.

    The comparison depends on ``self.thorough``: when it is set, the first
    parent is compared in addition to the set of parents; otherwise only
    the set of parents (i.e. ghost differences) is compared.

    :param rev_id: The revision whose inventory parents are checked.
    :param parents: The parent ids computed for ``rev_id`` by the caller.
    :return: True when the weave's recorded parents disagree with
        ``parents`` under the rule above, False otherwise.
    """
    weave_parents = self.inventory.get_parent_map([rev_id])[rev_id]
    weave_missing_old_ghosts = set(weave_parents) != set(parents)
    if not self.thorough:
        return weave_missing_old_ghosts
    # Order only matters for the first parent, and only when both sides
    # actually have one.  Written with explicit comparisons so the result
    # is always a bool rather than a leaked len() value.
    first_parent_is_wrong = (
        len(weave_parents) > 0 and len(parents) > 0 and
        parents[0] != weave_parents[0])
    return weave_missing_old_ghosts or first_parent_is_wrong
202
325
def _check_garbage_inventories(self):
203
326
"""Check for garbage inventories which we cannot trust
205
328
We cant trust them because their pre-requisite file data may not
206
329
be present - all we know is that their revision was not installed.
331
if not self.thorough:
208
333
inventories = set(self.inventory.versions())
209
334
revisions = set(self._rev_graph.keys())
210
335
garbage = inventories.difference(revisions)
269
398
# we have topological order of revisions and non ghost parents ready.
270
399
self._setup_steps(len(self.revisions))
271
for rev_id in TopoSorter(self.revisions.get_graph().items()).iter_topo_order():
272
parents = self.revisions.get_parents(rev_id)
273
# double check this really is in topological order.
274
unavailable = [p for p in parents if p not in new_inventory_vf]
275
assert len(unavailable) == 0
400
revision_ids = self.revisions.versions()
401
graph = self.revisions.get_parent_map(revision_ids)
402
for rev_id in TopoSorter(graph.items()).iter_topo_order():
403
parents = graph[rev_id]
404
# double check this really is in topological order, ignoring existing ghosts.
405
unavailable = [p for p in parents if p not in new_inventory_vf and
408
raise AssertionError(
409
'unavailable parents: %r' % (unavailable,))
276
410
# this entry has all the non ghost parents in the inventory
278
412
self._reweave_step('adding inventories')
279
413
# ugly but needed, weaves are just way tooooo slow else.
280
new_inventory_vf.add_lines(rev_id, parents, self.inventory.get_lines(rev_id))
414
new_inventory_vf.add_lines_with_ghosts(rev_id, parents,
415
self.inventory.get_lines(rev_id))
282
417
# if this worked, the set of new_inventory_vf.names should equal
284
assert set(new_inventory_vf.versions()) == set(self.revisions.versions())
419
if not(set(new_inventory_vf.versions()) == set(self.revisions.versions())):
420
raise AssertionError()
285
421
self.pb.update('Writing weave')
286
422
self.repo.control_weaves.copy(new_inventory_vf, 'inventory', self.transaction)
287
423
self.repo.control_weaves.delete('inventory.new', self.transaction)
288
424
self.inventory = None
289
425
self.pb.note('Inventory regenerated.')
291
def _reinsert_revisions(self):
292
"""Correct the revision history for revisions in the revision knit."""
293
# the total set of revisions to process
294
self.pending = set(self.revisions.versions())
296
# mapping from revision_id to parents
298
# errors that we detect
299
self.inconsistent_parents = 0
300
# we need the revision id of each revision and its available parents list
301
self._setup_steps(len(self.pending))
302
for rev_id in self.pending:
303
# put a revision into the graph.
304
self._graph_revision(rev_id)
306
if not self.inconsistent_parents:
307
self.pb.note('Revision history accurate.')
309
self._setup_steps(len(self._rev_graph))
310
for rev_id, parents in self._rev_graph.items():
311
if parents != self.revisions.get_parents(rev_id):
312
self.revisions.fix_parents(rev_id, parents)
313
self._reweave_step('Fixing parents')
314
self.pb.note('Ancestry corrected.')
316
def _graph_revision(self, rev_id):
317
"""Load a revision into the revision graph."""
318
# pick a random revision
319
# analyse revision id rev_id and put it in the stack.
320
self._reweave_step('loading revisions')
321
rev = self.repo._revision_store.get_revision(rev_id, self.transaction)
322
assert rev.revision_id == rev_id
324
for parent in rev.parent_ids:
325
if self.revisions.has_version(parent):
326
parents.append(parent)
328
mutter('found ghost %s', parent)
329
self._rev_graph[rev_id] = parents
330
if set(self.inventory.get_parents(rev_id)) != set(parents):
331
self.inconsistent_parents += 1
332
mutter('Inconsistent inventory parents: id {%s} '
333
'inventory claims %r, '
334
'available parents are %r, '
335
'unavailable parents are %r',
337
set(self.inventory.get_parents(rev_id)),
339
set(rev.parent_ids).difference(set(parents)))
341
427
def _check_garbage_inventories(self):
342
428
"""Check for garbage inventories which we cannot trust
350
436
self.garbage_inventories = len(garbage)
351
437
for revision_id in garbage:
352
438
mutter('Garbage inventory {%s} found.', revision_id)
440
def _fix_text_parents(self):
441
"""Fix bad versionedfile parent entries.
443
It is possible for the parents entry in a versionedfile entry to be
444
inconsistent with the values in the revision and inventory.
446
This method finds entries with such inconsistencies, corrects their
447
parent lists, and replaces the versionedfile with a corrected version.
449
transaction = self.repo.get_transaction()
450
versions = self.revisions.versions()
451
mutter('Prepopulating revision text cache with %d revisions',
453
vf_checker = self.repo._get_versioned_file_checker()
454
# List all weaves before altering, to avoid race conditions when we
455
# delete unused weaves.
456
weaves = list(enumerate(self.repo.weave_store))
457
for num, file_id in weaves:
458
self.pb.update('Fixing text parents', num,
459
len(self.repo.weave_store))
460
vf = self.repo.weave_store.get_weave(file_id, transaction)
461
versions_with_bad_parents, unused_versions = \
462
vf_checker.check_file_version_parents(vf, file_id)
463
if (len(versions_with_bad_parents) == 0 and
464
len(unused_versions) == 0):
466
full_text_versions = set()
467
self._fix_text_parent(file_id, vf, versions_with_bad_parents,
468
full_text_versions, unused_versions)
470
def _fix_text_parent(self, file_id, vf, versions_with_bad_parents,
471
full_text_versions, unused_versions):
472
"""Fix bad versionedfile entries in a single versioned file."""
473
mutter('fixing text parent: %r (%d versions)', file_id,
474
len(versions_with_bad_parents))
475
mutter('(%d need to be full texts, %d are unused)',
476
len(full_text_versions), len(unused_versions))
477
new_vf = self.repo.weave_store.get_empty('temp:%s' % file_id,
480
for version in vf.versions():
481
if version in unused_versions:
483
elif version in versions_with_bad_parents:
484
parents = versions_with_bad_parents[version][1]
486
parents = vf.get_parent_map([version])[version]
487
new_parents[version] = parents
488
if not len(new_parents):
489
# No used versions, remove the VF.
490
self.repo.weave_store.delete(file_id, self.transaction)
492
for version in TopoSorter(new_parents.items()).iter_topo_order():
493
lines = vf.get_lines(version)
494
parents = new_parents[version]
495
if parents and (parents[0] in full_text_versions):
496
# Force this record to be a fulltext, not a delta.
497
new_vf._add(version, lines, parents, False,
498
None, None, None, False)
500
new_vf.add_lines(version, parents, lines)
501
self.repo.weave_store.copy(new_vf, file_id, self.transaction)
502
self.repo.weave_store.delete('temp:%s' % file_id, self.transaction)
505
class PackReconciler(RepoReconciler):
506
"""Reconciler that reconciles a pack based repository.
508
Garbage inventories do not affect ancestry queries, and removal is
509
considerably more expensive as there is no separate versioned file for
510
them, so they are not cleaned. In short it is currently a no-op.
512
In future this may be a good place to hook in annotation cache checking,
513
index recreation etc.
516
# XXX: The index-corruption repair that _fix_text_parents performs is needed for
517
# packs, but not yet implemented. The basic approach is to:
518
# - lock the names list
519
# - perform a customised pack() that regenerates data as needed
520
# - unlock the names list
521
# https://bugs.edge.launchpad.net/bzr/+bug/154173
523
def _reconcile_steps(self):
    """Perform the steps to reconcile this repository.

    Nothing is done unless ``self.thorough`` is set.  Otherwise, while
    holding the pack collection's names lock:

    * if the repository has any revisions, they are repacked with a
      ``ReconcilePacker`` (suffix ``".reconcile"``) and, when that yields a
      new pack, the previously existing packs are discarded;
    * if it has no revisions, the existing packs are discarded outright.

    ``self.garbage_inventories`` is set to the number of inventory index
    entries that disappeared during the operation.
    """
    # NOTE(review): the `return`, `try:`, `else:` and `finally:` lines were
    # missing from the listing this was recovered from and are restored
    # from the surrounding structure -- confirm against the canonical file.
    if not self.thorough:
        return
    collection = self.repo._pack_collection

    def count_inventory_entries():
        # Count lazily; there is no need to build a list just for len().
        entries = collection.inventory_index.combined_index.iter_all_entries()
        return sum(1 for _ in entries)

    collection.ensure_loaded()
    collection.lock_names()
    try:
        packs = collection.all_packs()
        all_revisions = self.repo.all_revision_ids()
        total_inventories = count_inventory_entries()
        if all_revisions:
            self._packer = repofmt.pack_repo.ReconcilePacker(
                collection, packs, ".reconcile", all_revisions)
            new_pack = self._packer.pack(pb=self.pb)
            if new_pack is not None:
                self._discard_and_save(packs)
        else:
            # only make a new pack when there is data to copy.
            self._discard_and_save(packs)
        self.garbage_inventories = (
            total_inventories - count_inventory_entries())
    finally:
        # Always release the names lock, even if packing failed.
        collection._unlock_names()
549
def _discard_and_save(self, packs):
    """Discard some packs from the repository.

    This removes them from the memory index, saves the in-memory index
    which makes the newly reconciled pack visible and hides the packs to be
    discarded, and finally renames the packs being discarded into the
    obsolete packs directory.

    :param packs: The packs to discard.
    """
    collection = self.repo._pack_collection
    # NOTE(review): the loop header was missing from the listing this was
    # recovered from; it is restored because `pack` is otherwise unbound.
    for pack in packs:
        collection._remove_pack_from_memory(pack)
    collection._save_pack_names()
    collection._obsolete_packs(packs)