68
79
self.repo = self.bzrdir.find_repository()
69
80
self.pb.note('Reconciling repository %s',
70
81
self.repo.bzrdir.root_transport.base)
82
self.pb.update("Reconciling repository", 0, 1)
71
83
repo_reconciler = self.repo.reconcile(thorough=True)
72
84
self.inconsistent_parents = repo_reconciler.inconsistent_parents
73
85
self.garbage_inventories = repo_reconciler.garbage_inventories
74
self.pb.note('Reconciliation complete.')
86
if repo_reconciler.aborted:
88
'Reconcile aborted: revision index has inconsistent parents.')
90
'Run "bzr check" for more details.')
92
self.pb.note('Reconciliation complete.')
77
95
class RepoReconciler(object):
78
96
"""Reconciler that reconciles a repository.
98
The goal of repository reconciliation is to make any derived data
99
consistent with the core data committed by a user. This can involve
100
reindexing, or removing unreferenced data if that can interfere with
101
queries in a given repository.
80
103
Currently this consists of an inventory reweave with revision cross-checks.
306
334
# we have topological order of revisions and non ghost parents ready.
307
335
self._setup_steps(len(self.revisions))
308
for rev_id in TopoSorter(self.revisions.get_graph().items()).iter_topo_order():
309
parents = self.revisions.get_parents(rev_id)
310
# double check this really is in topological order.
311
unavailable = [p for p in parents if p not in new_inventory_vf]
336
revision_ids = self.revisions.versions()
337
graph = self.revisions.get_parent_map(revision_ids)
338
for rev_id in TopoSorter(graph.items()).iter_topo_order():
339
parents = graph[rev_id]
340
# double check this really is in topological order, ignoring existing ghosts.
341
unavailable = [p for p in parents if p not in new_inventory_vf and
312
343
assert len(unavailable) == 0
313
344
# this entry has all the non ghost parents in the inventory
315
346
self._reweave_step('adding inventories')
316
347
# ugly but needed, weaves are just way tooooo slow else.
317
new_inventory_vf.add_lines(rev_id, parents, self.inventory.get_lines(rev_id))
348
new_inventory_vf.add_lines_with_ghosts(rev_id, parents,
349
self.inventory.get_lines(rev_id))
319
351
# if this worked, the set of new_inventory_vf.names should equal
337
369
self.garbage_inventories = len(garbage)
338
370
for revision_id in garbage:
339
371
mutter('Garbage inventory {%s} found.', revision_id)
373
def _fix_text_parents(self):
    """Fix bad versionedfile parent entries.

    It is possible for the parents entry in a versionedfile entry to be
    inconsistent with the values in the revision and inventory.

    This method finds entries with such inconsistencies, corrects their
    parent lists, and replaces the versionedfile with a corrected version.

    Every file id in ``self.repo.weave_store`` is checked with the
    repository's versioned file checker; files that report neither bad
    parents nor unused versions are left untouched, the rest are handed to
    ``_fix_text_parent``.
    """
    transaction = self.repo.get_transaction()
    versions = self.revisions.versions()
    mutter('Prepopulating revision text cache with %d revisions',
           len(versions))
    vf_checker = self.repo._get_versioned_file_checker()
    # List all weaves before altering, to avoid race conditions when we
    # delete unused weaves.
    file_ids = list(self.repo.weave_store)
    # Report progress against the snapshot: the live store can shrink while
    # we work, which would make the reported total drift, and measuring it
    # on every iteration is repeated work.
    total = len(file_ids)
    for num, file_id in enumerate(file_ids):
        self.pb.update('Fixing text parents', num, total)
        vf = self.repo.weave_store.get_weave(file_id, transaction)
        versions_with_bad_parents, unused_versions = \
            vf_checker.check_file_version_parents(vf, file_id)
        if not versions_with_bad_parents and not unused_versions:
            # Nothing to repair in this file.
            continue
        # No versions are currently forced to full texts; the empty set is
        # passed through so _fix_text_parent keeps a uniform signature.
        full_text_versions = set()
        self._fix_text_parent(file_id, vf, versions_with_bad_parents,
                              full_text_versions, unused_versions)
403
def _fix_text_parent(self, file_id, vf, versions_with_bad_parents,
                     full_text_versions, unused_versions):
    """Fix bad versionedfile entries in a single versioned file.

    :param file_id: Id of the versioned file being repaired.
    :param vf: The existing versioned file for ``file_id``.
    :param versions_with_bad_parents: Mapping keyed by version; the item
        at index 1 of each value is used as the replacement parent list.
    :param full_text_versions: Versions whose children (by first parent)
        must be stored as full texts rather than deltas.
    :param unused_versions: Versions to drop from the rewritten file.

    If no versions remain after dropping the unused ones, the versioned
    file is deleted from the weave store.  Otherwise a corrected copy is
    built under a temporary id, copied over ``file_id`` and the temporary
    removed.
    """
    mutter('fixing text parent: %r (%d versions)', file_id,
           len(versions_with_bad_parents))
    mutter('(%d need to be full texts, %d are unused)',
           len(full_text_versions), len(unused_versions))
    # Work out the parent list every surviving version should have before
    # touching the store, so that nothing temporary is created (and left
    # behind) when the file turns out to be entirely unused.
    new_parents = {}
    for version in vf.versions():
        if version in unused_versions:
            continue
        if version in versions_with_bad_parents:
            parents = versions_with_bad_parents[version][1]
        else:
            parents = vf.get_parent_map([version])[version]
        new_parents[version] = parents
    if not new_parents:
        # No used versions, remove the VF.
        self.repo.weave_store.delete(file_id, self.transaction)
        return
    temp_id = 'temp:%s' % file_id
    new_vf = self.repo.weave_store.get_empty(temp_id, self.transaction)
    # Parents must be inserted before their children.
    for version in TopoSorter(new_parents.items()).iter_topo_order():
        lines = vf.get_lines(version)
        parents = new_parents[version]
        if parents and (parents[0] in full_text_versions):
            # Force this record to be a fulltext, not a delta.
            new_vf._add(version, lines, parents, False,
                        None, None, None, False)
        else:
            new_vf.add_lines(version, parents, lines)
    self.repo.weave_store.copy(new_vf, file_id, self.transaction)
    self.repo.weave_store.delete(temp_id, self.transaction)
438
class PackReconciler(RepoReconciler):
    """Reconciler that reconciles a pack based repository.

    Garbage inventories do not affect ancestry queries, and removal is
    considerably more expensive as there is no separate versioned file for
    them, so a non-thorough reconcile does nothing.  A thorough reconcile
    repacks the repository through a ReconcilePacker (when it contains any
    revisions) and records the resulting drop in inventory index entries
    as ``garbage_inventories``.

    In future this may be a good place to hook in annotation cache checking,
    index recreation etc.
    """

    # XXX: The repair of index corruption that _fix_text_parents performs is
    # needed for packs, but not yet implemented. The basic approach is to:
    #  - lock the names list
    #  - perform a customised pack() that regenerates data as needed
    #  - unlock the names list
    # https://bugs.edge.launchpad.net/bzr/+bug/154173

    def _count_inventory_entries(self, collection):
        """Return the number of entries in the combined inventory index.

        The index is looked up afresh on each call and the entries are
        counted as they are iterated rather than collected into a list.
        """
        index = collection.inventory_index.combined_index
        return sum(1 for _ in index.iter_all_entries())

    def _reconcile_steps(self):
        """Perform the steps to reconcile this repository.

        Does nothing unless ``self.thorough`` is set.  Otherwise, with the
        pack names list locked, the existing packs are repacked and
        ``self.garbage_inventories`` is set to the number of inventory
        index entries that disappeared in the process.
        """
        if not self.thorough:
            return
        collection = self.repo._pack_collection
        collection.ensure_loaded()
        collection.lock_names()
        try:
            packs = collection.all_packs()
            all_revisions = self.repo.all_revision_ids()
            total_inventories = self._count_inventory_entries(collection)
            if all_revisions:
                self._packer = repofmt.pack_repo.ReconcilePacker(
                    collection, packs, ".reconcile", all_revisions)
                new_pack = self._packer.pack(pb=self.pb)
                if new_pack is not None:
                    self._discard_and_save(packs)
            else:
                # only make a new pack when there is data to copy.
                self._discard_and_save(packs)
            self.garbage_inventories = (
                total_inventories - self._count_inventory_entries(collection))
        finally:
            # Always release the names lock, even if packing failed.
            collection._unlock_names()

    def _discard_and_save(self, packs):
        """Discard some packs from the repository.

        This removes them from the memory index, saves the in-memory index
        which makes the newly reconciled pack visible and hides the packs to be
        discarded, and finally renames the packs being discarded into the
        obsolete packs directory.

        :param packs: The packs to discard.
        """
        collection = self.repo._pack_collection
        for pack in packs:
            collection._remove_pack_from_memory(pack)
        collection._save_pack_names()
        collection._obsolete_packs(packs)