81
76
def _reconcile(self):
82
77
"""Helper function for performing reconciliation."""
83
# Stage 1: branch-level reconciliation runs first.
self._reconcile_branch()
84
# Stage 2: repository-level reconciliation runs after the branch stage.
self._reconcile_repository()
86
def _reconcile_branch(self):
88
self.branch = self.bzrdir.open_branch()
89
except errors.NotBranchError:
90
# Nothing to check here
91
self.fixed_branch_history = None
93
self.pb.note('Reconciling branch %s',
95
branch_reconciler = self.branch.reconcile(thorough=True)
96
self.fixed_branch_history = branch_reconciler.fixed_history
98
def _reconcile_repository(self):
99
78
self.repo = self.bzrdir.find_repository()
100
79
self.pb.note('Reconciling repository %s',
101
80
self.repo.bzrdir.root_transport.base)
102
self.pb.update("Reconciling repository", 0, 1)
103
81
repo_reconciler = self.repo.reconcile(thorough=True)
104
82
self.inconsistent_parents = repo_reconciler.inconsistent_parents
105
83
self.garbage_inventories = repo_reconciler.garbage_inventories
112
90
self.pb.note('Reconciliation complete.')
115
class BranchReconciler(object):
116
"""Reconciler that works on a branch."""
118
def __init__(self, a_branch, thorough=False):
119
self.fixed_history = None
120
self.thorough = thorough
121
self.branch = a_branch
124
self.branch.lock_write()
126
self.pb = ui.ui_factory.nested_progress_bar()
128
self._reconcile_steps()
134
def _reconcile_steps(self):
135
# The only step performed here is the revision-history check/repair.
self._reconcile_revision_history()
137
def _reconcile_revision_history(self):
138
repo = self.branch.repository
139
last_revno, last_revision_id = self.branch.last_revision_info()
140
real_history = list(repo.iter_reverse_revision_history(
142
real_history.reverse()
143
if last_revno != len(real_history):
144
self.fixed_history = True
145
# Technically for Branch5 formats, it is more efficient to use
146
# set_revision_history, as this will regenerate it again.
147
# Not really worth a whole BranchReconciler class just for this,
149
self.pb.note('Fixing last revision info %s => %s',
150
last_revno, len(real_history))
151
self.branch.set_last_revision_info(len(real_history),
154
self.fixed_history = False
155
self.pb.note('revision_history ok.')
158
93
class RepoReconciler(object):
159
94
"""Reconciler that reconciles a repository.
236
170
self.pb.note('Inventory ok.')
238
172
self.pb.update('Backing up inventory...', 0, 0)
239
self.repo._backup_inventory()
173
self.repo.control_weaves.copy(self.inventory, 'inventory.backup', self.repo.get_transaction())
240
174
self.pb.note('Backup Inventory created.')
241
new_inventories = self.repo._temp_inventories()
175
# asking for '' should never return a non-empty weave
176
new_inventory_vf = self.repo.control_weaves.get_empty('inventory.new',
177
self.repo.get_transaction())
243
179
# we have topological order of revisions and non ghost parents ready.
244
180
self._setup_steps(len(self._rev_graph))
245
revision_keys = [(rev_id,) for rev_id in
246
TopoSorter(self._rev_graph.items()).iter_topo_order()]
247
stream = self._change_inv_parents(
248
self.inventory.get_record_stream(revision_keys, 'unordered', True),
249
self._new_inv_parents,
251
new_inventories.insert_record_stream(stream)
252
# if this worked, the set of new_inventories.keys should equal
181
for rev_id in TopoSorter(self._rev_graph.items()).iter_topo_order():
182
parents = self._rev_graph[rev_id]
183
# double check this really is in topological order.
184
unavailable = [p for p in parents if p not in new_inventory_vf]
185
assert len(unavailable) == 0
186
# this entry has all the non ghost parents in the inventory
188
self._reweave_step('adding inventories')
189
if isinstance(new_inventory_vf, WeaveFile):
190
# It's really a WeaveFile, but we call straight into the
191
# Weave's add method to disable the auto-write-out behaviour.
192
# This is done to avoid a revision_count * time-to-write additional overhead on
194
new_inventory_vf._check_write_ok()
195
Weave._add_lines(new_inventory_vf, rev_id, parents,
196
self.inventory.get_lines(rev_id), None, None, None, False, True)
198
new_inventory_vf.add_lines(rev_id, parents, self.inventory.get_lines(rev_id))
200
if isinstance(new_inventory_vf, WeaveFile):
201
new_inventory_vf._save()
202
# if this worked, the set of new_inventory_vf.names should equal
254
if not (set(new_inventories.keys()) ==
255
set([(revid,) for revid in self.pending])):
256
raise AssertionError()
204
assert set(new_inventory_vf.versions()) == self.pending
257
205
self.pb.update('Writing weave')
258
self.repo._activate_new_inventory()
206
self.repo.control_weaves.copy(new_inventory_vf, 'inventory', self.repo.get_transaction())
207
self.repo.control_weaves.delete('inventory.new', self.repo.get_transaction())
259
208
self.inventory = None
260
209
self.pb.note('Inventory regenerated.')
262
def _new_inv_parents(self, revision_key):
263
"""Lookup ghost-filtered parents for revision_key."""
264
# Use the filtered ghostless parents list:
# the last element of revision_key is used to index self._rev_graph, and
# each parent id found there is wrapped in a 1-tuple, so the result is a
# tuple of key tuples.
265
return tuple([(revid,) for revid in self._rev_graph[revision_key[-1]]])
267
def _change_inv_parents(self, stream, get_parents, all_revision_keys):
268
"""Adapt a record stream to reconcile the parents."""
269
for record in stream:
270
wanted_parents = get_parents(record.key)
271
if wanted_parents and wanted_parents[0] not in all_revision_keys:
272
# The check for the left most parent only handles knit
273
# compressors, but this code only applies to knit and weave
274
# repositories anyway.
275
bytes = record.get_bytes_as('fulltext')
276
yield FulltextContentFactory(record.key, wanted_parents, record.sha1, bytes)
278
adapted_record = AdapterFactory(record.key, wanted_parents, record)
280
self._reweave_step('adding inventories')
282
211
def _setup_steps(self, new_total):
283
212
"""Setup the markers we need to control the progress bar."""
284
213
self.total = new_total
290
219
# analyse revision id rev_id and put it in the stack.
291
220
self._reweave_step('loading revisions')
292
221
rev = self.repo.get_revision_reconcile(rev_id)
222
assert rev.revision_id == rev_id
294
224
for parent in rev.parent_ids:
295
225
if self._parent_is_available(parent):
296
226
parents.append(parent)
298
228
mutter('found ghost %s', parent)
299
self._rev_graph[rev_id] = parents
229
self._rev_graph[rev_id] = parents
230
if self._parents_are_inconsistent(rev_id, parents):
231
self.inconsistent_parents += 1
232
mutter('Inconsistent inventory parents: id {%s} '
233
'inventory claims %r, '
234
'available parents are %r, '
235
'unavailable parents are %r',
237
set(self.inventory.get_parents(rev_id)),
239
set(rev.parent_ids).difference(set(parents)))
241
def _parents_are_inconsistent(self, rev_id, parents):
242
"""Return True if the parents list of rev_id does not match the weave.
244
This detects inconsistencies based on the self.thorough value:
245
if thorough is on, the first parent value is checked as well as ghost
247
Otherwise only the ghost differences are evaluated.
249
weave_parents = self.inventory.get_parents(rev_id)
250
weave_missing_old_ghosts = set(weave_parents) != set(parents)
251
first_parent_is_wrong = (
252
len(weave_parents) and len(parents) and
253
parents[0] != weave_parents[0])
255
return weave_missing_old_ghosts or first_parent_is_wrong
257
return weave_missing_old_ghosts
301
259
def _check_garbage_inventories(self):
302
260
"""Check for garbage inventories which we cannot trust
367
323
self.pb.note('Inventory ok.')
369
325
self.pb.update('Backing up inventory...', 0, 0)
370
self.repo._backup_inventory()
326
self.repo.control_weaves.copy(self.inventory, 'inventory.backup', self.transaction)
371
327
self.pb.note('Backup Inventory created.')
372
328
# asking for '' should never return a non-empty weave
373
new_inventories = self.repo._temp_inventories()
329
new_inventory_vf = self.repo.control_weaves.get_empty('inventory.new',
374
332
# we have topological order of revisions and non ghost parents ready.
375
graph = self.revisions.get_parent_map(self.revisions.keys())
376
revision_keys = list(TopoSorter(graph).iter_topo_order())
377
revision_ids = [key[-1] for key in revision_keys]
378
self._setup_steps(len(revision_keys))
379
stream = self._change_inv_parents(
380
self.inventory.get_record_stream(revision_keys, 'unordered', True),
383
new_inventories.insert_record_stream(stream)
333
self._setup_steps(len(self.revisions))
334
for rev_id in TopoSorter(self.revisions.get_graph().items()).iter_topo_order():
335
parents = self.revisions.get_parents(rev_id)
336
# double check this really is in topological order.
337
unavailable = [p for p in parents if p not in new_inventory_vf]
338
assert len(unavailable) == 0
339
# this entry has all the non ghost parents in the inventory
341
self._reweave_step('adding inventories')
342
# ugly but needed: weaves are far too slow otherwise.
343
new_inventory_vf.add_lines(rev_id, parents, self.inventory.get_lines(rev_id))
384
345
# if this worked, the set of new_inventory_vf.names should equal
385
# the revision ids list
386
if not(set(new_inventories.keys()) == set(revision_keys)):
387
raise AssertionError()
347
assert set(new_inventory_vf.versions()) == set(self.revisions.versions())
388
348
self.pb.update('Writing weave')
389
self.repo._activate_new_inventory()
349
self.repo.control_weaves.copy(new_inventory_vf, 'inventory', self.transaction)
350
self.repo.control_weaves.delete('inventory.new', self.transaction)
390
351
self.inventory = None
391
352
self.pb.note('Inventory regenerated.')
354
def _check_garbage_inventories(self):
355
"""Check for garbage inventories which we cannot trust
357
We can't trust them because their pre-requisite file data may not
358
be present - all we know is that their revision was not installed.
360
inventories = set(self.inventory.versions())
361
revisions = set(self.revisions.versions())
362
garbage = inventories.difference(revisions)
363
self.garbage_inventories = len(garbage)
364
for revision_id in garbage:
365
mutter('Garbage inventory {%s} found.', revision_id)
393
367
def _fix_text_parents(self):
394
368
"""Fix bad versionedfile parent entries.
400
374
parent lists, and replaces the versionedfile with a corrected version.
402
376
transaction = self.repo.get_transaction()
403
versions = [key[-1] for key in self.revisions.keys()]
404
mutter('Prepopulating revision text cache with %d revisions',
406
vf_checker = self.repo._get_versioned_file_checker()
407
bad_parents, unused_versions = vf_checker.check_file_version_parents(
408
self.repo.texts, self.pb)
409
text_index = vf_checker.text_index
410
per_id_bad_parents = {}
411
for key in unused_versions:
412
# Ensure that every file with unused versions gets rewritten.
413
# NB: This is really not needed, reconcile != pack.
414
per_id_bad_parents[key[0]] = {}
415
# Generate per-knit/weave data.
416
for key, details in bad_parents.iteritems():
419
knit_parents = tuple([parent[-1] for parent in details[0]])
420
correct_parents = tuple([parent[-1] for parent in details[1]])
421
file_details = per_id_bad_parents.setdefault(file_id, {})
422
file_details[rev_id] = (knit_parents, correct_parents)
423
file_id_versions = {}
424
for text_key in text_index:
425
versions_list = file_id_versions.setdefault(text_key[0], [])
426
versions_list.append(text_key[1])
427
# Do the reconcile of individual weaves.
428
for num, file_id in enumerate(per_id_bad_parents):
377
revision_versions = repository._RevisionTextVersionCache(self.repo)
378
versions = self.revisions.versions()
379
revision_versions.prepopulate_revs(versions)
380
for num, file_id in enumerate(self.repo.weave_store):
429
381
self.pb.update('Fixing text parents', num,
430
len(per_id_bad_parents))
431
versions_with_bad_parents = per_id_bad_parents[file_id]
432
id_unused_versions = set(key[-1] for key in unused_versions
433
if key[0] == file_id)
434
if file_id in file_id_versions:
435
file_versions = file_id_versions[file_id]
437
# This id was present in the disk store but is not referenced
438
# by any revision at all.
440
self._fix_text_parent(file_id, versions_with_bad_parents,
441
id_unused_versions, file_versions)
382
len(self.repo.weave_store))
383
vf = self.repo.weave_store.get_weave(file_id, transaction)
384
vf_checker = self.repo.get_versioned_file_checker(
385
versions, revision_versions)
386
versions_with_bad_parents = vf_checker.check_file_version_parents(
388
if len(versions_with_bad_parents) == 0:
390
self._fix_text_parent(file_id, vf, versions_with_bad_parents)
443
def _fix_text_parent(self, file_id, versions_with_bad_parents,
444
unused_versions, all_versions):
392
def _fix_text_parent(self, file_id, vf, versions_with_bad_parents):
445
393
"""Fix bad versionedfile entries in a single versioned file."""
446
mutter('fixing text parent: %r (%d versions)', file_id,
447
len(versions_with_bad_parents))
448
mutter('(%d are unused)', len(unused_versions))
449
new_file_id = 'temp:%s' % file_id
394
new_vf = self.repo.weave_store.get_empty('temp:%s' % file_id,
452
for version in all_versions:
453
if version in unused_versions:
455
elif version in versions_with_bad_parents:
397
for version in vf.versions():
398
if version in versions_with_bad_parents:
456
399
parents = versions_with_bad_parents[version][1]
458
pmap = self.repo.texts.get_parent_map([(file_id, version)])
459
parents = [key[-1] for key in pmap[(file_id, version)]]
460
new_parents[(new_file_id, version)] = [
461
(new_file_id, parent) for parent in parents]
462
needed_keys.add((file_id, version))
463
def fix_parents(stream):
464
for record in stream:
465
bytes = record.get_bytes_as('fulltext')
466
new_key = (new_file_id, record.key[-1])
467
parents = new_parents[new_key]
468
yield FulltextContentFactory(new_key, parents, record.sha1, bytes)
469
stream = self.repo.texts.get_record_stream(needed_keys, 'topological', True)
470
self.repo._remove_file_id(new_file_id)
471
self.repo.texts.insert_record_stream(fix_parents(stream))
472
self.repo._remove_file_id(file_id)
474
self.repo._move_file_id(new_file_id, file_id)
401
parents = vf.get_parents(version)
402
new_parents[version] = parents
403
for version in TopoSorter(new_parents.items()).iter_topo_order():
404
new_vf.add_lines(version, new_parents[version],
405
vf.get_lines(version))
406
self.repo.weave_store.copy(new_vf, file_id, self.transaction)
407
self.repo.weave_store.delete('temp:%s' % file_id, self.transaction)
477
410
class PackReconciler(RepoReconciler):
495
428
def _reconcile_steps(self):
496
429
"""Perform the steps to reconcile this repository."""
497
if not self.thorough:
499
collection = self.repo._pack_collection
500
collection.ensure_loaded()
501
collection.lock_names()
503
packs = collection.all_packs()
504
all_revisions = self.repo.all_revision_ids()
505
total_inventories = len(list(
506
collection.inventory_index.combined_index.iter_all_entries()))
507
if len(all_revisions):
508
self._packer = repofmt.pack_repo.ReconcilePacker(
509
collection, packs, ".reconcile", all_revisions)
510
new_pack = self._packer.pack(pb=self.pb)
511
if new_pack is not None:
512
self._discard_and_save(packs)
514
# only make a new pack when there is data to copy.
515
self._discard_and_save(packs)
516
self.garbage_inventories = total_inventories - len(list(
517
collection.inventory_index.combined_index.iter_all_entries()))
519
collection._unlock_names()
521
def _discard_and_save(self, packs):
522
"""Discard some packs from the repository.
524
This removes them from the memory index, saves the in-memory index
525
which makes the newly reconciled pack visible and hides the packs to be
526
discarded, and finally renames the packs being discarded into the
527
obsolete packs directory.
529
:param packs: The packs to discard.
532
self.repo._pack_collection._remove_pack_from_memory(pack)
533
self.repo._pack_collection._save_pack_names()
534
self.repo._pack_collection._obsolete_packs(packs)