156
192
self._writer.begin()
157
193
# what state is the pack in? (open, finished, aborted)
158
194
self._state = 'open'
159
# no name until we finish writing the content
162
def _check_references(self):
163
"""Make sure our external references are present.
165
Packs are allowed to have deltas whose base is not in the pack, but it
166
must be present somewhere in this collection. It is not allowed to
167
have deltas based on a fallback repository.
168
(See <https://bugs.launchpad.net/bzr/+bug/288751>)
170
# Groupcompress packs don't have any external references, arguably CHK
171
# pages have external references, but we cannot 'cheaply' determine
172
# them without actually walking all of the chk pages.
175
class ResumedGCPack(ResumedPack):
177
def _check_references(self):
178
"""Make sure our external compression parents are present."""
179
# See GCPack._check_references for why this is empty
181
def _get_external_refs(self, index):
182
# GC repositories don't have compression parents external to a given
187
class GCCHKPacker(Packer):
188
"""This class understand what it takes to collect a GCCHK repo."""
190
def __init__(self, pack_collection, packs, suffix, revision_ids=None,
192
super(GCCHKPacker, self).__init__(pack_collection, packs, suffix,
193
revision_ids=revision_ids,
194
reload_func=reload_func)
195
self._pack_collection = pack_collection
196
# ATM, We only support this for GCCHK repositories
197
if pack_collection.chk_index is None:
198
raise AssertionError('pack_collection.chk_index should not be None')
199
self._gather_text_refs = False
200
self._chk_id_roots = []
201
self._chk_p_id_roots = []
202
self._text_refs = None
203
# set by .pack() if self.revision_ids is not None
204
self.revision_keys = None
206
def _get_progress_stream(self, source_vf, keys, message, pb):
208
substream = source_vf.get_record_stream(keys, 'groupcompress', True)
209
for idx, record in enumerate(substream):
211
pb.update(message, idx + 1, len(keys))
215
def _get_filtered_inv_stream(self, source_vf, keys, message, pb=None):
197
RepositoryPackCollection.pack_factory = NewPack
199
class GCRepositoryPackCollection(RepositoryPackCollection):
201
pack_factory = GCPack
203
def _make_index(self, name, suffix):
    """Overridden to use BTreeGraphIndex objects.

    :param name: the pack name, used as a key into ``self._names``.
    :param suffix: the index suffix; it selects the size entry through
        ``self._suffix_offsets`` and is appended to ``name`` to form the
        index file name.
    :return: a BTreeGraphIndex opened on ``self._index_transport``.
    """
    offset = self._suffix_offsets[suffix]
    filename = name + suffix
    recorded_size = self._names[name][offset]
    return BTreeGraphIndex(self._index_transport, filename, recorded_size)
211
def _start_write_group(self):
212
# Overridden to add 'self.pack_factory()'
213
# Do not permit preparation for writing if we're not in a 'write lock'.
214
if not self.repo.is_write_locked():
215
raise errors.NotWriteLocked(self)
216
self._new_pack = self.pack_factory(self, upload_suffix='.pack',
217
file_mode=self.repo.bzrdir._get_file_mode())
218
# allow writing: queue writes to a new index
219
self.revision_index.add_writable_index(self._new_pack.revision_index,
221
self.inventory_index.add_writable_index(self._new_pack.inventory_index,
223
self.text_index.add_writable_index(self._new_pack.text_index,
225
self.signature_index.add_writable_index(self._new_pack.signature_index,
227
if chk_support and self.chk_index is not None:
228
self.chk_index.add_writable_index(self._new_pack.chk_index,
230
self.repo.chk_bytes._index._add_callback = self.chk_index.add_callback
232
self.repo.inventories._index._add_callback = self.inventory_index.add_callback
233
self.repo.revisions._index._add_callback = self.revision_index.add_callback
234
self.repo.signatures._index._add_callback = self.signature_index.add_callback
235
self.repo.texts._index._add_callback = self.text_index.add_callback
237
def _get_filtered_inv_stream(self, source_vf, keys):
216
238
"""Filter the texts of inventories, to find the chk pages."""
217
total_keys = len(keys)
218
def _filtered_inv_stream():
220
p_id_roots_set = set()
221
stream = source_vf.get_record_stream(keys, 'groupcompress', True)
242
p_id_roots_set = set()
243
def _filter_inv_stream(stream):
222
244
for idx, record in enumerate(stream):
223
# Inventories should always be with revisions; assume success.
245
### child_pb.update('fetch inv', idx, len(inv_keys_to_fetch))
224
246
bytes = record.get_bytes_as('fulltext')
225
chk_inv = inventory.CHKInventory.deserialise(None, bytes,
228
pb.update('inv', idx, total_keys)
247
chk_inv = inventory.CHKInventory.deserialise(None, bytes, record.key)
229
248
key = chk_inv.id_to_entry.key()
230
249
if key not in id_roots_set:
231
self._chk_id_roots.append(key)
232
251
id_roots_set.add(key)
233
252
p_id_map = chk_inv.parent_id_basename_to_file_id
235
raise AssertionError('Parent id -> file_id map not set')
237
if key not in p_id_roots_set:
238
p_id_roots_set.add(key)
239
self._chk_p_id_roots.append(key)
253
if p_id_map is not None:
255
if key not in p_id_roots_set:
256
p_id_roots_set.add(key)
257
p_id_roots.append(key)
241
# We have finished processing all of the inventory records, we
242
# don't need these sets anymore
244
p_id_roots_set.clear()
245
return _filtered_inv_stream()
259
stream = source_vf.get_record_stream(keys, 'gc-optimal', True)
260
return _filter_inv_stream(stream), id_roots, p_id_roots
247
def _get_chk_streams(self, source_vf, keys, pb=None):
262
def _get_chk_stream(self, source_vf, keys, id_roots, p_id_roots, pb=None):
248
263
# We want to stream the keys from 'id_roots', and things they
249
264
# reference, and then stream things from p_id_roots and things they
250
265
# reference, and then any remaining keys that we didn't get to.
307
294
search_key_func=None)
308
295
common_base = node._search_prefix
309
296
if isinstance(node, chk_map.InternalNode):
310
handle_internal_node(node)
311
elif parse_leaf_nodes:
312
handle_leaf_node(node)
297
for prefix, value in node._items.iteritems():
298
if not isinstance(value, tuple):
299
raise AssertionError("value is %s when"
300
" tuple expected" % (value.__class__))
301
if value not in next_keys:
302
keys_by_search_prefix.setdefault(prefix,
314
306
if pb is not None:
315
pb.update('chk node', counter[0], total_keys)
307
pb.update('chk node', counter[0])
317
309
yield next_stream()
318
310
# Double check that we won't be emitting any keys twice
319
# If we get rid of the pre-calculation of all keys, we could
320
# turn this around and do
321
# next_keys.difference_update(seen_keys)
322
# However, we also may have references to chk pages in another
323
# pack file during autopack. We filter earlier, so we should no
324
# longer need to do this
325
# next_keys = next_keys.intersection(remaining_keys)
311
next_keys = next_keys.intersection(remaining_keys)
327
313
for prefix in sorted(keys_by_search_prefix):
328
cur_keys.extend(keys_by_search_prefix.pop(prefix))
329
for stream in _get_referenced_stream(self._chk_id_roots,
330
self._gather_text_refs):
314
cur_keys.extend(keys_by_search_prefix[prefix])
315
for stream in _get_referenced_stream(id_roots):
332
del self._chk_id_roots
333
# while it isn't really possible for chk_id_roots to not be in the
334
# local group of packs, it is possible that the tree shape has not
335
# changed recently, so we need to filter _chk_p_id_roots by the
337
chk_p_id_roots = [key for key in self._chk_p_id_roots
338
if key in remaining_keys]
339
del self._chk_p_id_roots
340
for stream in _get_referenced_stream(chk_p_id_roots, False):
317
for stream in _get_referenced_stream(p_id_roots):
342
319
if remaining_keys:
343
trace.mutter('There were %d keys in the chk index, %d of which'
344
' were not referenced', total_keys,
346
if self.revision_ids is None:
347
stream = source_vf.get_record_stream(remaining_keys,
351
def _build_vf(self, index_name, parents, delta, for_write=False):
352
"""Build a VersionedFiles instance on top of this group of packs."""
353
index_name = index_name + '_index'
355
access = knit._DirectPackAccess(index_to_pack,
356
reload_func=self._reload_func)
359
if self.new_pack is None:
360
raise AssertionError('No new pack has been set')
361
index = getattr(self.new_pack, index_name)
362
index_to_pack[index] = self.new_pack.access_tuple()
363
index.set_optimize(for_size=True)
364
access.set_writer(self.new_pack._writer, index,
365
self.new_pack.access_tuple())
366
add_callback = index.add_nodes
369
for pack in self.packs:
370
sub_index = getattr(pack, index_name)
371
index_to_pack[sub_index] = pack.access_tuple()
372
indices.append(sub_index)
373
index = _mod_index.CombinedGraphIndex(indices)
375
vf = GroupCompressVersionedFiles(
377
add_callback=add_callback,
379
is_locked=self._pack_collection.repo.is_locked),
384
def _build_vfs(self, index_name, parents, delta):
385
"""Build the source and target VersionedFiles."""
386
source_vf = self._build_vf(index_name, parents,
387
delta, for_write=False)
388
target_vf = self._build_vf(index_name, parents,
389
delta, for_write=True)
390
return source_vf, target_vf
392
def _copy_stream(self, source_vf, target_vf, keys, message, vf_to_stream,
394
trace.mutter('repacking %d %s', len(keys), message)
395
self.pb.update('repacking %s' % (message,), pb_offset)
396
child_pb = ui.ui_factory.nested_progress_bar()
398
stream = vf_to_stream(source_vf, keys, message, child_pb)
399
for _ in target_vf._insert_record_stream(stream,
406
def _copy_revision_texts(self):
407
source_vf, target_vf = self._build_vfs('revision', True, False)
408
if not self.revision_keys:
409
# We are doing a full fetch, aka 'pack'
410
self.revision_keys = source_vf.keys()
411
self._copy_stream(source_vf, target_vf, self.revision_keys,
412
'revisions', self._get_progress_stream, 1)
414
def _copy_inventory_texts(self):
415
source_vf, target_vf = self._build_vfs('inventory', True, True)
416
# It is not sufficient to just use self.revision_keys, as stacked
417
# repositories can have more inventories than they have revisions.
418
# One alternative would be to do something with
419
# get_parent_map(self.revision_keys), but that shouldn't be any faster
421
inventory_keys = source_vf.keys()
422
missing_inventories = set(self.revision_keys).difference(inventory_keys)
423
if missing_inventories:
424
missing_inventories = sorted(missing_inventories)
425
raise ValueError('We are missing inventories for revisions: %s'
426
% (missing_inventories,))
427
self._copy_stream(source_vf, target_vf, inventory_keys,
428
'inventories', self._get_filtered_inv_stream, 2)
430
def _get_chk_vfs_for_copy(self):
431
return self._build_vfs('chk', False, False)
433
def _copy_chk_texts(self):
434
source_vf, target_vf = self._get_chk_vfs_for_copy()
435
# TODO: This is technically spurious... if it is a performance issue,
437
total_keys = source_vf.keys()
438
trace.mutter('repacking chk: %d id_to_entry roots,'
439
' %d p_id_map roots, %d total keys',
440
len(self._chk_id_roots), len(self._chk_p_id_roots),
442
self.pb.update('repacking chk', 3)
443
child_pb = ui.ui_factory.nested_progress_bar()
445
for stream in self._get_chk_streams(source_vf, total_keys,
447
for _ in target_vf._insert_record_stream(stream,
454
def _copy_text_texts(self):
455
source_vf, target_vf = self._build_vfs('text', True, True)
456
# XXX: We don't walk the chk map to determine referenced (file_id,
457
# revision_id) keys. We don't do it yet because you really need
458
# to filter out the ones that are present in the parents of the
459
# rev just before the ones you are copying, otherwise the filter
460
# is grabbing too many keys...
461
text_keys = source_vf.keys()
462
self._copy_stream(source_vf, target_vf, text_keys,
463
'texts', self._get_progress_stream, 4)
465
def _copy_signature_texts(self):
466
source_vf, target_vf = self._build_vfs('signature', False, False)
467
signature_keys = source_vf.keys()
468
signature_keys.intersection(self.revision_keys)
469
self._copy_stream(source_vf, target_vf, signature_keys,
470
'signatures', self._get_progress_stream, 5)
472
def _create_pack_from_packs(self):
473
self.pb.update('repacking', 0, 7)
474
self.new_pack = self.open_pack()
475
# Is this necessary for GC ?
476
self.new_pack.set_write_cache_size(1024*1024)
477
self._copy_revision_texts()
478
self._copy_inventory_texts()
479
self._copy_chk_texts()
480
self._copy_text_texts()
481
self._copy_signature_texts()
482
self.new_pack._check_references()
483
if not self._use_pack(self.new_pack):
484
self.new_pack.abort()
486
self.new_pack.finish_content()
487
if len(self.packs) == 1:
488
old_pack = self.packs[0]
489
if old_pack.name == self.new_pack._hash.hexdigest():
490
# The single old pack was already optimally packed.
491
trace.mutter('single pack %s was already optimally packed',
493
self.new_pack.abort()
495
self.pb.update('finishing repack', 6, 7)
496
self.new_pack.finish()
497
self._pack_collection.allocate(self.new_pack)
501
class GCCHKReconcilePacker(GCCHKPacker):
502
"""A packer which regenerates indices etc as it copies.
504
This is used by ``bzr reconcile`` to cause parent text pointers to be
508
def __init__(self, *args, **kwargs):
    """Initialise as the parent packer does, then set reconcile state.

    All arguments are passed through unchanged to the parent ``__init__``.
    """
    super(GCCHKReconcilePacker, self).__init__(*args, **kwargs)
    # Replaces the False value that GCCHKPacker.__init__ assigns.
    self._gather_text_refs = True
    # _use_pack() only accepts the new pack once this has become True.
    self._data_changed = False
513
def _copy_inventory_texts(self):
514
source_vf, target_vf = self._build_vfs('inventory', True, True)
515
self._copy_stream(source_vf, target_vf, self.revision_keys,
516
'inventories', self._get_filtered_inv_stream, 2)
517
if source_vf.keys() != self.revision_keys:
518
self._data_changed = True
520
def _copy_text_texts(self):
521
"""generate what texts we should have and then copy."""
522
source_vf, target_vf = self._build_vfs('text', True, True)
523
trace.mutter('repacking %d texts', len(self._text_refs))
524
self.pb.update("repacking texts", 4)
525
# we have three major tasks here:
526
# 1) generate the ideal index
527
repo = self._pack_collection.repo
528
# We want the one we just wrote, so base it on self.new_pack
529
revision_vf = self._build_vf('revision', True, False, for_write=True)
530
ancestor_keys = revision_vf.get_parent_map(revision_vf.keys())
531
# Strip keys back into revision_ids.
532
ancestors = dict((k[0], tuple([p[0] for p in parents]))
533
for k, parents in ancestor_keys.iteritems())
535
# TODO: _generate_text_key_index should be much cheaper to generate from
536
# a chk repository, rather than the current implementation
537
ideal_index = repo._generate_text_key_index(None, ancestors)
538
file_id_parent_map = source_vf.get_parent_map(self._text_refs)
539
# 2) generate a keys list that contains all the entries that can
540
# be used as-is, with corrected parents.
542
new_parent_keys = {} # (key, parent_keys)
544
NULL_REVISION = _mod_revision.NULL_REVISION
545
for key in self._text_refs:
551
ideal_parents = tuple(ideal_index[key])
553
discarded_keys.append(key)
554
self._data_changed = True
556
if ideal_parents == (NULL_REVISION,):
558
source_parents = file_id_parent_map[key]
559
if ideal_parents == source_parents:
563
# We need to change the parent graph, but we don't need to
564
# re-insert the text (since we don't pun the compression
565
# parent with the parents list)
566
self._data_changed = True
567
new_parent_keys[key] = ideal_parents
568
# we're finished with some data.
570
del file_id_parent_map
571
# 3) bulk copy the data, updating records than need it
572
def _update_parents_for_texts():
573
stream = source_vf.get_record_stream(self._text_refs,
574
'groupcompress', False)
575
for record in stream:
576
if record.key in new_parent_keys:
577
record.parents = new_parent_keys[record.key]
579
target_vf.insert_record_stream(_update_parents_for_texts())
581
def _use_pack(self, new_pack):
582
"""Override _use_pack to check for reconcile having changed content."""
583
return new_pack.data_inserted() and self._data_changed
586
class GCCHKCanonicalizingPacker(GCCHKPacker):
587
"""A packer that ensures inventories have canonical-form CHK maps.
589
Ideally this would be part of reconcile, but it's very slow and rarely
590
needed. (It repairs repositories affected by
591
https://bugs.launchpad.net/bzr/+bug/522637).
594
def __init__(self, *args, **kwargs):
    """Initialise as the parent packer does, with no change recorded yet.

    All arguments are passed through unchanged to the parent ``__init__``.
    """
    super(GCCHKCanonicalizingPacker, self).__init__(*args, **kwargs)
    # _use_pack() only accepts the new pack once this has become True.
    self._data_changed = False
598
def _exhaust_stream(self, source_vf, keys, message, vf_to_stream, pb_offset):
599
"""Create and exhaust a stream, but don't insert it.
601
This is useful to get the side-effects of generating a stream.
603
self.pb.update('scanning %s' % (message,), pb_offset)
604
child_pb = ui.ui_factory.nested_progress_bar()
606
list(vf_to_stream(source_vf, keys, message, child_pb))
610
def _copy_inventory_texts(self):
    """Copy the chk pages, then the inventories in canonical form.

    In this class the chk copy happens here; ``_copy_chk_texts`` itself is
    a no-op.
    """
    source_vf, target_vf = self._build_vfs('inventory', True, True)
    source_chk_vf, target_chk_vf = self._get_chk_vfs_for_copy()
    inventory_keys = source_vf.keys()
    # Step 1: copy the existing CHKs, on the assumption that most of them
    # are already correct, so they need not be reinserted (and
    # recompressed) later.
    # The filtered inventory stream is iterated but not inserted; this
    # populates the variables that GCCHKPacker._copy_chk_texts needs.
    self._exhaust_stream(source_vf, inventory_keys, 'inventories',
                         self._get_filtered_inv_stream, 2)
    GCCHKPacker._copy_chk_texts(self)

    # Step 2: copy and fix the inventories, and any regenerated CHKs.
    def canonical_stream(vf, keys, message, pb=None):
        return self._get_filtered_canonicalizing_inv_stream(
            vf, keys, message, pb, source_chk_vf, target_chk_vf)

    self._copy_stream(source_vf, target_vf, inventory_keys,
                      'inventories', canonical_stream, 4)
630
def _copy_chk_texts(self):
631
# No-op; in this class this happens during _copy_inventory_texts.
634
def _get_filtered_canonicalizing_inv_stream(self, source_vf, keys, message,
635
pb=None, source_chk_vf=None, target_chk_vf=None):
636
"""Filter the texts of inventories, regenerating CHKs to make sure they
639
total_keys = len(keys)
640
target_chk_vf = versionedfile.NoDupeAddLinesDecorator(target_chk_vf)
641
def _filtered_inv_stream():
642
stream = source_vf.get_record_stream(keys, 'groupcompress', True)
643
search_key_name = None
644
for idx, record in enumerate(stream):
645
# Inventories should always be with revisions; assume success.
646
bytes = record.get_bytes_as('fulltext')
647
chk_inv = inventory.CHKInventory.deserialise(
648
source_chk_vf, bytes, record.key)
650
pb.update('inv', idx, total_keys)
651
chk_inv.id_to_entry._ensure_root()
652
if search_key_name is None:
653
# Find the name corresponding to the search_key_func
654
search_key_reg = chk_map.search_key_registry
655
for search_key_name, func in search_key_reg.iteritems():
656
if func == chk_inv.id_to_entry._search_key_func:
658
canonical_inv = inventory.CHKInventory.from_inventory(
659
target_chk_vf, chk_inv,
660
maximum_size=chk_inv.id_to_entry._root_node._maximum_size,
661
search_key_name=search_key_name)
662
if chk_inv.id_to_entry.key() != canonical_inv.id_to_entry.key():
664
'Non-canonical CHK map for id_to_entry of inv: %s '
665
'(root is %s, should be %s)' % (chk_inv.revision_id,
666
chk_inv.id_to_entry.key()[0],
667
canonical_inv.id_to_entry.key()[0]))
668
self._data_changed = True
669
p_id_map = chk_inv.parent_id_basename_to_file_id
670
p_id_map._ensure_root()
671
canon_p_id_map = canonical_inv.parent_id_basename_to_file_id
672
if p_id_map.key() != canon_p_id_map.key():
674
'Non-canonical CHK map for parent_id_to_basename of '
675
'inv: %s (root is %s, should be %s)'
676
% (chk_inv.revision_id, p_id_map.key()[0],
677
canon_p_id_map.key()[0]))
678
self._data_changed = True
679
yield versionedfile.ChunkedContentFactory(record.key,
680
record.parents, record.sha1,
681
canonical_inv.to_lines())
682
# We have finished processing all of the inventory records, we
683
# don't need these sets anymore
684
return _filtered_inv_stream()
686
def _use_pack(self, new_pack):
687
"""Override _use_pack to check for reconcile having changed content."""
688
return new_pack.data_inserted() and self._data_changed
691
class GCRepositoryPackCollection(RepositoryPackCollection):
693
pack_factory = GCPack
694
resumed_pack_factory = ResumedGCPack
696
def _check_new_inventories(self):
697
"""Detect missing inventories or chk root entries for the new revisions
700
:returns: list of strs, summarising any problems found. If the list is
701
empty no problems were found.
703
# Ensure that all revisions added in this write group have:
704
# - corresponding inventories,
705
# - chk root entries for those inventories,
706
# - and any present parent inventories have their chk root
708
# And all this should be independent of any fallback repository.
710
key_deps = self.repo.revisions._index._key_dependencies
711
new_revisions_keys = key_deps.get_new_keys()
712
no_fallback_inv_index = self.repo.inventories._index
713
no_fallback_chk_bytes_index = self.repo.chk_bytes._index
714
no_fallback_texts_index = self.repo.texts._index
715
inv_parent_map = no_fallback_inv_index.get_parent_map(
717
# Are any inventories for corresponding to the new revisions missing?
718
corresponding_invs = set(inv_parent_map)
719
missing_corresponding = set(new_revisions_keys)
720
missing_corresponding.difference_update(corresponding_invs)
721
if missing_corresponding:
722
problems.append("inventories missing for revisions %s" %
723
(sorted(missing_corresponding),))
725
# Are any chk root entries missing for any inventories? This includes
726
# any present parent inventories, which may be used when calculating
727
# deltas for streaming.
728
all_inv_keys = set(corresponding_invs)
729
for parent_inv_keys in inv_parent_map.itervalues():
730
all_inv_keys.update(parent_inv_keys)
731
# Filter out ghost parents.
732
all_inv_keys.intersection_update(
733
no_fallback_inv_index.get_parent_map(all_inv_keys))
734
parent_invs_only_keys = all_inv_keys.symmetric_difference(
737
inv_ids = [key[-1] for key in all_inv_keys]
738
parent_invs_only_ids = [key[-1] for key in parent_invs_only_keys]
739
root_key_info = _build_interesting_key_sets(
740
self.repo, inv_ids, parent_invs_only_ids)
741
expected_chk_roots = root_key_info.all_keys()
742
present_chk_roots = no_fallback_chk_bytes_index.get_parent_map(
744
missing_chk_roots = expected_chk_roots.difference(present_chk_roots)
745
if missing_chk_roots:
746
problems.append("missing referenced chk root keys: %s"
747
% (sorted(missing_chk_roots),))
748
# Don't bother checking any further.
750
# Find all interesting chk_bytes records, and make sure they are
751
# present, as well as the text keys they reference.
752
chk_bytes_no_fallbacks = self.repo.chk_bytes.without_fallbacks()
753
chk_bytes_no_fallbacks._search_key_func = \
754
self.repo.chk_bytes._search_key_func
755
chk_diff = chk_map.iter_interesting_nodes(
756
chk_bytes_no_fallbacks, root_key_info.interesting_root_keys,
757
root_key_info.uninteresting_root_keys)
760
for record in _filter_text_keys(chk_diff, text_keys,
761
chk_map._bytes_to_text_key):
763
except errors.NoSuchRevision, e:
764
# XXX: It would be nice if we could give a more precise error here.
765
problems.append("missing chk node(s) for id_to_entry maps")
766
chk_diff = chk_map.iter_interesting_nodes(
767
chk_bytes_no_fallbacks, root_key_info.interesting_pid_root_keys,
768
root_key_info.uninteresting_pid_root_keys)
770
for interesting_rec, interesting_map in chk_diff:
772
except errors.NoSuchRevision, e:
774
"missing chk node(s) for parent_id_basename_to_file_id maps")
775
present_text_keys = no_fallback_texts_index.get_parent_map(text_keys)
776
missing_text_keys = text_keys.difference(present_text_keys)
777
if missing_text_keys:
778
problems.append("missing text keys: %r"
779
% (sorted(missing_text_keys),))
782
def _execute_pack_operations(self, pack_operations,
783
_packer_class=GCCHKPacker,
320
trace.note('There were %d keys in the chk index, which were not'
321
' referenced from inventories', len(remaining_keys))
322
stream = source_vf.get_record_stream(remaining_keys, 'unordered',
326
def _execute_pack_operations(self, pack_operations, _packer_class=Packer,
784
327
reload_func=None):
785
328
"""Execute a series of pack operations.
885
508
self._reconcile_does_inventory_gc = True
886
509
self._reconcile_fixes_text_parents = True
887
510
self._reconcile_backsup_inventory = False
889
def _add_inventory_checked(self, revision_id, inv, parents):
890
"""Add inv to the repository after checking the inputs.
892
This function can be overridden to allow different inventory styles.
894
:seealso: add_inventory, for the contract.
897
serializer = self._format._serializer
898
result = inventory.CHKInventory.from_inventory(self.chk_bytes, inv,
899
maximum_size=serializer.maximum_size,
900
search_key_name=serializer.search_key_name)
901
inv_lines = result.to_lines()
902
return self._inventory_add_lines(revision_id, parents,
903
inv_lines, check_content=False)
905
def _create_inv_from_null(self, delta, revision_id):
906
"""This will mutate new_inv directly.
908
This is a simplified form of create_by_apply_delta which knows that all
909
the old values must be None, so everything is a create.
911
serializer = self._format._serializer
912
new_inv = inventory.CHKInventory(serializer.search_key_name)
913
new_inv.revision_id = revision_id
914
entry_to_bytes = new_inv._entry_to_bytes
915
id_to_entry_dict = {}
916
parent_id_basename_dict = {}
917
for old_path, new_path, file_id, entry in delta:
918
if old_path is not None:
919
raise ValueError('Invalid delta, somebody tried to delete %r'
920
' from the NULL_REVISION'
921
% ((old_path, file_id),))
923
raise ValueError('Invalid delta, delta from NULL_REVISION has'
924
' no new_path %r' % (file_id,))
926
new_inv.root_id = file_id
927
parent_id_basename_key = StaticTuple('', '').intern()
929
utf8_entry_name = entry.name.encode('utf-8')
930
parent_id_basename_key = StaticTuple(entry.parent_id,
931
utf8_entry_name).intern()
932
new_value = entry_to_bytes(entry)
934
# new_inv._path_to_fileid_cache[new_path] = file_id
935
key = StaticTuple(file_id).intern()
936
id_to_entry_dict[key] = new_value
937
parent_id_basename_dict[parent_id_basename_key] = file_id
939
new_inv._populate_from_dicts(self.chk_bytes, id_to_entry_dict,
940
parent_id_basename_dict, maximum_size=serializer.maximum_size)
943
def add_inventory_by_delta(self, basis_revision_id, delta, new_revision_id,
944
parents, basis_inv=None, propagate_caches=False):
945
"""Add a new inventory expressed as a delta against another revision.
947
:param basis_revision_id: The inventory id the delta was created
949
:param delta: The inventory delta (see Inventory.apply_delta for
951
:param new_revision_id: The revision id that the inventory is being
953
:param parents: The revision ids of the parents that revision_id is
954
known to have and are in the repository already. These are supplied
955
for repositories that depend on the inventory graph for revision
956
graph access, as well as for those that pun ancestry with delta
958
:param basis_inv: The basis inventory if it is already known,
960
:param propagate_caches: If True, the caches for this inventory are
961
copied to and updated for the result if possible.
963
:returns: (validator, new_inv)
964
The validator(which is a sha1 digest, though what is sha'd is
965
repository format specific) of the serialized inventory, and the
968
if not self.is_in_write_group():
969
raise AssertionError("%r not in write group" % (self,))
970
_mod_revision.check_not_reserved_id(new_revision_id)
972
if basis_inv is None:
973
if basis_revision_id == _mod_revision.NULL_REVISION:
974
new_inv = self._create_inv_from_null(delta, new_revision_id)
975
if new_inv.root_id is None:
976
raise errors.RootMissing()
977
inv_lines = new_inv.to_lines()
978
return self._inventory_add_lines(new_revision_id, parents,
979
inv_lines, check_content=False), new_inv
981
basis_tree = self.revision_tree(basis_revision_id)
982
basis_tree.lock_read()
983
basis_inv = basis_tree.inventory
985
result = basis_inv.create_by_apply_delta(delta, new_revision_id,
986
propagate_caches=propagate_caches)
987
inv_lines = result.to_lines()
988
return self._inventory_add_lines(new_revision_id, parents,
989
inv_lines, check_content=False), result
991
if basis_tree is not None:
994
def _deserialise_inventory(self, revision_id, bytes):
995
return inventory.CHKInventory.deserialise(self.chk_bytes, bytes,
998
def _iter_inventories(self, revision_ids, ordering):
999
"""Iterate over many inventory objects."""
1000
if ordering is None:
1001
ordering = 'unordered'
1002
keys = [(revision_id,) for revision_id in revision_ids]
1003
stream = self.inventories.get_record_stream(keys, ordering, True)
1005
for record in stream:
1006
if record.storage_kind != 'absent':
1007
texts[record.key] = record.get_bytes_as('fulltext')
1009
raise errors.NoSuchRevision(self, record.key)
1011
yield inventory.CHKInventory.deserialise(self.chk_bytes, texts[key], key)
1013
def _iter_inventory_xmls(self, revision_ids, ordering):
1014
# Without a native 'xml' inventory, this method doesn't make sense.
1015
# However older working trees, and older bundles want it - so we supply
1016
# it allowing _get_inventory_xml to work. Bundles currently use the
1017
# serializer directly; this also isn't ideal, but there isn't an xml
1018
# iteration interface offered at all for repositories. We could make
1019
# _iter_inventory_xmls be part of the contract, even if kept private.
1020
inv_to_str = self._serializer.write_inventory_to_string
1021
for inv in self.iter_inventories(revision_ids, ordering=ordering):
1022
yield inv_to_str(inv), inv.revision_id
1024
def _find_present_inventory_keys(self, revision_keys):
1025
parent_map = self.inventories.get_parent_map(revision_keys)
1026
present_inventory_keys = set(k for k in parent_map)
1027
return present_inventory_keys
1029
def fileids_altered_by_revision_ids(self, revision_ids, _inv_weave=None):
1030
"""Find the file ids and versions affected by revisions.
1032
:param revisions: an iterable containing revision ids.
1033
:param _inv_weave: The inventory weave from this repository or None.
1034
If None, the inventory weave will be opened automatically.
1035
:return: a dictionary mapping altered file-ids to an iterable of
1036
revision_ids. Each altered file-ids has the exact revision_ids that
1037
altered it listed explicitly.
1039
rich_root = self.supports_rich_root()
1040
bytes_to_info = inventory.CHKInventory._bytes_to_utf8name_key
1041
file_id_revisions = {}
1042
pb = ui.ui_factory.nested_progress_bar()
1044
revision_keys = [(r,) for r in revision_ids]
1045
parent_keys = self._find_parent_keys_of_revisions(revision_keys)
1046
# TODO: instead of using _find_present_inventory_keys, change the
1047
# code paths to allow missing inventories to be tolerated.
1048
# However, we only want to tolerate missing parent
1049
# inventories, not missing inventories for revision_ids
1050
present_parent_inv_keys = self._find_present_inventory_keys(
1052
present_parent_inv_ids = set(
1053
[k[-1] for k in present_parent_inv_keys])
1054
inventories_to_read = set(revision_ids)
1055
inventories_to_read.update(present_parent_inv_ids)
1056
root_key_info = _build_interesting_key_sets(
1057
self, inventories_to_read, present_parent_inv_ids)
1058
interesting_root_keys = root_key_info.interesting_root_keys
1059
uninteresting_root_keys = root_key_info.uninteresting_root_keys
1060
chk_bytes = self.chk_bytes
1061
for record, items in chk_map.iter_interesting_nodes(chk_bytes,
1062
interesting_root_keys, uninteresting_root_keys,
1064
for name, bytes in items:
1065
(name_utf8, file_id, revision_id) = bytes_to_info(bytes)
1066
# TODO: consider interning file_id, revision_id here, or
1067
# pushing that intern() into bytes_to_info()
1068
# TODO: rich_root should always be True here, for all
1069
# repositories that support chk_bytes
1070
if not rich_root and name_utf8 == '':
1073
file_id_revisions[file_id].add(revision_id)
1075
file_id_revisions[file_id] = set([revision_id])
1078
return file_id_revisions
1080
def find_text_key_references(self):
    """Find the text key references within the repository.

    :return: A dictionary mapping text keys ((fileid, revision_id) tuples)
        to whether they were referred to by the inventory of the
        revision_id that they contain. The inventory texts from all present
        revision ids are assessed to generate this report.
    """
    # XXX: Slow version but correct: rewrite as a series of delta
    # examinations/direct tree traversal. Note that that will require care
    # as a common node is reachable both from the inventory that added it,
    # and others afterwards.
    # NOTE(review): revision_keys is never read below. The call is kept in
    # case something relies on it touching the revisions index; confirm and
    # drop it if not.
    revision_keys = self.revisions.keys()
    result = {}
    rich_roots = self.supports_rich_root()
    pb = ui.ui_factory.nested_progress_bar()
    try:
        all_revs = self.all_revision_ids()
        total = len(all_revs)
        for pos, inv in enumerate(self.iter_inventories(all_revs)):
            pb.update("Finding text references", pos, total)
            for _, entry in inv.iter_entries():
                if not rich_roots and entry.file_id == inv.root_id:
                    # The root entry is skipped when the repository does
                    # not support rich roots.
                    continue
                key = (entry.file_id, entry.revision)
                # Record the key as seen; it only becomes True when the
                # inventory of its own revision refers to it.
                result.setdefault(key, False)
                if entry.revision == inv.revision_id:
                    result[key] = True
        return result
    finally:
        # Always release the progress bar, even if iteration fails.
        pb.finished()
1113
def reconcile_canonicalize_chks(self):
    """Reconcile this repository to make sure all CHKs are in canonical
    form.

    :return: The PackReconciler that was run.
    """
    # Imported at call time rather than at module scope.
    from bzrlib.reconcile import PackReconciler
    reconciler = PackReconciler(self, thorough=True, canonicalize_chks=True)
    reconciler.reconcile()
    # NOTE(review): the statement after reconcile() was lost from this copy;
    # returning the reconciler is presumed -- confirm against upstream.
    return reconciler
1122
def _reconcile_pack(self, collection, packs, extension, revs, pb):
    """Repack `packs` using a GCCHKReconcilePacker.

    :param collection: Pack collection handed to the packer.
    :param packs: The packs to process.
    :param extension: Suffix handed to the packer.
    :param revs: Not used by this implementation.
    :param pb: Progress bar passed through to packer.pack().
    :return: Whatever packer.pack(pb) returns.
    """
    packer = GCCHKReconcilePacker(collection, packs, extension)
    return packer.pack(pb)
1126
def _canonicalize_chks_pack(self, collection, packs, extension, revs, pb):
    """Repack `packs` using a GCCHKCanonicalizingPacker.

    :param collection: Pack collection handed to the packer.
    :param packs: The packs to process.
    :param extension: Suffix handed to the packer.
    :param revs: Revision ids handed to the packer.
    :param pb: Progress bar passed through to packer.pack().
    :return: Whatever packer.pack(pb) returns.
    """
    packer = GCCHKCanonicalizingPacker(collection, packs, extension, revs)
    return packer.pack(pb)
1130
def _get_source(self, to_format):
    """Return a source for streaming from this repository.

    :param to_format: The repository format of the stream's target.
    :return: A GroupCHKStreamSource when both formats share a serializer,
        otherwise the source chosen by the base class.
    """
    if self._format._serializer == to_format._serializer:
        # We must be exactly the same format, otherwise stuff like the chk
        # page layout might be different.
        # Actually, this test is just slightly looser than exact so that
        # CHK2 <-> 2a transfers will work.
        return GroupCHKStreamSource(self, to_format)
    return super(CHKInventoryRepository, self)._get_source(to_format)
1141
class GroupCHKStreamSource(KnitPackStreamSource):
    """Used when both the source and target repo are GroupCHK repos."""

    def __init__(self, from_repository, to_format):
        """Create a StreamSource streaming from from_repository."""
        super(GroupCHKStreamSource, self).__init__(from_repository, to_format)
        self._revision_keys = None
        self._text_keys = None
        self._text_fetch_order = 'groupcompress'
        # Root keys collected by _get_inventory_stream(); reset to None once
        # the matching chk stream has been fully consumed.
        self._chk_id_roots = None
        self._chk_p_id_roots = None

    def _get_inventory_stream(self, inventory_keys, allow_absent=False):
        """Get a stream of inventory texts.

        When this function returns, self._chk_id_roots and self._chk_p_id_roots
        should be populated.

        :param inventory_keys: Keys of the inventories to stream.
        :param allow_absent: If True, absent records are skipped instead of
            raising NoSuchRevision.
        :return: An ('inventories', record_generator) tuple. The root lists
            are filled in as the generator is consumed.
        """
        self._chk_id_roots = []
        self._chk_p_id_roots = []

        def _filtered_inv_stream():
            # The sets give O(1) duplicate checks; the lists on self keep
            # first-seen order.
            id_roots_set = set()
            p_id_roots_set = set()
            source_vf = self.from_repository.inventories
            stream = source_vf.get_record_stream(inventory_keys,
                                                 'groupcompress', True)
            for record in stream:
                if record.storage_kind == 'absent':
                    if allow_absent:
                        continue
                    else:
                        raise errors.NoSuchRevision(self, record.key)
                inv_bytes = record.get_bytes_as('fulltext')
                # NOTE(review): the final argument of deserialise() was lost
                # from this copy; record.key is presumed -- confirm.
                chk_inv = inventory.CHKInventory.deserialise(None, inv_bytes,
                                                             record.key)
                key = chk_inv.id_to_entry.key()
                if key not in id_roots_set:
                    self._chk_id_roots.append(key)
                    id_roots_set.add(key)
                p_id_map = chk_inv.parent_id_basename_to_file_id
                if p_id_map is None:
                    raise AssertionError('Parent id -> file_id map not set')
                key = p_id_map.key()
                if key not in p_id_roots_set:
                    p_id_roots_set.add(key)
                    self._chk_p_id_roots.append(key)
                yield record
            # We have finished processing all of the inventory records, we
            # don't need these sets anymore
            id_roots_set.clear()
            p_id_roots_set.clear()
        return ('inventories', _filtered_inv_stream())

    def _get_filtered_chk_streams(self, excluded_revision_keys):
        """Yield the two chk_bytes streams for the collected root keys.

        :param excluded_revision_keys: A set of revision keys whose
            inventories mark chk pages as uninteresting. NULL_REVISION is
            discarded from it in place.
        :return: A generator of two ('chk_bytes', record_generator) tuples:
            first the id_to_entry pages, then the
            parent_id_basename_to_file_id pages.
        """
        self._text_keys = set()
        excluded_revision_keys.discard(_mod_revision.NULL_REVISION)
        if not excluded_revision_keys:
            uninteresting_root_keys = set()
            uninteresting_pid_root_keys = set()
        else:
            # filter out any excluded revisions whose inventories are not
            # actually present
            # TODO: Update Repository.iter_inventories() to add
            #       ignore_missing=True
            present_keys = self.from_repository._find_present_inventory_keys(
                excluded_revision_keys)
            present_ids = [k[-1] for k in present_keys]
            uninteresting_root_keys = set()
            uninteresting_pid_root_keys = set()
            for inv in self.from_repository.iter_inventories(present_ids):
                uninteresting_root_keys.add(inv.id_to_entry.key())
                uninteresting_pid_root_keys.add(
                    inv.parent_id_basename_to_file_id.key())
        chk_bytes = self.from_repository.chk_bytes

        def _filter_id_to_entry():
            interesting_nodes = chk_map.iter_interesting_nodes(chk_bytes,
                self._chk_id_roots, uninteresting_root_keys)
            # _filter_text_keys fills self._text_keys as a side effect.
            for record in _filter_text_keys(interesting_nodes, self._text_keys,
                                            chk_map._bytes_to_text_key):
                if record is not None:
                    yield record
            # Consumed
            self._chk_id_roots = None
        yield 'chk_bytes', _filter_id_to_entry()

        def _get_parent_id_basename_to_file_id_pages():
            for record, _items in chk_map.iter_interesting_nodes(chk_bytes,
                    self._chk_p_id_roots, uninteresting_pid_root_keys):
                if record is not None:
                    yield record
            # Consumed
            self._chk_p_id_roots = None
        yield 'chk_bytes', _get_parent_id_basename_to_file_id_pages()

    def get_stream(self, search):
        """Yield (substream_name, record_generator) pairs for `search`.

        Substreams come in this order: whatever _fetch_revision_texts()
        yields, then inventories, the chk_bytes streams, and texts. Each
        record generator is wrapped so progress is reported while it is
        consumed.

        :param search: Object whose get_keys() gives the revision ids to
            stream.
        """
        def wrap_and_count(pb, rc, stream):
            """Yield records from stream while showing progress."""
            # NOTE(review): the counter bookkeeping lines (initialise,
            # rc.increment, reset, increment) were lost from this copy and
            # are rebuilt from the surviving `count == rc.STEP` test and
            # pb.update() call -- confirm against upstream.
            count = 0
            for record in stream:
                if count == rc.STEP:
                    rc.increment(count)
                    pb.update('Estimate', rc.current, rc.max)
                    count = 0
                count += 1
                yield record

        revision_ids = search.get_keys()
        pb = ui.ui_factory.nested_progress_bar()
        rc = self._record_counter
        self._record_counter.setup(len(revision_ids))
        for stream_info in self._fetch_revision_texts(revision_ids):
            yield (stream_info[0],
                   wrap_and_count(pb, rc, stream_info[1]))
        self._revision_keys = [(rev_id,) for rev_id in revision_ids]
        self.from_repository.revisions.clear_cache()
        self.from_repository.signatures.clear_cache()
        s = self._get_inventory_stream(self._revision_keys)
        yield (s[0], wrap_and_count(pb, rc, s[1]))
        self.from_repository.inventories.clear_cache()
        # TODO: The keys to exclude might be part of the search recipe
        # For now, exclude all parents that are at the edge of ancestry, for
        # which we have inventories
        from_repo = self.from_repository
        parent_keys = from_repo._find_parent_keys_of_revisions(
            self._revision_keys)
        for stream_info in self._get_filtered_chk_streams(parent_keys):
            yield (stream_info[0], wrap_and_count(pb, rc, stream_info[1]))
        self.from_repository.chk_bytes.clear_cache()
        s = self._get_text_stream()
        yield (s[0], wrap_and_count(pb, rc, s[1]))
        self.from_repository.texts.clear_cache()
        pb.update('Done', rc.max, rc.max)
        # NOTE(review): presumed final call (lost from this copy); releases
        # the nested progress bar.
        pb.finished()

    def get_stream_for_missing_keys(self, missing_keys):
        """Yield substreams that supply the inventories named in missing_keys.

        :param missing_keys: Iterable of key tuples whose first element must
            be 'inventories'; the rest of each tuple is the inventory key.
        :raises AssertionError: If a key is not an inventory key, or if
            get_stream() has not been fully consumed yet.
        """
        # missing keys can only occur when we are byte copying and not
        # translating (because translation means we don't send
        # unreconstructable deltas ever).
        missing_inventory_keys = set()
        for key in missing_keys:
            if key[0] != 'inventories':
                raise AssertionError('The only missing keys we should'
                    ' be filling in are inventory keys, not %s'
                    % key[0])
            missing_inventory_keys.add(key[1:])
        if self._chk_id_roots or self._chk_p_id_roots:
            raise AssertionError('Cannot call get_stream_for_missing_keys'
                ' until all of get_stream() has been consumed.')
        # Yield the inventory stream, so we can find the chk stream
        # Some of the missing_keys will be missing because they are ghosts.
        # As such, we can ignore them. The Sink is required to verify there are
        # no unavailable texts when the ghost inventories are not filled in.
        yield self._get_inventory_stream(missing_inventory_keys,
                                         allow_absent=True)
        # We use the empty set for excluded_revision_keys, to make it clear
        # that we want to transmit all referenced chk pages.
        for stream_info in self._get_filtered_chk_streams(set()):
            yield stream_info
1300
class _InterestingKeyInfo(object):
    """Holds four sets of CHK root keys, split two ways.

    Keys are either interesting or uninteresting, and either plain root
    keys or "pid" root keys.
    """

    def __init__(self):
        self.interesting_root_keys = set()
        self.interesting_pid_root_keys = set()
        self.uninteresting_root_keys = set()
        self.uninteresting_pid_root_keys = set()

    def all_interesting(self):
        """Return a new set holding both kinds of interesting root key."""
        return self.interesting_root_keys.union(self.interesting_pid_root_keys)

    def all_uninteresting(self):
        """Return a new set holding both kinds of uninteresting root key."""
        return self.uninteresting_root_keys.union(
            self.uninteresting_pid_root_keys)

    # NOTE(review): this method's `def` line was lost from this copy; the
    # name all_keys is presumed -- confirm against callers.
    def all_keys(self):
        """Return a new set holding every key from all four sets."""
        return self.all_interesting().union(self.all_uninteresting())
1318
def _build_interesting_key_sets(repo, inventory_ids, parent_only_inv_ids):
    """Sort the CHK root keys of some inventories into two groups.

    :param repo: Object whose iter_inventories() supplies the inventories.
    :param inventory_ids: Revision ids of every inventory to examine.
    :param parent_only_inv_ids: Container of revision ids. Inventories whose
        revision_id is in it contribute to the uninteresting sets; all
        others contribute to the interesting sets.
    :return: An _InterestingKeyInfo holding the collected keys.
    """
    result = _InterestingKeyInfo()
    for inv in repo.iter_inventories(inventory_ids, 'unordered'):
        root_key = inv.id_to_entry.key()
        pid_root_key = inv.parent_id_basename_to_file_id.key()
        if inv.revision_id in parent_only_inv_ids:
            result.uninteresting_root_keys.add(root_key)
            result.uninteresting_pid_root_keys.add(pid_root_key)
        else:
            result.interesting_root_keys.add(root_key)
            result.interesting_pid_root_keys.add(pid_root_key)
    return result
1332
def _filter_text_keys(interesting_nodes_iterable, text_keys, bytes_to_text_key):
    """Iterate the result of iter_interesting_nodes, yielding the records
    and adding to text_keys.

    :param interesting_nodes_iterable: Iterable of (record, items) pairs,
        where items is an iterable of (name, bytes) pairs.
    :param text_keys: A set, updated in place with the converted keys.
    :param bytes_to_text_key: Callable that turns an item's bytes into a
        text key.
    :return: A generator yielding each record unchanged, including None.
    """
    # Bind the method once; this loop can run over many nodes.
    text_keys_update = text_keys.update
    for record, items in interesting_nodes_iterable:
        text_keys_update([bytes_to_text_key(b) for _name, b in items])
        yield record
1344
class RepositoryFormatCHK1(RepositoryFormatPack):
    """A hashed CHK+group compress pack repository."""

    repository_class = CHKInventoryRepository
    supports_external_lookups = True
    supports_chks = True
    # For right now, setting this to True gives us InterModel1And2 rather
    # than InterDifferingSerializer
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    _serializer = chk_serializer.chk_serializer_255_bigpage
    _commit_inv_deltas = True
    # What index classes to use
    index_builder_class = BTreeBuilder
    index_class = BTreeGraphIndex
    # Note: We cannot unpack a delta that references a text we haven't
    # seen yet. There are 2 options, work in fulltexts, or require
    # topological sorting. Using fulltexts is more optimal for local
    # operations, because the source can be smart about extracting
    # multiple in-a-row (and sharing strings). Topological is better
    # for remote, because we access less data.
    _fetch_order = 'unordered'
    _fetch_uses_deltas = False # essentially ignored by the groupcompress code.
    # NOTE(review): one attribute line between _fetch_uses_deltas and
    # pack_compresses was lost from this copy; `fast_deltas = True` is
    # presumed -- confirm against upstream.
    fast_deltas = True
    pack_compresses = True

    def _get_matching_bzrdir(self):
        """Return the bzrdir format registered as 'development6-rich-root'."""
        return bzrdir.format_registry.make_bzrdir('development6-rich-root')

    def _ignore_setting_bzrdir(self, format):
        """Setter for _matchingbzrdir; assignments are ignored."""
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return ('Bazaar development format - group compression and chk inventory'
                ' (needs bzr.dev from 1.14)\n')

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return ("Development repository format - rich roots, group compression"
                " and chk inventories")
1389
class RepositoryFormatCHK2(RepositoryFormatCHK1):
    """A CHK repository that uses the bencode revision serializer."""

    _serializer = chk_serializer.chk_bencode_serializer

    def _get_matching_bzrdir(self):
        """Return the bzrdir format registered as 'development7-rich-root'."""
        return bzrdir.format_registry.make_bzrdir('development7-rich-root')

    def _ignore_setting_bzrdir(self, format):
        """Setter for _matchingbzrdir; assignments are ignored."""
        pass

    # Rebuilt here so the property uses this class's getter rather than the
    # one captured by the base class.
    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return ('Bazaar development format - chk repository with bencode '
                'revision serialization (needs bzr.dev from 1.16)\n')
1408
class RepositoryFormat2a(RepositoryFormatCHK2):
    """A CHK repository that uses the bencode revision serializer.

    This is the same as RepositoryFormatCHK2 but with a public name.
    """

    _serializer = chk_serializer.chk_bencode_serializer

    def _get_matching_bzrdir(self):
        """Return the bzrdir format registered as '2a'."""
        return bzrdir.format_registry.make_bzrdir('2a')

    def _ignore_setting_bzrdir(self, format):
        """Setter for _matchingbzrdir; assignments are ignored."""
        pass

    # Rebuilt here so the property uses this class's getter rather than the
    # one captured by the base class.
    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return ('Bazaar repository format 2a (needs bzr 1.16 or later)\n')

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return ("Repository format 2a - rich roots, group compression"
                " and chk inventories")
511
# Note: We cannot unpack a delta that references a text we haven't seen yet.
512
# There are 2 options, work in fulltexts, or require topological
513
# sorting. Using fulltexts is more optimal for local operations,
514
# because the source can be smart about extracting multiple
515
# in-a-row (and sharing strings). Topological is better for
516
# remote, because we access less data.
517
self._fetch_order = 'unordered'
518
self._fetch_gc_optimal = True
519
self._fetch_uses_deltas = False
523
class GCCHKPackRepository(CHKInventoryRepository):
    """GC customisation of CHKInventoryRepository."""

    def __init__(self, _format, a_bzrdir, control_files, _commit_builder_class,
        _serializer):
        """Overridden to change pack collection class.

        :raises AssertionError: If _format does not support CHKs.
        """
        KnitPackRepository.__init__(self, _format, a_bzrdir, control_files,
            _commit_builder_class, _serializer)
        # and now replace everything it did :)
        index_transport = self._transport.clone('indices')
        # NOTE(review): the argument after index_builder_class and the
        # closing of this call were lost from this copy;
        # `_format.index_class` is presumed -- confirm against upstream.
        self._pack_collection = GCRepositoryPackCollection(self,
            self._transport, index_transport,
            self._transport.clone('upload'),
            self._transport.clone('packs'),
            _format.index_builder_class,
            _format.index_class,
            use_chk_index=self._format.supports_chks,
            )
        self.inventories = GroupCompressVersionedFiles(
            _GCGraphIndex(self._pack_collection.inventory_index.combined_index,
                add_callback=self._pack_collection.inventory_index.add_callback,
                parents=True, is_locked=self.is_locked),
            access=self._pack_collection.inventory_index.data_access)
        # NOTE(review): the trailing keyword argument of the revisions and
        # signatures constructors was lost from this copy; `delta=False` is
        # presumed -- confirm against upstream.
        self.revisions = GroupCompressVersionedFiles(
            _GCGraphIndex(self._pack_collection.revision_index.combined_index,
                add_callback=self._pack_collection.revision_index.add_callback,
                parents=True, is_locked=self.is_locked),
            access=self._pack_collection.revision_index.data_access,
            delta=False)
        self.signatures = GroupCompressVersionedFiles(
            _GCGraphIndex(self._pack_collection.signature_index.combined_index,
                add_callback=self._pack_collection.signature_index.add_callback,
                parents=False, is_locked=self.is_locked),
            access=self._pack_collection.signature_index.data_access,
            delta=False)
        self.texts = GroupCompressVersionedFiles(
            _GCGraphIndex(self._pack_collection.text_index.combined_index,
                add_callback=self._pack_collection.text_index.add_callback,
                parents=True, is_locked=self.is_locked),
            access=self._pack_collection.text_index.data_access)
        # Raised explicitly instead of using a bare `assert`, so the check
        # still runs under `python -O`.
        if not _format.supports_chks:
            raise AssertionError('_format.supports_chks should be True')
        # No parents, individual CHK pages don't have specific ancestry
        self.chk_bytes = GroupCompressVersionedFiles(
            _GCGraphIndex(self._pack_collection.chk_index.combined_index,
                add_callback=self._pack_collection.chk_index.add_callback,
                parents=False, is_locked=self.is_locked),
            access=self._pack_collection.chk_index.data_access)
        # True when the repository object is 'write locked' (as opposed to the
        # physical lock only taken out around changes to the pack-names list.)
        # Another way to represent this would be a decorator around the control
        # files object that presents logical locks as physical ones - if this
        # gets ugly consider that alternative design. RBC 20071011
        self._write_lock_count = 0
        self._transaction = None
        self._reconcile_does_inventory_gc = True
        self._reconcile_fixes_text_parents = True
        self._reconcile_backsup_inventory = False
        # Note: We cannot unpack a delta that references a text we haven't
        # seen yet. There are 2 options, work in fulltexts, or require
        # topological sorting. Using fulltexts is more optimal for local
        # operations, because the source can be smart about extracting
        # multiple in-a-row (and sharing strings). Topological is better
        # for remote, because we access less data.
        self._fetch_order = 'unordered'
        self._fetch_gc_optimal = True
        self._fetch_uses_deltas = False
592
class RepositoryFormatPackGCPlain(RepositoryFormatKnitPack6):
    """A B+Tree index using pack repository."""

    repository_class = GCPackRepository
    rich_root_data = False

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return ("Bazaar development format - btree+gc "
                "(needs bzr.dev from 1.13)\n")

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return ("Development repository format - btree+groupcompress "
                ", interoperates with pack-0.92\n")
610
class RepositoryFormatPackGCCHK16(RepositoryFormatPackDevelopment5Hash16):
    """A hashed CHK+group compress pack repository."""

    repository_class = GCCHKPackRepository
    rich_root_data = True

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return ('Bazaar development format - hash16chk+gc rich-root'
                ' (needs bzr.dev from 1.13)\n')

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return ("Development repository format - hash16chk+groupcompress")
626
class RepositoryFormatPackGCCHK255(RepositoryFormatPackDevelopment5Hash255):
    """A hashed CHK+group compress pack repository."""

    repository_class = GCCHKPackRepository
    rich_root_data = True

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return ('Bazaar development format - hash255chk+gc rich-root'
                ' (needs bzr.dev from 1.13)\n')

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return ("Development repository format - hash255chk+groupcompress")
642
def pack_incompatible(source, target, orig_method=InterPackRepo.is_compatible):
    """Be incompatible with the regular fetch code.

    :param source: Source repository; its _format is inspected.
    :param target: Target repository; its _format is inspected.
    :param orig_method: The InterPackRepo.is_compatible that was in place
        when this module was loaded, used for all other formats.
    :return: False if either repository uses one of the groupcompress
        formats listed below, otherwise orig_method(source, target).
    """
    # NOTE(review): RepositoryFormatPackGC is not defined in the visible
    # part of this module (only RepositoryFormatPackGCPlain is) -- confirm
    # the name resolves.
    # NOTE(review): a line between the two original `formats` assignments
    # was lost from this copy; it may have made the CHK formats conditional
    # -- confirm against upstream.
    formats = (RepositoryFormatPackGC,
               RepositoryFormatPackGCCHK16,
               RepositoryFormatPackGCCHK255)
    if isinstance(source._format, formats) or isinstance(target._format, formats):
        return False
    return orig_method(source, target)
654
# Replace InterPackRepo.is_compatible at import time with pack_incompatible,
# which keeps the previous implementation as its orig_method default.
InterPackRepo.is_compatible = staticmethod(pack_incompatible)