186
156
self._writer.begin()
187
157
# what state is the pack in? (open, finished, aborted)
188
158
self._state = 'open'
191
RepositoryPackCollection.pack_factory = NewPack
159
# no name until we finish writing the content
162
def _check_references(self):
163
"""Make sure our external references are present.
165
Packs are allowed to have deltas whose base is not in the pack, but it
166
must be present somewhere in this collection. It is not allowed to
167
have deltas based on a fallback repository.
168
(See <https://bugs.launchpad.net/bzr/+bug/288751>)
170
# Groupcompress packs don't have any external references, arguably CHK
171
# pages have external references, but we cannot 'cheaply' determine
172
# them without actually walking all of the chk pages.
175
class ResumedGCPack(ResumedPack):
177
def _check_references(self):
178
"""Make sure our external compression parents are present."""
179
# See GCPack._check_references for why this is empty
181
def _get_external_refs(self, index):
182
# GC repositories don't have compression parents external to a given
187
class GCCHKPacker(Packer):
188
"""This class understands what it takes to collect a GCCHK repo."""
190
def __init__(self, pack_collection, packs, suffix, revision_ids=None,
192
super(GCCHKPacker, self).__init__(pack_collection, packs, suffix,
193
revision_ids=revision_ids,
194
reload_func=reload_func)
195
self._pack_collection = pack_collection
196
# ATM, We only support this for GCCHK repositories
197
if pack_collection.chk_index is None:
198
raise AssertionError('pack_collection.chk_index should not be None')
199
self._gather_text_refs = False
200
self._chk_id_roots = []
201
self._chk_p_id_roots = []
202
self._text_refs = None
203
# set by .pack() if self.revision_ids is not None
204
self.revision_keys = None
206
def _get_progress_stream(self, source_vf, keys, message, pb):
208
substream = source_vf.get_record_stream(keys, 'groupcompress', True)
209
for idx, record in enumerate(substream):
211
pb.update(message, idx + 1, len(keys))
215
def _get_filtered_inv_stream(self, source_vf, keys, message, pb=None):
216
"""Filter the texts of inventories, to find the chk pages."""
217
total_keys = len(keys)
218
def _filtered_inv_stream():
220
p_id_roots_set = set()
221
stream = source_vf.get_record_stream(keys, 'groupcompress', True)
222
for idx, record in enumerate(stream):
223
# Inventories should always be with revisions; assume success.
224
bytes = record.get_bytes_as('fulltext')
225
chk_inv = inventory.CHKInventory.deserialise(None, bytes,
228
pb.update('inv', idx, total_keys)
229
key = chk_inv.id_to_entry.key()
230
if key not in id_roots_set:
231
self._chk_id_roots.append(key)
232
id_roots_set.add(key)
233
p_id_map = chk_inv.parent_id_basename_to_file_id
235
raise AssertionError('Parent id -> file_id map not set')
237
if key not in p_id_roots_set:
238
p_id_roots_set.add(key)
239
self._chk_p_id_roots.append(key)
241
# We have finished processing all of the inventory records, we
242
# don't need these sets anymore
244
p_id_roots_set.clear()
245
return _filtered_inv_stream()
247
def _get_chk_streams(self, source_vf, keys, pb=None):
248
# We want to stream the keys from 'id_roots', and things they
249
# reference, and then stream things from p_id_roots and things they
250
# reference, and then any remaining keys that we didn't get to.
252
# We also group referenced texts together, so if one root references a
253
# text with prefix 'a', and another root references a node with prefix
254
# 'a', we want to yield those nodes before we yield the nodes for 'b'
255
# This keeps 'similar' nodes together.
257
# Note: We probably actually want multiple streams here, to help the
258
# client understand that the different levels won't compress well
259
# against each other.
260
# Test the difference between using one Group per level, and
261
# using 1 Group per prefix. (so '' (root) would get a group, then
262
# all the references to search-key 'a' would get a group, etc.)
263
total_keys = len(keys)
264
remaining_keys = set(keys)
266
if self._gather_text_refs:
267
self._text_refs = set()
268
def _get_referenced_stream(root_keys, parse_leaf_nodes=False):
271
keys_by_search_prefix = {}
272
remaining_keys.difference_update(cur_keys)
274
def handle_internal_node(node):
275
for prefix, value in node._items.iteritems():
276
# We don't want to request the same key twice, and we
277
# want to order it by the first time it is seen.
278
# Even further, we don't want to request a key which is
279
# not in this group of pack files (it should be in the
280
# repo, but it doesn't have to be in the group being
282
# TODO: consider how to treat externally referenced chk
283
# pages as 'external_references' so that we
284
# always fill them in for stacked branches
285
if value not in next_keys and value in remaining_keys:
286
keys_by_search_prefix.setdefault(prefix,
289
def handle_leaf_node(node):
290
# Store is None, because we know we have a LeafNode, and we
291
# just want its entries
292
for file_id, bytes in node.iteritems(None):
293
self._text_refs.add(chk_map._bytes_to_text_key(bytes))
295
stream = source_vf.get_record_stream(cur_keys,
296
'as-requested', True)
297
for record in stream:
298
if record.storage_kind == 'absent':
299
# An absent CHK record: we assume that the missing
300
# record is in a different pack - e.g. a page not
301
# altered by the commit we're packing.
303
bytes = record.get_bytes_as('fulltext')
304
# We don't care about search_key_func for this code,
305
# because we only care about external references.
306
node = chk_map._deserialise(bytes, record.key,
307
search_key_func=None)
308
common_base = node._search_prefix
309
if isinstance(node, chk_map.InternalNode):
310
handle_internal_node(node)
311
elif parse_leaf_nodes:
312
handle_leaf_node(node)
315
pb.update('chk node', counter[0], total_keys)
318
# Double check that we won't be emitting any keys twice
319
# If we get rid of the pre-calculation of all keys, we could
320
# turn this around and do
321
# next_keys.difference_update(seen_keys)
322
# However, we also may have references to chk pages in another
323
# pack file during autopack. We filter earlier, so we should no
324
# longer need to do this
325
# next_keys = next_keys.intersection(remaining_keys)
327
for prefix in sorted(keys_by_search_prefix):
328
cur_keys.extend(keys_by_search_prefix.pop(prefix))
329
for stream in _get_referenced_stream(self._chk_id_roots,
330
self._gather_text_refs):
332
del self._chk_id_roots
333
# while it isn't really possible for chk_id_roots to not be in the
334
# local group of packs, it is possible that the tree shape has not
335
# changed recently, so we need to filter _chk_p_id_roots by the
337
chk_p_id_roots = [key for key in self._chk_p_id_roots
338
if key in remaining_keys]
339
del self._chk_p_id_roots
340
for stream in _get_referenced_stream(chk_p_id_roots, False):
343
trace.mutter('There were %d keys in the chk index, %d of which'
344
' were not referenced', total_keys,
346
if self.revision_ids is None:
347
stream = source_vf.get_record_stream(remaining_keys,
351
def _build_vf(self, index_name, parents, delta, for_write=False):
352
"""Build a VersionedFiles instance on top of this group of packs."""
353
index_name = index_name + '_index'
355
access = knit._DirectPackAccess(index_to_pack,
356
reload_func=self._reload_func)
359
if self.new_pack is None:
360
raise AssertionError('No new pack has been set')
361
index = getattr(self.new_pack, index_name)
362
index_to_pack[index] = self.new_pack.access_tuple()
363
index.set_optimize(for_size=True)
364
access.set_writer(self.new_pack._writer, index,
365
self.new_pack.access_tuple())
366
add_callback = index.add_nodes
369
for pack in self.packs:
370
sub_index = getattr(pack, index_name)
371
index_to_pack[sub_index] = pack.access_tuple()
372
indices.append(sub_index)
373
index = _mod_index.CombinedGraphIndex(indices)
375
vf = GroupCompressVersionedFiles(
377
add_callback=add_callback,
379
is_locked=self._pack_collection.repo.is_locked),
384
def _build_vfs(self, index_name, parents, delta):
385
"""Build the source and target VersionedFiles."""
386
source_vf = self._build_vf(index_name, parents,
387
delta, for_write=False)
388
target_vf = self._build_vf(index_name, parents,
389
delta, for_write=True)
390
return source_vf, target_vf
392
def _copy_stream(self, source_vf, target_vf, keys, message, vf_to_stream,
394
trace.mutter('repacking %d %s', len(keys), message)
395
self.pb.update('repacking %s' % (message,), pb_offset)
396
child_pb = ui.ui_factory.nested_progress_bar()
398
stream = vf_to_stream(source_vf, keys, message, child_pb)
399
for _ in target_vf._insert_record_stream(stream,
406
def _copy_revision_texts(self):
407
source_vf, target_vf = self._build_vfs('revision', True, False)
408
if not self.revision_keys:
409
# We are doing a full fetch, aka 'pack'
410
self.revision_keys = source_vf.keys()
411
self._copy_stream(source_vf, target_vf, self.revision_keys,
412
'revisions', self._get_progress_stream, 1)
414
def _copy_inventory_texts(self):
415
source_vf, target_vf = self._build_vfs('inventory', True, True)
416
# It is not sufficient to just use self.revision_keys, as stacked
417
# repositories can have more inventories than they have revisions.
418
# One alternative would be to do something with
419
# get_parent_map(self.revision_keys), but that shouldn't be any faster
421
inventory_keys = source_vf.keys()
422
missing_inventories = set(self.revision_keys).difference(inventory_keys)
423
if missing_inventories:
424
missing_inventories = sorted(missing_inventories)
425
raise ValueError('We are missing inventories for revisions: %s'
426
% (missing_inventories,))
427
self._copy_stream(source_vf, target_vf, inventory_keys,
428
'inventories', self._get_filtered_inv_stream, 2)
430
def _get_chk_vfs_for_copy(self):
431
return self._build_vfs('chk', False, False)
433
def _copy_chk_texts(self):
434
source_vf, target_vf = self._get_chk_vfs_for_copy()
435
# TODO: This is technically spurious... if it is a performance issue,
437
total_keys = source_vf.keys()
438
trace.mutter('repacking chk: %d id_to_entry roots,'
439
' %d p_id_map roots, %d total keys',
440
len(self._chk_id_roots), len(self._chk_p_id_roots),
442
self.pb.update('repacking chk', 3)
443
child_pb = ui.ui_factory.nested_progress_bar()
445
for stream in self._get_chk_streams(source_vf, total_keys,
447
for _ in target_vf._insert_record_stream(stream,
454
def _copy_text_texts(self):
455
source_vf, target_vf = self._build_vfs('text', True, True)
456
# XXX: We don't walk the chk map to determine referenced (file_id,
457
# revision_id) keys. We don't do it yet because you really need
458
# to filter out the ones that are present in the parents of the
459
# rev just before the ones you are copying, otherwise the filter
460
# is grabbing too many keys...
461
text_keys = source_vf.keys()
462
self._copy_stream(source_vf, target_vf, text_keys,
463
'texts', self._get_progress_stream, 4)
465
def _copy_signature_texts(self):
466
source_vf, target_vf = self._build_vfs('signature', False, False)
467
signature_keys = source_vf.keys()
468
signature_keys.intersection(self.revision_keys)
469
self._copy_stream(source_vf, target_vf, signature_keys,
470
'signatures', self._get_progress_stream, 5)
472
def _create_pack_from_packs(self):
473
self.pb.update('repacking', 0, 7)
474
self.new_pack = self.open_pack()
475
# Is this necessary for GC ?
476
self.new_pack.set_write_cache_size(1024*1024)
477
self._copy_revision_texts()
478
self._copy_inventory_texts()
479
self._copy_chk_texts()
480
self._copy_text_texts()
481
self._copy_signature_texts()
482
self.new_pack._check_references()
483
if not self._use_pack(self.new_pack):
484
self.new_pack.abort()
486
self.new_pack.finish_content()
487
if len(self.packs) == 1:
488
old_pack = self.packs[0]
489
if old_pack.name == self.new_pack._hash.hexdigest():
490
# The single old pack was already optimally packed.
491
trace.mutter('single pack %s was already optimally packed',
493
self.new_pack.abort()
495
self.pb.update('finishing repack', 6, 7)
496
self.new_pack.finish()
497
self._pack_collection.allocate(self.new_pack)
501
class GCCHKReconcilePacker(GCCHKPacker):
502
"""A packer which regenerates indices etc as it copies.
504
This is used by ``bzr reconcile`` to cause parent text pointers to be
508
def __init__(self, *args, **kwargs):
    """Initialise the base packer, then enable reconcile-specific state.

    All arguments are forwarded unchanged to the parent constructor.
    """
    super(GCCHKReconcilePacker, self).__init__(*args, **kwargs)
    # Must run after the parent constructor, which initialises
    # _gather_text_refs itself.  _data_changed records whether the repack
    # altered anything; _gather_text_refs makes the chk walk collect the
    # referenced text keys.
    self._data_changed, self._gather_text_refs = False, True
513
def _copy_inventory_texts(self):
514
source_vf, target_vf = self._build_vfs('inventory', True, True)
515
self._copy_stream(source_vf, target_vf, self.revision_keys,
516
'inventories', self._get_filtered_inv_stream, 2)
517
if source_vf.keys() != self.revision_keys:
518
self._data_changed = True
520
def _copy_text_texts(self):
521
"""generate what texts we should have and then copy."""
522
source_vf, target_vf = self._build_vfs('text', True, True)
523
trace.mutter('repacking %d texts', len(self._text_refs))
524
self.pb.update("repacking texts", 4)
525
# we have three major tasks here:
526
# 1) generate the ideal index
527
repo = self._pack_collection.repo
528
# We want the one we just wrote, so base it on self.new_pack
529
revision_vf = self._build_vf('revision', True, False, for_write=True)
530
ancestor_keys = revision_vf.get_parent_map(revision_vf.keys())
531
# Strip keys back into revision_ids.
532
ancestors = dict((k[0], tuple([p[0] for p in parents]))
533
for k, parents in ancestor_keys.iteritems())
535
# TODO: _generate_text_key_index should be much cheaper to generate from
536
# a chk repository, rather than the current implementation
537
ideal_index = repo._generate_text_key_index(None, ancestors)
538
file_id_parent_map = source_vf.get_parent_map(self._text_refs)
539
# 2) generate a keys list that contains all the entries that can
540
# be used as-is, with corrected parents.
542
new_parent_keys = {} # (key, parent_keys)
544
NULL_REVISION = _mod_revision.NULL_REVISION
545
for key in self._text_refs:
551
ideal_parents = tuple(ideal_index[key])
553
discarded_keys.append(key)
554
self._data_changed = True
556
if ideal_parents == (NULL_REVISION,):
558
source_parents = file_id_parent_map[key]
559
if ideal_parents == source_parents:
563
# We need to change the parent graph, but we don't need to
564
# re-insert the text (since we don't pun the compression
565
# parent with the parents list)
566
self._data_changed = True
567
new_parent_keys[key] = ideal_parents
568
# we're finished with some data.
570
del file_id_parent_map
571
# 3) bulk copy the data, updating records that need it
572
def _update_parents_for_texts():
573
stream = source_vf.get_record_stream(self._text_refs,
574
'groupcompress', False)
575
for record in stream:
576
if record.key in new_parent_keys:
577
record.parents = new_parent_keys[record.key]
579
target_vf.insert_record_stream(_update_parents_for_texts())
581
def _use_pack(self, new_pack):
582
"""Override _use_pack to check for reconcile having changed content."""
583
return new_pack.data_inserted() and self._data_changed
586
class GCCHKCanonicalizingPacker(GCCHKPacker):
587
"""A packer that ensures inventories have canonical-form CHK maps.
589
Ideally this would be part of reconcile, but it's very slow and rarely
590
needed. (It repairs repositories affected by
591
https://bugs.launchpad.net/bzr/+bug/522637).
594
def __init__(self, *args, **kwargs):
    """Initialise the base packer and reset the change-tracking flag.

    All arguments are forwarded unchanged to the parent constructor.
    """
    super(GCCHKCanonicalizingPacker, self).__init__(*args, **kwargs)
    # Set to True once a non-canonical CHK map is found; _use_pack reads it
    # to decide whether the new pack is used.
    self._data_changed = False
598
def _exhaust_stream(self, source_vf, keys, message, vf_to_stream, pb_offset):
599
"""Create and exhaust a stream, but don't insert it.
601
This is useful to get the side-effects of generating a stream.
603
self.pb.update('scanning %s' % (message,), pb_offset)
604
child_pb = ui.ui_factory.nested_progress_bar()
606
list(vf_to_stream(source_vf, keys, message, child_pb))
610
def _copy_inventory_texts(self):
    """Copy the existing CHK pages, then copy canonicalized inventories.

    In this class the CHK copy happens here rather than in
    ``_copy_chk_texts``.
    """
    source_vf, target_vf = self._build_vfs('inventory', True, True)
    source_chk_vf, target_chk_vf = self._get_chk_vfs_for_copy()
    inventory_keys = source_vf.keys()
    # Step 1: copy the existing CHKs, assuming most of them are already
    # correct.  That avoids reinserting (and recompressing) those records
    # later, at the cost of perhaps preserving some that are not needed.
    #
    # The filtered inventory stream is iterated but not inserted; it is run
    # only to populate the variables GCCHKPacker._copy_chk_texts needs.
    self._exhaust_stream(source_vf, inventory_keys, 'inventories',
                         self._get_filtered_inv_stream, 2)
    GCCHKPacker._copy_chk_texts(self)
    # Step 2: copy and fix the inventories, and any regenerated CHKs.
    canonicalizing_stream = (
        lambda source_vf, keys, message, pb=None:
            self._get_filtered_canonicalizing_inv_stream(
                source_vf, keys, message, pb, source_chk_vf, target_chk_vf))
    self._copy_stream(source_vf, target_vf, inventory_keys,
                      'inventories', canonicalizing_stream, 4)
630
def _copy_chk_texts(self):
631
# No-op; in this class this happens during _copy_inventory_texts.
634
def _get_filtered_canonicalizing_inv_stream(self, source_vf, keys, message,
635
pb=None, source_chk_vf=None, target_chk_vf=None):
636
"""Filter the texts of inventories, regenerating CHKs to make sure they
639
total_keys = len(keys)
640
target_chk_vf = versionedfile.NoDupeAddLinesDecorator(target_chk_vf)
641
def _filtered_inv_stream():
642
stream = source_vf.get_record_stream(keys, 'groupcompress', True)
643
search_key_name = None
644
for idx, record in enumerate(stream):
645
# Inventories should always be with revisions; assume success.
646
bytes = record.get_bytes_as('fulltext')
647
chk_inv = inventory.CHKInventory.deserialise(
648
source_chk_vf, bytes, record.key)
650
pb.update('inv', idx, total_keys)
651
chk_inv.id_to_entry._ensure_root()
652
if search_key_name is None:
653
# Find the name corresponding to the search_key_func
654
search_key_reg = chk_map.search_key_registry
655
for search_key_name, func in search_key_reg.iteritems():
656
if func == chk_inv.id_to_entry._search_key_func:
658
canonical_inv = inventory.CHKInventory.from_inventory(
659
target_chk_vf, chk_inv,
660
maximum_size=chk_inv.id_to_entry._root_node._maximum_size,
661
search_key_name=search_key_name)
662
if chk_inv.id_to_entry.key() != canonical_inv.id_to_entry.key():
664
'Non-canonical CHK map for id_to_entry of inv: %s '
665
'(root is %s, should be %s)' % (chk_inv.revision_id,
666
chk_inv.id_to_entry.key()[0],
667
canonical_inv.id_to_entry.key()[0]))
668
self._data_changed = True
669
p_id_map = chk_inv.parent_id_basename_to_file_id
670
p_id_map._ensure_root()
671
canon_p_id_map = canonical_inv.parent_id_basename_to_file_id
672
if p_id_map.key() != canon_p_id_map.key():
674
'Non-canonical CHK map for parent_id_to_basename of '
675
'inv: %s (root is %s, should be %s)'
676
% (chk_inv.revision_id, p_id_map.key()[0],
677
canon_p_id_map.key()[0]))
678
self._data_changed = True
679
yield versionedfile.ChunkedContentFactory(record.key,
680
record.parents, record.sha1,
681
canonical_inv.to_lines())
682
# We have finished processing all of the inventory records, we
683
# don't need these sets anymore
684
return _filtered_inv_stream()
686
def _use_pack(self, new_pack):
687
"""Override _use_pack to check for reconcile having changed content."""
688
return new_pack.data_inserted() and self._data_changed
193
691
class GCRepositoryPackCollection(RepositoryPackCollection):
195
693
pack_factory = GCPack
197
def _make_index(self, name, suffix):
    """Build a BTreeGraphIndex for the pack ``name`` and index ``suffix``.

    The index file name is ``name + suffix``.  Its size is read from
    ``self._names[name]`` at the position that ``self._suffix_offsets``
    records for ``suffix``.
    """
    offset = self._suffix_offsets[suffix]
    filename = name + suffix
    recorded_size = self._names[name][offset]
    return BTreeGraphIndex(self._index_transport, filename, recorded_size)
205
def _start_write_group(self):
206
# Overridden to add 'self.pack_factory()'
207
# Do not permit preparation for writing if we're not in a 'write lock'.
208
if not self.repo.is_write_locked():
209
raise errors.NotWriteLocked(self)
210
self._new_pack = self.pack_factory(self, upload_suffix='.pack',
211
file_mode=self.repo.bzrdir._get_file_mode())
212
# allow writing: queue writes to a new index
213
self.revision_index.add_writable_index(self._new_pack.revision_index,
215
self.inventory_index.add_writable_index(self._new_pack.inventory_index,
217
self.text_index.add_writable_index(self._new_pack.text_index,
219
self.signature_index.add_writable_index(self._new_pack.signature_index,
221
if chk_support and self.chk_index is not None:
222
self.chk_index.add_writable_index(self._new_pack.chk_index,
224
self.repo.chk_bytes._index._add_callback = self.chk_index.add_callback
226
self.repo.inventories._index._add_callback = self.inventory_index.add_callback
227
self.repo.revisions._index._add_callback = self.revision_index.add_callback
228
self.repo.signatures._index._add_callback = self.signature_index.add_callback
229
self.repo.texts._index._add_callback = self.text_index.add_callback
233
class GCPackRepository(KnitPackRepository):
234
"""GC customisation of KnitPackRepository."""
694
resumed_pack_factory = ResumedGCPack
696
def _check_new_inventories(self):
697
"""Detect missing inventories or chk root entries for the new revisions
700
:returns: list of strs, summarising any problems found. If the list is
701
empty no problems were found.
703
# Ensure that all revisions added in this write group have:
704
# - corresponding inventories,
705
# - chk root entries for those inventories,
706
# - and any present parent inventories have their chk root
708
# And all this should be independent of any fallback repository.
710
key_deps = self.repo.revisions._index._key_dependencies
711
new_revisions_keys = key_deps.get_new_keys()
712
no_fallback_inv_index = self.repo.inventories._index
713
no_fallback_chk_bytes_index = self.repo.chk_bytes._index
714
no_fallback_texts_index = self.repo.texts._index
715
inv_parent_map = no_fallback_inv_index.get_parent_map(
717
# Are any inventories corresponding to the new revisions missing?
718
corresponding_invs = set(inv_parent_map)
719
missing_corresponding = set(new_revisions_keys)
720
missing_corresponding.difference_update(corresponding_invs)
721
if missing_corresponding:
722
problems.append("inventories missing for revisions %s" %
723
(sorted(missing_corresponding),))
725
# Are any chk root entries missing for any inventories? This includes
726
# any present parent inventories, which may be used when calculating
727
# deltas for streaming.
728
all_inv_keys = set(corresponding_invs)
729
for parent_inv_keys in inv_parent_map.itervalues():
730
all_inv_keys.update(parent_inv_keys)
731
# Filter out ghost parents.
732
all_inv_keys.intersection_update(
733
no_fallback_inv_index.get_parent_map(all_inv_keys))
734
parent_invs_only_keys = all_inv_keys.symmetric_difference(
737
inv_ids = [key[-1] for key in all_inv_keys]
738
parent_invs_only_ids = [key[-1] for key in parent_invs_only_keys]
739
root_key_info = _build_interesting_key_sets(
740
self.repo, inv_ids, parent_invs_only_ids)
741
expected_chk_roots = root_key_info.all_keys()
742
present_chk_roots = no_fallback_chk_bytes_index.get_parent_map(
744
missing_chk_roots = expected_chk_roots.difference(present_chk_roots)
745
if missing_chk_roots:
746
problems.append("missing referenced chk root keys: %s"
747
% (sorted(missing_chk_roots),))
748
# Don't bother checking any further.
750
# Find all interesting chk_bytes records, and make sure they are
751
# present, as well as the text keys they reference.
752
chk_bytes_no_fallbacks = self.repo.chk_bytes.without_fallbacks()
753
chk_bytes_no_fallbacks._search_key_func = \
754
self.repo.chk_bytes._search_key_func
755
chk_diff = chk_map.iter_interesting_nodes(
756
chk_bytes_no_fallbacks, root_key_info.interesting_root_keys,
757
root_key_info.uninteresting_root_keys)
760
for record in _filter_text_keys(chk_diff, text_keys,
761
chk_map._bytes_to_text_key):
763
except errors.NoSuchRevision, e:
764
# XXX: It would be nice if we could give a more precise error here.
765
problems.append("missing chk node(s) for id_to_entry maps")
766
chk_diff = chk_map.iter_interesting_nodes(
767
chk_bytes_no_fallbacks, root_key_info.interesting_pid_root_keys,
768
root_key_info.uninteresting_pid_root_keys)
770
for interesting_rec, interesting_map in chk_diff:
772
except errors.NoSuchRevision, e:
774
"missing chk node(s) for parent_id_basename_to_file_id maps")
775
present_text_keys = no_fallback_texts_index.get_parent_map(text_keys)
776
missing_text_keys = text_keys.difference(present_text_keys)
777
if missing_text_keys:
778
problems.append("missing text keys: %r"
779
% (sorted(missing_text_keys),))
782
def _execute_pack_operations(self, pack_operations,
783
_packer_class=GCCHKPacker,
785
"""Execute a series of pack operations.
787
:param pack_operations: A list of [revision_count, packs_to_combine].
788
:param _packer_class: The class of packer to use (default: GCCHKPacker).
791
# XXX: Copied across from RepositoryPackCollection simply because we
792
# want to override the _packer_class ... :(
793
for revision_count, packs in pack_operations:
794
# we may have no-ops from the setup logic
797
packer = GCCHKPacker(self, packs, '.autopack',
798
reload_func=reload_func)
800
result = packer.pack()
801
except errors.RetryWithNewPacks:
802
# An exception is propagating out of this context, make sure
803
# this packer has cleaned up. Packer() doesn't set its new_pack
804
# state into the RepositoryPackCollection object, so we only
805
# have access to it directly here.
806
if packer.new_pack is not None:
807
packer.new_pack.abort()
812
self._remove_pack_from_memory(pack)
813
# record the newly available packs and stop advertising the old
816
for _, packs in pack_operations:
817
to_be_obsoleted.extend(packs)
818
result = self._save_pack_names(clear_obsolete_packs=True,
819
obsolete_packs=to_be_obsoleted)
823
class CHKInventoryRepository(KnitPackRepository):
824
"""subclass of KnitPackRepository that uses CHK based inventories."""
236
826
def __init__(self, _format, a_bzrdir, control_files, _commit_builder_class,
301
886
self._reconcile_fixes_text_parents = True
302
887
self._reconcile_backsup_inventory = False
306
class GCCHKPackRepository(CHKInventoryRepository):
307
"""GC customisation of CHKInventoryRepository."""
309
def __init__(self, _format, a_bzrdir, control_files, _commit_builder_class,
311
"""Overridden to change pack collection class."""
312
KnitPackRepository.__init__(self, _format, a_bzrdir, control_files,
313
_commit_builder_class, _serializer)
314
# and now replace everything it did :)
315
index_transport = self._transport.clone('indices')
317
self._pack_collection = GCRepositoryPackCollection(self,
318
self._transport, index_transport,
319
self._transport.clone('upload'),
320
self._transport.clone('packs'),
321
_format.index_builder_class,
323
use_chk_index=self._format.supports_chks,
326
self._pack_collection = GCRepositoryPackCollection(self,
327
self._transport, index_transport,
328
self._transport.clone('upload'),
329
self._transport.clone('packs'),
330
_format.index_builder_class,
332
self.inventories = GroupCompressVersionedFiles(
333
_GCGraphIndex(self._pack_collection.inventory_index.combined_index,
334
add_callback=self._pack_collection.inventory_index.add_callback,
335
parents=True, is_locked=self.is_locked),
336
access=self._pack_collection.inventory_index.data_access)
337
self.revisions = GroupCompressVersionedFiles(
338
_GCGraphIndex(self._pack_collection.revision_index.combined_index,
339
add_callback=self._pack_collection.revision_index.add_callback,
340
parents=True, is_locked=self.is_locked),
341
access=self._pack_collection.revision_index.data_access,
343
self.signatures = GroupCompressVersionedFiles(
344
_GCGraphIndex(self._pack_collection.signature_index.combined_index,
345
add_callback=self._pack_collection.signature_index.add_callback,
346
parents=False, is_locked=self.is_locked),
347
access=self._pack_collection.signature_index.data_access,
349
self.texts = GroupCompressVersionedFiles(
350
_GCGraphIndex(self._pack_collection.text_index.combined_index,
351
add_callback=self._pack_collection.text_index.add_callback,
352
parents=True, is_locked=self.is_locked),
353
access=self._pack_collection.text_index.data_access)
354
if chk_support and _format.supports_chks:
355
# No graph, no compression:- references from chks are between
356
# different objects not temporal versions of the same; and without
357
# some sort of temporal structure knit compression will just fail.
358
self.chk_bytes = GroupCompressVersionedFiles(
359
_GCGraphIndex(self._pack_collection.chk_index.combined_index,
360
add_callback=self._pack_collection.chk_index.add_callback,
361
parents=False, is_locked=self.is_locked),
362
access=self._pack_collection.chk_index.data_access)
364
self.chk_bytes = None
365
# True when the repository object is 'write locked' (as opposed to the
366
# physical lock only taken out around changes to the pack-names list.)
367
# Another way to represent this would be a decorator around the control
368
# files object that presents logical locks as physical ones - if this
369
# gets ugly consider that alternative design. RBC 20071011
370
self._write_lock_count = 0
371
self._transaction = None
373
self._reconcile_does_inventory_gc = True
374
self._reconcile_fixes_text_parents = True
375
self._reconcile_backsup_inventory = False
378
class RepositoryFormatPackGCPlain(RepositoryFormatPackDevelopment2):
    """Development pack format: B+Tree indices with groupcompress (plain)."""

    repository_class = GCPackRepository

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        format_string = ("Bazaar development format - btree+gc "
                         "(needs bzr.dev from 1.6)\n")
        return format_string

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        description = ("Development repository format - btree+groupcompress "
                       ", interoperates with pack-0.92\n")
        return description
394
class RepositoryFormatPackGCRichRoot(RepositoryFormatKnitPack4):
    """Development pack format: B+Tree indices with groupcompress (rich-root)."""

    repository_class = GCPackRepository

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        format_string = ("Bazaar development format - btree+gc-rich-root "
                         "(needs bzr.dev from 1.6)\n")
        return format_string

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        description = ("Development repository format - btree+groupcompress "
                       ", interoperates with rich-root-pack\n")
        return description
410
class RepositoryFormatPackGCSubtrees(RepositoryFormatPackDevelopment2Subtree):
    """Development pack format: B+Tree indices with groupcompress (subtrees)."""

    repository_class = GCPackRepository

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        format_string = ("Bazaar development format - btree+gc-subtrees "
                         "(needs bzr.dev from 1.6)\n")
        return format_string

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        description = ("Development repository format - btree+groupcompress "
                       ", interoperates with pack-0.92-subtrees\n")
        return description
426
'Bazaar development format - 1.9+gc (needs bzr.dev from 1.9)\n',
427
class RepositoryFormatPackGCPlainCHK(RepositoryFormatPackDevelopment4):
428
"""A CHK+group compress pack repository."""
430
repository_class = GCCHKPackRepository
432
def get_format_string(self):
433
"""See RepositoryFormat.get_format_string()."""
434
return ('Bazaar development format - chk+gc '
435
'(needs bzr.dev from 1.12)\n')
437
def get_format_description(self):
438
"""See RepositoryFormat.get_format_description()."""
439
return ("Development repository format - chk+groupcompress "
440
", interoperates with pack-0.92\n")
446
def pack_incompatible(source, target, orig_method=InterPackRepo.is_compatible):
447
"""Be incompatible with the regular fetch code."""
448
formats = (RepositoryFormatPackGCPlain, RepositoryFormatPackGCRichRoot,
449
RepositoryFormatPackGCSubtrees)
451
formats = formats = (RepositoryFormatPackGCPlain,)
452
if isinstance(source._format, formats) or isinstance(target._format, formats):
455
return orig_method(source, target)
458
InterPackRepo.is_compatible = staticmethod(pack_incompatible)
889
def _add_inventory_checked(self, revision_id, inv, parents):
    """Add inv to the repository after checking the inputs.

    This function can be overridden to allow different inventory styles.

    :param revision_id: The revision id the inventory is stored under.
    :param inv: The inventory to convert into a CHKInventory and store.
    :param parents: Passed straight through to _inventory_add_lines.
    :return: Whatever _inventory_add_lines returns.
    :seealso: add_inventory, for the contract.
    """
    serializer = self._format._serializer
    # Convert the supplied inventory into its CHK form, using the page size
    # and search key function dictated by this format's serializer.
    result = inventory.CHKInventory.from_inventory(
        self.chk_bytes, inv,
        maximum_size=serializer.maximum_size,
        search_key_name=serializer.search_key_name)
    inv_lines = result.to_lines()
    return self._inventory_add_lines(revision_id, parents,
                                     inv_lines, check_content=False)
905
def _create_inv_from_null(self, delta, revision_id):
    """Build a new CHKInventory from a delta against NULL_REVISION.

    This is a simplified form of create_by_apply_delta which knows that all
    the old values must be None, so everything is a create.

    :param delta: Iterable of (old_path, new_path, file_id, entry) tuples.
    :param revision_id: The revision id assigned to the new inventory.
    :return: The populated inventory.
    :raises ValueError: If an item carries an old_path (a delete), or has
        no new_path.
    """
    # NOTE(review): the extracted source had lost the short lines of this
    # method (both new_path tests, the else: and the final return).  They
    # are restored here from the error messages, the ('', '') root key and
    # the caller in add_inventory_by_delta -- confirm against the real file.
    serializer = self._format._serializer
    new_inv = inventory.CHKInventory(serializer.search_key_name)
    new_inv.revision_id = revision_id
    entry_to_bytes = new_inv._entry_to_bytes
    id_to_entry_dict = {}
    parent_id_basename_dict = {}
    for old_path, new_path, file_id, entry in delta:
        if old_path is not None:
            raise ValueError('Invalid delta, somebody tried to delete %r'
                             ' from the NULL_REVISION'
                             % ((old_path, file_id),))
        if new_path is None:
            raise ValueError('Invalid delta, delta from NULL_REVISION has'
                             ' no new_path %r' % (file_id,))
        if new_path == '':
            # The tree root: it is keyed under the empty parent/basename.
            new_inv.root_id = file_id
            parent_id_basename_key = StaticTuple('', '').intern()
        else:
            utf8_entry_name = entry.name.encode('utf-8')
            parent_id_basename_key = StaticTuple(entry.parent_id,
                                                 utf8_entry_name).intern()
        new_value = entry_to_bytes(entry)
        # new_inv._path_to_fileid_cache[new_path] = file_id
        key = StaticTuple(file_id).intern()
        id_to_entry_dict[key] = new_value
        parent_id_basename_dict[parent_id_basename_key] = file_id

    new_inv._populate_from_dicts(self.chk_bytes, id_to_entry_dict,
                                 parent_id_basename_dict,
                                 maximum_size=serializer.maximum_size)
    return new_inv
943
def add_inventory_by_delta(self, basis_revision_id, delta, new_revision_id,
                           parents, basis_inv=None, propagate_caches=False):
    """Add a new inventory expressed as a delta against another revision.

    :param basis_revision_id: The inventory id the delta was created
        against.
    :param delta: The inventory delta (see Inventory.apply_delta for
        details).
    :param new_revision_id: The revision id that the inventory is being
        added for.
    :param parents: The revision ids of the parents that revision_id is
        known to have and are in the repository already. These are supplied
        for repositories that depend on the inventory graph for revision
        graph access, as well as for those that pun ancestry with delta
        compression.
    :param basis_inv: The basis inventory if it is already known,
        otherwise None.
    :param propagate_caches: If True, the caches for this inventory are
        copied to and updated for the result if possible.

    :returns: (validator, new_inv)
        The validator(which is a sha1 digest, though what is sha'd is
        repository format specific) of the serialized inventory, and the
        resulting inventory.
    :raises AssertionError: If the repository is not in a write group.
    :raises errors.RootMissing: If a delta against NULL_REVISION does not
        create a root entry.
    """
    # NOTE(review): the extracted source had lost `basis_tree = None`, the
    # `else:`, the `try:`/`finally:` pair and the unlock() call.  They are
    # restored from the surviving `if basis_tree is not None:` test and the
    # lock_read() call -- confirm against the real file.
    if not self.is_in_write_group():
        raise AssertionError("%r not in write group" % (self,))
    _mod_revision.check_not_reserved_id(new_revision_id)
    basis_tree = None
    if basis_inv is None:
        if basis_revision_id == _mod_revision.NULL_REVISION:
            # Nothing to apply the delta to: build the inventory directly.
            new_inv = self._create_inv_from_null(delta, new_revision_id)
            if new_inv.root_id is None:
                raise errors.RootMissing()
            inv_lines = new_inv.to_lines()
            return self._inventory_add_lines(new_revision_id, parents,
                inv_lines, check_content=False), new_inv
        else:
            basis_tree = self.revision_tree(basis_revision_id)
            basis_tree.lock_read()
            basis_inv = basis_tree.inventory
    try:
        result = basis_inv.create_by_apply_delta(delta, new_revision_id,
            propagate_caches=propagate_caches)
        inv_lines = result.to_lines()
        return self._inventory_add_lines(new_revision_id, parents,
            inv_lines, check_content=False), result
    finally:
        # Only release the lock if this method took it.
        if basis_tree is not None:
            basis_tree.unlock()
994
def _deserialise_inventory(self, revision_id, bytes):
    """Deserialise bytes into a CHKInventory backed by self.chk_bytes.

    :param revision_id: The revision id the inventory belongs to.
    :param bytes: The serialised inventory.
    :return: The result of inventory.CHKInventory.deserialise.
    """
    # NOTE(review): the final argument was cut off in the extracted source.
    # It is restored as the 1-tuple key, matching the key passed to the
    # same call in _iter_inventories -- confirm against the real file.
    return inventory.CHKInventory.deserialise(self.chk_bytes, bytes,
                                              (revision_id,))
998
def _iter_inventories(self, revision_ids, ordering):
    """Iterate over many inventory objects.

    :param revision_ids: The revision ids whose inventories are wanted.
    :param ordering: Ordering passed to get_record_stream; None is treated
        as 'unordered'.
    :return: A generator of CHKInventory objects, yielded in the order of
        revision_ids.
    :raises errors.NoSuchRevision: If any requested inventory is absent.
    """
    # NOTE(review): `texts = {}`, the `else:` and the `for key in keys:`
    # loop header were missing from the extracted source and are restored
    # from the statements that depend on them -- confirm against the real
    # file.
    if ordering is None:
        ordering = 'unordered'
    keys = [(revision_id,) for revision_id in revision_ids]
    stream = self.inventories.get_record_stream(keys, ordering, True)
    # The stream may come back in any order, so collect every fulltext
    # first and only then yield in the order that was asked for.
    texts = {}
    for record in stream:
        if record.storage_kind != 'absent':
            texts[record.key] = record.get_bytes_as('fulltext')
        else:
            raise errors.NoSuchRevision(self, record.key)
    for key in keys:
        yield inventory.CHKInventory.deserialise(self.chk_bytes,
                                                 texts[key], key)
1013
def _iter_inventory_xmls(self, revision_ids, ordering):
    """Yield (serialised inventory, revision_id) for each revision id.

    :param revision_ids: The revision ids whose inventories are wanted.
    :param ordering: Passed through to iter_inventories.
    :return: A generator of (string, revision_id) pairs, where the string
        is produced by self._serializer.write_inventory_to_string.
    """
    # Without a native 'xml' inventory, this method doesn't make sense.
    # However older working trees, and older bundles want it - so we supply
    # it allowing _get_inventory_xml to work. Bundles currently use the
    # serializer directly; this also isn't ideal, but there isn't an xml
    # iteration interface offered at all for repositories. We could make
    # _iter_inventory_xmls be part of the contract, even if kept private.
    inv_to_str = self._serializer.write_inventory_to_string
    for inv in self.iter_inventories(revision_ids, ordering=ordering):
        yield inv_to_str(inv), inv.revision_id
1024
def _find_present_inventory_keys(self, revision_keys):
    """Return the subset of revision_keys that self.inventories knows.

    :param revision_keys: The inventory keys to look up.
    :return: A set of the keys returned by
        self.inventories.get_parent_map(revision_keys).
    """
    parent_map = self.inventories.get_parent_map(revision_keys)
    # Iterating a mapping yields its keys, so no generator is needed.
    return set(parent_map)
1029
def fileids_altered_by_revision_ids(self, revision_ids, _inv_weave=None):
    """Find the file ids and versions affected by revisions.

    :param revisions: an iterable containing revision ids.
    :param _inv_weave: The inventory weave from this repository or None.
        If None, the inventory weave will be opened automatically.
        (It is not used by this implementation.)
    :return: a dictionary mapping altered file-ids to an iterable of
        revision_ids. Each altered file-ids has the exact revision_ids that
        altered it listed explicitly.
    """
    # NOTE(review): the extracted source had lost the outer `try:` /
    # `finally: pb.finished()`, the `parent_keys)` call tail, the last
    # argument of iter_interesting_nodes (restored as `pb=pb`, since pb is
    # otherwise unused), the `continue`, and the inner `try:` /
    # `except KeyError:` -- confirm all of these against the real file.
    rich_root = self.supports_rich_root()
    bytes_to_info = inventory.CHKInventory._bytes_to_utf8name_key
    file_id_revisions = {}
    pb = ui.ui_factory.nested_progress_bar()
    try:
        revision_keys = [(r,) for r in revision_ids]
        parent_keys = self._find_parent_keys_of_revisions(revision_keys)
        # TODO: instead of using _find_present_inventory_keys, change the
        #       code paths to allow missing inventories to be tolerated.
        #       However, we only want to tolerate missing parent
        #       inventories, not missing inventories for revision_ids
        present_parent_inv_keys = self._find_present_inventory_keys(
            parent_keys)
        present_parent_inv_ids = set(
            [k[-1] for k in present_parent_inv_keys])
        inventories_to_read = set(revision_ids)
        inventories_to_read.update(present_parent_inv_ids)
        root_key_info = _build_interesting_key_sets(
            self, inventories_to_read, present_parent_inv_ids)
        interesting_root_keys = root_key_info.interesting_root_keys
        uninteresting_root_keys = root_key_info.uninteresting_root_keys
        chk_bytes = self.chk_bytes
        for record, items in chk_map.iter_interesting_nodes(
                chk_bytes, interesting_root_keys, uninteresting_root_keys,
                pb=pb):
            for name, bytes in items:
                (name_utf8, file_id, revision_id) = bytes_to_info(bytes)
                # TODO: consider interning file_id, revision_id here, or
                #       pushing that intern() into bytes_to_info()
                # TODO: rich_root should always be True here, for all
                #       repositories that support chk_bytes
                if not rich_root and name_utf8 == '':
                    continue
                try:
                    file_id_revisions[file_id].add(revision_id)
                except KeyError:
                    file_id_revisions[file_id] = set([revision_id])
    finally:
        pb.finished()
    return file_id_revisions
1080
def find_text_key_references(self):
    """Find the text key references within the repository.

    :return: A dictionary mapping text keys ((fileid, revision_id) tuples)
        to whether they were referred to by the inventory of the
        revision_id that they contain. The inventory texts from all present
        revision ids are assessed to generate this report.
    """
    # NOTE(review): `result = {}`, the `try:` / `finally: pb.finished()`
    # pair, the `continue`, the `result[key] = True` assignment and the
    # final return were missing from the extracted source.  They are
    # restored from the docstring and the surviving statements -- confirm
    # against the real file.
    #
    # XXX: Slow version but correct: rewrite as a series of delta
    # examinations/direct tree traversal. Note that that will require care
    # as a common node is reachable both from the inventory that added it,
    # and others afterwards.
    # revision_keys is not used below; the call is kept so that behaviour
    # is unchanged.
    revision_keys = self.revisions.keys()
    result = {}
    rich_roots = self.supports_rich_root()
    pb = ui.ui_factory.nested_progress_bar()
    try:
        all_revs = self.all_revision_ids()
        total = len(all_revs)
        for pos, inv in enumerate(self.iter_inventories(all_revs)):
            pb.update("Finding text references", pos, total)
            for _, entry in inv.iter_entries():
                if not rich_roots and entry.file_id == inv.root_id:
                    continue
                key = (entry.file_id, entry.revision)
                # Record the key as seen without clobbering an earlier
                # True.
                result.setdefault(key, False)
                if entry.revision == inv.revision_id:
                    result[key] = True
        return result
    finally:
        pb.finished()
1113
def reconcile_canonicalize_chks(self):
    """Reconcile this repository to make sure all CHKs are in canonical
    form.

    Runs a thorough PackReconciler with canonicalize_chks=True.
    """
    from bzrlib.reconcile import PackReconciler
    reconciler = PackReconciler(self, thorough=True, canonicalize_chks=True)
    reconciler.reconcile()
    # NOTE(review): the line after reconcile() was missing from the
    # extracted source; returning the reconciler is an assumption --
    # confirm against the real file.
    return reconciler
1122
def _reconcile_pack(self, collection, packs, extension, revs, pb):
    """Repack packs with a GCCHKReconcilePacker.

    :param collection: The pack collection handed to the packer.
    :param packs: The packs to be processed.
    :param extension: Suffix handed to the packer.
    :param revs: Not used by this implementation.
    :param pb: Progress bar passed to packer.pack().
    :return: The result of packer.pack(pb).
    """
    packer = GCCHKReconcilePacker(collection, packs, extension)
    return packer.pack(pb)
1126
def _canonicalize_chks_pack(self, collection, packs, extension, revs, pb):
    """Repack packs with a GCCHKCanonicalizingPacker.

    :param collection: The pack collection handed to the packer.
    :param packs: The packs to be processed.
    :param extension: Suffix handed to the packer.
    :param revs: Revision ids handed to the packer.
    :param pb: Progress bar passed to packer.pack().
    :return: The result of packer.pack(pb).
    """
    packer = GCCHKCanonicalizingPacker(collection, packs, extension, revs)
    return packer.pack(pb)
1130
def _get_source(self, to_format):
    """Return a source for streaming from this repository.

    :param to_format: The repository format being streamed to.
    :return: A GroupCHKStreamSource when both formats share the same
        serializer, otherwise whatever the base class returns.
    """
    if self._format._serializer == to_format._serializer:
        # We must be exactly the same format, otherwise stuff like the chk
        # page layout might be different.
        # Actually, this test is just slightly looser than exact so that
        # CHK2 <-> 2a transfers will work.
        return GroupCHKStreamSource(self, to_format)
    return super(CHKInventoryRepository, self)._get_source(to_format)
1141
class GroupCHKStreamSource(KnitPackStreamSource):
    """Used when both the source and target repo are GroupCHK repos."""

    # NOTE(review): the extracted source of this class had lost a number of
    # short lines (`yield record`, `continue`, `else:`, `count = 0`,
    # `count += 1`, `rc.increment(count)`, `pb.finished()`, and the tails
    # of three multi-line calls).  They are restored below from the
    # surrounding code; `rc.increment(count)` and the `% (key[0],)` message
    # argument in particular are assumptions -- confirm against the real
    # file.

    def __init__(self, from_repository, to_format):
        """Create a StreamSource streaming from from_repository."""
        super(GroupCHKStreamSource, self).__init__(from_repository, to_format)
        self._revision_keys = None
        self._text_keys = None
        self._text_fetch_order = 'groupcompress'
        # Both are filled in by _get_inventory_stream and reset to None
        # once _get_filtered_chk_streams has consumed them.
        self._chk_id_roots = None
        self._chk_p_id_roots = None

    def _get_inventory_stream(self, inventory_keys, allow_absent=False):
        """Get a stream of inventory texts.

        When this function returns, self._chk_id_roots and self._chk_p_id_roots
        should be populated.

        :param inventory_keys: Keys of the inventories to stream.
        :param allow_absent: If True, absent records are skipped instead of
            raising errors.NoSuchRevision.
        :return: ('inventories', generator of records).
        """
        self._chk_id_roots = []
        self._chk_p_id_roots = []

        def _filtered_inv_stream():
            # The lists keep first-seen order; the sets make the duplicate
            # checks cheap.
            id_roots_set = set()
            p_id_roots_set = set()
            source_vf = self.from_repository.inventories
            stream = source_vf.get_record_stream(inventory_keys,
                                                 'groupcompress', True)
            for record in stream:
                if record.storage_kind == 'absent':
                    if allow_absent:
                        continue
                    else:
                        raise errors.NoSuchRevision(self, record.key)
                bytes = record.get_bytes_as('fulltext')
                chk_inv = inventory.CHKInventory.deserialise(None, bytes,
                                                             record.key)
                key = chk_inv.id_to_entry.key()
                if key not in id_roots_set:
                    self._chk_id_roots.append(key)
                    id_roots_set.add(key)
                p_id_map = chk_inv.parent_id_basename_to_file_id
                if p_id_map is None:
                    raise AssertionError('Parent id -> file_id map not set')
                key = p_id_map.key()
                if key not in p_id_roots_set:
                    p_id_roots_set.add(key)
                    self._chk_p_id_roots.append(key)
                yield record
            # We have finished processing all of the inventory records, we
            # don't need these sets anymore
            id_roots_set.clear()
            p_id_roots_set.clear()
        return ('inventories', _filtered_inv_stream())

    def _get_filtered_chk_streams(self, excluded_revision_keys):
        """Yield the two 'chk_bytes' streams for the gathered chk roots.

        :param excluded_revision_keys: A set of revision keys whose
            inventories, where present, supply the chk roots treated as
            uninteresting.  NULL_REVISION is discarded from it in place.
        :return: A generator of ('chk_bytes', record generator) pairs; the
            first walks the id_to_entry roots (and fills self._text_keys),
            the second the parent_id_basename_to_file_id roots.
        """
        self._text_keys = set()
        excluded_revision_keys.discard(_mod_revision.NULL_REVISION)
        if not excluded_revision_keys:
            uninteresting_root_keys = set()
            uninteresting_pid_root_keys = set()
        else:
            # filter out any excluded revisions whose inventories are not
            # actually present
            # TODO: Update Repository.iter_inventories() to add
            #       ignore_missing=True
            present_keys = self.from_repository._find_present_inventory_keys(
                excluded_revision_keys)
            present_ids = [k[-1] for k in present_keys]
            uninteresting_root_keys = set()
            uninteresting_pid_root_keys = set()
            for inv in self.from_repository.iter_inventories(present_ids):
                uninteresting_root_keys.add(inv.id_to_entry.key())
                uninteresting_pid_root_keys.add(
                    inv.parent_id_basename_to_file_id.key())
        chk_bytes = self.from_repository.chk_bytes

        def _filter_id_to_entry():
            interesting_nodes = chk_map.iter_interesting_nodes(
                chk_bytes, self._chk_id_roots, uninteresting_root_keys)
            for record in _filter_text_keys(interesting_nodes,
                                            self._text_keys,
                                            chk_map._bytes_to_text_key):
                if record is not None:
                    yield record
            # Consumed
            self._chk_id_roots = None
        yield 'chk_bytes', _filter_id_to_entry()

        def _get_parent_id_basename_to_file_id_pages():
            for record, items in chk_map.iter_interesting_nodes(
                    chk_bytes, self._chk_p_id_roots,
                    uninteresting_pid_root_keys):
                if record is not None:
                    yield record
            # Consumed
            self._chk_p_id_roots = None
        yield 'chk_bytes', _get_parent_id_basename_to_file_id_pages()

    def get_stream(self, search):
        """Yield (substream kind, record iterator) pairs for search.

        The streams come, in order, from _fetch_revision_texts,
        _get_inventory_stream, _get_filtered_chk_streams and
        _get_text_stream; each is wrapped so that progress is reported.

        :param search: An object whose get_keys() returns the revision ids
            to stream.
        """
        def wrap_and_count(pb, rc, stream):
            """Yield records from stream while showing progress."""
            count = 0
            for record in stream:
                # Only touch the progress bar once per rc.STEP records.
                if count == rc.STEP:
                    rc.increment(count)
                    pb.update('Estimate', rc.current, rc.max)
                    count = 0
                count += 1
                yield record

        revision_ids = search.get_keys()
        pb = ui.ui_factory.nested_progress_bar()
        rc = self._record_counter
        self._record_counter.setup(len(revision_ids))
        for stream_info in self._fetch_revision_texts(revision_ids):
            yield (stream_info[0],
                   wrap_and_count(pb, rc, stream_info[1]))
        self._revision_keys = [(rev_id,) for rev_id in revision_ids]
        self.from_repository.revisions.clear_cache()
        self.from_repository.signatures.clear_cache()
        s = self._get_inventory_stream(self._revision_keys)
        yield (s[0], wrap_and_count(pb, rc, s[1]))
        self.from_repository.inventories.clear_cache()
        # TODO: The keys to exclude might be part of the search recipe
        # For now, exclude all parents that are at the edge of ancestry, for
        # which we have inventories
        from_repo = self.from_repository
        parent_keys = from_repo._find_parent_keys_of_revisions(
            self._revision_keys)
        for stream_info in self._get_filtered_chk_streams(parent_keys):
            yield (stream_info[0], wrap_and_count(pb, rc, stream_info[1]))
        self.from_repository.chk_bytes.clear_cache()
        s = self._get_text_stream()
        yield (s[0], wrap_and_count(pb, rc, s[1]))
        self.from_repository.texts.clear_cache()
        pb.update('Done', rc.max, rc.max)
        pb.finished()

    def get_stream_for_missing_keys(self, missing_keys):
        """Yield streams that fill in the given missing inventory keys.

        :param missing_keys: Iterable of keys whose first element must be
            'inventories'; the remainder is the inventory key.
        :raises AssertionError: If a key is not an inventory key, or if the
            chk roots gathered by get_stream() have not been consumed yet.
        """
        # missing keys can only occur when we are byte copying and not
        # translating (because translation means we don't send
        # unreconstructable deltas ever).
        missing_inventory_keys = set()
        for key in missing_keys:
            if key[0] != 'inventories':
                raise AssertionError('The only missing keys we should'
                    ' be filling in are inventory keys, not %s'
                    % (key[0],))
            missing_inventory_keys.add(key[1:])
        if self._chk_id_roots or self._chk_p_id_roots:
            raise AssertionError('Cannot call get_stream_for_missing_keys'
                ' until all of get_stream() has been consumed.')
        # Yield the inventory stream, so we can find the chk stream
        # Some of the missing_keys will be missing because they are ghosts.
        # As such, we can ignore them. The Sink is required to verify there are
        # no unavailable texts when the ghost inventories are not filled in.
        yield self._get_inventory_stream(missing_inventory_keys,
                                         allow_absent=True)
        # We use the empty set for excluded_revision_keys, to make it clear
        # that we want to transmit all referenced chk pages.
        for stream_info in self._get_filtered_chk_streams(set()):
            yield stream_info
1300
class _InterestingKeyInfo(object):
    """Four sets of chk root keys, split by interest and by map kind."""

    # NOTE(review): the `def` lines of __init__ and of the last method were
    # missing from the extracted source.  __init__ is forced by the
    # attribute assignments; the name `all_keys` is an assumption --
    # confirm against the real file.

    def __init__(self):
        self.interesting_root_keys = set()
        self.interesting_pid_root_keys = set()
        self.uninteresting_root_keys = set()
        self.uninteresting_pid_root_keys = set()

    def all_interesting(self):
        """Return a new set holding both kinds of interesting root key."""
        return self.interesting_root_keys.union(self.interesting_pid_root_keys)

    def all_uninteresting(self):
        """Return a new set holding both kinds of uninteresting root key."""
        return self.uninteresting_root_keys.union(
            self.uninteresting_pid_root_keys)

    def all_keys(self):
        """Return a new set holding every key in the four sets."""
        return self.all_interesting().union(self.all_uninteresting())
1318
def _build_interesting_key_sets(repo, inventory_ids, parent_only_inv_ids):
    """Sort the chk root keys of some inventories into an _InterestingKeyInfo.

    :param repo: Repository whose iter_inventories() supplies the
        inventories.
    :param inventory_ids: Revision ids of every inventory to read.
    :param parent_only_inv_ids: Those ids whose root keys are to be recorded
        as uninteresting; the roots of all other inventories are recorded as
        interesting.
    :return: The populated _InterestingKeyInfo.
    """
    # NOTE(review): the `else:` and the final `return result` were missing
    # from the extracted source and are restored from the numbering gap and
    # from the way fileids_altered_by_revision_ids uses the return value --
    # confirm against the real file.
    result = _InterestingKeyInfo()
    for inv in repo.iter_inventories(inventory_ids, 'unordered'):
        root_key = inv.id_to_entry.key()
        pid_root_key = inv.parent_id_basename_to_file_id.key()
        if inv.revision_id in parent_only_inv_ids:
            result.uninteresting_root_keys.add(root_key)
            result.uninteresting_pid_root_keys.add(pid_root_key)
        else:
            result.interesting_root_keys.add(root_key)
            result.interesting_pid_root_keys.add(pid_root_key)
    return result
1332
def _filter_text_keys(interesting_nodes_iterable, text_keys, bytes_to_text_key):
    """Iterate the result of iter_interesting_nodes, yielding the records
    and adding to text_keys.

    :param interesting_nodes_iterable: Iterable of (record, items) pairs,
        where items is an iterable of (name, bytes) pairs.
    :param text_keys: A set, updated in place with the converted keys.
    :param bytes_to_text_key: Callable turning an item's bytes into a key.
    :return: A generator of the records, in input order.
    """
    # NOTE(review): the `yield record` line was missing from the extracted
    # source and is restored from the docstring and from the caller, which
    # iterates over the records -- confirm against the real file.
    text_keys_update = text_keys.update
    for record, items in interesting_nodes_iterable:
        text_keys_update([bytes_to_text_key(b) for n, b in items])
        yield record
1342
class RepositoryFormat2a(RepositoryFormatPack):
    """A CHK repository that uses the bencode revision serializer."""

    # NOTE(review): the body of _ignore_setting_bzrdir was missing from the
    # extracted source and is restored as `pass`.  The extract also skips
    # one numbered line between _fetch_uses_deltas and pack_compresses;
    # confirm against the real file that no class attribute was lost there.

    repository_class = CHKInventoryRepository
    supports_external_lookups = True
    supports_chks = True
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    _serializer = chk_serializer.chk_bencode_serializer
    _commit_inv_deltas = True
    # What index classes to use
    index_builder_class = BTreeBuilder
    index_class = BTreeGraphIndex
    # Note: We cannot unpack a delta that references a text we haven't
    # seen yet. There are 2 options, work in fulltexts, or require
    # topological sorting. Using fulltexts is more optimal for local
    # operations, because the source can be smart about extracting
    # multiple in-a-row (and sharing strings). Topological is better
    # for remote, because we access less data.
    _fetch_order = 'unordered'
    _fetch_uses_deltas = False # essentially ignored by the groupcompress code.
    pack_compresses = True

    def _get_matching_bzrdir(self):
        """Return the bzrdir format registered under the name '2a'."""
        return bzrdir.format_registry.make_bzrdir('2a')

    def _ignore_setting_bzrdir(self, format):
        """Setter for _matchingbzrdir; the assigned value is discarded."""
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return ('Bazaar repository format 2a (needs bzr 1.16 or later)\n')

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return ("Repository format 2a - rich roots, group compression"
                " and chk inventories")
1383
class RepositoryFormat2aSubtree(RepositoryFormat2a):
1384
"""A 2a repository format that supports nested trees.
1388
def _get_matching_bzrdir(self):
1389
return bzrdir.format_registry.make_bzrdir('development-subtree')
1391
def _ignore_setting_bzrdir(self, format):
1394
_matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)
1396
def get_format_string(self):
1397
return ('Bazaar development format 8\n')
1399
def get_format_description(self):
1400
"""See RepositoryFormat.get_format_description()."""
1401
return ("Development repository format 8 - nested trees, "
1402
"group compression and chk inventories")
1405
supports_tree_reference = True