352
356
"""Build a VersionedFiles instance on top of this group of packs."""
353
357
index_name = index_name + '_index'
354
358
index_to_pack = {}
355
access = knit._DirectPackAccess(index_to_pack,
356
reload_func=self._reload_func)
359
access = _DirectPackAccess(index_to_pack,
360
reload_func=self._reload_func)
359
363
if self.new_pack is None:
421
425
inventory_keys = source_vf.keys()
422
426
missing_inventories = set(self.revision_keys).difference(inventory_keys)
423
427
if missing_inventories:
424
missing_inventories = sorted(missing_inventories)
425
raise ValueError('We are missing inventories for revisions: %s'
426
% (missing_inventories,))
428
# Go back to the original repo, to see if these are really missing
429
# https://bugs.launchpad.net/bzr/+bug/437003
430
# If we are packing a subset of the repo, it is fine to just have
431
# the data in another Pack file, which is not included in this pack
433
inv_index = self._pack_collection.repo.inventories._index
434
pmap = inv_index.get_parent_map(missing_inventories)
435
really_missing = missing_inventories.difference(pmap)
437
missing_inventories = sorted(really_missing)
438
raise ValueError('We are missing inventories for revisions: %s'
439
% (missing_inventories,))
427
440
self._copy_stream(source_vf, target_vf, inventory_keys,
428
441
'inventories', self._get_filtered_inv_stream, 2)
594
607
def __init__(self, *args, **kwargs):
595
608
super(GCCHKCanonicalizingPacker, self).__init__(*args, **kwargs)
596
609
self._data_changed = False
598
611
def _exhaust_stream(self, source_vf, keys, message, vf_to_stream, pb_offset):
599
612
"""Create and exhaust a stream, but don't insert it.
601
614
This is useful to get the side-effects of generating a stream.
603
616
self.pb.update('scanning %s' % (message,), pb_offset)
779
797
% (sorted(missing_text_keys),))
782
def _execute_pack_operations(self, pack_operations,
783
_packer_class=GCCHKPacker,
785
"""Execute a series of pack operations.
787
:param pack_operations: A list of [revision_count, packs_to_combine].
788
:param _packer_class: The class of packer to use (default: Packer).
791
# XXX: Copied across from RepositoryPackCollection simply because we
792
# want to override the _packer_class ... :(
793
for revision_count, packs in pack_operations:
794
# we may have no-ops from the setup logic
797
packer = GCCHKPacker(self, packs, '.autopack',
798
reload_func=reload_func)
800
result = packer.pack()
801
except errors.RetryWithNewPacks:
802
# An exception is propagating out of this context, make sure
803
# this packer has cleaned up. Packer() doesn't set its new_pack
804
# state into the RepositoryPackCollection object, so we only
805
# have access to it directly here.
806
if packer.new_pack is not None:
807
packer.new_pack.abort()
812
self._remove_pack_from_memory(pack)
813
# record the newly available packs and stop advertising the old
816
for _, packs in pack_operations:
817
to_be_obsoleted.extend(packs)
818
result = self._save_pack_names(clear_obsolete_packs=True,
819
obsolete_packs=to_be_obsoleted)
823
class CHKInventoryRepository(KnitPackRepository):
824
"""subclass of KnitPackRepository that uses CHK based inventories."""
801
class CHKInventoryRepository(PackRepository):
802
"""subclass of PackRepository that uses CHK based inventories."""
826
804
def __init__(self, _format, a_bzrdir, control_files, _commit_builder_class,
828
806
"""Overridden to change pack collection class."""
829
KnitPackRepository.__init__(self, _format, a_bzrdir, control_files,
830
_commit_builder_class, _serializer)
831
# and now replace everything it did :)
807
super(CHKInventoryRepository, self).__init__(_format, a_bzrdir,
808
control_files, _commit_builder_class, _serializer)
832
809
index_transport = self._transport.clone('indices')
833
810
self._pack_collection = GCRepositoryPackCollection(self,
834
811
self._transport, index_transport,
1006
983
if record.storage_kind != 'absent':
1007
984
texts[record.key] = record.get_bytes_as('fulltext')
1009
raise errors.NoSuchRevision(self, record.key)
986
texts[record.key] = None
1010
987
for key in keys:
1011
yield inventory.CHKInventory.deserialise(self.chk_bytes, texts[key], key)
990
yield (None, key[-1])
992
yield (inventory.CHKInventory.deserialise(
993
self.chk_bytes, bytes, key), key[-1])
1013
def _iter_inventory_xmls(self, revision_ids, ordering):
995
def _get_inventory_xml(self, revision_id):
996
"""Get serialized inventory as a string."""
1014
997
# Without a native 'xml' inventory, this method doesn't make sense.
1015
998
# However older working trees, and older bundles want it - so we supply
1016
999
# it allowing _get_inventory_xml to work. Bundles currently use the
1017
1000
# serializer directly; this also isn't ideal, but there isn't an xml
1018
# iteration interface offered at all for repositories. We could make
1019
# _iter_inventory_xmls be part of the contract, even if kept private.
1020
inv_to_str = self._serializer.write_inventory_to_string
1021
for inv in self.iter_inventories(revision_ids, ordering=ordering):
1022
yield inv_to_str(inv), inv.revision_id
1001
# iteration interface offered at all for repositories.
1002
return self._serializer.write_inventory_to_string(
1003
self.get_inventory(revision_id))
1024
1005
def _find_present_inventory_keys(self, revision_keys):
1025
1006
parent_map = self.inventories.get_parent_map(revision_keys)
1137
1118
return GroupCHKStreamSource(self, to_format)
1138
1119
return super(CHKInventoryRepository, self)._get_source(to_format)
1141
class GroupCHKStreamSource(KnitPackStreamSource):
1121
def _find_inconsistent_revision_parents(self, revisions_iterator=None):
1122
"""Find revisions with different parent lists in the revision object
1123
and in the index graph.
1125
:param revisions_iterator: None, or an iterator of (revid,
1126
Revision-or-None). This iterator controls the revisions checked.
1127
:returns: an iterator yielding tuples of (revison-id, parents-in-index,
1128
parents-in-revision).
1130
if not self.is_locked():
1131
raise AssertionError()
1133
if revisions_iterator is None:
1134
revisions_iterator = self._iter_revisions(None)
1135
for revid, revision in revisions_iterator:
1136
if revision is None:
1138
parent_map = vf.get_parent_map([(revid,)])
1139
parents_according_to_index = tuple(parent[-1] for parent in
1140
parent_map[(revid,)])
1141
parents_according_to_revision = tuple(revision.parent_ids)
1142
if parents_according_to_index != parents_according_to_revision:
1143
yield (revid, parents_according_to_index,
1144
parents_according_to_revision)
1146
def _check_for_inconsistent_revision_parents(self):
1147
inconsistencies = list(self._find_inconsistent_revision_parents())
1149
raise errors.BzrCheckError(
1150
"Revision index has inconsistent parents.")
1153
class GroupCHKStreamSource(StreamSource):
1142
1154
"""Used when both the source and target repo are GroupCHK repos."""
1144
1156
def __init__(self, from_repository, to_format):
1231
1243
self._chk_p_id_roots = None
1232
1244
yield 'chk_bytes', _get_parent_id_basename_to_file_id_pages()
1246
def _get_text_stream(self):
1247
# Note: We know we don't have to handle adding root keys, because both
1248
# the source and target are the identical network name.
1249
text_stream = self.from_repository.texts.get_record_stream(
1250
self._text_keys, self._text_fetch_order, False)
1251
return ('texts', text_stream)
1234
1253
def get_stream(self, search):
1235
1254
def wrap_and_count(pb, rc, stream):
1236
1255
"""Yield records from stream while showing progress."""
1251
1270
yield (stream_info[0],
1252
1271
wrap_and_count(pb, rc, stream_info[1]))
1253
1272
self._revision_keys = [(rev_id,) for rev_id in revision_ids]
1254
self.from_repository.revisions.clear_cache()
1255
self.from_repository.signatures.clear_cache()
1256
s = self._get_inventory_stream(self._revision_keys)
1257
yield (s[0], wrap_and_count(pb, rc, s[1]))
1258
self.from_repository.inventories.clear_cache()
1259
1273
# TODO: The keys to exclude might be part of the search recipe
1260
1274
# For now, exclude all parents that are at the edge of ancestry, for
1261
1275
# which we have inventories
1262
1276
from_repo = self.from_repository
1263
1277
parent_keys = from_repo._find_parent_keys_of_revisions(
1264
1278
self._revision_keys)
1279
self.from_repository.revisions.clear_cache()
1280
self.from_repository.signatures.clear_cache()
1281
# Clear the repo's get_parent_map cache too.
1282
self.from_repository._unstacked_provider.disable_cache()
1283
self.from_repository._unstacked_provider.enable_cache()
1284
s = self._get_inventory_stream(self._revision_keys)
1285
yield (s[0], wrap_and_count(pb, rc, s[1]))
1286
self.from_repository.inventories.clear_cache()
1265
1287
for stream_info in self._get_filtered_chk_streams(parent_keys):
1266
1288
yield (stream_info[0], wrap_and_count(pb, rc, stream_info[1]))
1267
1289
self.from_repository.chk_bytes.clear_cache()
1344
class RepositoryFormatCHK1(RepositoryFormatPack):
1345
"""A hashed CHK+group compress pack repository."""
1364
class RepositoryFormat2a(RepositoryFormatPack):
1365
"""A CHK repository that uses the bencode revision serializer."""
1347
1367
repository_class = CHKInventoryRepository
1348
1368
supports_external_lookups = True
1349
1369
supports_chks = True
1350
# For right now, setting this to True gives us InterModel1And2 rather
1351
# than InterDifferingSerializer
1352
1370
_commit_builder_class = PackRootCommitBuilder
1353
1371
rich_root_data = True
1354
_serializer = chk_serializer.chk_serializer_255_bigpage
1372
_serializer = chk_serializer.chk_bencode_serializer
1355
1373
_commit_inv_deltas = True
1356
1374
# What index classes to use
1357
1375
index_builder_class = BTreeBuilder
1368
1386
pack_compresses = True
1370
1388
def _get_matching_bzrdir(self):
1371
return bzrdir.format_registry.make_bzrdir('development6-rich-root')
1373
def _ignore_setting_bzrdir(self, format):
1376
_matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)
1378
def get_format_string(self):
1379
"""See RepositoryFormat.get_format_string()."""
1380
return ('Bazaar development format - group compression and chk inventory'
1381
' (needs bzr.dev from 1.14)\n')
1383
def get_format_description(self):
1384
"""See RepositoryFormat.get_format_description()."""
1385
return ("Development repository format - rich roots, group compression"
1386
" and chk inventories")
1389
class RepositoryFormatCHK2(RepositoryFormatCHK1):
1390
"""A CHK repository that uses the bencode revision serializer."""
1392
_serializer = chk_serializer.chk_bencode_serializer
1394
def _get_matching_bzrdir(self):
1395
return bzrdir.format_registry.make_bzrdir('development7-rich-root')
1397
def _ignore_setting_bzrdir(self, format):
1400
_matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)
1402
def get_format_string(self):
1403
"""See RepositoryFormat.get_format_string()."""
1404
return ('Bazaar development format - chk repository with bencode '
1405
'revision serialization (needs bzr.dev from 1.16)\n')
1408
class RepositoryFormat2a(RepositoryFormatCHK2):
1409
"""A CHK repository that uses the bencode revision serializer.
1411
This is the same as RepositoryFormatCHK2 but with a public name.
1414
_serializer = chk_serializer.chk_bencode_serializer
1416
def _get_matching_bzrdir(self):
1417
return bzrdir.format_registry.make_bzrdir('2a')
1419
def _ignore_setting_bzrdir(self, format):
1422
_matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)
1424
def get_format_string(self):
1389
return controldir.format_registry.make_bzrdir('2a')
1391
def _ignore_setting_bzrdir(self, format):
1394
_matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)
1397
def get_format_string(cls):
1425
1398
return ('Bazaar repository format 2a (needs bzr 1.16 or later)\n')
1427
1400
def get_format_description(self):
1438
1411
def _get_matching_bzrdir(self):
1439
return bzrdir.format_registry.make_bzrdir('development-subtree')
1412
return controldir.format_registry.make_bzrdir('development-subtree')
1441
1414
def _ignore_setting_bzrdir(self, format):
1444
1417
_matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)
1446
def get_format_string(self):
1420
def get_format_string(cls):
1447
1421
return ('Bazaar development format 8\n')
1449
1423
def get_format_description(self):