816
828
which views a particular line of development through that history.
818
830
The Repository builds on top of some byte storage facilities (the revisions,
819
signatures, inventories and texts attributes) and a Transport, which
820
respectively provide byte storage and a means to access the (possibly
831
signatures, inventories, texts and chk_bytes attributes) and a Transport,
832
which respectively provide byte storage and a means to access the (possibly
823
835
The byte storage facilities are addressed via tuples, which we refer to
824
836
as 'keys' throughout the code base. Revision_keys, inventory_keys and
825
837
signature_keys are all 1-tuples: (revision_id,). text_keys are two-tuples:
826
(file_id, revision_id). We use this interface because it allows low
827
friction with the underlying code that implements disk indices, network
828
encoding and other parts of bzrlib.
838
(file_id, revision_id). chk_bytes uses CHK keys - a 1-tuple with a single
839
byte string made up of a hash identifier and a hash value.
840
We use this interface because it allows low friction with the underlying
841
code that implements disk indices, network encoding and other parts of
830
844
:ivar revisions: A bzrlib.versionedfile.VersionedFiles instance containing
831
845
the serialised revisions for the repository. This can be used to obtain
2888
2954
'bzrlib.repofmt.pack_repo',
2889
2955
'RepositoryFormatPackDevelopment2Subtree',
2957
# 1.9->1.110 go below here
2958
format_registry.register_lazy(
2959
# merge-bbc-dev4-to-bzr.dev
2960
"Bazaar development format 5 (needs bzr.dev from before 1.13)\n",
2961
'bzrlib.repofmt.pack_repo',
2962
'RepositoryFormatPackDevelopment5',
2964
format_registry.register_lazy(
2965
# merge-bbc-dev4-to-bzr.dev
2966
("Bazaar development format 5 with subtree support"
2967
" (needs bzr.dev from before 1.13)\n"),
2968
'bzrlib.repofmt.pack_repo',
2969
'RepositoryFormatPackDevelopment5Subtree',
2971
format_registry.register_lazy(
2972
# merge-bbc-dev4-to-bzr.dev
2973
('Bazaar development format 5 hash 16'
2974
' (needs bzr.dev from before 1.13)\n'),
2975
'bzrlib.repofmt.pack_repo',
2976
'RepositoryFormatPackDevelopment5Hash16',
2978
format_registry.register_lazy(
2979
# merge-bbc-dev4-to-bzr.dev
2980
('Bazaar development format 5 hash 255'
2981
' (needs bzr.dev from before 1.13)\n'),
2982
'bzrlib.repofmt.pack_repo',
2983
'RepositoryFormatPackDevelopment5Hash255',
2985
# XXX: This format is scheduled for termination
2986
# format_registry.register_lazy(
2987
# 'Bazaar development format - btree+gc (needs bzr.dev from 1.13)\n',
2988
# 'bzrlib.repofmt.groupcompress_repo',
2989
# 'RepositoryFormatPackGCPlain',
2991
format_registry.register_lazy(
2992
'Bazaar development format - hash16chk+gc rich-root (needs bzr.dev from 1.13)\n',
2993
'bzrlib.repofmt.groupcompress_repo',
2994
'RepositoryFormatPackGCCHK16',
2996
format_registry.register_lazy(
2997
'Bazaar development format - hash255chk+gc rich-root (needs bzr.dev from 1.13)\n',
2998
'bzrlib.repofmt.groupcompress_repo',
2999
'RepositoryFormatPackGCCHK255',
3001
format_registry.register_lazy(
3002
'Bazaar development format - hash255chk+gc rich-root bigpage (needs bzr.dev from 1.13)\n',
3003
'bzrlib.repofmt.groupcompress_repo',
3004
'RepositoryFormatPackGCCHK255Big',
2893
3008
class InterRepository(InterObject):
3426
3555
# Walk through all revisions; get inventory deltas, copy referenced
3427
3556
# texts that delta references, insert the delta, revision and
3558
root_keys_to_create = set()
3429
3559
text_keys = set()
3430
3560
pending_deltas = []
3431
3561
pending_revisions = []
3432
3562
parent_map = self.source.get_parent_map(revision_ids)
3563
# NB: This fails with dubious inventory data (when inv A has rev OLD
3564
# for file F, and in B, after A, has rev A for file F) when A and B are
3565
# in different groups.
3433
3566
for tree in self.source.revision_trees(revision_ids):
3434
3567
current_revision_id = tree.get_revision_id()
3435
3568
parent_ids = parent_map.get(current_revision_id, ())
3436
3569
basis_id, delta = self._get_delta_for_revision(tree, parent_ids,
3437
3570
basis_id, cache)
3571
if self._converting_to_rich_root:
3572
self._revision_id_to_root_id[current_revision_id] = \
3438
3574
# Find text entries that need to be copied
3439
3575
for old_path, new_path, file_id, entry in delta:
3440
3576
if new_path is not None:
3441
if not (new_path or self.target.supports_rich_root()):
3442
# We don't copy the text for the root node unless the
3443
# target supports_rich_root.
3579
if not self.target.supports_rich_root():
3580
# The target doesn't support rich root, so we don't
3583
if self._converting_to_rich_root:
3584
# This can't be copied normally, we have to insert
3586
root_keys_to_create.add((file_id, entry.revision))
3445
3588
text_keys.add((file_id, entry.revision))
3446
3589
revision = self.source.get_revision(current_revision_id)
3447
3590
pending_deltas.append((basis_id, delta,
3964
4138
return (not self.from_repository._format.rich_root_data and
3965
4139
self.to_format.rich_root_data)
4141
def _get_inventory_stream(self, revision_ids):
    """Pick the inventory substream generator suited to source and target.

    The choice depends on whether the source format and the target format
    support chks, and on whether they share a serializer.

    :param revision_ids: The revision ids whose inventories are wanted.
    :return: Whatever the selected helper returns: one of
        _get_chk_inventory_stream, _get_simple_inventory_stream or
        _get_convertable_inventory_stream, called with revision_ids.
    """
    from_format = self.from_repository._format
    if (from_format.supports_chks and self.to_format.supports_chks
            and from_format._serializer == self.to_format._serializer):
        # Both sides support chks, and they use the same serializer, so it
        # is safe to transmit the chk pages and inventory pages across
        # unchanged.
        return self._get_chk_inventory_stream(revision_ids)
    if not from_format.supports_chks:
        # Source repository doesn't support chks. So we can transmit the
        # inventories 'as-is' and either they are just accepted on the
        # target, or the Sink will properly convert it.
        return self._get_simple_inventory_stream(revision_ids)
    # Reached when the source supports chks but the target does not, or
    # the two sides use different serializers.
    # XXX: Hack to make not-chk->chk fetch: copy the inventories as
    #      inventories. Note that this should probably be done somehow
    #      as part of bzrlib.repository.StreamSink. Except JAM couldn't
    #      figure out how a non-chk repository could possibly handle
    #      deserializing an inventory stream from a chk repo, as it
    #      doesn't have a way to understand individual pages.
    return self._get_convertable_inventory_stream(revision_ids)
4163
def _get_simple_inventory_stream(self, revision_ids):
    """Yield one 'inventories' substream read directly from the source.

    :param revision_ids: The revision ids whose inventories are wanted;
        each is wrapped into a 1-tuple key for the record stream request.
    :return: A generator yielding a single ('inventories', record_stream)
        pair, where record_stream is what the source repository's
        inventories.get_record_stream returns.
    """
    source_inventories = self.from_repository.inventories
    # The third argument to get_record_stream is the negation of
    # delta_on_metadata().
    yield ('inventories', source_inventories.get_record_stream(
        [(rev_id,) for rev_id in revision_ids],
        self.inventory_fetch_order(),
        not self.delta_on_metadata()))
4170
def _get_chk_inventory_stream(self, revision_ids):
4171
"""Fetch the inventory texts, along with the associated chk maps."""
4172
# We want an inventory outside of the search set, so that we can filter
4173
# out uninteresting chk pages. For now we use
4174
# _find_revision_outside_set, but if we had a Search with cut_revs, we
4175
# could use that instead.
4176
start_rev_id = self.from_repository._find_revision_outside_set(
4178
start_rev_key = (start_rev_id,)
4179
inv_keys_to_fetch = [(rev_id,) for rev_id in revision_ids]
4180
if start_rev_id != _mod_revision.NULL_REVISION:
4181
inv_keys_to_fetch.append((start_rev_id,))
4182
# Any repo that supports chk_bytes must also support out-of-order
4183
# insertion. At least, that is how we expect it to work
4184
# We use get_record_stream instead of iter_inventories because we want
4185
# to be able to insert the stream as well. We could instead fetch
4186
# allowing deltas, and then iter_inventories, but we don't know whether
4187
# source or target is more 'local' anyway.
4188
inv_stream = self.from_repository.inventories.get_record_stream(
4189
inv_keys_to_fetch, 'unordered',
4190
True) # We need them as full-texts so we can find their references
4191
uninteresting_chk_roots = set()
4192
interesting_chk_roots = set()
4193
def filter_inv_stream(inv_stream):
4194
for idx, record in enumerate(inv_stream):
4195
### child_pb.update('fetch inv', idx, len(inv_keys_to_fetch))
4196
bytes = record.get_bytes_as('fulltext')
4197
chk_inv = inventory.CHKInventory.deserialise(
4198
self.from_repository.chk_bytes, bytes, record.key)
4199
if record.key == start_rev_key:
4200
uninteresting_chk_roots.add(chk_inv.id_to_entry.key())
4201
p_id_map = chk_inv.parent_id_basename_to_file_id
4202
if p_id_map is not None:
4203
uninteresting_chk_roots.add(p_id_map.key())
4206
interesting_chk_roots.add(chk_inv.id_to_entry.key())
4207
p_id_map = chk_inv.parent_id_basename_to_file_id
4208
if p_id_map is not None:
4209
interesting_chk_roots.add(p_id_map.key())
4210
### pb.update('fetch inventory', 0, 2)
4211
yield ('inventories', filter_inv_stream(inv_stream))
4212
# Now that we have worked out all of the interesting root nodes, grab
4213
# all of the interesting pages and insert them
4214
### pb.update('fetch inventory', 1, 2)
4215
interesting = chk_map.iter_interesting_nodes(
4216
self.from_repository.chk_bytes, interesting_chk_roots,
4217
uninteresting_chk_roots)
4218
def to_stream_adapter():
4219
"""Adapt the iter_interesting_nodes result to a single stream.
4221
iter_interesting_nodes returns records as it processes them, which
4222
can be in batches. But we only want a single stream to be inserted.
4224
for record, items in interesting:
4225
for value in record.itervalues():
4227
# XXX: We could instead call get_record_stream(record.keys())
4228
# ATM, this will always insert the records as fulltexts, and
4229
# requires that you can hang on to records once you have gone
4230
# on to the next one. Further, it causes the target to
4231
# recompress the data. Testing shows it to be faster than
4232
# requesting the records again, though.
4233
yield ('chk_bytes', to_stream_adapter())
4234
### pb.update('fetch inventory', 2, 2)
4236
def _get_convertable_inventory_stream(self, revision_ids):
    """Yield one 'inventories' substream of fulltext inventory records.

    :param revision_ids: The revision ids whose inventories are wanted.
    :return: A generator yielding a single ('inventories', records) pair,
        where records is the result of _stream_invs_as_fulltexts.
    """
    # XXX: One of source or target is using chks, and they don't have
    #      compatible serializations. The StreamSink code expects to be
    #      able to convert on the target, so we need to put
    #      bytes-on-the-wire that can be converted.
    yield ('inventories', self._stream_invs_as_fulltexts(revision_ids))
4243
def _stream_invs_as_fulltexts(self, revision_ids):
    """Yield each requested inventory as a fulltext content factory.

    Every inventory is serialised to a single string with the source
    format's serializer and wrapped in a FulltextContentFactory keyed by
    (revision_id,), with the parent keys reported by the source
    repository's inventories.

    :param revision_ids: The revision ids whose inventories are wanted.
    :return: A generator of versionedfile.FulltextContentFactory objects.
    """
    from_repo = self.from_repository
    from_serializer = from_repo._format._serializer
    # NOTE(review): revision_ids is iterated twice (here and by
    # iter_inventories below), so this presumably expects a re-iterable
    # collection rather than a one-shot iterator -- confirm with callers.
    revision_keys = [(rev_id,) for rev_id in revision_ids]
    parent_map = from_repo.inventories.get_parent_map(revision_keys)
    for inv in from_repo.iter_inventories(revision_ids):
        # XXX: This is a bit hackish, but it works. Basically,
        #      CHKSerializer 'accidentally' supports
        #      read/write_inventory_to_string, even though that is never
        #      the format that is stored on disk. It *does* give us a
        #      single string representation for an inventory, so we live
        #      with that.
        #      This would be far better if we had a 'serialized inventory
        #      delta' form. Then we could use 'inventory._make_delta', and
        #      transmit that. This would both be faster to generate, and
        #      result in fewer bytes-on-the-wire.
        as_bytes = from_serializer.write_inventory_to_string(inv)
        key = (inv.revision_id,)
        # Inventories missing from the parent map are sent with no parents.
        parent_keys = parent_map.get(key, ())
        yield versionedfile.FulltextContentFactory(
            key, parent_keys, None, as_bytes)