304
281
self.bundle.add_info_record(serializer=serializer_format,
305
282
supports_rich_root=supports_rich_root)
284
def iter_file_revisions(self):
285
"""Iterate through all relevant revisions of all files.
287
This is the correct implementation, but is not compatible with bzr.dev,
288
because certain old revisions were not converted correctly, and have
289
the wrong "revision" marker in inventories.
291
transaction = self.repository.get_transaction()
292
altered = self.repository.fileids_altered_by_revision_ids(
294
for file_id, file_revision_ids in altered.iteritems():
295
vf = self.repository.weave_store.get_weave(file_id, transaction)
296
yield vf, file_id, file_revision_ids
298
def iter_file_revisions_aggressive(self):
299
"""Iterate through all relevant revisions of all files.
301
This uses the standard iter_file_revisions to determine what revisions
302
are referred to by inventories, but then uses the versionedfile to
303
determine what the build-dependencies of each required revision.
305
All build dependencies which are not ancestors of the base revision
308
for vf, file_id, file_revision_ids in self.iter_file_revisions():
309
new_revision_ids = set()
310
pending = list(file_revision_ids)
311
while len(pending) > 0:
312
revision_id = pending.pop()
313
if revision_id in new_revision_ids:
315
if revision_id in self.base_ancestry:
317
new_revision_ids.add(revision_id)
318
pending.extend(vf.get_parents(revision_id))
319
yield vf, file_id, new_revision_ids
307
321
def write_files(self):
308
322
"""Write bundle records for all revisions of all files"""
310
altered_fileids = self.repository.fileids_altered_by_revision_ids(
312
for file_id, revision_ids in altered_fileids.iteritems():
313
for revision_id in revision_ids:
314
text_keys.append((file_id, revision_id))
315
self._add_mp_records_keys('file', self.repository.texts, text_keys)
323
for vf, file_id, revision_ids in self.iter_file_revisions_aggressive():
324
self.add_mp_records('file', file_id, vf, revision_ids)
317
326
def write_revisions(self):
318
327
"""Write bundle records for all revisions and signatures"""
319
inv_vf = self.repository.inventories
320
topological_order = [key[-1] for key in multiparent.topo_iter_keys(
321
inv_vf, self.revision_keys)]
322
revision_order = topological_order
328
inv_vf = self.repository.get_inventory_weave()
329
revision_order = list(multiparent.topo_iter(inv_vf, self.revision_ids))
323
330
if self.target is not None and self.target in self.revision_ids:
324
# Make sure the target revision is always the last entry
325
revision_order = list(topological_order)
326
331
revision_order.remove(self.target)
327
332
revision_order.append(self.target)
328
if self.repository._serializer.support_altered_by_hack:
329
# Repositories that support_altered_by_hack means that
330
# inventories.make_mpdiffs() contains all the data about the tree
331
# shape. Formats without support_altered_by_hack require
332
# chk_bytes/etc, so we use a different code path.
333
self._add_mp_records_keys('inventory', inv_vf,
334
[(revid,) for revid in topological_order])
336
# Inventories should always be added in pure-topological order, so
337
# that we can apply the mpdiff for the child to the parent texts.
338
self._add_inventory_mpdiffs_from_serializer(topological_order)
339
self._add_revision_texts(revision_order)
341
def _add_inventory_mpdiffs_from_serializer(self, revision_order):
342
"""Generate mpdiffs by serializing inventories.
344
The current repository only has part of the tree shape information in
345
the 'inventories' vf. So we use serializer.write_inventory_to_string to
346
get a 'full' representation of the tree shape, and then generate
347
mpdiffs on that data stream. This stream can then be reconstructed on
350
inventory_key_order = [(r,) for r in revision_order]
351
parent_map = self.repository.inventories.get_parent_map(
353
missing_keys = set(inventory_key_order).difference(parent_map)
355
raise errors.RevisionNotPresent(list(missing_keys)[0],
356
self.repository.inventories)
357
inv_to_str = self.repository._serializer.write_inventory_to_string
358
# Make sure that we grab the parent texts first
360
map(just_parents.update, parent_map.itervalues())
361
just_parents.difference_update(parent_map)
362
# Ignore ghost parents
363
present_parents = self.repository.inventories.get_parent_map(
365
ghost_keys = just_parents.difference(present_parents)
366
needed_inventories = list(present_parents) + inventory_key_order
367
needed_inventories = [k[-1] for k in needed_inventories]
369
for inv in self.repository.iter_inventories(needed_inventories):
370
revision_id = inv.revision_id
372
as_bytes = inv_to_str(inv)
373
# The sha1 is validated as the xml/textual form, not as the
374
# form-in-the-repository
375
sha1 = osutils.sha_string(as_bytes)
376
as_lines = osutils.split_lines(as_bytes)
378
all_lines[key] = as_lines
379
if key in just_parents:
380
# We don't transmit those entries
382
# Create an mpdiff for this text, and add it to the output
383
parent_keys = parent_map[key]
384
# See the comment in VF.make_mpdiffs about how this effects
385
# ordering when there are ghosts present. I think we have a latent
387
parent_lines = [all_lines[p_key] for p_key in parent_keys
388
if p_key not in ghost_keys]
389
diff = multiparent.MultiParent.from_lines(
390
as_lines, parent_lines)
391
text = ''.join(diff.to_patch())
392
parent_ids = [k[-1] for k in parent_keys]
393
self.bundle.add_multiparent_record(text, sha1, parent_ids,
394
'inventory', revision_id, None)
396
def _add_revision_texts(self, revision_order):
397
parent_map = self.repository.get_parent_map(revision_order)
398
revision_to_str = self.repository._serializer.write_revision_to_string
399
revisions = self.repository.get_revisions(revision_order)
400
for revision in revisions:
401
revision_id = revision.revision_id
402
parents = parent_map.get(revision_id, None)
403
revision_text = revision_to_str(revision)
333
self.add_mp_records('inventory', None, inv_vf, revision_order)
334
parents_list = self.repository.get_parents(revision_order)
335
for parents, revision_id in zip(parents_list, revision_order):
336
revision_text = self.repository.get_revision_xml(revision_id)
404
337
self.bundle.add_fulltext_record(revision_text, parents,
405
338
'revision', revision_id)
425
358
base = parents[0]
426
359
return base, target
428
def _add_mp_records_keys(self, repo_kind, vf, keys):
361
def add_mp_records(self, repo_kind, file_id, vf, revision_ids):
429
362
"""Add multi-parent diff records to a bundle"""
430
ordered_keys = list(multiparent.topo_iter_keys(vf, keys))
431
mpdiffs = vf.make_mpdiffs(ordered_keys)
432
sha1s = vf.get_sha1s(ordered_keys)
433
parent_map = vf.get_parent_map(ordered_keys)
434
for mpdiff, item_key, in zip(mpdiffs, ordered_keys):
435
sha1 = sha1s[item_key]
436
parents = [key[-1] for key in parent_map[item_key]]
363
revision_ids = list(multiparent.topo_iter(vf, revision_ids))
364
mpdiffs = vf.make_mpdiffs(revision_ids)
365
sha1s = vf.get_sha1s(revision_ids)
366
for mpdiff, revision_id, sha1, in zip(mpdiffs, revision_ids, sha1s):
367
parents = vf.get_parents(revision_id)
437
368
text = ''.join(mpdiff.to_patch())
438
# Infer file id records as appropriate.
439
if len(item_key) == 2:
440
file_id = item_key[0]
443
369
self.bundle.add_multiparent_record(text, sha1, parents, repo_kind,
444
item_key[-1], file_id)
370
revision_id, file_id)
447
373
class BundleInfoV4(object):
530
444
self._info = None
532
446
def install(self):
533
"""Perform the installation.
535
Must be called with the Repository locked.
537
self._repository.start_write_group()
539
result = self._install_in_write_group()
541
self._repository.abort_write_group()
543
self._repository.commit_write_group()
546
def _install_in_write_group(self):
447
"""Perform the installation"""
547
448
current_file = None
548
449
current_versionedfile = None
549
450
pending_file_records = []
551
pending_inventory_records = []
552
451
added_inv = set()
553
452
target_revision = None
554
453
for bytes, metadata, repo_kind, revision_id, file_id in\
555
454
self._container.iter_records():
556
455
if repo_kind == 'info':
557
if self._info is not None:
558
raise AssertionError()
456
assert self._info is None
559
457
self._handle_info(metadata)
560
if (pending_file_records and
561
(repo_kind, file_id) != ('file', current_file)):
562
# Flush the data for a single file - prevents memory
563
# spiking due to buffering all files in memory.
564
self._install_mp_records_keys(self._repository.texts,
458
if repo_kind != 'file':
459
self._install_mp_records(current_versionedfile,
565
460
pending_file_records)
566
461
current_file = None
567
del pending_file_records[:]
568
if len(pending_inventory_records) > 0 and repo_kind != 'inventory':
569
self._install_inventory_records(pending_inventory_records)
570
pending_inventory_records = []
571
if repo_kind == 'inventory':
572
pending_inventory_records.append(((revision_id,), metadata, bytes))
573
if repo_kind == 'revision':
574
target_revision = revision_id
575
self._install_revision(revision_id, metadata, bytes)
576
if repo_kind == 'signature':
577
self._install_signature(revision_id, metadata, bytes)
462
current_versionedfile = None
463
pending_file_records = []
464
if repo_kind == 'inventory':
465
self._install_inventory(revision_id, metadata, bytes)
466
if repo_kind == 'revision':
467
target_revision = revision_id
468
self._install_revision(revision_id, metadata, bytes)
469
if repo_kind == 'signature':
470
self._install_signature(revision_id, metadata, bytes)
578
471
if repo_kind == 'file':
579
current_file = file_id
580
pending_file_records.append(((file_id, revision_id), metadata, bytes))
581
self._install_mp_records_keys(self._repository.texts, pending_file_records)
472
if file_id != current_file:
473
self._install_mp_records(current_versionedfile,
474
pending_file_records)
475
current_file = file_id
476
current_versionedfile = \
477
self._repository.weave_store.get_weave_or_empty(
478
file_id, self._repository.get_transaction())
479
pending_file_records = []
480
if revision_id in current_versionedfile:
482
pending_file_records.append((revision_id, metadata, bytes))
483
self._install_mp_records(current_versionedfile, pending_file_records)
582
484
return target_revision
584
486
def _handle_info(self, info):
599
501
records if r not in versionedfile]
600
502
versionedfile.add_mpdiffs(vf_records)
602
def _install_mp_records_keys(self, versionedfile, records):
603
d_func = multiparent.MultiParent.from_patch
605
for key, meta, text in records:
606
# Adapt to tuple interface: A length two key is a file_id,
607
# revision_id pair, a length 1 key is a
608
# revision/signature/inventory. We need to do this because
609
# the metadata extraction from the bundle has not yet been updated
610
# to use the consistent tuple interface itself.
615
parents = [prefix + (parent,) for parent in meta['parents']]
616
vf_records.append((key, parents, meta['sha1'], d_func(text)))
617
versionedfile.add_mpdiffs(vf_records)
619
def _get_parent_inventory_texts(self, inventory_text_cache,
620
inventory_cache, parent_ids):
621
cached_parent_texts = {}
622
remaining_parent_ids = []
623
for parent_id in parent_ids:
624
p_text = inventory_text_cache.get(parent_id, None)
626
remaining_parent_ids.append(parent_id)
628
cached_parent_texts[parent_id] = p_text
630
# TODO: Use inventory_cache to grab inventories we already have in
632
if remaining_parent_ids:
633
# first determine what keys are actually present in the local
634
# inventories object (don't use revisions as they haven't been
636
parent_keys = [(r,) for r in remaining_parent_ids]
637
present_parent_map = self._repository.inventories.get_parent_map(
639
present_parent_ids = []
641
for p_id in remaining_parent_ids:
642
if (p_id,) in present_parent_map:
643
present_parent_ids.append(p_id)
646
to_string = self._source_serializer.write_inventory_to_string
647
for parent_inv in self._repository.iter_inventories(
649
p_text = to_string(parent_inv)
650
inventory_cache[parent_inv.revision_id] = parent_inv
651
cached_parent_texts[parent_inv.revision_id] = p_text
652
inventory_text_cache[parent_inv.revision_id] = p_text
654
parent_texts = [cached_parent_texts[parent_id]
655
for parent_id in parent_ids
656
if parent_id not in ghosts]
659
def _install_inventory_records(self, records):
660
if (self._info['serializer'] == self._repository._serializer.format_num
661
and self._repository._serializer.support_altered_by_hack):
662
return self._install_mp_records_keys(self._repository.inventories,
664
# Use a 10MB text cache, since these are string xml inventories. Note
665
# that 10MB is fairly small for large projects (a single inventory can
666
# be >5MB). Another possibility is to cache 10-20 inventory texts
668
inventory_text_cache = lru_cache.LRUSizeCache(10*1024*1024)
669
# Also cache the in-memory representation. This allows us to create
670
# inventory deltas to apply rather than calling add_inventory from
672
inventory_cache = lru_cache.LRUCache(10)
673
pb = ui.ui_factory.nested_progress_bar()
504
def _install_inventory(self, revision_id, metadata, text):
505
vf = self._repository.get_inventory_weave()
506
if revision_id in vf:
508
parent_ids = metadata['parents']
509
if self._info['serializer'] == self._repository._serializer.format_num:
510
return self._install_mp_records(vf, [(revision_id, metadata,
512
parents = [self._repository.get_inventory(p)
514
parent_texts = [self._source_serializer.write_inventory_to_string(p)
516
target_lines = multiparent.MultiParent.from_patch(text).to_lines(
518
sha1 = osutils.sha_strings(target_lines)
519
if sha1 != metadata['sha1']:
520
raise errors.BadBundle("Can't convert to target format")
521
target_inv = self._source_serializer.read_inventory_from_string(
522
''.join(target_lines))
523
self._handle_root(target_inv, parent_ids)
675
num_records = len(records)
676
for idx, (key, metadata, bytes) in enumerate(records):
677
pb.update('installing inventory', idx, num_records)
678
revision_id = key[-1]
679
parent_ids = metadata['parents']
680
# Note: This assumes the local ghosts are identical to the
681
# ghosts in the source, as the Bundle serialization
682
# format doesn't record ghosts.
683
p_texts = self._get_parent_inventory_texts(inventory_text_cache,
686
# Why does to_lines() take strings as the source, it seems that
687
# it would have to cast to a list of lines, which we get back
688
# as lines and then cast back to a string.
689
target_lines = multiparent.MultiParent.from_patch(bytes
691
inv_text = ''.join(target_lines)
693
sha1 = osutils.sha_string(inv_text)
694
if sha1 != metadata['sha1']:
695
raise errors.BadBundle("Can't convert to target format")
696
# Add this to the cache so we don't have to extract it again.
697
inventory_text_cache[revision_id] = inv_text
698
target_inv = self._source_serializer.read_inventory_from_string(
700
self._handle_root(target_inv, parent_ids)
703
parent_inv = inventory_cache.get(parent_ids[0], None)
705
if parent_inv is None:
706
self._repository.add_inventory(revision_id, target_inv,
709
delta = target_inv._make_delta(parent_inv)
710
self._repository.add_inventory_by_delta(parent_ids[0],
711
delta, revision_id, parent_ids)
712
except errors.UnsupportedInventoryKind:
713
raise errors.IncompatibleRevision(repr(self._repository))
714
inventory_cache[revision_id] = target_inv
525
self._repository.add_inventory(revision_id, target_inv, parent_ids)
526
except errors.UnsupportedInventoryKind:
527
raise errors.IncompatibleRevision(repr(self._repository))
718
529
def _handle_root(self, target_inv, parent_ids):
719
530
revision_id = target_inv.revision_id
720
531
if self.update_root:
721
text_key = (target_inv.root.file_id, revision_id)
722
parent_keys = [(target_inv.root.file_id, parent) for
723
parent in parent_ids]
724
self._repository.texts.add_lines(text_key, parent_keys, [])
532
target_inv.root.revision = revision_id
533
store = self._repository.weave_store
534
transaction = self._repository.get_transaction()
535
vf = store.get_weave_or_empty(target_inv.root.file_id, transaction)
536
vf.add_lines(revision_id, parent_ids, [])
725
537
elif not self._repository.supports_rich_root():
726
538
if target_inv.root.revision != revision_id:
727
539
raise errors.IncompatibleRevision(repr(self._repository))
729
542
def _install_revision(self, revision_id, metadata, text):
730
543
if self._repository.has_revision(revision_id):
732
revision = self._source_serializer.read_revision_from_string(text)
733
self._repository.add_revision(revision.revision_id, revision)
545
self._repository._add_revision_text(revision_id, text)
735
547
def _install_signature(self, revision_id, metadata, text):
736
548
transaction = self._repository.get_transaction()
737
if self._repository.has_signature_for_revision_id(revision_id):
549
if self._repository._revision_store.has_signature(revision_id,
739
self._repository.add_signature_text(revision_id, text)
552
self._repository._revision_store.add_revision_signature_text(
553
revision_id, text, transaction)