29
28
revision as _mod_revision,
33
versionedfile as _mod_versionedfile,
35
from bzrlib.bundle import bundle_data, serializer as bundle_serializer
36
from bzrlib import bencode
39
class _MPDiffInventoryGenerator(_mod_versionedfile._MPDiffGenerator):
    """Generate multi-parent diffs of serialized inventory texts.

    Inventories are serialized with the repository's serializer so the
    resulting diffs describe the full tree shape, not just the partial
    content stored in the 'inventories' versionedfile.
    """

    def __init__(self, repo, inventory_keys):
        """Create a generator for ``inventory_keys`` of ``repo``.

        :param repo: The source Repository; its ``inventories``
            versionedfile supplies the texts to diff.
        :param inventory_keys: Iterable of inventory keys (1-tuples of
            revision ids) to produce diffs for.
        """
        super(_MPDiffInventoryGenerator, self).__init__(repo.inventories,
            inventory_keys)
        self.repo = repo
        self.sha1s = {}

    def iter_diffs(self):
        """Compute the diffs one at a time.

        Yields ``(revision_id, parent_ids, sha1, diff)`` tuples for each
        requested key, in the order the parents-first inventory fetch
        produces them.
        """
        # This is instead of compute_diffs() since we guarantee our ordering of
        # inventories, we don't have to do any buffering
        self._find_needed_keys()
        # We actually use a slightly different ordering. We grab all of the
        # parents first, and then grab the ordered requests.
        needed_ids = [k[-1] for k in self.present_parents]
        needed_ids.extend([k[-1] for k in self.ordered_keys])
        inv_to_str = self.repo._serializer.write_inventory_to_string
        for inv in self.repo.iter_inventories(needed_ids):
            revision_id = inv.revision_id
            key = (revision_id,)
            if key in self.present_parents:
                # Not a key we will transmit, which is a shame, since because
                # of that bundles don't work with stacked branches
                parent_ids = None
            else:
                parent_ids = [k[-1] for k in self.parent_map[key]]
            as_bytes = inv_to_str(inv)
            self._process_one_record(key, (as_bytes,))
            if parent_ids is None:
                # Parent-only text: cached for diffing, never emitted.
                continue
            diff = self.diffs.pop(key)
            sha1 = osutils.sha_string(as_bytes)
            yield revision_id, parent_ids, sha1, diff
32
from bzrlib.bundle import bundle_data, serializer
33
from bzrlib.util import bencode
76
36
class BundleWriter(object):
342
293
self.bundle.add_info_record(serializer=serializer_format,
343
294
supports_rich_root=supports_rich_root)
296
def iter_file_revisions(self):
297
"""Iterate through all relevant revisions of all files.
299
This is the correct implementation, but is not compatible with bzr.dev,
300
because certain old revisions were not converted correctly, and have
301
the wrong "revision" marker in inventories.
303
transaction = self.repository.get_transaction()
304
altered = self.repository.fileids_altered_by_revision_ids(
306
for file_id, file_revision_ids in altered.iteritems():
307
vf = self.repository.weave_store.get_weave(file_id, transaction)
308
yield vf, file_id, file_revision_ids
310
def iter_file_revisions_aggressive(self):
311
"""Iterate through all relevant revisions of all files.
313
This uses the standard iter_file_revisions to determine what revisions
314
are referred to by inventories, but then uses the versionedfile to
315
determine what the build-dependencies of each required revision.
317
All build dependencies which are not ancestors of the base revision
320
for vf, file_id, file_revision_ids in self.iter_file_revisions():
321
new_revision_ids = set()
322
pending = list(file_revision_ids)
323
while len(pending) > 0:
324
revision_id = pending.pop()
325
if revision_id in new_revision_ids:
327
if revision_id in self.base_ancestry:
329
new_revision_ids.add(revision_id)
330
pending.extend(vf.get_parents(revision_id))
331
yield vf, file_id, new_revision_ids
345
333
def write_files(self):
346
334
"""Write bundle records for all revisions of all files"""
348
altered_fileids = self.repository.fileids_altered_by_revision_ids(
350
for file_id, revision_ids in altered_fileids.iteritems():
351
for revision_id in revision_ids:
352
text_keys.append((file_id, revision_id))
353
self._add_mp_records_keys('file', self.repository.texts, text_keys)
335
for vf, file_id, revision_ids in self.iter_file_revisions():
336
self.add_mp_records('file', file_id, vf, revision_ids)
355
338
def write_revisions(self):
356
339
"""Write bundle records for all revisions and signatures"""
357
inv_vf = self.repository.inventories
358
topological_order = [key[-1] for key in multiparent.topo_iter_keys(
359
inv_vf, self.revision_keys)]
360
revision_order = topological_order
340
inv_vf = self.repository.get_inventory_weave()
341
revision_order = list(multiparent.topo_iter(inv_vf, self.revision_ids))
361
342
if self.target is not None and self.target in self.revision_ids:
362
# Make sure the target revision is always the last entry
363
revision_order = list(topological_order)
364
343
revision_order.remove(self.target)
365
344
revision_order.append(self.target)
366
if self.repository._serializer.support_altered_by_hack:
367
# Repositories that support_altered_by_hack means that
368
# inventories.make_mpdiffs() contains all the data about the tree
369
# shape. Formats without support_altered_by_hack require
370
# chk_bytes/etc, so we use a different code path.
371
self._add_mp_records_keys('inventory', inv_vf,
372
[(revid,) for revid in topological_order])
374
# Inventories should always be added in pure-topological order, so
375
# that we can apply the mpdiff for the child to the parent texts.
376
self._add_inventory_mpdiffs_from_serializer(topological_order)
377
self._add_revision_texts(revision_order)
379
def _add_inventory_mpdiffs_from_serializer(self, revision_order):
380
"""Generate mpdiffs by serializing inventories.
382
The current repository only has part of the tree shape information in
383
the 'inventories' vf. So we use serializer.write_inventory_to_string to
384
get a 'full' representation of the tree shape, and then generate
385
mpdiffs on that data stream. This stream can then be reconstructed on
388
inventory_key_order = [(r,) for r in revision_order]
389
generator = _MPDiffInventoryGenerator(self.repository,
391
for revision_id, parent_ids, sha1, diff in generator.iter_diffs():
392
text = ''.join(diff.to_patch())
393
self.bundle.add_multiparent_record(text, sha1, parent_ids,
394
'inventory', revision_id, None)
396
def _add_revision_texts(self, revision_order):
397
parent_map = self.repository.get_parent_map(revision_order)
398
revision_to_str = self.repository._serializer.write_revision_to_string
399
revisions = self.repository.get_revisions(revision_order)
400
for revision in revisions:
401
revision_id = revision.revision_id
402
parents = parent_map.get(revision_id, None)
403
revision_text = revision_to_str(revision)
345
self.add_mp_records('inventory', None, inv_vf, revision_order)
346
parents_list = self.repository.get_parents(revision_order)
347
for parents, revision_id in zip(parents_list, revision_order):
348
revision_text = self.repository.get_revision_xml(revision_id)
404
349
self.bundle.add_fulltext_record(revision_text, parents,
405
350
'revision', revision_id)
425
370
base = parents[0]
426
371
return base, target
428
def _add_mp_records_keys(self, repo_kind, vf, keys):
373
def add_mp_records(self, repo_kind, file_id, vf, revision_ids):
429
374
"""Add multi-parent diff records to a bundle"""
430
ordered_keys = list(multiparent.topo_iter_keys(vf, keys))
431
mpdiffs = vf.make_mpdiffs(ordered_keys)
432
sha1s = vf.get_sha1s(ordered_keys)
433
parent_map = vf.get_parent_map(ordered_keys)
434
for mpdiff, item_key, in zip(mpdiffs, ordered_keys):
435
sha1 = sha1s[item_key]
436
parents = [key[-1] for key in parent_map[item_key]]
375
revision_ids = list(multiparent.topo_iter(vf, revision_ids))
376
mpdiffs = vf.make_mpdiffs(revision_ids)
377
sha1s = vf.get_sha1s(revision_ids)
378
for mpdiff, revision_id, sha1, in zip(mpdiffs, revision_ids, sha1s):
379
parents = vf.get_parents(revision_id)
437
380
text = ''.join(mpdiff.to_patch())
438
# Infer file id records as appropriate.
439
if len(item_key) == 2:
440
file_id = item_key[0]
443
381
self.bundle.add_multiparent_record(text, sha1, parents, repo_kind,
444
item_key[-1], file_id)
382
revision_id, file_id)
447
385
class BundleInfoV4(object):
554
492
for bytes, metadata, repo_kind, revision_id, file_id in\
555
493
self._container.iter_records():
556
494
if repo_kind == 'info':
557
if self._info is not None:
558
raise AssertionError()
495
assert self._info is None
559
496
self._handle_info(metadata)
560
if (pending_file_records and
561
(repo_kind, file_id) != ('file', current_file)):
562
# Flush the data for a single file - prevents memory
563
# spiking due to buffering all files in memory.
564
self._install_mp_records_keys(self._repository.texts,
565
pending_file_records)
497
if (repo_kind, file_id) != ('file', current_file):
498
if len(pending_file_records) > 0:
499
self._install_mp_records(current_versionedfile,
500
pending_file_records)
566
501
current_file = None
567
del pending_file_records[:]
502
current_versionedfile = None
503
pending_file_records = []
568
504
if len(pending_inventory_records) > 0 and repo_kind != 'inventory':
569
self._install_inventory_records(pending_inventory_records)
505
self._install_inventory_records(inventory_vf,
506
pending_inventory_records)
570
507
pending_inventory_records = []
571
508
if repo_kind == 'inventory':
572
pending_inventory_records.append(((revision_id,), metadata, bytes))
509
if inventory_vf is None:
510
inventory_vf = self._repository.get_inventory_weave()
511
if revision_id not in inventory_vf:
512
pending_inventory_records.append((revision_id, metadata,
573
514
if repo_kind == 'revision':
574
515
target_revision = revision_id
575
516
self._install_revision(revision_id, metadata, bytes)
599
547
records if r not in versionedfile]
600
548
versionedfile.add_mpdiffs(vf_records)
602
def _install_mp_records_keys(self, versionedfile, records):
603
d_func = multiparent.MultiParent.from_patch
605
for key, meta, text in records:
606
# Adapt to tuple interface: A length two key is a file_id,
607
# revision_id pair, a length 1 key is a
608
# revision/signature/inventory. We need to do this because
609
# the metadata extraction from the bundle has not yet been updated
610
# to use the consistent tuple interface itself.
615
parents = [prefix + (parent,) for parent in meta['parents']]
616
vf_records.append((key, parents, meta['sha1'], d_func(text)))
617
versionedfile.add_mpdiffs(vf_records)
619
def _get_parent_inventory_texts(self, inventory_text_cache,
620
inventory_cache, parent_ids):
621
cached_parent_texts = {}
622
remaining_parent_ids = []
623
for parent_id in parent_ids:
624
p_text = inventory_text_cache.get(parent_id, None)
626
remaining_parent_ids.append(parent_id)
628
cached_parent_texts[parent_id] = p_text
630
# TODO: Use inventory_cache to grab inventories we already have in
632
if remaining_parent_ids:
633
# first determine what keys are actually present in the local
634
# inventories object (don't use revisions as they haven't been
636
parent_keys = [(r,) for r in remaining_parent_ids]
637
present_parent_map = self._repository.inventories.get_parent_map(
639
present_parent_ids = []
641
for p_id in remaining_parent_ids:
642
if (p_id,) in present_parent_map:
643
present_parent_ids.append(p_id)
646
to_string = self._source_serializer.write_inventory_to_string
647
for parent_inv in self._repository.iter_inventories(
649
p_text = to_string(parent_inv)
650
inventory_cache[parent_inv.revision_id] = parent_inv
651
cached_parent_texts[parent_inv.revision_id] = p_text
652
inventory_text_cache[parent_inv.revision_id] = p_text
654
parent_texts = [cached_parent_texts[parent_id]
655
for parent_id in parent_ids
656
if parent_id not in ghosts]
659
def _install_inventory_records(self, records):
660
if (self._info['serializer'] == self._repository._serializer.format_num
661
and self._repository._serializer.support_altered_by_hack):
662
return self._install_mp_records_keys(self._repository.inventories,
664
# Use a 10MB text cache, since these are string xml inventories. Note
665
# that 10MB is fairly small for large projects (a single inventory can
666
# be >5MB). Another possibility is to cache 10-20 inventory texts
668
inventory_text_cache = lru_cache.LRUSizeCache(10*1024*1024)
669
# Also cache the in-memory representation. This allows us to create
670
# inventory deltas to apply rather than calling add_inventory from
672
inventory_cache = lru_cache.LRUCache(10)
673
pb = ui.ui_factory.nested_progress_bar()
675
num_records = len(records)
676
for idx, (key, metadata, bytes) in enumerate(records):
677
pb.update('installing inventory', idx, num_records)
678
revision_id = key[-1]
679
parent_ids = metadata['parents']
680
# Note: This assumes the local ghosts are identical to the
681
# ghosts in the source, as the Bundle serialization
682
# format doesn't record ghosts.
683
p_texts = self._get_parent_inventory_texts(inventory_text_cache,
686
# Why does to_lines() take strings as the source, it seems that
687
# it would have to cast to a list of lines, which we get back
688
# as lines and then cast back to a string.
689
target_lines = multiparent.MultiParent.from_patch(bytes
691
inv_text = ''.join(target_lines)
693
sha1 = osutils.sha_string(inv_text)
694
if sha1 != metadata['sha1']:
695
raise errors.BadBundle("Can't convert to target format")
696
# Add this to the cache so we don't have to extract it again.
697
inventory_text_cache[revision_id] = inv_text
698
target_inv = self._source_serializer.read_inventory_from_string(
700
self._handle_root(target_inv, parent_ids)
703
parent_inv = inventory_cache.get(parent_ids[0], None)
705
if parent_inv is None:
706
self._repository.add_inventory(revision_id, target_inv,
709
delta = target_inv._make_delta(parent_inv)
710
self._repository.add_inventory_by_delta(parent_ids[0],
711
delta, revision_id, parent_ids)
712
except errors.UnsupportedInventoryKind:
713
raise errors.IncompatibleRevision(repr(self._repository))
714
inventory_cache[revision_id] = target_inv
550
def _install_inventory_records(self, vf, records):
551
if self._info['serializer'] == self._repository._serializer.format_num:
552
return self._install_mp_records(vf, records)
553
for revision_id, metadata, bytes in records:
554
parent_ids = metadata['parents']
555
parents = [self._repository.get_inventory(p)
557
p_texts = [self._source_serializer.write_inventory_to_string(p)
559
target_lines = multiparent.MultiParent.from_patch(bytes).to_lines(
561
sha1 = osutils.sha_strings(target_lines)
562
if sha1 != metadata['sha1']:
563
raise errors.BadBundle("Can't convert to target format")
564
target_inv = self._source_serializer.read_inventory_from_string(
565
''.join(target_lines))
566
self._handle_root(target_inv, parent_ids)
568
self._repository.add_inventory(revision_id, target_inv,
570
except errors.UnsupportedInventoryKind:
571
raise errors.IncompatibleRevision(repr(self._repository))
718
573
def _handle_root(self, target_inv, parent_ids):
719
574
revision_id = target_inv.revision_id
720
575
if self.update_root:
721
text_key = (target_inv.root.file_id, revision_id)
722
parent_keys = [(target_inv.root.file_id, parent) for
723
parent in parent_ids]
724
self._repository.texts.add_lines(text_key, parent_keys, [])
576
target_inv.root.revision = revision_id
577
store = self._repository.weave_store
578
transaction = self._repository.get_transaction()
579
vf = store.get_weave_or_empty(target_inv.root.file_id, transaction)
580
vf.add_lines(revision_id, parent_ids, [])
725
581
elif not self._repository.supports_rich_root():
726
582
if target_inv.root.revision != revision_id:
727
583
raise errors.IncompatibleRevision(repr(self._repository))
729
586
def _install_revision(self, revision_id, metadata, text):
730
587
if self._repository.has_revision(revision_id):
732
revision = self._source_serializer.read_revision_from_string(text)
733
self._repository.add_revision(revision.revision_id, revision)
589
self._repository._add_revision_text(revision_id, text)
735
591
def _install_signature(self, revision_id, metadata, text):
736
592
transaction = self._repository.get_transaction()
737
if self._repository.has_signature_for_revision_id(revision_id):
593
if self._repository._revision_store.has_signature(revision_id,
739
self._repository.add_signature_text(revision_id, text)
596
self._repository._revision_store.add_revision_signature_text(
597
revision_id, text, transaction)