29
28
revision as _mod_revision,
33
versionedfile as _mod_versionedfile,
35
from bzrlib.bundle import bundle_data, serializer as bundle_serializer
36
from bzrlib import bencode
39
class _MPDiffInventoryGenerator(_mod_versionedfile._MPDiffGenerator):
40
"""Generate Inventory diffs serialized inventories."""
42
def __init__(self, repo, inventory_keys):
43
super(_MPDiffInventoryGenerator, self).__init__(repo.inventories,
49
"""Compute the diffs one at a time."""
50
# This is used instead of compute_diffs(): since we guarantee our ordering of
51
# inventories, we don't have to do any buffering
52
self._find_needed_keys()
53
# We actually use a slightly different ordering. We grab all of the
54
# parents first, and then grab the ordered requests.
55
needed_ids = [k[-1] for k in self.present_parents]
56
needed_ids.extend([k[-1] for k in self.ordered_keys])
57
inv_to_str = self.repo._serializer.write_inventory_to_string
58
for inv in self.repo.iter_inventories(needed_ids):
59
revision_id = inv.revision_id
61
if key in self.present_parents:
62
# Not a key we will transmit, which is a shame, since because
63
# of that bundles don't work with stacked branches
66
parent_ids = [k[-1] for k in self.parent_map[key]]
67
as_bytes = inv_to_str(inv)
68
self._process_one_record(key, (as_bytes,))
69
if parent_ids is None:
71
diff = self.diffs.pop(key)
72
sha1 = osutils.sha_string(as_bytes)
73
yield revision_id, parent_ids, sha1, diff
32
from bzrlib.bundle import bundle_data, serializer
33
from bzrlib.util import bencode
76
36
class BundleWriter(object):
355
315
def write_revisions(self):
356
316
"""Write bundle records for all revisions and signatures"""
357
317
inv_vf = self.repository.inventories
358
topological_order = [key[-1] for key in multiparent.topo_iter_keys(
359
inv_vf, self.revision_keys)]
360
revision_order = topological_order
318
revision_order = [key[-1] for key in multiparent.topo_iter_keys(inv_vf,
361
320
if self.target is not None and self.target in self.revision_ids:
362
# Make sure the target revision is always the last entry
363
revision_order = list(topological_order)
364
321
revision_order.remove(self.target)
365
322
revision_order.append(self.target)
366
if self.repository._serializer.support_altered_by_hack:
367
# For repositories that support_altered_by_hack,
368
# inventories.make_mpdiffs() contains all the data about the tree
369
# shape. Formats without support_altered_by_hack require
370
# chk_bytes/etc, so we use a different code path.
371
self._add_mp_records_keys('inventory', inv_vf,
372
[(revid,) for revid in topological_order])
374
# Inventories should always be added in pure-topological order, so
375
# that we can apply the mpdiff for the child to the parent texts.
376
self._add_inventory_mpdiffs_from_serializer(topological_order)
377
self._add_revision_texts(revision_order)
379
def _add_inventory_mpdiffs_from_serializer(self, revision_order):
380
"""Generate mpdiffs by serializing inventories.
382
The current repository only has part of the tree shape information in
383
the 'inventories' vf. So we use serializer.write_inventory_to_string to
384
get a 'full' representation of the tree shape, and then generate
385
mpdiffs on that data stream. This stream can then be reconstructed on
388
inventory_key_order = [(r,) for r in revision_order]
389
generator = _MPDiffInventoryGenerator(self.repository,
391
for revision_id, parent_ids, sha1, diff in generator.iter_diffs():
392
text = ''.join(diff.to_patch())
393
self.bundle.add_multiparent_record(text, sha1, parent_ids,
394
'inventory', revision_id, None)
396
def _add_revision_texts(self, revision_order):
323
self._add_mp_records_keys('inventory', inv_vf, [(revid,) for revid in revision_order])
397
324
parent_map = self.repository.get_parent_map(revision_order)
398
revision_to_str = self.repository._serializer.write_revision_to_string
399
revisions = self.repository.get_revisions(revision_order)
400
for revision in revisions:
401
revision_id = revision.revision_id
325
for revision_id in revision_order:
402
326
parents = parent_map.get(revision_id, None)
403
revision_text = revision_to_str(revision)
327
revision_text = self.repository.get_revision_xml(revision_id)
404
328
self.bundle.add_fulltext_record(revision_text, parents,
405
329
'revision', revision_id)
616
540
vf_records.append((key, parents, meta['sha1'], d_func(text)))
617
541
versionedfile.add_mpdiffs(vf_records)
619
def _get_parent_inventory_texts(self, inventory_text_cache,
620
inventory_cache, parent_ids):
621
cached_parent_texts = {}
622
remaining_parent_ids = []
623
for parent_id in parent_ids:
624
p_text = inventory_text_cache.get(parent_id, None)
626
remaining_parent_ids.append(parent_id)
628
cached_parent_texts[parent_id] = p_text
630
# TODO: Use inventory_cache to grab inventories we already have in
632
if remaining_parent_ids:
633
# first determine what keys are actually present in the local
634
# inventories object (don't use revisions as they haven't been
636
parent_keys = [(r,) for r in remaining_parent_ids]
637
present_parent_map = self._repository.inventories.get_parent_map(
639
present_parent_ids = []
641
for p_id in remaining_parent_ids:
642
if (p_id,) in present_parent_map:
643
present_parent_ids.append(p_id)
646
to_string = self._source_serializer.write_inventory_to_string
647
for parent_inv in self._repository.iter_inventories(
649
p_text = to_string(parent_inv)
650
inventory_cache[parent_inv.revision_id] = parent_inv
651
cached_parent_texts[parent_inv.revision_id] = p_text
652
inventory_text_cache[parent_inv.revision_id] = p_text
654
parent_texts = [cached_parent_texts[parent_id]
655
for parent_id in parent_ids
656
if parent_id not in ghosts]
659
543
def _install_inventory_records(self, records):
660
if (self._info['serializer'] == self._repository._serializer.format_num
661
and self._repository._serializer.support_altered_by_hack):
544
if self._info['serializer'] == self._repository._serializer.format_num:
662
545
return self._install_mp_records_keys(self._repository.inventories,
664
# Use a 10MB text cache, since these are string xml inventories. Note
665
# that 10MB is fairly small for large projects (a single inventory can
666
# be >5MB). Another possibility is to cache 10-20 inventory texts
668
inventory_text_cache = lru_cache.LRUSizeCache(10*1024*1024)
669
# Also cache the in-memory representation. This allows us to create
670
# inventory deltas to apply rather than calling add_inventory from
672
inventory_cache = lru_cache.LRUCache(10)
673
pb = ui.ui_factory.nested_progress_bar()
675
num_records = len(records)
676
for idx, (key, metadata, bytes) in enumerate(records):
677
pb.update('installing inventory', idx, num_records)
678
revision_id = key[-1]
679
parent_ids = metadata['parents']
680
# Note: This assumes the local ghosts are identical to the
681
# ghosts in the source, as the Bundle serialization
682
# format doesn't record ghosts.
683
p_texts = self._get_parent_inventory_texts(inventory_text_cache,
686
# Why does to_lines() take strings as the source, it seems that
687
# it would have to cast to a list of lines, which we get back
688
# as lines and then cast back to a string.
689
target_lines = multiparent.MultiParent.from_patch(bytes
691
inv_text = ''.join(target_lines)
693
sha1 = osutils.sha_string(inv_text)
694
if sha1 != metadata['sha1']:
695
raise errors.BadBundle("Can't convert to target format")
696
# Add this to the cache so we don't have to extract it again.
697
inventory_text_cache[revision_id] = inv_text
698
target_inv = self._source_serializer.read_inventory_from_string(
700
self._handle_root(target_inv, parent_ids)
703
parent_inv = inventory_cache.get(parent_ids[0], None)
705
if parent_inv is None:
706
self._repository.add_inventory(revision_id, target_inv,
709
delta = target_inv._make_delta(parent_inv)
710
self._repository.add_inventory_by_delta(parent_ids[0],
711
delta, revision_id, parent_ids)
712
except errors.UnsupportedInventoryKind:
713
raise errors.IncompatibleRevision(repr(self._repository))
714
inventory_cache[revision_id] = target_inv
547
for key, metadata, bytes in records:
548
revision_id = key[-1]
549
parent_ids = metadata['parents']
550
parents = [self._repository.get_inventory(p)
552
p_texts = [self._source_serializer.write_inventory_to_string(p)
554
target_lines = multiparent.MultiParent.from_patch(bytes).to_lines(
556
sha1 = osutils.sha_strings(target_lines)
557
if sha1 != metadata['sha1']:
558
raise errors.BadBundle("Can't convert to target format")
559
target_inv = self._source_serializer.read_inventory_from_string(
560
''.join(target_lines))
561
self._handle_root(target_inv, parent_ids)
563
self._repository.add_inventory(revision_id, target_inv,
565
except errors.UnsupportedInventoryKind:
566
raise errors.IncompatibleRevision(repr(self._repository))
718
568
def _handle_root(self, target_inv, parent_ids):
719
569
revision_id = target_inv.revision_id