14
14
# along with this program; if not, write to the Free Software
15
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
from __future__ import absolute_import
17
19
from cStringIO import StringIO
21
23
from bzrlib import (
28
30
revision as _mod_revision,
34
versionedfile as _mod_versionedfile,
32
36
from bzrlib.bundle import bundle_data, serializer as bundle_serializer
37
from bzrlib.i18n import ngettext
33
38
from bzrlib import bencode
41
class _MPDiffInventoryGenerator(_mod_versionedfile._MPDiffGenerator):
42
"""Generate inventory diffs from serialized inventories."""
44
def __init__(self, repo, inventory_keys):
45
super(_MPDiffInventoryGenerator, self).__init__(repo.inventories,
51
"""Compute the diffs one at a time."""
52
# This is used instead of compute_diffs(): since we guarantee our ordering of
53
# inventories, we don't have to do any buffering
54
self._find_needed_keys()
55
# We actually use a slightly different ordering. We grab all of the
56
# parents first, and then grab the ordered requests.
57
needed_ids = [k[-1] for k in self.present_parents]
58
needed_ids.extend([k[-1] for k in self.ordered_keys])
59
inv_to_str = self.repo._serializer.write_inventory_to_string
60
for inv in self.repo.iter_inventories(needed_ids):
61
revision_id = inv.revision_id
63
if key in self.present_parents:
64
# Not a key we will transmit, which is a shame, because it means
65
# that bundles don't work with stacked branches
68
parent_ids = [k[-1] for k in self.parent_map[key]]
69
as_bytes = inv_to_str(inv)
70
self._process_one_record(key, (as_bytes,))
71
if parent_ids is None:
73
diff = self.diffs.pop(key)
74
sha1 = osutils.sha_string(as_bytes)
75
yield revision_id, parent_ids, sha1, diff
36
78
class BundleWriter(object):
37
79
"""Writer for bundle-format files.
315
358
def write_revisions(self):
316
359
"""Write bundle records for all revisions and signatures"""
317
360
inv_vf = self.repository.inventories
318
revision_order = [key[-1] for key in multiparent.topo_iter_keys(inv_vf,
361
topological_order = [key[-1] for key in multiparent.topo_iter_keys(
362
inv_vf, self.revision_keys)]
363
revision_order = topological_order
320
364
if self.target is not None and self.target in self.revision_ids:
365
# Make sure the target revision is always the last entry
366
revision_order = list(topological_order)
321
367
revision_order.remove(self.target)
322
368
revision_order.append(self.target)
323
self._add_mp_records_keys('inventory', inv_vf, [(revid,) for revid in revision_order])
369
if self.repository._serializer.support_altered_by_hack:
370
# A serializer with support_altered_by_hack means that
371
# inventories.make_mpdiffs() contains all the data about the tree
372
# shape. Formats without support_altered_by_hack require
373
# chk_bytes/etc, so we use a different code path.
374
self._add_mp_records_keys('inventory', inv_vf,
375
[(revid,) for revid in topological_order])
377
# Inventories should always be added in pure-topological order, so
378
# that we can apply the mpdiff for the child to the parent texts.
379
self._add_inventory_mpdiffs_from_serializer(topological_order)
380
self._add_revision_texts(revision_order)
382
def _add_inventory_mpdiffs_from_serializer(self, revision_order):
383
"""Generate mpdiffs by serializing inventories.
385
The current repository only has part of the tree shape information in
386
the 'inventories' vf. So we use serializer.write_inventory_to_string to
387
get a 'full' representation of the tree shape, and then generate
388
mpdiffs on that data stream. This stream can then be reconstructed on
391
inventory_key_order = [(r,) for r in revision_order]
392
generator = _MPDiffInventoryGenerator(self.repository,
394
for revision_id, parent_ids, sha1, diff in generator.iter_diffs():
395
text = ''.join(diff.to_patch())
396
self.bundle.add_multiparent_record(text, sha1, parent_ids,
397
'inventory', revision_id, None)
399
def _add_revision_texts(self, revision_order):
324
400
parent_map = self.repository.get_parent_map(revision_order)
325
401
revision_to_str = self.repository._serializer.write_revision_to_string
326
402
revisions = self.repository.get_revisions(revision_order)
543
619
vf_records.append((key, parents, meta['sha1'], d_func(text)))
544
620
versionedfile.add_mpdiffs(vf_records)
622
def _get_parent_inventory_texts(self, inventory_text_cache,
623
inventory_cache, parent_ids):
624
cached_parent_texts = {}
625
remaining_parent_ids = []
626
for parent_id in parent_ids:
627
p_text = inventory_text_cache.get(parent_id, None)
629
remaining_parent_ids.append(parent_id)
631
cached_parent_texts[parent_id] = p_text
633
# TODO: Use inventory_cache to grab inventories we already have in
635
if remaining_parent_ids:
636
# first determine what keys are actually present in the local
637
# inventories object (don't use revisions as they haven't been
639
parent_keys = [(r,) for r in remaining_parent_ids]
640
present_parent_map = self._repository.inventories.get_parent_map(
642
present_parent_ids = []
644
for p_id in remaining_parent_ids:
645
if (p_id,) in present_parent_map:
646
present_parent_ids.append(p_id)
649
to_string = self._source_serializer.write_inventory_to_string
650
for parent_inv in self._repository.iter_inventories(
652
p_text = to_string(parent_inv)
653
inventory_cache[parent_inv.revision_id] = parent_inv
654
cached_parent_texts[parent_inv.revision_id] = p_text
655
inventory_text_cache[parent_inv.revision_id] = p_text
657
parent_texts = [cached_parent_texts[parent_id]
658
for parent_id in parent_ids
659
if parent_id not in ghosts]
546
662
def _install_inventory_records(self, records):
547
if self._info['serializer'] == self._repository._serializer.format_num:
663
if (self._info['serializer'] == self._repository._serializer.format_num
664
and self._repository._serializer.support_altered_by_hack):
548
665
return self._install_mp_records_keys(self._repository.inventories,
550
for key, metadata, bytes in records:
551
revision_id = key[-1]
552
parent_ids = metadata['parents']
553
parents = [self._repository.get_inventory(p)
555
p_texts = [self._source_serializer.write_inventory_to_string(p)
557
target_lines = multiparent.MultiParent.from_patch(bytes).to_lines(
559
sha1 = osutils.sha_strings(target_lines)
560
if sha1 != metadata['sha1']:
561
raise errors.BadBundle("Can't convert to target format")
562
target_inv = self._source_serializer.read_inventory_from_string(
563
''.join(target_lines))
564
self._handle_root(target_inv, parent_ids)
566
self._repository.add_inventory(revision_id, target_inv,
568
except errors.UnsupportedInventoryKind:
569
raise errors.IncompatibleRevision(repr(self._repository))
667
# Use a 10MB text cache, since these are string xml inventories. Note
668
# that 10MB is fairly small for large projects (a single inventory can
669
# be >5MB). Another possibility is to cache 10-20 inventory texts
671
inventory_text_cache = lru_cache.LRUSizeCache(10*1024*1024)
672
# Also cache the in-memory representation. This allows us to create
673
# inventory deltas to apply rather than calling add_inventory from
675
inventory_cache = lru_cache.LRUCache(10)
676
pb = ui.ui_factory.nested_progress_bar()
678
num_records = len(records)
679
for idx, (key, metadata, bytes) in enumerate(records):
680
pb.update('installing inventory', idx, num_records)
681
revision_id = key[-1]
682
parent_ids = metadata['parents']
683
# Note: This assumes the local ghosts are identical to the
684
# ghosts in the source, as the Bundle serialization
685
# format doesn't record ghosts.
686
p_texts = self._get_parent_inventory_texts(inventory_text_cache,
689
# Why does to_lines() take strings as the source? It seems that
690
# it would have to cast to a list of lines, which we get back
691
# as lines and then cast back to a string.
692
target_lines = multiparent.MultiParent.from_patch(bytes
694
inv_text = ''.join(target_lines)
696
sha1 = osutils.sha_string(inv_text)
697
if sha1 != metadata['sha1']:
698
raise errors.BadBundle("Can't convert to target format")
699
# Add this to the cache so we don't have to extract it again.
700
inventory_text_cache[revision_id] = inv_text
701
target_inv = self._source_serializer.read_inventory_from_string(
703
self._handle_root(target_inv, parent_ids)
706
parent_inv = inventory_cache.get(parent_ids[0], None)
708
if parent_inv is None:
709
self._repository.add_inventory(revision_id, target_inv,
712
delta = target_inv._make_delta(parent_inv)
713
self._repository.add_inventory_by_delta(parent_ids[0],
714
delta, revision_id, parent_ids)
715
except errors.UnsupportedInventoryKind:
716
raise errors.IncompatibleRevision(repr(self._repository))
717
inventory_cache[revision_id] = target_inv
571
721
def _handle_root(self, target_inv, parent_ids):
572
722
revision_id = target_inv.revision_id