29
28
revision as _mod_revision,
33
versionedfile as _mod_versionedfile,
35
from bzrlib.bundle import bundle_data, serializer as bundle_serializer
36
from bzrlib.i18n import gettext, ngettext
37
from bzrlib import bencode
40
class _MPDiffInventoryGenerator(_mod_versionedfile._MPDiffGenerator):
41
"""Generate Inventory diffs serialized inventories."""
43
def __init__(self, repo, inventory_keys):
44
super(_MPDiffInventoryGenerator, self).__init__(repo.inventories,
50
"""Compute the diffs one at a time."""
51
# This is instead of compute_diffs() since we guarantee our ordering of
52
# inventories, we don't have to do any buffering
53
self._find_needed_keys()
54
# We actually use a slightly different ordering. We grab all of the
55
# parents first, and then grab the ordered requests.
56
needed_ids = [k[-1] for k in self.present_parents]
57
needed_ids.extend([k[-1] for k in self.ordered_keys])
58
inv_to_str = self.repo._serializer.write_inventory_to_string
59
for inv in self.repo.iter_inventories(needed_ids):
60
revision_id = inv.revision_id
62
if key in self.present_parents:
63
# Not a key we will transmit, which is a shame, because
64
# of that bundles don't work with stacked branches
67
parent_ids = [k[-1] for k in self.parent_map[key]]
68
as_bytes = inv_to_str(inv)
69
self._process_one_record(key, (as_bytes,))
70
if parent_ids is None:
72
diff = self.diffs.pop(key)
73
sha1 = osutils.sha_string(as_bytes)
74
yield revision_id, parent_ids, sha1, diff
32
from bzrlib.bundle import bundle_data, serializer
33
from bzrlib.util import bencode
77
36
class BundleWriter(object):
311
270
self.repository = repository
312
271
bundle = BundleWriter(fileobj)
313
272
self.bundle = bundle
273
self.base_ancestry = set(repository.get_ancestry(base,
314
275
if revision_ids is not None:
315
276
self.revision_ids = revision_ids
317
graph = repository.get_graph()
318
revision_ids = graph.find_unique_ancestors(target, [base])
320
parents = graph.get_parent_map(revision_ids)
321
self.revision_ids = [r for r in revision_ids if r in parents]
278
revision_ids = set(repository.get_ancestry(target,
280
self.revision_ids = revision_ids.difference(self.base_ancestry)
322
281
self.revision_keys = set([(revid,) for revid in self.revision_ids])
324
283
def do_write(self):
325
284
"""Write all data to the bundle"""
326
trace.note(ngettext('Bundling %d revision.', 'Bundling %d revisions.',
327
len(self.revision_ids)), len(self.revision_ids))
285
trace.note('Bundling %d revision(s).', len(self.revision_ids))
328
286
self.repository.lock_read()
330
288
self.bundle.begin()
357
315
def write_revisions(self):
358
316
"""Write bundle records for all revisions and signatures"""
359
317
inv_vf = self.repository.inventories
360
topological_order = [key[-1] for key in multiparent.topo_iter_keys(
361
inv_vf, self.revision_keys)]
362
revision_order = topological_order
318
revision_order = [key[-1] for key in multiparent.topo_iter_keys(inv_vf,
363
320
if self.target is not None and self.target in self.revision_ids:
364
# Make sure the target revision is always the last entry
365
revision_order = list(topological_order)
366
321
revision_order.remove(self.target)
367
322
revision_order.append(self.target)
368
if self.repository._serializer.support_altered_by_hack:
369
# For repositories that support_altered_by_hack,
370
# inventories.make_mpdiffs() contains all the data about the tree
371
# shape. Formats without support_altered_by_hack require
372
# chk_bytes/etc, so we use a different code path.
373
self._add_mp_records_keys('inventory', inv_vf,
374
[(revid,) for revid in topological_order])
376
# Inventories should always be added in pure-topological order, so
377
# that we can apply the mpdiff for the child to the parent texts.
378
self._add_inventory_mpdiffs_from_serializer(topological_order)
379
self._add_revision_texts(revision_order)
381
def _add_inventory_mpdiffs_from_serializer(self, revision_order):
382
"""Generate mpdiffs by serializing inventories.
384
The current repository only has part of the tree shape information in
385
the 'inventories' vf. So we use serializer.write_inventory_to_string to
386
get a 'full' representation of the tree shape, and then generate
387
mpdiffs on that data stream. This stream can then be reconstructed on
390
inventory_key_order = [(r,) for r in revision_order]
391
generator = _MPDiffInventoryGenerator(self.repository,
393
for revision_id, parent_ids, sha1, diff in generator.iter_diffs():
394
text = ''.join(diff.to_patch())
395
self.bundle.add_multiparent_record(text, sha1, parent_ids,
396
'inventory', revision_id, None)
398
def _add_revision_texts(self, revision_order):
323
self._add_mp_records_keys('inventory', inv_vf, [(revid,) for revid in revision_order])
399
324
parent_map = self.repository.get_parent_map(revision_order)
400
revision_to_str = self.repository._serializer.write_revision_to_string
401
revisions = self.repository.get_revisions(revision_order)
402
for revision in revisions:
403
revision_id = revision.revision_id
325
for revision_id in revision_order:
404
326
parents = parent_map.get(revision_id, None)
405
revision_text = revision_to_str(revision)
327
revision_text = self.repository.get_revision_xml(revision_id)
406
328
self.bundle.add_fulltext_record(revision_text, parents,
407
329
'revision', revision_id)
618
540
vf_records.append((key, parents, meta['sha1'], d_func(text)))
619
541
versionedfile.add_mpdiffs(vf_records)
621
def _get_parent_inventory_texts(self, inventory_text_cache,
622
inventory_cache, parent_ids):
623
cached_parent_texts = {}
624
remaining_parent_ids = []
625
for parent_id in parent_ids:
626
p_text = inventory_text_cache.get(parent_id, None)
628
remaining_parent_ids.append(parent_id)
630
cached_parent_texts[parent_id] = p_text
632
# TODO: Use inventory_cache to grab inventories we already have in
634
if remaining_parent_ids:
635
# first determine what keys are actually present in the local
636
# inventories object (don't use revisions as they haven't been
638
parent_keys = [(r,) for r in remaining_parent_ids]
639
present_parent_map = self._repository.inventories.get_parent_map(
641
present_parent_ids = []
643
for p_id in remaining_parent_ids:
644
if (p_id,) in present_parent_map:
645
present_parent_ids.append(p_id)
648
to_string = self._source_serializer.write_inventory_to_string
649
for parent_inv in self._repository.iter_inventories(
651
p_text = to_string(parent_inv)
652
inventory_cache[parent_inv.revision_id] = parent_inv
653
cached_parent_texts[parent_inv.revision_id] = p_text
654
inventory_text_cache[parent_inv.revision_id] = p_text
656
parent_texts = [cached_parent_texts[parent_id]
657
for parent_id in parent_ids
658
if parent_id not in ghosts]
661
543
def _install_inventory_records(self, records):
662
if (self._info['serializer'] == self._repository._serializer.format_num
663
and self._repository._serializer.support_altered_by_hack):
544
if self._info['serializer'] == self._repository._serializer.format_num:
664
545
return self._install_mp_records_keys(self._repository.inventories,
666
# Use a 10MB text cache, since these are string xml inventories. Note
667
# that 10MB is fairly small for large projects (a single inventory can
668
# be >5MB). Another possibility is to cache 10-20 inventory texts
670
inventory_text_cache = lru_cache.LRUSizeCache(10*1024*1024)
671
# Also cache the in-memory representation. This allows us to create
672
# inventory deltas to apply rather than calling add_inventory from
674
inventory_cache = lru_cache.LRUCache(10)
675
pb = ui.ui_factory.nested_progress_bar()
677
num_records = len(records)
678
for idx, (key, metadata, bytes) in enumerate(records):
679
pb.update('installing inventory', idx, num_records)
680
revision_id = key[-1]
681
parent_ids = metadata['parents']
682
# Note: This assumes the local ghosts are identical to the
683
# ghosts in the source, as the Bundle serialization
684
# format doesn't record ghosts.
685
p_texts = self._get_parent_inventory_texts(inventory_text_cache,
688
# Why does to_lines() take strings as the source? It seems that
689
# it would have to cast to a list of lines, which we get back
690
# as lines and then cast back to a string.
691
target_lines = multiparent.MultiParent.from_patch(bytes
693
inv_text = ''.join(target_lines)
695
sha1 = osutils.sha_string(inv_text)
696
if sha1 != metadata['sha1']:
697
raise errors.BadBundle("Can't convert to target format")
698
# Add this to the cache so we don't have to extract it again.
699
inventory_text_cache[revision_id] = inv_text
700
target_inv = self._source_serializer.read_inventory_from_string(
702
self._handle_root(target_inv, parent_ids)
705
parent_inv = inventory_cache.get(parent_ids[0], None)
707
if parent_inv is None:
708
self._repository.add_inventory(revision_id, target_inv,
711
delta = target_inv._make_delta(parent_inv)
712
self._repository.add_inventory_by_delta(parent_ids[0],
713
delta, revision_id, parent_ids)
714
except errors.UnsupportedInventoryKind:
715
raise errors.IncompatibleRevision(repr(self._repository))
716
inventory_cache[revision_id] = target_inv
547
for key, metadata, bytes in records:
548
revision_id = key[-1]
549
parent_ids = metadata['parents']
550
parents = [self._repository.get_inventory(p)
552
p_texts = [self._source_serializer.write_inventory_to_string(p)
554
target_lines = multiparent.MultiParent.from_patch(bytes).to_lines(
556
sha1 = osutils.sha_strings(target_lines)
557
if sha1 != metadata['sha1']:
558
raise errors.BadBundle("Can't convert to target format")
559
target_inv = self._source_serializer.read_inventory_from_string(
560
''.join(target_lines))
561
self._handle_root(target_inv, parent_ids)
563
self._repository.add_inventory(revision_id, target_inv,
565
except errors.UnsupportedInventoryKind:
566
raise errors.IncompatibleRevision(repr(self._repository))
720
568
def _handle_root(self, target_inv, parent_ids):
721
569
revision_id = target_inv.revision_id