# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
from __future__ import absolute_import
from cStringIO import StringIO
from bzrlib import (
    errors,
    lru_cache,
    multiparent,
    osutils,
    revision as _mod_revision,
    ui,
    versionedfile as _mod_versionedfile,
    )
from bzrlib.bundle import bundle_data, serializer as bundle_serializer
from bzrlib.i18n import ngettext
from bzrlib import bencode


class _MPDiffInventoryGenerator(_mod_versionedfile._MPDiffGenerator):
    """Generate inventory diffs from serialized inventories."""

    def __init__(self, repo, inventory_keys):
        super(_MPDiffInventoryGenerator, self).__init__(repo.inventories,
            inventory_keys)
        self.repo = repo

    def iter_diffs(self):
        """Compute the diffs one at a time."""
        # This is used instead of compute_diffs() because we guarantee our
        # ordering of inventories, so we don't have to do any buffering.
        self._find_needed_keys()
        # We actually use a slightly different ordering. We grab all of the
        # parents first, and then grab the ordered requests.
        needed_ids = [k[-1] for k in self.present_parents]
        needed_ids.extend([k[-1] for k in self.ordered_keys])
        inv_to_str = self.repo._serializer.write_inventory_to_string
        for inv in self.repo.iter_inventories(needed_ids):
            revision_id = inv.revision_id
            key = (revision_id,)
            if key in self.present_parents:
                # Not a key we will transmit, which is a shame, since it means
                # bundles don't work with stacked branches.
                parent_ids = None
            else:
                parent_ids = [k[-1] for k in self.parent_map[key]]
            as_bytes = inv_to_str(inv)
            self._process_one_record(key, (as_bytes,))
            if parent_ids is None:
                continue
            diff = self.diffs.pop(key)
            sha1 = osutils.sha_string(as_bytes)
            yield revision_id, parent_ids, sha1, diff

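# A sketch of how _MPDiffInventoryGenerator is typically driven (it mirrors
# _add_inventory_mpdiffs_from_serializer further down; the names used here are
# purely illustrative):
#
#   generator = _MPDiffInventoryGenerator(repository,
#                                         [(rev_id,) for rev_id in rev_order])
#   for revision_id, parent_ids, sha1, diff in generator.iter_diffs():
#       text = ''.join(diff.to_patch())
#       # ... record text/sha1/parent_ids, e.g. as a bundle record ...

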
class BundleWriter(object):
self.bundle.add_info_record(serializer=serializer_format,
supports_rich_root=supports_rich_root)
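        # The 'info' record carries the serializer format and whether rich
        # roots were used; the reading side checks it in _handle_info() and
        # against self._info['serializer'] when installing inventories.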
def write_files(self):
"""Write bundle records for all revisions of all files"""
        text_keys = []
        altered_fileids = self.repository.fileids_altered_by_revision_ids(
            self.revision_ids)
        for file_id, revision_ids in altered_fileids.iteritems():
            for revision_id in revision_ids:
                text_keys.append((file_id, revision_id))
        self._add_mp_records_keys('file', self.repository.texts, text_keys)

def write_revisions(self):
"""Write bundle records for all revisions and signatures"""
        inv_vf = self.repository.inventories
        topological_order = [key[-1] for key in multiparent.topo_iter_keys(
                                inv_vf, self.revision_keys)]
        revision_order = topological_order
if self.target is not None and self.target in self.revision_ids:
            # Make sure the target revision is always the last entry
            revision_order = list(topological_order)
            revision_order.remove(self.target)
            revision_order.append(self.target)
if self.repository._serializer.support_altered_by_hack:
            # For repositories with support_altered_by_hack,
            # inventories.make_mpdiffs() contains all the data about the tree
            # shape. Formats without support_altered_by_hack require
            # chk_bytes/etc, so we use a different code path.
            self._add_mp_records_keys('inventory', inv_vf,
                                      [(revid,) for revid in topological_order])
        else:
# Inventories should always be added in pure-topological order, so
            # that we can apply the mpdiff for the child to the parent texts.
            self._add_inventory_mpdiffs_from_serializer(topological_order)
self._add_revision_texts(revision_order)
def _add_inventory_mpdiffs_from_serializer(self, revision_order):
"""Generate mpdiffs by serializing inventories.

        The current repository only has part of the tree shape information in
        the 'inventories' vf. So we use serializer.write_inventory_to_string to
        get a 'full' representation of the tree shape, and then generate
        mpdiffs on that data stream. This stream can then be reconstructed on
        the other side.
        """
inventory_key_order = [(r,) for r in revision_order]
generator = _MPDiffInventoryGenerator(self.repository,
                                              inventory_key_order)
for revision_id, parent_ids, sha1, diff in generator.iter_diffs():
            text = ''.join(diff.to_patch())
            self.bundle.add_multiparent_record(text, sha1, parent_ids,
                                               'inventory', revision_id, None)

def _add_revision_texts(self, revision_order):
        parent_map = self.repository.get_parent_map(revision_order)
        revision_to_str = self.repository._serializer.write_revision_to_string
        revisions = self.repository.get_revisions(revision_order)
        for revision in revisions:
            revision_id = revision.revision_id
            parents = parent_map.get(revision_id, None)
            revision_text = revision_to_str(revision)
            self.bundle.add_fulltext_record(revision_text, parents,
                                            'revision', revision_id)

base = parents[0]
return base, target
def _add_mp_records_keys(self, repo_kind, vf, keys):
"""Add multi-parent diff records to a bundle"""
        ordered_keys = list(multiparent.topo_iter_keys(vf, keys))
        mpdiffs = vf.make_mpdiffs(ordered_keys)
        sha1s = vf.get_sha1s(ordered_keys)
        parent_map = vf.get_parent_map(ordered_keys)
        for mpdiff, item_key in zip(mpdiffs, ordered_keys):
            sha1 = sha1s[item_key]
            parents = [key[-1] for key in parent_map[item_key]]
            text = ''.join(mpdiff.to_patch())
            # Infer file id records as appropriate.
            if len(item_key) == 2:
                file_id = item_key[0]
            else:
                file_id = None
            self.bundle.add_multiparent_record(text, sha1, parents, repo_kind,
                                               item_key[-1], file_id)


class BundleInfoV4(object):
self._info = None
def install(self):
"""Perform the installation.

        Must be called with the Repository locked.
        """
self._repository.start_write_group()
        try:
            result = self._install_in_write_group()
        except:
            self._repository.abort_write_group()
            raise
        self._repository.commit_write_group()
        return result

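    # All records are installed inside the single write group opened above, so
    # a failure part way through aborts the group and installs nothing.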
def _install_in_write_group(self):
        current_file = None
        current_versionedfile = None
        pending_file_records = []
        pending_inventory_records = []
        added_inv = set()
        target_revision = None
        for bytes, metadata, repo_kind, revision_id, file_id in\
            self._container.iter_records():
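            # Each record is dispatched on repo_kind, which is one of 'info',
            # 'file', 'inventory', 'revision' or 'signature'.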
            if repo_kind == 'info':
                if self._info is not None:
                    raise AssertionError()
                self._handle_info(metadata)
            if (pending_file_records and
                (repo_kind, file_id) != ('file', current_file)):
                # Flush the data for a single file - prevents memory
                # spiking due to buffering all files in memory.
                self._install_mp_records_keys(self._repository.texts,
                                              pending_file_records)
                current_file = None
del pending_file_records[:]
            if len(pending_inventory_records) > 0 and repo_kind != 'inventory':
                self._install_inventory_records(pending_inventory_records)
                pending_inventory_records = []
            if repo_kind == 'inventory':
                pending_inventory_records.append(((revision_id,), metadata, bytes))
            if repo_kind == 'revision':
                target_revision = revision_id
                self._install_revision(revision_id, metadata, bytes)
            if repo_kind == 'signature':
                self._install_signature(revision_id, metadata, bytes)
if repo_kind == 'file':
current_file = file_id
                pending_file_records.append(((file_id, revision_id), metadata, bytes))
        self._install_mp_records_keys(self._repository.texts, pending_file_records)
return target_revision
def _handle_info(self, info):
records if r not in versionedfile]
versionedfile.add_mpdiffs(vf_records)
def _install_mp_records_keys(self, versionedfile, records):
        d_func = multiparent.MultiParent.from_patch
        vf_records = []
        for key, meta, text in records:
            # Adapt to tuple interface: A length two key is a file_id,
            # revision_id pair, a length 1 key is a
            # revision/signature/inventory. We need to do this because
            # the metadata extraction from the bundle has not yet been updated
            # to use the consistent tuple interface itself.
            if len(key) == 2:
                prefix = key[:1]
            else:
                prefix = ()
parents = [prefix + (parent,) for parent in meta['parents']]
            vf_records.append((key, parents, meta['sha1'], d_func(text)))
        versionedfile.add_mpdiffs(vf_records)

def _get_parent_inventory_texts(self, inventory_text_cache,
                                    inventory_cache, parent_ids):
        cached_parent_texts = {}
        remaining_parent_ids = []
        for parent_id in parent_ids:
            p_text = inventory_text_cache.get(parent_id, None)
            if p_text is None:
                remaining_parent_ids.append(parent_id)
            else:
                cached_parent_texts[parent_id] = p_text
        ghosts = ()
        # TODO: Use inventory_cache to grab inventories we already have in
        #       memory
        if remaining_parent_ids:
            # first determine what keys are actually present in the local
            # inventories object (don't use revisions as they haven't been
            # installed yet.)
            parent_keys = [(r,) for r in remaining_parent_ids]
            present_parent_map = self._repository.inventories.get_parent_map(
                                        parent_keys)
            present_parent_ids = []
            ghosts = set()
            for p_id in remaining_parent_ids:
                if (p_id,) in present_parent_map:
                    present_parent_ids.append(p_id)
                else:
                    ghosts.add(p_id)
            to_string = self._source_serializer.write_inventory_to_string
            for parent_inv in self._repository.iter_inventories(
                                    present_parent_ids):
                p_text = to_string(parent_inv)
                inventory_cache[parent_inv.revision_id] = parent_inv
                cached_parent_texts[parent_inv.revision_id] = p_text
                inventory_text_cache[parent_inv.revision_id] = p_text
        parent_texts = [cached_parent_texts[parent_id]
                        for parent_id in parent_ids
                        if parent_id not in ghosts]
        return parent_texts

def _install_inventory_records(self, records):
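        # Fast path: when the bundle was written by the same serializer and the
        # format can reconstruct tree shape from inventory mpdiffs alone
        # (support_altered_by_hack), the records can be added directly.
        # Otherwise each inventory text is rebuilt from its parents below.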
        if (self._info['serializer'] == self._repository._serializer.format_num
            and self._repository._serializer.support_altered_by_hack):
            return self._install_mp_records_keys(self._repository.inventories,
                records)
        # Use a 10MB text cache, since these are string xml inventories. Note
        # that 10MB is fairly small for large projects (a single inventory can
        # be >5MB). Another possibility is to cache 10-20 inventory texts
        # instead
        inventory_text_cache = lru_cache.LRUSizeCache(10*1024*1024)
        # Also cache the in-memory representation. This allows us to create
        # inventory deltas to apply rather than calling add_inventory from
        # scratch each time.
        inventory_cache = lru_cache.LRUCache(10)
        pb = ui.ui_factory.nested_progress_bar()
        try:
num_records = len(records)
            for idx, (key, metadata, bytes) in enumerate(records):
                pb.update('installing inventory', idx, num_records)
                revision_id = key[-1]
                parent_ids = metadata['parents']
                # Note: This assumes the local ghosts are identical to the
                #       ghosts in the source, as the Bundle serialization
                #       format doesn't record ghosts.
                p_texts = self._get_parent_inventory_texts(inventory_text_cache,
                                                           inventory_cache,
                                                           parent_ids)
                # Why does to_lines() take strings as the source? It seems that
                # it would have to cast to a list of lines, which we get back
                # as lines and then cast back to a string.
                target_lines = multiparent.MultiParent.from_patch(bytes
                            ).to_lines(p_texts)
                inv_text = ''.join(target_lines)
                sha1 = osutils.sha_string(inv_text)
                if sha1 != metadata['sha1']:
                    raise errors.BadBundle("Can't convert to target format")
                # Add this to the cache so we don't have to extract it again.
                inventory_text_cache[revision_id] = inv_text
                target_inv = self._source_serializer.read_inventory_from_string(
                    inv_text)
                self._handle_root(target_inv, parent_ids)
                parent_inv = None
                if parent_ids:
                    parent_inv = inventory_cache.get(parent_ids[0], None)
                try:
                    if parent_inv is None:
                        self._repository.add_inventory(revision_id, target_inv,
                                                       parent_ids)
                    else:
                        delta = target_inv._make_delta(parent_inv)
                        self._repository.add_inventory_by_delta(parent_ids[0],
                            delta, revision_id, parent_ids)
                except errors.UnsupportedInventoryKind:
                    raise errors.IncompatibleRevision(repr(self._repository))
                inventory_cache[revision_id] = target_inv
        finally:
            pb.finished()

def _handle_root(self, target_inv, parent_ids):
        revision_id = target_inv.revision_id
if self.update_root:
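            # update_root appears to mean the bundle lacks rich-root data while
            # this repository expects it, so give the tree root an (empty) text
            # of its own keyed at this revision.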
text_key = (target_inv.root.file_id, revision_id)
            parent_keys = [(target_inv.root.file_id, parent) for
                parent in parent_ids]
            self._repository.texts.add_lines(text_key, parent_keys, [])
elif not self._repository.supports_rich_root():
            if target_inv.root.revision != revision_id:
raise errors.IncompatibleRevision(repr(self._repository))
def _install_revision(self, revision_id, metadata, text):
if self._repository.has_revision(revision_id):
            return
revision = self._source_serializer.read_revision_from_string(text)
self._repository.add_revision(revision.revision_id, revision)
def _install_signature(self, revision_id, metadata, text):
transaction = self._repository.get_transaction()
if self._repository.has_signature_for_revision_id(revision_id):
            return
self._repository.add_signature_text(revision_id, text)