13
13
# You should have received a copy of the GNU General Public License
14
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
17
from cStringIO import StringIO
21
21
from bzrlib import (
28
28
revision as _mod_revision,
32
versionedfile as _mod_versionedfile,
32
from bzrlib.bundle import bundle_data, serializer
33
from bzrlib.util import bencode
34
from bzrlib.bundle import bundle_data, serializer as bundle_serializer
35
from bzrlib.i18n import ngettext
36
from bzrlib import bencode
39
class _MPDiffInventoryGenerator(_mod_versionedfile._MPDiffGenerator):
40
"""Generate Inventory diffs serialized inventories."""
42
def __init__(self, repo, inventory_keys):
43
super(_MPDiffInventoryGenerator, self).__init__(repo.inventories,
49
"""Compute the diffs one at a time."""
50
# This is used instead of compute_diffs(): since we guarantee our ordering of
51
# inventories, we don't have to do any buffering
52
self._find_needed_keys()
53
# We actually use a slightly different ordering. We grab all of the
54
# parents first, and then grab the ordered requests.
55
needed_ids = [k[-1] for k in self.present_parents]
56
needed_ids.extend([k[-1] for k in self.ordered_keys])
57
inv_to_str = self.repo._serializer.write_inventory_to_string
58
for inv in self.repo.iter_inventories(needed_ids):
59
revision_id = inv.revision_id
61
if key in self.present_parents:
62
# Not a key we will transmit, which is a shame, because
63
# of that bundles don't work with stacked branches
66
parent_ids = [k[-1] for k in self.parent_map[key]]
67
as_bytes = inv_to_str(inv)
68
self._process_one_record(key, (as_bytes,))
69
if parent_ids is None:
71
diff = self.diffs.pop(key)
72
sha1 = osutils.sha_string(as_bytes)
73
yield revision_id, parent_ids, sha1, diff
36
76
class BundleWriter(object):
293
343
self.bundle.add_info_record(serializer=serializer_format,
294
344
supports_rich_root=supports_rich_root)
296
def iter_file_revisions(self):
297
"""Iterate through all relevant revisions of all files.
299
This is the correct implementation, but is not compatible with bzr.dev,
300
because certain old revisions were not converted correctly, and have
301
the wrong "revision" marker in inventories.
303
transaction = self.repository.get_transaction()
304
altered = self.repository.fileids_altered_by_revision_ids(
306
for file_id, file_revision_ids in altered.iteritems():
307
vf = self.repository.weave_store.get_weave(file_id, transaction)
308
yield vf, file_id, file_revision_ids
310
def iter_file_revisions_aggressive(self):
311
"""Iterate through all relevant revisions of all files.
313
This uses the standard iter_file_revisions to determine what revisions
314
are referred to by inventories, but then uses the versionedfile to
315
determine the build-dependencies of each required revision.
317
All build dependencies which are not ancestors of the base revision
320
for vf, file_id, file_revision_ids in self.iter_file_revisions():
321
new_revision_ids = set()
322
pending = list(file_revision_ids)
323
while len(pending) > 0:
324
revision_id = pending.pop()
325
if revision_id in new_revision_ids:
327
if revision_id in self.base_ancestry:
329
new_revision_ids.add(revision_id)
330
pending.extend(vf.get_parents(revision_id))
331
yield vf, file_id, new_revision_ids
333
346
def write_files(self):
334
347
"""Write bundle records for all revisions of all files"""
335
for vf, file_id, revision_ids in self.iter_file_revisions():
336
self.add_mp_records('file', file_id, vf, revision_ids)
349
altered_fileids = self.repository.fileids_altered_by_revision_ids(
351
for file_id, revision_ids in altered_fileids.iteritems():
352
for revision_id in revision_ids:
353
text_keys.append((file_id, revision_id))
354
self._add_mp_records_keys('file', self.repository.texts, text_keys)
338
356
def write_revisions(self):
339
357
"""Write bundle records for all revisions and signatures"""
340
inv_vf = self.repository.get_inventory_weave()
341
revision_order = list(multiparent.topo_iter(inv_vf, self.revision_ids))
358
inv_vf = self.repository.inventories
359
topological_order = [key[-1] for key in multiparent.topo_iter_keys(
360
inv_vf, self.revision_keys)]
361
revision_order = topological_order
342
362
if self.target is not None and self.target in self.revision_ids:
363
# Make sure the target revision is always the last entry
364
revision_order = list(topological_order)
343
365
revision_order.remove(self.target)
344
366
revision_order.append(self.target)
345
self.add_mp_records('inventory', None, inv_vf, revision_order)
367
if self.repository._serializer.support_altered_by_hack:
368
# For repositories that support_altered_by_hack,
369
# inventories.make_mpdiffs() contains all the data about the tree
370
# shape. Formats without support_altered_by_hack require
371
# chk_bytes/etc, so we use a different code path.
372
self._add_mp_records_keys('inventory', inv_vf,
373
[(revid,) for revid in topological_order])
375
# Inventories should always be added in pure-topological order, so
376
# that we can apply the mpdiff for the child to the parent texts.
377
self._add_inventory_mpdiffs_from_serializer(topological_order)
378
self._add_revision_texts(revision_order)
380
def _add_inventory_mpdiffs_from_serializer(self, revision_order):
381
"""Generate mpdiffs by serializing inventories.
383
The current repository only has part of the tree shape information in
384
the 'inventories' vf. So we use serializer.write_inventory_to_string to
385
get a 'full' representation of the tree shape, and then generate
386
mpdiffs on that data stream. This stream can then be reconstructed on
389
inventory_key_order = [(r,) for r in revision_order]
390
generator = _MPDiffInventoryGenerator(self.repository,
392
for revision_id, parent_ids, sha1, diff in generator.iter_diffs():
393
text = ''.join(diff.to_patch())
394
self.bundle.add_multiparent_record(text, sha1, parent_ids,
395
'inventory', revision_id, None)
397
def _add_revision_texts(self, revision_order):
346
398
parent_map = self.repository.get_parent_map(revision_order)
347
for revision_id in revision_order:
399
revision_to_str = self.repository._serializer.write_revision_to_string
400
revisions = self.repository.get_revisions(revision_order)
401
for revision in revisions:
402
revision_id = revision.revision_id
348
403
parents = parent_map.get(revision_id, None)
349
revision_text = self.repository.get_revision_xml(revision_id)
404
revision_text = revision_to_str(revision)
350
405
self.bundle.add_fulltext_record(revision_text, parents,
351
406
'revision', revision_id)
371
426
base = parents[0]
372
427
return base, target
374
def add_mp_records(self, repo_kind, file_id, vf, revision_ids):
429
def _add_mp_records_keys(self, repo_kind, vf, keys):
375
430
"""Add multi-parent diff records to a bundle"""
376
revision_ids = list(multiparent.topo_iter(vf, revision_ids))
377
mpdiffs = vf.make_mpdiffs(revision_ids)
378
sha1s = vf.get_sha1s(revision_ids)
379
for mpdiff, revision_id, sha1, in zip(mpdiffs, revision_ids, sha1s):
380
parents = vf.get_parents(revision_id)
431
ordered_keys = list(multiparent.topo_iter_keys(vf, keys))
432
mpdiffs = vf.make_mpdiffs(ordered_keys)
433
sha1s = vf.get_sha1s(ordered_keys)
434
parent_map = vf.get_parent_map(ordered_keys)
435
for mpdiff, item_key, in zip(mpdiffs, ordered_keys):
436
sha1 = sha1s[item_key]
437
parents = [key[-1] for key in parent_map[item_key]]
381
438
text = ''.join(mpdiff.to_patch())
439
# Infer file id records as appropriate.
440
if len(item_key) == 2:
441
file_id = item_key[0]
382
444
self.bundle.add_multiparent_record(text, sha1, parents, repo_kind,
383
revision_id, file_id)
445
item_key[-1], file_id)
386
448
class BundleInfoV4(object):
493
555
for bytes, metadata, repo_kind, revision_id, file_id in\
494
556
self._container.iter_records():
495
557
if repo_kind == 'info':
496
assert self._info is None
558
if self._info is not None:
559
raise AssertionError()
497
560
self._handle_info(metadata)
498
if (repo_kind, file_id) != ('file', current_file):
499
if len(pending_file_records) > 0:
500
self._install_mp_records(current_versionedfile,
501
pending_file_records)
561
if (pending_file_records and
562
(repo_kind, file_id) != ('file', current_file)):
563
# Flush the data for a single file - prevents memory
564
# spiking due to buffering all files in memory.
565
self._install_mp_records_keys(self._repository.texts,
566
pending_file_records)
502
567
current_file = None
503
current_versionedfile = None
504
pending_file_records = []
568
del pending_file_records[:]
505
569
if len(pending_inventory_records) > 0 and repo_kind != 'inventory':
506
self._install_inventory_records(inventory_vf,
507
pending_inventory_records)
570
self._install_inventory_records(pending_inventory_records)
508
571
pending_inventory_records = []
509
572
if repo_kind == 'inventory':
510
if inventory_vf is None:
511
inventory_vf = self._repository.get_inventory_weave()
512
if revision_id not in inventory_vf:
513
pending_inventory_records.append((revision_id, metadata,
573
pending_inventory_records.append(((revision_id,), metadata, bytes))
515
574
if repo_kind == 'revision':
516
575
target_revision = revision_id
517
576
self._install_revision(revision_id, metadata, bytes)
548
600
records if r not in versionedfile]
549
601
versionedfile.add_mpdiffs(vf_records)
551
def _install_inventory_records(self, vf, records):
552
if self._info['serializer'] == self._repository._serializer.format_num:
553
return self._install_mp_records(vf, records)
554
for revision_id, metadata, bytes in records:
555
parent_ids = metadata['parents']
556
parents = [self._repository.get_inventory(p)
558
p_texts = [self._source_serializer.write_inventory_to_string(p)
560
target_lines = multiparent.MultiParent.from_patch(bytes).to_lines(
562
sha1 = osutils.sha_strings(target_lines)
563
if sha1 != metadata['sha1']:
564
raise errors.BadBundle("Can't convert to target format")
565
target_inv = self._source_serializer.read_inventory_from_string(
566
''.join(target_lines))
567
self._handle_root(target_inv, parent_ids)
569
self._repository.add_inventory(revision_id, target_inv,
571
except errors.UnsupportedInventoryKind:
572
raise errors.IncompatibleRevision(repr(self._repository))
603
def _install_mp_records_keys(self, versionedfile, records):
604
d_func = multiparent.MultiParent.from_patch
606
for key, meta, text in records:
607
# Adapt to tuple interface: A length two key is a file_id,
608
# revision_id pair, a length 1 key is a
609
# revision/signature/inventory. We need to do this because
610
# the metadata extraction from the bundle has not yet been updated
611
# to use the consistent tuple interface itself.
616
parents = [prefix + (parent,) for parent in meta['parents']]
617
vf_records.append((key, parents, meta['sha1'], d_func(text)))
618
versionedfile.add_mpdiffs(vf_records)
620
def _get_parent_inventory_texts(self, inventory_text_cache,
621
inventory_cache, parent_ids):
622
cached_parent_texts = {}
623
remaining_parent_ids = []
624
for parent_id in parent_ids:
625
p_text = inventory_text_cache.get(parent_id, None)
627
remaining_parent_ids.append(parent_id)
629
cached_parent_texts[parent_id] = p_text
631
# TODO: Use inventory_cache to grab inventories we already have in
633
if remaining_parent_ids:
634
# first determine what keys are actually present in the local
635
# inventories object (don't use revisions as they haven't been
637
parent_keys = [(r,) for r in remaining_parent_ids]
638
present_parent_map = self._repository.inventories.get_parent_map(
640
present_parent_ids = []
642
for p_id in remaining_parent_ids:
643
if (p_id,) in present_parent_map:
644
present_parent_ids.append(p_id)
647
to_string = self._source_serializer.write_inventory_to_string
648
for parent_inv in self._repository.iter_inventories(
650
p_text = to_string(parent_inv)
651
inventory_cache[parent_inv.revision_id] = parent_inv
652
cached_parent_texts[parent_inv.revision_id] = p_text
653
inventory_text_cache[parent_inv.revision_id] = p_text
655
parent_texts = [cached_parent_texts[parent_id]
656
for parent_id in parent_ids
657
if parent_id not in ghosts]
660
def _install_inventory_records(self, records):
661
if (self._info['serializer'] == self._repository._serializer.format_num
662
and self._repository._serializer.support_altered_by_hack):
663
return self._install_mp_records_keys(self._repository.inventories,
665
# Use a 10MB text cache, since these are string xml inventories. Note
666
# that 10MB is fairly small for large projects (a single inventory can
667
# be >5MB). Another possibility is to cache 10-20 inventory texts
669
inventory_text_cache = lru_cache.LRUSizeCache(10*1024*1024)
670
# Also cache the in-memory representation. This allows us to create
671
# inventory deltas to apply rather than calling add_inventory from
673
inventory_cache = lru_cache.LRUCache(10)
674
pb = ui.ui_factory.nested_progress_bar()
676
num_records = len(records)
677
for idx, (key, metadata, bytes) in enumerate(records):
678
pb.update('installing inventory', idx, num_records)
679
revision_id = key[-1]
680
parent_ids = metadata['parents']
681
# Note: This assumes the local ghosts are identical to the
682
# ghosts in the source, as the Bundle serialization
683
# format doesn't record ghosts.
684
p_texts = self._get_parent_inventory_texts(inventory_text_cache,
687
# Why does to_lines() take strings as the source, it seems that
688
# it would have to cast to a list of lines, which we get back
689
# as lines and then cast back to a string.
690
target_lines = multiparent.MultiParent.from_patch(bytes
692
inv_text = ''.join(target_lines)
694
sha1 = osutils.sha_string(inv_text)
695
if sha1 != metadata['sha1']:
696
raise errors.BadBundle("Can't convert to target format")
697
# Add this to the cache so we don't have to extract it again.
698
inventory_text_cache[revision_id] = inv_text
699
target_inv = self._source_serializer.read_inventory_from_string(
701
self._handle_root(target_inv, parent_ids)
704
parent_inv = inventory_cache.get(parent_ids[0], None)
706
if parent_inv is None:
707
self._repository.add_inventory(revision_id, target_inv,
710
delta = target_inv._make_delta(parent_inv)
711
self._repository.add_inventory_by_delta(parent_ids[0],
712
delta, revision_id, parent_ids)
713
except errors.UnsupportedInventoryKind:
714
raise errors.IncompatibleRevision(repr(self._repository))
715
inventory_cache[revision_id] = target_inv
574
719
def _handle_root(self, target_inv, parent_ids):
575
720
revision_id = target_inv.revision_id
576
721
if self.update_root:
577
target_inv.root.revision = revision_id
578
store = self._repository.weave_store
579
transaction = self._repository.get_transaction()
580
vf = store.get_weave_or_empty(target_inv.root.file_id, transaction)
581
vf.add_lines(revision_id, parent_ids, [])
722
text_key = (target_inv.root.file_id, revision_id)
723
parent_keys = [(target_inv.root.file_id, parent) for
724
parent in parent_ids]
725
self._repository.texts.add_lines(text_key, parent_keys, [])
582
726
elif not self._repository.supports_rich_root():
583
727
if target_inv.root.revision != revision_id:
584
728
raise errors.IncompatibleRevision(repr(self._repository))
587
730
def _install_revision(self, revision_id, metadata, text):
588
731
if self._repository.has_revision(revision_id):
590
self._repository._add_revision_text(revision_id, text)
733
revision = self._source_serializer.read_revision_from_string(text)
734
self._repository.add_revision(revision.revision_id, revision)
592
736
def _install_signature(self, revision_id, metadata, text):
593
737
transaction = self._repository.get_transaction()
594
if self._repository._revision_store.has_signature(revision_id,
738
if self._repository.has_signature_for_revision_id(revision_id):
597
self._repository._revision_store.add_revision_signature_text(
598
revision_id, text, transaction)
740
self._repository.add_signature_text(revision_id, text)