# Copyright (C) 2007 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

from cStringIO import StringIO
import bz2
import re

from bzrlib import (
    errors,
    iterablefile,
    lru_cache,
    multiparent,
    osutils,
    pack,
    revision as _mod_revision,
    serializer,
    trace,
    ui,
    )
from bzrlib.bundle import bundle_data, serializer as bundle_serializer
from bzrlib import bencode


class BundleWriter(object):
    """Writer for bundle-format files.

    This serves roughly the same purpose as ContainerWriter, but acts as a
    layer on top of it.

    Provides ways of writing the specific record types supported by this
    bundle format.
    """

    def __init__(self, fileobj):
        self._container = pack.ContainerWriter(self._write_encoded)
        self._fileobj = fileobj
        self._compressor = bz2.BZ2Compressor()

    def _write_encoded(self, bytes):
        """Write bzip2-encoded bytes to the file"""
        self._fileobj.write(self._compressor.compress(bytes))

    def begin(self):
        """Start writing the bundle"""
        self._fileobj.write(bundle_serializer._get_bundle_header(
            bundle_serializer.v4_string))
        self._fileobj.write('#\n')
        self._container.begin()

    def end(self):
        """Finish writing the bundle"""
        self._container.end()
        self._fileobj.write(self._compressor.flush())

    def add_multiparent_record(self, mp_bytes, sha1, parents, repo_kind,
                               revision_id, file_id):
        """Add a record for a multi-parent diff

        :mp_bytes: A multi-parent diff, as a bytestring
        :sha1: The sha1 hash of the fulltext
        :parents: a list of revision-ids of the parents
        :repo_kind: The kind of object in the repository.  May be 'file' or
            'inventory'
        :revision_id: The revision id of the mpdiff being added.
        :file_id: The file-id of the file, or None for inventories.
        """
        metadata = {'parents': parents,
                    'storage_kind': 'mpdiff',
                    'sha1': sha1}
        self._add_record(mp_bytes, metadata, repo_kind, revision_id, file_id)

    def add_fulltext_record(self, bytes, parents, repo_kind, revision_id):
        """Add a record for a fulltext

        :bytes: The fulltext, as a bytestring
        :parents: a list of revision-ids of the parents
        :repo_kind: The kind of object in the repository.  May be 'revision'
            or 'signature'
        :revision_id: The revision id of the fulltext being added.
        """
        metadata = {'parents': parents,
                    'storage_kind': 'fulltext'}
        self._add_record(bytes, metadata, repo_kind, revision_id, None)

    def add_info_record(self, **kwargs):
        """Add an info record to the bundle

        Any parameters may be supplied, except 'self' and 'storage_kind'.
        Values must be lists, strings, integers, dicts, or a combination.
        """
        kwargs['storage_kind'] = 'header'
        self._add_record(None, kwargs, 'info', None, None)
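
    # For example (an illustrative sketch; the field names follow
    # write_info() below, and 'writer' is a hypothetical BundleWriter):
    #     writer.add_info_record(serializer='7', supports_rich_root=0)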

    @staticmethod
    def encode_name(content_kind, revision_id, file_id=None):
        """Encode semantic ids as a container name"""
        if content_kind not in ('revision', 'file', 'inventory', 'signature',
                                'info'):
            raise ValueError(content_kind)
        if content_kind == 'file':
            if file_id is None:
                raise AssertionError()
        else:
            if file_id is not None:
                raise AssertionError()
        if content_kind == 'info':
            if revision_id is not None:
                raise AssertionError()
        elif revision_id is None:
            raise AssertionError()
        names = [n.replace('/', '//') for n in
                 (content_kind, revision_id, file_id) if n is not None]
        return '/'.join(names)
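
    # Illustrative examples of the encoding above ('rev-id' and 'file-id'
    # are placeholder ids):
    #     encode_name('revision', 'rev-id')         == 'revision/rev-id'
    #     encode_name('file', 'rev-id', 'file-id')  == 'file/rev-id/file-id'
    #     encode_name('revision', 'rev/left')       == 'revision/rev//left'
    # A literal '/' inside an id is escaped by doubling it, so decode_name()
    # below can split unambiguously on single slashes.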

    def _add_record(self, bytes, metadata, repo_kind, revision_id, file_id):
        """Add a bundle record to the container.

        Most bundle records are recorded as header/body pairs, with the
        body being nameless.  Records with storage_kind 'header' have no
        body.
        """
        name = self.encode_name(repo_kind, revision_id, file_id)
        encoded_metadata = bencode.bencode(metadata)
        self._container.add_bytes_record(encoded_metadata, [(name, )])
        if metadata['storage_kind'] != 'header':
            self._container.add_bytes_record(bytes, [])
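

# Illustrative record layout (a sketch of what BundleWriter emits): each
# logical record becomes a named container record holding the bencoded
# metadata, e.g.
#     name:  'revision/rev-id'
#     bytes: bencoded {'parents': [...], 'storage_kind': 'fulltext'}
# followed, for non-'header' records, by a nameless record holding the body.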


class BundleReader(object):
    """Reader for bundle-format files.

    This serves roughly the same purpose as ContainerReader, but acts as a
    layer on top of it, providing metadata, a semantic name, and a record
    body.
    """

    def __init__(self, fileobj, stream_input=True):
        """Constructor

        :param fileobj: a file containing a bzip-encoded container
        :param stream_input: If True, the BundleReader streams input rather
            than reading it all into memory at once.  Reading it into memory
            all at once is (currently) faster.
        """
        line = fileobj.readline()
        if line != '\n':
            fileobj.readline()
        self.patch_lines = []
        if stream_input:
            source_file = iterablefile.IterableFile(self.iter_decode(fileobj))
        else:
            source_file = StringIO(bz2.decompress(fileobj.read()))
        self._container_file = source_file

    @staticmethod
    def iter_decode(fileobj):
        """Iterate through decoded fragments of the file"""
        decompressor = bz2.BZ2Decompressor()
        for line in fileobj:
            try:
                yield decompressor.decompress(line)
            except EOFError:
                # The compressed stream ended before the file did; stop.
                return

    @staticmethod
    def decode_name(name):
        """Decode a name from its container form into a semantic form

        :retval: content_kind, revision_id, file_id
        """
        segments = re.split('(//?)', name)
        names = ['']
        for segment in segments:
            if segment == '//':
                names[-1] += '/'
            elif segment == '/':
                names.append('')
            else:
                names[-1] += segment
        content_kind = names[0]
        revision_id = None
        file_id = None
        if len(names) > 1:
            revision_id = names[1]
        if len(names) > 2:
            file_id = names[2]
        return content_kind, revision_id, file_id
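
    # Round-trip examples matching encode_name() above (placeholder ids):
    #     decode_name('file/rev-id/file-id') == ('file', 'rev-id', 'file-id')
    #     decode_name('revision/rev//left')  == ('revision', 'rev/left', None)
    #     decode_name('info')                == ('info', None, None)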

    def iter_records(self):
        """Iterate through bundle records

        :return: a generator of (bytes, metadata, content_kind, revision_id,
            file_id)
        """
        iterator = pack.iter_records_from_file(self._container_file)
        for names, bytes in iterator:
            if len(names) != 1:
                raise errors.BadBundle('Record has %d names instead of 1'
                                       % len(names))
            metadata = bencode.bdecode(bytes)
            if metadata['storage_kind'] == 'header':
                bytes = None
            else:
                _unused, bytes = iterator.next()
            yield (bytes, metadata) + self.decode_name(names[0][0])
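

# A minimal reading sketch (hypothetical; 'fileobj' is an open bundle file):
#     reader = BundleReader(fileobj)
#     for bytes, metadata, kind, revision_id, file_id in reader.iter_records():
#         ...  # 'bytes' is None for the leading 'info' record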


class BundleSerializerV4(bundle_serializer.BundleSerializer):
    """Implement the high-level bundle interface"""

    def write(self, repository, revision_ids, forced_bases, fileobj):
        """Write a bundle to a file-like object

        For backwards-compatibility only
        """
        write_op = BundleWriteOperation.from_old_args(repository, revision_ids,
                                                      forced_bases, fileobj)
        return write_op.do_write()

    def write_bundle(self, repository, target, base, fileobj):
        """Write a bundle to a file object

        :param repository: The repository to retrieve revision data from
        :param target: The head revision to include ancestors of
        :param base: The ancestor of the target to stop including ancestors
            at.
        :param fileobj: The file-like object to write to
        """
        write_op = BundleWriteOperation(base, target, repository, fileobj)
        return write_op.do_write()

    def read(self, file):
        """Return a reader object for a given file"""
        bundle = BundleInfoV4(file, self)
        return bundle

    @staticmethod
    def get_source_serializer(info):
        """Retrieve the serializer for a given info object"""
        return serializer.format_registry.get(info['serializer'])
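

# A high-level usage sketch (hypothetical; 'serializer_v4' is an instance of
# BundleSerializerV4 obtained from the bundle format registry, and 'repo',
# 'rev-id' and 'base-id' are placeholders):
#     fileobj = StringIO()
#     serializer_v4.write_bundle(repo, 'rev-id', 'base-id', fileobj)
#     fileobj.seek(0)
#     bundle_info = serializer_v4.read(fileobj)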


class BundleWriteOperation(object):
    """Perform the operation of writing revisions to a bundle"""

    @classmethod
    def from_old_args(cls, repository, revision_ids, forced_bases, fileobj):
        """Create a BundleWriteOperation from old-style arguments"""
        base, target = cls.get_base_target(revision_ids, forced_bases,
                                           repository)
        return BundleWriteOperation(base, target, repository, fileobj,
                                    revision_ids=revision_ids)

    def __init__(self, base, target, repository, fileobj, revision_ids=None):
        self.base = base
        self.target = target
        self.repository = repository
        bundle = BundleWriter(fileobj)
        self.bundle = bundle
        if revision_ids is not None:
            self.revision_ids = revision_ids
        else:
            graph = repository.get_graph()
            revision_ids = graph.find_unique_ancestors(target, [base])
            # Strip ghosts
            parents = graph.get_parent_map(revision_ids)
            self.revision_ids = [r for r in revision_ids if r in parents]
        self.revision_keys = set([(revid,) for revid in self.revision_ids])
286
"""Write all data to the bundle"""
287
trace.note('Bundling %d revision(s).', len(self.revision_ids))
288
self.repository.lock_read()
293
self.write_revisions()
296
self.repository.unlock()
297
return self.revision_ids

    def write_info(self):
        """Write format info"""
        serializer_format = self.repository.get_serializer_format()
        supports_rich_root = {True: 1, False: 0}[
            self.repository.supports_rich_root()]
        self.bundle.add_info_record(serializer=serializer_format,
                                    supports_rich_root=supports_rich_root)

    def write_files(self):
        """Write bundle records for all revisions of all files"""
        text_keys = []
        altered_fileids = self.repository.fileids_altered_by_revision_ids(
            self.revision_ids)
        for file_id, revision_ids in altered_fileids.iteritems():
            for revision_id in revision_ids:
                text_keys.append((file_id, revision_id))
        self._add_mp_records_keys('file', self.repository.texts, text_keys)

    def write_revisions(self):
        """Write bundle records for all revisions and signatures"""
        inv_vf = self.repository.inventories
        topological_order = [key[-1] for key in multiparent.topo_iter_keys(
            inv_vf, self.revision_keys)]
        revision_order = topological_order
        if self.target is not None and self.target in self.revision_ids:
            # Make sure the target revision is always the last entry
            revision_order = list(topological_order)
            revision_order.remove(self.target)
            revision_order.append(self.target)
        if self.repository._serializer.support_altered_by_hack:
            # For repositories with support_altered_by_hack,
            # inventories.make_mpdiffs() contains all the data about the tree
            # shape. Formats without support_altered_by_hack require
            # chk_bytes/etc, so we use a different code path.
            self._add_mp_records_keys('inventory', inv_vf,
                                      [(revid,) for revid in topological_order])
        else:
            # Inventories should always be added in pure-topological order, so
            # that we can apply the mpdiff for the child to the parent texts.
            self._add_inventory_mpdiffs_from_serializer(topological_order)
        self._add_revision_texts(revision_order)

    def _add_inventory_mpdiffs_from_serializer(self, revision_order):
        """Generate mpdiffs by serializing inventories.

        The current repository only has part of the tree shape information in
        the 'inventories' vf. So we use serializer.write_inventory_to_string to
        get a 'full' representation of the tree shape, and then generate
        mpdiffs on that data stream. This stream can then be reconstructed on
        the other side.
        """
        inventory_key_order = [(r,) for r in revision_order]
        parent_map = self.repository.inventories.get_parent_map(
            inventory_key_order)
        missing_keys = set(inventory_key_order).difference(parent_map)
        if missing_keys:
            raise errors.RevisionNotPresent(list(missing_keys)[0],
                                            self.repository.inventories)
        inv_to_str = self.repository._serializer.write_inventory_to_string
        # Make sure that we grab the parent texts first
        just_parents = set()
        map(just_parents.update, parent_map.itervalues())
        just_parents.difference_update(parent_map)
        # Ignore ghost parents
        present_parents = self.repository.inventories.get_parent_map(
            just_parents)
        ghost_keys = just_parents.difference(present_parents)
        needed_inventories = list(present_parents) + inventory_key_order
        needed_inventories = [k[-1] for k in needed_inventories]
        all_lines = {}
        for inv in self.repository.iter_inventories(needed_inventories):
            revision_id = inv.revision_id
            key = (revision_id,)
            as_bytes = inv_to_str(inv)
            # The sha1 is validated as the xml/textual form, not as the
            # form-in-the-repository
            sha1 = osutils.sha_string(as_bytes)
            as_lines = osutils.split_lines(as_bytes)
            del as_bytes
            all_lines[key] = as_lines
            if key in just_parents:
                # We don't transmit those entries
                continue
            # Create an mpdiff for this text, and add it to the output
            parent_keys = parent_map[key]
            # See the comment in VF.make_mpdiffs about how this affects
            # ordering when there are ghosts present. I think we have a latent
            # bug
            parent_lines = [all_lines[p_key] for p_key in parent_keys
                            if p_key not in ghost_keys]
            diff = multiparent.MultiParent.from_lines(
                as_lines, parent_lines)
            text = ''.join(diff.to_patch())
            parent_ids = [k[-1] for k in parent_keys]
            self.bundle.add_multiparent_record(text, sha1, parent_ids,
                                               'inventory', revision_id, None)

    def _add_revision_texts(self, revision_order):
        parent_map = self.repository.get_parent_map(revision_order)
        revision_to_str = self.repository._serializer.write_revision_to_string
        revisions = self.repository.get_revisions(revision_order)
        for revision in revisions:
            revision_id = revision.revision_id
            parents = parent_map.get(revision_id, None)
            revision_text = revision_to_str(revision)
            self.bundle.add_fulltext_record(revision_text, parents,
                                            'revision', revision_id)
            try:
                self.bundle.add_fulltext_record(
                    self.repository.get_signature_text(
                        revision_id), parents, 'signature', revision_id)
            except errors.NoSuchRevision:
                # The revision is not signed, so there is no signature
                # record to add.
                pass

    @staticmethod
    def get_base_target(revision_ids, forced_bases, repository):
        """Determine the base and target from old-style revision ids"""
        if len(revision_ids) == 0:
            return None, None
        target = revision_ids[0]
        base = forced_bases.get(target)
        if base is None:
            parents = repository.get_revision(target).parent_ids
            if len(parents) == 0:
                base = _mod_revision.NULL_REVISION
            else:
                base = parents[0]
        return base, target
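
    # For example: with revision_ids == ['tip-id'] and no forced base, the
    # base becomes 'tip-id's first parent (or NULL_REVISION if it has no
    # parents), so the bundle carries the changes leading up to the target.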

    def _add_mp_records_keys(self, repo_kind, vf, keys):
        """Add multi-parent diff records to a bundle"""
        ordered_keys = list(multiparent.topo_iter_keys(vf, keys))
        mpdiffs = vf.make_mpdiffs(ordered_keys)
        sha1s = vf.get_sha1s(ordered_keys)
        parent_map = vf.get_parent_map(ordered_keys)
        for mpdiff, item_key in zip(mpdiffs, ordered_keys):
            sha1 = sha1s[item_key]
            parents = [key[-1] for key in parent_map[item_key]]
            text = ''.join(mpdiff.to_patch())
            # Infer file id records as appropriate.
            if len(item_key) == 2:
                file_id = item_key[0]
            else:
                file_id = None
            self.bundle.add_multiparent_record(text, sha1, parents, repo_kind,
                                               item_key[-1], file_id)
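
    # Key shapes, for reference: file texts use two-element keys
    # (file_id, revision_id), while inventory keys are one-element
    # (revision_id,) tuples, which is why a length-2 key implies a file_id.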


class BundleInfoV4(object):
    """Provide (most of) the BundleInfo interface"""

    def __init__(self, fileobj, serializer):
        self._fileobj = fileobj
        self._serializer = serializer
        self.__real_revisions = None
        self.__revisions = None

    def install(self, repository):
        return self.install_revisions(repository)

    def install_revisions(self, repository, stream_input=True):
        """Install this bundle's revisions into the specified repository

        :param repository: The repository to install into
        :param stream_input: If True, will stream input rather than reading it
            all into memory at once.  Reading it into memory all at once is
            (currently) faster.
        """
        repository.lock_write()
        try:
            ri = RevisionInstaller(self.get_bundle_reader(stream_input),
                                   self._serializer, repository)
            return ri.install()
        finally:
            repository.unlock()

    def get_merge_request(self, target_repo):
        """Provide data for performing a merge

        Returns suggested base, suggested target, and patch verification
        status.
        """
        return None, self.target, 'inapplicable'

    def get_bundle_reader(self, stream_input=True):
        """Return a new BundleReader for the associated bundle

        :param stream_input: If True, the BundleReader streams input rather
            than reading it all into memory at once.  Reading it into memory
            all at once is (currently) faster.
        """
        self._fileobj.seek(0)
        return BundleReader(self._fileobj, stream_input)

    def _get_real_revisions(self):
        if self.__real_revisions is None:
            self.__real_revisions = []
            bundle_reader = self.get_bundle_reader()
            for bytes, metadata, repo_kind, revision_id, file_id in \
                    bundle_reader.iter_records():
                if repo_kind == 'info':
                    serializer = \
                        self._serializer.get_source_serializer(metadata)
                if repo_kind == 'revision':
                    rev = serializer.read_revision_from_string(bytes)
                    self.__real_revisions.append(rev)
        return self.__real_revisions

    real_revisions = property(_get_real_revisions)

    def _get_revisions(self):
        if self.__revisions is None:
            self.__revisions = []
            for revision in self.real_revisions:
                self.__revisions.append(
                    bundle_data.RevisionInfo.from_revision(revision))
        return self.__revisions

    revisions = property(_get_revisions)

    def _get_target(self):
        return self.revisions[-1].revision_id

    target = property(_get_target)


class RevisionInstaller(object):
    """Installs revisions into a repository"""

    def __init__(self, container, serializer, repository):
        self._container = container
        self._serializer = serializer
        self._repository = repository
        self._info = None

    def install(self):
        """Perform the installation.

        Must be called with the Repository locked.
        """
        self._repository.start_write_group()
        try:
            result = self._install_in_write_group()
        except:
            self._repository.abort_write_group()
            raise
        self._repository.commit_write_group()
        return result
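
    # Callers normally reach install() through
    # BundleInfoV4.install_revisions(), which takes the repository write
    # lock before delegating here.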

    def _install_in_write_group(self):
        current_file = None
        pending_file_records = []
        pending_inventory_records = []
        target_revision = None
        for bytes, metadata, repo_kind, revision_id, file_id in \
                self._container.iter_records():
            if repo_kind == 'info':
                if self._info is not None:
                    raise AssertionError()
                self._handle_info(metadata)
            if (pending_file_records and
                    (repo_kind, file_id) != ('file', current_file)):
                # Flush the data for a single file - prevents memory
                # spiking due to buffering all files in memory.
                self._install_mp_records_keys(self._repository.texts,
                                              pending_file_records)
                current_file = None
                del pending_file_records[:]
            if len(pending_inventory_records) > 0 and repo_kind != 'inventory':
                self._install_inventory_records(pending_inventory_records)
                pending_inventory_records = []
            if repo_kind == 'inventory':
                pending_inventory_records.append(
                    ((revision_id,), metadata, bytes))
            if repo_kind == 'revision':
                target_revision = revision_id
                self._install_revision(revision_id, metadata, bytes)
            if repo_kind == 'signature':
                self._install_signature(revision_id, metadata, bytes)
            if repo_kind == 'file':
                current_file = file_id
                pending_file_records.append(
                    ((file_id, revision_id), metadata, bytes))
        self._install_mp_records_keys(self._repository.texts,
                                      pending_file_records)
        return target_revision

    def _handle_info(self, info):
        """Extract data from an info record"""
        self._info = info
        self._source_serializer = self._serializer.get_source_serializer(info)
        if (info['supports_rich_root'] == 0 and
                self._repository.supports_rich_root()):
            self.update_root = True
        else:
            self.update_root = False

    def _install_mp_records(self, versionedfile, records):
        if len(records) == 0:
            return
        d_func = multiparent.MultiParent.from_patch
        vf_records = [(r, m['parents'], m['sha1'], d_func(t)) for r, m, t in
                      records if r not in versionedfile]
        versionedfile.add_mpdiffs(vf_records)

    def _install_mp_records_keys(self, versionedfile, records):
        d_func = multiparent.MultiParent.from_patch
        vf_records = []
        for key, meta, text in records:
            # Adapt to tuple interface: A length two key is a file_id,
            # revision_id pair, a length 1 key is a
            # revision/signature/inventory. We need to do this because
            # the metadata extraction from the bundle has not yet been updated
            # to use the consistent tuple interface itself.
            if len(key) == 2:
                prefix = key[:1]
            else:
                prefix = ()
            parents = [prefix + (parent,) for parent in meta['parents']]
            vf_records.append((key, parents, meta['sha1'], d_func(text)))
        versionedfile.add_mpdiffs(vf_records)

    def _get_parent_inventory_texts(self, inventory_text_cache,
                                    inventory_cache, parent_ids):
        cached_parent_texts = {}
        remaining_parent_ids = []
        for parent_id in parent_ids:
            p_text = inventory_text_cache.get(parent_id, None)
            if p_text is None:
                remaining_parent_ids.append(parent_id)
            else:
                cached_parent_texts[parent_id] = p_text
        ghosts = ()
        # TODO: Use inventory_cache to grab inventories we already have in
        #       memory
        if remaining_parent_ids:
            # first determine what keys are actually present in the local
            # inventories object (don't use revisions as they haven't been
            # installed yet)
            parent_keys = [(r,) for r in remaining_parent_ids]
            present_parent_map = self._repository.inventories.get_parent_map(
                parent_keys)
            present_parent_ids = []
            ghosts = set()
            for p_id in remaining_parent_ids:
                if (p_id,) in present_parent_map:
                    present_parent_ids.append(p_id)
                else:
                    ghosts.add(p_id)
            to_string = self._source_serializer.write_inventory_to_string
            for parent_inv in self._repository.iter_inventories(
                    present_parent_ids):
                p_text = to_string(parent_inv)
                inventory_cache[parent_inv.revision_id] = parent_inv
                cached_parent_texts[parent_inv.revision_id] = p_text
                inventory_text_cache[parent_inv.revision_id] = p_text
        parent_texts = [cached_parent_texts[parent_id]
                        for parent_id in parent_ids
                        if parent_id not in ghosts]
        return parent_texts

    def _install_inventory_records(self, records):
        if (self._info['serializer'] == self._repository._serializer.format_num
                and self._repository._serializer.support_altered_by_hack):
            return self._install_mp_records_keys(self._repository.inventories,
                                                 records)
        # Use a 10MB text cache, since these are string xml inventories. Note
        # that 10MB is fairly small for large projects (a single inventory can
        # be >5MB). Another possibility is to cache 10-20 inventory texts
        # instead
        inventory_text_cache = lru_cache.LRUSizeCache(10*1024*1024)
        # Also cache the in-memory representation. This allows us to create
        # inventory deltas to apply rather than calling add_inventory from
        # scratch each time.
        inventory_cache = lru_cache.LRUCache(10)
        pb = ui.ui_factory.nested_progress_bar()
        try:
            num_records = len(records)
            for idx, (key, metadata, bytes) in enumerate(records):
                pb.update('installing inventory', idx, num_records)
                revision_id = key[-1]
                parent_ids = metadata['parents']
                # Note: This assumes the local ghosts are identical to the
                #       ghosts in the source, as the Bundle serialization
                #       format doesn't record ghosts.
                p_texts = self._get_parent_inventory_texts(
                    inventory_text_cache, inventory_cache, parent_ids)
                # Why does to_lines() take strings as the source? It seems
                # that it would have to cast to a list of lines, which we get
                # back as lines and then cast back to a string.
                target_lines = multiparent.MultiParent.from_patch(bytes
                    ).to_lines(p_texts)
                inv_text = ''.join(target_lines)
                del target_lines
                sha1 = osutils.sha_string(inv_text)
                if sha1 != metadata['sha1']:
                    raise errors.BadBundle("Can't convert to target format")
                # Add this to the cache so we don't have to extract it again.
                inventory_text_cache[revision_id] = inv_text
                target_inv = self._source_serializer.read_inventory_from_string(
                    inv_text)
                self._handle_root(target_inv, parent_ids)
                parent_inv = None
                if parent_ids:
                    parent_inv = inventory_cache.get(parent_ids[0], None)
                try:
                    if parent_inv is None:
                        self._repository.add_inventory(revision_id, target_inv,
                                                       parent_ids)
                    else:
                        delta = target_inv._make_delta(parent_inv)
                        self._repository.add_inventory_by_delta(parent_ids[0],
                            delta, revision_id, parent_ids)
                except errors.UnsupportedInventoryKind:
                    raise errors.IncompatibleRevision(repr(self._repository))
                inventory_cache[revision_id] = target_inv
        finally:
            pb.finished()

    def _handle_root(self, target_inv, parent_ids):
        revision_id = target_inv.revision_id
        if self.update_root:
            text_key = (target_inv.root.file_id, revision_id)
            parent_keys = [(target_inv.root.file_id, parent) for
                           parent in parent_ids]
            self._repository.texts.add_lines(text_key, parent_keys, [])
        elif not self._repository.supports_rich_root():
            if target_inv.root.revision != revision_id:
                raise errors.IncompatibleRevision(repr(self._repository))

    def _install_revision(self, revision_id, metadata, text):
        if self._repository.has_revision(revision_id):
            return
        revision = self._source_serializer.read_revision_from_string(text)
        self._repository.add_revision(revision.revision_id, revision)

    def _install_signature(self, revision_id, metadata, text):
        transaction = self._repository.get_transaction()
        if self._repository.has_signature_for_revision_id(revision_id):
            return
        self._repository.add_signature_text(revision_id, text)