1
# Copyright (C) 2007 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
from cStringIO import StringIO
28
revision as _mod_revision,
32
from bzrlib.bundle import bundle_data, serializer as bundle_serializer
33
from bzrlib import bencode
36
class BundleWriter(object):
37
"""Writer for bundle-format files.
39
This serves roughly the same purpose as ContainerReader, but acts as a
42
Provides ways of writing the specific record types supported this bundle
46
def __init__(self, fileobj):
    """Set up a writer that emits a bundle to ``fileobj``.

    :param fileobj: file-like object the bundle is written to.  The code
        in this class only ever calls its ``write`` method.
    """
    # The container is not given the file itself: it is handed
    # _write_encoded, so everything it emits goes through the bzip2
    # compressor before reaching the file.
    container = pack.ContainerWriter(self._write_encoded)
    compressor = bz2.BZ2Compressor()
    self._container = container
    self._fileobj = fileobj
    self._compressor = compressor
51
def _write_encoded(self, bytes):
52
"""Write bzip2-encoded bytes to the file"""
53
self._fileobj.write(self._compressor.compress(bytes))
56
"""Start writing the bundle"""
57
self._fileobj.write(bundle_serializer._get_bundle_header(
58
bundle_serializer.v4_string))
59
self._fileobj.write('#\n')
60
self._container.begin()
63
"""Finish writing the bundle"""
65
self._fileobj.write(self._compressor.flush())
67
def add_multiparent_record(self, mp_bytes, sha1, parents, repo_kind,
68
revision_id, file_id):
69
"""Add a record for a multi-parent diff
71
:mp_bytes: A multi-parent diff, as a bytestring
72
:sha1: The sha1 hash of the fulltext
73
:parents: a list of revision-ids of the parents
74
:repo_kind: The kind of object in the repository. May be 'file' or
76
:revision_id: The revision id of the mpdiff being added.
77
:file_id: The file-id of the file, or None for inventories.
79
metadata = {'parents': parents,
80
'storage_kind': 'mpdiff',
82
self._add_record(mp_bytes, metadata, repo_kind, revision_id, file_id)
84
def add_fulltext_record(self, bytes, parents, repo_kind, revision_id):
    """Add a record for a fulltext

    :bytes: The fulltext, as a bytestring
    :parents: a list of revision-ids of the parents
    :repo_kind: The kind of object in the repository.  Callers in this
        file pass 'revision' or 'signature'.
    :revision_id: The revision id of the fulltext being added.
    """
    # A single metadata dict, labelled 'fulltext'.  (An earlier version
    # also built an unused dict labelled 'mpdiff', which was misleading.)
    metadata = {'parents': parents,
                'storage_kind': 'fulltext'}
    # Fulltext records are never tied to a file, hence file_id is None.
    self._add_record(bytes, metadata, repo_kind, revision_id, None)
98
def add_info_record(self, **kwargs):
    """Add an info record to the bundle

    Any parameters may be supplied, except 'self' and 'storage_kind'.
    Values must be lists, strings, integers, dicts, or a combination.

    The keyword arguments become the record's metadata, with
    'storage_kind' forced to 'header'; the record is written with no body
    bytes, no revision id and no file id.
    """
    info = dict(kwargs, storage_kind='header')
    self._add_record(None, info, 'info', None, None)
108
def encode_name(content_kind, revision_id, file_id=None):
109
"""Encode semantic ids as a container name"""
110
if content_kind not in ('revision', 'file', 'inventory', 'signature',
112
raise ValueError(content_kind)
113
if content_kind == 'file':
115
raise AssertionError()
117
if file_id is not None:
118
raise AssertionError()
119
if content_kind == 'info':
120
if revision_id is not None:
121
raise AssertionError()
122
elif revision_id is None:
123
raise AssertionError()
124
names = [n.replace('/', '//') for n in
125
(content_kind, revision_id, file_id) if n is not None]
126
return '/'.join(names)
128
def _add_record(self, bytes, metadata, repo_kind, revision_id, file_id):
    """Add a bundle record to the container.

    Most bundle records are recorded as header/body pairs, with the
    body being nameless.  Records with storage_kind 'header' have no
    body.

    :param bytes: body of the record; ignored for 'header' records
    :param metadata: dict describing the record; it is bencoded and must
        contain a 'storage_kind' key
    :param repo_kind: content kind, used to build the record name
    :param revision_id: revision id, used to build the record name
    :param file_id: file id, used to build the record name
    """
    record_name = self.encode_name(repo_kind, revision_id, file_id)
    header = bencode.bencode(metadata)
    container = self._container
    # The named record carries the metadata ...
    container.add_bytes_record(header, [(record_name, )])
    if metadata['storage_kind'] == 'header':
        return
    # ... and the payload follows as a separate record with no names.
    container.add_bytes_record(bytes, [])
142
class BundleReader(object):
143
"""Reader for bundle-format files.
145
This serves roughly the same purpose as ContainerReader, but acts as a
146
layer on top of it, providing metadata, a semantic name, and a record
150
def __init__(self, fileobj, stream_input=True):
153
:param fileobj: a file containing a bzip-encoded container
154
:param stream_input: If True, the BundleReader stream input rather than
155
reading it all into memory at once. Reading it into memory all at
156
once is (currently) faster.
158
line = fileobj.readline()
161
self.patch_lines = []
163
source_file = iterablefile.IterableFile(self.iter_decode(fileobj))
165
source_file = StringIO(bz2.decompress(fileobj.read()))
166
self._container_file = source_file
169
def iter_decode(fileobj):
170
"""Iterate through decoded fragments of the file"""
171
decompressor = bz2.BZ2Decompressor()
174
yield decompressor.decompress(line)
179
def decode_name(name):
180
"""Decode a name from its container form into a semantic form
182
:retval: content_kind, revision_id, file_id
184
segments = re.split('(//?)', name)
186
for segment in segments:
193
content_kind = names[0]
197
revision_id = names[1]
200
return content_kind, revision_id, file_id
202
def iter_records(self):
203
"""Iterate through bundle records
205
:return: a generator of (bytes, metadata, content_kind, revision_id,
208
iterator = pack.iter_records_from_file(self._container_file)
209
for names, bytes in iterator:
211
raise errors.BadBundle('Record has %d names instead of 1'
213
metadata = bencode.bdecode(bytes)
214
if metadata['storage_kind'] == 'header':
217
_unused, bytes = iterator.next()
218
yield (bytes, metadata) + self.decode_name(names[0][0])
221
class BundleSerializerV4(bundle_serializer.BundleSerializer):
222
"""Implement the high-level bundle interface"""
224
def write(self, repository, revision_ids, forced_bases, fileobj):
    """Write a bundle to a file-like object

    For backwards-compatibility only.  The old-style arguments are turned
    into a BundleWriteOperation, which then performs the write.

    :return: the result of the operation's do_write()
    """
    operation = BundleWriteOperation.from_old_args(
        repository, revision_ids, forced_bases, fileobj)
    return operation.do_write()
233
def write_bundle(self, repository, target, base, fileobj):
    """Write a bundle to a file object

    :param repository: The repository to retrieve revision data from
    :param target: The head revision to include ancestors of
    :param base: The ancestor of the target to stop including ancestors
        at.
    :param fileobj: The file-like object to write to
    :return: the result of the write operation's do_write()
    """
    # Note the argument order: the operation takes base before target.
    operation = BundleWriteOperation(base, target, repository, fileobj)
    return operation.do_write()
245
def read(self, file):
246
"""return a reader object for a given file"""
247
bundle = BundleInfoV4(file, self)
251
def get_source_serializer(info):
    """Retrieve the serializer for a given info object

    :param info: mapping holding the bundle's info record; its
        'serializer' entry is the key looked up in the format registry
    :return: whatever serializer.format_registry.get returns for that key
    """
    lookup = serializer.format_registry.get
    return lookup(info['serializer'])
256
class BundleWriteOperation(object):
257
"""Perform the operation of writing revisions to a bundle"""
260
def from_old_args(cls, repository, revision_ids, forced_bases, fileobj):
261
"""Create a BundleWriteOperation from old-style arguments"""
262
base, target = cls.get_base_target(revision_ids, forced_bases,
264
return BundleWriteOperation(base, target, repository, fileobj,
267
def __init__(self, base, target, repository, fileobj, revision_ids=None):
270
self.repository = repository
271
bundle = BundleWriter(fileobj)
273
if revision_ids is not None:
274
self.revision_ids = revision_ids
276
graph = repository.get_graph()
277
revision_ids = graph.find_unique_ancestors(target, [base])
279
parents = graph.get_parent_map(revision_ids)
280
self.revision_ids = [r for r in revision_ids if r in parents]
281
self.revision_keys = set([(revid,) for revid in self.revision_ids])
284
"""Write all data to the bundle"""
285
trace.note('Bundling %d revision(s).', len(self.revision_ids))
286
self.repository.lock_read()
291
self.write_revisions()
294
self.repository.unlock()
295
return self.revision_ids
297
def write_info(self):
    """Write format info

    Emits one info record carrying the repository's serializer format and
    whether it supports rich roots (encoded as 1 or 0).
    """
    repo = self.repository
    format_name = repo.get_serializer_format()
    # Table lookup rather than int(): the flag is stored as a plain 1/0.
    rich_root_flag = {False: 0, True: 1}[repo.supports_rich_root()]
    self.bundle.add_info_record(serializer=format_name,
                                supports_rich_root=rich_root_flag)
305
def write_files(self):
306
"""Write bundle records for all revisions of all files"""
308
altered_fileids = self.repository.fileids_altered_by_revision_ids(
310
for file_id, revision_ids in altered_fileids.iteritems():
311
for revision_id in revision_ids:
312
text_keys.append((file_id, revision_id))
313
self._add_mp_records_keys('file', self.repository.texts, text_keys)
315
def write_revisions(self):
316
"""Write bundle records for all revisions and signatures"""
317
inv_vf = self.repository.inventories
318
revision_order = [key[-1] for key in multiparent.topo_iter_keys(inv_vf,
320
if self.target is not None and self.target in self.revision_ids:
321
revision_order.remove(self.target)
322
revision_order.append(self.target)
323
self._add_mp_records_keys('inventory', inv_vf, [(revid,) for revid in revision_order])
324
parent_map = self.repository.get_parent_map(revision_order)
325
revision_to_str = self.repository._serializer.write_revision_to_string
326
revisions = self.repository.get_revisions(revision_order)
327
for revision in revisions:
328
revision_id = revision.revision_id
329
parents = parent_map.get(revision_id, None)
330
revision_text = revision_to_str(revision)
331
self.bundle.add_fulltext_record(revision_text, parents,
332
'revision', revision_id)
334
self.bundle.add_fulltext_record(
335
self.repository.get_signature_text(
336
revision_id), parents, 'signature', revision_id)
337
except errors.NoSuchRevision:
341
def get_base_target(revision_ids, forced_bases, repository):
342
"""Determine the base and target from old-style revision ids"""
343
if len(revision_ids) == 0:
345
target = revision_ids[0]
346
base = forced_bases.get(target)
348
parents = repository.get_revision(target).parent_ids
349
if len(parents) == 0:
350
base = _mod_revision.NULL_REVISION
355
def _add_mp_records_keys(self, repo_kind, vf, keys):
356
"""Add multi-parent diff records to a bundle"""
357
ordered_keys = list(multiparent.topo_iter_keys(vf, keys))
358
mpdiffs = vf.make_mpdiffs(ordered_keys)
359
sha1s = vf.get_sha1s(ordered_keys)
360
parent_map = vf.get_parent_map(ordered_keys)
361
for mpdiff, item_key, in zip(mpdiffs, ordered_keys):
362
sha1 = sha1s[item_key]
363
parents = [key[-1] for key in parent_map[item_key]]
364
text = ''.join(mpdiff.to_patch())
365
# Infer file id records as appropriate.
366
if len(item_key) == 2:
367
file_id = item_key[0]
370
self.bundle.add_multiparent_record(text, sha1, parents, repo_kind,
371
item_key[-1], file_id)
374
class BundleInfoV4(object):
376
"""Provide (most of) the BundleInfo interface"""
377
def __init__(self, fileobj, serializer):
378
self._fileobj = fileobj
379
self._serializer = serializer
380
self.__real_revisions = None
381
self.__revisions = None
383
def install(self, repository):
    """Install this bundle's revisions into ``repository``.

    Shorthand for install_revisions(repository), leaving that method's
    stream_input argument at its default.

    :return: whatever install_revisions returns
    """
    outcome = self.install_revisions(repository)
    return outcome
386
def install_revisions(self, repository, stream_input=True):
387
"""Install this bundle's revisions into the specified repository
389
:param target_repo: The repository to install into
390
:param stream_input: If True, will stream input rather than reading it
391
all into memory at once. Reading it into memory all at once is
394
repository.lock_write()
396
ri = RevisionInstaller(self.get_bundle_reader(stream_input),
397
self._serializer, repository)
402
def get_merge_request(self, target_repo):
    """Provide data for performing a merge

    :param target_repo: not used by this implementation
    :return: a 3-tuple of suggested base, suggested target, and patch
        verification status.  The base is always None, the target is
        self.target and the status is always 'inapplicable'.
    """
    suggested_base = None
    verification = 'inapplicable'
    return suggested_base, self.target, verification
409
def get_bundle_reader(self, stream_input=True):
    """Return a new BundleReader for the associated bundle

    :param stream_input: If True, the BundleReader streams input rather than
        reading it all into memory at once.  Reading it into memory all at
        once is (currently) faster.
    """
    bundle_file = self._fileobj
    # Rewind so that each new reader starts from the top of the file.
    bundle_file.seek(0)
    return BundleReader(bundle_file, stream_input)
419
def _get_real_revisions(self):
420
if self.__real_revisions is None:
421
self.__real_revisions = []
422
bundle_reader = self.get_bundle_reader()
423
for bytes, metadata, repo_kind, revision_id, file_id in \
424
bundle_reader.iter_records():
425
if repo_kind == 'info':
427
self._serializer.get_source_serializer(metadata)
428
if repo_kind == 'revision':
429
rev = serializer.read_revision_from_string(bytes)
430
self.__real_revisions.append(rev)
431
return self.__real_revisions
432
real_revisions = property(_get_real_revisions)
434
def _get_revisions(self):
435
if self.__revisions is None:
436
self.__revisions = []
437
for revision in self.real_revisions:
438
self.__revisions.append(
439
bundle_data.RevisionInfo.from_revision(revision))
440
return self.__revisions
442
revisions = property(_get_revisions)
444
def _get_target(self):
445
return self.revisions[-1].revision_id
447
target = property(_get_target)
450
class RevisionInstaller(object):
451
"""Installs revisions into a repository"""
453
def __init__(self, container, serializer, repository):
454
self._container = container
455
self._serializer = serializer
456
self._repository = repository
460
"""Perform the installation.
462
Must be called with the Repository locked.
464
self._repository.start_write_group()
466
result = self._install_in_write_group()
468
self._repository.abort_write_group()
470
self._repository.commit_write_group()
473
def _install_in_write_group(self):
475
current_versionedfile = None
476
pending_file_records = []
478
pending_inventory_records = []
480
target_revision = None
481
for bytes, metadata, repo_kind, revision_id, file_id in\
482
self._container.iter_records():
483
if repo_kind == 'info':
484
if self._info is not None:
485
raise AssertionError()
486
self._handle_info(metadata)
487
if (pending_file_records and
488
(repo_kind, file_id) != ('file', current_file)):
489
# Flush the data for a single file - prevents memory
490
# spiking due to buffering all files in memory.
491
self._install_mp_records_keys(self._repository.texts,
492
pending_file_records)
494
del pending_file_records[:]
495
if len(pending_inventory_records) > 0 and repo_kind != 'inventory':
496
self._install_inventory_records(pending_inventory_records)
497
pending_inventory_records = []
498
if repo_kind == 'inventory':
499
pending_inventory_records.append(((revision_id,), metadata, bytes))
500
if repo_kind == 'revision':
501
target_revision = revision_id
502
self._install_revision(revision_id, metadata, bytes)
503
if repo_kind == 'signature':
504
self._install_signature(revision_id, metadata, bytes)
505
if repo_kind == 'file':
506
current_file = file_id
507
pending_file_records.append(((file_id, revision_id), metadata, bytes))
508
self._install_mp_records_keys(self._repository.texts, pending_file_records)
509
return target_revision
511
def _handle_info(self, info):
512
"""Extract data from an info record"""
514
self._source_serializer = self._serializer.get_source_serializer(info)
515
if (info['supports_rich_root'] == 0 and
516
self._repository.supports_rich_root()):
517
self.update_root = True
519
self.update_root = False
521
def _install_mp_records(self, versionedfile, records):
522
if len(records) == 0:
524
d_func = multiparent.MultiParent.from_patch
525
vf_records = [(r, m['parents'], m['sha1'], d_func(t)) for r, m, t in
526
records if r not in versionedfile]
527
versionedfile.add_mpdiffs(vf_records)
529
def _install_mp_records_keys(self, versionedfile, records):
530
d_func = multiparent.MultiParent.from_patch
532
for key, meta, text in records:
533
# Adapt to tuple interface: A length two key is a file_id,
534
# revision_id pair, a length 1 key is a
535
# revision/signature/inventory. We need to do this because
536
# the metadata extraction from the bundle has not yet been updated
537
# to use the consistent tuple interface itself.
542
parents = [prefix + (parent,) for parent in meta['parents']]
543
vf_records.append((key, parents, meta['sha1'], d_func(text)))
544
versionedfile.add_mpdiffs(vf_records)
546
def _install_inventory_records(self, records):
547
if self._info['serializer'] == self._repository._serializer.format_num:
548
return self._install_mp_records_keys(self._repository.inventories,
550
for key, metadata, bytes in records:
551
revision_id = key[-1]
552
parent_ids = metadata['parents']
553
parents = [self._repository.get_inventory(p)
555
p_texts = [self._source_serializer.write_inventory_to_string(p)
557
target_lines = multiparent.MultiParent.from_patch(bytes).to_lines(
559
sha1 = osutils.sha_strings(target_lines)
560
if sha1 != metadata['sha1']:
561
raise errors.BadBundle("Can't convert to target format")
562
target_inv = self._source_serializer.read_inventory_from_string(
563
''.join(target_lines))
564
self._handle_root(target_inv, parent_ids)
566
self._repository.add_inventory(revision_id, target_inv,
568
except errors.UnsupportedInventoryKind:
569
raise errors.IncompatibleRevision(repr(self._repository))
571
def _handle_root(self, target_inv, parent_ids):
572
revision_id = target_inv.revision_id
574
text_key = (target_inv.root.file_id, revision_id)
575
parent_keys = [(target_inv.root.file_id, parent) for
576
parent in parent_ids]
577
self._repository.texts.add_lines(text_key, parent_keys, [])
578
elif not self._repository.supports_rich_root():
579
if target_inv.root.revision != revision_id:
580
raise errors.IncompatibleRevision(repr(self._repository))
582
def _install_revision(self, revision_id, metadata, text):
583
if self._repository.has_revision(revision_id):
585
revision = self._source_serializer.read_revision_from_string(text)
586
self._repository.add_revision(revision.revision_id, revision)
588
def _install_signature(self, revision_id, metadata, text):
589
transaction = self._repository.get_transaction()
590
if self._repository.has_signature_for_revision_id(revision_id):
592
self._repository.add_signature_text(revision_id, text)