1
# Copyright (C) 2007 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
from cStringIO import StringIO
28
revision as _mod_revision,
32
from bzrlib.bundle import bundle_data, serializer
33
from bzrlib.util import bencode
36
class BundleWriter(object):
37
"""Writer for bundle-format files.
39
This serves roughly the same purpose as ContainerWriter, but acts as a
42
Provides ways of writing the specific record types supported by this bundle
46
def __init__(self, fileobj):
    """Set up a writer that emits a bzip2-compressed container.

    :param fileobj: File-like object that receives the bundle output.
    """
    # The container never writes to the file itself; each chunk it
    # produces is handed to _write_encoded, which compresses it first.
    write_callback = self._write_encoded
    self._container = pack.ContainerWriter(write_callback)
    self._fileobj = fileobj
    self._compressor = bz2.BZ2Compressor()
51
def _write_encoded(self, bytes):
52
"""Write bzip2-encoded bytes to the file"""
53
self._fileobj.write(self._compressor.compress(bytes))
56
"""Start writing the bundle"""
57
self._fileobj.write(serializer._get_bundle_header(
58
serializer.v4_string))
59
self._fileobj.write('#\n')
60
self._container.begin()
63
"""Finish writing the bundle"""
65
self._fileobj.write(self._compressor.flush())
67
def add_multiparent_record(self, mp_bytes, sha1, parents, repo_kind,
68
revision_id, file_id):
69
"""Add a record for a multi-parent diff
71
:mp_bytes: A multi-parent diff, as a bytestring
72
:sha1: The sha1 hash of the fulltext
73
:parents: a list of revision-ids of the parents
74
:repo_kind: The kind of object in the repository. May be 'file' or
76
:revision_id: The revision id of the mpdiff being added.
77
:file_id: The file-id of the file, or None for inventories.
79
metadata = {'parents': parents,
80
'storage_kind': 'mpdiff',
82
self._add_record(mp_bytes, metadata, repo_kind, revision_id, file_id)
84
def add_fulltext_record(self, bytes, parents, repo_kind, revision_id):
    """Add a record for a fulltext

    :bytes: The fulltext, as a bytestring
    :parents: a list of revision-ids of the parents
    :repo_kind: The kind of object in the repository. May be 'revision' or
        'signature'
    :revision_id: The revision id of the fulltext being added.
    """
    # A single metadata dict is built here.  An earlier, unused dict that
    # mislabelled the record as 'mpdiff' has been dropped.
    metadata = {'parents': parents,
                'storage_kind': 'fulltext'}
    # Fulltext records are never tied to a file-id, hence the final None.
    self._add_record(bytes, metadata, repo_kind, revision_id, None)
98
def add_info_record(self, **kwargs):
    """Write an info record whose metadata is the supplied keywords.

    Any parameters may be supplied, except 'self' and 'storage_kind';
    the storage kind is always set to 'header' here.
    Values must be lists, strings, integers, dicts, or a combination.
    """
    metadata = dict(kwargs, storage_kind='header')
    # Info records have no body, no revision id and no file id.
    self._add_record(None, metadata, 'info', None, None)
108
def encode_name(content_kind, revision_id, file_id=None):
109
"""Encode semantic ids as a container name"""
110
if content_kind not in ('revision', 'file', 'inventory', 'signature',
112
raise ValueError(content_kind)
113
if content_kind == 'file':
115
raise AssertionError()
117
if file_id is not None:
118
raise AssertionError()
119
if content_kind == 'info':
120
if revision_id is not None:
121
raise AssertionError()
122
elif revision_id is None:
123
raise AssertionError()
124
names = [n.replace('/', '//') for n in
125
(content_kind, revision_id, file_id) if n is not None]
126
return '/'.join(names)
128
def _add_record(self, bytes, metadata, repo_kind, revision_id, file_id):
    """Write one bundle record into the container.

    The bencoded metadata is stored as a record named after the semantic
    ids.  Unless the metadata's storage_kind is 'header', it is followed
    by a second, nameless record holding the body bytes.
    """
    record_name = self.encode_name(repo_kind, revision_id, file_id)
    header = bencode.bencode(metadata)
    self._container.add_bytes_record(header, [(record_name, )])
    if metadata['storage_kind'] == 'header':
        # Header-only records carry no body.
        return
    self._container.add_bytes_record(bytes, [])
142
class BundleReader(object):
143
"""Reader for bundle-format files.
145
This serves roughly the same purpose as ContainerReader, but acts as a
146
layer on top of it, providing metadata, a semantic name, and a record
150
def __init__(self, fileobj, stream_input=True):
153
:param fileobj: a file containing a bzip-encoded container
154
:param stream_input: If True, the BundleReader streams input rather than
155
reading it all into memory at once. Reading it into memory all at
156
once is (currently) faster.
158
line = fileobj.readline()
161
self.patch_lines = []
163
source_file = iterablefile.IterableFile(self.iter_decode(fileobj))
165
source_file = StringIO(bz2.decompress(fileobj.read()))
166
self._container_file = source_file
169
def iter_decode(fileobj):
170
"""Iterate through decoded fragments of the file"""
171
decompressor = bz2.BZ2Decompressor()
174
yield decompressor.decompress(line)
179
def decode_name(name):
180
"""Decode a name from its container form into a semantic form
182
:retval: content_kind, revision_id, file_id
184
segments = re.split('(//?)', name)
186
for segment in segments:
193
content_kind = names[0]
197
revision_id = names[1]
200
return content_kind, revision_id, file_id
202
def iter_records(self):
203
"""Iterate through bundle records
205
:return: a generator of (bytes, metadata, content_kind, revision_id,
208
iterator = pack.iter_records_from_file(self._container_file)
209
for names, bytes in iterator:
211
raise errors.BadBundle('Record has %d names instead of 1'
213
metadata = bencode.bdecode(bytes)
214
if metadata['storage_kind'] == 'header':
217
_unused, bytes = iterator.next()
218
yield (bytes, metadata) + self.decode_name(names[0][0])
221
class BundleSerializerV4(serializer.BundleSerializer):
222
"""Implement the high-level bundle interface"""
224
def write(self, repository, revision_ids, forced_bases, fileobj):
    """Write a bundle to a file-like object using old-style arguments.

    For backwards-compatibility only
    """
    operation = BundleWriteOperation.from_old_args(
        repository, revision_ids, forced_bases, fileobj)
    return operation.do_write()
233
def write_bundle(self, repository, target, base, fileobj):
    """Write a bundle to a file object

    :param repository: The repository to retrieve revision data from
    :param target: The head revision to include ancestors of
    :param base: The ancestor of the target at which to stop including
        ancestors
    :param fileobj: The file-like object to write to
    :return: whatever BundleWriteOperation.do_write returns
    """
    # Note the argument order: the operation takes base before target.
    return BundleWriteOperation(base, target, repository,
                                fileobj).do_write()
245
def read(self, file):
246
"""return a reader object for a given file"""
247
bundle = BundleInfoV4(file, self)
251
def get_source_serializer(info):
    """Retrieve the serializer for a given info object

    :param info: Mapping with a 'serializer' entry, used as the key into
        the xml serializer format registry.
    """
    format_name = info['serializer']
    return xml_serializer.format_registry.get(format_name)
256
class BundleWriteOperation(object):
257
"""Perform the operation of writing revisions to a bundle"""
260
def from_old_args(cls, repository, revision_ids, forced_bases, fileobj):
261
"""Create a BundleWriteOperation from old-style arguments"""
262
base, target = cls.get_base_target(revision_ids, forced_bases,
264
return BundleWriteOperation(base, target, repository, fileobj,
267
def __init__(self, base, target, repository, fileobj, revision_ids=None):
270
self.repository = repository
271
bundle = BundleWriter(fileobj)
273
self.base_ancestry = set(repository.get_ancestry(base,
275
if revision_ids is not None:
276
self.revision_ids = revision_ids
278
revision_ids = set(repository.get_ancestry(target,
280
self.revision_ids = revision_ids.difference(self.base_ancestry)
283
"""Write all data to the bundle"""
287
self.write_revisions()
289
return self.revision_ids
291
def write_info(self):
    """Write format info"""
    repository = self.repository
    format_name = repository.get_serializer_format()
    # The rich-root flag is recorded as an integer (1 or 0), not a bool.
    rich_root_flag = {True: 1, False: 0}[repository.supports_rich_root()]
    self.bundle.add_info_record(serializer=format_name,
                                supports_rich_root=rich_root_flag)
299
def iter_file_revisions(self):
300
"""Iterate through all relevant revisions of all files.
302
This is the correct implementation, but is not compatible with bzr.dev,
303
because certain old revisions were not converted correctly, and have
304
the wrong "revision" marker in inventories.
306
transaction = self.repository.get_transaction()
307
altered = self.repository.fileids_altered_by_revision_ids(
309
for file_id, file_revision_ids in altered.iteritems():
310
vf = self.repository.weave_store.get_weave(file_id, transaction)
311
yield vf, file_id, file_revision_ids
313
def iter_file_revisions_aggressive(self):
314
"""Iterate through all relevant revisions of all files.
316
This uses the standard iter_file_revisions to determine what revisions
317
are referred to by inventories, but then uses the versionedfile to
318
determine the build-dependencies of each required revision.
320
All build dependencies which are not ancestors of the base revision
323
for vf, file_id, file_revision_ids in self.iter_file_revisions():
324
new_revision_ids = set()
325
pending = list(file_revision_ids)
326
while len(pending) > 0:
327
revision_id = pending.pop()
328
if revision_id in new_revision_ids:
330
if revision_id in self.base_ancestry:
332
new_revision_ids.add(revision_id)
333
pending.extend(vf.get_parent_map([revision_id])[revision_id])
334
yield vf, file_id, new_revision_ids
336
def write_files(self):
    """Write bundle records for all revisions of all files"""
    for entry in self.iter_file_revisions():
        versioned_file, file_id, file_revisions = entry
        self.add_mp_records('file', file_id, versioned_file,
                            file_revisions)
341
def write_revisions(self):
342
"""Write bundle records for all revisions and signatures"""
343
inv_vf = self.repository.get_inventory_weave()
344
revision_order = list(multiparent.topo_iter(inv_vf, self.revision_ids))
345
if self.target is not None and self.target in self.revision_ids:
346
revision_order.remove(self.target)
347
revision_order.append(self.target)
348
self.add_mp_records('inventory', None, inv_vf, revision_order)
349
parent_map = self.repository.get_parent_map(revision_order)
350
for revision_id in revision_order:
351
parents = parent_map.get(revision_id, None)
352
revision_text = self.repository.get_revision_xml(revision_id)
353
self.bundle.add_fulltext_record(revision_text, parents,
354
'revision', revision_id)
356
self.bundle.add_fulltext_record(
357
self.repository.get_signature_text(
358
revision_id), parents, 'signature', revision_id)
359
except errors.NoSuchRevision:
363
def get_base_target(revision_ids, forced_bases, repository):
364
"""Determine the base and target from old-style revision ids"""
365
if len(revision_ids) == 0:
367
target = revision_ids[0]
368
base = forced_bases.get(target)
370
parents = repository.get_revision(target).parent_ids
371
if len(parents) == 0:
372
base = _mod_revision.NULL_REVISION
377
def add_mp_records(self, repo_kind, file_id, vf, revision_ids):
    """Add multi-parent diff records to a bundle

    The revision ids are first put into the order produced by
    multiparent.topo_iter, then one record is written per revision.
    """
    ordered_ids = list(multiparent.topo_iter(vf, revision_ids))
    diffs = vf.make_mpdiffs(ordered_ids)
    sha1s = vf.get_sha1s(ordered_ids)
    parent_map = vf.get_parent_map(ordered_ids)
    for diff, revision_id, sha1 in zip(diffs, ordered_ids, sha1s):
        parents = parent_map[revision_id]
        patch_text = ''.join(diff.to_patch())
        self.bundle.add_multiparent_record(
            patch_text, sha1, parents, repo_kind, revision_id, file_id)
390
class BundleInfoV4(object):
392
"""Provide (most of) the BundleInfo interface"""
393
def __init__(self, fileobj, serializer):
394
self._fileobj = fileobj
395
self._serializer = serializer
396
self.__real_revisions = None
397
self.__revisions = None
399
def install(self, repository):
    """Install this bundle's revisions into the given repository.

    Delegates to install_revisions, leaving its other arguments at their
    defaults, and returns its result.
    """
    outcome = self.install_revisions(repository)
    return outcome
402
def install_revisions(self, repository, stream_input=True):
403
"""Install this bundle's revisions into the specified repository
405
:param repository: The repository to install into
406
:param stream_input: If True, will stream input rather than reading it
407
all into memory at once. Reading it into memory all at once is
410
repository.lock_write()
412
ri = RevisionInstaller(self.get_bundle_reader(stream_input),
413
self._serializer, repository)
418
def get_merge_request(self, target_repo):
    """Provide data for performing a merge

    Returns suggested base, suggested target, and patch verification status.
    No base is suggested (None), the target is this bundle's target, and
    the verification status is always 'inapplicable'.  target_repo is not
    consulted.
    """
    suggested_base = None
    verification = 'inapplicable'
    return suggested_base, self.target, verification
425
def get_bundle_reader(self, stream_input=True):
    """Return a new BundleReader for the associated bundle

    :param stream_input: If True, the BundleReader streams input rather
        than reading it all into memory at once.  Reading it into memory
        all at once is (currently) faster.
    """
    bundle_file = self._fileobj
    # Rewind so the new reader starts from the beginning of the file.
    bundle_file.seek(0)
    return BundleReader(bundle_file, stream_input)
435
def _get_real_revisions(self):
436
if self.__real_revisions is None:
437
self.__real_revisions = []
438
bundle_reader = self.get_bundle_reader()
439
for bytes, metadata, repo_kind, revision_id, file_id in \
440
bundle_reader.iter_records():
441
if repo_kind == 'info':
443
self._serializer.get_source_serializer(metadata)
444
if repo_kind == 'revision':
445
rev = serializer.read_revision_from_string(bytes)
446
self.__real_revisions.append(rev)
447
return self.__real_revisions
448
real_revisions = property(_get_real_revisions)
450
def _get_revisions(self):
    """Return the RevisionInfo list for this bundle, building it once.

    Each entry is made from one of self.real_revisions via
    bundle_data.RevisionInfo.from_revision; the list is cached on the
    instance after the first call.
    """
    if self.__revisions is not None:
        return self.__revisions
    # The cache list is assigned before it is filled, so it is the same
    # object that gets appended to and returned.
    infos = self.__revisions = []
    for revision in self.real_revisions:
        infos.append(bundle_data.RevisionInfo.from_revision(revision))
    return infos
458
revisions = property(_get_revisions)
460
def _get_target(self):
461
return self.revisions[-1].revision_id
463
target = property(_get_target)
466
class RevisionInstaller(object):
467
"""Installs revisions into a repository"""
469
def __init__(self, container, serializer, repository):
470
self._container = container
471
self._serializer = serializer
472
self._repository = repository
476
"""Perform the installation.
478
Must be called with the Repository locked.
480
self._repository.start_write_group()
482
result = self._install_in_write_group()
484
self._repository.abort_write_group()
486
self._repository.commit_write_group()
489
def _install_in_write_group(self):
491
current_versionedfile = None
492
pending_file_records = []
494
pending_inventory_records = []
496
target_revision = None
497
for bytes, metadata, repo_kind, revision_id, file_id in\
498
self._container.iter_records():
499
if repo_kind == 'info':
500
if self._info is not None:
501
raise AssertionError()
502
self._handle_info(metadata)
503
if (repo_kind, file_id) != ('file', current_file):
504
if len(pending_file_records) > 0:
505
self._install_mp_records(current_versionedfile,
506
pending_file_records)
508
current_versionedfile = None
509
pending_file_records = []
510
if len(pending_inventory_records) > 0 and repo_kind != 'inventory':
511
self._install_inventory_records(inventory_vf,
512
pending_inventory_records)
513
pending_inventory_records = []
514
if repo_kind == 'inventory':
515
if inventory_vf is None:
516
inventory_vf = self._repository.get_inventory_weave()
517
if revision_id not in inventory_vf:
518
pending_inventory_records.append((revision_id, metadata,
520
if repo_kind == 'revision':
521
target_revision = revision_id
522
self._install_revision(revision_id, metadata, bytes)
523
if repo_kind == 'signature':
524
self._install_signature(revision_id, metadata, bytes)
525
if repo_kind == 'file':
526
current_file = file_id
527
if current_versionedfile is None:
528
current_versionedfile = \
529
self._repository.weave_store.get_weave_or_empty(
530
file_id, self._repository.get_transaction())
531
pending_file_records = []
532
if revision_id in current_versionedfile:
534
pending_file_records.append((revision_id, metadata, bytes))
535
self._install_mp_records(current_versionedfile, pending_file_records)
536
return target_revision
538
def _handle_info(self, info):
539
"""Extract data from an info record"""
541
self._source_serializer = self._serializer.get_source_serializer(info)
542
if (info['supports_rich_root'] == 0 and
543
self._repository.supports_rich_root()):
544
self.update_root = True
546
self.update_root = False
548
def _install_mp_records(self, versionedfile, records):
549
if len(records) == 0:
551
d_func = multiparent.MultiParent.from_patch
552
vf_records = [(r, m['parents'], m['sha1'], d_func(t)) for r, m, t in
553
records if r not in versionedfile]
554
versionedfile.add_mpdiffs(vf_records)
556
def _install_inventory_records(self, vf, records):
557
if self._info['serializer'] == self._repository._serializer.format_num:
558
return self._install_mp_records(vf, records)
559
for revision_id, metadata, bytes in records:
560
parent_ids = metadata['parents']
561
parents = [self._repository.get_inventory(p)
563
p_texts = [self._source_serializer.write_inventory_to_string(p)
565
target_lines = multiparent.MultiParent.from_patch(bytes).to_lines(
567
sha1 = osutils.sha_strings(target_lines)
568
if sha1 != metadata['sha1']:
569
raise errors.BadBundle("Can't convert to target format")
570
target_inv = self._source_serializer.read_inventory_from_string(
571
''.join(target_lines))
572
self._handle_root(target_inv, parent_ids)
574
self._repository.add_inventory(revision_id, target_inv,
576
except errors.UnsupportedInventoryKind:
577
raise errors.IncompatibleRevision(repr(self._repository))
579
def _handle_root(self, target_inv, parent_ids):
580
revision_id = target_inv.revision_id
582
target_inv.root.revision = revision_id
583
store = self._repository.weave_store
584
transaction = self._repository.get_transaction()
585
vf = store.get_weave_or_empty(target_inv.root.file_id, transaction)
586
vf.add_lines(revision_id, parent_ids, [])
587
elif not self._repository.supports_rich_root():
588
if target_inv.root.revision != revision_id:
589
raise errors.IncompatibleRevision(repr(self._repository))
592
def _install_revision(self, revision_id, metadata, text):
593
if self._repository.has_revision(revision_id):
595
if self._info['serializer'] == self._repository._serializer.format_num:
596
self._repository._add_revision_text(revision_id, text)
598
revision = self._source_serializer.read_revision_from_string(text)
599
self._repository.add_revision(revision.revision_id, revision)
601
def _install_signature(self, revision_id, metadata, text):
602
transaction = self._repository.get_transaction()
603
if self._repository._revision_store.has_signature(revision_id,
606
self._repository._revision_store.add_revision_signature_text(
607
revision_id, text, transaction)