# Copyright (C) 2007 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
from cStringIO import StringIO
import bz2
import re

from bzrlib import (
    errors,
    iterablefile,
    multiparent,
    osutils,
    pack,
    revision as _mod_revision,
    xml_serializer,
    )
from bzrlib.bundle import bundle_data, serializer
from bzrlib.util import bencode
36
class BundleWriter(object):
    """Writer for bundle-format files.

    Acts as a layer on top of a ``pack.ContainerWriter``: everything the
    container emits is bzip2-compressed before it reaches the underlying
    file.  Provides ways of writing the specific record types supported by
    this bundle format.
    """

    # NOTE(review): this class was rebuilt from an extraction that had lost
    # all indentation and dropped several lines.  Lines tagged
    # "reconstructed" were absent from that text and should be confirmed
    # against the upstream file.

    def __init__(self, fileobj):
        """Create a writer that emits a bundle to fileobj.

        :param fileobj: a file-like object; this class only calls its
            write() method.
        """
        self._container = pack.ContainerWriter(self._write_encoded)
        self._fileobj = fileobj
        self._compressor = bz2.BZ2Compressor()

    def _write_encoded(self, bytes):
        """Write bzip2-encoded bytes to the file

        This is the write callback handed to the container writer.  The
        compressor may buffer, so output is only complete once the
        compressor has been flushed.
        """
        self._fileobj.write(self._compressor.compress(bytes))

    def begin(self):  # reconstructed: the def line was missing
        """Start writing the bundle

        The bundle header and the '#' line are written to the file directly
        (uncompressed); the container that follows goes through the
        compressor.
        """
        self._fileobj.write(serializer._get_bundle_header(
            serializer.v4_string))
        self._fileobj.write('#\n')
        self._container.begin()

    def end(self):  # reconstructed: the def line was missing
        """Finish writing the bundle"""
        self._container.end()  # reconstructed
        # Emit whatever the compressor is still buffering.
        self._fileobj.write(self._compressor.flush())

    def add_multiparent_record(self, mp_bytes, sha1, parents, repo_kind,
                               revision_id, file_id):
        """Add a record for a multi-parent diff

        :mp_bytes: A multi-parent diff, as a bytestring
        :sha1: The sha1 hash of the fulltext
        :parents: a list of revision-ids of the parents
        :repo_kind: The kind of object in the repository.  May be 'file' or
            'inventory'
        :revision_id: The revision id of the mpdiff being added.
        :file_id: The file-id of the file, or None for inventories.
        """
        metadata = {'parents': parents,
                    'storage_kind': 'mpdiff',
                    'sha1': sha1}  # reconstructed; read back on install
        self._add_record(mp_bytes, metadata, repo_kind, revision_id, file_id)

    def add_fulltext_record(self, bytes, parents, repo_kind, revision_id):
        """Add a record for a fulltext

        :bytes: The fulltext, as a bytestring
        :parents: a list of revision-ids of the parents
        :repo_kind: The kind of object in the repository.  May be 'revision'
            or 'signature'
        :revision_id: The revision id of the fulltext being added.
        """
        # The original also built an unused dict claiming storage_kind
        # 'mpdiff'; only the 'fulltext' metadata was ever written.
        metadata = {'parents': parents,
                    'storage_kind': 'fulltext'}
        self._add_record(bytes, metadata, repo_kind, revision_id, None)

    def add_info_record(self, **kwargs):
        """Add an info record to the bundle

        Any parameters may be supplied, except 'self' and 'storage_kind'.
        Values must be lists, strings, integers, dicts, or a combination.
        """
        kwargs['storage_kind'] = 'header'
        self._add_record(None, kwargs, 'info', None, None)

    @staticmethod
    def encode_name(content_kind, revision_id, file_id=None):
        """Encode semantic ids as a container name

        Each id that is not None has '/' doubled to '//', and the results
        are joined with single '/' separators.

        :param content_kind: one of 'revision', 'file', 'inventory',
            'signature' or 'info'
        :param revision_id: required for every kind except 'info', for which
            it must be None
        :param file_id: required for 'file', must be None otherwise
        :raises AssertionError: if the combination of arguments is invalid
        """
        # Raised explicitly (instead of using assert statements) so that the
        # checks still run under "python -O".
        if content_kind not in ('revision', 'file', 'inventory', 'signature',
                                'info'):
            raise AssertionError('invalid content kind %r' % (content_kind,))
        if content_kind == 'file':
            if file_id is None:
                raise AssertionError('file records require a file_id')
        elif file_id is not None:
            raise AssertionError('only file records may have a file_id')
        if content_kind == 'info':
            if revision_id is not None:
                raise AssertionError('info records have no revision_id')
        elif revision_id is None:
            raise AssertionError('%s records require a revision_id'
                                 % (content_kind,))
        names = [n.replace('/', '//') for n in
                 (content_kind, revision_id, file_id) if n is not None]
        return '/'.join(names)

    def _add_record(self, bytes, metadata, repo_kind, revision_id, file_id):
        """Add a bundle record to the container.

        Most bundle records are recorded as header/body pairs, with the
        body being nameless.  Records with storage_kind 'header' have no
        body.

        :param bytes: the record body; ignored for 'header' records
        :param metadata: a dict, bencoded to form the named header record
        """
        name = self.encode_name(repo_kind, revision_id, file_id)
        encoded_metadata = bencode.bencode(metadata)
        self._container.add_bytes_record(encoded_metadata, [(name, )])
        if metadata['storage_kind'] != 'header':
            self._container.add_bytes_record(bytes, [])
139
class BundleReader(object):
    """Reader for bundle-format files.

    This serves roughly the same purpose as ContainerReader, but acts as a
    layer on top of it, providing metadata, a semantic name, and a record
    body
    """

    # NOTE(review): this class was rebuilt from an extraction that had lost
    # all indentation and dropped several lines.  Lines tagged
    # "reconstructed" were absent from that text and should be confirmed
    # against the upstream file.

    def __init__(self, fileobj, stream_input=True):
        """Constructor

        :param fileobj: a file containing a bzip-encoded container
        :param stream_input: If True, the BundleReader stream input rather
            than reading it all into memory at once.  Reading it into memory
            all at once is (currently) faster.
        """
        line = fileobj.readline()
        # reconstructed (two lines were missing here): presumably skips the
        # second header line unless the first line was already blank --
        # confirm against upstream.
        if line != '\n':
            fileobj.readline()
        self.patch_lines = []
        if stream_input:  # reconstructed branch structure
            source_file = iterablefile.IterableFile(self.iter_decode(fileobj))
        else:
            source_file = StringIO(bz2.decompress(fileobj.read()))
        self._container_file = source_file

    @staticmethod
    def iter_decode(fileobj):
        """Iterate through decoded fragments of the file

        :param fileobj: an iterable of bzip2-compressed chunks (iterating a
            file yields its lines)
        :return: a generator of decompressed bytestrings
        """
        decompressor = bz2.BZ2Decompressor()
        # reconstructed loop: only the yield line survived the extraction
        for line in fileobj:
            try:
                yield decompressor.decompress(line)
            except EOFError:
                # Data after the end of the compressed stream: stop.
                return

    @staticmethod
    def decode_name(name):
        """Decode a name from its container form into a semantic form

        A single '/' separates ids and '//' stands for a literal '/' inside
        an id.

        :retval: content_kind, revision_id, file_id
        """
        # The capturing group makes re.split return the separators as well.
        segments = re.split('(//?)', name)
        names = ['']  # reconstructed, as is the body of the loop below
        for segment in segments:
            if segment == '//':
                names[-1] += '/'
            elif segment == '/':
                names.append('')
            else:
                names[-1] += segment
        content_kind = names[0]
        revision_id = None
        file_id = None
        if len(names) > 1:
            revision_id = names[1]
        if len(names) > 2:
            file_id = names[2]
        return content_kind, revision_id, file_id

    def iter_records(self):
        """Iterate through bundle records

        :return: a generator of (bytes, metadata, content_kind, revision_id,
            file_id)
        :raises errors.BadBundle: if a metadata record does not have exactly
            one name
        """
        iterator = pack.iter_records_from_file(self._container_file)
        for names, bytes in iterator:
            if len(names) != 1:
                raise errors.BadBundle('Record has %d names instead of 1'
                                       % len(names))
            metadata = bencode.bdecode(bytes)
            if metadata['storage_kind'] == 'header':
                # Header records carry no body record.
                bytes = None
            else:
                # The body is the next (nameless) record in the container.
                _unused, bytes = iterator.next()
            yield (bytes, metadata) + self.decode_name(names[0][0])
218
class BundleSerializerV4(serializer.BundleSerializer):
    """Implement the high-level bundle interface"""

    # NOTE(review): this class was rebuilt from an extraction that had lost
    # all indentation and dropped several lines.  Lines tagged
    # "reconstructed" were absent from that text and should be confirmed
    # against the upstream file.

    def write(self, repository, revision_ids, forced_bases, fileobj):
        """Write a bundle to a file-like object

        For backwards-compatibility only

        :param repository: The repository to retrieve revision data from
        :param revision_ids: old-style list of revision ids; the first entry
            is used as the target
        :param forced_bases: a dict mapping a target revision id to the base
            to use for it
        :param fileobj: The file-like object to write to
        :return: the result of BundleWriteOperation.do_write()
        """
        write_op = BundleWriteOperation.from_old_args(repository, revision_ids,
                                                      forced_bases, fileobj)
        return write_op.do_write()

    def write_bundle(self, repository, target, base, fileobj):
        """Write a bundle to a file object

        :param repository: The repository to retrieve revision data from
        :param target: The head revision to include ancestors of
        :param base: The ancestor of the target to stop including ancestors
            at.
        :param fileobj: The file-like object to write to
        :return: the result of BundleWriteOperation.do_write()
        """
        write_op = BundleWriteOperation(base, target, repository, fileobj)
        return write_op.do_write()

    def read(self, file):
        """return a reader object for a given file"""
        bundle = BundleInfoV4(file, self)
        return bundle  # reconstructed

    @staticmethod
    def get_source_serializer(info):
        """Retrieve the serializer for a given info object

        :param info: the metadata dict of an info record; its 'serializer'
            entry is looked up in xml_serializer.format_registry
        """
        return xml_serializer.format_registry.get(info['serializer'])
253
class BundleWriteOperation(object):
    """Perform the operation of writing revisions to a bundle"""

    # NOTE(review): this class was rebuilt from an extraction that had lost
    # all indentation and dropped several lines.  Lines tagged
    # "reconstructed" were absent from that text and should be confirmed
    # against the upstream file.

    @classmethod
    def from_old_args(cls, repository, revision_ids, forced_bases, fileobj):
        """Create a BundleWriteOperation from old-style arguments"""
        base, target = cls.get_base_target(revision_ids, forced_bases,
                                           repository)
        return BundleWriteOperation(base, target, repository, fileobj,
                                    revision_ids)

    def __init__(self, base, target, repository, fileobj, revision_ids=None):
        """Prepare to write a bundle.

        :param base: revision id whose ancestry is excluded from the bundle
        :param target: the head revision to include ancestors of
        :param repository: The repository to retrieve revision data from
        :param fileobj: The file-like object to write to
        :param revision_ids: if not None, exactly these revisions are
            written; otherwise the ancestry of target minus the ancestry of
            base is used
        """
        self.base = base  # reconstructed
        self.target = target  # reconstructed; read by write_revisions
        self.repository = repository
        bundle = BundleWriter(fileobj)
        self.bundle = bundle  # reconstructed; read by the write_* methods
        # NOTE(review): the second argument of both get_ancestry calls was
        # missing from the extracted text; topo_sorted=False is
        # reconstructed -- confirm against upstream.
        self.base_ancestry = set(repository.get_ancestry(base,
                                                         topo_sorted=False))
        if revision_ids is not None:
            self.revision_ids = revision_ids
        else:
            revision_ids = set(repository.get_ancestry(target,
                                                       topo_sorted=False))
            self.revision_ids = revision_ids.difference(self.base_ancestry)

    def do_write(self):  # reconstructed: the def line was missing
        """Write all data to the bundle

        :return: the revision ids this operation writes
        """
        # reconstructed call sequence: only write_revisions() and the return
        # survived the extraction.
        self.bundle.begin()
        self.write_info()
        self.write_files()
        self.write_revisions()
        self.bundle.end()
        return self.revision_ids

    def write_info(self):
        """Write format info"""
        serializer_format = self.repository.get_serializer_format()
        # Stored as 1/0 rather than True/False.
        if self.repository.supports_rich_root():
            supports_rich_root = 1
        else:
            supports_rich_root = 0
        self.bundle.add_info_record(serializer=serializer_format,
                                    supports_rich_root=supports_rich_root)

    def iter_file_revisions(self):
        """Iterate through all relevant revisions of all files.

        This is the correct implementation, but is not compatible with bzr.dev,
        because certain old revisions were not converted correctly, and have
        the wrong "revision" marker in inventories.

        :return: a generator of (versionedfile, file_id, revision_ids)
        """
        transaction = self.repository.get_transaction()
        altered = self.repository.fileids_altered_by_revision_ids(
            self.revision_ids)
        for file_id, file_revision_ids in altered.iteritems():
            vf = self.repository.weave_store.get_weave(file_id, transaction)
            yield vf, file_id, file_revision_ids

    def iter_file_revisions_aggressive(self):
        """Iterate through all relevant revisions of all files.

        This uses the standard iter_file_revisions to determine what revisions
        are referred to by inventories, but then uses the versionedfile to
        determine what the build-dependencies of each required revision.

        All build dependencies which are not ancestors of the base revision
        are emitted.
        """
        for vf, file_id, file_revision_ids in self.iter_file_revisions():
            new_revision_ids = set()
            pending = list(file_revision_ids)
            while len(pending) > 0:
                revision_id = pending.pop()
                if revision_id in new_revision_ids:
                    continue  # already handled
                if revision_id in self.base_ancestry:
                    continue  # an ancestor of the base is not emitted
                new_revision_ids.add(revision_id)
                pending.extend(vf.get_parents(revision_id))
            yield vf, file_id, new_revision_ids

    def write_files(self):
        """Write bundle records for all revisions of all files"""
        for vf, file_id, revision_ids in self.iter_file_revisions():
            self.add_mp_records('file', file_id, vf, revision_ids)

    def write_revisions(self):
        """Write bundle records for all revisions and signatures"""
        inv_vf = self.repository.get_inventory_weave()
        revision_order = list(multiparent.topo_iter(inv_vf, self.revision_ids))
        if self.target is not None and self.target in self.revision_ids:
            # Move the target to the end of the order.
            revision_order.remove(self.target)
            revision_order.append(self.target)
        self.add_mp_records('inventory', None, inv_vf, revision_order)
        parent_map = self.repository.get_parent_map(revision_order)
        for revision_id in revision_order:
            parents = parent_map.get(revision_id, None)
            revision_text = self.repository.get_revision_xml(revision_id)
            self.bundle.add_fulltext_record(revision_text, parents,
                                            'revision', revision_id)
            try:
                self.bundle.add_fulltext_record(
                    self.repository.get_signature_text(
                        revision_id), parents, 'signature', revision_id)
            except errors.NoSuchRevision:
                # No signature record is written for this revision.
                pass

    @staticmethod
    def get_base_target(revision_ids, forced_bases, repository):
        """Determine the base and target from old-style revision ids

        :param revision_ids: a sequence whose first entry is the target
        :param forced_bases: a dict mapping target revision id to the base
            to use
        :param repository: only consulted when forced_bases has no entry
            for the target
        :return: (base, target); (None, None) if revision_ids is empty
        """
        if len(revision_ids) == 0:
            return None, None
        target = revision_ids[0]
        base = forced_bases.get(target)
        if base is None:
            parents = repository.get_revision(target).parent_ids
            if len(parents) == 0:
                base = _mod_revision.NULL_REVISION
            else:
                base = parents[0]
        return base, target

    def add_mp_records(self, repo_kind, file_id, vf, revision_ids):
        """Add multi-parent diff records to a bundle

        :param repo_kind: passed through to the record ('file' and
            'inventory' are the kinds used in this class)
        :param file_id: the file id for the records, or None
        :param vf: the versionedfile supplying diffs, sha1s and parents
        :param revision_ids: the versions to add; they are written in the
            order produced by multiparent.topo_iter
        """
        revision_ids = list(multiparent.topo_iter(vf, revision_ids))
        mpdiffs = vf.make_mpdiffs(revision_ids)
        sha1s = vf.get_sha1s(revision_ids)
        for mpdiff, revision_id, sha1 in zip(mpdiffs, revision_ids, sha1s):
            parents = vf.get_parents(revision_id)
            text = ''.join(mpdiff.to_patch())
            self.bundle.add_multiparent_record(text, sha1, parents, repo_kind,
                                               revision_id, file_id)
386
class BundleInfoV4(object):
    """Provide (most of) the BundleInfo interface"""

    # NOTE(review): this class was rebuilt from an extraction that had lost
    # all indentation and dropped several lines.  Lines tagged
    # "reconstructed" were absent from that text and should be confirmed
    # against the upstream file.

    def __init__(self, fileobj, serializer):
        """Wrap a bundle file.

        :param fileobj: a seekable file-like object holding the bundle
        :param serializer: the bundle serializer; its
            get_source_serializer() is used to read revision records
        """
        self._fileobj = fileobj
        self._serializer = serializer
        # Lazily filled caches behind the real_revisions/revisions
        # properties.
        self.__real_revisions = None
        self.__revisions = None

    def install(self, repository):
        """Install this bundle's revisions into repository."""
        return self.install_revisions(repository)

    def install_revisions(self, repository, stream_input=True):
        """Install this bundle's revisions into the specified repository

        :param repository: The repository to install into
        :param stream_input: If True, will stream input rather than reading it
            all into memory at once.  Reading it into memory all at once is
            (currently) faster.
        :return: the result of RevisionInstaller.install()
        """
        repository.lock_write()
        # reconstructed: the try/finally that releases the lock and the
        # install() call were missing from the extracted text.
        try:
            ri = RevisionInstaller(self.get_bundle_reader(stream_input),
                                   self._serializer, repository)
            return ri.install()
        finally:
            repository.unlock()

    def get_merge_request(self, target_repo):
        """Provide data for performing a merge

        Returns suggested base, suggested target, and patch verification status
        """
        return None, self.target, 'inapplicable'

    def get_bundle_reader(self, stream_input=True):
        """Return a new BundleReader for the associated bundle

        :param stream_input: If True, the BundleReader stream input rather than
            reading it all into memory at once.  Reading it into memory all at
            once is (currently) faster.
        """
        # Each reader starts from the beginning of the file.
        self._fileobj.seek(0)
        return BundleReader(self._fileobj, stream_input)

    def _get_real_revisions(self):
        """Return the revisions parsed from the bundle's revision records.

        The bundle is read on first access and the result cached.

        :raises errors.BadBundle: if a revision record appears before any
            info record
        """
        if self.__real_revisions is None:
            # Build into a local and publish only on success, so a failure
            # part-way through does not leave a truncated list cached.
            real_revisions = []
            # Named so as not to shadow the imported serializer module.
            source_serializer = None
            bundle_reader = self.get_bundle_reader()
            for text, metadata, repo_kind, revision_id, file_id in \
                    bundle_reader.iter_records():
                if repo_kind == 'info':
                    # reconstructed assignment: the target of this call was
                    # missing from the extracted text.
                    source_serializer = \
                        self._serializer.get_source_serializer(metadata)
                if repo_kind == 'revision':
                    if source_serializer is None:
                        raise errors.BadBundle(
                            'Revision record found before info record')
                    rev = source_serializer.read_revision_from_string(text)
                    real_revisions.append(rev)
            self.__real_revisions = real_revisions
        return self.__real_revisions

    real_revisions = property(_get_real_revisions)

    def _get_revisions(self):
        """Return a bundle_data.RevisionInfo for each real revision."""
        if self.__revisions is None:
            self.__revisions = [
                bundle_data.RevisionInfo.from_revision(revision)
                for revision in self.real_revisions]
        return self.__revisions

    revisions = property(_get_revisions)

    def _get_target(self):
        """Return the revision id of the last revision in the bundle."""
        return self.revisions[-1].revision_id

    target = property(_get_target)
462
class RevisionInstaller(object):
    """Installs revisions into a repository"""

    # NOTE(review): this class was rebuilt from an extraction that had lost
    # all indentation and dropped several lines.  Lines tagged
    # "reconstructed" were absent from that text and should be confirmed
    # against the upstream file.

    def __init__(self, container, serializer, repository):
        """Prepare an installation.

        :param container: an object whose iter_records() yields
            (bytes, metadata, repo_kind, revision_id, file_id) tuples
        :param serializer: the bundle serializer, used to look up the source
            serializer named by the info record
        :param repository: The repository to install into
        """
        self._container = container
        self._serializer = serializer
        self._repository = repository
        self._info = None  # reconstructed; set by _handle_info

    def install(self):  # reconstructed: the def line was missing
        """Perform the installation.

        Must be called with the Repository locked.

        :return: the id of the last revision record seen, or None
        """
        self._repository.start_write_group()
        try:
            result = self._install_in_write_group()
        except:
            # Deliberately bare: the write group is aborted for any failure
            # and the exception is always re-raised.
            self._repository.abort_write_group()
            raise
        self._repository.commit_write_group()
        return result

    def _install_in_write_group(self):
        """Install every record from the container.

        File and inventory records are batched while consecutive records
        belong to the same file (or are all inventories), then installed
        together.

        :return: the id of the last revision record seen, or None
        """
        current_file = None  # reconstructed
        current_versionedfile = None
        pending_file_records = []
        inventory_vf = None  # reconstructed
        pending_inventory_records = []
        target_revision = None
        for text, metadata, repo_kind, revision_id, file_id in \
                self._container.iter_records():
            if repo_kind == 'info':
                # Raised explicitly (instead of using an assert statement) so
                # that the check still runs under "python -O".
                if self._info is not None:
                    raise AssertionError(
                        'Bundle contains more than one info record')
                self._handle_info(metadata)
            if (repo_kind, file_id) != ('file', current_file):
                # Moving on from the current file: flush its batch.
                if len(pending_file_records) > 0:
                    self._install_mp_records(current_versionedfile,
                                             pending_file_records)
                # NOTE(review): indentation was lost in the extracted text;
                # the reset is done whenever the file changes so that the
                # next file never reuses the previous file's weave.
                current_file = None
                current_versionedfile = None
                pending_file_records = []
            if len(pending_inventory_records) > 0 and repo_kind != 'inventory':
                self._install_inventory_records(inventory_vf,
                                                pending_inventory_records)
                pending_inventory_records = []
            if repo_kind == 'inventory':
                if inventory_vf is None:
                    inventory_vf = self._repository.get_inventory_weave()
                if revision_id not in inventory_vf:
                    pending_inventory_records.append((revision_id, metadata,
                                                      text))
            if repo_kind == 'revision':
                target_revision = revision_id
                self._install_revision(revision_id, metadata, text)
            if repo_kind == 'signature':
                self._install_signature(revision_id, metadata, text)
            if repo_kind == 'file':
                current_file = file_id
                if current_versionedfile is None:
                    current_versionedfile = \
                        self._repository.weave_store.get_weave_or_empty(
                            file_id, self._repository.get_transaction())
                    pending_file_records = []
                if revision_id in current_versionedfile:
                    continue  # reconstructed: already present
                pending_file_records.append((revision_id, metadata, text))
        # Flush the batch for the last file, if any.
        self._install_mp_records(current_versionedfile, pending_file_records)
        return target_revision

    def _handle_info(self, info):
        """Extract data from an info record"""
        self._info = info  # reconstructed; read by _install_inventory_records
        self._source_serializer = self._serializer.get_source_serializer(info)
        # Roots need updating when the bundle source lacked rich roots but
        # the target repository supports them.
        if (info['supports_rich_root'] == 0 and
                self._repository.supports_rich_root()):
            self.update_root = True
        else:
            self.update_root = False

    def _install_mp_records(self, versionedfile, records):
        """Add a batch of multi-parent diff records to versionedfile.

        :param versionedfile: the versionedfile to add to; not touched when
            records is empty
        :param records: a list of (revision_id, metadata, patch_text)
        """
        if len(records) == 0:
            return
        d_func = multiparent.MultiParent.from_patch
        vf_records = [(r, m['parents'], m['sha1'], d_func(t)) for r, m, t in
                      records if r not in versionedfile]
        versionedfile.add_mpdiffs(vf_records)

    def _install_inventory_records(self, vf, records):
        """Install a batch of inventory records.

        When the bundle's serializer format matches the repository's, the
        diffs are added directly.  Otherwise each inventory text is rebuilt
        from its parents, checked against the recorded sha1, parsed with the
        source serializer and added through the repository.

        :raises errors.BadBundle: if a rebuilt text does not match its sha1
        :raises errors.IncompatibleRevision: if the repository cannot store
            the inventory
        """
        if self._info['serializer'] == self._repository._serializer.format_num:
            return self._install_mp_records(vf, records)
        for revision_id, metadata, text in records:
            parent_ids = metadata['parents']
            parents = [self._repository.get_inventory(p)
                       for p in parent_ids]
            # The diff applies to parent texts in the source format.
            p_texts = [self._source_serializer.write_inventory_to_string(p)
                       for p in parents]
            target_lines = multiparent.MultiParent.from_patch(text).to_lines(
                p_texts)
            sha1 = osutils.sha_strings(target_lines)
            if sha1 != metadata['sha1']:
                raise errors.BadBundle("Can't convert to target format")
            target_inv = self._source_serializer.read_inventory_from_string(
                ''.join(target_lines))
            self._handle_root(target_inv, parent_ids)
            try:
                self._repository.add_inventory(revision_id, target_inv,
                                               parent_ids)
            except errors.UnsupportedInventoryKind:
                raise errors.IncompatibleRevision(repr(self._repository))

    def _handle_root(self, target_inv, parent_ids):
        """Reconcile the root entry of target_inv with the repository.

        :raises errors.IncompatibleRevision: if the repository does not
            support rich roots and the root's revision differs from the
            inventory's revision
        """
        revision_id = target_inv.revision_id
        if self.update_root:  # reconstructed condition
            target_inv.root.revision = revision_id
            store = self._repository.weave_store
            transaction = self._repository.get_transaction()
            vf = store.get_weave_or_empty(target_inv.root.file_id, transaction)
            # Record an empty text for the root in this revision.
            vf.add_lines(revision_id, parent_ids, [])
        elif not self._repository.supports_rich_root():
            if target_inv.root.revision != revision_id:
                raise errors.IncompatibleRevision(repr(self._repository))

    def _install_revision(self, revision_id, metadata, text):
        """Add a revision text unless the repository already has it."""
        if self._repository.has_revision(revision_id):
            return
        self._repository._add_revision_text(revision_id, text)

    def _install_signature(self, revision_id, metadata, text):
        """Add a signature text unless one is already stored."""
        transaction = self._repository.get_transaction()
        if self._repository._revision_store.has_signature(revision_id,
                                                          transaction):
            return
        self._repository._revision_store.add_revision_signature_text(
            revision_id, text, transaction)