1
# Copyright (C) 2007 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
from cStringIO import StringIO
28
revision as _mod_revision,
32
from bzrlib.bundle import bundle_data, serializer
33
from bzrlib.util import bencode
36
class BundleWriter(object):
37
"""Writer for bundle-format files.
39
This serves roughly the same purpose as ContainerReader, but acts as a
42
Provides ways of writing the specific record types supported by this bundle
46
def __init__(self, fileobj):
    """Prepare a writer that emits a bundle to ``fileobj``.

    :param fileobj: The file-like object the bundle is written to.  The
        methods visible in this class only ever call its ``write``.
    """
    self._fileobj = fileobj
    self._compressor = bz2.BZ2Compressor()
    # The container is handed _write_encoded as its write callback, so
    # whatever it emits passes through the bzip2 compressor on its way to
    # the file.
    self._container = pack.ContainerWriter(self._write_encoded)
51
def _write_encoded(self, bytes):
52
"""Write bzip2-encoded bytes to the file"""
53
self._fileobj.write(self._compressor.compress(bytes))
56
"""Start writing the bundle"""
57
self._fileobj.write(serializer._get_bundle_header(
58
serializer.v4_string))
59
self._fileobj.write('#\n')
60
self._container.begin()
63
"""Finish writing the bundle"""
65
self._fileobj.write(self._compressor.flush())
67
def add_multiparent_record(self, mp_bytes, sha1, parents, repo_kind,
68
revision_id, file_id):
69
"""Add a record for a multi-parent diff
71
:mp_bytes: A multi-parent diff, as a bytestring
72
:sha1: The sha1 hash of the fulltext
73
:parents: a list of revision-ids of the parents
74
:repo_kind: The kind of object in the repository. May be 'file' or
76
:revision_id: The revision id of the mpdiff being added.
77
:file_id: The file-id of the file, or None for inventories.
79
metadata = {'parents': parents,
80
'storage_kind': 'mpdiff',
82
self._add_record(mp_bytes, metadata, repo_kind, revision_id, file_id)
84
def add_fulltext_record(self, bytes, parents, repo_kind, revision_id):
85
"""Add a record for a fulltext
87
:bytes: The fulltext, as a bytestring
88
:parents: a list of revision-ids of the parents
89
:repo_kind: The kind of object in the repository. May be 'revision' or
91
:revision_id: The revision id of the fulltext being added.
93
metadata = {'parents': parents,
94
'storage_kind': 'mpdiff'}
95
self._add_record(bytes, {'parents': parents,
96
'storage_kind': 'fulltext'}, repo_kind, revision_id, None)
98
def add_info_record(self, **kwargs):
99
"""Add an info record to the bundle
101
Any parameters may be supplied, except 'self' and 'storage_kind'.
102
Values must be lists, strings, integers, dicts, or a combination.
104
kwargs['storage_kind'] = 'header'
105
self._add_record(None, kwargs, 'info', None, None)
108
def encode_name(content_kind, revision_id, file_id=None):
    """Encode semantic ids as a container name.

    Each id that is not None has every '/' doubled to '//'; the results
    are then joined with single '/' separators, in the order
    content_kind, revision_id, file_id.

    :param content_kind: 'revision', 'file', 'inventory', 'signature' or
        'info'
    :param revision_id: The revision id; must be None exactly when
        content_kind is 'info'
    :param file_id: The file id; must be given exactly when content_kind
        is 'file'
    :return: The encoded name, as a single string
    """
    assert content_kind in ('revision', 'file', 'inventory', 'signature',
                            'info')
    # Only 'file' records carry a file id, and only 'info' records lack a
    # revision id.
    assert (file_id is not None) == (content_kind == 'file')
    assert (revision_id is None) == (content_kind == 'info')
    escaped = []
    for part in (content_kind, revision_id, file_id):
        if part is not None:
            escaped.append(part.replace('/', '//'))
    return '/'.join(escaped)
125
def _add_record(self, bytes, metadata, repo_kind, revision_id, file_id):
    """Add a bundle record to the container.

    The bencoded metadata is written first, as a container record named
    by encode_name().  Unless the metadata's storage_kind is 'header',
    ``bytes`` follows as a second, nameless container record.

    :param bytes: The record body; not written for 'header' records
    :param metadata: A dict to bencode; must contain 'storage_kind'
    :param repo_kind: Content kind used to build the record name
    :param revision_id: Revision id used to build the record name
    :param file_id: File id used to build the record name
    """
    record_name = self.encode_name(repo_kind, revision_id, file_id)
    header = bencode.bencode(metadata)
    self._container.add_bytes_record(header, [(record_name, )])
    if metadata['storage_kind'] == 'header':
        # Header-only records have no body.
        return
    self._container.add_bytes_record(bytes, [])
139
class BundleReader(object):
140
"""Reader for bundle-format files.
142
This serves roughly the same purpose as ContainerReader, but acts as a
143
layer on top of it, providing metadata, a semantic name, and a record
147
def __init__(self, fileobj, stream_input=True):
150
:param fileobj: a file containing a bzip-encoded container
151
:param stream_input: If True, the BundleReader streams input rather than
152
reading it all into memory at once. Reading it into memory all at
153
once is (currently) faster.
155
line = fileobj.readline()
158
self.patch_lines = []
160
source_file = iterablefile.IterableFile(self.iter_decode(fileobj))
162
source_file = StringIO(bz2.decompress(fileobj.read()))
163
self._container_file = source_file
166
def iter_decode(fileobj):
167
"""Iterate through decoded fragments of the file"""
168
decompressor = bz2.BZ2Decompressor()
171
yield decompressor.decompress(line)
176
def decode_name(name):
177
"""Decode a name from its container form into a semantic form
179
:retval: content_kind, revision_id, file_id
181
segments = re.split('(//?)', name)
183
for segment in segments:
190
content_kind = names[0]
194
revision_id = names[1]
197
return content_kind, revision_id, file_id
199
def iter_records(self):
200
"""Iterate through bundle records
202
:return: a generator of (bytes, metadata, content_kind, revision_id,
205
iterator = pack.iter_records_from_file(self._container_file)
206
for names, bytes in iterator:
208
raise errors.BadBundle('Record has %d names instead of 1'
210
metadata = bencode.bdecode(bytes)
211
if metadata['storage_kind'] == 'header':
214
_unused, bytes = iterator.next()
215
yield (bytes, metadata) + self.decode_name(names[0][0])
218
class BundleSerializerV4(serializer.BundleSerializer):
219
"""Implement the high-level bundle interface"""
221
def write(self, repository, revision_ids, forced_bases, fileobj):
    """Write a bundle to a file-like object.

    For backwards-compatibility only.  The old-style arguments are
    converted by BundleWriteOperation.from_old_args().

    :return: The result of the write operation's do_write()
    """
    operation = BundleWriteOperation.from_old_args(
        repository, revision_ids, forced_bases, fileobj)
    return operation.do_write()
230
def write_bundle(self, repository, target, base, fileobj):
    """Write a bundle to a file object.

    :param repository: The repository to retrieve revision data from
    :param target: The head revision to include ancestors of
    :param base: The ancestor of the target at which to stop including
        ancestors
    :param fileobj: The file-like object to write to
    :return: The result of the write operation's do_write()
    """
    # BundleWriteOperation takes (base, target, ...), the reverse of this
    # method's (target, base) parameter order.
    operation = BundleWriteOperation(base, target, repository, fileobj)
    return operation.do_write()
242
def read(self, file):
    """Return a reader object for a given file.

    :param file: The file-like object containing the bundle
    :return: A BundleInfoV4 wrapping ``file`` and this serializer
    """
    # The docstring promises a return value, so hand the info object back
    # rather than just building it.
    bundle = BundleInfoV4(file, self)
    return bundle
248
def get_source_serializer(info):
    """Retrieve the serializer for a given info object.

    :param info: A mapping with a 'serializer' entry, which is used as the
        key into xml_serializer.format_registry
    :return: Whatever the format registry returns for that key
    """
    registry = xml_serializer.format_registry
    return registry.get(info['serializer'])
253
class BundleWriteOperation(object):
254
"""Perform the operation of writing revisions to a bundle"""
257
def from_old_args(cls, repository, revision_ids, forced_bases, fileobj):
    """Create a BundleWriteOperation from old-style arguments.

    The base and target revisions are derived with get_base_target(), and
    the explicit ``revision_ids`` are passed through to the constructor.

    NOTE(review): this takes the class as its first argument and is called
    as BundleWriteOperation.from_old_args(...); the decorator line is not
    visible in this extract -- confirm it is a classmethod.

    :return: A new instance of ``cls``
    """
    base, target = cls.get_base_target(revision_ids, forced_bases,
                                       repository)
    # Build through cls rather than the hard-coded class name, so that a
    # subclass gets an instance of itself.
    return cls(base, target, repository, fileobj, revision_ids)
264
def __init__(self, base, target, repository, fileobj, revision_ids=None):
267
self.repository = repository
268
bundle = BundleWriter(fileobj)
270
self.base_ancestry = set(repository.get_ancestry(base,
272
if revision_ids is not None:
273
self.revision_ids = revision_ids
275
revision_ids = set(repository.get_ancestry(target,
277
self.revision_ids = revision_ids.difference(self.base_ancestry)
280
"""Write all data to the bundle"""
284
self.write_revisions()
286
return self.revision_ids
288
def write_info(self):
289
"""Write format info"""
290
serializer_format = self.repository.get_serializer_format()
291
supports_rich_root = {True: 1, False: 0}[
292
self.repository.supports_rich_root()]
293
self.bundle.add_info_record(serializer=serializer_format,
294
supports_rich_root=supports_rich_root)
296
def iter_file_revisions(self):
297
"""Iterate through all relevant revisions of all files.
299
This is the correct implementation, but is not compatible with bzr.dev,
300
because certain old revisions were not converted correctly, and have
301
the wrong "revision" marker in inventories.
303
transaction = self.repository.get_transaction()
304
altered = self.repository.fileids_altered_by_revision_ids(
306
for file_id, file_revision_ids in altered.iteritems():
307
vf = self.repository.weave_store.get_weave(file_id, transaction)
308
yield vf, file_id, file_revision_ids
310
def iter_file_revisions_aggressive(self):
311
"""Iterate through all relevant revisions of all files.
313
This uses the standard iter_file_revisions to determine what revisions
314
are referred to by inventories, but then uses the versionedfile to
315
determine what the build-dependencies of each required revision.
317
All build dependencies which are not ancestors of the base revision
320
for vf, file_id, file_revision_ids in self.iter_file_revisions():
321
new_revision_ids = set()
322
pending = list(file_revision_ids)
323
while len(pending) > 0:
324
revision_id = pending.pop()
325
if revision_id in new_revision_ids:
327
if revision_id in self.base_ancestry:
329
new_revision_ids.add(revision_id)
330
pending.extend(vf.get_parents(revision_id))
331
yield vf, file_id, new_revision_ids
333
def write_files(self):
334
"""Write bundle records for all revisions of all files"""
335
for vf, file_id, revision_ids in self.iter_file_revisions():
336
self.add_mp_records('file', file_id, vf, revision_ids)
338
def write_revisions(self):
    """Write bundle records for all revisions and signatures.

    Inventories are written first, as multi-parent records, followed by a
    'revision' fulltext and, where one can be fetched, a 'signature'
    fulltext for each revision.
    """
    repository = self.repository
    target = self.target
    inv_vf = repository.get_inventory_weave()
    ordered = list(multiparent.topo_iter(inv_vf, self.revision_ids))
    if target is not None and target in self.revision_ids:
        # Force the target to the end of the order: the reading side
        # treats the last revision record as the bundle's target.
        ordered.remove(target)
        ordered.append(target)
    self.add_mp_records('inventory', None, inv_vf, ordered)
    parents_list = repository.get_parents(ordered)
    for parents, revision_id in zip(parents_list, ordered):
        revision_text = repository.get_revision_xml(revision_id)
        self.bundle.add_fulltext_record(revision_text, parents,
                                        'revision', revision_id)
        try:
            self.bundle.add_fulltext_record(
                repository.get_signature_text(revision_id),
                parents, 'signature', revision_id)
        except errors.NoSuchRevision:
            # Presumably raised when the revision has no signature; the
            # revision is then bundled without one.
            pass
359
def get_base_target(revision_ids, forced_bases, repository):
360
"""Determine the base and target from old-style revision ids"""
361
if len(revision_ids) == 0:
363
target = revision_ids[0]
364
base = forced_bases.get(target)
366
parents = repository.get_revision(target).parent_ids
367
if len(parents) == 0:
368
base = _mod_revision.NULL_REVISION
373
def add_mp_records(self, repo_kind, file_id, vf, revision_ids):
    """Add multi-parent diff records to a bundle.

    :param repo_kind: The record kind passed through to the bundle writer
    :param file_id: The file id passed through to the bundle writer
    :param vf: The versionedfile supplying diffs, sha1s and parents
    :param revision_ids: The revisions to write; they are reordered with
        multiparent.topo_iter before writing
    """
    ordered_ids = list(multiparent.topo_iter(vf, revision_ids))
    mpdiffs = vf.make_mpdiffs(ordered_ids)
    sha1s = vf.get_sha1s(ordered_ids)
    for mpdiff, revision_id, sha1 in zip(mpdiffs, ordered_ids, sha1s):
        parents = vf.get_parents(revision_id)
        patch_text = ''.join(mpdiff.to_patch())
        self.bundle.add_multiparent_record(
            patch_text, sha1, parents, repo_kind, revision_id, file_id)
385
class BundleInfoV4(object):
387
"""Provide (most of) the BundleInfo interface"""
388
def __init__(self, fileobj, serializer):
389
self._fileobj = fileobj
390
self._serializer = serializer
391
self.__real_revisions = None
392
self.__revisions = None
394
def install(self, repository):
395
return self.install_revisions(repository)
397
def install_revisions(self, repository, stream_input=True):
398
"""Install this bundle's revisions into the specified repository
400
:param repository: The repository to install into
401
:param stream_input: If True, will stream input rather than reading it
402
all into memory at once. Reading it into memory all at once is
405
repository.lock_write()
407
ri = RevisionInstaller(self.get_bundle_reader(stream_input),
408
self._serializer, repository)
413
def get_merge_request(self, target_repo):
414
"""Provide data for performing a merge
416
Returns suggested base, suggested target, and patch verification status
418
return None, self.target, 'inapplicable'
420
def get_bundle_reader(self, stream_input=True):
    """Return a new BundleReader for the associated bundle.

    :param stream_input: If True, the BundleReader streams input rather
        than reading it all into memory at once.  Reading it into memory
        all at once is (currently) faster.
    """
    source = self._fileobj
    # Rewind first, so every reader starts at the beginning of the file.
    source.seek(0)
    return BundleReader(source, stream_input)
430
def _get_real_revisions(self):
431
if self.__real_revisions is None:
432
self.__real_revisions = []
433
bundle_reader = self.get_bundle_reader()
434
for bytes, metadata, repo_kind, revision_id, file_id in \
435
bundle_reader.iter_records():
436
if repo_kind == 'info':
438
self._serializer.get_source_serializer(metadata)
439
if repo_kind == 'revision':
440
rev = serializer.read_revision_from_string(bytes)
441
self.__real_revisions.append(rev)
442
return self.__real_revisions
443
real_revisions = property(_get_real_revisions)
445
def _get_revisions(self):
    """Return the bundle's revisions as bundle_data.RevisionInfo objects.

    The list is built from ``real_revisions`` on first access and cached.

    :return: A list of RevisionInfo, in the order of ``real_revisions``
    """
    if self.__revisions is None:
        # Build the whole list before caching it: if a conversion raises,
        # the cache stays None instead of holding a partial list that
        # later reads would return as complete.
        self.__revisions = [
            bundle_data.RevisionInfo.from_revision(revision)
            for revision in self.real_revisions]
    return self.__revisions

revisions = property(_get_revisions)
455
def _get_target(self):
456
return self.revisions[-1].revision_id
458
target = property(_get_target)
461
class RevisionInstaller(object):
462
"""Installs revisions into a repository"""
464
def __init__(self, container, serializer, repository):
465
self._container = container
466
self._serializer = serializer
467
self._repository = repository
471
"""Perform the installation.
473
Must be called with the Repository locked.
475
self._repository.start_write_group()
477
result = self._install_in_write_group()
479
self._repository.abort_write_group()
481
self._repository.commit_write_group()
484
def _install_in_write_group(self):
486
current_versionedfile = None
487
pending_file_records = []
489
pending_inventory_records = []
491
target_revision = None
492
for bytes, metadata, repo_kind, revision_id, file_id in\
493
self._container.iter_records():
494
if repo_kind == 'info':
495
assert self._info is None
496
self._handle_info(metadata)
497
if (repo_kind, file_id) != ('file', current_file):
498
if len(pending_file_records) > 0:
499
self._install_mp_records(current_versionedfile,
500
pending_file_records)
502
current_versionedfile = None
503
pending_file_records = []
504
if len(pending_inventory_records) > 0 and repo_kind != 'inventory':
505
self._install_inventory_records(inventory_vf,
506
pending_inventory_records)
507
pending_inventory_records = []
508
if repo_kind == 'inventory':
509
if inventory_vf is None:
510
inventory_vf = self._repository.get_inventory_weave()
511
if revision_id not in inventory_vf:
512
pending_inventory_records.append((revision_id, metadata,
514
if repo_kind == 'revision':
515
target_revision = revision_id
516
self._install_revision(revision_id, metadata, bytes)
517
if repo_kind == 'signature':
518
self._install_signature(revision_id, metadata, bytes)
519
if repo_kind == 'file':
520
current_file = file_id
521
if current_versionedfile is None:
522
current_versionedfile = \
523
self._repository.weave_store.get_weave_or_empty(
524
file_id, self._repository.get_transaction())
525
pending_file_records = []
526
if revision_id in current_versionedfile:
528
pending_file_records.append((revision_id, metadata, bytes))
529
self._install_mp_records(current_versionedfile, pending_file_records)
530
return target_revision
532
def _handle_info(self, info):
533
"""Extract data from an info record"""
535
self._source_serializer = self._serializer.get_source_serializer(info)
536
if (info['supports_rich_root'] == 0 and
537
self._repository.supports_rich_root()):
538
self.update_root = True
540
self.update_root = False
542
def _install_mp_records(self, versionedfile, records):
543
if len(records) == 0:
545
d_func = multiparent.MultiParent.from_patch
546
vf_records = [(r, m['parents'], m['sha1'], d_func(t)) for r, m, t in
547
records if r not in versionedfile]
548
versionedfile.add_mpdiffs(vf_records)
550
def _install_inventory_records(self, vf, records):
551
if self._info['serializer'] == self._repository._serializer.format_num:
552
return self._install_mp_records(vf, records)
553
for revision_id, metadata, bytes in records:
554
parent_ids = metadata['parents']
555
parents = [self._repository.get_inventory(p)
557
p_texts = [self._source_serializer.write_inventory_to_string(p)
559
target_lines = multiparent.MultiParent.from_patch(bytes).to_lines(
561
sha1 = osutils.sha_strings(target_lines)
562
if sha1 != metadata['sha1']:
563
raise errors.BadBundle("Can't convert to target format")
564
target_inv = self._source_serializer.read_inventory_from_string(
565
''.join(target_lines))
566
self._handle_root(target_inv, parent_ids)
568
self._repository.add_inventory(revision_id, target_inv,
570
except errors.UnsupportedInventoryKind:
571
raise errors.IncompatibleRevision(repr(self._repository))
573
def _handle_root(self, target_inv, parent_ids):
574
revision_id = target_inv.revision_id
576
target_inv.root.revision = revision_id
577
store = self._repository.weave_store
578
transaction = self._repository.get_transaction()
579
vf = store.get_weave_or_empty(target_inv.root.file_id, transaction)
580
vf.add_lines(revision_id, parent_ids, [])
581
elif not self._repository.supports_rich_root():
582
if target_inv.root.revision != revision_id:
583
raise errors.IncompatibleRevision(repr(self._repository))
586
def _install_revision(self, revision_id, metadata, text):
587
if self._repository.has_revision(revision_id):
589
self._repository._add_revision_text(revision_id, text)
591
def _install_signature(self, revision_id, metadata, text):
592
transaction = self._repository.get_transaction()
593
if self._repository._revision_store.has_signature(revision_id,
596
self._repository._revision_store.add_revision_signature_text(
597
revision_id, text, transaction)