1
# Copyright (C) 2007 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
from cStringIO import StringIO
28
revision as _mod_revision,
32
from bzrlib.bundle import bundle_data, serializer
33
from bzrlib.util import bencode
36
class BundleWriter(object):
37
"""Writer for bundle-format files.
39
This serves roughly the same purpose as ContainerReader, but acts as a
42
Provides ways of writing the specific record types supported by this bundle
46
def __init__(self, fileobj):
    """Set up a writer that emits a bundle to fileobj.

    Everything the pack container writes is routed through
    _write_encoded, which bzip2-compresses it before it reaches fileobj.

    :param fileobj: a file-like object with a write() method
    """
    write_callback = self._write_encoded
    self._container = pack.ContainerWriter(write_callback)
    self._fileobj = fileobj
    self._compressor = bz2.BZ2Compressor()
51
def _write_encoded(self, bytes):
52
"""Write bzip2-encoded bytes to the file"""
53
self._fileobj.write(self._compressor.compress(bytes))
56
"""Start writing the bundle"""
57
self._fileobj.write(serializer._get_bundle_header(
58
serializer.v4_string))
59
self._fileobj.write('#\n')
60
self._container.begin()
63
"""Finish writing the bundle"""
65
self._fileobj.write(self._compressor.flush())
67
def add_multiparent_record(self, mp_bytes, sha1, parents, repo_kind,
68
revision_id, file_id):
69
"""Add a record for a multi-parent diff
71
:mp_bytes: A multi-parent diff, as a bytestring
72
:sha1: The sha1 hash of the fulltext
73
:parents: a list of revision-ids of the parents
74
:repo_kind: The kind of object in the repository. May be 'file' or
76
:revision_id: The revision id of the mpdiff being added.
77
:file_id: The file-id of the file, or None for inventories.
79
metadata = {'parents': parents,
80
'storage_kind': 'mpdiff',
82
self._add_record(mp_bytes, metadata, repo_kind, revision_id, file_id)
84
def add_fulltext_record(self, bytes, parents, repo_kind, revision_id):
    """Add a record for a fulltext

    :bytes: The fulltext, as a bytestring
    :parents: a list of revision-ids of the parents
    :repo_kind: The kind of object in the repository.  Callers in this
        module pass 'revision' or 'signature'.
    :revision_id: The revision id of the fulltext being added.
    """
    # Build the metadata once.  An earlier version also created an unused
    # dict that labelled the record 'mpdiff'; it never reached the
    # container and only obscured which storage_kind is really written.
    metadata = {'parents': parents,
                'storage_kind': 'fulltext'}
    # Fulltext records are not tied to a file, so file_id is always None.
    self._add_record(bytes, metadata, repo_kind, revision_id, None)
98
def add_info_record(self, **kwargs):
    """Add an info record to the bundle

    Any parameters may be supplied, except 'self' and 'storage_kind'.
    Values must be lists, strings, integers, dicts, or a combination.

    The record is written with storage_kind 'header' (any supplied
    'storage_kind' is overwritten), repo kind 'info', and no body,
    revision id or file id.
    """
    info = dict(kwargs, storage_kind='header')
    self._add_record(None, info, 'info', None, None)
108
def encode_name(content_kind, revision_id, file_id=None):
109
"""Encode semantic ids as a container name"""
110
assert content_kind in ('revision', 'file', 'inventory', 'signature',
113
if content_kind == 'file':
114
assert file_id is not None
116
assert file_id is None
117
if content_kind == 'info':
118
assert revision_id is None
120
assert revision_id is not None
121
names = [n.replace('/', '//') for n in
122
(content_kind, revision_id, file_id) if n is not None]
123
return '/'.join(names)
125
def _add_record(self, bytes, metadata, repo_kind, revision_id, file_id):
    """Add a bundle record to the container.

    A record is written as a named container record holding the
    bencoded metadata, followed by a nameless record holding the body.
    Records whose storage_kind is 'header' have no body record.

    :param bytes: the record body (ignored for 'header' records)
    :param metadata: a dict that must contain 'storage_kind'
    :param repo_kind: content kind used to build the record name
    :param revision_id: revision id used to build the record name
    :param file_id: file id used to build the record name
    """
    record_name = self.encode_name(repo_kind, revision_id, file_id)
    header = bencode.bencode(metadata)
    self._container.add_bytes_record(header, [(record_name, )])
    if metadata['storage_kind'] == 'header':
        # Header-only record: nothing more to write.
        return
    self._container.add_bytes_record(bytes, [])
139
class BundleReader(object):
140
"""Reader for bundle-format files.
142
This serves roughly the same purpose as ContainerReader, but acts as a
143
layer on top of it, providing metadata, a semantic name, and a record
147
def __init__(self, fileobj, stream_input=True):
150
:param fileobj: a file containing a bzip-encoded container
151
:param stream_input: If True, the BundleReader streams input rather than
152
reading it all into memory at once. Reading it into memory all at
153
once is (currently) faster.
155
line = fileobj.readline()
158
self.patch_lines = []
160
source_file = iterablefile.IterableFile(self.iter_decode(fileobj))
162
source_file = StringIO(bz2.decompress(fileobj.read()))
163
self._container = pack.ContainerReader(source_file)
166
def iter_decode(fileobj):
167
"""Iterate through decoded fragments of the file"""
168
decompressor = bz2.BZ2Decompressor()
170
yield decompressor.decompress(line)
173
def decode_name(name):
174
"""Decode a name from its container form into a semantic form
176
:retval: content_kind, revision_id, file_id
178
segments = re.split('(//?)', name)
180
for segment in segments:
187
content_kind = names[0]
191
revision_id = names[1]
194
return content_kind, revision_id, file_id
196
def iter_records(self):
197
"""Iterate through bundle records
199
:return: a generator of (bytes, metadata, content_kind, revision_id,
202
iterator = self._container.iter_records()
203
for names, meta_bytes in iterator:
205
raise errors.BadBundle('Record has %d names instead of 1'
207
metadata = bencode.bdecode(meta_bytes(None))
208
if metadata['storage_kind'] == 'header':
211
_unused, bytes = iterator.next()
213
yield (bytes, metadata) + self.decode_name(names[0][0])
216
class BundleSerializerV4(serializer.BundleSerializer):
217
"""Implement the high-level bundle interface"""
219
def write(self, repository, revision_ids, forced_bases, fileobj):
    """Write a bundle to a file-like object

    For backwards-compatibility only.  The old-style arguments are
    converted by BundleWriteOperation.from_old_args and the resulting
    operation is run.

    :return: the result of the operation's do_write()
    """
    operation = BundleWriteOperation.from_old_args(
        repository, revision_ids, forced_bases, fileobj)
    return operation.do_write()
228
def write_bundle(self, repository, target, base, fileobj):
    """Write a bundle to a file object

    :param repository: The repository to retrieve revision data from
    :param target: The head revision to include ancestors of
    :param base: The ancestor of the target to stop including ancestors
        at
    :param fileobj: The file-like object to write to
    :return: the result of the write operation's do_write()
    """
    # Note the argument order: BundleWriteOperation takes base first.
    return BundleWriteOperation(base, target, repository, fileobj).do_write()
240
def read(self, file):
241
"""return a reader object for a given file"""
242
bundle = BundleInfoV4(file, self)
246
def get_source_serializer(info):
    """Retrieve the serializer for a given info object

    :param info: a mapping with a 'serializer' entry, used as the key
        for the xml_serializer format registry lookup
    """
    format_name = info['serializer']
    return xml_serializer.format_registry.get(format_name)
251
class BundleWriteOperation(object):
252
"""Perform the operation of writing revisions to a bundle"""
255
def from_old_args(cls, repository, revision_ids, forced_bases, fileobj):
256
"""Create a BundleWriteOperation from old-style arguments"""
257
base, target = cls.get_base_target(revision_ids, forced_bases,
259
return BundleWriteOperation(base, target, repository, fileobj,
262
def __init__(self, base, target, repository, fileobj, revision_ids=None):
265
self.repository = repository
266
bundle = BundleWriter(fileobj)
268
self.base_ancestry = set(repository.get_ancestry(base,
270
if revision_ids is not None:
271
self.revision_ids = revision_ids
273
revision_ids = set(repository.get_ancestry(target,
275
self.revision_ids = revision_ids.difference(self.base_ancestry)
278
"""Write all data to the bundle"""
282
self.write_revisions()
284
return self.revision_ids
286
def write_info(self):
    """Write format info

    Records the repository's serializer format and whether it supports
    rich roots (as the integer 1 or 0) in an info record.
    """
    repo = self.repository
    flag_as_int = {True: 1, False: 0}
    info = {
        'serializer': repo.get_serializer_format(),
        'supports_rich_root': flag_as_int[repo.supports_rich_root()],
    }
    self.bundle.add_info_record(**info)
294
def iter_file_revisions(self):
295
"""Iterate through all relevant revisions of all files.
297
This is the correct implementation, but is not compatible with bzr.dev,
298
because certain old revisions were not converted correctly, and have
299
the wrong "revision" marker in inventories.
301
transaction = self.repository.get_transaction()
302
altered = self.repository.fileids_altered_by_revision_ids(
304
for file_id, file_revision_ids in altered.iteritems():
305
vf = self.repository.weave_store.get_weave(file_id, transaction)
306
yield vf, file_id, file_revision_ids
308
def iter_file_revisions_aggressive(self):
309
"""Iterate through all relevant revisions of all files.
311
This uses the standard iter_file_revisions to determine what revisions
312
are referred to by inventories, but then uses the versionedfile to
313
determine the build-dependencies of each required revision.
315
All build dependencies which are not ancestors of the base revision
318
for vf, file_id, file_revision_ids in self.iter_file_revisions():
319
new_revision_ids = set()
320
pending = list(file_revision_ids)
321
while len(pending) > 0:
322
revision_id = pending.pop()
323
if revision_id in new_revision_ids:
325
if revision_id in self.base_ancestry:
327
new_revision_ids.add(revision_id)
328
pending.extend(vf.get_parents(revision_id))
329
yield vf, file_id, new_revision_ids
331
def write_files(self):
    """Write bundle records for all revisions of all files

    Each (versionedfile, file_id, revision_ids) triple produced by
    iter_file_revisions_aggressive is passed to add_mp_records with
    repo kind 'file'.
    """
    for versioned_file, file_id, needed_ids in \
            self.iter_file_revisions_aggressive():
        self.add_mp_records('file', file_id, versioned_file, needed_ids)
336
def write_revisions(self):
337
"""Write bundle records for all revisions and signatures"""
338
inv_vf = self.repository.get_inventory_weave()
339
revision_order = list(multiparent.topo_iter(inv_vf, self.revision_ids))
340
if self.target is not None and self.target in self.revision_ids:
341
revision_order.remove(self.target)
342
revision_order.append(self.target)
343
self.add_mp_records('inventory', None, inv_vf, revision_order)
344
parents_list = self.repository.get_parents(revision_order)
345
for parents, revision_id in zip(parents_list, revision_order):
346
revision_text = self.repository.get_revision_xml(revision_id)
347
self.bundle.add_fulltext_record(revision_text, parents,
348
'revision', revision_id)
350
self.bundle.add_fulltext_record(
351
self.repository.get_signature_text(
352
revision_id), parents, 'signature', revision_id)
353
except errors.NoSuchRevision:
357
def get_base_target(revision_ids, forced_bases, repository):
358
"""Determine the base and target from old-style revision ids"""
359
if len(revision_ids) == 0:
361
target = revision_ids[0]
362
base = forced_bases.get(target)
364
parents = repository.get_revision(target).parent_ids
365
if len(parents) == 0:
366
base = _mod_revision.NULL_REVISION
371
def add_mp_records(self, repo_kind, file_id, vf, revision_ids):
    """Add multi-parent diff records to a bundle

    :param repo_kind: the kind recorded for every record added
    :param file_id: the file id recorded for every record added
    :param vf: the versioned file supplying mpdiffs, sha1s and parents
    :param revision_ids: the revisions to add; they are put into the
        order produced by multiparent.topo_iter before being written
    """
    ordered_ids = list(multiparent.topo_iter(vf, revision_ids))
    diffs = vf.make_mpdiffs(ordered_ids)
    digests = vf.get_sha1s(ordered_ids)
    for diff, rev_id, digest in zip(diffs, ordered_ids, digests):
        parent_ids = vf.get_parents(rev_id)
        patch_text = ''.join(diff.to_patch())
        self.bundle.add_multiparent_record(patch_text, digest, parent_ids,
                                           repo_kind, rev_id, file_id)
383
class BundleInfoV4(object):
385
"""Provide (most of) the BundleInfo interface"""
386
def __init__(self, fileobj, serializer):
387
self._fileobj = fileobj
388
self._serializer = serializer
389
self.__real_revisions = None
390
self.__revisions = None
392
def install(self, repository):
    """Install this bundle's revisions into repository.

    Delegates to install_revisions, leaving stream_input at its default.

    :return: whatever install_revisions returns
    """
    outcome = self.install_revisions(repository)
    return outcome
395
def install_revisions(self, repository, stream_input=True):
396
"""Install this bundle's revisions into the specified repository
398
:param target_repo: The repository to install into
399
:param stream_input: If True, will stream input rather than reading it
400
all into memory at once. Reading it into memory all at once is
403
repository.lock_write()
405
ri = RevisionInstaller(self.get_bundle_reader(stream_input),
406
self._serializer, repository)
411
def get_merge_request(self, target_repo):
    """Provide data for performing a merge

    Returns suggested base, suggested target, and patch verification status.
    No base is suggested (None) and the verification status is always
    'inapplicable'; target_repo is not consulted.
    """
    suggested_base = None
    verification = 'inapplicable'
    return suggested_base, self.target, verification
418
def get_bundle_reader(self, stream_input=True):
    """Return a new BundleReader for the associated bundle

    The underlying file is rewound to its start first, so each reader
    sees the bundle from the beginning.

    :param stream_input: If True, the BundleReader streams input rather than
        reading it all into memory at once.  Reading it into memory all at
        once is (currently) faster.
    """
    source = self._fileobj
    source.seek(0)
    return BundleReader(source, stream_input)
428
def _get_real_revisions(self):
429
if self.__real_revisions is None:
430
self.__real_revisions = []
431
bundle_reader = self.get_bundle_reader()
432
for bytes, metadata, repo_kind, revision_id, file_id in \
433
bundle_reader.iter_records():
434
if repo_kind == 'info':
436
self._serializer.get_source_serializer(metadata)
437
if repo_kind == 'revision':
438
rev = serializer.read_revision_from_string(bytes)
439
self.__real_revisions.append(rev)
440
return self.__real_revisions
441
real_revisions = property(_get_real_revisions)
443
def _get_revisions(self):
444
if self.__revisions is None:
445
self.__revisions = []
446
for revision in self.real_revisions:
447
self.__revisions.append(
448
bundle_data.RevisionInfo.from_revision(revision))
449
return self.__revisions
451
revisions = property(_get_revisions)
453
def _get_target(self):
454
return self.revisions[-1].revision_id
456
target = property(_get_target)
459
class RevisionInstaller(object):
460
"""Installs revisions into a repository"""
462
def __init__(self, container, serializer, repository):
463
self._container = container
464
self._serializer = serializer
465
self._repository = repository
469
"""Perform the installation.
471
Must be called with the Repository locked.
473
self._repository.start_write_group()
475
result = self._install_in_write_group()
477
self._repository.abort_write_group()
479
self._repository.commit_write_group()
482
def _install_in_write_group(self):
484
current_versionedfile = None
485
pending_file_records = []
487
pending_inventory_records = []
489
target_revision = None
490
for bytes, metadata, repo_kind, revision_id, file_id in\
491
self._container.iter_records():
492
if repo_kind == 'info':
493
assert self._info is None
494
self._handle_info(metadata)
495
if (repo_kind, file_id) != ('file', current_file):
496
if len(pending_file_records) > 0:
497
self._install_mp_records(current_versionedfile,
498
pending_file_records)
500
current_versionedfile = None
501
pending_file_records = []
502
if len(pending_inventory_records) > 0 and repo_kind != 'inventory':
503
self._install_inventory_records(inventory_vf,
504
pending_inventory_records)
505
pending_inventory_records = []
506
if repo_kind == 'inventory':
507
if inventory_vf is None:
508
inventory_vf = self._repository.get_inventory_weave()
509
if revision_id not in inventory_vf:
510
pending_inventory_records.append((revision_id, metadata,
512
if repo_kind == 'revision':
513
target_revision = revision_id
514
self._install_revision(revision_id, metadata, bytes)
515
if repo_kind == 'signature':
516
self._install_signature(revision_id, metadata, bytes)
517
if repo_kind == 'file':
518
current_file = file_id
519
if current_versionedfile is None:
520
current_versionedfile = \
521
self._repository.weave_store.get_weave_or_empty(
522
file_id, self._repository.get_transaction())
523
pending_file_records = []
524
if revision_id in current_versionedfile:
526
pending_file_records.append((revision_id, metadata, bytes))
527
self._install_mp_records(current_versionedfile, pending_file_records)
528
return target_revision
530
def _handle_info(self, info):
531
"""Extract data from an info record"""
533
self._source_serializer = self._serializer.get_source_serializer(info)
534
if (info['supports_rich_root'] == 0 and
535
self._repository.supports_rich_root()):
536
self.update_root = True
538
self.update_root = False
540
def _install_mp_records(self, versionedfile, records):
541
if len(records) == 0:
543
d_func = multiparent.MultiParent.from_patch
544
vf_records = [(r, m['parents'], m['sha1'], d_func(t)) for r, m, t in
545
records if r not in versionedfile]
546
versionedfile.add_mpdiffs(vf_records)
548
def _install_inventory_records(self, vf, records):
549
if self._info['serializer'] == self._repository._serializer.format_num:
550
return self._install_mp_records(vf, records)
551
for revision_id, metadata, bytes in records:
552
parent_ids = metadata['parents']
553
parents = [self._repository.get_inventory(p)
555
p_texts = [self._source_serializer.write_inventory_to_string(p)
557
target_lines = multiparent.MultiParent.from_patch(bytes).to_lines(
559
sha1 = osutils.sha_strings(target_lines)
560
if sha1 != metadata['sha1']:
561
raise errors.BadBundle("Can't convert to target format")
562
target_inv = self._source_serializer.read_inventory_from_string(
563
''.join(target_lines))
564
self._handle_root(target_inv, parent_ids)
566
self._repository.add_inventory(revision_id, target_inv,
568
except errors.UnsupportedInventoryKind:
569
raise errors.IncompatibleRevision(repr(self._repository))
571
def _handle_root(self, target_inv, parent_ids):
572
revision_id = target_inv.revision_id
574
target_inv.root.revision = revision_id
575
store = self._repository.weave_store
576
transaction = self._repository.get_transaction()
577
vf = store.get_weave_or_empty(target_inv.root.file_id, transaction)
578
vf.add_lines(revision_id, parent_ids, [])
579
elif not self._repository.supports_rich_root():
580
if target_inv.root.revision != revision_id:
581
raise errors.IncompatibleRevision(repr(self._repository))
584
def _install_revision(self, revision_id, metadata, text):
585
if self._repository.has_revision(revision_id):
587
self._repository._add_revision_text(revision_id, text)
589
def _install_signature(self, revision_id, metadata, text):
590
transaction = self._repository.get_transaction()
591
if self._repository._revision_store.has_signature(revision_id,
594
self._repository._revision_store.add_revision_signature_text(
595
revision_id, text, transaction)