1
# Copyright (C) 2007 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
from cStringIO import StringIO
28
revision as _mod_revision,
32
from bzrlib.bundle import bundle_data, serializer
33
from bzrlib.util import bencode
36
class BundleWriter(object):
37
"""Writer for bundle-format files.
39
This serves roughly the same purpose as ContainerReader, but acts as a
42
Provides ways of writing the specific record types supported by this bundle
46
def __init__(self, fileobj):
    """Create a writer that sends bzip2-compressed output to fileobj.

    :param fileobj: object whose write() method receives the output
    """
    # ContainerWriter is handed _write_encoded, so whatever it emits goes
    # through the compressor before it reaches fileobj.
    self._container = pack.ContainerWriter(self._write_encoded)
    self._fileobj = fileobj
    self._compressor = bz2.BZ2Compressor()
51
def _write_encoded(self, bytes):
    """Write bzip2-encoded bytes to the file

    :param bytes: uncompressed bytestring to feed to the compressor
    """
    # compress() may return an empty string while the compressor is still
    # buffering; whatever it returns is written straight through.
    compressed = self._compressor.compress(bytes)
    self._fileobj.write(compressed)
56
"""Start writing the bundle"""
57
self._fileobj.write(serializer._get_bundle_header(
58
serializer.v4_string))
59
self._fileobj.write('#\n')
60
self._container.begin()
63
"""Finish writing the bundle"""
65
self._fileobj.write(self._compressor.flush())
67
def add_multiparent_record(self, mp_bytes, sha1, parents, repo_kind,
68
revision_id, file_id):
69
"""Add a record for a multi-parent diff
71
:mp_bytes: A multi-parent diff, as a bytestring
72
:sha1: The sha1 hash of the fulltext
73
:parents: a list of revision-ids of the parents
74
:repo_kind: The kind of object in the repository. May be 'file' or
76
:revision_id: The revision id of the mpdiff being added.
77
:file_id: The file-id of the file, or None for inventories.
79
metadata = {'parents': parents,
80
'storage_kind': 'mpdiff',
82
self._add_record(mp_bytes, metadata, repo_kind, revision_id, file_id)
84
def add_fulltext_record(self, bytes, parents, repo_kind, revision_id):
    """Add a record for a fulltext

    :bytes: The fulltext, as a bytestring
    :parents: a list of revision-ids of the parents
    :repo_kind: The kind of object in the repository.  write_revisions
        passes 'revision' and 'signature'.
    :revision_id: The revision id of the fulltext being added.
    """
    # A single metadata dict, tagged 'fulltext'.  (An unused local that
    # was tagged 'mpdiff' used to be built here as well.)
    metadata = {'parents': parents,
                'storage_kind': 'fulltext'}
    # Fulltext records are always written without a file id.
    self._add_record(bytes, metadata, repo_kind, revision_id, None)
98
def add_info_record(self, **kwargs):
    """Add an info record to the bundle

    Any parameters may be supplied, except 'self' and 'storage_kind'.
    Values must be lists, strings, integers, dicts, or a combination.
    """
    # storage_kind is always forced to 'header'; a caller-supplied value
    # would be overwritten here.
    kwargs['storage_kind'] = 'header'
    # Info records are written with no body, revision id or file id.
    self._add_record(None, kwargs, 'info', None, None)
108
def encode_name(content_kind, revision_id, file_id=None):
109
"""Encode semantic ids as a container name"""
110
assert content_kind in ('revision', 'file', 'inventory', 'signature',
113
if content_kind == 'file':
114
assert file_id is not None
116
assert file_id is None
117
if content_kind == 'info':
118
assert revision_id is None
120
assert revision_id is not None
121
names = [n.replace('/', '//') for n in
122
(content_kind, revision_id, file_id) if n is not None]
123
return '/'.join(names)
125
def _add_record(self, bytes, metadata, repo_kind, revision_id, file_id):
    """Add a bundle record to the container.

    Most bundle records are recorded as header/body pairs, with the
    body being nameless.  Records with storage_kind 'header' have no
    body.

    :param bytes: body bytestring; not written for 'header' records
    :param metadata: dict that is bencoded into the named header record;
        must contain 'storage_kind'
    :param repo_kind: content kind, passed to encode_name
    :param revision_id: revision id, passed to encode_name
    :param file_id: file id, passed to encode_name
    """
    name = self.encode_name(repo_kind, revision_id, file_id)
    encoded_metadata = bencode.bencode(metadata)
    # The header record carries the single encoded name ...
    self._container.add_bytes_record(encoded_metadata, [(name, )])
    # ... and the body, when there is one, follows it with no names.
    if metadata['storage_kind'] != 'header':
        self._container.add_bytes_record(bytes, [])
139
class BundleReader(object):
140
"""Reader for bundle-format files.
142
This serves roughly the same purpose as ContainerReader, but acts as a
143
layer on top of it, providing metadata, a semantic name, and a record
147
def __init__(self, fileobj, stream_input=True):
150
:param fileobj: a file containing a bzip-encoded container
151
:param stream_input: If True, the BundleReader streams input rather than
152
reading it all into memory at once. Reading it into memory all at
153
once is (currently) faster.
155
line = fileobj.readline()
158
self.patch_lines = []
160
source_file = iterablefile.IterableFile(self.iter_decode(fileobj))
162
source_file = StringIO(bz2.decompress(fileobj.read()))
163
self._container_file = source_file
166
def iter_decode(fileobj):
167
"""Iterate through decoded fragments of the file"""
168
decompressor = bz2.BZ2Decompressor()
171
yield decompressor.decompress(line)
176
def decode_name(name):
177
"""Decode a name from its container form into a semantic form
179
:retval: content_kind, revision_id, file_id
181
segments = re.split('(//?)', name)
183
for segment in segments:
190
content_kind = names[0]
194
revision_id = names[1]
197
return content_kind, revision_id, file_id
199
def iter_records(self):
200
"""Iterate through bundle records
202
:return: a generator of (bytes, metadata, content_kind, revision_id,
205
iterator = pack.iter_records_from_file(self._container_file)
206
for names, bytes in iterator:
208
raise errors.BadBundle('Record has %d names instead of 1'
210
metadata = bencode.bdecode(bytes)
211
if metadata['storage_kind'] == 'header':
214
_unused, bytes = iterator.next()
215
yield (bytes, metadata) + self.decode_name(names[0][0])
218
class BundleSerializerV4(serializer.BundleSerializer):
219
"""Implement the high-level bundle interface"""
221
def write(self, repository, revision_ids, forced_bases, fileobj):
    """Write a bundle to a file-like object

    For backwards-compatibility only

    :param repository: passed to BundleWriteOperation.from_old_args
    :param revision_ids: old-style revision ids for the bundle
    :param forced_bases: old-style forced bases, passed to from_old_args
    :param fileobj: The file-like object to write to
    :return: whatever the write operation's do_write() returns
    """
    write_op = BundleWriteOperation.from_old_args(repository, revision_ids,
                                                  forced_bases, fileobj)
    return write_op.do_write()
230
def write_bundle(self, repository, target, base, fileobj):
    """Write a bundle to a file object

    :param repository: The repository to retrieve revision data from
    :param target: The head revision to include ancestors of
    :param base: The ancestor of the target to stop including ancestors
        at
    :param fileobj: The file-like object to write to
    :return: whatever the write operation's do_write() returns
    """
    # Note the argument order: BundleWriteOperation takes base before
    # target.
    write_op = BundleWriteOperation(base, target, repository, fileobj)
    return write_op.do_write()
242
def read(self, file):
243
"""return a reader object for a given file"""
244
bundle = BundleInfoV4(file, self)
248
def get_source_serializer(info):
    """Retrieve the serializer for a given info object

    :param info: mapping with a 'serializer' key naming the format
    :return: the entry registered under that name in
        xml_serializer.format_registry
    """
    # NOTE(review): takes no self -- presumably wrapped as a staticmethod
    # on a line not visible here; confirm before relying on it.
    format_name = info['serializer']
    return xml_serializer.format_registry.get(format_name)
253
class BundleWriteOperation(object):
254
"""Perform the operation of writing revisions to a bundle"""
257
def from_old_args(cls, repository, revision_ids, forced_bases, fileobj):
258
"""Create a BundleWriteOperation from old-style arguments"""
259
base, target = cls.get_base_target(revision_ids, forced_bases,
261
return BundleWriteOperation(base, target, repository, fileobj,
264
def __init__(self, base, target, repository, fileobj, revision_ids=None):
267
self.repository = repository
268
bundle = BundleWriter(fileobj)
270
self.base_ancestry = set(repository.get_ancestry(base,
272
if revision_ids is not None:
273
self.revision_ids = revision_ids
275
revision_ids = set(repository.get_ancestry(target,
277
self.revision_ids = revision_ids.difference(self.base_ancestry)
280
"""Write all data to the bundle"""
284
self.write_revisions()
286
return self.revision_ids
288
def write_info(self):
    """Write format info

    Adds one info record holding the repository's serializer format and
    whether it supports rich roots, encoded as 1 or 0.
    """
    serializer_format = self.repository.get_serializer_format()
    # Map the boolean onto 1/0 before it goes into the record.
    supports_rich_root = {True: 1, False: 0}[
        self.repository.supports_rich_root()]
    self.bundle.add_info_record(serializer=serializer_format,
                                supports_rich_root=supports_rich_root)
296
def iter_file_revisions(self):
297
"""Iterate through all relevant revisions of all files.
299
This is the correct implementation, but is not compatible with bzr.dev,
300
because certain old revisions were not converted correctly, and have
301
the wrong "revision" marker in inventories.
303
transaction = self.repository.get_transaction()
304
altered = self.repository.fileids_altered_by_revision_ids(
306
for file_id, file_revision_ids in altered.iteritems():
307
vf = self.repository.weave_store.get_weave(file_id, transaction)
308
yield vf, file_id, file_revision_ids
310
def iter_file_revisions_aggressive(self):
311
"""Iterate through all relevant revisions of all files.
313
This uses the standard iter_file_revisions to determine what revisions
314
are referred to by inventories, but then uses the versionedfile to
315
determine what the build-dependencies of each required revision.
317
All build dependencies which are not ancestors of the base revision
320
for vf, file_id, file_revision_ids in self.iter_file_revisions():
321
new_revision_ids = set()
322
pending = list(file_revision_ids)
323
while len(pending) > 0:
324
revision_id = pending.pop()
325
if revision_id in new_revision_ids:
327
if revision_id in self.base_ancestry:
329
new_revision_ids.add(revision_id)
330
pending.extend(vf.get_parent_map([revision_id])[revision_id])
331
yield vf, file_id, new_revision_ids
333
def write_files(self):
    """Write bundle records for all revisions of all files"""
    # One batch of 'file' mpdiff records per versioned file yielded by
    # iter_file_revisions.
    for vf, file_id, revision_ids in self.iter_file_revisions():
        self.add_mp_records('file', file_id, vf, revision_ids)
338
def write_revisions(self):
339
"""Write bundle records for all revisions and signatures"""
340
inv_vf = self.repository.get_inventory_weave()
341
revision_order = list(multiparent.topo_iter(inv_vf, self.revision_ids))
342
if self.target is not None and self.target in self.revision_ids:
343
revision_order.remove(self.target)
344
revision_order.append(self.target)
345
self.add_mp_records('inventory', None, inv_vf, revision_order)
346
parent_map = self.repository.get_parent_map(revision_order)
347
for revision_id in revision_order:
348
parents = parent_map.get(revision_id, None)
349
revision_text = self.repository.get_revision_xml(revision_id)
350
self.bundle.add_fulltext_record(revision_text, parents,
351
'revision', revision_id)
353
self.bundle.add_fulltext_record(
354
self.repository.get_signature_text(
355
revision_id), parents, 'signature', revision_id)
356
except errors.NoSuchRevision:
360
def get_base_target(revision_ids, forced_bases, repository):
361
"""Determine the base and target from old-style revision ids"""
362
if len(revision_ids) == 0:
364
target = revision_ids[0]
365
base = forced_bases.get(target)
367
parents = repository.get_revision(target).parent_ids
368
if len(parents) == 0:
369
base = _mod_revision.NULL_REVISION
374
def add_mp_records(self, repo_kind, file_id, vf, revision_ids):
    """Add multi-parent diff records to a bundle

    :param repo_kind: kind passed through to add_multiparent_record
    :param file_id: file id passed through to add_multiparent_record
    :param vf: versioned file supplying the diffs, sha1s and parents
    :param revision_ids: revisions to add; they are re-ordered with
        multiparent.topo_iter before being written
    """
    revision_ids = list(multiparent.topo_iter(vf, revision_ids))
    mpdiffs = vf.make_mpdiffs(revision_ids)
    sha1s = vf.get_sha1s(revision_ids)
    parent_map = vf.get_parent_map(revision_ids)
    # The three sequences are walked in lockstep, one record per revision.
    for mpdiff, revision_id, sha1 in zip(mpdiffs, revision_ids, sha1s):
        parents = parent_map[revision_id]
        text = ''.join(mpdiff.to_patch())
        self.bundle.add_multiparent_record(text, sha1, parents, repo_kind,
                                           revision_id, file_id)
387
class BundleInfoV4(object):
389
"""Provide (most of) the BundleInfo interface"""
390
def __init__(self, fileobj, serializer):
    """Remember the bundle file and the serializer that opened it.

    :param fileobj: file object holding the bundle
    :param serializer: the bundle serializer creating this object
    """
    self._fileobj = fileobj
    self._serializer = serializer
    # Caches, left as None until first filled.
    self.__real_revisions = None
    self.__revisions = None
396
def install(self, repository):
    """Install this bundle's revisions into the given repository.

    Delegates to install_revisions with its default arguments.

    :param repository: repository passed to install_revisions
    :return: whatever install_revisions returns
    """
    return self.install_revisions(repository)
399
def install_revisions(self, repository, stream_input=True):
400
"""Install this bundle's revisions into the specified repository
402
:param repository: The repository to install into
403
:param stream_input: If True, will stream input rather than reading it
404
all into memory at once. Reading it into memory all at once is
407
repository.lock_write()
409
ri = RevisionInstaller(self.get_bundle_reader(stream_input),
410
self._serializer, repository)
415
def get_merge_request(self, target_repo):
    """Provide data for performing a merge

    Returns suggested base, suggested target, and patch verification status

    :param target_repo: not used by this implementation
    :return: (None, self.target, 'inapplicable') -- no base is suggested
    """
    return None, self.target, 'inapplicable'
422
def get_bundle_reader(self, stream_input=True):
    """Return a new BundleReader for the associated bundle

    :param stream_input: If True, the BundleReader streams input rather
        than reading it all into memory at once.  Reading it into memory
        all at once is (currently) faster.
    """
    # Rewind first so every reader starts from the top of the file.
    self._fileobj.seek(0)
    return BundleReader(self._fileobj, stream_input)
432
def _get_real_revisions(self):
433
if self.__real_revisions is None:
434
self.__real_revisions = []
435
bundle_reader = self.get_bundle_reader()
436
for bytes, metadata, repo_kind, revision_id, file_id in \
437
bundle_reader.iter_records():
438
if repo_kind == 'info':
440
self._serializer.get_source_serializer(metadata)
441
if repo_kind == 'revision':
442
rev = serializer.read_revision_from_string(bytes)
443
self.__real_revisions.append(rev)
444
return self.__real_revisions
445
real_revisions = property(_get_real_revisions)
447
def _get_revisions(self):
    """Return RevisionInfo objects for the bundle's revisions, cached.

    The list is built from self.real_revisions on first access and
    reused afterwards.
    """
    if self.__revisions is None:
        # Build the whole list before caching it, so a failure part-way
        # through cannot leave a truncated list behind as the cached value.
        self.__revisions = [
            bundle_data.RevisionInfo.from_revision(revision)
            for revision in self.real_revisions]
    return self.__revisions

revisions = property(_get_revisions)
457
def _get_target(self):
    """Return the revision id of the last entry in self.revisions."""
    last_revision = self.revisions[-1]
    return last_revision.revision_id

target = property(_get_target)
463
class RevisionInstaller(object):
464
"""Installs revisions into a repository"""
466
def __init__(self, container, serializer, repository):
467
self._container = container
468
self._serializer = serializer
469
self._repository = repository
473
"""Perform the installation.
475
Must be called with the Repository locked.
477
self._repository.start_write_group()
479
result = self._install_in_write_group()
481
self._repository.abort_write_group()
483
self._repository.commit_write_group()
486
def _install_in_write_group(self):
488
current_versionedfile = None
489
pending_file_records = []
491
pending_inventory_records = []
493
target_revision = None
494
for bytes, metadata, repo_kind, revision_id, file_id in\
495
self._container.iter_records():
496
if repo_kind == 'info':
497
assert self._info is None
498
self._handle_info(metadata)
499
if (repo_kind, file_id) != ('file', current_file):
500
if len(pending_file_records) > 0:
501
self._install_mp_records(current_versionedfile,
502
pending_file_records)
504
current_versionedfile = None
505
pending_file_records = []
506
if len(pending_inventory_records) > 0 and repo_kind != 'inventory':
507
self._install_inventory_records(inventory_vf,
508
pending_inventory_records)
509
pending_inventory_records = []
510
if repo_kind == 'inventory':
511
if inventory_vf is None:
512
inventory_vf = self._repository.get_inventory_weave()
513
if revision_id not in inventory_vf:
514
pending_inventory_records.append((revision_id, metadata,
516
if repo_kind == 'revision':
517
target_revision = revision_id
518
self._install_revision(revision_id, metadata, bytes)
519
if repo_kind == 'signature':
520
self._install_signature(revision_id, metadata, bytes)
521
if repo_kind == 'file':
522
current_file = file_id
523
if current_versionedfile is None:
524
current_versionedfile = \
525
self._repository.weave_store.get_weave_or_empty(
526
file_id, self._repository.get_transaction())
527
pending_file_records = []
528
if revision_id in current_versionedfile:
530
pending_file_records.append((revision_id, metadata, bytes))
531
self._install_mp_records(current_versionedfile, pending_file_records)
532
return target_revision
534
def _handle_info(self, info):
535
"""Extract data from an info record"""
537
self._source_serializer = self._serializer.get_source_serializer(info)
538
if (info['supports_rich_root'] == 0 and
539
self._repository.supports_rich_root()):
540
self.update_root = True
542
self.update_root = False
544
def _install_mp_records(self, versionedfile, records):
545
if len(records) == 0:
547
d_func = multiparent.MultiParent.from_patch
548
vf_records = [(r, m['parents'], m['sha1'], d_func(t)) for r, m, t in
549
records if r not in versionedfile]
550
versionedfile.add_mpdiffs(vf_records)
552
def _install_inventory_records(self, vf, records):
553
if self._info['serializer'] == self._repository._serializer.format_num:
554
return self._install_mp_records(vf, records)
555
for revision_id, metadata, bytes in records:
556
parent_ids = metadata['parents']
557
parents = [self._repository.get_inventory(p)
559
p_texts = [self._source_serializer.write_inventory_to_string(p)
561
target_lines = multiparent.MultiParent.from_patch(bytes).to_lines(
563
sha1 = osutils.sha_strings(target_lines)
564
if sha1 != metadata['sha1']:
565
raise errors.BadBundle("Can't convert to target format")
566
target_inv = self._source_serializer.read_inventory_from_string(
567
''.join(target_lines))
568
self._handle_root(target_inv, parent_ids)
570
self._repository.add_inventory(revision_id, target_inv,
572
except errors.UnsupportedInventoryKind:
573
raise errors.IncompatibleRevision(repr(self._repository))
575
def _handle_root(self, target_inv, parent_ids):
576
revision_id = target_inv.revision_id
578
target_inv.root.revision = revision_id
579
store = self._repository.weave_store
580
transaction = self._repository.get_transaction()
581
vf = store.get_weave_or_empty(target_inv.root.file_id, transaction)
582
vf.add_lines(revision_id, parent_ids, [])
583
elif not self._repository.supports_rich_root():
584
if target_inv.root.revision != revision_id:
585
raise errors.IncompatibleRevision(repr(self._repository))
588
def _install_revision(self, revision_id, metadata, text):
589
if self._repository.has_revision(revision_id):
591
self._repository._add_revision_text(revision_id, text)
593
def _install_signature(self, revision_id, metadata, text):
594
transaction = self._repository.get_transaction()
595
if self._repository._revision_store.has_signature(revision_id,
598
self._repository._revision_store.add_revision_signature_text(
599
revision_id, text, transaction)