~bzr-pqm/bzr/bzr.dev

2520.4.85 by Aaron Bentley
Get all test passing (which just proves there aren't enough tests!)
1
# Copyright (C) 2007 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
2520.4.20 by Aaron Bentley
Compress and base64-encode bundle contents
17
from cStringIO import StringIO
2520.4.26 by Aaron Bentley
Make decompression reasonably memory-efficient
18
import bz2
2520.4.130 by Aaron Bentley
Finish tweaking decode_name
19
import re
2520.4.20 by Aaron Bentley
Compress and base64-encode bundle contents
20
2520.4.13 by Aaron Bentley
Use real container implementation
21
from bzrlib import (
2520.4.40 by Aaron Bentley
Add human-readable diff to bundles
22
    diff,
2520.4.34 by Aaron Bentley
Add signature support
23
    errors,
2520.4.26 by Aaron Bentley
Make decompression reasonably memory-efficient
24
    iterablefile,
2520.4.13 by Aaron Bentley
Use real container implementation
25
    multiparent,
2520.4.97 by Aaron Bentley
Hack in support for inventory conversion
26
    osutils,
2520.4.13 by Aaron Bentley
Use real container implementation
27
    pack,
2520.4.40 by Aaron Bentley
Add human-readable diff to bundles
28
    revision as _mod_revision,
2520.4.45 by Aaron Bentley
Handle inconsistencies in last-modified-revision between vf and inventory
29
    trace,
2520.4.101 by Aaron Bentley
Use a registry to look up xml serializers by format
30
    xml_serializer,
2520.4.13 by Aaron Bentley
Use real container implementation
31
    )
2520.4.14 by Aaron Bentley
Get most tests passing, use format header
32
from bzrlib.bundle import bundle_data, serializer
2520.4.56 by Aaron Bentley
Begin adding support for arbitrary metadata
33
from bzrlib.util import bencode
2520.4.14 by Aaron Bentley
Get most tests passing, use format header
34
2520.4.4 by Aaron Bentley
Get basis support for a new bundle format in place
35
2520.4.25 by Aaron Bentley
Rename ContainerWriter/ContainerReader to BundleWriter/BundleReader
36
class BundleWriter(object):
    """Writer for bundle-format files.

    This serves roughly the same purpose as ContainerWriter, but acts as a
    layer on top of it.

    Provides ways of writing the specific record types supported by this
    bundle format.
    """

    def __init__(self, fileobj):
        """Constructor

        :param fileobj: a file-like object to write the bundle to; only its
            ``write`` method is used by this class.
        """
        # The container never touches the file directly: everything it emits
        # is routed through _write_encoded, which bzip2-compresses it.
        self._container = pack.ContainerWriter(self._write_encoded)
        self._fileobj = fileobj
        self._compressor = bz2.BZ2Compressor()

    def _write_encoded(self, bytes):
        """Write bzip2-encoded bytes to the file"""
        self._fileobj.write(self._compressor.compress(bytes))

    def begin(self):
        """Start writing the bundle"""
        # The format header and the '#' line bypass the compressor; only the
        # container stream that follows is compressed.
        self._fileobj.write(serializer._get_bundle_header(
            serializer.v4_string))
        self._fileobj.write('#\n')
        self._container.begin()

    def end(self):
        """Finish writing the bundle"""
        self._container.end()
        # Emit whatever the compressor is still holding back.
        self._fileobj.write(self._compressor.flush())

    def add_multiparent_record(self, mp_bytes, sha1, parents, repo_kind,
                               revision_id, file_id):
        """Add a record for a multi-parent diff

        :param mp_bytes: A multi-parent diff, as a bytestring
        :param sha1: The sha1 hash of the fulltext
        :param parents: a list of revision-ids of the parents
        :param repo_kind: The kind of object in the repository.  May be
            'file' or 'inventory'
        :param revision_id: The revision id of the mpdiff being added.
        :param file_id: The file-id of the file, or None for inventories.
        """
        metadata = {'parents': parents,
                    'storage_kind': 'mpdiff',
                    'sha1': sha1}
        self._add_record(mp_bytes, metadata, repo_kind, revision_id, file_id)

    def add_fulltext_record(self, bytes, parents, repo_kind, revision_id):
        """Add a record for a fulltext

        :param bytes: The fulltext, as a bytestring
        :param parents: a list of revision-ids of the parents
        :param repo_kind: The kind of object in the repository.  May be
            'revision' or 'signature'
        :param revision_id: The revision id of the fulltext being added.
        """
        # A single metadata dict, tagged 'fulltext'.  (An earlier version
        # also built an unused dict tagged 'mpdiff', which was misleading.)
        metadata = {'parents': parents,
                    'storage_kind': 'fulltext'}
        self._add_record(bytes, metadata, repo_kind, revision_id, None)

    def add_info_record(self, **kwargs):
        """Add an info record to the bundle

        Any parameters may be supplied, except 'self' and 'storage_kind'.
        Values must be lists, strings, integers, dicts, or a combination.
        """
        # 'header' records carry metadata only; _add_record writes no body
        # for them.
        kwargs['storage_kind'] = 'header'
        self._add_record(None, kwargs, 'info', None, None)

    @staticmethod
    def encode_name(content_kind, revision_id, file_id=None):
        """Encode semantic ids as a container name

        The components that are not None are joined with '/'.  Any '/'
        inside a component is doubled first.

        :param content_kind: one of 'revision', 'file', 'inventory',
            'signature' or 'info'
        :param revision_id: the revision id; must be None for 'info' records
            and not None for every other kind
        :param file_id: the file id; must be supplied for 'file' records and
            must be None for every other kind
        :return: the encoded name, as a string
        """
        assert content_kind in ('revision', 'file', 'inventory', 'signature',
                                'info')

        if content_kind == 'file':
            assert file_id is not None
        else:
            assert file_id is None
        if content_kind == 'info':
            assert revision_id is None
        else:
            assert revision_id is not None
        names = [n.replace('/', '//') for n in
                 (content_kind, revision_id, file_id) if n is not None]
        return '/'.join(names)

    def _add_record(self, bytes, metadata, repo_kind, revision_id, file_id):
        """Add a bundle record to the container.

        Most bundle records are recorded as header/body pairs, with the
        body being nameless.  Records with storage_kind 'header' have no
        body.

        :param bytes: the record body; ignored for 'header' records
        :param metadata: a dict to be bencoded into the named header record;
            must contain a 'storage_kind' key
        :param repo_kind: the content kind, as accepted by encode_name
        :param revision_id: the revision id, as accepted by encode_name
        :param file_id: the file id, as accepted by encode_name
        """
        name = self.encode_name(repo_kind, revision_id, file_id)
        encoded_metadata = bencode.bencode(metadata)
        self._container.add_bytes_record(encoded_metadata, [(name, )])
        if metadata['storage_kind'] != 'header':
            self._container.add_bytes_record(bytes, [])
2520.4.13 by Aaron Bentley
Use real container implementation
137
2520.4.7 by Aaron Bentley
Fix patch deserialization
138
2520.4.25 by Aaron Bentley
Rename ContainerWriter/ContainerReader to BundleWriter/BundleReader
139
class BundleReader(object):
    """Reader for bundle-format files.

    Plays roughly the role of ContainerReader, but sits one layer above it:
    each record is handed back with its metadata, its semantic name and its
    body.
    """

    def __init__(self, fileobj, stream_input=True):
        """Constructor

        :param fileobj: a file containing a bzip-encoded container
        :param stream_input: If True, the BundleReader streams input rather
            than reading it all into memory at once.  Reading it into memory
            all at once is (currently) faster.
        """
        # Step over the leading line(s) before the compressed data.
        # NOTE(review): presumably the format header and the '#' line that
        # BundleWriter.begin emits -- confirm against the writer.
        first_line = fileobj.readline()
        if first_line != '\n':
            fileobj.readline()
        self.patch_lines = []
        if not stream_input:
            # Decompress the whole remainder in one go.
            source_file = StringIO(bz2.decompress(fileobj.read()))
        else:
            # Decompress lazily, one input line at a time.
            source_file = iterablefile.IterableFile(self.iter_decode(fileobj))
        self._container_file = source_file

    @staticmethod
    def iter_decode(fileobj):
        """Yield the decompressed form of each line read from fileobj

        :param fileobj: an iterable of bzip2-compressed byte chunks
        """
        inflater = bz2.BZ2Decompressor()
        for chunk in fileobj:
            try:
                yield inflater.decompress(chunk)
            except EOFError:
                # Input continues past the end of the bzip2 stream; stop.
                return

    @staticmethod
    def decode_name(name):
        """Decode a name from its container form into a semantic form

        A single '/' separates components; a doubled '//' stands for a
        literal '/' inside a component.

        :retval: content_kind, revision_id, file_id
        """
        fields = ['']
        for token in re.split('(//?)', name):
            if token == '/':
                # Separator: open the next component.
                fields.append('')
            elif token == '//':
                # Escaped slash: belongs to the current component.
                fields[-1] += '/'
            else:
                fields[-1] += token
        # Components that are absent come back as None; any beyond the third
        # are ignored.
        padded = fields + [None, None]
        return padded[0], padded[1], padded[2]

    def iter_records(self):
        """Iterate through bundle records

        :return: a generator of (bytes, metadata, content_kind, revision_id,
            file_id)
        :raises errors.BadBundle: if a header record does not carry exactly
            one name
        """
        records = pack.iter_records_from_file(self._container_file)
        for names, header_bytes in records:
            name_count = len(names)
            if name_count != 1:
                raise errors.BadBundle('Record has %d names instead of 1'
                                       % name_count)
            metadata = bencode.bdecode(header_bytes)
            if metadata['storage_kind'] != 'header':
                # The body is the next (nameless) record in the container.
                _unused, body = records.next()
            else:
                body = None
            yield (body, metadata) + self.decode_name(names[0][0])
2520.4.22 by Aaron Bentley
Create ContainerReader
216
217
2520.4.72 by Aaron Bentley
Rename format to 4alpha
218
class BundleSerializerV4(serializer.BundleSerializer):
    """Implement the high-level bundle interface"""

    def write(self, repository, revision_ids, forced_bases, fileobj):
        """Write a bundle to a file-like object

        For backwards-compatibility only

        :return: whatever BundleWriteOperation.do_write returns
        """
        operation = BundleWriteOperation.from_old_args(
            repository, revision_ids, forced_bases, fileobj)
        return operation.do_write()

    def write_bundle(self, repository, target, base, fileobj):
        """Write a bundle to a file object

        :param repository: The repository to retrieve revision data from
        :param target: The head revision to include ancestors of
        :param base: The ancestor of the target to stop including ancestors
            at.
        :param fileobj: The file-like object to write to
        :return: whatever BundleWriteOperation.do_write returns
        """
        operation = BundleWriteOperation(base, target, repository, fileobj)
        return operation.do_write()

    def read(self, file):
        """Return a BundleInfoV4 wrapping the given file and this serializer"""
        return BundleInfoV4(file, self)

    @staticmethod
    def get_source_serializer(info):
        """Retrieve the serializer for a given info object

        :param info: a mapping with a 'serializer' key naming the format
        """
        registry = xml_serializer.format_registry
        return registry.get(info['serializer'])
251
2520.4.50 by Aaron Bentley
Split write functionality out into a separate object
252
253
class BundleWriteOperation(object):
    """Perform the operation of writing revisions to a bundle"""

    @classmethod
    def from_old_args(cls, repository, revision_ids, forced_bases, fileobj):
        """Create a BundleWriteOperation from old-style arguments"""
        base, target = cls.get_base_target(revision_ids, forced_bases,
                                           repository)
        operation = BundleWriteOperation(base, target, repository, fileobj,
                                         revision_ids)
        return operation

    def __init__(self, base, target, repository, fileobj, revision_ids=None):
        """Constructor

        :param base: revision whose ancestry is treated as already known
        :param target: the head revision being bundled
        :param repository: the repository to read data from
        :param fileobj: the file-like object the bundle is written to
        :param revision_ids: the revisions to bundle.  When None, they are
            the ancestry of target minus the ancestry of base.
        """
        self.base = base
        self.target = target
        self.repository = repository
        self.bundle = BundleWriter(fileobj)
        self.base_ancestry = set(
            repository.get_ancestry(base, topo_sorted=False))
        if revision_ids is None:
            target_ancestry = set(
                repository.get_ancestry(target, topo_sorted=False))
            revision_ids = target_ancestry.difference(self.base_ancestry)
        self.revision_ids = revision_ids

    def do_write(self):
        """Write all data to the bundle

        :return: the revision ids this operation was asked to bundle
        """
        bundle = self.bundle
        bundle.begin()
        self.write_info()
        self.write_files()
        self.write_revisions()
        bundle.end()
        return self.revision_ids

    def write_info(self):
        """Write format info"""
        serializer_format = self.repository.get_serializer_format()
        # The flag is stored as an integer, not a boolean.
        as_int = {True: 1, False: 0}
        supports_rich_root = as_int[self.repository.supports_rich_root()]
        self.bundle.add_info_record(serializer=serializer_format,
                                    supports_rich_root=supports_rich_root)

    def iter_file_revisions(self):
        """Iterate through all relevant revisions of all files.

        This is the correct implementation, but is not compatible with
        bzr.dev, because certain old revisions were not converted correctly,
        and have the wrong "revision" marker in inventories.

        :return: a generator of (versionedfile, file_id, revision_ids)
        """
        repo = self.repository
        transaction = repo.get_transaction()
        altered = repo.fileids_altered_by_revision_ids(self.revision_ids)
        for file_id, file_revision_ids in altered.iteritems():
            weave = repo.weave_store.get_weave(file_id, transaction)
            yield weave, file_id, file_revision_ids

    def iter_file_revisions_aggressive(self):
        """Iterate through all relevant revisions of all files.

        Starts from what iter_file_revisions reports, then follows each
        revision's parents in the versionedfile to pick up its
        build-dependencies.

        All build dependencies which are not ancestors of the base revision
        are emitted.
        """
        for vf, file_id, file_revision_ids in self.iter_file_revisions():
            needed = set()
            todo = list(file_revision_ids)
            while todo:
                candidate = todo.pop()
                # Skip anything already collected or already known to the
                # base.
                if candidate in needed or candidate in self.base_ancestry:
                    continue
                needed.add(candidate)
                todo.extend(vf.get_parents(candidate))
            yield vf, file_id, needed

    def write_files(self):
        """Write bundle records for all revisions of all files"""
        for vf, file_id, revision_ids in self.iter_file_revisions():
            self.add_mp_records('file', file_id, vf, revision_ids)

    def write_revisions(self):
        """Write bundle records for all revisions and signatures"""
        repo = self.repository
        inv_vf = repo.get_inventory_weave()
        revision_order = list(multiparent.topo_iter(inv_vf, self.revision_ids))
        target = self.target
        if target is not None and target in self.revision_ids:
            # Move the target to the very end of the ordering.
            revision_order.remove(target)
            revision_order.append(target)
        self.add_mp_records('inventory', None, inv_vf, revision_order)
        parent_map = repo.get_parent_map(revision_order)
        for revision_id in revision_order:
            parents = parent_map.get(revision_id, None)
            revision_text = repo.get_revision_xml(revision_id)
            self.bundle.add_fulltext_record(revision_text, parents,
                                            'revision', revision_id)
            try:
                signature_text = repo.get_signature_text(revision_id)
                self.bundle.add_fulltext_record(signature_text, parents,
                                                'signature', revision_id)
            except errors.NoSuchRevision:
                # No signature available for this revision; write none.
                pass

    @staticmethod
    def get_base_target(revision_ids, forced_bases, repository):
        """Determine the base and target from old-style revision ids

        :param revision_ids: a sequence whose first element is the target
        :param forced_bases: a mapping from target to an explicit base
        :param repository: consulted for the target's parents when no base
            is forced
        :return: (base, target); (None, None) when revision_ids is empty
        """
        if len(revision_ids) == 0:
            return None, None
        target = revision_ids[0]
        base = forced_bases.get(target)
        if base is not None:
            return base, target
        # No forced base: use the first parent, or NULL_REVISION if the
        # target has no parents.
        parents = repository.get_revision(target).parent_ids
        if len(parents) == 0:
            return _mod_revision.NULL_REVISION, target
        return parents[0], target

    def add_mp_records(self, repo_kind, file_id, vf, revision_ids):
        """Add multi-parent diff records to a bundle

        :param repo_kind: the record kind passed on to the bundle writer
        :param file_id: the file id passed on to the bundle writer
        :param vf: the versionedfile to take diffs, sha1s and parents from
        :param revision_ids: the revisions to emit records for
        """
        ordered_ids = list(multiparent.topo_iter(vf, revision_ids))
        mpdiffs = vf.make_mpdiffs(ordered_ids)
        sha1s = vf.get_sha1s(ordered_ids)
        for mpdiff, revision_id, sha1 in zip(mpdiffs, ordered_ids, sha1s):
            parents = vf.get_parents(revision_id)
            patch_text = ''.join(mpdiff.to_patch())
            self.bundle.add_multiparent_record(patch_text, sha1, parents,
                                               repo_kind, revision_id,
                                               file_id)
2520.4.6 by Aaron Bentley
Get installation started
384
385
2520.4.72 by Aaron Bentley
Rename format to 4alpha
386
class BundleInfoV4(object):
    """Provide (most of) the BundleInfo interface"""

    def __init__(self, fileobj, serializer):
        """Constructor

        :param fileobj: a seekable file containing the bundle
        :param serializer: the bundle serializer; its get_source_serializer
            is used to pick the revision serializer from the info record
        """
        self._fileobj = fileobj
        self._serializer = serializer
        # Lazily filled caches behind the real_revisions / revisions
        # properties.
        self.__real_revisions = None
        self.__revisions = None

    def install(self, repository):
        """Install this bundle's revisions into the specified repository"""
        return self.install_revisions(repository)

    def install_revisions(self, repository, stream_input=True):
        """Install this bundle's revisions into the specified repository

        :param repository: The repository to install into
        :param stream_input: If True, will stream input rather than reading it
            all into memory at once.  Reading it into memory all at once is
            (currently) faster.
        :return: whatever RevisionInstaller.install returns
        """
        repository.lock_write()
        try:
            ri = RevisionInstaller(self.get_bundle_reader(stream_input),
                                   self._serializer, repository)
            return ri.install()
        finally:
            repository.unlock()

    def get_merge_request(self, target_repo):
        """Provide data for performing a merge

        Returns suggested base, suggested target, and patch verification status

        :param target_repo: not used by this implementation
        """
        return None, self.target, 'inapplicable'

    def get_bundle_reader(self, stream_input=True):
        """Return a new BundleReader for the associated bundle

        :param stream_input: If True, the BundleReader streams input rather
            than reading it all into memory at once.  Reading it into memory
            all at once is (currently) faster.
        """
        # Every reader starts again from the beginning of the file.
        self._fileobj.seek(0)
        return BundleReader(self._fileobj, stream_input)

    def _get_real_revisions(self):
        """Return the revisions stored in the bundle, parsing them once

        :raises errors.BadBundle: if a revision record is found before any
            info record, so no serializer is known for it
        """
        if self.__real_revisions is None:
            # Collect into a local and publish only on success, so that a
            # failure part-way through cannot leave a truncated list cached.
            found = []
            serializer = None
            bundle_reader = self.get_bundle_reader()
            for text, metadata, repo_kind, revision_id, file_id in \
                bundle_reader.iter_records():
                if repo_kind == 'info':
                    serializer =\
                        self._serializer.get_source_serializer(metadata)
                if repo_kind == 'revision':
                    if serializer is None:
                        raise errors.BadBundle(
                            'Revision record found before info record')
                    rev = serializer.read_revision_from_string(text)
                    found.append(rev)
            self.__real_revisions = found
        return self.__real_revisions
    real_revisions = property(_get_real_revisions)

    def _get_revisions(self):
        """Return a RevisionInfo for each real revision, built once"""
        if self.__revisions is None:
            # The list is fully built before it is cached.
            self.__revisions = [
                bundle_data.RevisionInfo.from_revision(revision)
                for revision in self.real_revisions]
        return self.__revisions

    revisions = property(_get_revisions)

    def _get_target(self):
        """Return the revision id of the last revision in the bundle"""
        return self.revisions[-1].revision_id

    target = property(_get_target)
460
2520.4.14 by Aaron Bentley
Get most tests passing, use format header
461
462
class RevisionInstaller(object):
2520.4.123 by Aaron Bentley
Cleanup of bundle code
463
    """Installs revisions into a repository"""
2520.4.14 by Aaron Bentley
Get most tests passing, use format header
464
2520.4.21 by Aaron Bentley
Finish turning ContainerWriter into a new layer
465
    def __init__(self, container, serializer, repository):
466
        self._container = container
2520.4.6 by Aaron Bentley
Get installation started
467
        self._serializer = serializer
2520.4.14 by Aaron Bentley
Get most tests passing, use format header
468
        self._repository = repository
2520.4.97 by Aaron Bentley
Hack in support for inventory conversion
469
        self._info = None
2520.4.99 by Aaron Bentley
Test conversion across models
470
2520.4.14 by Aaron Bentley
Get most tests passing, use format header
471
    def install(self):
2592.4.1 by Martin Pool
RevisionInstaller now creates a write group for its work
472
        """Perform the installation.
473
        
474
        Must be called with the Repository locked.
475
        """
476
        self._repository.start_write_group()
477
        try:
2856.1.2 by Robert Collins
Review feedback.
478
            result = self._install_in_write_group()
2592.4.1 by Martin Pool
RevisionInstaller now creates a write group for its work
479
        except:
480
            self._repository.abort_write_group()
481
            raise
482
        self._repository.commit_write_group()
483
        return result
484
2856.1.2 by Robert Collins
Review feedback.
485
    def _install_in_write_group(self):
2520.4.6 by Aaron Bentley
Get installation started
486
        current_file = None
487
        current_versionedfile = None
488
        pending_file_records = []
2520.4.142 by Aaron Bentley
Clean up installation of inventory records
489
        inventory_vf = None
490
        pending_inventory_records = []
2520.4.8 by Aaron Bentley
Serialize inventory
491
        added_inv = set()
2520.4.29 by Aaron Bentley
Reactivate some testing, fix topo_iter
492
        target_revision = None
2520.4.58 by Aaron Bentley
Propogate support for metadata to iter_revisions, add storage kind
493
        for bytes, metadata, repo_kind, revision_id, file_id in\
2520.4.22 by Aaron Bentley
Create ContainerReader
494
            self._container.iter_records():
2520.4.97 by Aaron Bentley
Hack in support for inventory conversion
495
            if repo_kind == 'info':
496
                assert self._info is None
2520.4.123 by Aaron Bentley
Cleanup of bundle code
497
                self._handle_info(metadata)
2520.6.2 by Aaron Bentley
Fix bundle installation wrong-versionedfile bug
498
            if (repo_kind, file_id) != ('file', current_file):
499
                if len(pending_file_records) > 0:
500
                    self._install_mp_records(current_versionedfile,
501
                                             pending_file_records)
2520.4.8 by Aaron Bentley
Serialize inventory
502
                current_file = None
503
                current_versionedfile = None
504
                pending_file_records = []
2520.4.142 by Aaron Bentley
Clean up installation of inventory records
505
            if len(pending_inventory_records) > 0 and repo_kind != 'inventory':
506
                self._install_inventory_records(inventory_vf,
507
                                                pending_inventory_records)
508
                pending_inventory_records = []
509
            if repo_kind == 'inventory':
510
                if inventory_vf is None:
511
                    inventory_vf = self._repository.get_inventory_weave()
512
                if revision_id not in inventory_vf:
513
                    pending_inventory_records.append((revision_id, metadata,
514
                                                      bytes))
515
            if repo_kind == 'revision':
516
                target_revision = revision_id
517
                self._install_revision(revision_id, metadata, bytes)
518
            if repo_kind == 'signature':
519
                self._install_signature(revision_id, metadata, bytes)
2520.4.22 by Aaron Bentley
Create ContainerReader
520
            if repo_kind == 'file':
2520.4.142 by Aaron Bentley
Clean up installation of inventory records
521
                current_file = file_id
522
                if current_versionedfile is None:
2520.4.6 by Aaron Bentley
Get installation started
523
                    current_versionedfile = \
2520.4.14 by Aaron Bentley
Get most tests passing, use format header
524
                        self._repository.weave_store.get_weave_or_empty(
525
                        file_id, self._repository.get_transaction())
2520.4.6 by Aaron Bentley
Get installation started
526
                    pending_file_records = []
527
                if revision_id in current_versionedfile:
528
                    continue
2520.4.59 by Aaron Bentley
Push metadata down the stack
529
                pending_file_records.append((revision_id, metadata, bytes))
2520.4.18 by Aaron Bentley
Generate mpdiffs for inventory
530
        self._install_mp_records(current_versionedfile, pending_file_records)
2520.4.14 by Aaron Bentley
Get most tests passing, use format header
531
        return target_revision
2520.4.6 by Aaron Bentley
Get installation started
532
2520.4.123 by Aaron Bentley
Cleanup of bundle code
533
    def _handle_info(self, info):
534
        """Extract data from an info record"""
535
        self._info = info
536
        self._source_serializer = self._serializer.get_source_serializer(info)
537
        if (info['supports_rich_root'] == 0 and
538
            self._repository.supports_rich_root()):
539
            self.update_root = True
540
        else:
541
            self.update_root = False
542
2520.4.60 by Aaron Bentley
Add sha1 verification for mpdiffs
543
    def _install_mp_records(self, versionedfile, records):
2520.4.61 by Aaron Bentley
Do bulk insertion of records
544
        if len(records) == 0:
545
            return
546
        d_func = multiparent.MultiParent.from_patch
547
        vf_records = [(r, m['parents'], m['sha1'], d_func(t)) for r, m, t in
548
                      records if r not in versionedfile]
549
        versionedfile.add_mpdiffs(vf_records)
2520.4.8 by Aaron Bentley
Serialize inventory
550
2520.4.142 by Aaron Bentley
Clean up installation of inventory records
551
    def _install_inventory_records(self, vf, records):
2520.4.97 by Aaron Bentley
Hack in support for inventory conversion
552
        if self._info['serializer'] == self._repository._serializer.format_num:
2520.4.142 by Aaron Bentley
Clean up installation of inventory records
553
            return self._install_mp_records(vf, records)
554
        for revision_id, metadata, bytes in records:
555
            parent_ids = metadata['parents']
556
            parents = [self._repository.get_inventory(p)
557
                       for p in parent_ids]
558
            p_texts = [self._source_serializer.write_inventory_to_string(p)
559
                       for p in parents]
560
            target_lines = multiparent.MultiParent.from_patch(bytes).to_lines(
561
                p_texts)
562
            sha1 = osutils.sha_strings(target_lines)
563
            if sha1 != metadata['sha1']:
564
                raise errors.BadBundle("Can't convert to target format")
565
            target_inv = self._source_serializer.read_inventory_from_string(
566
                ''.join(target_lines))
567
            self._handle_root(target_inv, parent_ids)
568
            try:
569
                self._repository.add_inventory(revision_id, target_inv,
570
                                               parent_ids)
571
            except errors.UnsupportedInventoryKind:
572
                raise errors.IncompatibleRevision(repr(self._repository))
2520.4.99 by Aaron Bentley
Test conversion across models
573
574
    def _handle_root(self, target_inv, parent_ids):
575
        revision_id = target_inv.revision_id
576
        if self.update_root:
577
            target_inv.root.revision = revision_id
578
            store = self._repository.weave_store
579
            transaction = self._repository.get_transaction()
580
            vf = store.get_weave_or_empty(target_inv.root.file_id, transaction)
581
            vf.add_lines(revision_id, parent_ids, [])
582
        elif not self._repository.supports_rich_root():
583
            if target_inv.root.revision != revision_id:
584
                raise errors.IncompatibleRevision(repr(self._repository))
585
2520.4.10 by Aaron Bentley
Enable installation of revisions
586
2520.4.59 by Aaron Bentley
Push metadata down the stack
587
    def _install_revision(self, revision_id, metadata, text):
2520.4.14 by Aaron Bentley
Get most tests passing, use format header
588
        if self._repository.has_revision(revision_id):
589
            return
590
        self._repository._add_revision_text(revision_id, text)
2520.4.34 by Aaron Bentley
Add signature support
591
2520.4.59 by Aaron Bentley
Push metadata down the stack
592
    def _install_signature(self, revision_id, metadata, text):
2520.4.100 by Aaron Bentley
Fix repeat signature installs
593
        transaction = self._repository.get_transaction()
594
        if self._repository._revision_store.has_signature(revision_id,
595
                                                          transaction):
596
            return
2520.4.34 by Aaron Bentley
Add signature support
597
        self._repository._revision_store.add_revision_signature_text(
2520.4.100 by Aaron Bentley
Fix repeat signature installs
598
            revision_id, text, transaction)