~bzr-pqm/bzr/bzr.dev

2520.4.85 by Aaron Bentley
Get all test passing (which just proves there aren't enough tests!)
1
# Copyright (C) 2007 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
2520.4.20 by Aaron Bentley
Compress and base64-encode bundle contents
17
from cStringIO import StringIO
2520.4.26 by Aaron Bentley
Make decompression reasonably memory-efficient
18
import bz2
2520.4.130 by Aaron Bentley
Finish tweaking decode_name
19
import re
2520.4.20 by Aaron Bentley
Compress and base64-encode bundle contents
20
2520.4.13 by Aaron Bentley
Use real container implementation
21
from bzrlib import (
2520.4.40 by Aaron Bentley
Add human-readable diff to bundles
22
    diff,
2520.4.34 by Aaron Bentley
Add signature support
23
    errors,
2520.4.26 by Aaron Bentley
Make decompression reasonably memory-efficient
24
    iterablefile,
2520.4.13 by Aaron Bentley
Use real container implementation
25
    multiparent,
2520.4.97 by Aaron Bentley
Hack in support for inventory conversion
26
    osutils,
2520.4.13 by Aaron Bentley
Use real container implementation
27
    pack,
2520.4.40 by Aaron Bentley
Add human-readable diff to bundles
28
    revision as _mod_revision,
2520.4.45 by Aaron Bentley
Handle inconsistencies in last-modified-revision between vf and inventory
29
    trace,
2520.4.101 by Aaron Bentley
Use a registry to look up xml serializers by format
30
    xml_serializer,
2520.4.13 by Aaron Bentley
Use real container implementation
31
    )
2520.4.14 by Aaron Bentley
Get most tests passing, use format header
32
from bzrlib.bundle import bundle_data, serializer
2520.4.56 by Aaron Bentley
Begin adding support for arbitrary metadata
33
from bzrlib.util import bencode
2520.4.14 by Aaron Bentley
Get most tests passing, use format header
34
2520.4.4 by Aaron Bentley
Get basis support for a new bundle format in place
35
2520.4.25 by Aaron Bentley
Rename ContainerWriter/ContainerReader to BundleWriter/BundleReader
36
class BundleWriter(object):
    """Writer for bundle-format files.

    This serves roughly the same purpose as ContainerWriter, but acts as a
    layer on top of it.

    Provides ways of writing the specific record types supported by this
    bundle format.
    """

    def __init__(self, fileobj):
        """Constructor

        :param fileobj: a file-like object that the bundle is written to
        """
        # The container never writes to the file directly: everything it
        # emits goes through _write_encoded, which bzip2-compresses it.
        self._container = pack.ContainerWriter(self._write_encoded)
        self._fileobj = fileobj
        self._compressor = bz2.BZ2Compressor()

    def _write_encoded(self, bytes):
        """Write bzip2-encoded bytes to the file"""
        self._fileobj.write(self._compressor.compress(bytes))

    def begin(self):
        """Start writing the bundle"""
        # The format header and the '#' line are written straight to the
        # file, bypassing the compressor.
        self._fileobj.write(serializer._get_bundle_header(
            serializer.v4_string))
        self._fileobj.write('#\n')
        self._container.begin()

    def end(self):
        """Finish writing the bundle"""
        self._container.end()
        # Emit whatever the compressor is still holding.
        self._fileobj.write(self._compressor.flush())

    def add_multiparent_record(self, mp_bytes, sha1, parents, repo_kind,
                               revision_id, file_id):
        """Add a record for a multi-parent diff

        :param mp_bytes: A multi-parent diff, as a bytestring
        :param sha1: The sha1 hash of the fulltext
        :param parents: a list of revision-ids of the parents
        :param repo_kind: The kind of object in the repository.  May be
            'file' or 'inventory'
        :param revision_id: The revision id of the mpdiff being added.
        :param file_id: The file-id of the file, or None for inventories.
        """
        metadata = {'parents': parents,
                    'storage_kind': 'mpdiff',
                    'sha1': sha1}
        self._add_record(mp_bytes, metadata, repo_kind, revision_id, file_id)

    def add_fulltext_record(self, bytes, parents, repo_kind, revision_id):
        """Add a record for a fulltext

        :param bytes: The fulltext, as a bytestring
        :param parents: a list of revision-ids of the parents
        :param repo_kind: The kind of object in the repository.  May be
            'revision' or 'signature'
        :param revision_id: The revision id of the fulltext being added.
        """
        metadata = {'parents': parents,
                    'storage_kind': 'fulltext'}
        # Fulltext records are never associated with a file-id.
        self._add_record(bytes, metadata, repo_kind, revision_id, None)

    def add_info_record(self, **kwargs):
        """Add an info record to the bundle

        Any parameters may be supplied, except 'self' and 'storage_kind'.
        Values must be lists, strings, integers, dicts, or a combination.
        """
        kwargs['storage_kind'] = 'header'
        # Header records carry only metadata: no body, revision or file-id.
        self._add_record(None, kwargs, 'info', None, None)

    @staticmethod
    def encode_name(content_kind, revision_id, file_id=None):
        """Encode semantic ids as a container name

        :param content_kind: one of 'revision', 'file', 'inventory',
            'signature' or 'info'
        :param revision_id: a revision id; must be None for 'info' and
            non-None for every other kind
        :param file_id: a file id; required for 'file' and must be None for
            every other kind
        :return: the components joined with '/', with any '/' inside a
            component doubled to '//'
        :raises ValueError: if content_kind is not a known kind
        :raises AssertionError: if revision_id or file_id is inconsistent
            with content_kind
        """
        if content_kind not in ('revision', 'file', 'inventory', 'signature',
                'info'):
            raise ValueError(content_kind)
        if content_kind == 'file':
            if file_id is None:
                raise AssertionError()
        else:
            if file_id is not None:
                raise AssertionError()
        if content_kind == 'info':
            if revision_id is not None:
                raise AssertionError()
        elif revision_id is None:
            raise AssertionError()
        # Slashes inside a component are doubled so that a single slash can
        # serve unambiguously as the separator.
        names = [n.replace('/', '//') for n in
                 (content_kind, revision_id, file_id) if n is not None]
        return '/'.join(names)

    def _add_record(self, bytes, metadata, repo_kind, revision_id, file_id):
        """Add a bundle record to the container.

        Most bundle records are recorded as header/body pairs, with the
        body being nameless.  Records with storage_kind 'header' have no
        body.
        """
        name = self.encode_name(repo_kind, revision_id, file_id)
        encoded_metadata = bencode.bencode(metadata)
        self._container.add_bytes_record(encoded_metadata, [(name, )])
        if metadata['storage_kind'] != 'header':
            self._container.add_bytes_record(bytes, [])
2520.4.13 by Aaron Bentley
Use real container implementation
140
2520.4.7 by Aaron Bentley
Fix patch deserialization
141
2520.4.25 by Aaron Bentley
Rename ContainerWriter/ContainerReader to BundleWriter/BundleReader
142
class BundleReader(object):
    """Reader for bundle-format files.

    This serves roughly the same purpose as ContainerReader, but acts as a
    layer on top of it, providing metadata, a semantic name, and a record
    body.
    """

    def __init__(self, fileobj, stream_input=True):
        """Constructor

        :param fileobj: a file containing a bzip-encoded container
        :param stream_input: If True, the BundleReader streams input rather
            than reading it all into memory at once.  Reading it into memory
            all at once is (currently) faster.
        """
        # Consume the lines that precede the compressed container: one line,
        # plus a second one unless the first was blank.
        line = fileobj.readline()
        if line != '\n':
            fileobj.readline()
        self.patch_lines = []
        if stream_input:
            source_file = iterablefile.IterableFile(self.iter_decode(fileobj))
        else:
            source_file = StringIO(bz2.decompress(fileobj.read()))
        self._container_file = source_file

    @staticmethod
    def iter_decode(fileobj):
        """Iterate through decoded fragments of the file

        :param fileobj: an iterable of bzip2-compressed chunks
        :return: a generator of decompressed fragments; stops as soon as the
            decompressor raises EOFError
        """
        decompressor = bz2.BZ2Decompressor()
        for line in fileobj:
            try:
                yield decompressor.decompress(line)
            except EOFError:
                return

    @staticmethod
    def decode_name(name):
        """Decode a name from its container form into a semantic form

        :param name: a container name: components separated by single
            slashes, with '//' standing for a literal slash
        :retval: content_kind, revision_id, file_id (the last two are None
            when the name has no such component)
        """
        # The capturing group keeps the separators in the result, so a single
        # slash (component break) can be told apart from a double slash
        # (escaped literal slash).
        segments = re.split(r'(//?)', name)
        names = ['']
        for segment in segments:
            if segment == '//':
                names[-1] += '/'
            elif segment == '/':
                names.append('')
            else:
                names[-1] += segment
        content_kind = names[0]
        revision_id = None
        file_id = None
        if len(names) > 1:
            revision_id = names[1]
        if len(names) > 2:
            file_id = names[2]
        return content_kind, revision_id, file_id

    def iter_records(self):
        """Iterate through bundle records

        :return: a generator of (bytes, metadata, content_kind, revision_id,
            file_id)
        :raises errors.BadBundle: if a record does not have exactly one
            name, or if a non-header record is not followed by a body record
        """
        iterator = pack.iter_records_from_file(self._container_file)
        for names, bytes in iterator:
            if len(names) != 1:
                raise errors.BadBundle('Record has %d names instead of 1'
                                       % len(names))
            metadata = bencode.bdecode(bytes)
            if metadata['storage_kind'] == 'header':
                bytes = None
            else:
                try:
                    _unused, bytes = iterator.next()
                except StopIteration:
                    # A StopIteration escaping from here would silently end
                    # this generator and drop the record whose metadata was
                    # just read, so report the missing body explicitly.
                    raise errors.BadBundle('Record has no body')
            yield (bytes, metadata) + self.decode_name(names[0][0])
2520.4.22 by Aaron Bentley
Create ContainerReader
219
220
2520.4.72 by Aaron Bentley
Rename format to 4alpha
221
class BundleSerializerV4(serializer.BundleSerializer):
    """High-level bundle interface, implemented via BundleWriteOperation
    and BundleInfoV4.
    """

    def write(self, repository, revision_ids, forced_bases, fileobj):
        """Write a bundle to a file-like object

        For backwards-compatibility only; the old-style arguments are
        converted by BundleWriteOperation.from_old_args.
        """
        return BundleWriteOperation.from_old_args(
            repository, revision_ids, forced_bases, fileobj).do_write()

    def write_bundle(self, repository, target, base, fileobj):
        """Write a bundle to a file object

        :param repository: The repository to retrieve revision data from
        :param target: The head revision to include ancestors of
        :param base: The ancestor of the target to stop including ancestors
            at.
        :param fileobj: The file-like object to write to
        :return: whatever BundleWriteOperation.do_write returns
        """
        operation = BundleWriteOperation(base, target, repository, fileobj)
        return operation.do_write()

    def read(self, file):
        """Return a reader object (a BundleInfoV4) for a given file"""
        return BundleInfoV4(file, self)

    @staticmethod
    def get_source_serializer(info):
        """Retrieve the serializer for a given info object

        :param info: a mapping with a 'serializer' entry naming the format
        """
        format_name = info['serializer']
        return xml_serializer.format_registry.get(format_name)
254
2520.4.50 by Aaron Bentley
Split write functionality out into a separate object
255
256
class BundleWriteOperation(object):
    """Perform the operation of writing revisions to a bundle"""

    @classmethod
    def from_old_args(cls, repository, revision_ids, forced_bases, fileobj):
        """Create a BundleWriteOperation from old-style arguments

        :param repository: The repository to retrieve revision data from
        :param revision_ids: a sequence of revision ids; the first one is
            used as the target
        :param forced_bases: a mapping from revision id to the base to use
            for it
        :param fileobj: The file-like object to write to
        :return: an instance of the class this was called on
        """
        base, target = cls.get_base_target(revision_ids, forced_bases,
                                           repository)
        # Instantiate cls rather than naming the class, so that subclasses
        # get instances of themselves.
        return cls(base, target, repository, fileobj, revision_ids)

    def __init__(self, base, target, repository, fileobj, revision_ids=None):
        """Constructor

        :param base: The ancestor of the target to stop including ancestors
            at
        :param target: The head revision to include ancestors of
        :param repository: The repository to retrieve revision data from
        :param fileobj: The file-like object to write to
        :param revision_ids: The revisions to write.  If None, they are the
            ancestry of target minus the ancestry of base.
        """
        self.base = base
        self.target = target
        self.repository = repository
        bundle = BundleWriter(fileobj)
        self.bundle = bundle
        self.base_ancestry = set(repository.get_ancestry(base,
                                                         topo_sorted=False))
        if revision_ids is not None:
            self.revision_ids = revision_ids
        else:
            revision_ids = set(repository.get_ancestry(target,
                                                       topo_sorted=False))
            self.revision_ids = revision_ids.difference(self.base_ancestry)
        # Single-element key tuples, one per revision id.
        self.revision_keys = set([(revid,) for revid in self.revision_ids])

    def do_write(self):
        """Write all data to the bundle

        The repository is read-locked for the duration of the write.

        :return: the revision ids selected for this operation
        """
        self.repository.lock_read()
        try:
            self.bundle.begin()
            self.write_info()
            self.write_files()
            self.write_revisions()
            self.bundle.end()
        finally:
            self.repository.unlock()
        return self.revision_ids

    def write_info(self):
        """Write format info"""
        serializer_format = self.repository.get_serializer_format()
        # Recorded as 1 or 0 rather than as a boolean.
        supports_rich_root = {True: 1, False: 0}[
            self.repository.supports_rich_root()]
        self.bundle.add_info_record(serializer=serializer_format,
                                    supports_rich_root=supports_rich_root)

    def write_files(self):
        """Write bundle records for all revisions of all files"""
        text_keys = []
        altered_fileids = self.repository.fileids_altered_by_revision_ids(
                self.revision_ids)
        for file_id, revision_ids in altered_fileids.iteritems():
            for revision_id in revision_ids:
                text_keys.append((file_id, revision_id))
        self._add_mp_records_keys('file', self.repository.texts, text_keys)

    def write_revisions(self):
        """Write bundle records for all revisions and signatures"""
        inv_vf = self.repository.inventories
        revision_order = [key[-1] for key in multiparent.topo_iter_keys(inv_vf,
            self.revision_keys)]
        # Move the target to the end of the order.
        if self.target is not None and self.target in self.revision_ids:
            revision_order.remove(self.target)
            revision_order.append(self.target)
        inventory_keys = [(revid,) for revid in revision_order]
        self._add_mp_records_keys('inventory', inv_vf, inventory_keys)
        parent_map = self.repository.get_parent_map(revision_order)
        for revision_id in revision_order:
            parents = parent_map.get(revision_id, None)
            revision_text = self.repository.get_revision_xml(revision_id)
            self.bundle.add_fulltext_record(revision_text, parents,
                                       'revision', revision_id)
            try:
                self.bundle.add_fulltext_record(
                    self.repository.get_signature_text(
                    revision_id), parents, 'signature', revision_id)
            except errors.NoSuchRevision:
                # No signature record is written for this revision.
                pass

    @staticmethod
    def get_base_target(revision_ids, forced_bases, repository):
        """Determine the base and target from old-style revision ids

        :param revision_ids: a sequence of revision ids; the first one is
            the target
        :param forced_bases: a mapping from revision id to the base to use
            for it
        :param repository: consulted for the target's parents when
            forced_bases has no base for the target
        :return: (base, target); (None, None) if revision_ids is empty
        """
        if len(revision_ids) == 0:
            return None, None
        target = revision_ids[0]
        base = forced_bases.get(target)
        if base is None:
            parents = repository.get_revision(target).parent_ids
            if len(parents) == 0:
                base = _mod_revision.NULL_REVISION
            else:
                base = parents[0]
        return base, target

    def _add_mp_records_keys(self, repo_kind, vf, keys):
        """Add multi-parent diff records to a bundle

        :param repo_kind: The kind of object in the repository, passed on
            to the bundle writer
        :param vf: the versioned-files object to take mpdiffs, sha1s and
            parents from
        :param keys: the keys to write; two-element keys are treated as
            (file_id, revision_id), any other key has no file-id
        """
        ordered_keys = list(multiparent.topo_iter_keys(vf, keys))
        mpdiffs = vf.make_mpdiffs(ordered_keys)
        sha1s = vf.get_sha1s(ordered_keys)
        parent_map = vf.get_parent_map(ordered_keys)
        for mpdiff, item_key, sha1 in zip(mpdiffs, ordered_keys, sha1s):
            parents = [key[-1] for key in parent_map[item_key]]
            text = ''.join(mpdiff.to_patch())
            # Infer file id records as appropriate.
            if len(item_key) == 2:
                file_id = item_key[0]
            else:
                file_id = None
            self.bundle.add_multiparent_record(text, sha1, parents, repo_kind,
                                               item_key[-1], file_id)
2520.4.6 by Aaron Bentley
Get installation started
367
368
2520.4.72 by Aaron Bentley
Rename format to 4alpha
369
class BundleInfoV4(object):
    """Provide (most of) the BundleInfo interface"""

    def __init__(self, fileobj, serializer):
        """Constructor

        :param fileobj: a seekable file containing the bundle
        :param serializer: the bundle serializer, used to look up the
            revision serializer named by the bundle's info record
        """
        self._fileobj = fileobj
        self._serializer = serializer
        # Lazily-filled caches behind the real_revisions and revisions
        # properties.
        self.__real_revisions = None
        self.__revisions = None

    def install(self, repository):
        """Install this bundle's revisions into the specified repository"""
        return self.install_revisions(repository)

    def install_revisions(self, repository, stream_input=True):
        """Install this bundle's revisions into the specified repository

        The repository is write-locked for the duration of the install.

        :param repository: The repository to install into
        :param stream_input: If True, will stream input rather than reading it
            all into memory at once.  Reading it into memory all at once is
            (currently) faster.
        """
        repository.lock_write()
        try:
            ri = RevisionInstaller(self.get_bundle_reader(stream_input),
                                   self._serializer, repository)
            return ri.install()
        finally:
            repository.unlock()

    def get_merge_request(self, target_repo):
        """Provide data for performing a merge

        Returns suggested base, suggested target, and patch verification status
        """
        return None, self.target, 'inapplicable'

    def get_bundle_reader(self, stream_input=True):
        """Return a new BundleReader for the associated bundle

        :param stream_input: If True, the BundleReader streams input rather
            than reading it all into memory at once.  Reading it into memory
            all at once is (currently) faster.
        """
        # Each reader starts from the beginning of the file.
        self._fileobj.seek(0)
        return BundleReader(self._fileobj, stream_input)

    def _get_real_revisions(self):
        """Return the revisions deserialized from the bundle, caching them

        :raises errors.BadBundle: if a revision record appears before any
            info record, so no serializer is known for it
        """
        if self.__real_revisions is None:
            # Build the list locally and cache it only once complete, so a
            # failure part-way through does not leave a partial list cached.
            real_revisions = []
            serializer = None
            bundle_reader = self.get_bundle_reader()
            for bytes, metadata, repo_kind, revision_id, file_id in \
                bundle_reader.iter_records():
                if repo_kind == 'info':
                    serializer = \
                        self._serializer.get_source_serializer(metadata)
                elif repo_kind == 'revision':
                    if serializer is None:
                        raise errors.BadBundle(
                            'Revision record found before info record')
                    rev = serializer.read_revision_from_string(bytes)
                    real_revisions.append(rev)
            self.__real_revisions = real_revisions
        return self.__real_revisions
    real_revisions = property(_get_real_revisions)

    def _get_revisions(self):
        """Return RevisionInfo objects for real_revisions, caching them"""
        if self.__revisions is None:
            self.__revisions = [
                bundle_data.RevisionInfo.from_revision(revision)
                for revision in self.real_revisions]
        return self.__revisions

    revisions = property(_get_revisions)

    def _get_target(self):
        """Return the revision id of the last revision in the bundle"""
        return self.revisions[-1].revision_id

    target = property(_get_target)
443
2520.4.14 by Aaron Bentley
Get most tests passing, use format header
444
445
class RevisionInstaller(object):
2520.4.123 by Aaron Bentley
Cleanup of bundle code
446
    """Installs revisions into a repository"""
2520.4.14 by Aaron Bentley
Get most tests passing, use format header
447
2520.4.21 by Aaron Bentley
Finish turning ContainerWriter into a new layer
448
    def __init__(self, container, serializer, repository):
449
        self._container = container
2520.4.6 by Aaron Bentley
Get installation started
450
        self._serializer = serializer
2520.4.14 by Aaron Bentley
Get most tests passing, use format header
451
        self._repository = repository
2520.4.97 by Aaron Bentley
Hack in support for inventory conversion
452
        self._info = None
2520.4.99 by Aaron Bentley
Test conversion across models
453
2520.4.14 by Aaron Bentley
Get most tests passing, use format header
454
    def install(self):
2592.4.1 by Martin Pool
RevisionInstaller now creates a write group for its work
455
        """Perform the installation.
456
        
457
        Must be called with the Repository locked.
458
        """
459
        self._repository.start_write_group()
460
        try:
2856.1.2 by Robert Collins
Review feedback.
461
            result = self._install_in_write_group()
2592.4.1 by Martin Pool
RevisionInstaller now creates a write group for its work
462
        except:
463
            self._repository.abort_write_group()
464
            raise
465
        self._repository.commit_write_group()
466
        return result
467
2856.1.2 by Robert Collins
Review feedback.
468
    def _install_in_write_group(self):
2520.4.6 by Aaron Bentley
Get installation started
469
        current_file = None
470
        current_versionedfile = None
471
        pending_file_records = []
2520.4.142 by Aaron Bentley
Clean up installation of inventory records
472
        inventory_vf = None
473
        pending_inventory_records = []
2520.4.8 by Aaron Bentley
Serialize inventory
474
        added_inv = set()
2520.4.29 by Aaron Bentley
Reactivate some testing, fix topo_iter
475
        target_revision = None
2520.4.58 by Aaron Bentley
Propogate support for metadata to iter_revisions, add storage kind
476
        for bytes, metadata, repo_kind, revision_id, file_id in\
2520.4.22 by Aaron Bentley
Create ContainerReader
477
            self._container.iter_records():
2520.4.97 by Aaron Bentley
Hack in support for inventory conversion
478
            if repo_kind == 'info':
3376.2.4 by Martin Pool
Remove every assert statement from bzrlib!
479
                if self._info is not None:
480
                    raise AssertionError()
2520.4.123 by Aaron Bentley
Cleanup of bundle code
481
                self._handle_info(metadata)
3350.6.4 by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.
482
            if (pending_file_records and
483
                (repo_kind, file_id) != ('file', current_file)):
484
                # Flush the data for a single file - prevents memory
485
                # spiking due to buffering all files in memory.
486
                self._install_mp_records_keys(self._repository.texts,
487
                    pending_file_records)
2520.4.8 by Aaron Bentley
Serialize inventory
488
                current_file = None
3350.6.4 by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.
489
                del pending_file_records[:]
2520.4.142 by Aaron Bentley
Clean up installation of inventory records
490
            if len(pending_inventory_records) > 0 and repo_kind != 'inventory':
3350.6.4 by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.
491
                self._install_inventory_records(pending_inventory_records)
2520.4.142 by Aaron Bentley
Clean up installation of inventory records
492
                pending_inventory_records = []
493
            if repo_kind == 'inventory':
3350.6.4 by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.
494
                pending_inventory_records.append(((revision_id,), metadata, bytes))
2520.4.142 by Aaron Bentley
Clean up installation of inventory records
495
            if repo_kind == 'revision':
496
                target_revision = revision_id
497
                self._install_revision(revision_id, metadata, bytes)
498
            if repo_kind == 'signature':
499
                self._install_signature(revision_id, metadata, bytes)
2520.4.22 by Aaron Bentley
Create ContainerReader
500
            if repo_kind == 'file':
2520.4.142 by Aaron Bentley
Clean up installation of inventory records
501
                current_file = file_id
3350.6.4 by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.
502
                pending_file_records.append(((file_id, revision_id), metadata, bytes))
503
        self._install_mp_records_keys(self._repository.texts, pending_file_records)
2520.4.14 by Aaron Bentley
Get most tests passing, use format header
504
        return target_revision
2520.4.6 by Aaron Bentley
Get installation started
505
2520.4.123 by Aaron Bentley
Cleanup of bundle code
506
    def _handle_info(self, info):
507
        """Extract data from an info record"""
508
        self._info = info
509
        self._source_serializer = self._serializer.get_source_serializer(info)
510
        if (info['supports_rich_root'] == 0 and
511
            self._repository.supports_rich_root()):
512
            self.update_root = True
513
        else:
514
            self.update_root = False
515
2520.4.60 by Aaron Bentley
Add sha1 verification for mpdiffs
516
    def _install_mp_records(self, versionedfile, records):
2520.4.61 by Aaron Bentley
Do bulk insertion of records
517
        if len(records) == 0:
518
            return
519
        d_func = multiparent.MultiParent.from_patch
520
        vf_records = [(r, m['parents'], m['sha1'], d_func(t)) for r, m, t in
521
                      records if r not in versionedfile]
522
        versionedfile.add_mpdiffs(vf_records)
2520.4.8 by Aaron Bentley
Serialize inventory
523
3350.6.4 by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.
524
    def _install_mp_records_keys(self, versionedfile, records):
        """Add multi-parent diff records addressed by key tuples.

        :param versionedfile: Target providing ``add_mpdiffs``.
        :param records: Iterable of (key, metadata, patch text) tuples;
            metadata supplies 'parents' and 'sha1'.
        """
        parse_patch = multiparent.MultiParent.from_patch

        def as_mpdiff_record(key, meta, patch):
            # Adapt to tuple interface: A length two key is a file_id,
            # revision_id pair, a length 1 key is a
            # revision/signature/inventory. We need to do this because
            # the metadata extraction from the bundle has not yet been
            # updated to use the consistent tuple interface itself.
            if len(key) == 2:
                prefix = key[:1]
            else:
                prefix = ()
            parent_keys = [prefix + (parent,) for parent in meta['parents']]
            return (key, parent_keys, meta['sha1'], parse_patch(patch))

        versionedfile.add_mpdiffs(
            [as_mpdiff_record(key, meta, patch)
             for key, meta, patch in records])
540
541
    def _install_inventory_records(self, records):
2520.4.97 by Aaron Bentley
Hack in support for inventory conversion
542
        if self._info['serializer'] == self._repository._serializer.format_num:
3350.6.4 by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.
543
            return self._install_mp_records_keys(self._repository.inventories,
544
                records)
545
        for key, metadata, bytes in records:
546
            revision_id = key[-1]
2520.4.142 by Aaron Bentley
Clean up installation of inventory records
547
            parent_ids = metadata['parents']
548
            parents = [self._repository.get_inventory(p)
549
                       for p in parent_ids]
550
            p_texts = [self._source_serializer.write_inventory_to_string(p)
551
                       for p in parents]
552
            target_lines = multiparent.MultiParent.from_patch(bytes).to_lines(
553
                p_texts)
554
            sha1 = osutils.sha_strings(target_lines)
555
            if sha1 != metadata['sha1']:
556
                raise errors.BadBundle("Can't convert to target format")
557
            target_inv = self._source_serializer.read_inventory_from_string(
558
                ''.join(target_lines))
559
            self._handle_root(target_inv, parent_ids)
560
            try:
561
                self._repository.add_inventory(revision_id, target_inv,
562
                                               parent_ids)
563
            except errors.UnsupportedInventoryKind:
564
                raise errors.IncompatibleRevision(repr(self._repository))
2520.4.99 by Aaron Bentley
Test conversion across models
565
566
    def _handle_root(self, target_inv, parent_ids):
567
        revision_id = target_inv.revision_id
568
        if self.update_root:
3350.6.4 by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.
569
            text_key = (target_inv.root.file_id, revision_id)
570
            parent_keys = [(target_inv.root.file_id, parent) for
571
                parent in parent_ids]
572
            self._repository.texts.add_lines(text_key, parent_keys, [])
2520.4.99 by Aaron Bentley
Test conversion across models
573
        elif not self._repository.supports_rich_root():
574
            if target_inv.root.revision != revision_id:
575
                raise errors.IncompatibleRevision(repr(self._repository))
576
2520.4.59 by Aaron Bentley
Push metadata down the stack
577
    def _install_revision(self, revision_id, metadata, text):
2520.4.14 by Aaron Bentley
Get most tests passing, use format header
578
        if self._repository.has_revision(revision_id):
579
            return
3350.6.4 by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.
580
        revision = self._source_serializer.read_revision_from_string(text)
581
        self._repository.add_revision(revision.revision_id, revision)
2520.4.34 by Aaron Bentley
Add signature support
582
2520.4.59 by Aaron Bentley
Push metadata down the stack
583
    def _install_signature(self, revision_id, metadata, text):
2520.4.100 by Aaron Bentley
Fix repeat signature installs
584
        transaction = self._repository.get_transaction()
3350.6.4 by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.
585
        if self._repository.has_signature_for_revision_id(revision_id):
2520.4.100 by Aaron Bentley
Fix repeat signature installs
586
            return
3350.6.4 by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.
587
        self._repository.add_signature_text(revision_id, text)