~bzr-pqm/bzr/bzr.dev

2520.4.85 by Aaron Bentley
Get all test passing (which just proves there aren't enough tests!)
1
# Copyright (C) 2007 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
4183.7.1 by Sabin Iacob
update FSF mailing address
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
2520.4.85 by Aaron Bentley
Get all test passing (which just proves there aren't enough tests!)
16
2520.4.20 by Aaron Bentley
Compress and base64-encode bundle contents
17
from cStringIO import StringIO
2520.4.26 by Aaron Bentley
Make decompression reasonably memory-efficient
18
import bz2
2520.4.130 by Aaron Bentley
Finish tweaking decode_name
19
import re
2520.4.20 by Aaron Bentley
Compress and base64-encode bundle contents
20
2520.4.13 by Aaron Bentley
Use real container implementation
21
from bzrlib import (
2520.4.40 by Aaron Bentley
Add human-readable diff to bundles
22
    diff,
2520.4.34 by Aaron Bentley
Add signature support
23
    errors,
2520.4.26 by Aaron Bentley
Make decompression reasonably memory-efficient
24
    iterablefile,
2520.4.13 by Aaron Bentley
Use real container implementation
25
    multiparent,
2520.4.97 by Aaron Bentley
Hack in support for inventory conversion
26
    osutils,
2520.4.13 by Aaron Bentley
Use real container implementation
27
    pack,
2520.4.40 by Aaron Bentley
Add human-readable diff to bundles
28
    revision as _mod_revision,
2520.4.45 by Aaron Bentley
Handle inconsistencies in last-modified-revision between vf and inventory
29
    trace,
4237.3.1 by Jelmer Vernooij
Add new module with generic serializer information; keep XML-specific bits in
30
    serializer,
2520.4.13 by Aaron Bentley
Use real container implementation
31
    )
4237.3.1 by Jelmer Vernooij
Add new module with generic serializer information; keep XML-specific bits in
32
from bzrlib.bundle import bundle_data, serializer as bundle_serializer
2694.5.1 by Alexander Belchenko
pyrex bencode (without benchmarks)
33
from bzrlib import bencode
2520.4.14 by Aaron Bentley
Get most tests passing, use format header
34
2520.4.4 by Aaron Bentley
Get basis support for a new bundle format in place
35
2520.4.25 by Aaron Bentley
Rename ContainerWriter/ContainerReader to BundleWriter/BundleReader
36
class BundleWriter(object):
    """Writer for bundle-format files.

    This serves roughly the same purpose as ContainerWriter, but acts as a
    layer on top of it.

    Provides ways of writing the specific record types supported by this
    bundle format.
    """

    def __init__(self, fileobj):
        """Constructor

        :param fileobj: a file-like object with a ``write`` method.  The
            bundle header and the bzip2-compressed container are written
            to it.
        """
        # Everything the container emits is routed through _write_encoded,
        # so it reaches fileobj bzip2-compressed.
        self._container = pack.ContainerWriter(self._write_encoded)
        self._fileobj = fileobj
        self._compressor = bz2.BZ2Compressor()

    def _write_encoded(self, bytes):
        """Write bzip2-encoded bytes to the file"""
        self._fileobj.write(self._compressor.compress(bytes))

    def begin(self):
        """Start writing the bundle"""
        # The format header and the '#' line are written uncompressed,
        # directly to the file; only the container goes via the compressor.
        self._fileobj.write(bundle_serializer._get_bundle_header(
            bundle_serializer.v4_string))
        self._fileobj.write('#\n')
        self._container.begin()

    def end(self):
        """Finish writing the bundle"""
        self._container.end()
        # Emit whatever the compressor is still buffering.
        self._fileobj.write(self._compressor.flush())

    def add_multiparent_record(self, mp_bytes, sha1, parents, repo_kind,
                               revision_id, file_id):
        """Add a record for a multi-parent diff

        :mp_bytes: A multi-parent diff, as a bytestring
        :sha1: The sha1 hash of the fulltext
        :parents: a list of revision-ids of the parents
        :repo_kind: The kind of object in the repository.  May be 'file' or
            'inventory'
        :revision_id: The revision id of the mpdiff being added.
        :file_id: The file-id of the file, or None for inventories.
        """
        metadata = {'parents': parents,
                    'storage_kind': 'mpdiff',
                    'sha1': sha1}
        self._add_record(mp_bytes, metadata, repo_kind, revision_id, file_id)

    def add_fulltext_record(self, bytes, parents, repo_kind, revision_id):
        """Add a record for a fulltext

        :bytes: The fulltext, as a bytestring
        :parents: a list of revision-ids of the parents
        :repo_kind: The kind of object in the repository.  May be 'revision' or
            'signature'
        :revision_id: The revision id of the fulltext being added.
        """
        metadata = {'parents': parents,
                    'storage_kind': 'fulltext'}
        # Fulltext records never carry a file-id.
        self._add_record(bytes, metadata, repo_kind, revision_id, None)

    def add_info_record(self, **kwargs):
        """Add an info record to the bundle

        Any parameters may be supplied, except 'self' and 'storage_kind'.
        Values must be lists, strings, integers, dicts, or a combination.
        """
        kwargs['storage_kind'] = 'header'
        self._add_record(None, kwargs, 'info', None, None)

    @staticmethod
    def encode_name(content_kind, revision_id, file_id=None):
        """Encode semantic ids as a container name

        The non-None components are joined with '/', after doubling any '/'
        they contain so that the separators stay unambiguous.

        :param content_kind: one of 'revision', 'file', 'inventory',
            'signature' or 'info'
        :param revision_id: the revision id; must be None for 'info' and
            non-None for every other kind
        :param file_id: the file id; must be non-None for 'file' and None
            for every other kind
        :raises ValueError: if content_kind is not one of the known kinds
        :raises AssertionError: if revision_id or file_id is
            supplied/omitted inconsistently with content_kind
        """
        if content_kind not in ('revision', 'file', 'inventory', 'signature',
                'info'):
            raise ValueError(content_kind)
        if content_kind == 'file':
            if file_id is None:
                raise AssertionError()
        else:
            if file_id is not None:
                raise AssertionError()
        if content_kind == 'info':
            if revision_id is not None:
                raise AssertionError()
        elif revision_id is None:
            raise AssertionError()
        names = [n.replace('/', '//') for n in
                 (content_kind, revision_id, file_id) if n is not None]
        return '/'.join(names)

    def _add_record(self, bytes, metadata, repo_kind, revision_id, file_id):
        """Add a bundle record to the container.

        Most bundle records are recorded as header/body pairs, with the
        body being nameless.  Records with storage_kind 'header' have no
        body.
        """
        name = self.encode_name(repo_kind, revision_id, file_id)
        encoded_metadata = bencode.bencode(metadata)
        # The named record holds the bencoded metadata ...
        self._container.add_bytes_record(encoded_metadata, [(name, )])
        # ... and the nameless record that follows holds the body.
        if metadata['storage_kind'] != 'header':
            self._container.add_bytes_record(bytes, [])
2520.4.13 by Aaron Bentley
Use real container implementation
140
2520.4.7 by Aaron Bentley
Fix patch deserialization
141
2520.4.25 by Aaron Bentley
Rename ContainerWriter/ContainerReader to BundleWriter/BundleReader
142
class BundleReader(object):
    """Reader for bundle-format files.

    This serves roughly the same purpose as ContainerReader, but acts as a
    layer on top of it, providing metadata, a semantic name, and a record
    body
    """

    def __init__(self, fileobj, stream_input=True):
        """Constructor

        :param fileobj: a file containing a bzip-encoded container
        :param stream_input: If True, the BundleReader streams input rather
            than reading it all into memory at once.  Reading it into memory
            all at once is (currently) faster.
        """
        # Consume the lines that precede the compressed container: one line,
        # plus a second one unless the first was a bare newline.
        line = fileobj.readline()
        if line != '\n':
            fileobj.readline()
        self.patch_lines = []
        if stream_input:
            source_file = iterablefile.IterableFile(self.iter_decode(fileobj))
        else:
            source_file = StringIO(bz2.decompress(fileobj.read()))
        self._container_file = source_file

    @staticmethod
    def iter_decode(fileobj):
        """Iterate through decoded fragments of the file

        :param fileobj: an iterable of bzip2-compressed chunks (the file is
            iterated "line" by "line", each chunk being fed to the
            decompressor as it is read)
        :return: a generator of decompressed bytestrings
        """
        decompressor = bz2.BZ2Decompressor()
        for line in fileobj:
            try:
                yield decompressor.decompress(line)
            except EOFError:
                # The decompressor already saw the end of the bzip2 stream;
                # anything after it is not part of the container.
                return

    @staticmethod
    def decode_name(name):
        """Decode a name from its container form into a semantic form

        A single '/' separates components; a doubled '//' stands for a
        literal '/' inside a component.

        :retval: content_kind, revision_id, file_id
        """
        # The capturing group keeps the separators in the split result so
        # that '/' and '//' can be told apart below.
        segments = re.split('(//?)', name)
        names = ['']
        for segment in segments:
            if segment == '//':
                names[-1] += '/'
            elif segment == '/':
                names.append('')
            else:
                names[-1] += segment
        content_kind = names[0]
        revision_id = None
        file_id = None
        if len(names) > 1:
            revision_id = names[1]
        if len(names) > 2:
            file_id = names[2]
        return content_kind, revision_id, file_id

    def iter_records(self):
        """Iterate through bundle records

        :return: a generator of (bytes, metadata, content_kind, revision_id,
            file_id)
        :raises errors.BadBundle: if a metadata record does not have exactly
            one name, or if a non-header metadata record is not followed by
            a body record
        """
        iterator = pack.iter_records_from_file(self._container_file)
        for names, bytes in iterator:
            if len(names) != 1:
                raise errors.BadBundle('Record has %d names instead of 1'
                                       % len(names))
            metadata = bencode.bdecode(bytes)
            if metadata['storage_kind'] == 'header':
                bytes = None
            else:
                # The body is the record immediately following the metadata.
                # A StopIteration escaping from here would silently end this
                # generator, hiding the missing body from the caller.
                try:
                    _unused, bytes = iterator.next()
                except StopIteration:
                    raise errors.BadBundle('Record has no body')
            yield (bytes, metadata) + self.decode_name(names[0][0])
2520.4.22 by Aaron Bentley
Create ContainerReader
219
220
4237.3.1 by Jelmer Vernooij
Add new module with generic serializer information; keep XML-specific bits in
221
class BundleSerializerV4(bundle_serializer.BundleSerializer):
    """Implement the high-level bundle interface"""

    def write(self, repository, revision_ids, forced_bases, fileobj):
        """Write a bundle to a file-like object

        For backwards-compatibility only

        :param repository: The repository to retrieve revision data from
        :param revision_ids: The revision ids to bundle; the first one is
            used as the target
        :param forced_bases: A dict mapping a target revision id to the base
            to use for it
        :param fileobj: The file-like object to write to
        :return: the revision ids written, as returned by
            BundleWriteOperation.do_write
        """
        write_op = BundleWriteOperation.from_old_args(repository, revision_ids,
                                                      forced_bases, fileobj)
        return write_op.do_write()

    def write_bundle(self, repository, target, base, fileobj):
        """Write a bundle to a file object

        :param repository: The repository to retrieve revision data from
        :param target: The head revision to include ancestors of
        :param base: The ancestor of the target to stop including ancestors
            at.
        :param fileobj: The file-like object to write to
        :return: the revision ids written, as returned by
            BundleWriteOperation.do_write
        """
        write_op = BundleWriteOperation(base, target, repository, fileobj)
        return write_op.do_write()

    def read(self, file):
        """return a reader object for a given file"""
        bundle = BundleInfoV4(file, self)
        return bundle

    @staticmethod
    def get_source_serializer(info):
        """Retrieve the serializer for a given info object

        :param info: a mapping with a 'serializer' entry naming the format
            to look up in serializer.format_registry
        """
        return serializer.format_registry.get(info['serializer'])
2520.4.101 by Aaron Bentley
Use a registry to look up xml serializers by format
254
2520.4.50 by Aaron Bentley
Split write functionality out into a separate object
255
256
class BundleWriteOperation(object):
    """Perform the operation of writing revisions to a bundle"""

    @classmethod
    def from_old_args(cls, repository, revision_ids, forced_bases, fileobj):
        """Create a BundleWriteOperation from old-style arguments"""
        base, target = cls.get_base_target(revision_ids, forced_bases,
                                           repository)
        return BundleWriteOperation(base, target, repository, fileobj,
                                    revision_ids)

    def __init__(self, base, target, repository, fileobj, revision_ids=None):
        """Constructor

        :param base: The ancestor of the target at which to stop including
            revisions
        :param target: The head revision to bundle
        :param repository: The repository to read revision data from
        :param fileobj: The file-like object the bundle is written to
        :param revision_ids: The exact revision ids to bundle.  If None,
            they are computed from the repository graph as the ancestors of
            target that are not ancestors of base.
        """
        self.base = base
        self.target = target
        self.repository = repository
        self.bundle = BundleWriter(fileobj)
        if revision_ids is not None:
            self.revision_ids = revision_ids
        else:
            graph = repository.get_graph()
            revision_ids = graph.find_unique_ancestors(target, [base])
            # Strip ghosts: keep only the revisions for which the graph
            # actually returns a parent-map entry.
            parents = graph.get_parent_map(revision_ids)
            self.revision_ids = [r for r in revision_ids if r in parents]
        self.revision_keys = set([(revid,) for revid in self.revision_ids])

    def do_write(self):
        """Write all data to the bundle

        The repository is read-locked for the duration of the write.

        :return: the list of revision ids that were bundled
        """
        trace.note('Bundling %d revision(s).', len(self.revision_ids))
        self.repository.lock_read()
        try:
            self.bundle.begin()
            self.write_info()
            self.write_files()
            self.write_revisions()
            self.bundle.end()
        finally:
            self.repository.unlock()
        return self.revision_ids

    def write_info(self):
        """Write format info"""
        serializer_format = self.repository.get_serializer_format()
        # Stored as 1/0 rather than True/False.
        supports_rich_root = {True: 1, False: 0}[
            self.repository.supports_rich_root()]
        self.bundle.add_info_record(serializer=serializer_format,
                                    supports_rich_root=supports_rich_root)

    def write_files(self):
        """Write bundle records for all revisions of all files"""
        text_keys = []
        altered_fileids = self.repository.fileids_altered_by_revision_ids(
                self.revision_ids)
        for file_id, revision_ids in altered_fileids.iteritems():
            for revision_id in revision_ids:
                text_keys.append((file_id, revision_id))
        self._add_mp_records_keys('file', self.repository.texts, text_keys)

    def write_revisions(self):
        """Write bundle records for all revisions and signatures"""
        inv_vf = self.repository.inventories
        revision_order = [key[-1] for key in multiparent.topo_iter_keys(
            inv_vf, self.revision_keys)]
        if self.target is not None and self.target in self.revision_ids:
            # Move the target to the end so that it is written last.
            revision_order.remove(self.target)
            revision_order.append(self.target)
        self._add_mp_records_keys(
            'inventory', inv_vf, [(revid,) for revid in revision_order])
        parent_map = self.repository.get_parent_map(revision_order)
        revision_to_str = self.repository._serializer.write_revision_to_string
        revisions = self.repository.get_revisions(revision_order)
        for revision in revisions:
            revision_id = revision.revision_id
            parents = parent_map.get(revision_id)
            revision_text = revision_to_str(revision)
            self.bundle.add_fulltext_record(revision_text, parents,
                                            'revision', revision_id)
            # Only the signature lookup is expected to raise
            # NoSuchRevision; a revision without a signature simply gets no
            # signature record.
            try:
                signature_text = self.repository.get_signature_text(
                    revision_id)
            except errors.NoSuchRevision:
                continue
            self.bundle.add_fulltext_record(signature_text, parents,
                                            'signature', revision_id)

    @staticmethod
    def get_base_target(revision_ids, forced_bases, repository):
        """Determine the base and target from old-style revision ids

        :param revision_ids: The revision ids to bundle; the first one is
            the target
        :param forced_bases: A dict mapping a target revision id to the base
            to use for it
        :param repository: The repository used to look up the target's
            parents when no base is forced
        :return: (base, target), or (None, None) if revision_ids is empty
        """
        if len(revision_ids) == 0:
            return None, None
        target = revision_ids[0]
        base = forced_bases.get(target)
        if base is None:
            # No forced base: fall back to the target's first parent, or
            # the null revision if it has no parents.
            parents = repository.get_revision(target).parent_ids
            if len(parents) == 0:
                base = _mod_revision.NULL_REVISION
            else:
                base = parents[0]
        return base, target

    def _add_mp_records_keys(self, repo_kind, vf, keys):
        """Add multi-parent diff records to a bundle

        :param repo_kind: The kind of object being recorded, passed through
            to BundleWriter.add_multiparent_record
        :param vf: The versioned-files object to read mpdiffs, sha1s and
            parents from
        :param keys: The keys to record; 2-tuples are treated as
            (file_id, revision_id), anything else as having no file id
        """
        ordered_keys = list(multiparent.topo_iter_keys(vf, keys))
        mpdiffs = vf.make_mpdiffs(ordered_keys)
        sha1s = vf.get_sha1s(ordered_keys)
        parent_map = vf.get_parent_map(ordered_keys)
        for mpdiff, item_key in zip(mpdiffs, ordered_keys):
            sha1 = sha1s[item_key]
            parents = [key[-1] for key in parent_map[item_key]]
            text = ''.join(mpdiff.to_patch())
            # Infer file id records as appropriate.
            if len(item_key) == 2:
                file_id = item_key[0]
            else:
                file_id = None
            self.bundle.add_multiparent_record(text, sha1, parents, repo_kind,
                                               item_key[-1], file_id)
2520.4.6 by Aaron Bentley
Get installation started
372
373
2520.4.72 by Aaron Bentley
Rename format to 4alpha
374
class BundleInfoV4(object):

    """Provide (most of) the BundleInfo interface"""

    def __init__(self, fileobj, serializer):
        """Create a BundleInfoV4.

        :param fileobj: Seekable file-like object holding the bundle; it is
            rewound with seek(0) every time a reader is requested.
        :param serializer: Object providing get_source_serializer(metadata),
            used to pick the serializer for the bundle's revision texts.
        """
        self._fileobj = fileobj
        self._serializer = serializer
        # Lazily-filled caches behind the real_revisions / revisions
        # properties.  None means "not read yet".
        self.__real_revisions = None
        self.__revisions = None

    def install(self, repository):
        """Install this bundle's revisions; see install_revisions."""
        return self.install_revisions(repository)

    def install_revisions(self, repository, stream_input=True):
        """Install this bundle's revisions into the specified repository

        The repository is write-locked for the duration of the installation
        and unlocked afterwards, whether or not the installation succeeds.

        :param repository: The repository to install into
        :param stream_input: If True, will stream input rather than reading it
            all into memory at once.  Reading it into memory all at once is
            (currently) faster.
        :return: The result of RevisionInstaller.install()
        """
        repository.lock_write()
        try:
            ri = RevisionInstaller(self.get_bundle_reader(stream_input),
                                   self._serializer, repository)
            return ri.install()
        finally:
            repository.unlock()

    def get_merge_request(self, target_repo):
        """Provide data for performing a merge

        Returns suggested base, suggested target, and patch verification status

        :param target_repo: Unused by this implementation.
        """
        return None, self.target, 'inapplicable'

    def get_bundle_reader(self, stream_input=True):
        """Return a new BundleReader for the associated bundle

        The underlying file object is rewound to its start first.

        :param stream_input: If True, the BundleReader streams input rather
            than reading it all into memory at once.  Reading it into memory
            all at once is (currently) faster.
        """
        self._fileobj.seek(0)
        return BundleReader(self._fileobj, stream_input)

    def _get_real_revisions(self):
        """Return the list of revisions deserialized from the bundle.

        The bundle is read on first access and the result cached.  The cache
        is only assigned once the whole bundle has been read successfully, so
        a failure part-way through never leaves a truncated list behind.

        :raises errors.BadBundle: if a revision record appears before any
            info record, so no source serializer is known for it.
        """
        if self.__real_revisions is None:
            revisions = []
            serializer = None
            bundle_reader = self.get_bundle_reader()
            for text, metadata, repo_kind, revision_id, file_id in \
                bundle_reader.iter_records():
                if repo_kind == 'info':
                    serializer =\
                        self._serializer.get_source_serializer(metadata)
                if repo_kind == 'revision':
                    if serializer is None:
                        raise errors.BadBundle(
                            'revision record found before info record')
                    rev = serializer.read_revision_from_string(text)
                    revisions.append(rev)
            self.__real_revisions = revisions
        return self.__real_revisions
    real_revisions = property(_get_real_revisions)

    def _get_revisions(self):
        """Return bundle_data.RevisionInfo objects for real_revisions.

        Computed on first access and cached; the cache is only assigned once
        every revision has been converted.
        """
        if self.__revisions is None:
            self.__revisions = [
                bundle_data.RevisionInfo.from_revision(revision)
                for revision in self.real_revisions]
        return self.__revisions

    revisions = property(_get_revisions)

    def _get_target(self):
        """Return the revision id of the last revision in the bundle.

        Raises IndexError if the bundle contains no revision records.
        """
        return self.revisions[-1].revision_id

    target = property(_get_target)
448
2520.4.14 by Aaron Bentley
Get most tests passing, use format header
449
450
class RevisionInstaller(object):
    """Installs revisions into a repository"""

    def __init__(self, container, serializer, repository):
        """Create a RevisionInstaller.

        :param container: Object whose iter_records() yields
            (bytes, metadata, repo_kind, revision_id, file_id) tuples.
        :param serializer: Object providing get_source_serializer(info).
        :param repository: The repository to install into.
        """
        self._container = container
        self._serializer = serializer
        self._repository = repository
        # Metadata of the bundle's 'info' record; set by _handle_info.
        self._info = None

    def install(self):
        """Perform the installation.

        Must be called with the Repository locked.

        The work is done inside a write group, which is committed on success
        and aborted (re-raising the error) on any failure.

        :return: The revision id of the last revision record in the
            container, or None if there was none.
        """
        self._repository.start_write_group()
        try:
            result = self._install_in_write_group()
        except:
            # Deliberately bare: the write group must be aborted whatever
            # went wrong, and the original error is re-raised.
            self._repository.abort_write_group()
            raise
        self._repository.commit_write_group()
        return result

    def _install_in_write_group(self):
        """Read every record from the container and install it.

        File and inventory records are buffered and installed in batches;
        revision and signature records are installed as they are read.

        :return: The revision id of the last revision record seen, or None.
        """
        current_file = None
        pending_file_records = []
        pending_inventory_records = []
        target_revision = None
        for text, metadata, repo_kind, revision_id, file_id in\
            self._container.iter_records():
            if repo_kind == 'info':
                # Only one info record is allowed per bundle.
                if self._info is not None:
                    raise AssertionError()
                self._handle_info(metadata)
            if (pending_file_records and
                (repo_kind, file_id) != ('file', current_file)):
                # Flush the data for a single file - prevents memory
                # spiking due to buffering all files in memory.
                self._install_mp_records_keys(self._repository.texts,
                    pending_file_records)
                current_file = None
                del pending_file_records[:]
            if len(pending_inventory_records) > 0 and repo_kind != 'inventory':
                self._install_inventory_records(pending_inventory_records)
                pending_inventory_records = []
            if repo_kind == 'inventory':
                pending_inventory_records.append(
                    ((revision_id,), metadata, text))
            if repo_kind == 'revision':
                target_revision = revision_id
                self._install_revision(revision_id, metadata, text)
            if repo_kind == 'signature':
                self._install_signature(revision_id, metadata, text)
            if repo_kind == 'file':
                current_file = file_id
                pending_file_records.append(
                    ((file_id, revision_id), metadata, text))
        # Flush whatever is still buffered once the stream ends.
        self._install_mp_records_keys(self._repository.texts,
            pending_file_records)
        if len(pending_inventory_records) > 0:
            # Without this, a stream that ends with inventory records would
            # silently drop them.
            self._install_inventory_records(pending_inventory_records)
        return target_revision

    def _handle_info(self, info):
        """Extract data from an info record"""
        self._info = info
        self._source_serializer = self._serializer.get_source_serializer(info)
        # A root text has to be added for each inventory when the bundle
        # reports no rich-root support but the target repository has it.
        self.update_root = bool(info['supports_rich_root'] == 0 and
                                self._repository.supports_rich_root())

    def _install_mp_records(self, versionedfile, records):
        """Add the records not already present to a versioned file.

        :param versionedfile: Target supporting `in` and add_mpdiffs.
        :param records: Sequence of (version, metadata, patch_text) tuples;
            metadata must provide 'parents' and 'sha1'.
        """
        if len(records) == 0:
            return
        d_func = multiparent.MultiParent.from_patch
        vf_records = [(r, m['parents'], m['sha1'], d_func(t)) for r, m, t in
                      records if r not in versionedfile]
        versionedfile.add_mpdiffs(vf_records)

    def _install_mp_records_keys(self, versionedfile, records):
        """Add tuple-keyed multi-parent diff records to versioned files.

        :param versionedfile: Target providing add_mpdiffs.
        :param records: Iterable of (key, metadata, patch_text) tuples;
            metadata must provide 'parents' and 'sha1'.
        """
        d_func = multiparent.MultiParent.from_patch
        vf_records = []
        for key, meta, text in records:
            # Adapt to tuple interface: A length two key is a file_id,
            # revision_id pair, a length 1 key is a
            # revision/signature/inventory. We need to do this because
            # the metadata extraction from the bundle has not yet been updated
            # to use the consistent tuple interface itself.
            if len(key) == 2:
                prefix = key[:1]
            else:
                prefix = ()
            parents = [prefix + (parent,) for parent in meta['parents']]
            vf_records.append((key, parents, meta['sha1'], d_func(text)))
        versionedfile.add_mpdiffs(vf_records)

    def _install_inventory_records(self, records):
        """Install a batch of inventory records.

        When the bundle's serializer format matches the repository's, the
        mpdiffs are added to the repository's inventories directly.
        Otherwise each inventory text is rebuilt from its mpdiff and parent
        inventories, checked against the recorded sha1, parsed with the
        source serializer and added with add_inventory.

        :param records: List of ((revision_id,), metadata, patch_text).
        :raises errors.BadBundle: if a rebuilt text does not match its sha1.
        :raises errors.IncompatibleRevision: if the repository rejects the
            inventory kind, or _handle_root finds the root incompatible.
        """
        if self._info['serializer'] == self._repository._serializer.format_num:
            return self._install_mp_records_keys(self._repository.inventories,
                records)
        for key, metadata, text in records:
            revision_id = key[-1]
            parent_ids = metadata['parents']
            parents = [self._repository.get_inventory(p)
                       for p in parent_ids]
            # The mpdiff was made against texts in the source format, so the
            # parents are re-serialized with the source serializer.
            p_texts = [self._source_serializer.write_inventory_to_string(p)
                       for p in parents]
            target_lines = multiparent.MultiParent.from_patch(text).to_lines(
                p_texts)
            sha1 = osutils.sha_strings(target_lines)
            if sha1 != metadata['sha1']:
                raise errors.BadBundle("Can't convert to target format")
            target_inv = self._source_serializer.read_inventory_from_string(
                ''.join(target_lines))
            self._handle_root(target_inv, parent_ids)
            try:
                self._repository.add_inventory(revision_id, target_inv,
                                               parent_ids)
            except errors.UnsupportedInventoryKind:
                raise errors.IncompatibleRevision(repr(self._repository))

    def _handle_root(self, target_inv, parent_ids):
        """Reconcile an inventory's root with the repository's root model.

        If update_root is set, an empty text is added for the root file id
        at this revision.  Otherwise, for a repository without rich-root
        support, the root's revision must equal the inventory's revision.

        :raises errors.IncompatibleRevision: if that check fails.
        """
        revision_id = target_inv.revision_id
        if self.update_root:
            text_key = (target_inv.root.file_id, revision_id)
            parent_keys = [(target_inv.root.file_id, parent) for
                parent in parent_ids]
            self._repository.texts.add_lines(text_key, parent_keys, [])
        elif not self._repository.supports_rich_root():
            if target_inv.root.revision != revision_id:
                raise errors.IncompatibleRevision(repr(self._repository))

    def _install_revision(self, revision_id, metadata, text):
        """Add a revision to the repository unless it is already present.

        :param revision_id: Id used for the presence check.
        :param metadata: Unused.
        :param text: Serialized revision, read with the source serializer.
        """
        if self._repository.has_revision(revision_id):
            return
        revision = self._source_serializer.read_revision_from_string(text)
        self._repository.add_revision(revision.revision_id, revision)

    def _install_signature(self, revision_id, metadata, text):
        """Add a signature text unless the revision already has one.

        :param revision_id: The revision the signature belongs to.
        :param metadata: Unused.
        :param text: The signature text.
        """
        # NOTE(review): the returned transaction is never used; the call is
        # kept in case get_transaction() has side effects - confirm and drop.
        transaction = self._repository.get_transaction()
        if self._repository.has_signature_for_revision_id(revision_id):
            return
        self._repository.add_signature_text(revision_id, text)