~bzr-pqm/bzr/bzr.dev

3059.2.2 by Vincent Ladeuil
Read http responses on demand without buffering the whole body
1
# Copyright (C) 2005, 2006, 2007 Canonical Ltd
1540.3.18 by Martin Pool
Style review fixes (thanks robertc)
2
#
1185.11.19 by John Arbash Meinel
Testing put and append, also testing agaist file-like objects as well as strings.
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
1540.3.18 by Martin Pool
Style review fixes (thanks robertc)
7
#
1185.11.19 by John Arbash Meinel
Testing put and append, also testing agaist file-like objects as well as strings.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
1540.3.18 by Martin Pool
Style review fixes (thanks robertc)
12
#
1185.11.19 by John Arbash Meinel
Testing put and append, also testing agaist file-like objects as well as strings.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
1540.3.3 by Martin Pool
Review updates of pycurl transport
16
17
"""Base implementation of Transport over http.
18
19
There are separate implementation modules for each http client implementation.
907.1.21 by John Arbash Meinel
Adding http transport as a valid transport protocol.
20
"""
21
1711.4.14 by John Arbash Meinel
Custom HttpRequestHandler which treats all paths as utf8 encoded
22
from cStringIO import StringIO
1786.1.25 by John Arbash Meinel
Test that we can extract headers properly.
23
import mimetools
1540.3.23 by Martin Pool
Allow urls like http+pycurl://host/ to use a particular impl
24
import re
1540.3.3 by Martin Pool
Review updates of pycurl transport
25
import urlparse
26
import urllib
2172.3.2 by v.ladeuil+lp at free
Fix the missing import and typos in comments.
27
import sys
1786.1.6 by John Arbash Meinel
Missed a couple of imports
28
2485.8.24 by Vincent Ladeuil
Finish http refactoring. Test suite passing.
29
from bzrlib import (
3675.1.1 by Martin Pool
Merge and update log+ transport decorator
30
    debug,
2485.8.24 by Vincent Ladeuil
Finish http refactoring. Test suite passing.
31
    errors,
32
    ui,
33
    urlutils,
34
    )
2400.1.3 by Andrew Bennetts
Split smart transport code into several separate modules.
35
from bzrlib.smart import medium
2520.2.1 by Vincent Ladeuil
First step to fix #115209 use _coalesce_offsets like other transports.
36
from bzrlib.symbol_versioning import (
37
        deprecated_method,
38
        )
1185.11.1 by John Arbash Meinel
(broken) Transport work is merged in. Tests do not pass yet.
39
from bzrlib.trace import mutter
2018.2.2 by Andrew Bennetts
Implement HTTP smart server.
40
from bzrlib.transport import (
2485.8.16 by Vincent Ladeuil
Create a new, empty, ConnectedTransport class.
41
    ConnectedTransport,
2520.2.1 by Vincent Ladeuil
First step to fix #115209 use _coalesce_offsets like other transports.
42
    _CoalescedOffset,
2018.2.2 by Andrew Bennetts
Implement HTTP smart server.
43
    Transport,
44
    )
1540.3.6 by Martin Pool
[merge] update from bzr.dev
45
2004.1.9 by vila
Takes jam's remarks into account when possible, add TODOs for the rest.
46
# TODO: This is not used anymore by HttpTransport_urllib
47
# (extracting the auth info and prompting the user for a password
48
# have been split), only the tests still use it. It should be
49
# deleted and the tests rewritten ASAP to stay in sync.
1185.40.20 by Robey Pointer
allow user:pass@ info in http urls to be used for auth; this should be easily expandable later to use auth config files
50
def extract_auth(url, password_manager):
    """Extract auth parameters from an HTTP/HTTPS url and register them.

    The ``user[:password]@`` part of the url, if any, is removed from the
    network location and the credentials are handed to the password manager.
    When a user is given without a password, the password is requested via
    ``ui.ui_factory.get_password``.

    :param url: An absolute http or https url; the scheme may carry an
        implementation qualifier (e.g. ``http+urllib``).
    :param password_manager: Object receiving the credentials; it is called
        as ``add_password(None, host, username, password)``.
    :returns: The url, minus the auth parameters (which confuse urllib2).
    :raises ValueError: If url is not an absolute http(s) url.
    """
    if not re.match(r'^(https?)(\+\w+)?://', url):
        raise ValueError(
            'invalid absolute url %r' % (url,))
    scheme, netloc, path, query, fragment = urlparse.urlsplit(url)

    if '@' in netloc:
        # Split on the *last* '@': the host[:port] part can never contain
        # one, whereas an unquoted '@' may show up in the user or password.
        # Splitting on the first one would leave part of the credentials in
        # the host and in the returned url.
        auth, netloc = netloc.rsplit('@', 1)
        if ':' in auth:
            username, password = auth.split(':', 1)
        else:
            username, password = auth, None
        # The port, if any, is not part of the host given to the manager
        if ':' in netloc:
            host = netloc.split(':', 1)[0]
        else:
            host = netloc
        username = urllib.unquote(username)
        if password is not None:
            password = urllib.unquote(password)
        else:
            # No password in the url: ask the user
            password = ui.ui_factory.get_password(
                prompt='HTTP %(user)s@%(host)s password',
                user=username, host=host)
        password_manager.add_password(None, host, username, password)
    url = urlparse.urlunsplit((scheme, netloc, path, query, fragment))
    return url
1553.1.5 by James Henstridge
Make HTTP transport has() method do HEAD requests, and update test to
80
1185.50.83 by John Arbash Meinel
[merge] James Henstridge: Set Agent string in http headers, add tests for it.
81
2485.8.16 by Vincent Ladeuil
Create a new, empty, ConnectedTransport class.
82
class HttpTransportBase(ConnectedTransport, medium.SmartClientMedium):
1540.3.1 by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib.
83
    """Base class for http implementations.
84
1540.3.23 by Martin Pool
Allow urls like http+pycurl://host/ to use a particular impl
85
    Does URL parsing, etc, but not any network IO.
86
87
    The protocol can be given as e.g. http+urllib://host/ to use a particular
88
    implementation.
89
    """
90
2485.8.24 by Vincent Ladeuil
Finish http refactoring. Test suite passing.
91
    # _unqualified_scheme: "http" or "https"
92
    # _scheme: may have "+pycurl", etc
1540.3.24 by Martin Pool
Add new protocol 'http+pycurl' that always uses PyCurl.
93
2485.8.59 by Vincent Ladeuil
Update from review comments.
94
    def __init__(self, base, _from_transport=None):
        """Set the base path where files will be stored.

        :param base: The http or https url of the transport; the scheme may
            be qualified with an implementation name (e.g. http+urllib://).
        :param _from_transport: Optional transport this one is built from. It
            is passed to the parent constructor and its range hint is reused.
        :raises AssertionError: If base is not an http(s) url.
        """
        scheme_match = re.match(r'^(https?)(\+\w+)?://', base)
        if not scheme_match:
            raise AssertionError("not a http url: %r" % base)
        unqualified, qualifier = scheme_match.groups()
        if qualifier:
            # Drop the leading '+' to keep only the implementation name
            qualifier = qualifier[1:]
        self._unqualified_scheme = unqualified
        self._impl_name = qualifier
        super(HttpTransportBase, self).__init__(
            base, _from_transport=_from_transport)
        # The range hint evolves during the life of the transport object. We
        # first try multi-range requests; if the server returns bogus
        # results, we fall back to single range requests and, as a last
        # resort, we give up on ranges altogether. Once learnt, the hint is
        # handed over to clones.
        if _from_transport is None:
            self._range_hint = 'multi'
        else:
            self._range_hint = _from_transport._range_hint
907.1.21 by John Arbash Meinel
Adding http transport as a valid transport protocol.
117
118
    def has(self, relpath):
        """Abstract in this base class; implementations must override it.

        :raises NotImplementedError: Always.
        """
        raise NotImplementedError("has() is abstract on %r" % self)
120
2164.2.15 by Vincent Ladeuil
Http redirections are not followed by default. Do not use hints
121
    def get(self, relpath):
        """Get the file at the given relative path.

        The whole response body is read and handed back wrapped in a
        StringIO.

        :param relpath: The relative path to the file
        """
        _code, response = self._get(relpath, None)
        # FIXME: some callers want an iterable... One step forward, three steps
        # backwards :-/ And not only an iterable, but an iterable that can be
        # seeked backwards, so we will never be able to do that.  One such
        # known client is bzrlib.bundle.serializer.v4.get_bundle_reader. At the
        # time of this writing it's even the only known client -- vila20071203
        content = response.read()
        return StringIO(content)
1540.3.26 by Martin Pool
[merge] bzr.dev; pycurl not updated for readv yet
133
2164.2.15 by Vincent Ladeuil
Http redirections are not followed by default. Do not use hints
134
    def _get(self, relpath, ranges, tail_amount=0):
        """Get a file, or part of a file.

        Abstract in this base class: each http implementation provides it.

        :param relpath: Path relative to transport base URL
        :param ranges: None to get the whole file;
            or  a list of _CoalescedOffset to fetch parts of a file.
        :param tail_amount: The amount to get from the end of the file.

        :returns: (http_code, result_file)
        :raises NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError(self._get)
1594.2.5 by Robert Collins
Readv patch from Johan Rydberg giving knits partial download support.
145
3133.1.2 by Vincent Ladeuil
Fix #177643 by making pycurl handle url-embedded credentials again.
146
    def _remote_path(self, relpath):
        """See ConnectedTransport._remote_path.

        The url is rebuilt from the unqualified scheme, host, port and path
        only: user and password are never embedded in what is sent to the
        server.
        """
        utf8_relpath = urlutils.unescape(relpath).encode('utf-8')
        full_path = self._combine_paths(self._path, utf8_relpath)
        # The two None are the user and password slots, left empty on purpose
        return self._unsplit_url(
            self._unqualified_scheme, None, None,
            self._host, self._port, full_path)
155
156
    def _create_auth(self):
        """Return a dict holding the credentials provided at build time.

        The keys are host, port, user, password, protocol (the unqualified
        scheme) and path.
        """
        return {
            'host': self._host,
            'port': self._port,
            'user': self._user,
            'password': self._password,
            'protocol': self._unqualified_scheme,
            'path': self._path,
            }
163
2018.2.6 by Andrew Bennetts
HTTP client starting to work (pycurl for the moment).
164
    def get_request(self):
        """Return a new SmartClientHTTPMediumRequest bound to this object."""
        request = SmartClientHTTPMediumRequest(self)
        return request
2018.2.6 by Andrew Bennetts
HTTP client starting to work (pycurl for the moment).
166
2018.2.3 by Andrew Bennetts
Starting factoring out the smart server client "medium" from the protocol.
167
    def get_smart_medium(self):
        """See Transport.get_smart_medium.

        HttpTransportBase inherits from SmartClientMedium and so is its own
        medium: the transport itself is returned.
        """
        return self
2018.2.3 by Andrew Bennetts
Starting factoring out the smart server client "medium" from the protocol.
174
2520.2.2 by Vincent Ladeuil
Fix #115209 by issuing a single range request on 400: Bad Request
175
    def _degrade_range_hint(self, relpath, ranges, exc_info):
2000.3.9 by v.ladeuil+lp at free
The tests that would have help avoid bug #73948 and all that mess :)
176
        if self._range_hint == 'multi':
177
            self._range_hint = 'single'
2520.2.2 by Vincent Ladeuil
Fix #115209 by issuing a single range request on 400: Bad Request
178
            mutter('Retry "%s" with single range request' % relpath)
2000.3.9 by v.ladeuil+lp at free
The tests that would have help avoid bug #73948 and all that mess :)
179
        elif self._range_hint == 'single':
180
            self._range_hint = None
2520.2.2 by Vincent Ladeuil
Fix #115209 by issuing a single range request on 400: Bad Request
181
            mutter('Retry "%s" without ranges' % relpath)
2000.3.9 by v.ladeuil+lp at free
The tests that would have help avoid bug #73948 and all that mess :)
182
        else:
3024.2.3 by Vincent Ladeuil
Rewrite http_readv to allow several GET requests. Smoke tested against branch reported in the bug.
183
            # We tried all the tricks, but nothing worked. We re-raise the
184
            # original exception; the 'mutter' calls above will indicate that
185
            # further tries were unsuccessful
2172.3.1 by v.ladeuil+lp at free
Merge a recent bzr.dev (2172) and takes John's remarks into account.
186
            raise exc_info[0], exc_info[1], exc_info[2]
2000.3.9 by v.ladeuil+lp at free
The tests that would have help avoid bug #73948 and all that mess :)
187
2520.2.2 by Vincent Ladeuil
Fix #115209 by issuing a single range request on 400: Bad Request
188
    # _coalesce_offsets is a helper for readv, it tries to combine ranges
    # without degrading readv performance. _bytes_to_read_before_seek is the
    # value used for the fudge_factor parameter and has been tuned for other
    # transports. For HTTP, the name is inappropriate but the parameter is
    # still useful and helps reduce the number of chunks in the response. The
    # overhead for a chunk (headers, length, footer around the data itself)
    # is variable but around 50 bytes. We use 128 to reduce the range
    # specifiers that appear in the header, some servers (notably Apache)
    # enforce a maximum length for a header and issue a '400: Bad request'
    # error when too many ranges are specified.
198
    _bytes_to_read_before_seek = 128
199
    # No limit on the offset number that get combined into one, we are trying
3059.2.17 by Vincent Ladeuil
Limit GET requests by body size instead of number of ranges.
200
    # to avoid downloading the whole file.
3024.2.1 by Vincent Ladeuil
Fix 165061 by using the correct _max_readv_combine attribute.
201
    _max_readv_combine = 0
3024.2.3 by Vincent Ladeuil
Rewrite http_readv to allow several GET requests. Smoke tested against branch reported in the bug.
202
    # By default Apache has a limit of ~400 ranges before replying with a 400
203
    # Bad Request. So we go underneath that amount to be safe.
204
    _max_get_ranges = 200
3059.2.17 by Vincent Ladeuil
Limit GET requests by body size instead of number of ranges.
205
    # We impose no limit on the range size. But see _pycurl.py for a different
206
    # use.
207
    _get_max_size = 0
2520.2.1 by Vincent Ladeuil
First step to fix #115209 use _coalesce_offsets like other transports.
208
2745.5.1 by Robert Collins
* New parameter on ``bzrlib.transport.Transport.readv``
209
    def _readv(self, relpath, offsets):
        """Get parts of the file at the given relative path.

        This is a generator. The offsets are coalesced, fetched through
        _coalesce_readv and served back in the order they were requested.
        When a ShortReadvError, InvalidRange or InvalidHttpRange is caught,
        the offsets not served yet are requested again, after degrading the
        range hint when appropriate (see the except clause below).

        :param relpath: Path of the file, handed to _coalesce_readv.
        :param offsets: A list of (offset, size) tuples.
        :returns: A generator of (offset, data) tuples
        """

        # offsets may be a generator, we will iterate it several times, so
        # build a list
        offsets = list(offsets)

        try_again = True
        # Offset that was pending when the previous attempt failed, used to
        # detect the same offset failing twice in a row
        retried_offset = None
        while try_again:
            try_again = False

            # Coalesce the offsets to minimize the GET requests issued
            sorted_offsets = sorted(offsets)
            coalesced = self._coalesce_offsets(
                sorted_offsets, limit=self._max_readv_combine,
                fudge_factor=self._bytes_to_read_before_seek,
                max_size=self._get_max_size)

            # Turn it into a list, we will iterate it several times
            coalesced = list(coalesced)
            if 'http' in debug.debug_flags:
                mutter('http readv of %s  offsets => %s collapsed %s',
                    relpath, len(offsets), len(coalesced))

            # Cache the data read, but only until it's been used
            data_map = {}
            # We will iterate on the data received from the GET requests and
            # serve the corresponding offsets respecting the initial order. We
            # need an offset iterator for that.
            iter_offsets = iter(offsets)
            # NOTE(review): once iter_offsets is exhausted, next() raises
            # StopIteration, which is not caught by the except clause below;
            # under Python 2 generator semantics that presumably just ends
            # this generator -- confirm before porting.
            cur_offset_and_size = iter_offsets.next()

            try:
                for cur_coal, rfile in self._coalesce_readv(relpath, coalesced):
                    # Split the received chunk
                    for offset, size in cur_coal.ranges:
                        # Ranges are relative to the start of their coalesced
                        # offset
                        start = cur_coal.start + offset
                        rfile.seek(start, 0)
                        data = rfile.read(size)
                        data_len = len(data)
                        if data_len != size:
                            raise errors.ShortReadvError(relpath, start, size,
                                                         actual=data_len)
                        if (start, size) == cur_offset_and_size:
                            # The offset requested are sorted as the coalesced
                            # ones, no need to cache. Win !
                            yield cur_offset_and_size[0], data
                            cur_offset_and_size = iter_offsets.next()
                        else:
                            # Different sorting. We need to cache.
                            data_map[(start, size)] = data

                    # Yield everything we can
                    while cur_offset_and_size in data_map:
                        # Clean the cached data since we use it
                        # XXX: will break if offsets contains duplicates --
                        # vila20071129
                        this_data = data_map.pop(cur_offset_and_size)
                        yield cur_offset_and_size[0], this_data
                        cur_offset_and_size = iter_offsets.next()

            except (errors.ShortReadvError, errors.InvalidRange,
                    errors.InvalidHttpRange), e:
                mutter('Exception %r: %s during http._readv',e, e)
                if (not isinstance(e, errors.ShortReadvError)
                    or retried_offset == cur_offset_and_size):
                    # We don't degrade the range hint for ShortReadvError since
                    # they do not indicate a problem with the server ability to
                    # handle ranges. Except when we fail to get back a required
                    # offset twice in a row. In that case, falling back to
                    # single range or whole file should help or end up in a
                    # fatal exception.
                    self._degrade_range_hint(relpath, coalesced, sys.exc_info())
                # Some offsets may have been already processed, so we retry
                # only the unsuccessful ones.
                offsets = [cur_offset_and_size] + [o for o in iter_offsets]
                retried_offset = cur_offset_and_size
                try_again = True
3024.2.3 by Vincent Ladeuil
Rewrite http_readv to allow several GET requests. Smoke tested against branch reported in the bug.
292
293
    def _coalesce_readv(self, relpath, coalesced):
        """Issue several GET requests to satisfy the coalesced offsets

        This is a generator yielding (coalesced_offset, file) pairs, where
        file is the result file of the GET request covering that offset.

        :param relpath: Path handed to _get.
        :param coalesced: The list of coalesced offsets to fetch.
        :raises AssertionError: If the range hint has an unknown value.
        """

        def fetch(batch):
            # An empty batch issues no request at all
            if batch:
                # Note that the _get below may raise
                # errors.InvalidHttpRange. It's the caller's responsibility to
                # decide how to retry since it may provide different coalesced
                # offsets.
                code, rfile = self._get(relpath, batch)
                for one_coal in batch:
                    yield one_coal, rfile

        if self._range_hint is None:
            # Download whole file
            for pair in fetch(coalesced):
                yield pair
            return

        nb_coalesced = len(coalesced)
        if self._range_hint == 'multi':
            max_ranges = self._max_get_ranges
        elif self._range_hint == 'single':
            max_ranges = nb_coalesced
        else:
            raise AssertionError("Unknown _range_hint %r"
                                 % (self._range_hint,))
        # TODO: Some web servers may ignore the range requests and return
        # the whole file, we may want to detect that and avoid further
        # requests.
        # Hint: test_readv_multiple_get_requests will fail once we do that
        pending_size = 0
        pending = []
        for coal in coalesced:
            too_big = (self._get_max_size > 0
                       and pending_size + coal.length > self._get_max_size)
            if too_big or len(pending) >= max_ranges:
                # Get that much and yield
                for pair in fetch(pending):
                    yield pair
                # Restart from scratch, the current offset is added below
                pending = []
                pending_size = 0
            pending.append(coal)
            pending_size += coal.length
        # Get the rest and yield
        for pair in fetch(pending):
            yield pair
1786.1.5 by John Arbash Meinel
Move the common Multipart stuff into plain http, and wrap pycurl response so that it matches the urllib response object.
341
2671.3.1 by Robert Collins
* New method ``bzrlib.transport.Transport.get_recommended_page_size``.
342
    def recommended_page_size(self):
        """See Transport.recommended_page_size().

        A large page size (64 KiB) is suggested for HTTP, to reduce the
        overhead introduced by latency.
        """
        kib = 1024
        return 64 * kib
349
2018.2.10 by Andrew Bennetts
Tidy up TODOs, further testing and fixes for SmartServerRequestProtocolOne, and remove a read_bytes(1) call.
350
    def _post(self, body_bytes):
        """POST body_bytes to .bzr/smart on this transport.

        Abstract in this base class: each http implementation provides it.

        :returns: (response code, response body file-like object).
        :raises NotImplementedError: Always, in this base class.
        """
        # TODO: Requiring all the body_bytes to be available at the beginning of
        # the POST may require large client buffers.  It would be nice to have
        # an interface that allows streaming via POST when possible (and
        # degrades to a local buffer when not).
        raise NotImplementedError(self._post)
360
1955.3.6 by John Arbash Meinel
Lots of deprecation warnings, but no errors
361
    def put_file(self, relpath, f, mode=None):
        """Copy the file-like object into the location.

        Not available over http.

        :param relpath: Location to put the contents, relative to base.
        :param f:       File-like object.
        :raises errors.TransportNotPossible: Always.
        """
        raise errors.TransportNotPossible('http PUT not supported')
907.1.21 by John Arbash Meinel
Adding http transport as a valid transport protocol.
368
1185.58.2 by John Arbash Meinel
Added mode to the appropriate transport functions, and tests to make sure they work.
369
    def mkdir(self, relpath, mode=None):
        """Create a directory at the given path.

        Not available over http.

        :raises errors.TransportNotPossible: Always.
        """
        raise errors.TransportNotPossible('http does not support mkdir()')
907.1.21 by John Arbash Meinel
Adding http transport as a valid transport protocol.
372
1534.4.15 by Robert Collins
Remove shutil dependency in upgrade - create a delete_tree method for transports.
373
    def rmdir(self, relpath):
        """See Transport.rmdir.

        Not available over http.

        :raises errors.TransportNotPossible: Always.
        """
        raise errors.TransportNotPossible('http does not support rmdir()')
1534.4.15 by Robert Collins
Remove shutil dependency in upgrade - create a delete_tree method for transports.
376
1955.3.15 by John Arbash Meinel
Deprecate 'Transport.append' in favor of Transport.append_file or Transport.append_bytes
377
    def append_file(self, relpath, f, mode=None):
        """Append the text in the file-like object into the final location.

        Not available over http.

        :raises errors.TransportNotPossible: Always.
        """
        raise errors.TransportNotPossible('http does not support append()')
907.1.21 by John Arbash Meinel
Adding http transport as a valid transport protocol.
382
383
    def copy(self, rel_from, rel_to):
        """Refuse to copy the item at rel_from to the location at rel_to.

        :raises errors.TransportNotPossible: always.
        """
        reason = 'http does not support copy()'
        raise errors.TransportNotPossible(reason)
907.1.21 by John Arbash Meinel
Adding http transport as a valid transport protocol.
386
1185.58.2 by John Arbash Meinel
Added mode to the appropriate transport functions, and tests to make sure they work.
387
    def copy_to(self, relpaths, other, mode=None, pb=None):
        """Copy a set of entries from self into another Transport.

        :param relpaths: A list/generator of entries to be copied.
        :param other: The destination transport.
        :param mode: Passed through to the parent implementation.
        :param pb: Passed through to the parent implementation.
        :raises errors.TransportNotPossible: if other is an
            HttpTransportBase.

        TODO: if other is LocalTransport, is it possible to
              do better than put(get())?
        """
        # At this point HttpTransport might be able to check and see if
        # the remote location is the same, and rather than download, and
        # then upload, it could just issue a remote copy_this command.
        if not isinstance(other, HttpTransportBase):
            parent = super(HttpTransportBase, self)
            return parent.copy_to(relpaths, other, mode=mode, pb=pb)
        raise errors.TransportNotPossible(
            'http cannot be the target of copy_to()')
907.1.28 by John Arbash Meinel
Added pb to function that were missing, implemented a basic double-dispatch copy_to function.
404
907.1.21 by John Arbash Meinel
Adding http transport as a valid transport protocol.
405
    def move(self, rel_from, rel_to):
        """Refuse to move the item at rel_from to the location at rel_to.

        :raises errors.TransportNotPossible: always.
        """
        reason = 'http does not support move()'
        raise errors.TransportNotPossible(reason)
907.1.21 by John Arbash Meinel
Adding http transport as a valid transport protocol.
408
409
    def delete(self, relpath):
        """Refuse to delete the item at relpath.

        :raises errors.TransportNotPossible: always.
        """
        reason = 'http does not support delete()'
        raise errors.TransportNotPossible(reason)
907.1.21 by John Arbash Meinel
Adding http transport as a valid transport protocol.
412
2634.1.1 by Robert Collins
(robertc) Reinstate the accidentally backed out external_url patch.
413
    def external_url(self):
        """See bzrlib.transport.Transport.external_url.

        :return: self.base, unchanged.
        """
        # An http URL can be handed to external tools as it is.
        external = self.base
        return external
417
1530.1.3 by Robert Collins
transport implementations now tested consistently.
418
    def is_readonly(self):
        """See Transport.is_readonly.

        :return: True, unconditionally.
        """
        readonly = True
        return readonly
421
1400.1.1 by Robert Collins
implement a basic test for the ui branch command from http servers
422
    def listable(self):
        """See Transport.listable.

        :return: False, unconditionally.
        """
        can_list = False
        return can_list
907.1.21 by John Arbash Meinel
Adding http transport as a valid transport protocol.
425
426
    def stat(self, relpath):
        """Refuse to return the stat information for a file.

        :raises errors.TransportNotPossible: always.
        """
        reason = 'http does not support stat()'
        raise errors.TransportNotPossible(reason)
907.1.21 by John Arbash Meinel
Adding http transport as a valid transport protocol.
430
907.1.24 by John Arbash Meinel
Remote functionality work.
431
    def lock_read(self, relpath):
        """Lock the given file for shared (read) access.

        No real lock is taken: the returned object only remembers relpath
        and its unlock() does nothing.

        :return: A lock object, which should be passed to Transport.unlock()
        """
        # The old RemoteBranch ignore lock for reading, so we will
        # continue that tradition and return a bogus lock object.
        class BogusLock(object):

            def __init__(self, path):
                # Kept only so callers can see what was "locked".
                self.path = path

            def unlock(self):
                # Nothing was acquired, so there is nothing to release.
                return None

        bogus = BogusLock(relpath)
        return bogus
443
444
    def lock_write(self, relpath):
        """Refuse to lock the given file for exclusive (write) access.

        WARNING: many transports do not support this, so try to avoid using
        it.

        :raises errors.TransportNotPossible: always.
        """
        reason = 'http does not support lock_write()'
        raise errors.TransportNotPossible(reason)
1530.1.1 by Robert Collins
Minimal infrastructure to test TransportTestProviderAdapter.
451
1540.3.26 by Martin Pool
[merge] bzr.dev; pycurl not updated for readv yet
452
    def clone(self, offset=None):
        """Return a new HttpTransportBase with root at self.base + offset

        The new object is built with self.__class__ and receives self as
        its second argument, so daughter classes can tell that this is a
        cloning and not a raw creation.

        :param offset: Path passed to self.abspath() to compute the new
            root; None means reuse self.base as is.
        """
        if offset is not None:
            new_base = self.abspath(offset)
        else:
            new_base = self.base
        return self.__class__(new_base, self)
1530.1.1 by Robert Collins
Minimal infrastructure to test TransportTestProviderAdapter.
462
2520.2.1 by Vincent Ladeuil
First step to fix #115209 use _coalesce_offsets like other transports.
463
    def _attempted_range_header(self, offsets, tail_amount):
3059.2.17 by Vincent Ladeuil
Limit GET requests by body size instead of number of ranges.
464
        """Prepare a HTTP Range header at a level the server should accept.
465
466
        :return: the range header representing offsets/tail_amount or None if
467
            no header can be built.
468
        """
2004.1.30 by v.ladeuil+lp at free
Fix #62276 and #62029 by providing a more robust http range handling.
469
470
        if self._range_hint == 'multi':
3024.2.3 by Vincent Ladeuil
Rewrite http_readv to allow several GET requests. Smoke tested against branch reported in the bug.
471
            # Generate the header describing all offsets
2520.2.1 by Vincent Ladeuil
First step to fix #115209 use _coalesce_offsets like other transports.
472
            return self._range_header(offsets, tail_amount)
2004.1.30 by v.ladeuil+lp at free
Fix #62276 and #62029 by providing a more robust http range handling.
473
        elif self._range_hint == 'single':
474
            # Combine all the requested ranges into a single
475
            # encompassing one
2520.2.1 by Vincent Ladeuil
First step to fix #115209 use _coalesce_offsets like other transports.
476
            if len(offsets) > 0:
2004.1.30 by v.ladeuil+lp at free
Fix #62276 and #62029 by providing a more robust http range handling.
477
                if tail_amount not in (0, None):
2520.2.1 by Vincent Ladeuil
First step to fix #115209 use _coalesce_offsets like other transports.
478
                    # Nothing we can do here to combine ranges with tail_amount
479
                    # in a single range, just returns None. The whole file
480
                    # should be downloaded.
2004.1.30 by v.ladeuil+lp at free
Fix #62276 and #62029 by providing a more robust http range handling.
481
                    return None
482
                else:
2520.2.1 by Vincent Ladeuil
First step to fix #115209 use _coalesce_offsets like other transports.
483
                    start = offsets[0].start
484
                    last = offsets[-1]
485
                    end = last.start + last.length - 1
486
                    whole = self._coalesce_offsets([(start, end - start + 1)],
487
                                                   limit=0, fudge_factor=0)
488
                    return self._range_header(list(whole), 0)
2004.1.30 by v.ladeuil+lp at free
Fix #62276 and #62029 by providing a more robust http range handling.
489
            else:
490
                # Only tail_amount, requested, leave range_header
491
                # do its work
2520.2.1 by Vincent Ladeuil
First step to fix #115209 use _coalesce_offsets like other transports.
492
                return self._range_header(offsets, tail_amount)
2004.1.30 by v.ladeuil+lp at free
Fix #62276 and #62029 by providing a more robust http range handling.
493
        else:
494
            return None
495
1786.1.27 by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration.
496
    @staticmethod
2520.2.1 by Vincent Ladeuil
First step to fix #115209 use _coalesce_offsets like other transports.
497
    def _range_header(ranges, tail_amount):
1750.1.2 by Michael Ellerman
Add support for HTTP multipart ranges and hook it into http+urllib.
498
        """Turn a list of bytes ranges into a HTTP Range header value.
499
2520.2.1 by Vincent Ladeuil
First step to fix #115209 use _coalesce_offsets like other transports.
500
        :param ranges: A list of _CoalescedOffset
2004.1.30 by v.ladeuil+lp at free
Fix #62276 and #62029 by providing a more robust http range handling.
501
        :param tail_amount: The amount to get from the end of the file.
1750.1.2 by Michael Ellerman
Add support for HTTP multipart ranges and hook it into http+urllib.
502
503
        :return: HTTP range header string.
2004.1.30 by v.ladeuil+lp at free
Fix #62276 and #62029 by providing a more robust http range handling.
504
505
        At least a non-empty ranges *or* a tail_amount must be
506
        provided.
1750.1.2 by Michael Ellerman
Add support for HTTP multipart ranges and hook it into http+urllib.
507
        """
508
        strings = []
2520.2.1 by Vincent Ladeuil
First step to fix #115209 use _coalesce_offsets like other transports.
509
        for offset in ranges:
510
            strings.append('%d-%d' % (offset.start,
511
                                      offset.start + offset.length - 1))
1750.1.2 by Michael Ellerman
Add support for HTTP multipart ranges and hook it into http+urllib.
512
1786.1.8 by John Arbash Meinel
[merge] Johan Rydberg test updates
513
        if tail_amount:
514
            strings.append('-%d' % tail_amount)
515
1786.1.36 by John Arbash Meinel
pycurl expects us to just set the range of bytes, not including bytes=
516
        return ','.join(strings)
1750.1.2 by Michael Ellerman
Add support for HTTP multipart ranges and hook it into http+urllib.
517
2018.2.8 by Andrew Bennetts
Make HttpTransportBase.get_smart_client return self again.
518
    def send_http_smart_request(self, bytes):
3241.1.4 by Andrew Bennetts
Use get_smart_medium as suggested by Robert, and deal with the fallout.
519
        try:
520
            code, body_filelike = self._post(bytes)
521
            if code != 200:
522
                raise InvalidHttpResponse(
523
                    self._remote_path('.bzr/smart'),
524
                    'Expected 200 response code, got %r' % (code,))
525
        except errors.InvalidHttpResponse, e:
526
            raise errors.SmartProtocolError(str(e))
2018.2.8 by Andrew Bennetts
Make HttpTransportBase.get_smart_client return self again.
527
        return body_filelike
528
3245.4.47 by Andrew Bennetts
Don't automatically send 'hello' requests from RemoteBzrDirFormat.probe_transport unless we have to (i.e. the transport is HTTP).
529
    def should_probe(self):
        """Return True, unconditionally.

        NOTE(review): presumably tells the smart client to probe this medium
        (e.g. with a 'hello' request) before use -- confirm against callers.
        """
        probe = True
        return probe
531
3431.3.11 by Andrew Bennetts
Push remote_path_from_transport logic into SmartClientMedium, removing special-casing of bzr+http from _SmartClient.
532
    def remote_path_from_transport(self, transport):
        """Return the unquoted path of transport relative to self.base.

        :param transport: An object whose base attribute is a URL, optionally
            prefixed with 'bzr+'.
        :return: urlutils.relative_url(self.base, <transport base>) passed
            through urllib.unquote.
        """
        # Strip the optional 'bzr+' prefix from transport so it will have the
        # same scheme as self.
        prefix = 'bzr+'
        other_base = transport.base
        if other_base.startswith(prefix):
            other_base = other_base[len(prefix):]
        return urllib.unquote(urlutils.relative_url(self.base, other_base))
540
2018.2.8 by Andrew Bennetts
Make HttpTransportBase.get_smart_client return self again.
541
2018.5.2 by Andrew Bennetts
Start splitting bzrlib/transport/smart.py into a package.
542
class SmartClientHTTPMediumRequest(medium.SmartClientMediumRequest):
    """A SmartClientMediumRequest that works with an HTTP medium.

    Outgoing bytes are accumulated in memory and sent in one go when writing
    is finished; the response is then read from the file-like object the
    medium hands back.
    """

    def __init__(self, client_medium):
        medium.SmartClientMediumRequest.__init__(self, client_medium)
        # Everything accepted so far, waiting to be sent.
        self._buffer = ''

    def _accept_bytes(self, bytes):
        """Queue bytes; nothing is sent until _finished_writing."""
        self._buffer = self._buffer + bytes

    def _finished_writing(self):
        """Send the queued request and keep the response body for reading."""
        self._response_body = self._medium.send_http_smart_request(
            self._buffer)

    def _read_bytes(self, count):
        """See SmartClientMediumRequest._read_bytes."""
        body = self._response_body
        return body.read(count)

    def _read_line(self):
        """Read one line from the response body.

        :raises AssertionError: if medium._get_line returns excess bytes,
            since they cannot be pushed back.
        """
        line, excess = medium._get_line(self._response_body.read)
        if excess == '':
            return line
        raise AssertionError(
            '_get_line returned excess bytes, but this mediumrequest '
            'cannot handle excess. (%r)' % (excess,))

    def _finished_reading(self):
        """See SmartClientMediumRequest._finished_reading."""
        # Nothing to release here.
        return None