~bzr-pqm/bzr/bzr.dev

4763.2.4 by John Arbash Meinel
merge bzr.2.1 in preparation for NEWS entry.
1
# Copyright (C) 2005-2010 Canonical Ltd
1540.3.18 by Martin Pool
Style review fixes (thanks robertc)
2
#
1185.11.19 by John Arbash Meinel
Testing put and append, also testing agaist file-like objects as well as strings.
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
1540.3.18 by Martin Pool
Style review fixes (thanks robertc)
7
#
1185.11.19 by John Arbash Meinel
Testing put and append, also testing agaist file-like objects as well as strings.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
1540.3.18 by Martin Pool
Style review fixes (thanks robertc)
12
#
1185.11.19 by John Arbash Meinel
Testing put and append, also testing agaist file-like objects as well as strings.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
4183.7.1 by Sabin Iacob
update FSF mailing address
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
1540.3.3 by Martin Pool
Review updates of pycurl transport
16
17
"""Base implementation of Transport over http.
18
19
There are separate implementation modules for each http client implementation.
907.1.21 by John Arbash Meinel
Adding http transport as a valid transport protocol.
20
"""
21
1711.4.14 by John Arbash Meinel
Custom HttpRequestHandler which treats all paths as utf8 encoded
22
from cStringIO import StringIO
1786.1.25 by John Arbash Meinel
Test that we can extract headers properly.
23
import mimetools
1540.3.23 by Martin Pool
Allow urls like http+pycurl://host/ to use a particular impl
24
import re
1540.3.3 by Martin Pool
Review updates of pycurl transport
25
import urlparse
26
import urllib
2172.3.2 by v.ladeuil+lp at free
Fix the missing import and typos in comments.
27
import sys
3734.2.3 by Vincent Ladeuil
Don't use multiple inheritance for http smart medium since we
28
import weakref
1786.1.6 by John Arbash Meinel
Missed a couple of imports
29
2485.8.24 by Vincent Ladeuil
Finish http refactoring. Test suite passing.
30
from bzrlib import (
3675.1.1 by Martin Pool
Merge and update log+ transport decorator
31
    debug,
2485.8.24 by Vincent Ladeuil
Finish http refactoring. Test suite passing.
32
    errors,
5609.9.1 by Martin
Blindly change all users of get_transport to address the function via the transport module
33
    transport,
2485.8.24 by Vincent Ladeuil
Finish http refactoring. Test suite passing.
34
    ui,
35
    urlutils,
36
    )
2400.1.3 by Andrew Bennetts
Split smart transport code into several separate modules.
37
from bzrlib.smart import medium
2520.2.1 by Vincent Ladeuil
First step to fix #115209 use _coalesce_offsets like other transports.
38
from bzrlib.symbol_versioning import (
39
        deprecated_method,
40
        )
1185.11.1 by John Arbash Meinel
(broken) Transport work is merged in. Tests do not pass yet.
41
from bzrlib.trace import mutter
2018.2.2 by Andrew Bennetts
Implement HTTP smart server.
42
from bzrlib.transport import (
2485.8.16 by Vincent Ladeuil
Create a new, empty, ConnectedTransport class.
43
    ConnectedTransport,
2520.2.1 by Vincent Ladeuil
First step to fix #115209 use _coalesce_offsets like other transports.
44
    _CoalescedOffset,
2018.2.2 by Andrew Bennetts
Implement HTTP smart server.
45
    Transport,
46
    )
1540.3.6 by Martin Pool
[merge] update from bzr.dev
47
2004.1.9 by vila
Takes jam's remarks into account when possible, add TODOs for the rest.
48
# TODO: This is not used anymore by HttpTransport_urllib
49
# (extracting the auth info and prompting the user for a password
50
# have been split), only the tests still use it. It should be
51
# deleted and the tests rewritten ASAP to stay in sync.
1185.40.20 by Robey Pointer
allow user:pass@ info in http urls to be used for auth; this should be easily expandable later to use auth config files
52
def extract_auth(url, password_manager):
    """Extract auth parameters from an HTTP/HTTPS url and add them to the given
    password manager.  Return the url, minus those auth parameters (which
    confuse urllib2).

    :param url: Absolute http or https url; the scheme may carry a ``+impl``
        qualifier (e.g. ``http+urllib``).
    :param password_manager: Object whose ``add_password(realm, host, user,
        password)`` is called once when the url embeds credentials.
    :return: The url rebuilt without the ``user[:password]@`` part.
    :raises ValueError: If url is not an absolute http(s) url.
    """
    if not re.match(r'^(https?)(\+\w+)?://', url):
        raise ValueError(
            'invalid absolute url %r' % (url,))
    scheme, netloc, path, query, fragment = urlparse.urlsplit(url)

    if '@' in netloc:
        # A host[:port] can never contain '@', so it is the *last* '@' that
        # ends the credentials. Splitting on the first one would cut a
        # password holding an unquoted '@' in two and leave its tail in the
        # returned url.
        auth, netloc = netloc.rsplit('@', 1)
        if ':' in auth:
            username, password = auth.split(':', 1)
        else:
            username, password = auth, None
        # NOTE(review): a bracketed IPv6 literal host would be truncated at
        # its first ':' here -- confirm whether such urls can reach this code.
        if ':' in netloc:
            host = netloc.split(':', 1)[0]
        else:
            host = netloc
        username = urllib.unquote(username)
        if password is not None:
            password = urllib.unquote(password)
        else:
            # No password in the url: ask the user for it.
            password = ui.ui_factory.get_password(
                prompt=u'HTTP %(user)s@%(host)s password',
                user=username, host=host)
        password_manager.add_password(None, host, username, password)
    url = urlparse.urlunsplit((scheme, netloc, path, query, fragment))
    return url
1553.1.5 by James Henstridge
Make HTTP transport has() method do HEAD requests, and update test to
82
1185.50.83 by John Arbash Meinel
[merge] James Henstridge: Set Agent string in http headers, add tests for it.
83
3734.2.3 by Vincent Ladeuil
Don't use multiple inheritance for http smart medium since we
84
class HttpTransportBase(ConnectedTransport):
1540.3.1 by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib.
85
    """Base class for http implementations.
86
1540.3.23 by Martin Pool
Allow urls like http+pycurl://host/ to use a particular impl
87
    Does URL parsing, etc, but not any network IO.
88
89
    The protocol can be given as e.g. http+urllib://host/ to use a particular
90
    implementation.
91
    """
92
2485.8.24 by Vincent Ladeuil
Finish http refactoring. Test suite passing.
93
    # _unqualified_scheme: "http" or "https"
94
    # _scheme: may have "+pycurl", etc
1540.3.24 by Martin Pool
Add new protocol 'http+pycurl' that always uses PyCurl.
95
3878.4.2 by Vincent Ladeuil
Fix bug #265070 by providing a finer sieve for accepted redirections.
96
    def __init__(self, base, _impl_name, _from_transport=None):
        """Set the base path where files will be stored.

        :param base: Absolute http or https url; the scheme may carry a
            ``+impl`` qualifier.
        :param _impl_name: Implementation name, stored as given.
        :param _from_transport: Optional transport this one derives from;
            when given, its range hint is inherited.
        :raises AssertionError: If base is not an http(s) url.
        """
        scheme_match = re.match(r'^(https?)(\+\w+)?://', base)
        if scheme_match is None:
            raise AssertionError("not a http url: %r" % (base,))
        # Only the bare "http"/"https" part is kept, without any qualifier.
        self._unqualified_scheme = scheme_match.group(1)
        self._impl_name = _impl_name
        super(HttpTransportBase, self).__init__(
            base, _from_transport=_from_transport)
        self._medium = None
        # The range hint evolves during the life of the transport object:
        # multi-range requests are tried first; if the server returns bogus
        # results we fall back to single range requests and, finally, give up
        # on ranges altogether. Whatever has been learnt is handed over to
        # clones.
        if _from_transport is None:
            self._range_hint = 'multi'
        else:
            self._range_hint = _from_transport._range_hint
907.1.21 by John Arbash Meinel
Adding http transport as a valid transport protocol.
117
118
    def has(self, relpath):
        """Abstract here: concrete http transports must provide has().

        :param relpath: Not used by this base implementation.
        :raises NotImplementedError: Always.
        """
        message = "has() is abstract on %r" % self
        raise NotImplementedError(message)
120
2164.2.15 by Vincent Ladeuil
Http redirections are not followed by default. Do not use hints
121
    def get(self, relpath):
        """Get the file at the given relative path.

        :param relpath: The relative path to the file
        :return: A StringIO holding the whole body read from the response.
        """
        _code, body = self._get(relpath, None)
        # FIXME: some callers want an iterable... One step forward, three steps
        # backwards :-/ And not only an iterable, but an iterable that can be
        # seeked backwards, so we will never be able to do that.  One such
        # known client is bzrlib.bundle.serializer.v4.get_bundle_reader. At the
        # time of this writing it's even the only known client -- vila20071203
        content = body.read()
        return StringIO(content)
1540.3.26 by Martin Pool
[merge] bzr.dev; pycurl not updated for readv yet
133
2164.2.15 by Vincent Ladeuil
Http redirections are not followed by default. Do not use hints
134
    def _get(self, relpath, ranges, tail_amount=0):
1540.3.27 by Martin Pool
Integrate http range support for pycurl
135
        """Get a file, or part of a file.
136
137
        :param relpath: Path relative to transport base URL
2164.2.1 by v.ladeuil+lp at free
First rough http branch redirection implementation.
138
        :param ranges: None to get the whole file;
2520.2.1 by Vincent Ladeuil
First step to fix #115209 use _coalesce_offsets like other transports.
139
            or  a list of _CoalescedOffset to fetch parts of a file.
2164.2.26 by Vincent Ladeuil
Delete obsolete note in doc string.
140
        :param tail_amount: The amount to get from the end of the file.
1540.3.27 by Martin Pool
Integrate http range support for pycurl
141
142
        :returns: (http_code, result_file)
143
        """
1540.3.26 by Martin Pool
[merge] bzr.dev; pycurl not updated for readv yet
144
        raise NotImplementedError(self._get)
1594.2.5 by Robert Collins
Readv patch from Johan Rydberg giving knits partial download support.
145
3133.1.2 by Vincent Ladeuil
Fix #177643 by making pycurl handle url-embedded credentials again.
146
    def _remote_path(self, relpath):
147
        """See ConnectedTransport._remote_path.
148
149
        user and passwords are not embedded in the path provided to the server.
150
        """
6055.2.19 by Jelmer Vernooij
Use clone
151
        path = self._parsed_url.clone(relpath).path
3133.1.2 by Vincent Ladeuil
Fix #177643 by making pycurl handle url-embedded credentials again.
152
        return self._unsplit_url(self._unqualified_scheme,
6055.2.1 by Jelmer Vernooij
Add UnparsedUrl.
153
                                 None, None, self._parsed_url.host,
154
                                 self._parsed_url.port, path)
3133.1.2 by Vincent Ladeuil
Fix #177643 by making pycurl handle url-embedded credentials again.
155
156
    def _create_auth(self):
4795.4.4 by Vincent Ladeuil
Protect more access to 'user' and 'password' auth attributes.
157
        """Returns a dict containing the credentials provided at build time."""
6055.2.1 by Jelmer Vernooij
Add UnparsedUrl.
158
        auth = dict(host=self._parsed_url.host, port=self._parsed_url.port,
159
                    user=self._parsed_url.user, password=self._parsed_url.password,
3133.1.2 by Vincent Ladeuil
Fix #177643 by making pycurl handle url-embedded credentials again.
160
                    protocol=self._unqualified_scheme,
6055.2.1 by Jelmer Vernooij
Add UnparsedUrl.
161
                    path=self._parsed_url.path)
3133.1.2 by Vincent Ladeuil
Fix #177643 by making pycurl handle url-embedded credentials again.
162
        return auth
163
2018.2.3 by Andrew Bennetts
Starting factoring out the smart server client "medium" from the protocol.
164
    def get_smart_medium(self):
        """See Transport.get_smart_medium.

        The medium is created on first use and the same object is returned
        by later calls.
        """
        medium = self._medium
        if medium is None:
            # Since medium holds some state (smart server probing at least), we
            # need to keep it around. Note that this is needed because medium
            # has the same 'base' attribute as the transport so it can't be
            # shared between transports having different bases.
            medium = SmartClientHTTPMedium(self)
            self._medium = medium
        return medium
2018.2.3 by Andrew Bennetts
Starting factoring out the smart server client "medium" from the protocol.
173
2520.2.2 by Vincent Ladeuil
Fix #115209 by issuing a single range request on 400: Bad Request
174
    def _degrade_range_hint(self, relpath, ranges, exc_info):
2000.3.9 by v.ladeuil+lp at free
The tests that would have help avoid bug #73948 and all that mess :)
175
        if self._range_hint == 'multi':
176
            self._range_hint = 'single'
2520.2.2 by Vincent Ladeuil
Fix #115209 by issuing a single range request on 400: Bad Request
177
            mutter('Retry "%s" with single range request' % relpath)
2000.3.9 by v.ladeuil+lp at free
The tests that would have help avoid bug #73948 and all that mess :)
178
        elif self._range_hint == 'single':
179
            self._range_hint = None
2520.2.2 by Vincent Ladeuil
Fix #115209 by issuing a single range request on 400: Bad Request
180
            mutter('Retry "%s" without ranges' % relpath)
2000.3.9 by v.ladeuil+lp at free
The tests that would have help avoid bug #73948 and all that mess :)
181
        else:
3024.2.3 by Vincent Ladeuil
Rewrite http_readv to allow several GET requests. Smoke tested against branch reported in the bug.
182
            # We tried all the tricks, but nothing worked. We re-raise the
183
            # original exception; the 'mutter' calls above will indicate that
184
            # further tries were unsuccessful
2172.3.1 by v.ladeuil+lp at free
Merge a recent bzr.dev (2172) and takes John's remarks into account.
185
            raise exc_info[0], exc_info[1], exc_info[2]
2000.3.9 by v.ladeuil+lp at free
The tests that would have help avoid bug #73948 and all that mess :)
186
2520.2.2 by Vincent Ladeuil
Fix #115209 by issuing a single range request on 400: Bad Request
187
    # _coalesce_offsets is a helper for readv; it tries to combine ranges
    # without degrading readv performance. _bytes_to_read_before_seek is the
    # value passed as the fudge_factor parameter and has been tuned for other
    # transports. For HTTP, the name is inappropriate but the parameter is
    # still useful and helps reduce the number of chunks in the response. The
    # overhead for a chunk (headers, length, footer around the data itself) is
    # variable but around 50 bytes. We use 128 to reduce the range specifiers
    # that appear in the header; some servers (notably Apache) enforce a
    # maximum length for a header and issue a '400: Bad request' error when
    # too many ranges are specified.
197
    _bytes_to_read_before_seek = 128
198
    # No limit on the offset number that get combined into one, we are trying
3059.2.17 by Vincent Ladeuil
Limit GET requests by body size instead of number of ranges.
199
    # to avoid downloading the whole file.
3024.2.1 by Vincent Ladeuil
Fix 165061 by using the correct _max_readv_combine attribute.
200
    _max_readv_combine = 0
3024.2.3 by Vincent Ladeuil
Rewrite http_readv to allow several GET requests. Smoke tested against branch reported in the bug.
201
    # By default Apache has a limit of ~400 ranges before replying with a 400
202
    # Bad Request. So we go underneath that amount to be safe.
203
    _max_get_ranges = 200
3059.2.17 by Vincent Ladeuil
Limit GET requests by body size instead of number of ranges.
204
    # We impose no limit on the range size. But see _pycurl.py for a different
205
    # use.
206
    _get_max_size = 0
2520.2.1 by Vincent Ladeuil
First step to fix #115209 use _coalesce_offsets like other transports.
207
2745.5.1 by Robert Collins
* New parameter on ``bzrlib.transport.Transport.readv``
208
    def _readv(self, relpath, offsets):
1594.2.5 by Robert Collins
Readv patch from Johan Rydberg giving knits partial download support.
209
        """Get parts of the file at the given relative path.
210
1540.3.26 by Martin Pool
[merge] bzr.dev; pycurl not updated for readv yet
211
        :param offsets: A list of (offset, size) tuples.
1540.3.27 by Martin Pool
Integrate http range support for pycurl
212
        :param return: A list or generator of (offset, data) tuples
1594.2.5 by Robert Collins
Readv patch from Johan Rydberg giving knits partial download support.
213
        """
3059.2.18 by Vincent Ladeuil
Take spiv review comments into account.
214
        # offsets may be a generator, we will iterate it several times, so
3024.2.3 by Vincent Ladeuil
Rewrite http_readv to allow several GET requests. Smoke tested against branch reported in the bug.
215
        # build a list
216
        offsets = list(offsets)
217
218
        try_again = True
3146.3.2 by Vincent Ladeuil
Fix #179368 by keeping the current range hint on ShortReadvErrors.
219
        retried_offset = None
3024.2.3 by Vincent Ladeuil
Rewrite http_readv to allow several GET requests. Smoke tested against branch reported in the bug.
220
        while try_again:
221
            try_again = False
222
223
            # Coalesce the offsets to minimize the GET requests issued
224
            sorted_offsets = sorted(offsets)
225
            coalesced = self._coalesce_offsets(
226
                sorted_offsets, limit=self._max_readv_combine,
3059.2.17 by Vincent Ladeuil
Limit GET requests by body size instead of number of ranges.
227
                fudge_factor=self._bytes_to_read_before_seek,
228
                max_size=self._get_max_size)
3024.2.3 by Vincent Ladeuil
Rewrite http_readv to allow several GET requests. Smoke tested against branch reported in the bug.
229
230
            # Turn it into a list, we will iterate it several times
231
            coalesced = list(coalesced)
3675.1.1 by Martin Pool
Merge and update log+ transport decorator
232
            if 'http' in debug.debug_flags:
233
                mutter('http readv of %s  offsets => %s collapsed %s',
3024.2.3 by Vincent Ladeuil
Rewrite http_readv to allow several GET requests. Smoke tested against branch reported in the bug.
234
                    relpath, len(offsets), len(coalesced))
235
236
            # Cache the data read, but only until it's been used
237
            data_map = {}
238
            # We will iterate on the data received from the GET requests and
3059.2.18 by Vincent Ladeuil
Take spiv review comments into account.
239
            # serve the corresponding offsets respecting the initial order. We
3024.2.3 by Vincent Ladeuil
Rewrite http_readv to allow several GET requests. Smoke tested against branch reported in the bug.
240
            # need an offset iterator for that.
241
            iter_offsets = iter(offsets)
242
            cur_offset_and_size = iter_offsets.next()
243
244
            try:
3059.2.10 by Vincent Ladeuil
Jam's review feedback.
245
                for cur_coal, rfile in self._coalesce_readv(relpath, coalesced):
3024.2.3 by Vincent Ladeuil
Rewrite http_readv to allow several GET requests. Smoke tested against branch reported in the bug.
246
                    # Split the received chunk
247
                    for offset, size in cur_coal.ranges:
3052.3.2 by Vincent Ladeuil
Add tests and fix trivial bugs and other typos.
248
                        start = cur_coal.start + offset
3059.2.10 by Vincent Ladeuil
Jam's review feedback.
249
                        rfile.seek(start, 0)
250
                        data = rfile.read(size)
3024.2.3 by Vincent Ladeuil
Rewrite http_readv to allow several GET requests. Smoke tested against branch reported in the bug.
251
                        data_len = len(data)
252
                        if data_len != size:
253
                            raise errors.ShortReadvError(relpath, start, size,
254
                                                         actual=data_len)
3059.2.5 by Vincent Ladeuil
DAMN^64, the http test server is 1.0 not 1.1 :( Better pipe cleaning and less readv caching (since that's the point of the whole fix).
255
                        if (start, size) == cur_offset_and_size:
256
                            # The offset requested are sorted as the coalesced
3059.2.11 by Vincent Ladeuil
Fix typos mentioned by spiv.
257
                            # ones, no need to cache. Win !
3059.2.5 by Vincent Ladeuil
DAMN^64, the http test server is 1.0 not 1.1 :( Better pipe cleaning and less readv caching (since that's the point of the whole fix).
258
                            yield cur_offset_and_size[0], data
259
                            cur_offset_and_size = iter_offsets.next()
260
                        else:
261
                            # Different sorting. We need to cache.
262
                            data_map[(start, size)] = data
3024.2.3 by Vincent Ladeuil
Rewrite http_readv to allow several GET requests. Smoke tested against branch reported in the bug.
263
264
                    # Yield everything we can
265
                    while cur_offset_and_size in data_map:
266
                        # Clean the cached data since we use it
267
                        # XXX: will break if offsets contains duplicates --
268
                        # vila20071129
269
                        this_data = data_map.pop(cur_offset_and_size)
270
                        yield cur_offset_and_size[0], this_data
271
                        cur_offset_and_size = iter_offsets.next()
272
3059.2.2 by Vincent Ladeuil
Read http responses on demand without buffering the whole body
273
            except (errors.ShortReadvError, errors.InvalidRange,
5609.52.1 by Martin Pool
Cope with buggy squids interrupting the response before a mime multipart boundary
274
                    errors.InvalidHttpRange, errors.HttpBoundaryMissing), e:
3146.3.2 by Vincent Ladeuil
Fix #179368 by keeping the current range hint on ShortReadvErrors.
275
                mutter('Exception %r: %s during http._readv',e, e)
276
                if (not isinstance(e, errors.ShortReadvError)
277
                    or retried_offset == cur_offset_and_size):
278
                    # We don't degrade the range hint for ShortReadvError since
279
                    # they do not indicate a problem with the server ability to
280
                    # handle ranges. Except when we fail to get back a required
281
                    # offset twice in a row. In that case, falling back to
282
                    # single range or whole file should help or end up in a
283
                    # fatal exception.
284
                    self._degrade_range_hint(relpath, coalesced, sys.exc_info())
3024.2.3 by Vincent Ladeuil
Rewrite http_readv to allow several GET requests. Smoke tested against branch reported in the bug.
285
                # Some offsets may have been already processed, so we retry
286
                # only the unsuccessful ones.
3052.3.2 by Vincent Ladeuil
Add tests and fix trivial bugs and other typos.
287
                offsets = [cur_offset_and_size] + [o for o in iter_offsets]
3146.3.2 by Vincent Ladeuil
Fix #179368 by keeping the current range hint on ShortReadvErrors.
288
                retried_offset = cur_offset_and_size
3052.3.2 by Vincent Ladeuil
Add tests and fix trivial bugs and other typos.
289
                try_again = True
3024.2.3 by Vincent Ladeuil
Rewrite http_readv to allow several GET requests. Smoke tested against branch reported in the bug.
290
291
    def _coalesce_readv(self, relpath, coalesced):
292
        """Issue several GET requests to satisfy the coalesced offsets"""
3059.2.17 by Vincent Ladeuil
Limit GET requests by body size instead of number of ranges.
293
294
        def get_and_yield(relpath, coalesced):
295
            if coalesced:
296
                # Note that the _get below may raise
3059.2.18 by Vincent Ladeuil
Take spiv review comments into account.
297
                # errors.InvalidHttpRange. It's the caller's responsibility to
3059.2.17 by Vincent Ladeuil
Limit GET requests by body size instead of number of ranges.
298
                # decide how to retry since it may provide different coalesced
299
                # offsets.
300
                code, rfile = self._get(relpath, coalesced)
301
                for coal in coalesced:
302
                    yield coal, rfile
303
304
        if self._range_hint is None:
305
            # Download whole file
306
            for c, rfile in get_and_yield(relpath, coalesced):
307
                yield c, rfile
3024.2.3 by Vincent Ladeuil
Rewrite http_readv to allow several GET requests. Smoke tested against branch reported in the bug.
308
        else:
3059.2.17 by Vincent Ladeuil
Limit GET requests by body size instead of number of ranges.
309
            total = len(coalesced)
310
            if self._range_hint == 'multi':
311
                max_ranges = self._max_get_ranges
3059.2.18 by Vincent Ladeuil
Take spiv review comments into account.
312
            elif self._range_hint == 'single':
3059.2.17 by Vincent Ladeuil
Limit GET requests by body size instead of number of ranges.
313
                max_ranges = total
3059.2.18 by Vincent Ladeuil
Take spiv review comments into account.
314
            else:
315
                raise AssertionError("Unknown _range_hint %r"
316
                                     % (self._range_hint,))
3059.2.17 by Vincent Ladeuil
Limit GET requests by body size instead of number of ranges.
317
            # TODO: Some web servers may ignore the range requests and return
318
            # the whole file, we may want to detect that and avoid further
319
            # requests.
320
            # Hint: test_readv_multiple_get_requests will fail once we do that
321
            cumul = 0
322
            ranges = []
323
            for coal in coalesced:
324
                if ((self._get_max_size > 0
325
                     and cumul + coal.length > self._get_max_size)
326
                    or len(ranges) >= max_ranges):
327
                    # Get that much and yield
328
                    for c, rfile in get_and_yield(relpath, ranges):
329
                        yield c, rfile
330
                    # Restart with the current offset
331
                    ranges = [coal]
332
                    cumul = coal.length
333
                else:
334
                    ranges.append(coal)
335
                    cumul += coal.length
336
            # Get the rest and yield
337
            for c, rfile in get_and_yield(relpath, ranges):
338
                yield c, rfile
1786.1.5 by John Arbash Meinel
Move the common Multipart stuff into plain http, and wrap pycurl response so that it matches the urllib response object.
339
2671.3.1 by Robert Collins
* New method ``bzrlib.transport.Transport.get_recommended_page_size``.
340
    def recommended_page_size(self):
        """See Transport.recommended_page_size().

        For HTTP we suggest a large page size to reduce the overhead
        introduced by latency.

        :return: 64 * 1024, an amount of bytes.
        """
        kibibyte = 1024
        return 64 * kibibyte
347
2018.2.10 by Andrew Bennetts
Tidy up TODOs, further testing and fixes for SmartServerRequestProtocolOne, and remove a read_bytes(1) call.
348
    def _post(self, body_bytes):
349
        """POST body_bytes to .bzr/smart on this transport.
3943.8.1 by Marius Kruger
remove all trailing whitespace from bzr source
350
2018.2.10 by Andrew Bennetts
Tidy up TODOs, further testing and fixes for SmartServerRequestProtocolOne, and remove a read_bytes(1) call.
351
        :returns: (response code, response body file-like object).
352
        """
353
        # TODO: Requiring all the body_bytes to be available at the beginning of
354
        # the POST may require large client buffers.  It would be nice to have
355
        # an interface that allows streaming via POST when possible (and
356
        # degrades to a local buffer when not).
357
        raise NotImplementedError(self._post)
358
1955.3.6 by John Arbash Meinel
Lots of deprecation warnings, but no errors
359
    def put_file(self, relpath, f, mode=None):
        """Copy the file-like object into the location.

        Not available over http: this always raises.

        :param relpath: Location to put the contents, relative to base.
        :param f:       File-like object.
        :param mode:    Not used by this implementation.
        :raises errors.TransportNotPossible: Always.
        """
        reason = 'http PUT not supported'
        raise errors.TransportNotPossible(reason)
907.1.21 by John Arbash Meinel
Adding http transport as a valid transport protocol.
366
1185.58.2 by John Arbash Meinel
Added mode to the appropriate transport functions, and tests to make sure they work.
367
    def mkdir(self, relpath, mode=None):
        """Create a directory at the given path.

        Not available over http; both arguments are ignored.

        :raises errors.TransportNotPossible: Always.
        """
        raise errors.TransportNotPossible('http does not support mkdir()')
907.1.21 by John Arbash Meinel
Adding http transport as a valid transport protocol.
370
1534.4.15 by Robert Collins
Remove shutil dependency in upgrade - create a delete_tree method for transports.
371
    def rmdir(self, relpath):
        """See Transport.rmdir.

        Not available over http; relpath is ignored.

        :raises errors.TransportNotPossible: Always.
        """
        raise errors.TransportNotPossible('http does not support rmdir()')
1534.4.15 by Robert Collins
Remove shutil dependency in upgrade - create a delete_tree method for transports.
374
1955.3.15 by John Arbash Meinel
Deprecate 'Transport.append' in favor of Transport.append_file or Transport.append_bytes
375
    def append_file(self, relpath, f, mode=None):
        """Append the text in the file-like object into the final location.

        Not available over http; all arguments are ignored.

        :raises errors.TransportNotPossible: Always.
        """
        raise errors.TransportNotPossible('http does not support append()')
907.1.21 by John Arbash Meinel
Adding http transport as a valid transport protocol.
380
381
    def copy(self, rel_from, rel_to):
        """Copy the item at rel_from to the location at rel_to.

        Not available over http; both arguments are ignored.

        :raises errors.TransportNotPossible: Always.
        """
        raise errors.TransportNotPossible('http does not support copy()')
907.1.21 by John Arbash Meinel
Adding http transport as a valid transport protocol.
384
1185.58.2 by John Arbash Meinel
Added mode to the appropriate transport functions, and tests to make sure they work.
385
    def copy_to(self, relpaths, other, mode=None, pb=None):
        """Copy a set of entries from self into another Transport.

        :param relpaths: A list/generator of entries to be copied.
        :param other: The destination transport.
        :param mode: Handed unchanged to the parent class implementation.
        :param pb: Handed unchanged to the parent class implementation.
        :return: Whatever the parent class copy_to() returns.
        :raises errors.TransportNotPossible: If other is an
            HttpTransportBase, since http cannot be written to.

        TODO: if other is LocalTransport, is it possible to
              do better than put(get())?
        """
        # At this point HttpTransport might be able to check and see if
        # the remote location is the same, and rather than download, and
        # then upload, it could just issue a remote copy_this command.
        if isinstance(other, HttpTransportBase):
            raise errors.TransportNotPossible(
                'http cannot be the target of copy_to()')
        parent = super(HttpTransportBase, self)
        return parent.copy_to(relpaths, other, mode=mode, pb=pb)
907.1.28 by John Arbash Meinel
Added pb to function that were missing, implemented a basic double-dispatch copy_to function.
402
907.1.21 by John Arbash Meinel
Adding http transport as a valid transport protocol.
403
    def move(self, rel_from, rel_to):
        """Move the item at rel_from to the location at rel_to.

        Not available over http; both arguments are ignored.

        :raises errors.TransportNotPossible: Always.
        """
        raise errors.TransportNotPossible('http does not support move()')
907.1.21 by John Arbash Meinel
Adding http transport as a valid transport protocol.
406
407
    def delete(self, relpath):
        """Delete the item at relpath.

        Not available over http; relpath is ignored.

        :raises errors.TransportNotPossible: Always.
        """
        raise errors.TransportNotPossible('http does not support delete()')
907.1.21 by John Arbash Meinel
Adding http transport as a valid transport protocol.
410
2634.1.1 by Robert Collins
(robertc) Reinstate the accidentally backed out external_url patch.
411
    def external_url(self):
        """See bzrlib.transport.Transport.external_url.

        :return: The result of _unsplit_url() applied to the unqualified
            scheme and the user, password, host, port and path of this
            transport's parsed url.
        """
        # HTTP URL's are externally usable as long as they don't mention their
        # implementation qualifier
        url = self._parsed_url
        return self._unsplit_url(self._unqualified_scheme,
                                 url.user, url.password,
                                 url.host, url.port,
                                 url.path)
2634.1.1 by Robert Collins
(robertc) Reinstate the accidentally backed out external_url patch.
419
1530.1.3 by Robert Collins
transport implementations now tested consistently.
420
    def is_readonly(self):
        """See Transport.is_readonly.

        :return: Always True.
        """
        return True
423
1400.1.1 by Robert Collins
implement a basic test for the ui branch command from http servers
424
    def listable(self):
        """See Transport.listable.

        :return: Always False.
        """
        return False
907.1.21 by John Arbash Meinel
Adding http transport as a valid transport protocol.
427
428
    def stat(self, relpath):
        """Return the stat information for a file.

        Not available over http; relpath is ignored.

        :raises errors.TransportNotPossible: Always.
        """
        raise errors.TransportNotPossible('http does not support stat()')
907.1.21 by John Arbash Meinel
Adding http transport as a valid transport protocol.
432
907.1.24 by John Arbash Meinel
Remote functionality work.
433
    def lock_read(self, relpath):
        """Lock the given file for shared (read) access.

        No real lock is taken: the returned object only remembers relpath
        and its unlock() does nothing.

        :param relpath: Stored on the returned object as ``path``.
        :return: A lock object, which should be passed to Transport.unlock()
        """
        # The old RemoteBranch ignore lock for reading, so we will
        # continue that tradition and return a bogus lock object.
        class BogusLock(object):
            """Placeholder lock that holds a path and nothing else."""

            def __init__(self, path):
                self.path = path

            def unlock(self):
                pass

        return BogusLock(relpath)
445
446
    def lock_write(self, relpath):
        """Lock the given file for exclusive (write) access.

        WARNING: many transports do not support this, so try to avoid using
        it. This transport is one of them: the call always raises.

        :return: A lock object, which should be passed to Transport.unlock()
        :raises errors.TransportNotPossible: Always.
        """
        raise errors.TransportNotPossible('http does not support lock_write()')
1530.1.1 by Robert Collins
Minimal infrastructure to test TransportTestProviderAdapter.
453
2520.2.1 by Vincent Ladeuil
First step to fix #115209 use _coalesce_offsets like other transports.
454
    def _attempted_range_header(self, offsets, tail_amount):
        """Prepare a HTTP Range header at a level the server should accept.

        The level is chosen by ``self._range_hint``:

        * 'multi': every offset (and the tail) is described as given.
        * 'single': the offsets are merged into one range running from the
          start of the first offset to the end of the last one. That cannot
          be combined with a tail, so None is returned when both are asked.
        * anything else: no range is attempted.

        :param offsets: A sequence of objects with ``start`` and ``length``
            attributes.
        :param tail_amount: The amount to get from the end of the file, or
            0/None for no tail.
        :return: the range header representing offsets/tail_amount or None if
            no header can be built.
        """
        hint = self._range_hint
        if hint == 'multi':
            # Generate the header describing all offsets
            return self._range_header(offsets, tail_amount)
        if hint != 'single':
            return None

        if len(offsets) == 0:
            # Only tail_amount requested, leave _range_header do its work
            return self._range_header(offsets, tail_amount)
        if tail_amount not in (0, None):
            # Nothing we can do here to combine ranges with tail_amount in a
            # single range, just return None. The whole file should be
            # downloaded.
            return None

        # Combine all the requested ranges into a single encompassing one
        first = offsets[0]
        last = offsets[-1]
        span = last.start + last.length - first.start
        whole = self._coalesce_offsets([(first.start, span)],
                                       limit=0, fudge_factor=0)
        return self._range_header(list(whole), 0)
486
1786.1.27 by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration.
487
    @staticmethod
    def _range_header(ranges, tail_amount):
        """Turn a list of bytes ranges into a HTTP Range header value.

        Each range is rendered as ``first-last`` with an inclusive last
        byte, a tail as ``-amount``, and the pieces are joined with commas.
        No ``bytes=`` prefix is added.

        :param ranges: A list of _CoalescedOffset
        :param tail_amount: The amount to get from the end of the file.

        :return: HTTP range header string.

        At least a non-empty ranges *or* a tail_amount must be
        provided.
        """
        strings = ['%d-%d' % (offset.start, offset.start + offset.length - 1)
                   for offset in ranges]
        if tail_amount:
            strings.append('-%d' % tail_amount)
        return ','.join(strings)
1750.1.2 by Michael Ellerman
Add support for HTTP multipart ranges and hook it into http+urllib.
508
3878.4.5 by Vincent Ladeuil
Don't use the exception as a parameter for _redirected_to.
509
    def _redirected_to(self, source, target):
        """Returns a transport suitable to re-issue a redirected request.

        :param source: The source url as returned by the server.
        :param target: The target url as returned by the server.

        The redirection can be handled only if the relpath involved is not
        renamed by the redirection.

        :returns: A transport or None.
        """
        def relpath(abspath):
            """Returns the path relative to our base.

            The constraints are weaker than the real relpath method because the
            abspath is coming from the server and may slightly differ from our
            base. We don't check the scheme, host, port, user, password parts,
            relying on the caller to give us a proper url (i.e. one returned by
            the server mirroring the one we sent).
            """
            parsed_url = self._split_url(abspath)
            pl = len(self._parsed_url.path)
            return parsed_url.path[pl:].strip('/')

        source_relpath = relpath(source)
        if not target.endswith(source_relpath):
            # The final part of the url has been renamed, we can't handle the
            # redirection.
            return None

        parsed_url = self._split_url(target)
        # Recalculate base path. This is needed to ensure that when the
        # redirected tranport will be used to re-try whatever request was
        # redirected, we end up with the same url
        base_path = parsed_url.path
        if source_relpath:
            # Only strip a non-empty tail: path[:-0] is the empty string, not
            # the whole path, so an empty relpath must leave the path alone.
            base_path = base_path[:-len(source_relpath)]

        if parsed_url.scheme not in ('http', 'https'):
            # Redirected to a different protocol
            new_url = self._unsplit_url(parsed_url.scheme,
                                        parsed_url.user, parsed_url.password,
                                        parsed_url.host, parsed_url.port,
                                        base_path)
            return transport.get_transport(new_url)

        # Same protocol family (i.e. http[s]), we will preserve the same
        # http client implementation when a redirection occurs from one to
        # the other (otherwise users may be surprised that bzr switches
        # from one implementation to the other, and devs may suffer
        # debugging it).
        if (parsed_url.scheme == self._unqualified_scheme
            and parsed_url.host == self._parsed_url.host
            and parsed_url.port == self._parsed_url.port
            and (parsed_url.user is None or
                 parsed_url.user == self._parsed_url.user)):
            # If a user is specified, it should match, we don't care about
            # passwords, wrong passwords will be rejected anyway.
            return self.clone(base_path)

        # Rebuild the url preserving the scheme qualification and the
        # credentials (if they don't apply, the redirected to server
        # will tell us, but if they do apply, we avoid prompting the
        # user)
        redir_scheme = parsed_url.scheme + '+' + self._impl_name
        new_url = self._unsplit_url(redir_scheme,
                                    self._parsed_url.user,
                                    self._parsed_url.password,
                                    parsed_url.host, parsed_url.port,
                                    base_path)
        return transport.get_transport(new_url)
578
3734.2.3 by Vincent Ladeuil
Don't use multiple inheritance for http smart medium since we
579
580
# TODO: May be better located in smart/medium.py with the other
581
# SmartMedium classes
582
class SmartClientHTTPMedium(medium.SmartClientMedium):
    """Smart client medium that sends its requests through an http transport.

    The medium is created from an http transport, uses that transport's base
    as its own, and POSTs request bytes through the transport's _post().
    """

    def __init__(self, http_transport):
        super(SmartClientHTTPMedium, self).__init__(http_transport.base)
        # We don't want to create a circular reference between the http
        # transport and its associated medium. Since the transport will live
        # longer than the medium, the medium keep only a weak reference to its
        # transport.
        self._http_transport_ref = weakref.ref(http_transport)

    def get_request(self):
        """Return a new SmartClientHTTPMediumRequest bound to this medium."""
        return SmartClientHTTPMediumRequest(self)

    def should_probe(self):
        """Always True for this medium."""
        return True

    def remote_path_from_transport(self, transport):
        """Return the unquoted path of transport relative to this medium.

        :param transport: A transport whose ``base`` may carry a 'bzr+'
            prefix.
        """
        # Strip the optional 'bzr+' prefix from transport so it will have the
        # same scheme as self.
        transport_base = transport.base
        if transport_base.startswith('bzr+'):
            transport_base = transport_base[4:]
        rel_url = urlutils.relative_url(self.base, transport_base)
        return urllib.unquote(rel_url)

    def send_http_smart_request(self, bytes):
        """POST bytes via the http transport and return the response body.

        :param bytes: The serialised request to send.
        :return: The file-like response body given back by _post().
        :raises errors.SmartProtocolError: If the response code is not 200,
            or if the transport raises InvalidHttpResponse or
            ConnectionReset.
        """
        try:
            # Get back the http_transport hold by the weak reference
            t = self._http_transport_ref()
            code, body_filelike = t._post(bytes)
            if code != 200:
                # Qualified with 'errors.' so the name resolves and matches
                # the except clause below, which turns it into a
                # SmartProtocolError.
                raise errors.InvalidHttpResponse(
                    t._remote_path('.bzr/smart'),
                    'Expected 200 response code, got %r' % (code,))
        except (errors.InvalidHttpResponse, errors.ConnectionReset) as e:
            raise errors.SmartProtocolError(str(e))
        return body_filelike

    def _report_activity(self, bytes, direction):
        """See SmartMedium._report_activity.

        Does nothing; the underlying plain HTTP transport will report the
        activity that this medium would report.
        """
        pass

    def disconnect(self):
        """See SmartClientMedium.disconnect()."""
        t = self._http_transport_ref()
        # The weak reference yields None once the transport is gone; there is
        # then nothing left to disconnect.
        if t is not None:
            t.disconnect()
632
3734.2.3 by Vincent Ladeuil
Don't use multiple inheritance for http smart medium since we
633
634
# TODO: May be better located in smart/medium.py with the other
635
# SmartMediumRequest classes
2018.5.2 by Andrew Bennetts
Start splitting bzrlib/transport/smart.py into a package.
636
class SmartClientHTTPMediumRequest(medium.SmartClientMediumRequest):
    """A SmartClientMediumRequest that works with an HTTP medium.

    Request bytes are accumulated in memory and sent in one go when writing
    is finished; the response is then read from the file-like body handed
    back by the medium.
    """

    def __init__(self, client_medium):
        medium.SmartClientMediumRequest.__init__(self, client_medium)
        # Everything accepted so far, sent by _finished_writing().
        self._buffer = ''

    def _accept_bytes(self, bytes):
        """Queue bytes; nothing is sent until _finished_writing()."""
        self._buffer += bytes

    def _finished_writing(self):
        """Send the buffered request and keep the response body for reading."""
        data = self._medium.send_http_smart_request(self._buffer)
        self._response_body = data

    def _read_bytes(self, count):
        """See SmartClientMediumRequest._read_bytes."""
        return self._response_body.read(count)

    def _read_line(self):
        """Read one line from the response body.

        :raises AssertionError: If medium._get_line() hands back excess
            bytes, which this request has nowhere to keep.
        """
        line, excess = medium._get_line(self._response_body.read)
        if excess != '':
            raise AssertionError(
                '_get_line returned excess bytes, but this mediumrequest '
                'cannot handle excess. (%r)' % (excess,))
        return line

    def _finished_reading(self):
        """See SmartClientMediumRequest._finished_reading."""
        pass
4912.2.1 by Martin Pool
Add unhtml_roughly
665
666
4912.2.4 by Martin Pool
Add test for unhtml_roughly, and truncate at 1000 bytes
667
def unhtml_roughly(maybe_html, length_limit=1000):
    """Very approximate html->text translation, for presenting error bodies.

    Every tag, newline and ``&nbsp;`` entity is replaced by a single space;
    nothing else is decoded.

    :param maybe_html: The text to clean up.
    :param length_limit: Truncate the result to this many characters.
    :return: The cleaned, truncated text.

    >>> unhtml_roughly("<b>bad</b> things happened\\n")
    ' bad  things happened '
    """
    text = re.sub(r"(<[^>]*>|\n|&nbsp;)", " ", maybe_html)
    return text[:length_limit]