~bzr-pqm/bzr/bzr.dev

5247.1.4 by Vincent Ladeuil
Merge cleanup into first-try
1
# Copyright (C) 2006-2010 Canonical Ltd
1540.3.18 by Martin Pool
Style review fixes (thanks robertc)
2
#
1540.3.1 by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib.
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
1540.3.18 by Martin Pool
Style review fixes (thanks robertc)
7
#
1540.3.1 by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
1540.3.18 by Martin Pool
Style review fixes (thanks robertc)
12
#
1540.3.1 by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
4183.7.1 by Sabin Iacob
update FSF mailing address
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
1540.3.1 by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib.
16
17
"""http/https transport using pycurl"""
18
19
# TODO: test reporting of http errors
1887.1.1 by Adeodato Simó
Do not separate paragraphs in the copyright statement with blank lines,
20
#
1616.1.9 by Martin Pool
Set Cache-control: max-age=0 and Pragma: no-cache
21
# TODO: Transport option to control caching of particular requests; broadly we
22
# would want to offer "caching allowed" or "must revalidate", depending on
23
# whether we expect a particular file will be modified after it's committed.
24
# It's probably safer to just always revalidate.  mbp 20060321
25
2164.2.16 by Vincent Ladeuil
Add tests.
26
# TODO: Some refactoring could be done to avoid the strange idiom
27
# used to capture data and headers while setting up the request
28
# (and having to pass 'header' to _curl_perform to handle
29
# redirections). This could be achieved by creating a
30
# specialized Curl object and returning code, headers and data
31
# from _curl_perform.  Not done because we may deprecate pycurl in the
32
# future -- vila 20070212
33
1786.1.42 by John Arbash Meinel
Update _extract_headers, make it less generic, and non recursive.
34
from cStringIO import StringIO
3059.2.2 by Vincent Ladeuil
Read http responses on demand without buffering the whole body
35
import httplib
1540.3.5 by Martin Pool
Raise exception if unicode is passed to transport; formatting fixes
36
2004.1.25 by v.ladeuil+lp at free
Shuffle http related test code. Hopefully it ends up at the right place :)
37
from bzrlib import (
3052.3.3 by Vincent Ladeuil
Add -Dhttp support.
38
    debug,
2004.1.25 by v.ladeuil+lp at free
Shuffle http related test code. Hopefully it ends up at the right place :)
39
    errors,
5957.2.3 by Vincent Ladeuil
Mask credentials in the -Dhttp logging
40
    trace,
2004.1.25 by v.ladeuil+lp at free
Shuffle http related test code. Hopefully it ends up at the right place :)
41
    )
1540.3.15 by Martin Pool
[merge] large merge to sync with bzr.dev
42
import bzrlib
2004.1.25 by v.ladeuil+lp at free
Shuffle http related test code. Hopefully it ends up at the right place :)
43
from bzrlib.transport.http import (
2298.5.1 by Alexander Belchenko
Bugfix #82086: Searching location of CA bundle for PyCurl in env variable (CURL_CA_BUNDLE), and on win32 along the PATH
44
    ca_bundle,
2004.1.25 by v.ladeuil+lp at free
Shuffle http related test code. Hopefully it ends up at the right place :)
45
    HttpTransportBase,
46
    response,
4912.2.2 by Martin Pool
Include rough unhtml in pycurl error messages
47
    unhtml_roughly,
2004.1.25 by v.ladeuil+lp at free
Shuffle http related test code. Hopefully it ends up at the right place :)
48
    )
1540.3.1 by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib.
49
1540.3.7 by Martin Pool
Prepare to select a transport depending on what dependencies can be satisfied.
50
try:
51
    import pycurl
52
except ImportError, e:
5957.2.3 by Vincent Ladeuil
Mask credentials in the -Dhttp logging
53
    trace.mutter("failed to import pycurl: %s", e)
3052.3.3 by Vincent Ladeuil
Add -Dhttp support.
54
    raise errors.DependencyNotPresent('pycurl', e)
1540.3.7 by Martin Pool
Prepare to select a transport depending on what dependencies can be satisfied.
55
1684.1.5 by Martin Pool
(patch) check that pycurl will actuall initialize as well as load (Alexander)
56
try:
57
    # see if we can actually initialize PyCurl - sometimes it will load but
58
    # fail to start up due to this bug:
3943.8.1 by Marius Kruger
remove all trailing whitespace from bzr source
59
    #
1684.1.5 by Martin Pool
(patch) check that pycurl will actuall initialize as well as load (Alexander)
60
    #   32. (At least on Windows) If libcurl is built with c-ares and there's
61
    #   no DNS server configured in the system, the ares_init() call fails and
62
    #   thus curl_easy_init() fails as well. This causes weird effects for
63
    #   people who use numerical IP addresses only.
64
    #
65
    # reported by Alexander Belchenko, 2006-04-26
66
    pycurl.Curl()
67
except pycurl.error, e:
5957.2.3 by Vincent Ladeuil
Mask credentials in the -Dhttp logging
68
    trace.mutter("failed to initialize pycurl: %s", e)
3052.3.3 by Vincent Ladeuil
Add -Dhttp support.
69
    raise errors.DependencyNotPresent('pycurl', e)
1684.1.5 by Martin Pool
(patch) check that pycurl will actuall initialize as well as load (Alexander)
70
1540.3.7 by Martin Pool
Prepare to select a transport depending on what dependencies can be satisfied.
71
2872.6.1 by Vincent Ladeuil
Fix bug #147530 by enabling more robust error code definitions.
72
73
74
def _get_pycurl_errcode(symbol, default):
    """Return the numerical error code pycurl defines for ``symbol``.

    Not every pycurl implementation defines the same error code symbols, and
    old versions never define some of them (whether or not they can return
    the corresponding error code).  Looking the symbol up here, with a
    fallback, lets this module define every symbol it cares about.  Note:
    this also defines symbols for errors that older versions will never
    return, which is fine.

    :param symbol: Name of the attribute to look up in the pycurl module.
    :param default: Value to return when pycurl does not define ``symbol``.
    :return: The value pycurl defines for ``symbol``, or ``default``.
    """
    return getattr(pycurl, symbol, default)
85
86
# Error codes this module reacts to.  Each value comes from pycurl when it
# defines the symbol; otherwise the literal given as second argument is used.

# Resolving names and connecting.
CURLE_COULDNT_RESOLVE_PROXY = _get_pycurl_errcode('E_COULDNT_RESOLVE_PROXY', 5)
CURLE_COULDNT_RESOLVE_HOST = _get_pycurl_errcode('E_COULDNT_RESOLVE_HOST', 6)
CURLE_COULDNT_CONNECT = _get_pycurl_errcode('E_COULDNT_CONNECT', 7)

# Problems while sending the request or receiving the response.
CURLE_PARTIAL_FILE = _get_pycurl_errcode('E_PARTIAL_FILE', 18)
CURLE_GOT_NOTHING = _get_pycurl_errcode('E_GOT_NOTHING', 52)
CURLE_SEND_ERROR = _get_pycurl_errcode('E_SEND_ERROR', 55)
CURLE_RECV_ERROR = _get_pycurl_errcode('E_RECV_ERROR', 56)

# CA certificate related errors.
CURLE_SSL_CACERT = _get_pycurl_errcode('E_SSL_CACERT', 60)
CURLE_SSL_CACERT_BADFILE = _get_pycurl_errcode('E_SSL_CACERT_BADFILE', 77)
2872.6.1 by Vincent Ladeuil
Fix bug #147530 by enabling more robust error code definitions.
95
96
1540.3.1 by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib.
97
class PyCurlTransport(HttpTransportBase):
1540.3.3 by Martin Pool
Review updates of pycurl transport
98
    """http client transport using pycurl
99
100
    PyCurl is a Python binding to the C "curl" multiprotocol client.
101
2004.1.30 by v.ladeuil+lp at free
Fix #62276 and #62029 by providing a more robust http range handling.
102
    This transport can be significantly faster than the builtin
103
    Python client.  Advantages include: DNS caching.
1540.3.3 by Martin Pool
Review updates of pycurl transport
104
    """
105
2485.8.59 by Vincent Ladeuil
Update from review comments.
106
    def __init__(self, base, _from_transport=None):
3878.4.6 by Vincent Ladeuil
Fix bug #270863 by preserving 'bzr+http[s]' decorator.
107
        super(PyCurlTransport, self).__init__(base, 'pycurl',
108
                                              _from_transport=_from_transport)
3878.4.2 by Vincent Ladeuil
Fix bug #265070 by providing a finer sieve for accepted redirections.
109
        if self._unqualified_scheme == 'https':
2294.3.1 by Vincent Ladeuil
Fix #85305 by issuing an exception instead of a traceback.
110
            # Check availability of https into pycurl supported
111
            # protocols
112
            supported = pycurl.version_info()[8]
113
            if 'https' not in supported:
3052.3.3 by Vincent Ladeuil
Add -Dhttp support.
114
                raise errors.DependencyNotPresent('pycurl', 'no https support')
2298.5.1 by Alexander Belchenko
Bugfix #82086: Searching location of CA bundle for PyCurl in env variable (CURL_CA_BUNDLE), and on win32 along the PATH
115
        self.cabundle = ca_bundle.get_ca_path()
2485.8.41 by Vincent Ladeuil
Finish http refactoring. Test suite passing.
116
117
    def _get_curl(self):
118
        connection = self._get_connection()
119
        if connection is None:
120
            # First connection ever. There is no credentials for pycurl, either
121
            # the password was embedded in the URL or it's not needed. The
122
            # connection for pycurl is just the Curl object, it will not
2485.8.43 by Vincent Ladeuil
Cleaning.
123
            # connect to the http server until the first request (which had
124
            # just called us).
2485.8.41 by Vincent Ladeuil
Finish http refactoring. Test suite passing.
125
            connection = pycurl.Curl()
3133.1.2 by Vincent Ladeuil
Fix #177643 by making pycurl handle url-embedded credentials again.
126
            # First request, initialize credentials.
127
            auth = self._create_auth()
128
            # Proxy handling is out of reach, so we punt
129
            self._set_connection(connection, auth)
2485.8.41 by Vincent Ladeuil
Finish http refactoring. Test suite passing.
130
        return connection
1540.3.1 by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib.
131
5247.2.12 by Vincent Ladeuil
Ensure that all transports close their underlying connection.
132
    def disconnect(self):
133
        connection = self._get_connection()
134
        if connection is not None:
135
            connection.close()
136
1540.3.3 by Martin Pool
Review updates of pycurl transport
137
    def has(self, relpath):
1786.1.32 by John Arbash Meinel
cleanup pass, allow pycurl connections to be shared between transports.
138
        """See Transport.has()"""
139
        # We set NO BODY=0 in _get_full, so it should be safe
140
        # to re-use the non-range curl object
2485.8.41 by Vincent Ladeuil
Finish http refactoring. Test suite passing.
141
        curl = self._get_curl()
2485.8.25 by Vincent Ladeuil
Separate abspath from _remote_path, the intents are different.
142
        abspath = self._remote_path(relpath)
1540.3.14 by Martin Pool
[pycurl] Make Curl instance a local variable not a long-lived object.
143
        curl.setopt(pycurl.URL, abspath)
144
        self._set_curl_options(curl)
2018.2.28 by Andrew Bennetts
Changes in response to review: re-use _base_curl, rather than keeping a seperate _post_curl object; add docstring to test_http.RecordingServer, set is_user_error on some new exceptions.
145
        curl.setopt(pycurl.HTTPGET, 1)
1540.3.3 by Martin Pool
Review updates of pycurl transport
146
        # don't want the body - ie just do a HEAD request
1786.1.27 by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration.
147
        # This means "NO BODY" not 'nobody'
1540.3.14 by Martin Pool
[pycurl] Make Curl instance a local variable not a long-lived object.
148
        curl.setopt(pycurl.NOBODY, 1)
2164.2.16 by Vincent Ladeuil
Add tests.
149
        # But we need headers to handle redirections
150
        header = StringIO()
151
        curl.setopt(pycurl.HEADERFUNCTION, header.write)
2004.1.16 by v.ladeuil+lp at free
Add tests against erroneous http status lines.
152
        # In some erroneous cases, pycurl will emit text on
153
        # stdout if we don't catch it (see InvalidStatus tests
154
        # for one such occurrence).
155
        blackhole = StringIO()
156
        curl.setopt(pycurl.WRITEFUNCTION, blackhole.write)
2164.2.16 by Vincent Ladeuil
Add tests.
157
        self._curl_perform(curl, header)
1540.3.14 by Martin Pool
[pycurl] Make Curl instance a local variable not a long-lived object.
158
        code = curl.getinfo(pycurl.HTTP_CODE)
159
        if code == 404: # not found
160
            return False
2164.2.16 by Vincent Ladeuil
Add tests.
161
        elif code == 200: # "ok"
1540.3.14 by Martin Pool
[pycurl] Make Curl instance a local variable not a long-lived object.
162
            return True
163
        else:
1612.1.1 by Martin Pool
Raise errors correctly on pycurl connection failure
164
            self._raise_curl_http_error(curl)
2000.3.1 by v.ladeuil+lp at free
Better connection sharing by using only one curl object.
165
2520.2.1 by Vincent Ladeuil
First step to fix #115209 use _coalesce_offsets like other transports.
166
    def _get(self, relpath, offsets, tail_amount=0):
1786.1.27 by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration.
167
        # This just switches based on the type of request
2520.2.1 by Vincent Ladeuil
First step to fix #115209 use _coalesce_offsets like other transports.
168
        if offsets is not None or tail_amount not in (0, None):
169
            return self._get_ranged(relpath, offsets, tail_amount=tail_amount)
1786.1.27 by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration.
170
        else:
2164.2.5 by v.ladeuil+lp at free
Simpler implementation using inspect. 'hints' is a kwargs.
171
            return self._get_full(relpath)
2000.3.1 by v.ladeuil+lp at free
Better connection sharing by using only one curl object.
172
1786.1.27 by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration.
173
    def _setup_get_request(self, curl, relpath):
2018.2.6 by Andrew Bennetts
HTTP client starting to work (pycurl for the moment).
174
        # Make sure we do a GET request. versions > 7.14.1 also set the
175
        # NO BODY flag, but we'll do it ourselves in case it is an older
176
        # pycurl version
177
        curl.setopt(pycurl.NOBODY, 0)
178
        curl.setopt(pycurl.HTTPGET, 1)
179
        return self._setup_request(curl, relpath)
180
181
    def _setup_request(self, curl, relpath):
1786.1.27 by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration.
182
        """Do the common setup stuff for making a request
183
184
        :param curl: The curl object to place the request on
185
        :param relpath: The relative path that we want to get
3943.8.1 by Marius Kruger
remove all trailing whitespace from bzr source
186
        :return: (abspath, data, header)
1786.1.27 by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration.
187
                 abspath: full url
188
                 data: file that will be filled with the body
189
                 header: file that will be filled with the headers
190
        """
2485.8.25 by Vincent Ladeuil
Separate abspath from _remote_path, the intents are different.
191
        abspath = self._remote_path(relpath)
1786.1.27 by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration.
192
        curl.setopt(pycurl.URL, abspath)
193
        self._set_curl_options(curl)
194
195
        data = StringIO()
196
        header = StringIO()
197
        curl.setopt(pycurl.WRITEFUNCTION, data.write)
198
        curl.setopt(pycurl.HEADERFUNCTION, header.write)
199
200
        return abspath, data, header
201
2164.2.5 by v.ladeuil+lp at free
Simpler implementation using inspect. 'hints' is a kwargs.
202
    def _get_full(self, relpath):
1786.1.27 by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration.
203
        """Make a request for the entire file"""
2485.8.41 by Vincent Ladeuil
Finish http refactoring. Test suite passing.
204
        curl = self._get_curl()
1786.1.27 by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration.
205
        abspath, data, header = self._setup_get_request(curl, relpath)
2164.2.16 by Vincent Ladeuil
Add tests.
206
        self._curl_perform(curl, header)
1786.1.27 by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration.
207
208
        code = curl.getinfo(pycurl.HTTP_CODE)
209
        data.seek(0)
210
211
        if code == 404:
3052.3.3 by Vincent Ladeuil
Add -Dhttp support.
212
            raise errors.NoSuchFile(abspath)
1786.1.27 by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration.
213
        if code != 200:
2004.1.25 by v.ladeuil+lp at free
Shuffle http related test code. Hopefully it ends up at the right place :)
214
            self._raise_curl_http_error(
215
                curl, 'expected 200 or 404 for full response.')
1786.1.27 by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration.
216
217
        return code, data
218
3059.2.11 by Vincent Ladeuil
Fix typos mentioned by spiv.
219
    # The parent class use 0 to minimize the requests, but since we can't
3059.2.7 by Vincent Ladeuil
Allow pycurl users to watch the blinkenlights and fix a bug when ranges are contiguous.
220
    # exploit the results as soon as they are received (pycurl limitation) we'd
3945.1.6 by Vincent Ladeuil
Fix debug handling for pycurl and implement pycurl http activity
221
    # better issue more requests and provide a more responsive UI incurring
222
    # more latency costs.
3059.2.17 by Vincent Ladeuil
Limit GET requests by body size instead of number of ranges.
223
    # If you modify this, think about modifying the comment in http/__init__.py
3059.2.7 by Vincent Ladeuil
Allow pycurl users to watch the blinkenlights and fix a bug when ranges are contiguous.
224
    # too.
3059.2.17 by Vincent Ladeuil
Limit GET requests by body size instead of number of ranges.
225
    _get_max_size = 4 * 1024 * 1024
3059.2.7 by Vincent Ladeuil
Allow pycurl users to watch the blinkenlights and fix a bug when ranges are contiguous.
226
2520.2.1 by Vincent Ladeuil
First step to fix #115209 use _coalesce_offsets like other transports.
227
    def _get_ranged(self, relpath, offsets, tail_amount):
1786.1.27 by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration.
228
        """Make a request for just part of the file."""
2485.8.41 by Vincent Ladeuil
Finish http refactoring. Test suite passing.
229
        curl = self._get_curl()
1786.1.27 by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration.
230
        abspath, data, header = self._setup_get_request(curl, relpath)
231
2520.2.1 by Vincent Ladeuil
First step to fix #115209 use _coalesce_offsets like other transports.
232
        range_header = self._attempted_range_header(offsets, tail_amount)
2004.1.30 by v.ladeuil+lp at free
Fix #62276 and #62029 by providing a more robust http range handling.
233
        if range_header is None:
234
            # Forget ranges, the server can't handle them
235
            return self._get_full(relpath)
236
2481.3.1 by Vincent Ladeuil
Fix bug #112719 by using the right range header.
237
        self._curl_perform(curl, header, ['Range: bytes=%s' % range_header])
1786.1.33 by John Arbash Meinel
Cleanup pass #2
238
        data.seek(0)
239
1786.1.27 by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration.
240
        code = curl.getinfo(pycurl.HTTP_CODE)
3059.2.2 by Vincent Ladeuil
Read http responses on demand without buffering the whole body
241
242
        if code == 404: # not found
243
            raise errors.NoSuchFile(abspath)
244
        elif code in (400, 416):
245
            # We don't know which, but one of the ranges we specified was
246
            # wrong.
247
            raise errors.InvalidHttpRange(abspath, range_header,
248
                                          'Server return code %d'
249
                                          % curl.getinfo(pycurl.HTTP_CODE))
3059.2.4 by Vincent Ladeuil
Fix typo so that all tests pass now (after merging bzr.dev to get rid of
250
        msg = self._parse_headers(header)
3945.1.8 by Vincent Ladeuil
Add more tests, fix pycurl double handling, revert previous tracking.
251
        return code, response.handle_response(abspath, code, msg, data)
3059.2.2 by Vincent Ladeuil
Read http responses on demand without buffering the whole body
252
253
    def _parse_headers(self, status_and_headers):
254
        """Transform the headers provided by curl into an HTTPMessage"""
255
        status_and_headers.seek(0)
256
        # Ignore status line
257
        status_and_headers.readline()
258
        msg = httplib.HTTPMessage(status_and_headers)
259
        return msg
1786.1.4 by John Arbash Meinel
Adding HEADERFUNCTION which lets us get any response codes we want.
260
2018.2.6 by Andrew Bennetts
HTTP client starting to work (pycurl for the moment).
261
    def _post(self, body_bytes):
3651.1.1 by Vincent Ladeuil
Fix bug #225020 by catching CURLE_SEND_ERROR error.
262
        curl = self._get_curl()
263
        abspath, data, header = self._setup_request(curl, '.bzr/smart')
264
        curl.setopt(pycurl.POST, 1)
2018.2.6 by Andrew Bennetts
HTTP client starting to work (pycurl for the moment).
265
        fake_file = StringIO(body_bytes)
266
        curl.setopt(pycurl.POSTFIELDSIZE, len(body_bytes))
267
        curl.setopt(pycurl.READFUNCTION, fake_file.read)
2000.3.4 by v.ladeuil+lp at free
Merge bzr.dev
268
        # We override the Expect: header so that pycurl will send the POST
269
        # body immediately.
3651.1.1 by Vincent Ladeuil
Fix bug #225020 by catching CURLE_SEND_ERROR error.
270
        try:
5514.1.1 by Vincent Ladeuil
Correctly set the Content-Type header when POSTing.
271
            self._curl_perform(curl, header,
272
                               ['Expect: ',
273
                                'Content-Type: application/octet-stream'])
3651.1.1 by Vincent Ladeuil
Fix bug #225020 by catching CURLE_SEND_ERROR error.
274
        except pycurl.error, e:
275
            if e[0] == CURLE_SEND_ERROR:
3651.1.2 by Vincent Ladeuil
Fix bug #225020 by catching the CURLE_SEND_ERROR error more broadly.
276
                # When talking to an HTTP/1.0 server, getting a 400+ error code
277
                # triggers a bug in some combinations of curl/kernel in rare
278
                # occurrences. Basically, the server closes the connection
279
                # after sending the error but the client (having received and
280
                # parsed the response) still try to send the request body (see
281
                # bug #225020 and its upstream associated bug).  Since the
282
                # error code and the headers are known to be available, we just
283
                # swallow the exception, leaving the upper levels handle the
284
                # 400+ error.
5957.2.3 by Vincent Ladeuil
Mask credentials in the -Dhttp logging
285
                trace.mutter('got pycurl error in POST: %s, %s, %s, url: %s ',
286
                             e[0], e[1], e, abspath)
3651.1.2 by Vincent Ladeuil
Fix bug #225020 by catching the CURLE_SEND_ERROR error more broadly.
287
            else:
288
                # Re-raise otherwise
289
                raise
2018.2.6 by Andrew Bennetts
HTTP client starting to work (pycurl for the moment).
290
        data.seek(0)
291
        code = curl.getinfo(pycurl.HTTP_CODE)
3059.2.2 by Vincent Ladeuil
Read http responses on demand without buffering the whole body
292
        msg = self._parse_headers(header)
3945.1.8 by Vincent Ladeuil
Add more tests, fix pycurl double handling, revert previous tracking.
293
        return code, response.handle_response(abspath, code, msg, data)
3956.2.2 by John Arbash Meinel
Start using report_activity for HTTP (pycurl + urllib)
294
2018.2.6 by Andrew Bennetts
HTTP client starting to work (pycurl for the moment).
295
4912.2.2 by Martin Pool
Include rough unhtml in pycurl error messages
296
    def _raise_curl_http_error(self, curl, info=None, body=None):
297
        """Common curl->bzrlib error translation.
298
299
        Some methods may choose to override this for particular cases.
300
301
        The URL and code are automatically included as appropriate.
302
303
        :param info: Extra information to include in the message.
5923.1.2 by Vincent Ladeuil
Fix some more prompts to be unicode.
304
305
        :param body: File-like object from which the body of the page can be
306
            read.
4912.2.2 by Martin Pool
Include rough unhtml in pycurl error messages
307
        """
1612.1.1 by Martin Pool
Raise errors correctly on pycurl connection failure
308
        code = curl.getinfo(pycurl.HTTP_CODE)
309
        url = curl.getinfo(pycurl.EFFECTIVE_URL)
4912.2.2 by Martin Pool
Include rough unhtml in pycurl error messages
310
        if body is not None:
311
            response_body = body.read()
312
            plaintext_body = unhtml_roughly(response_body)
313
        else:
314
            response_body = None
315
            plaintext_body = ''
2004.1.27 by v.ladeuil+lp at free
Fix bug #57644 by issuing an explicit error message.
316
        if code == 403:
2004.1.34 by v.ladeuil+lp at free
Cosmetic fix for bug #57644.
317
            raise errors.TransportError(
3430.3.1 by Vincent Ladeuil
Fix #230223 by making both http implementations raise appropriate exceptions.
318
                'Server refuses to fulfill the request (403 Forbidden)'
4912.2.2 by Martin Pool
Include rough unhtml in pycurl error messages
319
                ' for %s: %s' % (url, plaintext_body))
1786.1.40 by John Arbash Meinel
code cleanups from Martin Pool.
320
        else:
2004.1.27 by v.ladeuil+lp at free
Fix bug #57644 by issuing an explicit error message.
321
            if info is None:
322
                msg = ''
323
            else:
324
                msg = ': ' + info
325
            raise errors.InvalidHttpResponse(
5923.1.2 by Vincent Ladeuil
Fix some more prompts to be unicode.
326
                url, 'Unable to handle http code %d%s: %s'
4912.2.2 by Martin Pool
Include rough unhtml in pycurl error messages
327
                % (code, msg, plaintext_body))
1540.3.1 by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib.
328
3945.1.6 by Vincent Ladeuil
Fix debug handling for pycurl and implement pycurl http activity
329
    def _debug_cb(self, kind, text):
330
        if kind in (pycurl.INFOTYPE_HEADER_IN, pycurl.INFOTYPE_DATA_IN,
331
                    pycurl.INFOTYPE_SSL_DATA_IN):
332
            self._report_activity(len(text), 'read')
333
            if (kind == pycurl.INFOTYPE_HEADER_IN
334
                and 'http' in debug.debug_flags):
5957.2.3 by Vincent Ladeuil
Mask credentials in the -Dhttp logging
335
                trace.mutter('< %s' % (text.rstrip(),))
3945.1.6 by Vincent Ladeuil
Fix debug handling for pycurl and implement pycurl http activity
336
        elif kind in (pycurl.INFOTYPE_HEADER_OUT, pycurl.INFOTYPE_DATA_OUT,
337
                      pycurl.INFOTYPE_SSL_DATA_OUT):
338
            self._report_activity(len(text), 'write')
339
            if (kind == pycurl.INFOTYPE_HEADER_OUT
340
                and 'http' in debug.debug_flags):
5957.2.3 by Vincent Ladeuil
Mask credentials in the -Dhttp logging
341
                lines = []
342
                for line in text.rstrip().splitlines():
343
                    # People are often told to paste -Dhttp output to help
344
                    # debug. Don't compromise credentials.
345
                    try:
346
                        header, details = line.split(':', 1)
347
                    except ValueError:
348
                        header = None
349
                    if header in ('Authorization', 'Proxy-Authorization'):
350
                        line = '%s: <masked>' % (header,)
351
                    lines.append(line)
352
                trace.mutter('> ' + '\n> '.join(lines))
3945.1.6 by Vincent Ladeuil
Fix debug handling for pycurl and implement pycurl http activity
353
        elif kind == pycurl.INFOTYPE_TEXT and 'http' in debug.debug_flags:
5957.2.3 by Vincent Ladeuil
Mask credentials in the -Dhttp logging
354
            trace.mutter('* %s' % text.rstrip())
3945.1.6 by Vincent Ladeuil
Fix debug handling for pycurl and implement pycurl http activity
355
1540.3.13 by Martin Pool
Curl should follow http redirects, the same as urllib
356
    def _set_curl_options(self, curl):
357
        """Set options for all requests"""
3052.3.3 by Vincent Ladeuil
Add -Dhttp support.
358
        ua_str = 'bzr/%s (pycurl: %s)' % (bzrlib.__version__, pycurl.version)
1540.3.15 by Martin Pool
[merge] large merge to sync with bzr.dev
359
        curl.setopt(pycurl.USERAGENT, ua_str)
3945.1.6 by Vincent Ladeuil
Fix debug handling for pycurl and implement pycurl http activity
360
        curl.setopt(pycurl.VERBOSE, 1)
361
        curl.setopt(pycurl.DEBUGFUNCTION, self._debug_cb)
2298.5.1 by Alexander Belchenko
Bugfix #82086: Searching location of CA bundle for PyCurl in env variable (CURL_CA_BUNDLE), and on win32 along the PATH
362
        if self.cabundle:
363
            curl.setopt(pycurl.CAINFO, self.cabundle)
3133.1.2 by Vincent Ladeuil
Fix #177643 by making pycurl handle url-embedded credentials again.
364
        # Set accepted auth methods
365
        curl.setopt(pycurl.HTTPAUTH, pycurl.HTTPAUTH_ANY)
366
        curl.setopt(pycurl.PROXYAUTH, pycurl.HTTPAUTH_ANY)
367
        auth = self._get_credentials()
368
        user = auth.get('user', None)
369
        password = auth.get('password', None)
370
        userpass = None
371
        if user is not None:
372
            userpass = user + ':'
373
            if password is not None: # '' is a valid password
374
                userpass += password
375
            curl.setopt(pycurl.USERPWD, userpass)
1540.3.3 by Martin Pool
Review updates of pycurl transport
376
2164.2.16 by Vincent Ladeuil
Add tests.
377
    def _curl_perform(self, curl, header, more_headers=[]):
1540.3.3 by Martin Pool
Review updates of pycurl transport
378
        """Perform curl operation and translate exceptions."""
379
        try:
2000.3.1 by v.ladeuil+lp at free
Better connection sharing by using only one curl object.
380
            # There's no way in http/1.0 to say "must
381
            # revalidate"; we don't want to force it to always
382
            # retrieve.  so just turn off the default Pragma
383
            # provided by Curl.
384
            headers = ['Cache-control: max-age=0',
385
                       'Pragma: no-cache',
386
                       'Connection: Keep-Alive']
387
            curl.setopt(pycurl.HTTPHEADER, headers + more_headers)
1540.3.14 by Martin Pool
[pycurl] Make Curl instance a local variable not a long-lived object.
388
            curl.perform()
1540.3.3 by Martin Pool
Review updates of pycurl transport
389
        except pycurl.error, e:
1786.1.35 by John Arbash Meinel
For pycurl inverse of (NOBODY,1) is (HTTPGET,1) not (NOBODY,0)
390
            url = curl.getinfo(pycurl.EFFECTIVE_URL)
5957.2.3 by Vincent Ladeuil
Mask credentials in the -Dhttp logging
391
            trace.mutter('got pycurl error: %s, %s, %s, url: %s ',
392
                         e[0], e[1], e, url)
2929.3.19 by Vincent Ladeuil
Fix 1.1 related bugs in HTTP server, add HTTPS passing tests (by temporarily disabling pycurl certificate verification).
393
            if e[0] in (CURLE_COULDNT_RESOLVE_HOST,
394
                        CURLE_COULDNT_RESOLVE_PROXY,
2872.6.1 by Vincent Ladeuil
Fix bug #147530 by enabling more robust error code definitions.
395
                        CURLE_COULDNT_CONNECT,
396
                        CURLE_GOT_NOTHING,
2929.3.19 by Vincent Ladeuil
Fix 1.1 related bugs in HTTP server, add HTTPS passing tests (by temporarily disabling pycurl certificate verification).
397
                        CURLE_SSL_CACERT,
398
                        CURLE_SSL_CACERT_BADFILE,
399
                        ):
3052.3.3 by Vincent Ladeuil
Add -Dhttp support.
400
                raise errors.ConnectionError(
401
                    'curl connection error (%s)\non %s' % (e[1], url))
4628.1.2 by Vincent Ladeuil
More complete fix.
402
            elif e[0] == CURLE_RECV_ERROR:
403
                raise errors.ConnectionReset(
404
                    'curl connection error (%s)\non %s' % (e[1], url))
2872.6.1 by Vincent Ladeuil
Fix bug #147530 by enabling more robust error code definitions.
405
            elif e[0] == CURLE_PARTIAL_FILE:
3059.2.2 by Vincent Ladeuil
Read http responses on demand without buffering the whole body
406
                # Pycurl itself has detected a short read.  We do not have all
407
                # the information for the ShortReadvError, but that should be
408
                # enough
2000.3.9 by v.ladeuil+lp at free
The tests that would have help avoid bug #73948 and all that mess :)
409
                raise errors.ShortReadvError(url,
410
                                             offset='unknown', length='unknown',
411
                                             actual='unknown',
412
                                             extra='Server aborted the request')
1786.1.27 by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration.
413
            raise
2164.2.16 by Vincent Ladeuil
Add tests.
414
        code = curl.getinfo(pycurl.HTTP_CODE)
415
        if code in (301, 302, 303, 307):
416
            url = curl.getinfo(pycurl.EFFECTIVE_URL)
3059.2.2 by Vincent Ladeuil
Read http responses on demand without buffering the whole body
417
            msg = self._parse_headers(header)
418
            redirected_to = msg.getheader('location')
2164.2.16 by Vincent Ladeuil
Add tests.
419
            raise errors.RedirectRequested(url,
420
                                           redirected_to,
3878.4.4 by Vincent Ladeuil
Cleanup.
421
                                           is_permanent=(code == 301))
1540.3.1 by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib.
422
1540.3.10 by Martin Pool
[broken] keep hooking pycurl into test framework
423
424
def get_test_permutations():
    """Return the permutations to be used in testing.

    :return: A list of (transport class, server class) tuples: the http one
        and, when the HTTPS server feature is available, an https one.
    """
    from bzrlib.tests import features, http_server

    result = [(PyCurlTransport, http_server.HttpServer_PyCurl)]
    if not features.HTTPSServerFeature.available():
        return result

    from bzrlib.tests import https_server, ssl_certs

    # Same transport, except that the CA bundle is the 'ca.crt' file provided
    # by ssl_certs.
    class HTTPS_pycurl_transport(PyCurlTransport):

        def __init__(self, base, _from_transport=None):
            super(HTTPS_pycurl_transport, self).__init__(base,
                                                         _from_transport)
            self.cabundle = str(ssl_certs.build_path('ca.crt'))

    result.append((HTTPS_pycurl_transport, https_server.HTTPSServer_PyCurl))
    return result