~bzr-pqm/bzr/bzr.dev

1540.3.1 by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib.
1
# Copyright (C) 2006 Canonical Ltd
1540.3.18 by Martin Pool
Style review fixes (thanks robertc)
2
#
1540.3.1 by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib.
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
1540.3.18 by Martin Pool
Style review fixes (thanks robertc)
7
#
1540.3.1 by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
1540.3.18 by Martin Pool
Style review fixes (thanks robertc)
12
#
1540.3.1 by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
4183.7.1 by Sabin Iacob
update FSF mailing address
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
1540.3.1 by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib.
16
17
"""http/https transport using pycurl"""
18
19
# TODO: test reporting of http errors
1887.1.1 by Adeodato Simó
Do not separate paragraphs in the copyright statement with blank lines,
20
#
1616.1.9 by Martin Pool
Set Cache-control: max-age=0 and Pragma: no-cache
21
# TODO: Transport option to control caching of particular requests; broadly we
22
# would want to offer "caching allowed" or "must revalidate", depending on
23
# whether we expect a particular file will be modified after it's committed.
24
# It's probably safer to just always revalidate.  mbp 20060321
25
2164.2.16 by Vincent Ladeuil
Add tests.
26
# TODO: Some refactoring could be done to avoid the strange idiom
27
# used to capture data and headers while setting up the request
28
# (and having to pass 'header' to _curl_perform to handle
29
# redirections). This could be achieved by creating a
30
# specialized Curl object and returning code, headers and data
31
# from _curl_perform.  Not done because we may deprecate pycurl in the
32
# future -- vila 20070212
33
1612.1.1 by Martin Pool
Raise errors correctly on pycurl connection failure
34
import os
1786.1.42 by John Arbash Meinel
Update _extract_headers, make it less generic, and non recursive.
35
from cStringIO import StringIO
3059.2.2 by Vincent Ladeuil
Read http responses on demand without buffering the whole body
36
import httplib
2298.5.1 by Alexander Belchenko
Bugfix #82086: Searching location of CA bundle for PyCurl in env variable (CURL_CA_BUNDLE), and on win32 along the PATH
37
import sys
1540.3.5 by Martin Pool
Raise exception if unicode is passed to transport; formatting fixes
38
2004.1.25 by v.ladeuil+lp at free
Shuffle http related test code. Hopefully it ends up at the right place :)
39
from bzrlib import (
3052.3.3 by Vincent Ladeuil
Add -Dhttp support.
40
    debug,
2004.1.25 by v.ladeuil+lp at free
Shuffle http related test code. Hopefully it ends up at the right place :)
41
    errors,
3052.3.3 by Vincent Ladeuil
Add -Dhttp support.
42
    trace,
2004.1.25 by v.ladeuil+lp at free
Shuffle http related test code. Hopefully it ends up at the right place :)
43
    __version__ as bzrlib_version,
44
    )
1540.3.15 by Martin Pool
[merge] large merge to sync with bzr.dev
45
import bzrlib
1540.3.18 by Martin Pool
Style review fixes (thanks robertc)
46
from bzrlib.trace import mutter
2004.1.25 by v.ladeuil+lp at free
Shuffle http related test code. Hopefully it ends up at the right place :)
47
from bzrlib.transport.http import (
2298.5.1 by Alexander Belchenko
Bugfix #82086: Searching location of CA bundle for PyCurl in env variable (CURL_CA_BUNDLE), and on win32 along the PATH
48
    ca_bundle,
2004.1.25 by v.ladeuil+lp at free
Shuffle http related test code. Hopefully it ends up at the right place :)
49
    HttpTransportBase,
50
    response,
51
    )
1540.3.1 by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib.
52
1540.3.7 by Martin Pool
Prepare to select a transport depending on what dependencies can be satisfied.
53
try:
54
    import pycurl
55
except ImportError, e:
56
    mutter("failed to import pycurl: %s", e)
3052.3.3 by Vincent Ladeuil
Add -Dhttp support.
57
    raise errors.DependencyNotPresent('pycurl', e)
1540.3.7 by Martin Pool
Prepare to select a transport depending on what dependencies can be satisfied.
58
1684.1.5 by Martin Pool
(patch) check that pycurl will actuall initialize as well as load (Alexander)
59
try:
60
    # see if we can actually initialize PyCurl - sometimes it will load but
61
    # fail to start up due to this bug:
3943.8.1 by Marius Kruger
remove all trailing whitespace from bzr source
62
    #
1684.1.5 by Martin Pool
(patch) check that pycurl will actuall initialize as well as load (Alexander)
63
    #   32. (At least on Windows) If libcurl is built with c-ares and there's
64
    #   no DNS server configured in the system, the ares_init() call fails and
65
    #   thus curl_easy_init() fails as well. This causes weird effects for
66
    #   people who use numerical IP addresses only.
67
    #
68
    # reported by Alexander Belchenko, 2006-04-26
69
    pycurl.Curl()
70
except pycurl.error, e:
71
    mutter("failed to initialize pycurl: %s", e)
3052.3.3 by Vincent Ladeuil
Add -Dhttp support.
72
    raise errors.DependencyNotPresent('pycurl', e)
1684.1.5 by Martin Pool
(patch) check that pycurl will actuall initialize as well as load (Alexander)
73
1540.3.7 by Martin Pool
Prepare to select a transport depending on what dependencies can be satisfied.
74
2872.6.1 by Vincent Ladeuil
Fix bug #147530 by enabling more robust error code definitions.
75
76
77
def _get_pycurl_errcode(symbol, default):
    """Return the numerical error code pycurl defines for a symbol.

    Different pycurl implementations define different symbols for error
    codes, and old versions never define some of them (whether or not they
    can return the corresponding error code). Looking the symbol up with a
    fallback lets this module define every code it cares about; defining a
    code that an old pycurl will never return is harmless.

    :param symbol: Name of the pycurl module attribute, e.g. 'E_SEND_ERROR'
    :param default: Value to use when pycurl does not define ``symbol``
    :return: pycurl's value for ``symbol`` if defined, ``default`` otherwise
    """
    pycurl_namespace = vars(pycurl)
    if symbol in pycurl_namespace:
        return pycurl_namespace[symbol]
    return default
88
89
# Error codes this module reacts to, listed by ascending numeric fallback.
# The fallback is only used when the installed pycurl lacks the symbol.
# Name resolution and connection establishment failures.
CURLE_COULDNT_RESOLVE_PROXY = _get_pycurl_errcode('E_COULDNT_RESOLVE_PROXY', 5)
CURLE_COULDNT_RESOLVE_HOST = _get_pycurl_errcode('E_COULDNT_RESOLVE_HOST', 6)
CURLE_COULDNT_CONNECT = _get_pycurl_errcode('E_COULDNT_CONNECT', 7)
# Failures while transferring data.
CURLE_PARTIAL_FILE = _get_pycurl_errcode('E_PARTIAL_FILE', 18)
CURLE_GOT_NOTHING = _get_pycurl_errcode('E_GOT_NOTHING', 52)
CURLE_SEND_ERROR = _get_pycurl_errcode('E_SEND_ERROR', 55)
CURLE_RECV_ERROR = _get_pycurl_errcode('E_RECV_ERROR', 56)
# Certificate verification failures.
CURLE_SSL_CACERT = _get_pycurl_errcode('E_SSL_CACERT', 60)
CURLE_SSL_CACERT_BADFILE = _get_pycurl_errcode('E_SSL_CACERT_BADFILE', 77)
2872.6.1 by Vincent Ladeuil
Fix bug #147530 by enabling more robust error code definitions.
98
99
1540.3.1 by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib.
100
class PyCurlTransport(HttpTransportBase):
1540.3.3 by Martin Pool
Review updates of pycurl transport
101
    """http client transport using pycurl
102
103
    PyCurl is a Python binding to the C "curl" multiprotocol client.
104
2004.1.30 by v.ladeuil+lp at free
Fix #62276 and #62029 by providing a more robust http range handling.
105
    This transport can be significantly faster than the builtin
106
    Python client.  Advantages include: DNS caching.
1540.3.3 by Martin Pool
Review updates of pycurl transport
107
    """
108
2485.8.59 by Vincent Ladeuil
Update from review comments.
109
    def __init__(self, base, _from_transport=None):
3878.4.6 by Vincent Ladeuil
Fix bug #270863 by preserving 'bzr+http[s]' decorator.
110
        super(PyCurlTransport, self).__init__(base, 'pycurl',
111
                                              _from_transport=_from_transport)
3878.4.2 by Vincent Ladeuil
Fix bug #265070 by providing a finer sieve for accepted redirections.
112
        if self._unqualified_scheme == 'https':
2294.3.1 by Vincent Ladeuil
Fix #85305 by issuing an exception instead of a traceback.
113
            # Check availability of https into pycurl supported
114
            # protocols
115
            supported = pycurl.version_info()[8]
116
            if 'https' not in supported:
3052.3.3 by Vincent Ladeuil
Add -Dhttp support.
117
                raise errors.DependencyNotPresent('pycurl', 'no https support')
2298.5.1 by Alexander Belchenko
Bugfix #82086: Searching location of CA bundle for PyCurl in env variable (CURL_CA_BUNDLE), and on win32 along the PATH
118
        self.cabundle = ca_bundle.get_ca_path()
2485.8.41 by Vincent Ladeuil
Finish http refactoring. Test suite passing.
119
120
    def _get_curl(self):
121
        connection = self._get_connection()
122
        if connection is None:
123
            # First connection ever. There is no credentials for pycurl, either
124
            # the password was embedded in the URL or it's not needed. The
125
            # connection for pycurl is just the Curl object, it will not
2485.8.43 by Vincent Ladeuil
Cleaning.
126
            # connect to the http server until the first request (which had
127
            # just called us).
2485.8.41 by Vincent Ladeuil
Finish http refactoring. Test suite passing.
128
            connection = pycurl.Curl()
3133.1.2 by Vincent Ladeuil
Fix #177643 by making pycurl handle url-embedded credentials again.
129
            # First request, initialize credentials.
130
            auth = self._create_auth()
131
            # Proxy handling is out of reach, so we punt
132
            self._set_connection(connection, auth)
2485.8.41 by Vincent Ladeuil
Finish http refactoring. Test suite passing.
133
        return connection
1540.3.1 by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib.
134
1540.3.3 by Martin Pool
Review updates of pycurl transport
135
    def has(self, relpath):
1786.1.32 by John Arbash Meinel
cleanup pass, allow pycurl connections to be shared between transports.
136
        """See Transport.has()"""
137
        # We set NO BODY=0 in _get_full, so it should be safe
138
        # to re-use the non-range curl object
2485.8.41 by Vincent Ladeuil
Finish http refactoring. Test suite passing.
139
        curl = self._get_curl()
2485.8.25 by Vincent Ladeuil
Separate abspath from _remote_path, the intents are different.
140
        abspath = self._remote_path(relpath)
1540.3.14 by Martin Pool
[pycurl] Make Curl instance a local variable not a long-lived object.
141
        curl.setopt(pycurl.URL, abspath)
142
        self._set_curl_options(curl)
2018.2.28 by Andrew Bennetts
Changes in response to review: re-use _base_curl, rather than keeping a seperate _post_curl object; add docstring to test_http.RecordingServer, set is_user_error on some new exceptions.
143
        curl.setopt(pycurl.HTTPGET, 1)
1540.3.3 by Martin Pool
Review updates of pycurl transport
144
        # don't want the body - ie just do a HEAD request
1786.1.27 by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration.
145
        # This means "NO BODY" not 'nobody'
1540.3.14 by Martin Pool
[pycurl] Make Curl instance a local variable not a long-lived object.
146
        curl.setopt(pycurl.NOBODY, 1)
2164.2.16 by Vincent Ladeuil
Add tests.
147
        # But we need headers to handle redirections
148
        header = StringIO()
149
        curl.setopt(pycurl.HEADERFUNCTION, header.write)
2004.1.16 by v.ladeuil+lp at free
Add tests against erroneous http status lines.
150
        # In some erroneous cases, pycurl will emit text on
151
        # stdout if we don't catch it (see InvalidStatus tests
152
        # for one such occurrence).
153
        blackhole = StringIO()
154
        curl.setopt(pycurl.WRITEFUNCTION, blackhole.write)
2164.2.16 by Vincent Ladeuil
Add tests.
155
        self._curl_perform(curl, header)
1540.3.14 by Martin Pool
[pycurl] Make Curl instance a local variable not a long-lived object.
156
        code = curl.getinfo(pycurl.HTTP_CODE)
157
        if code == 404: # not found
158
            return False
2164.2.16 by Vincent Ladeuil
Add tests.
159
        elif code == 200: # "ok"
1540.3.14 by Martin Pool
[pycurl] Make Curl instance a local variable not a long-lived object.
160
            return True
161
        else:
1612.1.1 by Martin Pool
Raise errors correctly on pycurl connection failure
162
            self._raise_curl_http_error(curl)
2000.3.1 by v.ladeuil+lp at free
Better connection sharing by using only one curl object.
163
2520.2.1 by Vincent Ladeuil
First step to fix #115209 use _coalesce_offsets like other transports.
164
    def _get(self, relpath, offsets, tail_amount=0):
1786.1.27 by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration.
165
        # This just switches based on the type of request
2520.2.1 by Vincent Ladeuil
First step to fix #115209 use _coalesce_offsets like other transports.
166
        if offsets is not None or tail_amount not in (0, None):
167
            return self._get_ranged(relpath, offsets, tail_amount=tail_amount)
1786.1.27 by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration.
168
        else:
2164.2.5 by v.ladeuil+lp at free
Simpler implementation using inspect. 'hints' is a kwargs.
169
            return self._get_full(relpath)
2000.3.1 by v.ladeuil+lp at free
Better connection sharing by using only one curl object.
170
1786.1.27 by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration.
171
    def _setup_get_request(self, curl, relpath):
2018.2.6 by Andrew Bennetts
HTTP client starting to work (pycurl for the moment).
172
        # Make sure we do a GET request. versions > 7.14.1 also set the
173
        # NO BODY flag, but we'll do it ourselves in case it is an older
174
        # pycurl version
175
        curl.setopt(pycurl.NOBODY, 0)
176
        curl.setopt(pycurl.HTTPGET, 1)
177
        return self._setup_request(curl, relpath)
178
179
    def _setup_request(self, curl, relpath):
1786.1.27 by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration.
180
        """Do the common setup stuff for making a request
181
182
        :param curl: The curl object to place the request on
183
        :param relpath: The relative path that we want to get
3943.8.1 by Marius Kruger
remove all trailing whitespace from bzr source
184
        :return: (abspath, data, header)
1786.1.27 by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration.
185
                 abspath: full url
186
                 data: file that will be filled with the body
187
                 header: file that will be filled with the headers
188
        """
2485.8.25 by Vincent Ladeuil
Separate abspath from _remote_path, the intents are different.
189
        abspath = self._remote_path(relpath)
1786.1.27 by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration.
190
        curl.setopt(pycurl.URL, abspath)
191
        self._set_curl_options(curl)
192
193
        data = StringIO()
194
        header = StringIO()
195
        curl.setopt(pycurl.WRITEFUNCTION, data.write)
196
        curl.setopt(pycurl.HEADERFUNCTION, header.write)
197
198
        return abspath, data, header
199
2164.2.5 by v.ladeuil+lp at free
Simpler implementation using inspect. 'hints' is a kwargs.
200
    def _get_full(self, relpath):
1786.1.27 by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration.
201
        """Make a request for the entire file"""
2485.8.41 by Vincent Ladeuil
Finish http refactoring. Test suite passing.
202
        curl = self._get_curl()
1786.1.27 by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration.
203
        abspath, data, header = self._setup_get_request(curl, relpath)
2164.2.16 by Vincent Ladeuil
Add tests.
204
        self._curl_perform(curl, header)
1786.1.27 by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration.
205
206
        code = curl.getinfo(pycurl.HTTP_CODE)
207
        data.seek(0)
208
209
        if code == 404:
3052.3.3 by Vincent Ladeuil
Add -Dhttp support.
210
            raise errors.NoSuchFile(abspath)
1786.1.27 by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration.
211
        if code != 200:
2004.1.25 by v.ladeuil+lp at free
Shuffle http related test code. Hopefully it ends up at the right place :)
212
            self._raise_curl_http_error(
213
                curl, 'expected 200 or 404 for full response.')
1786.1.27 by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration.
214
215
        return code, data
216
3059.2.11 by Vincent Ladeuil
Fix typos mentioned by spiv.
217
    # The parent class use 0 to minimize the requests, but since we can't
3059.2.7 by Vincent Ladeuil
Allow pycurl users to watch the blinkenlights and fix a bug when ranges are contiguous.
218
    # exploit the results as soon as they are received (pycurl limitation) we'd
3945.1.6 by Vincent Ladeuil
Fix debug handling for pycurl and implement pycurl http activity
219
    # better issue more requests and provide a more responsive UI incurring
220
    # more latency costs.
3059.2.17 by Vincent Ladeuil
Limit GET requests by body size instead of number of ranges.
221
    # If you modify this, think about modifying the comment in http/__init__.py
3059.2.7 by Vincent Ladeuil
Allow pycurl users to watch the blinkenlights and fix a bug when ranges are contiguous.
222
    # too.
3059.2.17 by Vincent Ladeuil
Limit GET requests by body size instead of number of ranges.
223
    _get_max_size = 4 * 1024 * 1024
3059.2.7 by Vincent Ladeuil
Allow pycurl users to watch the blinkenlights and fix a bug when ranges are contiguous.
224
2520.2.1 by Vincent Ladeuil
First step to fix #115209 use _coalesce_offsets like other transports.
225
    def _get_ranged(self, relpath, offsets, tail_amount):
1786.1.27 by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration.
226
        """Make a request for just part of the file."""
2485.8.41 by Vincent Ladeuil
Finish http refactoring. Test suite passing.
227
        curl = self._get_curl()
1786.1.27 by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration.
228
        abspath, data, header = self._setup_get_request(curl, relpath)
229
2520.2.1 by Vincent Ladeuil
First step to fix #115209 use _coalesce_offsets like other transports.
230
        range_header = self._attempted_range_header(offsets, tail_amount)
2004.1.30 by v.ladeuil+lp at free
Fix #62276 and #62029 by providing a more robust http range handling.
231
        if range_header is None:
232
            # Forget ranges, the server can't handle them
233
            return self._get_full(relpath)
234
2481.3.1 by Vincent Ladeuil
Fix bug #112719 by using the right range header.
235
        self._curl_perform(curl, header, ['Range: bytes=%s' % range_header])
1786.1.33 by John Arbash Meinel
Cleanup pass #2
236
        data.seek(0)
237
1786.1.27 by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration.
238
        code = curl.getinfo(pycurl.HTTP_CODE)
3059.2.2 by Vincent Ladeuil
Read http responses on demand without buffering the whole body
239
240
        if code == 404: # not found
241
            raise errors.NoSuchFile(abspath)
242
        elif code in (400, 416):
243
            # We don't know which, but one of the ranges we specified was
244
            # wrong.
245
            raise errors.InvalidHttpRange(abspath, range_header,
246
                                          'Server return code %d'
247
                                          % curl.getinfo(pycurl.HTTP_CODE))
3059.2.4 by Vincent Ladeuil
Fix typo so that all tests pass now (after merging bzr.dev to get rid of
248
        msg = self._parse_headers(header)
3945.1.8 by Vincent Ladeuil
Add more tests, fix pycurl double handling, revert previous tracking.
249
        return code, response.handle_response(abspath, code, msg, data)
3059.2.2 by Vincent Ladeuil
Read http responses on demand without buffering the whole body
250
251
    def _parse_headers(self, status_and_headers):
252
        """Transform the headers provided by curl into an HTTPMessage"""
253
        status_and_headers.seek(0)
254
        # Ignore status line
255
        status_and_headers.readline()
256
        msg = httplib.HTTPMessage(status_and_headers)
257
        return msg
1786.1.4 by John Arbash Meinel
Adding HEADERFUNCTION which lets us get any response codes we want.
258
2018.2.6 by Andrew Bennetts
HTTP client starting to work (pycurl for the moment).
259
    def _post(self, body_bytes):
3651.1.1 by Vincent Ladeuil
Fix bug #225020 by catching CURLE_SEND_ERROR error.
260
        curl = self._get_curl()
261
        abspath, data, header = self._setup_request(curl, '.bzr/smart')
262
        curl.setopt(pycurl.POST, 1)
2018.2.6 by Andrew Bennetts
HTTP client starting to work (pycurl for the moment).
263
        fake_file = StringIO(body_bytes)
264
        curl.setopt(pycurl.POSTFIELDSIZE, len(body_bytes))
265
        curl.setopt(pycurl.READFUNCTION, fake_file.read)
2000.3.4 by v.ladeuil+lp at free
Merge bzr.dev
266
        # We override the Expect: header so that pycurl will send the POST
267
        # body immediately.
3651.1.1 by Vincent Ladeuil
Fix bug #225020 by catching CURLE_SEND_ERROR error.
268
        try:
269
            self._curl_perform(curl, header, ['Expect: '])
270
        except pycurl.error, e:
271
            if e[0] == CURLE_SEND_ERROR:
3651.1.2 by Vincent Ladeuil
Fix bug #225020 by catching the CURLE_SEND_ERROR error more broadly.
272
                # When talking to an HTTP/1.0 server, getting a 400+ error code
273
                # triggers a bug in some combinations of curl/kernel in rare
274
                # occurrences. Basically, the server closes the connection
275
                # after sending the error but the client (having received and
276
                # parsed the response) still try to send the request body (see
277
                # bug #225020 and its upstream associated bug).  Since the
278
                # error code and the headers are known to be available, we just
279
                # swallow the exception, leaving the upper levels handle the
280
                # 400+ error.
281
                mutter('got pycurl error in POST: %s, %s, %s, url: %s ',
282
                       e[0], e[1], e, abspath)
283
            else:
284
                # Re-raise otherwise
285
                raise
2018.2.6 by Andrew Bennetts
HTTP client starting to work (pycurl for the moment).
286
        data.seek(0)
287
        code = curl.getinfo(pycurl.HTTP_CODE)
3059.2.2 by Vincent Ladeuil
Read http responses on demand without buffering the whole body
288
        msg = self._parse_headers(header)
3945.1.8 by Vincent Ladeuil
Add more tests, fix pycurl double handling, revert previous tracking.
289
        return code, response.handle_response(abspath, code, msg, data)
3956.2.2 by John Arbash Meinel
Start using report_activity for HTTP (pycurl + urllib)
290
2018.2.6 by Andrew Bennetts
HTTP client starting to work (pycurl for the moment).
291
1786.1.40 by John Arbash Meinel
code cleanups from Martin Pool.
292
    def _raise_curl_http_error(self, curl, info=None):
1612.1.1 by Martin Pool
Raise errors correctly on pycurl connection failure
293
        code = curl.getinfo(pycurl.HTTP_CODE)
294
        url = curl.getinfo(pycurl.EFFECTIVE_URL)
2004.1.27 by v.ladeuil+lp at free
Fix bug #57644 by issuing an explicit error message.
295
        # Some error codes can be handled the same way for all
296
        # requests
297
        if code == 403:
2004.1.34 by v.ladeuil+lp at free
Cosmetic fix for bug #57644.
298
            raise errors.TransportError(
3430.3.1 by Vincent Ladeuil
Fix #230223 by making both http implementations raise appropriate exceptions.
299
                'Server refuses to fulfill the request (403 Forbidden)'
300
                ' for %s' % url)
1786.1.40 by John Arbash Meinel
code cleanups from Martin Pool.
301
        else:
2004.1.27 by v.ladeuil+lp at free
Fix bug #57644 by issuing an explicit error message.
302
            if info is None:
303
                msg = ''
304
            else:
305
                msg = ': ' + info
306
            raise errors.InvalidHttpResponse(
307
                url, 'Unable to handle http code %d%s' % (code,msg))
1540.3.1 by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib.
308
3945.1.6 by Vincent Ladeuil
Fix debug handling for pycurl and implement pycurl http activity
309
    def _debug_cb(self, kind, text):
310
        if kind in (pycurl.INFOTYPE_HEADER_IN, pycurl.INFOTYPE_DATA_IN,
311
                    pycurl.INFOTYPE_SSL_DATA_IN):
312
            self._report_activity(len(text), 'read')
313
            if (kind == pycurl.INFOTYPE_HEADER_IN
314
                and 'http' in debug.debug_flags):
315
                mutter('< %s' % text)
316
        elif kind in (pycurl.INFOTYPE_HEADER_OUT, pycurl.INFOTYPE_DATA_OUT,
317
                      pycurl.INFOTYPE_SSL_DATA_OUT):
318
            self._report_activity(len(text), 'write')
319
            if (kind == pycurl.INFOTYPE_HEADER_OUT
320
                and 'http' in debug.debug_flags):
321
                mutter('> %s' % text)
322
        elif kind == pycurl.INFOTYPE_TEXT and 'http' in debug.debug_flags:
323
            mutter('* %s' % text)
324
1540.3.13 by Martin Pool
Curl should follow http redirects, the same as urllib
325
    def _set_curl_options(self, curl):
326
        """Set options for all requests"""
3052.3.3 by Vincent Ladeuil
Add -Dhttp support.
327
        ua_str = 'bzr/%s (pycurl: %s)' % (bzrlib.__version__, pycurl.version)
1540.3.15 by Martin Pool
[merge] large merge to sync with bzr.dev
328
        curl.setopt(pycurl.USERAGENT, ua_str)
3945.1.6 by Vincent Ladeuil
Fix debug handling for pycurl and implement pycurl http activity
329
        curl.setopt(pycurl.VERBOSE, 1)
330
        curl.setopt(pycurl.DEBUGFUNCTION, self._debug_cb)
2298.5.1 by Alexander Belchenko
Bugfix #82086: Searching location of CA bundle for PyCurl in env variable (CURL_CA_BUNDLE), and on win32 along the PATH
331
        if self.cabundle:
332
            curl.setopt(pycurl.CAINFO, self.cabundle)
3133.1.2 by Vincent Ladeuil
Fix #177643 by making pycurl handle url-embedded credentials again.
333
        # Set accepted auth methods
334
        curl.setopt(pycurl.HTTPAUTH, pycurl.HTTPAUTH_ANY)
335
        curl.setopt(pycurl.PROXYAUTH, pycurl.HTTPAUTH_ANY)
336
        auth = self._get_credentials()
337
        user = auth.get('user', None)
338
        password = auth.get('password', None)
339
        userpass = None
340
        if user is not None:
341
            userpass = user + ':'
342
            if password is not None: # '' is a valid password
343
                userpass += password
344
            curl.setopt(pycurl.USERPWD, userpass)
1540.3.3 by Martin Pool
Review updates of pycurl transport
345
2164.2.16 by Vincent Ladeuil
Add tests.
346
    def _curl_perform(self, curl, header, more_headers=[]):
1540.3.3 by Martin Pool
Review updates of pycurl transport
347
        """Perform curl operation and translate exceptions."""
348
        try:
2000.3.1 by v.ladeuil+lp at free
Better connection sharing by using only one curl object.
349
            # There's no way in http/1.0 to say "must
350
            # revalidate"; we don't want to force it to always
351
            # retrieve.  so just turn off the default Pragma
352
            # provided by Curl.
353
            headers = ['Cache-control: max-age=0',
354
                       'Pragma: no-cache',
355
                       'Connection: Keep-Alive']
356
            curl.setopt(pycurl.HTTPHEADER, headers + more_headers)
1540.3.14 by Martin Pool
[pycurl] Make Curl instance a local variable not a long-lived object.
357
            curl.perform()
1540.3.3 by Martin Pool
Review updates of pycurl transport
358
        except pycurl.error, e:
1786.1.35 by John Arbash Meinel
For pycurl inverse of (NOBODY,1) is (HTTPGET,1) not (NOBODY,0)
359
            url = curl.getinfo(pycurl.EFFECTIVE_URL)
360
            mutter('got pycurl error: %s, %s, %s, url: %s ',
2872.6.1 by Vincent Ladeuil
Fix bug #147530 by enabling more robust error code definitions.
361
                    e[0], e[1], e, url)
2929.3.19 by Vincent Ladeuil
Fix 1.1 related bugs in HTTP server, add HTTPS passing tests (by temporarily disabling pycurl certificate verification).
362
            if e[0] in (CURLE_COULDNT_RESOLVE_HOST,
363
                        CURLE_COULDNT_RESOLVE_PROXY,
2872.6.1 by Vincent Ladeuil
Fix bug #147530 by enabling more robust error code definitions.
364
                        CURLE_COULDNT_CONNECT,
365
                        CURLE_GOT_NOTHING,
2929.3.19 by Vincent Ladeuil
Fix 1.1 related bugs in HTTP server, add HTTPS passing tests (by temporarily disabling pycurl certificate verification).
366
                        CURLE_SSL_CACERT,
367
                        CURLE_SSL_CACERT_BADFILE,
368
                        ):
3052.3.3 by Vincent Ladeuil
Add -Dhttp support.
369
                raise errors.ConnectionError(
370
                    'curl connection error (%s)\non %s' % (e[1], url))
4628.1.2 by Vincent Ladeuil
More complete fix.
371
            elif e[0] == CURLE_RECV_ERROR:
372
                raise errors.ConnectionReset(
373
                    'curl connection error (%s)\non %s' % (e[1], url))
2872.6.1 by Vincent Ladeuil
Fix bug #147530 by enabling more robust error code definitions.
374
            elif e[0] == CURLE_PARTIAL_FILE:
3059.2.2 by Vincent Ladeuil
Read http responses on demand without buffering the whole body
375
                # Pycurl itself has detected a short read.  We do not have all
376
                # the information for the ShortReadvError, but that should be
377
                # enough
2000.3.9 by v.ladeuil+lp at free
The tests that would have help avoid bug #73948 and all that mess :)
378
                raise errors.ShortReadvError(url,
379
                                             offset='unknown', length='unknown',
380
                                             actual='unknown',
381
                                             extra='Server aborted the request')
1786.1.27 by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration.
382
            raise
2164.2.16 by Vincent Ladeuil
Add tests.
383
        code = curl.getinfo(pycurl.HTTP_CODE)
384
        if code in (301, 302, 303, 307):
385
            url = curl.getinfo(pycurl.EFFECTIVE_URL)
3059.2.2 by Vincent Ladeuil
Read http responses on demand without buffering the whole body
386
            msg = self._parse_headers(header)
387
            redirected_to = msg.getheader('location')
2164.2.16 by Vincent Ladeuil
Add tests.
388
            raise errors.RedirectRequested(url,
389
                                           redirected_to,
3878.4.4 by Vincent Ladeuil
Cleanup.
390
                                           is_permanent=(code == 301))
1540.3.1 by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib.
391
1540.3.10 by Martin Pool
[broken] keep hooking pycurl into test framework
392
393
def get_test_permutations():
    """Return the permutations to be used in testing.

    :return: A list of (transport class, server class) tuples. The http
        pair is always present; an https pair is added when
        tests.HTTPSServerFeature is available.
    """
    from bzrlib import tests
    from bzrlib.tests import http_server

    pairs = [(PyCurlTransport, http_server.HttpServer_PyCurl)]
    if not tests.HTTPSServerFeature.available():
        return pairs

    from bzrlib.tests import (
        https_server,
        ssl_certs,
        )

    class HTTPS_pycurl_transport(PyCurlTransport):

        def __init__(self, base, _from_transport=None):
            super(HTTPS_pycurl_transport, self).__init__(base,
                                                         _from_transport)
            # Use the CA certificate built by the ssl_certs test module
            self.cabundle = str(ssl_certs.build_path('ca.crt'))

    pairs.append((HTTPS_pycurl_transport, https_server.HTTPSServer_PyCurl))
    return pairs