68
74
PyCurl is a Python binding to the C "curl" multiprotocol client.
70
This transport can be significantly faster than the builtin Python client.
71
Advantages include: DNS caching, connection keepalive, and ability to
72
set headers to allow caching.
76
This transport can be significantly faster than the builtin
77
Python client. Advantages include: DNS caching.
75
80
def __init__(self, base, from_transport=None):
76
81
super(PyCurlTransport, self).__init__(base)
77
82
if from_transport is not None:
78
self._base_curl = from_transport._base_curl
79
self._range_curl = from_transport._range_curl
83
self._curl = from_transport._curl
81
85
mutter('using pycurl %s' % pycurl.version)
82
self._base_curl = pycurl.Curl()
83
self._range_curl = pycurl.Curl()
86
self._curl = pycurl.Curl()
85
88
def should_cache(self):
86
89
"""Return True if the data pulled across should be cached locally.
91
94
"""See Transport.has()"""
92
95
# We set NO BODY=0 in _get_full, so it should be safe
93
96
# to re-use the non-range curl object
94
curl = self._base_curl
95
98
abspath = self._real_abspath(relpath)
96
99
curl.setopt(pycurl.URL, abspath)
97
100
self._set_curl_options(curl)
101
curl.setopt(pycurl.HTTPGET, 1)
98
102
# don't want the body - ie just do a HEAD request
99
103
# This means "NO BODY" not 'nobody'
100
104
curl.setopt(pycurl.NOBODY, 1)
105
# In some erroneous cases, pycurl will emit text on
106
# stdout if we don't catch it (see InvalidStatus tests
107
# for one such occurrence).
108
blackhole = StringIO()
109
curl.setopt(pycurl.WRITEFUNCTION, blackhole.write)
101
110
self._curl_perform(curl)
102
111
code = curl.getinfo(pycurl.HTTP_CODE)
103
112
if code == 404: # not found
108
117
self._raise_curl_http_error(curl)
110
119
def _get(self, relpath, ranges, tail_amount=0):
111
120
# This just switches based on the type of request
112
121
if ranges is not None or tail_amount not in (0, None):
113
122
return self._get_ranged(relpath, ranges, tail_amount=tail_amount)
115
124
return self._get_full(relpath)
117
126
def _setup_get_request(self, curl, relpath):
127
# Make sure we do a GET request. versions > 7.14.1 also set the
128
# NO BODY flag, but we'll do it ourselves in case it is an older
130
curl.setopt(pycurl.NOBODY, 0)
131
curl.setopt(pycurl.HTTPGET, 1)
132
return self._setup_request(curl, relpath)
134
def _setup_request(self, curl, relpath):
118
135
"""Do the common setup stuff for making a request
120
137
:param curl: The curl object to place the request on
153
165
raise NoSuchFile(abspath)
155
self._raise_curl_http_error(curl, 'expected 200 or 404 for full response.')
167
self._raise_curl_http_error(
168
curl, 'expected 200 or 404 for full response.')
157
170
return code, data
159
172
def _get_ranged(self, relpath, ranges, tail_amount):
160
173
"""Make a request for just part of the file."""
161
# We would like to re-use the same curl object for
162
# full requests and partial requests
163
# Documentation says 'Pass in NULL to disable the use of ranges'
164
# None is the closest we have, but at least with pycurl 7.13.1
165
# It raises an 'invalid arguments' response
166
# curl.setopt(pycurl.RANGE, None)
167
# curl.unsetopt(pycurl.RANGE) doesn't support the RANGE parameter
168
# So instead we hack around this by using a separate objects
169
curl = self._range_curl
170
175
abspath, data, header = self._setup_get_request(curl, relpath)
172
curl.setopt(pycurl.RANGE, self.range_header(ranges, tail_amount))
173
self._curl_perform(curl)
177
range_header = self.attempted_range_header(ranges, tail_amount)
178
if range_header is None:
179
# Forget ranges, the server can't handle them
180
return self._get_full(relpath)
182
self._curl_perform(curl, ['Range: bytes=%s'
183
% self.range_header(ranges, tail_amount)])
176
186
code = curl.getinfo(pycurl.HTTP_CODE)
179
189
# handle_response will raise NoSuchFile, etc based on the response code
180
190
return code, response.handle_response(abspath, code, headers, data)
182
def _raise_curl_connection_error(self, curl):
183
curl_errno = curl.getinfo(pycurl.OS_ERRNO)
184
url = curl.getinfo(pycurl.EFFECTIVE_URL)
185
raise ConnectionError('curl connection error (%s) on %s'
186
% (os.strerror(curl_errno), url))
192
def _post(self, body_bytes):
193
fake_file = StringIO(body_bytes)
195
# Other places that use _base_curl for GET requests explicitly set
196
# HTTPGET, so it should be safe to re-use the same object for both GETs
198
curl.setopt(pycurl.POST, 1)
199
curl.setopt(pycurl.POSTFIELDSIZE, len(body_bytes))
200
curl.setopt(pycurl.READFUNCTION, fake_file.read)
201
abspath, data, header = self._setup_request(curl, '.bzr/smart')
202
# We override the Expect: header so that pycurl will send the POST
204
self._curl_perform(curl,['Expect: '])
206
code = curl.getinfo(pycurl.HTTP_CODE)
207
headers = _extract_headers(header.getvalue(), abspath)
208
return code, response.handle_response(abspath, code, headers, data)
188
210
def _raise_curl_http_error(self, curl, info=None):
189
211
code = curl.getinfo(pycurl.HTTP_CODE)
190
212
url = curl.getinfo(pycurl.EFFECTIVE_URL)
213
# Some error codes can be handled the same way for all
216
raise errors.TransportError(
217
'Server refuses to fullfil the request for: %s' % url)
195
raise errors.InvalidHttpResponse(url, 'Unable to handle http code %d%s'
223
raise errors.InvalidHttpResponse(
224
url, 'Unable to handle http code %d%s' % (code,msg))
198
226
def _set_curl_options(self, curl):
199
227
"""Set options for all requests"""
200
# There's no way in http/1.0 to say "must revalidate"; we don't want
201
# to force it to always retrieve. so just turn off the default Pragma
203
headers = ['Cache-control: max-age=0',
205
'Connection: Keep-Alive']
206
228
## curl.setopt(pycurl.VERBOSE, 1)
207
229
# TODO: maybe include a summary of the pycurl version
208
230
ua_str = 'bzr/%s (pycurl)' % (bzrlib.__version__,)
209
231
curl.setopt(pycurl.USERAGENT, ua_str)
210
curl.setopt(pycurl.HTTPHEADER, headers)
211
232
curl.setopt(pycurl.FOLLOWLOCATION, 1) # follow redirect responses
213
def _curl_perform(self, curl):
234
def _curl_perform(self, curl, more_headers=[]):
214
235
"""Perform curl operation and translate exceptions."""
237
# There's no way in http/1.0 to say "must
238
# revalidate"; we don't want to force it to always
239
# retrieve. so just turn off the default Pragma
241
headers = ['Cache-control: max-age=0',
243
'Connection: Keep-Alive']
244
curl.setopt(pycurl.HTTPHEADER, headers + more_headers)
217
246
except pycurl.error, e:
218
# XXX: There seem to be no symbolic constants for these values.
219
247
url = curl.getinfo(pycurl.EFFECTIVE_URL)
220
248
mutter('got pycurl error: %s, %s, %s, url: %s ',
221
249
e[0], _pycurl_errors.errorcode[e[0]], e, url)
222
250
if e[0] in (_pycurl_errors.CURLE_COULDNT_RESOLVE_HOST,
223
_pycurl_errors.CURLE_COULDNT_CONNECT):
224
self._raise_curl_connection_error(curl)
225
# jam 20060713 The code didn't use to re-raise the exception here
251
_pycurl_errors.CURLE_COULDNT_CONNECT,
252
_pycurl_errors.CURLE_GOT_NOTHING,
253
_pycurl_errors.CURLE_COULDNT_RESOLVE_PROXY):
254
raise ConnectionError('curl connection error (%s)\non %s'
256
elif e[0] == _pycurl_errors.CURLE_PARTIAL_FILE:
257
# Pycurl itself has detected a short read. We do
258
# not have all the information for the
259
# ShortReadvError, but that should be enough
260
raise errors.ShortReadvError(url,
261
offset='unknown', length='unknown',
263
extra='Server aborted the request')
264
# jam 20060713 The code didn't use to re-raise the exception here,
226
265
# but that seemed bogus
230
class HttpServer_PyCurl(HttpServer):
231
"""Subclass of HttpServer that gives http+pycurl urls.
233
This is for use in testing: connections to this server will always go
234
through pycurl where possible.
237
# urls returned by this server should require the pycurl client impl
238
_url_protocol = 'http+pycurl'
241
269
def get_test_permutations():
242
270
"""Return the permutations to be used in testing."""
271
from bzrlib.tests.HttpServer import HttpServer_PyCurl
243
272
return [(PyCurlTransport, HttpServer_PyCurl),