27
27
from cStringIO import StringIO
29
from bzrlib import errors
31
__version__ as bzrlib_version,
31
from bzrlib.errors import (TransportNotPossible, NoSuchFile,
32
TransportError, ConnectionError,
34
from bzrlib.errors import (NoSuchFile,
33
36
DependencyNotPresent)
34
37
from bzrlib.trace import mutter
35
38
from bzrlib.transport import register_urlparse_netloc_protocol
36
from bzrlib.transport.http import (HttpTransportBase, HttpServer,
38
response, _pycurl_errors)
39
from bzrlib.transport.http import (
68
74
PyCurl is a Python binding to the C "curl" multiprotocol client.
70
This transport can be significantly faster than the builtin Python client.
71
Advantages include: DNS caching, connection keepalive, and ability to
72
set headers to allow caching.
76
This transport can be significantly faster than the builtin
77
Python client. Advantages include: DNS caching.
75
80
def __init__(self, base, from_transport=None):
76
81
super(PyCurlTransport, self).__init__(base)
77
82
if from_transport is not None:
78
self._base_curl = from_transport._base_curl
79
self._range_curl = from_transport._range_curl
83
self._curl = from_transport._curl
81
85
mutter('using pycurl %s' % pycurl.version)
82
self._base_curl = pycurl.Curl()
83
self._range_curl = pycurl.Curl()
86
self._curl = pycurl.Curl()
85
88
def should_cache(self):
86
89
"""Return True if the data pulled across should be cached locally.
99
102
# don't want the body - ie just do a HEAD request
100
103
# This means "NO BODY" not 'nobody'
101
104
curl.setopt(pycurl.NOBODY, 1)
105
# In some erroneous cases, pycurl will emit text on
106
# stdout if we don't catch it (see InvalidStatus tests
107
# for one such occurrence).
108
blackhole = StringIO()
109
curl.setopt(pycurl.WRITEFUNCTION, blackhole.write)
102
110
self._curl_perform(curl)
103
111
code = curl.getinfo(pycurl.HTTP_CODE)
104
112
if code == 404: # not found
109
117
self._raise_curl_http_error(curl)
111
119
def _get(self, relpath, ranges, tail_amount=0):
112
120
# This just switches based on the type of request
113
121
if ranges is not None or tail_amount not in (0, None):
114
122
return self._get_ranged(relpath, ranges, tail_amount=tail_amount)
116
124
return self._get_full(relpath)
118
126
def _setup_get_request(self, curl, relpath):
119
127
# Make sure we do a GET request. versions > 7.14.1 also set the
120
128
# NO BODY flag, but we'll do it ourselves in case it is an older
157
165
raise NoSuchFile(abspath)
159
self._raise_curl_http_error(curl, 'expected 200 or 404 for full response.')
167
self._raise_curl_http_error(
168
curl, 'expected 200 or 404 for full response.')
161
170
return code, data
163
172
def _get_ranged(self, relpath, ranges, tail_amount):
164
173
"""Make a request for just part of the file."""
165
# We would like to re-use the same curl object for
166
# full requests and partial requests
167
# Documentation says 'Pass in NULL to disable the use of ranges'
168
# None is the closest we have, but at least with pycurl 7.13.1
169
# It raises an 'invalid arguments' response
170
# curl.setopt(pycurl.RANGE, None)
171
# curl.unsetopt(pycurl.RANGE) doesn't support the RANGE parameter
172
# So instead we hack around this by using separate objects
173
curl = self._range_curl
174
175
abspath, data, header = self._setup_get_request(curl, relpath)
176
curl.setopt(pycurl.RANGE, self.range_header(ranges, tail_amount))
177
self._curl_perform(curl)
177
range_header = self.attempted_range_header(ranges, tail_amount)
178
if range_header is None:
179
# Forget ranges, the server can't handle them
180
return self._get_full(relpath)
182
self._curl_perform(curl, ['Range: bytes=%s'
183
% self.range_header(ranges, tail_amount)])
180
186
code = curl.getinfo(pycurl.HTTP_CODE)
193
199
curl.setopt(pycurl.POSTFIELDSIZE, len(body_bytes))
194
200
curl.setopt(pycurl.READFUNCTION, fake_file.read)
195
201
abspath, data, header = self._setup_request(curl, '.bzr/smart')
196
self._curl_perform(curl)
202
# We override the Expect: header so that pycurl will send the POST
204
self._curl_perform(curl,['Expect: '])
198
206
code = curl.getinfo(pycurl.HTTP_CODE)
199
207
headers = _extract_headers(header.getvalue(), abspath)
202
210
def _raise_curl_http_error(self, curl, info=None):
203
211
code = curl.getinfo(pycurl.HTTP_CODE)
204
212
url = curl.getinfo(pycurl.EFFECTIVE_URL)
213
# Some error codes can be handled the same way for all
216
raise errors.TransportError(
217
'Server refuses to fullfil the request for: %s' % url)
209
raise errors.InvalidHttpResponse(url, 'Unable to handle http code %d%s'
223
raise errors.InvalidHttpResponse(
224
url, 'Unable to handle http code %d%s' % (code,msg))
212
226
def _set_curl_options(self, curl):
213
227
"""Set options for all requests"""
214
# There's no way in http/1.0 to say "must revalidate"; we don't want
215
# to force it to always retrieve, so just turn off the default Pragma
217
# Also, we override the Expect: header so that pycurl will send the POST
219
headers = ['Cache-control: max-age=0',
221
'Connection: Keep-Alive',
223
228
## curl.setopt(pycurl.VERBOSE, 1)
224
229
# TODO: maybe include a summary of the pycurl version
225
230
ua_str = 'bzr/%s (pycurl)' % (bzrlib.__version__,)
226
231
curl.setopt(pycurl.USERAGENT, ua_str)
227
curl.setopt(pycurl.HTTPHEADER, headers)
228
232
curl.setopt(pycurl.FOLLOWLOCATION, 1) # follow redirect responses
230
def _curl_perform(self, curl):
234
def _curl_perform(self, curl, more_headers=[]):
231
235
"""Perform curl operation and translate exceptions."""
237
# There's no way in http/1.0 to say "must
238
# revalidate"; we don't want to force it to always
239
# retrieve, so just turn off the default Pragma
241
headers = ['Cache-control: max-age=0',
243
'Connection: Keep-Alive']
244
curl.setopt(pycurl.HTTPHEADER, headers + more_headers)
234
246
except pycurl.error, e:
235
# XXX: There seem to be no symbolic constants for these values.
236
247
url = curl.getinfo(pycurl.EFFECTIVE_URL)
237
248
mutter('got pycurl error: %s, %s, %s, url: %s ',
238
249
e[0], _pycurl_errors.errorcode[e[0]], e, url)
239
250
if e[0] in (_pycurl_errors.CURLE_COULDNT_RESOLVE_HOST,
240
251
_pycurl_errors.CURLE_COULDNT_CONNECT,
252
_pycurl_errors.CURLE_GOT_NOTHING,
241
253
_pycurl_errors.CURLE_COULDNT_RESOLVE_PROXY):
242
254
raise ConnectionError('curl connection error (%s)\non %s'
249
class HttpServer_PyCurl(HttpServer):
250
"""Subclass of HttpServer that gives http+pycurl urls.
252
This is for use in testing: connections to this server will always go
253
through pycurl where possible.
256
# urls returned by this server should require the pycurl client impl
257
_url_protocol = 'http+pycurl'
260
261
def get_test_permutations():
261
262
"""Return the permutations to be used in testing."""
263
from bzrlib.tests.HttpServer import HttpServer_PyCurl
262
264
return [(PyCurlTransport, HttpServer_PyCurl),