24
24
# It's probably safer to just always revalidate. mbp 20060321
27
from StringIO import StringIO
27
from cStringIO import StringIO
29
from bzrlib import errors
30
31
from bzrlib.errors import (TransportNotPossible, NoSuchFile,
31
32
TransportError, ConnectionError,
32
33
DependencyNotPresent)
33
34
from bzrlib.trace import mutter
34
35
from bzrlib.transport import register_urlparse_netloc_protocol
35
from bzrlib.transport.http import HttpTransportBase, extract_auth, HttpServer
36
from bzrlib.transport.http import (HttpTransportBase, HttpServer,
38
response, _pycurl_errors)
69
72
set headers to allow caching.
72
def __init__(self, base):
75
def __init__(self, base, from_transport=None):
73
76
super(PyCurlTransport, self).__init__(base)
74
mutter('using pycurl %s' % pycurl.version)
77
if from_transport is not None:
78
self._base_curl = from_transport._base_curl
79
self._range_curl = from_transport._range_curl
81
mutter('using pycurl %s' % pycurl.version)
82
self._base_curl = pycurl.Curl()
83
self._range_curl = pycurl.Curl()
76
85
def should_cache(self):
77
86
"""Return True if the data pulled across should be cached locally.
81
90
def has(self, relpath):
91
"""See Transport.has()"""
92
# We set NO BODY=0 in _get_full, so it should be safe
93
# to re-use the non-range curl object
94
curl = self._base_curl
83
95
abspath = self._real_abspath(relpath)
84
96
curl.setopt(pycurl.URL, abspath)
85
curl.setopt(pycurl.FOLLOWLOCATION, 1) # follow redirect responses
86
97
self._set_curl_options(curl)
87
98
# don't want the body - ie just do a HEAD request
99
# This means "NO BODY" not 'nobody'
88
100
curl.setopt(pycurl.NOBODY, 1)
89
101
self._curl_perform(curl)
90
102
code = curl.getinfo(pycurl.HTTP_CODE)
93
105
elif code in (200, 302): # "ok", "found"
96
self._raise_curl_connection_error(curl)
98
108
self._raise_curl_http_error(curl)
100
def _get(self, relpath, ranges):
110
def _get(self, relpath, ranges, tail_amount=0):
111
# This just switches based on the type of request
112
if ranges is not None or tail_amount not in (0, None):
113
return self._get_ranged(relpath, ranges, tail_amount=tail_amount)
115
return self._get_full(relpath)
117
def _setup_get_request(self, curl, relpath):
118
"""Do the common setup stuff for making a request
120
:param curl: The curl object to place the request on
121
:param relpath: The relative path that we want to get
122
:return: (abspath, data, header)
124
data: file that will be filled with the body
125
header: file that will be filled with the headers
102
127
abspath = self._real_abspath(relpath)
104
128
curl.setopt(pycurl.URL, abspath)
105
129
self._set_curl_options(curl)
106
curl.setopt(pycurl.WRITEFUNCTION, sio.write)
130
# Make sure we do a GET request. versions > 7.14.1 also set the
131
# NO BODY flag, but we'll do it ourselves in case it is an older
107
133
curl.setopt(pycurl.NOBODY, 0)
108
if ranges is not None:
109
assert len(ranges) == 1
110
# multiple ranges not supported yet because we can't decode the
112
curl.setopt(pycurl.RANGE, '%d-%d' % ranges[0])
134
curl.setopt(pycurl.HTTPGET, 1)
138
curl.setopt(pycurl.WRITEFUNCTION, data.write)
139
curl.setopt(pycurl.HEADERFUNCTION, header.write)
141
return abspath, data, header
143
def _get_full(self, relpath):
144
"""Make a request for the entire file"""
145
curl = self._base_curl
146
abspath, data, header = self._setup_get_request(curl, relpath)
113
147
self._curl_perform(curl)
114
149
code = curl.getinfo(pycurl.HTTP_CODE)
116
153
raise NoSuchFile(abspath)
120
elif code == 206 and (ranges is not None):
124
self._raise_curl_connection_error(curl)
126
self._raise_curl_http_error(curl)
155
self._raise_curl_http_error(curl, 'expected 200 or 404 for full response.')
159
def _get_ranged(self, relpath, ranges, tail_amount):
160
"""Make a request for just part of the file."""
161
# We would like to re-use the same curl object for
162
# full requests and partial requests
163
# Documentation says 'Pass in NULL to disable the use of ranges'
164
# None is the closest we have, but at least with pycurl 7.13.1
165
# It raises an 'invalid arguments' response
166
# curl.setopt(pycurl.RANGE, None)
167
# curl.unsetopt(pycurl.RANGE) doesn't support the RANGE parameter
168
# So instead we hack around this by using a separate objects
169
curl = self._range_curl
170
abspath, data, header = self._setup_get_request(curl, relpath)
172
curl.setopt(pycurl.RANGE, self.range_header(ranges, tail_amount))
173
self._curl_perform(curl)
176
code = curl.getinfo(pycurl.HTTP_CODE)
177
# mutter('url: %s header:\n%s', abspath, header.getvalue())
178
headers = _extract_headers(header.getvalue(), abspath)
179
# handle_response will raise NoSuchFile, etc based on the response code
180
return code, response.handle_response(abspath, code, headers, data)
128
182
def _raise_curl_connection_error(self, curl):
129
183
curl_errno = curl.getinfo(pycurl.OS_ERRNO)
131
185
raise ConnectionError('curl connection error (%s) on %s'
132
186
% (os.strerror(curl_errno), url))
134
def _raise_curl_http_error(self, curl):
188
def _raise_curl_http_error(self, curl, info=None):
135
189
code = curl.getinfo(pycurl.HTTP_CODE)
136
190
url = curl.getinfo(pycurl.EFFECTIVE_URL)
137
raise TransportError('http error %d probing for %s' %
195
raise errors.InvalidHttpResponse(url, 'Unable to handle http code %d%s'
140
198
def _set_curl_options(self, curl):
141
199
"""Set options for all requests"""
143
201
# to force it to always retrieve. so just turn off the default Pragma
144
202
# provided by Curl.
145
203
headers = ['Cache-control: max-age=0',
205
'Connection: Keep-Alive']
147
206
## curl.setopt(pycurl.VERBOSE, 1)
148
207
# TODO: maybe include a summary of the pycurl version
149
ua_str = 'bzr/%s (pycurl)' % (bzrlib.__version__)
208
ua_str = 'bzr/%s (pycurl)' % (bzrlib.__version__,)
150
209
curl.setopt(pycurl.USERAGENT, ua_str)
151
210
curl.setopt(pycurl.HTTPHEADER, headers)
152
211
curl.setopt(pycurl.FOLLOWLOCATION, 1) # follow redirect responses
158
217
except pycurl.error, e:
159
218
# XXX: There seem to be no symbolic constants for these values.
161
# couldn't resolve host
162
raise NoSuchFile(curl.getinfo(pycurl.EFFECTIVE_URL), e)
219
url = curl.getinfo(pycurl.EFFECTIVE_URL)
220
mutter('got pycurl error: %s, %s, %s, url: %s ',
221
e[0], _pycurl_errors.errorcode[e[0]], e, url)
222
if e[0] in (_pycurl_errors.CURLE_COULDNT_RESOLVE_HOST,
223
_pycurl_errors.CURLE_COULDNT_CONNECT):
224
self._raise_curl_connection_error(curl)
225
# jam 20060713 The code didn't use to re-raise the exception here
226
# but that seemed bogus
165
230
class HttpServer_PyCurl(HttpServer):