# Copyright (C) 2006 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""http/https transport using pycurl"""

# TODO: test reporting of http errors
#
# TODO: Transport option to control caching of particular requests; broadly we
# would want to offer "caching allowed" or "must revalidate", depending on
# whether we expect a particular file will be modified after it's committed.
# It's probably safer to just always revalidate. mbp 20060321
import os
from StringIO import StringIO

import bzrlib
from bzrlib.errors import (TransportNotPossible, NoSuchFile,
                           TransportError, ConnectionError,
                           DependencyNotPresent)
from bzrlib.trace import mutter
from bzrlib.transport import register_urlparse_netloc_protocol
from bzrlib.transport.http import HttpTransportBase, extract_auth, HttpServer

try:
    import pycurl
except ImportError, e:
    mutter("failed to import pycurl: %s", e)
    raise DependencyNotPresent('pycurl', e)

# Register the 'http+pycurl' scheme at import time, before any such URL is
# handled.  NOTE(review): going by the helper's name this presumably makes
# urlparse treat the scheme as having a netloc (host) part -- confirm against
# bzrlib.transport.
register_urlparse_netloc_protocol('http+pycurl')
47
class PyCurlTransport(HttpTransportBase):
    """http client transport using pycurl

    PyCurl is a Python binding to the C "curl" multiprotocol client.

    This transport can be significantly faster than the builtin Python client.
    Advantages include: DNS caching, connection keepalive, and ability to
    set headers to allow caching.

    Each request builds its own ``pycurl.Curl`` handle, applies the options
    shared by all requests (``_set_curl_options``) and runs it through
    ``_curl_perform``.  The HTTP status is then mapped to a return value or
    to one of the bzrlib transport exceptions.
    """

    # Numeric value of libcurl's CURLE_COULDNT_RESOLVE_HOST.  Spelled out
    # here because the code was written without a symbolic constant to hand.
    _CURLE_COULDNT_RESOLVE_HOST = 6

    def __init__(self, base):
        """Create a transport rooted at ``base`` and log the pycurl version."""
        super(PyCurlTransport, self).__init__(base)
        mutter('using pycurl %s' % pycurl.version)

    def should_cache(self):
        """Return True if the data pulled across should be cached locally.
        """
        return True

    def has(self, relpath):
        """Return whether ``relpath`` exists on the server.

        The request is sent without asking for a body.

        :param relpath: path relative to this transport's base.
        :return: False for a 404 response, True for a 200 or 302 response.
        :raises ConnectionError: when no HTTP status was obtained (code 0).
        :raises TransportError: for any other HTTP status.
        """
        curl = pycurl.Curl()
        abspath = self._real_abspath(relpath)
        curl.setopt(pycurl.URL, abspath)
        # Redirect following is switched on by _set_curl_options along with
        # the other options common to every request.
        self._set_curl_options(curl)
        # don't want the body - ie just do a HEAD request
        curl.setopt(pycurl.NOBODY, 1)
        self._curl_perform(curl)
        code = curl.getinfo(pycurl.HTTP_CODE)
        if code == 404: # not found
            return False
        elif code in (200, 302): # "ok", "found"
            return True
        elif code == 0:
            self._raise_curl_connection_error(curl)
        else:
            self._raise_curl_http_error(curl)

    def _get(self, relpath, ranges):
        """Fetch ``relpath`` and return its body in a rewound StringIO.

        :param relpath: path relative to this transport's base.
        :param ranges: None to fetch the whole file, otherwise a sequence
            holding exactly one ``(start, end)`` pair of byte offsets.
        :return: a ``(code, sio)`` tuple; ``code`` is 200, or 206 when a
            range was requested, and ``sio`` is positioned at offset 0.
        :raises NoSuchFile: for a 404 response.
        :raises ConnectionError: when no HTTP status was obtained (code 0).
        :raises TransportError: for any other HTTP status, including a 206
            that arrives when no range was asked for.
        """
        curl = pycurl.Curl()
        abspath = self._real_abspath(relpath)
        sio = StringIO()
        curl.setopt(pycurl.URL, abspath)
        self._set_curl_options(curl)
        curl.setopt(pycurl.WRITEFUNCTION, sio.write)
        curl.setopt(pycurl.NOBODY, 0)
        if ranges is not None:
            assert len(ranges) == 1
            # multiple ranges not supported yet because we can't decode the
            # response
            curl.setopt(pycurl.RANGE, '%d-%d' % ranges[0])
        self._curl_perform(curl)
        code = curl.getinfo(pycurl.HTTP_CODE)
        if code == 404:
            raise NoSuchFile(abspath)
        elif code == 200:
            sio.seek(0)
            return code, sio
        elif code == 206 and (ranges is not None):
            sio.seek(0)
            return code, sio
        elif code == 0:
            self._raise_curl_connection_error(curl)
        else:
            self._raise_curl_http_error(curl)

    def _raise_curl_connection_error(self, curl):
        """Raise ConnectionError built from curl's OS errno and final URL."""
        curl_errno = curl.getinfo(pycurl.OS_ERRNO)
        url = curl.getinfo(pycurl.EFFECTIVE_URL)
        raise ConnectionError('curl connection error (%s) on %s'
                              % (os.strerror(curl_errno), url))

    def _raise_curl_http_error(self, curl):
        """Raise TransportError naming the HTTP status and the final URL."""
        code = curl.getinfo(pycurl.HTTP_CODE)
        url = curl.getinfo(pycurl.EFFECTIVE_URL)
        raise TransportError('http error %d probing for %s' %
                             (code, url))

    def _set_curl_options(self, curl):
        """Set options for all requests"""
        # There's no way in http/1.0 to say "must revalidate"; we don't want
        # to force it to always retrieve. so just turn off the default Pragma
        # provided by Curl.
        headers = ['Cache-control: max-age=0',
                   'Pragma: ']
        ## curl.setopt(pycurl.VERBOSE, 1)
        # TODO: maybe include a summary of the pycurl version
        ua_str = 'bzr/%s (pycurl)' % (bzrlib.__version__)
        curl.setopt(pycurl.USERAGENT, ua_str)
        curl.setopt(pycurl.HTTPHEADER, headers)
        curl.setopt(pycurl.FOLLOWLOCATION, 1) # follow redirect responses

    def _curl_perform(self, curl):
        """Perform curl operation and translate exceptions.

        A failed host lookup is reported as NoSuchFile for the effective
        URL.  Any other pycurl error is deliberately not re-raised here:
        has() and _get() inspect the HTTP status afterwards and turn a
        missing status (code 0) into ConnectionError.
        """
        import sys
        try:
            curl.perform()
        except pycurl.error:
            # Picked up through sys.exc_info() rather than an "except ..., e"
            # clause so that this method parses on both Python 2 and 3.
            e = sys.exc_info()[1]
            if e.args[0] == self._CURLE_COULDNT_RESOLVE_HOST:
                # couldn't resolve host
                raise NoSuchFile(curl.getinfo(pycurl.EFFECTIVE_URL), e)


class HttpServer_PyCurl(HttpServer):
    """Subclass of HttpServer that gives http+pycurl urls.

    This is for use in testing: connections to this server will always go
    through pycurl where possible.
    """

    # urls returned by this server should require the pycurl client impl
    _url_protocol = 'http+pycurl'


def get_test_permutations():
162
"""Return the permutations to be used in testing."""
163
return [(PyCurlTransport, HttpServer_PyCurl),