1540.3.1
by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib. |
1 |
# Copyright (C) 2006 Canonical Ltd
|
1540.3.18
by Martin Pool
Style review fixes (thanks robertc) |
2 |
#
|
1540.3.1
by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib. |
3 |
# This program is free software; you can redistribute it and/or modify
|
4 |
# it under the terms of the GNU General Public License as published by
|
|
5 |
# the Free Software Foundation; either version 2 of the License, or
|
|
6 |
# (at your option) any later version.
|
|
1540.3.18
by Martin Pool
Style review fixes (thanks robertc) |
7 |
#
|
1540.3.1
by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib. |
8 |
# This program is distributed in the hope that it will be useful,
|
9 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
10 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
11 |
# GNU General Public License for more details.
|
|
1540.3.18
by Martin Pool
Style review fixes (thanks robertc) |
12 |
#
|
1540.3.1
by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib. |
13 |
# You should have received a copy of the GNU General Public License
|
14 |
# along with this program; if not, write to the Free Software
|
|
15 |
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
16 |
||
17 |
"""http/https transport using pycurl"""
|
|
18 |
||
19 |
# TODO: test reporting of http errors
|
|
20 |
||
1616.1.9
by Martin Pool
Set Cache-control: max-age=0 and Pragma: no-cache |
21 |
# TODO: Transport option to control caching of particular requests; broadly we
|
22 |
# would want to offer "caching allowed" or "must revalidate", depending on
|
|
23 |
# whether we expect a particular file will be modified after it's committed.
|
|
24 |
# It's probably safer to just always revalidate. mbp 20060321
|
|
25 |
||
1612.1.1
by Martin Pool
Raise errors correctly on pycurl connection failure |
26 |
import os |
1540.3.1
by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib. |
27 |
from StringIO import StringIO |
1540.3.5
by Martin Pool
Raise exception if unicode is passed to transport; formatting fixes |
28 |
|
1540.3.15
by Martin Pool
[merge] large merge to sync with bzr.dev |
29 |
import bzrlib |
1540.3.5
by Martin Pool
Raise exception if unicode is passed to transport; formatting fixes |
30 |
from bzrlib.errors import (TransportNotPossible, NoSuchFile, |
1540.3.7
by Martin Pool
Prepare to select a transport depending on what dependencies can be satisfied. |
31 |
TransportError, ConnectionError, |
32 |
DependencyNotPresent) |
|
1540.3.18
by Martin Pool
Style review fixes (thanks robertc) |
33 |
from bzrlib.trace import mutter |
1636.1.2
by Robert Collins
More review fixen to the relpath at '/' fixes. |
34 |
from bzrlib.transport import register_urlparse_netloc_protocol |
1540.3.10
by Martin Pool
[broken] keep hooking pycurl into test framework |
35 |
from bzrlib.transport.http import HttpTransportBase, extract_auth, HttpServer |
1540.3.1
by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib. |
36 |
|
1540.3.7
by Martin Pool
Prepare to select a transport depending on what dependencies can be satisfied. |
37 |
try: |
38 |
import pycurl |
|
39 |
except ImportError, e: |
|
40 |
mutter("failed to import pycurl: %s", e) |
|
41 |
raise DependencyNotPresent('pycurl', e) |
|
42 |
||
1684.1.5
by Martin Pool
(patch) check that pycurl will actuall initialize as well as load (Alexander) |
43 |
try: |
44 |
# see if we can actually initialize PyCurl - sometimes it will load but
|
|
45 |
# fail to start up due to this bug:
|
|
46 |
#
|
|
47 |
# 32. (At least on Windows) If libcurl is built with c-ares and there's
|
|
48 |
# no DNS server configured in the system, the ares_init() call fails and
|
|
49 |
# thus curl_easy_init() fails as well. This causes weird effects for
|
|
50 |
# people who use numerical IP addresses only.
|
|
51 |
#
|
|
52 |
# reported by Alexander Belchenko, 2006-04-26
|
|
53 |
pycurl.Curl() |
|
54 |
except pycurl.error, e: |
|
55 |
mutter("failed to initialize pycurl: %s", e) |
|
56 |
raise DependencyNotPresent('pycurl', e) |
|
57 |
||
1540.3.7
by Martin Pool
Prepare to select a transport depending on what dependencies can be satisfied. |
58 |
|
1636.1.2
by Robert Collins
More review fixen to the relpath at '/' fixes. |
59 |
register_urlparse_netloc_protocol('http+pycurl') |
1636.1.1
by Robert Collins
Fix calling relpath() and abspath() on transports at their root. |
60 |
|
61 |
||
1540.3.1
by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib. |
62 |
class PyCurlTransport(HttpTransportBase): |
1540.3.3
by Martin Pool
Review updates of pycurl transport |
63 |
"""http client transport using pycurl
|
64 |
||
65 |
PyCurl is a Python binding to the C "curl" multiprotocol client.
|
|
66 |
||
67 |
This transport can be significantly faster than the builtin Python client.
|
|
68 |
Advantages include: DNS caching, connection keepalive, and ability to
|
|
69 |
set headers to allow caching.
|
|
70 |
"""
|
|
71 |
||
1540.3.1
by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib. |
72 |
def __init__(self, base): |
73 |
super(PyCurlTransport, self).__init__(base) |
|
1540.3.7
by Martin Pool
Prepare to select a transport depending on what dependencies can be satisfied. |
74 |
mutter('using pycurl %s' % pycurl.version) |
1540.3.1
by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib. |
75 |
|
1540.3.10
by Martin Pool
[broken] keep hooking pycurl into test framework |
76 |
def should_cache(self): |
77 |
"""Return True if the data pulled across should be cached locally.
|
|
78 |
"""
|
|
79 |
return True |
|
80 |
||
1540.3.3
by Martin Pool
Review updates of pycurl transport |
81 |
def has(self, relpath): |
1540.3.14
by Martin Pool
[pycurl] Make Curl instance a local variable not a long-lived object. |
82 |
curl = pycurl.Curl() |
1540.3.24
by Martin Pool
Add new protocol 'http+pycurl' that always uses PyCurl. |
83 |
abspath = self._real_abspath(relpath) |
1540.3.14
by Martin Pool
[pycurl] Make Curl instance a local variable not a long-lived object. |
84 |
curl.setopt(pycurl.URL, abspath) |
85 |
curl.setopt(pycurl.FOLLOWLOCATION, 1) # follow redirect responses |
|
86 |
self._set_curl_options(curl) |
|
1540.3.3
by Martin Pool
Review updates of pycurl transport |
87 |
# don't want the body - ie just do a HEAD request
|
1540.3.14
by Martin Pool
[pycurl] Make Curl instance a local variable not a long-lived object. |
88 |
curl.setopt(pycurl.NOBODY, 1) |
89 |
self._curl_perform(curl) |
|
90 |
code = curl.getinfo(pycurl.HTTP_CODE) |
|
91 |
if code == 404: # not found |
|
92 |
return False |
|
93 |
elif code in (200, 302): # "ok", "found" |
|
94 |
return True |
|
1612.1.1
by Martin Pool
Raise errors correctly on pycurl connection failure |
95 |
elif code == 0: |
96 |
self._raise_curl_connection_error(curl) |
|
1540.3.14
by Martin Pool
[pycurl] Make Curl instance a local variable not a long-lived object. |
97 |
else: |
1612.1.1
by Martin Pool
Raise errors correctly on pycurl connection failure |
98 |
self._raise_curl_http_error(curl) |
1540.3.3
by Martin Pool
Review updates of pycurl transport |
99 |
|
1540.3.26
by Martin Pool
[merge] bzr.dev; pycurl not updated for readv yet |
100 |
def _get(self, relpath, ranges): |
1540.3.14
by Martin Pool
[pycurl] Make Curl instance a local variable not a long-lived object. |
101 |
curl = pycurl.Curl() |
1540.3.24
by Martin Pool
Add new protocol 'http+pycurl' that always uses PyCurl. |
102 |
abspath = self._real_abspath(relpath) |
1540.3.1
by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib. |
103 |
sio = StringIO() |
1540.3.14
by Martin Pool
[pycurl] Make Curl instance a local variable not a long-lived object. |
104 |
curl.setopt(pycurl.URL, abspath) |
105 |
self._set_curl_options(curl) |
|
106 |
curl.setopt(pycurl.WRITEFUNCTION, sio.write) |
|
107 |
curl.setopt(pycurl.NOBODY, 0) |
|
1540.3.27
by Martin Pool
Integrate http range support for pycurl |
108 |
if ranges is not None: |
109 |
assert len(ranges) == 1 |
|
110 |
# multiple ranges not supported yet because we can't decode the
|
|
111 |
# response
|
|
112 |
curl.setopt(pycurl.RANGE, '%d-%d' % ranges[0]) |
|
1540.3.14
by Martin Pool
[pycurl] Make Curl instance a local variable not a long-lived object. |
113 |
self._curl_perform(curl) |
114 |
code = curl.getinfo(pycurl.HTTP_CODE) |
|
1540.3.1
by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib. |
115 |
if code == 404: |
116 |
raise NoSuchFile(abspath) |
|
1540.3.13
by Martin Pool
Curl should follow http redirects, the same as urllib |
117 |
elif code == 200: |
118 |
sio.seek(0) |
|
1540.3.27
by Martin Pool
Integrate http range support for pycurl |
119 |
return code, sio |
120 |
elif code == 206 and (ranges is not None): |
|
121 |
sio.seek(0) |
|
122 |
return code, sio |
|
1612.1.1
by Martin Pool
Raise errors correctly on pycurl connection failure |
123 |
elif code == 0: |
124 |
self._raise_curl_connection_error(curl) |
|
1540.3.13
by Martin Pool
Curl should follow http redirects, the same as urllib |
125 |
else: |
1612.1.1
by Martin Pool
Raise errors correctly on pycurl connection failure |
126 |
self._raise_curl_http_error(curl) |
127 |
||
128 |
def _raise_curl_connection_error(self, curl): |
|
129 |
curl_errno = curl.getinfo(pycurl.OS_ERRNO) |
|
130 |
url = curl.getinfo(pycurl.EFFECTIVE_URL) |
|
131 |
raise ConnectionError('curl connection error (%s) on %s' |
|
132 |
% (os.strerror(curl_errno), url)) |
|
133 |
||
134 |
def _raise_curl_http_error(self, curl): |
|
135 |
code = curl.getinfo(pycurl.HTTP_CODE) |
|
136 |
url = curl.getinfo(pycurl.EFFECTIVE_URL) |
|
137 |
raise TransportError('http error %d probing for %s' % |
|
138 |
(code, url)) |
|
1540.3.1
by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib. |
139 |
|
1540.3.13
by Martin Pool
Curl should follow http redirects, the same as urllib |
140 |
def _set_curl_options(self, curl): |
141 |
"""Set options for all requests"""
|
|
1540.3.11
by Martin Pool
doc |
142 |
# There's no way in http/1.0 to say "must revalidate"; we don't want
|
143 |
# to force it to always retrieve. so just turn off the default Pragma
|
|
144 |
# provided by Curl.
|
|
1616.1.9
by Martin Pool
Set Cache-control: max-age=0 and Pragma: no-cache |
145 |
headers = ['Cache-control: max-age=0', |
146 |
'Pragma: no-cache'] |
|
1540.3.14
by Martin Pool
[pycurl] Make Curl instance a local variable not a long-lived object. |
147 |
## curl.setopt(pycurl.VERBOSE, 1)
|
1616.1.9
by Martin Pool
Set Cache-control: max-age=0 and Pragma: no-cache |
148 |
# TODO: maybe include a summary of the pycurl version
|
149 |
ua_str = 'bzr/%s (pycurl)' % (bzrlib.__version__) |
|
1540.3.15
by Martin Pool
[merge] large merge to sync with bzr.dev |
150 |
curl.setopt(pycurl.USERAGENT, ua_str) |
1540.3.13
by Martin Pool
Curl should follow http redirects, the same as urllib |
151 |
curl.setopt(pycurl.HTTPHEADER, headers) |
152 |
curl.setopt(pycurl.FOLLOWLOCATION, 1) # follow redirect responses |
|
1540.3.3
by Martin Pool
Review updates of pycurl transport |
153 |
|
1540.3.14
by Martin Pool
[pycurl] Make Curl instance a local variable not a long-lived object. |
154 |
def _curl_perform(self, curl): |
1540.3.3
by Martin Pool
Review updates of pycurl transport |
155 |
"""Perform curl operation and translate exceptions."""
|
156 |
try: |
|
1540.3.14
by Martin Pool
[pycurl] Make Curl instance a local variable not a long-lived object. |
157 |
curl.perform() |
1540.3.3
by Martin Pool
Review updates of pycurl transport |
158 |
except pycurl.error, e: |
159 |
# XXX: There seem to be no symbolic constants for these values.
|
|
160 |
if e[0] == 6: |
|
161 |
# couldn't resolve host
|
|
1540.3.14
by Martin Pool
[pycurl] Make Curl instance a local variable not a long-lived object. |
162 |
raise NoSuchFile(curl.getinfo(pycurl.EFFECTIVE_URL), e) |
1540.3.1
by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib. |
163 |
|
1540.3.10
by Martin Pool
[broken] keep hooking pycurl into test framework |
164 |
|
1540.3.24
by Martin Pool
Add new protocol 'http+pycurl' that always uses PyCurl. |
165 |
class HttpServer_PyCurl(HttpServer):
    """HttpServer variant that hands out http+pycurl urls.

    Meant for the test suite: connections made to this server will always
    go through pycurl where possible.
    """

    # scheme for the urls this server returns, so that they require the
    # pycurl client implementation
    _url_protocol = 'http+pycurl'
|
174 |
||
175 |
||
1540.3.10
by Martin Pool
[broken] keep hooking pycurl into test framework |
176 |
def get_test_permutations():
    """Return the permutations to be used in testing.

    The result is a list with one (transport class, server class) pair:
    PyCurlTransport together with HttpServer_PyCurl.
    """
    permutations = [(PyCurlTransport, HttpServer_PyCurl)]
    return permutations
|