5247.1.4
by Vincent Ladeuil
Merge cleanup into first-try |
1 |
# Copyright (C) 2006-2010 Canonical Ltd
|
1540.3.18
by Martin Pool
Style review fixes (thanks robertc) |
2 |
#
|
1540.3.1
by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib. |
3 |
# This program is free software; you can redistribute it and/or modify
|
4 |
# it under the terms of the GNU General Public License as published by
|
|
5 |
# the Free Software Foundation; either version 2 of the License, or
|
|
6 |
# (at your option) any later version.
|
|
1540.3.18
by Martin Pool
Style review fixes (thanks robertc) |
7 |
#
|
1540.3.1
by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib. |
8 |
# This program is distributed in the hope that it will be useful,
|
9 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
10 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
11 |
# GNU General Public License for more details.
|
|
1540.3.18
by Martin Pool
Style review fixes (thanks robertc) |
12 |
#
|
1540.3.1
by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib. |
13 |
# You should have received a copy of the GNU General Public License
|
14 |
# along with this program; if not, write to the Free Software
|
|
4183.7.1
by Sabin Iacob
update FSF mailing address |
15 |
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
1540.3.1
by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib. |
16 |
|
17 |
"""http/https transport using pycurl"""
|
|
18 |
||
19 |
# TODO: test reporting of http errors
|
|
1887.1.1
by Adeodato Simó
Do not separate paragraphs in the copyright statement with blank lines, |
20 |
#
|
1616.1.9
by Martin Pool
Set Cache-control: max-age=0 and Pragma: no-cache |
21 |
# TODO: Transport option to control caching of particular requests; broadly we
|
22 |
# would want to offer "caching allowed" or "must revalidate", depending on
|
|
23 |
# whether we expect a particular file will be modified after it's committed.
|
|
24 |
# It's probably safer to just always revalidate. mbp 20060321
|
|
25 |
||
2164.2.16
by Vincent Ladeuil
Add tests. |
26 |
# TODO: Some refactoring could be done to avoid the strange idiom
|
27 |
# used to capture data and headers while setting up the request
|
|
28 |
# (and having to pass 'header' to _curl_perform to handle
|
|
29 |
# redirections). This could be achieved by creating a
|
|
30 |
# specialized Curl object and returning code, headers and data
|
|
31 |
# from _curl_perform. Not done because we may deprecate pycurl in the
|
|
32 |
# future -- vila 20070212
|
|
33 |
||
1786.1.42
by John Arbash Meinel
Update _extract_headers, make it less generic, and non recursive. |
34 |
from cStringIO import StringIO |
3059.2.2
by Vincent Ladeuil
Read http responses on demand without buffering the whole body |
35 |
import httplib |
1540.3.5
by Martin Pool
Raise exception if unicode is passed to transport; formatting fixes |
36 |
|
2004.1.25
by v.ladeuil+lp at free
Shuffle http related test code. Hopefully it ends up at the right place :) |
37 |
from bzrlib import ( |
3052.3.3
by Vincent Ladeuil
Add -Dhttp support. |
38 |
debug, |
2004.1.25
by v.ladeuil+lp at free
Shuffle http related test code. Hopefully it ends up at the right place :) |
39 |
errors, |
5957.2.3
by Vincent Ladeuil
Mask credentials in the -Dhttp logging |
40 |
trace, |
2004.1.25
by v.ladeuil+lp at free
Shuffle http related test code. Hopefully it ends up at the right place :) |
41 |
)
|
1540.3.15
by Martin Pool
[merge] large merge to sync with bzr.dev |
42 |
import bzrlib |
2004.1.25
by v.ladeuil+lp at free
Shuffle http related test code. Hopefully it ends up at the right place :) |
43 |
from bzrlib.transport.http import ( |
2298.5.1
by Alexander Belchenko
Bugfix #82086: Searching location of CA bundle for PyCurl in env variable (CURL_CA_BUNDLE), and on win32 along the PATH |
44 |
ca_bundle, |
2004.1.25
by v.ladeuil+lp at free
Shuffle http related test code. Hopefully it ends up at the right place :) |
45 |
HttpTransportBase, |
46 |
response, |
|
4912.2.2
by Martin Pool
Include rough unhtml in pycurl error messages |
47 |
unhtml_roughly, |
2004.1.25
by v.ladeuil+lp at free
Shuffle http related test code. Hopefully it ends up at the right place :) |
48 |
)
|
1540.3.1
by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib. |
49 |
|
1540.3.7
by Martin Pool
Prepare to select a transport depending on what dependencies can be satisfied. |
50 |
try: |
51 |
import pycurl |
|
52 |
except ImportError, e: |
|
5957.2.3
by Vincent Ladeuil
Mask credentials in the -Dhttp logging |
53 |
trace.mutter("failed to import pycurl: %s", e) |
3052.3.3
by Vincent Ladeuil
Add -Dhttp support. |
54 |
raise errors.DependencyNotPresent('pycurl', e) |
1540.3.7
by Martin Pool
Prepare to select a transport depending on what dependencies can be satisfied. |
55 |
|
1684.1.5
by Martin Pool
(patch) check that pycurl will actuall initialize as well as load (Alexander) |
56 |
# Importing pycurl is not enough to know it is usable: build a throwaway
# Curl handle at import time so a broken installation is reported as a
# missing dependency instead of failing later on the first request.
try:
    # see if we can actually initialize PyCurl - sometimes it will load but
    # fail to start up due to this bug:
    #
    #   32. (At least on Windows) If libcurl is built with c-ares and there's
    #   no DNS server configured in the system, the ares_init() call fails and
    #   thus curl_easy_init() fails as well. This causes weird effects for
    #   people who use numerical IP addresses only.
    #
    # reported by Alexander Belchenko, 2006-04-26
    pycurl.Curl()
except pycurl.error, e:
    # Record the reason in the trace log, then surface the failure as an
    # unavailable 'pycurl' dependency.
    trace.mutter("failed to initialize pycurl: %s", e)
    raise errors.DependencyNotPresent('pycurl', e)
1684.1.5
by Martin Pool
(patch) check that pycurl will actuall initialize as well as load (Alexander) |
70 |
|
1540.3.7
by Martin Pool
Prepare to select a transport depending on what dependencies can be satisfied. |
71 |
|
2872.6.1
by Vincent Ladeuil
Fix bug #147530 by enabling more robust error code definitions. |
72 |
|
73 |
||
74 |
def _get_pycurl_errcode(symbol, default):
    """Look up the numerical error code pycurl defines for ``symbol``.

    Not every pycurl implementation defines the same error symbols, and old
    versions never define some of them (whether or not they can actually
    return the corresponding error code). Falling back to a caller-supplied
    number lets this module define every symbol it cares about, including
    ones an older pycurl will never return, which is harmless.

    :param symbol: Name of the attribute to look up in the pycurl module.
    :param default: Value to return when pycurl does not define ``symbol``.
    :return: The value pycurl defines for ``symbol``, or ``default``.
    """
    namespace = vars(pycurl)
    if symbol in namespace:
        return namespace[symbol]
    return default
|
85 |
||
86 |
# Error codes this module reacts to. Each one is read from pycurl when the
# installed version defines the symbol; otherwise the numeric fallback given
# here is used. Listed in ascending order of the fallback value.
CURLE_COULDNT_RESOLVE_PROXY = _get_pycurl_errcode(
    'E_COULDNT_RESOLVE_PROXY', 5)
CURLE_COULDNT_RESOLVE_HOST = _get_pycurl_errcode(
    'E_COULDNT_RESOLVE_HOST', 6)
CURLE_COULDNT_CONNECT = _get_pycurl_errcode(
    'E_COULDNT_CONNECT', 7)
CURLE_PARTIAL_FILE = _get_pycurl_errcode(
    'E_PARTIAL_FILE', 18)
CURLE_GOT_NOTHING = _get_pycurl_errcode(
    'E_GOT_NOTHING', 52)
CURLE_SEND_ERROR = _get_pycurl_errcode(
    'E_SEND_ERROR', 55)
CURLE_RECV_ERROR = _get_pycurl_errcode(
    'E_RECV_ERROR', 56)
CURLE_SSL_CACERT = _get_pycurl_errcode(
    'E_SSL_CACERT', 60)
CURLE_SSL_CACERT_BADFILE = _get_pycurl_errcode(
    'E_SSL_CACERT_BADFILE', 77)
|
2872.6.1
by Vincent Ladeuil
Fix bug #147530 by enabling more robust error code definitions. |
95 |
|
96 |
||
1540.3.1
by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib. |
97 |
class PyCurlTransport(HttpTransportBase): |
1540.3.3
by Martin Pool
Review updates of pycurl transport |
98 |
"""http client transport using pycurl
|
99 |
||
100 |
PyCurl is a Python binding to the C "curl" multiprotocol client.
|
|
101 |
||
2004.1.30
by v.ladeuil+lp at free
Fix #62276 and #62029 by providing a more robust http range handling. |
102 |
This transport can be significantly faster than the builtin
|
103 |
Python client. Advantages include: DNS caching.
|
|
1540.3.3
by Martin Pool
Review updates of pycurl transport |
104 |
"""
|
105 |
||
2485.8.59
by Vincent Ladeuil
Update from review comments. |
106 |
def __init__(self, base, _from_transport=None): |
3878.4.6
by Vincent Ladeuil
Fix bug #270863 by preserving 'bzr+http[s]' decorator. |
107 |
super(PyCurlTransport, self).__init__(base, 'pycurl', |
108 |
_from_transport=_from_transport) |
|
3878.4.2
by Vincent Ladeuil
Fix bug #265070 by providing a finer sieve for accepted redirections. |
109 |
if self._unqualified_scheme == 'https': |
2294.3.1
by Vincent Ladeuil
Fix #85305 by issuing an exception instead of a traceback. |
110 |
# Check availability of https into pycurl supported
|
111 |
# protocols
|
|
112 |
supported = pycurl.version_info()[8] |
|
113 |
if 'https' not in supported: |
|
3052.3.3
by Vincent Ladeuil
Add -Dhttp support. |
114 |
raise errors.DependencyNotPresent('pycurl', 'no https support') |
2298.5.1
by Alexander Belchenko
Bugfix #82086: Searching location of CA bundle for PyCurl in env variable (CURL_CA_BUNDLE), and on win32 along the PATH |
115 |
self.cabundle = ca_bundle.get_ca_path() |
2485.8.41
by Vincent Ladeuil
Finish http refactoring. Test suite passing. |
116 |
|
117 |
def _get_curl(self): |
|
118 |
connection = self._get_connection() |
|
119 |
if connection is None: |
|
120 |
# First connection ever. There is no credentials for pycurl, either
|
|
121 |
# the password was embedded in the URL or it's not needed. The
|
|
122 |
# connection for pycurl is just the Curl object, it will not
|
|
2485.8.43
by Vincent Ladeuil
Cleaning. |
123 |
# connect to the http server until the first request (which had
|
124 |
# just called us).
|
|
2485.8.41
by Vincent Ladeuil
Finish http refactoring. Test suite passing. |
125 |
connection = pycurl.Curl() |
3133.1.2
by Vincent Ladeuil
Fix #177643 by making pycurl handle url-embedded credentials again. |
126 |
# First request, initialize credentials.
|
127 |
auth = self._create_auth() |
|
128 |
# Proxy handling is out of reach, so we punt
|
|
129 |
self._set_connection(connection, auth) |
|
2485.8.41
by Vincent Ladeuil
Finish http refactoring. Test suite passing. |
130 |
return connection |
1540.3.1
by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib. |
131 |
|
5247.2.12
by Vincent Ladeuil
Ensure that all transports close their underlying connection. |
132 |
def disconnect(self): |
133 |
connection = self._get_connection() |
|
134 |
if connection is not None: |
|
135 |
connection.close() |
|
136 |
||
1540.3.3
by Martin Pool
Review updates of pycurl transport |
137 |
def has(self, relpath): |
1786.1.32
by John Arbash Meinel
cleanup pass, allow pycurl connections to be shared between transports. |
138 |
"""See Transport.has()"""
|
139 |
# We set NO BODY=0 in _get_full, so it should be safe
|
|
140 |
# to re-use the non-range curl object
|
|
2485.8.41
by Vincent Ladeuil
Finish http refactoring. Test suite passing. |
141 |
curl = self._get_curl() |
2485.8.25
by Vincent Ladeuil
Separate abspath from _remote_path, the intents are different. |
142 |
abspath = self._remote_path(relpath) |
1540.3.14
by Martin Pool
[pycurl] Make Curl instance a local variable not a long-lived object. |
143 |
curl.setopt(pycurl.URL, abspath) |
144 |
self._set_curl_options(curl) |
|
2018.2.28
by Andrew Bennetts
Changes in response to review: re-use _base_curl, rather than keeping a seperate _post_curl object; add docstring to test_http.RecordingServer, set is_user_error on some new exceptions. |
145 |
curl.setopt(pycurl.HTTPGET, 1) |
1540.3.3
by Martin Pool
Review updates of pycurl transport |
146 |
# don't want the body - ie just do a HEAD request
|
1786.1.27
by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration. |
147 |
# This means "NO BODY" not 'nobody'
|
1540.3.14
by Martin Pool
[pycurl] Make Curl instance a local variable not a long-lived object. |
148 |
curl.setopt(pycurl.NOBODY, 1) |
2164.2.16
by Vincent Ladeuil
Add tests. |
149 |
# But we need headers to handle redirections
|
150 |
header = StringIO() |
|
151 |
curl.setopt(pycurl.HEADERFUNCTION, header.write) |
|
2004.1.16
by v.ladeuil+lp at free
Add tests against erroneous http status lines. |
152 |
# In some erroneous cases, pycurl will emit text on
|
153 |
# stdout if we don't catch it (see InvalidStatus tests
|
|
154 |
# for one such occurrence).
|
|
155 |
blackhole = StringIO() |
|
156 |
curl.setopt(pycurl.WRITEFUNCTION, blackhole.write) |
|
2164.2.16
by Vincent Ladeuil
Add tests. |
157 |
self._curl_perform(curl, header) |
1540.3.14
by Martin Pool
[pycurl] Make Curl instance a local variable not a long-lived object. |
158 |
code = curl.getinfo(pycurl.HTTP_CODE) |
159 |
if code == 404: # not found |
|
160 |
return False |
|
2164.2.16
by Vincent Ladeuil
Add tests. |
161 |
elif code == 200: # "ok" |
1540.3.14
by Martin Pool
[pycurl] Make Curl instance a local variable not a long-lived object. |
162 |
return True |
163 |
else: |
|
1612.1.1
by Martin Pool
Raise errors correctly on pycurl connection failure |
164 |
self._raise_curl_http_error(curl) |
2000.3.1
by v.ladeuil+lp at free
Better connection sharing by using only one curl object. |
165 |
|
2520.2.1
by Vincent Ladeuil
First step to fix #115209 use _coalesce_offsets like other transports. |
166 |
def _get(self, relpath, offsets, tail_amount=0): |
1786.1.27
by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration. |
167 |
# This just switches based on the type of request
|
2520.2.1
by Vincent Ladeuil
First step to fix #115209 use _coalesce_offsets like other transports. |
168 |
if offsets is not None or tail_amount not in (0, None): |
169 |
return self._get_ranged(relpath, offsets, tail_amount=tail_amount) |
|
1786.1.27
by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration. |
170 |
else: |
2164.2.5
by v.ladeuil+lp at free
Simpler implementation using inspect. 'hints' is a kwargs. |
171 |
return self._get_full(relpath) |
2000.3.1
by v.ladeuil+lp at free
Better connection sharing by using only one curl object. |
172 |
|
1786.1.27
by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration. |
173 |
def _setup_get_request(self, curl, relpath): |
2018.2.6
by Andrew Bennetts
HTTP client starting to work (pycurl for the moment). |
174 |
# Make sure we do a GET request. versions > 7.14.1 also set the
|
175 |
# NO BODY flag, but we'll do it ourselves in case it is an older
|
|
176 |
# pycurl version
|
|
177 |
curl.setopt(pycurl.NOBODY, 0) |
|
178 |
curl.setopt(pycurl.HTTPGET, 1) |
|
179 |
return self._setup_request(curl, relpath) |
|
180 |
||
181 |
def _setup_request(self, curl, relpath): |
|
1786.1.27
by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration. |
182 |
"""Do the common setup stuff for making a request
|
183 |
||
184 |
:param curl: The curl object to place the request on
|
|
185 |
:param relpath: The relative path that we want to get
|
|
3943.8.1
by Marius Kruger
remove all trailing whitespace from bzr source |
186 |
:return: (abspath, data, header)
|
1786.1.27
by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration. |
187 |
abspath: full url
|
188 |
data: file that will be filled with the body
|
|
189 |
header: file that will be filled with the headers
|
|
190 |
"""
|
|
2485.8.25
by Vincent Ladeuil
Separate abspath from _remote_path, the intents are different. |
191 |
abspath = self._remote_path(relpath) |
1786.1.27
by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration. |
192 |
curl.setopt(pycurl.URL, abspath) |
193 |
self._set_curl_options(curl) |
|
194 |
||
195 |
data = StringIO() |
|
196 |
header = StringIO() |
|
197 |
curl.setopt(pycurl.WRITEFUNCTION, data.write) |
|
198 |
curl.setopt(pycurl.HEADERFUNCTION, header.write) |
|
199 |
||
200 |
return abspath, data, header |
|
201 |
||
2164.2.5
by v.ladeuil+lp at free
Simpler implementation using inspect. 'hints' is a kwargs. |
202 |
def _get_full(self, relpath): |
1786.1.27
by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration. |
203 |
"""Make a request for the entire file"""
|
2485.8.41
by Vincent Ladeuil
Finish http refactoring. Test suite passing. |
204 |
curl = self._get_curl() |
1786.1.27
by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration. |
205 |
abspath, data, header = self._setup_get_request(curl, relpath) |
2164.2.16
by Vincent Ladeuil
Add tests. |
206 |
self._curl_perform(curl, header) |
1786.1.27
by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration. |
207 |
|
208 |
code = curl.getinfo(pycurl.HTTP_CODE) |
|
209 |
data.seek(0) |
|
210 |
||
211 |
if code == 404: |
|
3052.3.3
by Vincent Ladeuil
Add -Dhttp support. |
212 |
raise errors.NoSuchFile(abspath) |
1786.1.27
by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration. |
213 |
if code != 200: |
2004.1.25
by v.ladeuil+lp at free
Shuffle http related test code. Hopefully it ends up at the right place :) |
214 |
self._raise_curl_http_error( |
215 |
curl, 'expected 200 or 404 for full response.') |
|
1786.1.27
by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration. |
216 |
|
217 |
return code, data |
|
218 |
||
3059.2.11
by Vincent Ladeuil
Fix typos mentioned by spiv. |
219 |
# The parent class use 0 to minimize the requests, but since we can't
|
3059.2.7
by Vincent Ladeuil
Allow pycurl users to watch the blinkenlights and fix a bug when ranges are contiguous. |
220 |
# exploit the results as soon as they are received (pycurl limitation) we'd
|
3945.1.6
by Vincent Ladeuil
Fix debug handling for pycurl and implement pycurl http activity |
221 |
# better issue more requests and provide a more responsive UI incurring
|
222 |
# more latency costs.
|
|
3059.2.17
by Vincent Ladeuil
Limit GET requests by body size instead of number of ranges. |
223 |
# If you modify this, think about modifying the comment in http/__init__.py
|
3059.2.7
by Vincent Ladeuil
Allow pycurl users to watch the blinkenlights and fix a bug when ranges are contiguous. |
224 |
# too.
|
3059.2.17
by Vincent Ladeuil
Limit GET requests by body size instead of number of ranges. |
225 |
_get_max_size = 4 * 1024 * 1024 |
3059.2.7
by Vincent Ladeuil
Allow pycurl users to watch the blinkenlights and fix a bug when ranges are contiguous. |
226 |
|
2520.2.1
by Vincent Ladeuil
First step to fix #115209 use _coalesce_offsets like other transports. |
227 |
def _get_ranged(self, relpath, offsets, tail_amount): |
1786.1.27
by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration. |
228 |
"""Make a request for just part of the file."""
|
2485.8.41
by Vincent Ladeuil
Finish http refactoring. Test suite passing. |
229 |
curl = self._get_curl() |
1786.1.27
by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration. |
230 |
abspath, data, header = self._setup_get_request(curl, relpath) |
231 |
||
2520.2.1
by Vincent Ladeuil
First step to fix #115209 use _coalesce_offsets like other transports. |
232 |
range_header = self._attempted_range_header(offsets, tail_amount) |
2004.1.30
by v.ladeuil+lp at free
Fix #62276 and #62029 by providing a more robust http range handling. |
233 |
if range_header is None: |
234 |
# Forget ranges, the server can't handle them
|
|
235 |
return self._get_full(relpath) |
|
236 |
||
2481.3.1
by Vincent Ladeuil
Fix bug #112719 by using the right range header. |
237 |
self._curl_perform(curl, header, ['Range: bytes=%s' % range_header]) |
1786.1.33
by John Arbash Meinel
Cleanup pass #2 |
238 |
data.seek(0) |
239 |
||
1786.1.27
by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration. |
240 |
code = curl.getinfo(pycurl.HTTP_CODE) |
3059.2.2
by Vincent Ladeuil
Read http responses on demand without buffering the whole body |
241 |
|
242 |
if code == 404: # not found |
|
243 |
raise errors.NoSuchFile(abspath) |
|
244 |
elif code in (400, 416): |
|
245 |
# We don't know which, but one of the ranges we specified was
|
|
246 |
# wrong.
|
|
247 |
raise errors.InvalidHttpRange(abspath, range_header, |
|
248 |
'Server return code %d' |
|
249 |
% curl.getinfo(pycurl.HTTP_CODE)) |
|
3059.2.4
by Vincent Ladeuil
Fix typo so that all tests pass now (after merging bzr.dev to get rid of |
250 |
msg = self._parse_headers(header) |
3945.1.8
by Vincent Ladeuil
Add more tests, fix pycurl double handling, revert previous tracking. |
251 |
return code, response.handle_response(abspath, code, msg, data) |
3059.2.2
by Vincent Ladeuil
Read http responses on demand without buffering the whole body |
252 |
|
253 |
def _parse_headers(self, status_and_headers): |
|
254 |
"""Transform the headers provided by curl into an HTTPMessage"""
|
|
255 |
status_and_headers.seek(0) |
|
256 |
# Ignore status line
|
|
257 |
status_and_headers.readline() |
|
258 |
msg = httplib.HTTPMessage(status_and_headers) |
|
259 |
return msg |
|
1786.1.4
by John Arbash Meinel
Adding HEADERFUNCTION which lets us get any response codes we want. |
260 |
|
2018.2.6
by Andrew Bennetts
HTTP client starting to work (pycurl for the moment). |
261 |
def _post(self, body_bytes): |
3651.1.1
by Vincent Ladeuil
Fix bug #225020 by catching CURLE_SEND_ERROR error. |
262 |
curl = self._get_curl() |
263 |
abspath, data, header = self._setup_request(curl, '.bzr/smart') |
|
264 |
curl.setopt(pycurl.POST, 1) |
|
2018.2.6
by Andrew Bennetts
HTTP client starting to work (pycurl for the moment). |
265 |
fake_file = StringIO(body_bytes) |
266 |
curl.setopt(pycurl.POSTFIELDSIZE, len(body_bytes)) |
|
267 |
curl.setopt(pycurl.READFUNCTION, fake_file.read) |
|
2000.3.4
by v.ladeuil+lp at free
Merge bzr.dev |
268 |
# We override the Expect: header so that pycurl will send the POST
|
269 |
# body immediately.
|
|
3651.1.1
by Vincent Ladeuil
Fix bug #225020 by catching CURLE_SEND_ERROR error. |
270 |
try: |
5514.1.1
by Vincent Ladeuil
Correctly set the Content-Type header when POSTing. |
271 |
self._curl_perform(curl, header, |
272 |
['Expect: ', |
|
273 |
'Content-Type: application/octet-stream']) |
|
3651.1.1
by Vincent Ladeuil
Fix bug #225020 by catching CURLE_SEND_ERROR error. |
274 |
except pycurl.error, e: |
275 |
if e[0] == CURLE_SEND_ERROR: |
|
3651.1.2
by Vincent Ladeuil
Fix bug #225020 by catching the CURLE_SEND_ERROR error more broadly. |
276 |
# When talking to an HTTP/1.0 server, getting a 400+ error code
|
277 |
# triggers a bug in some combinations of curl/kernel in rare
|
|
278 |
# occurrences. Basically, the server closes the connection
|
|
279 |
# after sending the error but the client (having received and
|
|
280 |
# parsed the response) still try to send the request body (see
|
|
281 |
# bug #225020 and its upstream associated bug). Since the
|
|
282 |
# error code and the headers are known to be available, we just
|
|
283 |
# swallow the exception, leaving the upper levels handle the
|
|
284 |
# 400+ error.
|
|
5957.2.3
by Vincent Ladeuil
Mask credentials in the -Dhttp logging |
285 |
trace.mutter('got pycurl error in POST: %s, %s, %s, url: %s ', |
286 |
e[0], e[1], e, abspath) |
|
3651.1.2
by Vincent Ladeuil
Fix bug #225020 by catching the CURLE_SEND_ERROR error more broadly. |
287 |
else: |
288 |
# Re-raise otherwise
|
|
289 |
raise
|
|
2018.2.6
by Andrew Bennetts
HTTP client starting to work (pycurl for the moment). |
290 |
data.seek(0) |
291 |
code = curl.getinfo(pycurl.HTTP_CODE) |
|
3059.2.2
by Vincent Ladeuil
Read http responses on demand without buffering the whole body |
292 |
msg = self._parse_headers(header) |
3945.1.8
by Vincent Ladeuil
Add more tests, fix pycurl double handling, revert previous tracking. |
293 |
return code, response.handle_response(abspath, code, msg, data) |
3956.2.2
by John Arbash Meinel
Start using report_activity for HTTP (pycurl + urllib) |
294 |
|
2018.2.6
by Andrew Bennetts
HTTP client starting to work (pycurl for the moment). |
295 |
|
4912.2.2
by Martin Pool
Include rough unhtml in pycurl error messages |
296 |
def _raise_curl_http_error(self, curl, info=None, body=None): |
297 |
"""Common curl->bzrlib error translation.
|
|
298 |
||
299 |
Some methods may choose to override this for particular cases.
|
|
300 |
||
301 |
The URL and code are automatically included as appropriate.
|
|
302 |
||
303 |
:param info: Extra information to include in the message.
|
|
5923.1.2
by Vincent Ladeuil
Fix some more prompts to be unicode. |
304 |
|
305 |
:param body: File-like object from which the body of the page can be
|
|
306 |
read.
|
|
4912.2.2
by Martin Pool
Include rough unhtml in pycurl error messages |
307 |
"""
|
1612.1.1
by Martin Pool
Raise errors correctly on pycurl connection failure |
308 |
code = curl.getinfo(pycurl.HTTP_CODE) |
309 |
url = curl.getinfo(pycurl.EFFECTIVE_URL) |
|
4912.2.2
by Martin Pool
Include rough unhtml in pycurl error messages |
310 |
if body is not None: |
311 |
response_body = body.read() |
|
312 |
plaintext_body = unhtml_roughly(response_body) |
|
313 |
else: |
|
314 |
response_body = None |
|
315 |
plaintext_body = '' |
|
2004.1.27
by v.ladeuil+lp at free
Fix bug #57644 by issuing an explicit error message. |
316 |
if code == 403: |
2004.1.34
by v.ladeuil+lp at free
Cosmetic fix for bug #57644. |
317 |
raise errors.TransportError( |
3430.3.1
by Vincent Ladeuil
Fix #230223 by making both http implementations raise appropriate exceptions. |
318 |
'Server refuses to fulfill the request (403 Forbidden)'
|
4912.2.2
by Martin Pool
Include rough unhtml in pycurl error messages |
319 |
' for %s: %s' % (url, plaintext_body)) |
1786.1.40
by John Arbash Meinel
code cleanups from Martin Pool. |
320 |
else: |
2004.1.27
by v.ladeuil+lp at free
Fix bug #57644 by issuing an explicit error message. |
321 |
if info is None: |
322 |
msg = '' |
|
323 |
else: |
|
324 |
msg = ': ' + info |
|
325 |
raise errors.InvalidHttpResponse( |
|
5923.1.2
by Vincent Ladeuil
Fix some more prompts to be unicode. |
326 |
url, 'Unable to handle http code %d%s: %s' |
4912.2.2
by Martin Pool
Include rough unhtml in pycurl error messages |
327 |
% (code, msg, plaintext_body)) |
1540.3.1
by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib. |
328 |
|
3945.1.6
by Vincent Ladeuil
Fix debug handling for pycurl and implement pycurl http activity |
329 |
def _debug_cb(self, kind, text): |
330 |
if kind in (pycurl.INFOTYPE_HEADER_IN, pycurl.INFOTYPE_DATA_IN, |
|
331 |
pycurl.INFOTYPE_SSL_DATA_IN): |
|
332 |
self._report_activity(len(text), 'read') |
|
333 |
if (kind == pycurl.INFOTYPE_HEADER_IN |
|
334 |
and 'http' in debug.debug_flags): |
|
5957.2.3
by Vincent Ladeuil
Mask credentials in the -Dhttp logging |
335 |
trace.mutter('< %s' % (text.rstrip(),)) |
3945.1.6
by Vincent Ladeuil
Fix debug handling for pycurl and implement pycurl http activity |
336 |
elif kind in (pycurl.INFOTYPE_HEADER_OUT, pycurl.INFOTYPE_DATA_OUT, |
337 |
pycurl.INFOTYPE_SSL_DATA_OUT): |
|
338 |
self._report_activity(len(text), 'write') |
|
339 |
if (kind == pycurl.INFOTYPE_HEADER_OUT |
|
340 |
and 'http' in debug.debug_flags): |
|
5957.2.3
by Vincent Ladeuil
Mask credentials in the -Dhttp logging |
341 |
lines = [] |
342 |
for line in text.rstrip().splitlines(): |
|
343 |
# People are often told to paste -Dhttp output to help
|
|
344 |
# debug. Don't compromise credentials.
|
|
345 |
try: |
|
346 |
header, details = line.split(':', 1) |
|
347 |
except ValueError: |
|
348 |
header = None |
|
349 |
if header in ('Authorization', 'Proxy-Authorization'): |
|
350 |
line = '%s: <masked>' % (header,) |
|
351 |
lines.append(line) |
|
352 |
trace.mutter('> ' + '\n> '.join(lines)) |
|
3945.1.6
by Vincent Ladeuil
Fix debug handling for pycurl and implement pycurl http activity |
353 |
elif kind == pycurl.INFOTYPE_TEXT and 'http' in debug.debug_flags: |
5957.2.3
by Vincent Ladeuil
Mask credentials in the -Dhttp logging |
354 |
trace.mutter('* %s' % text.rstrip()) |
3945.1.6
by Vincent Ladeuil
Fix debug handling for pycurl and implement pycurl http activity |
355 |
|
1540.3.13
by Martin Pool
Curl should follow http redirects, the same as urllib |
356 |
def _set_curl_options(self, curl): |
357 |
"""Set options for all requests"""
|
|
3052.3.3
by Vincent Ladeuil
Add -Dhttp support. |
358 |
ua_str = 'bzr/%s (pycurl: %s)' % (bzrlib.__version__, pycurl.version) |
1540.3.15
by Martin Pool
[merge] large merge to sync with bzr.dev |
359 |
curl.setopt(pycurl.USERAGENT, ua_str) |
3945.1.6
by Vincent Ladeuil
Fix debug handling for pycurl and implement pycurl http activity |
360 |
curl.setopt(pycurl.VERBOSE, 1) |
361 |
curl.setopt(pycurl.DEBUGFUNCTION, self._debug_cb) |
|
2298.5.1
by Alexander Belchenko
Bugfix #82086: Searching location of CA bundle for PyCurl in env variable (CURL_CA_BUNDLE), and on win32 along the PATH |
362 |
if self.cabundle: |
363 |
curl.setopt(pycurl.CAINFO, self.cabundle) |
|
3133.1.2
by Vincent Ladeuil
Fix #177643 by making pycurl handle url-embedded credentials again. |
364 |
# Set accepted auth methods
|
365 |
curl.setopt(pycurl.HTTPAUTH, pycurl.HTTPAUTH_ANY) |
|
366 |
curl.setopt(pycurl.PROXYAUTH, pycurl.HTTPAUTH_ANY) |
|
367 |
auth = self._get_credentials() |
|
368 |
user = auth.get('user', None) |
|
369 |
password = auth.get('password', None) |
|
370 |
userpass = None |
|
371 |
if user is not None: |
|
372 |
userpass = user + ':' |
|
373 |
if password is not None: # '' is a valid password |
|
374 |
userpass += password |
|
375 |
curl.setopt(pycurl.USERPWD, userpass) |
|
1540.3.3
by Martin Pool
Review updates of pycurl transport |
376 |
|
2164.2.16
by Vincent Ladeuil
Add tests. |
377 |
def _curl_perform(self, curl, header, more_headers=[]): |
1540.3.3
by Martin Pool
Review updates of pycurl transport |
378 |
"""Perform curl operation and translate exceptions."""
|
379 |
try: |
|
2000.3.1
by v.ladeuil+lp at free
Better connection sharing by using only one curl object. |
380 |
# There's no way in http/1.0 to say "must
|
381 |
# revalidate"; we don't want to force it to always
|
|
382 |
# retrieve. so just turn off the default Pragma
|
|
383 |
# provided by Curl.
|
|
384 |
headers = ['Cache-control: max-age=0', |
|
385 |
'Pragma: no-cache', |
|
386 |
'Connection: Keep-Alive'] |
|
387 |
curl.setopt(pycurl.HTTPHEADER, headers + more_headers) |
|
1540.3.14
by Martin Pool
[pycurl] Make Curl instance a local variable not a long-lived object. |
388 |
curl.perform() |
1540.3.3
by Martin Pool
Review updates of pycurl transport |
389 |
except pycurl.error, e: |
1786.1.35
by John Arbash Meinel
For pycurl inverse of (NOBODY,1) is (HTTPGET,1) not (NOBODY,0) |
390 |
url = curl.getinfo(pycurl.EFFECTIVE_URL) |
5957.2.3
by Vincent Ladeuil
Mask credentials in the -Dhttp logging |
391 |
trace.mutter('got pycurl error: %s, %s, %s, url: %s ', |
392 |
e[0], e[1], e, url) |
|
2929.3.19
by Vincent Ladeuil
Fix 1.1 related bugs in HTTP server, add HTTPS passing tests (by temporarily disabling pycurl certificate verification). |
393 |
if e[0] in (CURLE_COULDNT_RESOLVE_HOST, |
394 |
CURLE_COULDNT_RESOLVE_PROXY, |
|
2872.6.1
by Vincent Ladeuil
Fix bug #147530 by enabling more robust error code definitions. |
395 |
CURLE_COULDNT_CONNECT, |
396 |
CURLE_GOT_NOTHING, |
|
2929.3.19
by Vincent Ladeuil
Fix 1.1 related bugs in HTTP server, add HTTPS passing tests (by temporarily disabling pycurl certificate verification). |
397 |
CURLE_SSL_CACERT, |
398 |
CURLE_SSL_CACERT_BADFILE, |
|
399 |
):
|
|
3052.3.3
by Vincent Ladeuil
Add -Dhttp support. |
400 |
raise errors.ConnectionError( |
401 |
'curl connection error (%s)\non %s' % (e[1], url)) |
|
4628.1.2
by Vincent Ladeuil
More complete fix. |
402 |
elif e[0] == CURLE_RECV_ERROR: |
403 |
raise errors.ConnectionReset( |
|
404 |
'curl connection error (%s)\non %s' % (e[1], url)) |
|
2872.6.1
by Vincent Ladeuil
Fix bug #147530 by enabling more robust error code definitions. |
405 |
elif e[0] == CURLE_PARTIAL_FILE: |
3059.2.2
by Vincent Ladeuil
Read http responses on demand without buffering the whole body |
406 |
# Pycurl itself has detected a short read. We do not have all
|
407 |
# the information for the ShortReadvError, but that should be
|
|
408 |
# enough
|
|
2000.3.9
by v.ladeuil+lp at free
The tests that would have help avoid bug #73948 and all that mess :) |
409 |
raise errors.ShortReadvError(url, |
410 |
offset='unknown', length='unknown', |
|
411 |
actual='unknown', |
|
412 |
extra='Server aborted the request') |
|
1786.1.27
by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration. |
413 |
raise
|
2164.2.16
by Vincent Ladeuil
Add tests. |
414 |
code = curl.getinfo(pycurl.HTTP_CODE) |
415 |
if code in (301, 302, 303, 307): |
|
416 |
url = curl.getinfo(pycurl.EFFECTIVE_URL) |
|
3059.2.2
by Vincent Ladeuil
Read http responses on demand without buffering the whole body |
417 |
msg = self._parse_headers(header) |
418 |
redirected_to = msg.getheader('location') |
|
2164.2.16
by Vincent Ladeuil
Add tests. |
419 |
raise errors.RedirectRequested(url, |
420 |
redirected_to, |
|
3878.4.4
by Vincent Ladeuil
Cleanup. |
421 |
is_permanent=(code == 301)) |
1540.3.1
by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib. |
422 |
|
1540.3.10
by Martin Pool
[broken] keep hooking pycurl into test framework |
423 |
|
424 |
def get_test_permutations():
    """Return the permutations to be used in testing.

    The result always pairs PyCurlTransport with the pycurl http test
    server. When the HTTPS server feature is available, a second pair is
    added whose transport subclass uses the test suite's 'ca.crt' as CA
    bundle.
    """
    from bzrlib.tests import features
    from bzrlib.tests import http_server

    result = [(PyCurlTransport, http_server.HttpServer_PyCurl)]
    if not features.HTTPSServerFeature.available():
        return result

    from bzrlib.tests import (
        https_server,
        ssl_certs,
        )

    class HTTPS_pycurl_transport(PyCurlTransport):

        def __init__(self, base, _from_transport=None):
            super(HTTPS_pycurl_transport, self).__init__(
                base, _from_transport=_from_transport)
            # Trust the certificate authority shipped with the tests.
            self.cabundle = str(ssl_certs.build_path('ca.crt'))

    result.append((HTTPS_pycurl_transport, https_server.HTTPSServer_PyCurl))
    return result