5247.1.4
by Vincent Ladeuil
Merge cleanup into first-try |
1 |
# Copyright (C) 2006-2010 Canonical Ltd
|
1540.3.18
by Martin Pool
Style review fixes (thanks robertc) |
2 |
#
|
1540.3.1
by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib. |
3 |
# This program is free software; you can redistribute it and/or modify
|
4 |
# it under the terms of the GNU General Public License as published by
|
|
5 |
# the Free Software Foundation; either version 2 of the License, or
|
|
6 |
# (at your option) any later version.
|
|
1540.3.18
by Martin Pool
Style review fixes (thanks robertc) |
7 |
#
|
1540.3.1
by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib. |
8 |
# This program is distributed in the hope that it will be useful,
|
9 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
10 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
11 |
# GNU General Public License for more details.
|
|
1540.3.18
by Martin Pool
Style review fixes (thanks robertc) |
12 |
#
|
1540.3.1
by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib. |
13 |
# You should have received a copy of the GNU General Public License
|
14 |
# along with this program; if not, write to the Free Software
|
|
4183.7.1
by Sabin Iacob
update FSF mailing address |
15 |
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
1540.3.1
by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib. |
16 |
|
6379.6.7
by Jelmer Vernooij
Move importing from future until after doc string, otherwise the doc string will disappear. |
17 |
"""http/https transport using pycurl"""
|
18 |
||
6379.6.3
by Jelmer Vernooij
Use absolute_import. |
19 |
from __future__ import absolute_import |
20 |
||
1540.3.1
by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib. |
21 |
# TODO: test reporting of http errors
|
1887.1.1
by Adeodato Simó
Do not separate paragraphs in the copyright statement with blank lines, |
22 |
#
|
1616.1.9
by Martin Pool
Set Cache-control: max-age=0 and Pragma: no-cache |
23 |
# TODO: Transport option to control caching of particular requests; broadly we
|
24 |
# would want to offer "caching allowed" or "must revalidate", depending on
|
|
25 |
# whether we expect a particular file will be modified after it's committed.
|
|
26 |
# It's probably safer to just always revalidate. mbp 20060321
|
|
27 |
||
2164.2.16
by Vincent Ladeuil
Add tests. |
28 |
# TODO: Some refactoring could be done to avoid the strange idiom
|
29 |
# used to capture data and headers while setting up the request
|
|
30 |
# (and having to pass 'header' to _curl_perform to handle
|
|
31 |
# redirections). This could be achieved by creating a
|
|
32 |
# specialized Curl object and returning code, headers and data
|
|
33 |
# from _curl_perform. Not done because we may deprecate pycurl in the
|
|
34 |
# future -- vila 20070212
|
|
35 |
||
1786.1.42
by John Arbash Meinel
Update _extract_headers, make it less generic, and non recursive. |
36 |
from cStringIO import StringIO |
3059.2.2
by Vincent Ladeuil
Read http responses on demand without buffering the whole body |
37 |
import httplib |
1540.3.5
by Martin Pool
Raise exception if unicode is passed to transport; formatting fixes |
38 |
|
6015.21.1
by Vincent Ladeuil
Merge 2.3 into 2.4 (including fix for #614713) |
39 |
import bzrlib |
2004.1.25
by v.ladeuil+lp at free
Shuffle http related test code. Hopefully it ends up at the right place :) |
40 |
from bzrlib import ( |
3052.3.3
by Vincent Ladeuil
Add -Dhttp support. |
41 |
debug, |
2004.1.25
by v.ladeuil+lp at free
Shuffle http related test code. Hopefully it ends up at the right place :) |
42 |
errors, |
5957.2.3
by Vincent Ladeuil
Mask credentials in the -Dhttp logging |
43 |
trace, |
2004.1.25
by v.ladeuil+lp at free
Shuffle http related test code. Hopefully it ends up at the right place :) |
44 |
)
|
45 |
from bzrlib.transport.http import ( |
|
2298.5.1
by Alexander Belchenko
Bugfix #82086: Searching location of CA bundle for PyCurl in env variable (CURL_CA_BUNDLE), and on win32 along the PATH |
46 |
ca_bundle, |
2004.1.25
by v.ladeuil+lp at free
Shuffle http related test code. Hopefully it ends up at the right place :) |
47 |
HttpTransportBase, |
48 |
response, |
|
4912.2.2
by Martin Pool
Include rough unhtml in pycurl error messages |
49 |
unhtml_roughly, |
2004.1.25
by v.ladeuil+lp at free
Shuffle http related test code. Hopefully it ends up at the right place :) |
50 |
)
|
1540.3.1
by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib. |
51 |
|
1540.3.7
by Martin Pool
Prepare to select a transport depending on what dependencies can be satisfied. |
52 |
# pycurl is optional: a failed import is logged and reported as a missing
# dependency.
try:
    import pycurl
except ImportError as import_failure:
    trace.mutter("failed to import pycurl: %s", import_failure)
    raise errors.DependencyNotPresent('pycurl', import_failure)

# Importing is not sufficient, so also check that a Curl handle can really be
# created.  pycurl sometimes loads but fails to start up because of this
# libcurl bug:
#
#   32. (At least on Windows) If libcurl is built with c-ares and there's
#   no DNS server configured in the system, the ares_init() call fails and
#   thus curl_easy_init() fails as well. This causes weird effects for
#   people who use numerical IP addresses only.
#
# reported by Alexander Belchenko, 2006-04-26
try:
    pycurl.Curl()
except pycurl.error as init_failure:
    trace.mutter("failed to initialize pycurl: %s", init_failure)
    raise errors.DependencyNotPresent('pycurl', init_failure)
1684.1.5
by Martin Pool
(patch) check that pycurl will actuall initialize as well as load (Alexander) |
72 |
|
1540.3.7
by Martin Pool
Prepare to select a transport depending on what dependencies can be satisfied. |
73 |
|
2872.6.1
by Vincent Ladeuil
Fix bug #147530 by enabling more robust error code definitions. |
74 |
|
75 |
||
76 |
def _get_pycurl_errcode(symbol, default):
    """Return the numerical error code pycurl defines for ``symbol``.

    Different pycurl implementations define different symbols for error
    codes, and old versions never define some of them (whether they can
    return the corresponding error code or not).  Looking the symbol up with a
    fallback lets this module define every symbol it cares about.  This also
    defines symbols for errors that older versions will never return, which
    is fine.

    :param symbol: Name to look up in the pycurl module namespace.
    :param default: Value to use when pycurl does not define ``symbol``.
    :return: The value pycurl binds to ``symbol``, else ``default``.
    """
    try:
        return pycurl.__dict__[symbol]
    except KeyError:
        return default
|
87 |
||
88 |
# Error codes this module cares about, listed by fallback value.  The number
# given is only used when the installed pycurl does not define the symbol.
CURLE_COULDNT_RESOLVE_PROXY = _get_pycurl_errcode('E_COULDNT_RESOLVE_PROXY', 5)
CURLE_COULDNT_RESOLVE_HOST = _get_pycurl_errcode('E_COULDNT_RESOLVE_HOST', 6)
CURLE_COULDNT_CONNECT = _get_pycurl_errcode('E_COULDNT_CONNECT', 7)
CURLE_PARTIAL_FILE = _get_pycurl_errcode('E_PARTIAL_FILE', 18)
CURLE_GOT_NOTHING = _get_pycurl_errcode('E_GOT_NOTHING', 52)
CURLE_SEND_ERROR = _get_pycurl_errcode('E_SEND_ERROR', 55)
CURLE_RECV_ERROR = _get_pycurl_errcode('E_RECV_ERROR', 56)
CURLE_SSL_CACERT = _get_pycurl_errcode('E_SSL_CACERT', 60)
CURLE_SSL_CACERT_BADFILE = _get_pycurl_errcode('E_SSL_CACERT_BADFILE', 77)
|
2872.6.1
by Vincent Ladeuil
Fix bug #147530 by enabling more robust error code definitions. |
97 |
|
98 |
||
1540.3.1
by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib. |
99 |
class PyCurlTransport(HttpTransportBase): |
1540.3.3
by Martin Pool
Review updates of pycurl transport |
100 |
"""http client transport using pycurl
|
101 |
||
102 |
PyCurl is a Python binding to the C "curl" multiprotocol client.
|
|
103 |
||
2004.1.30
by v.ladeuil+lp at free
Fix #62276 and #62029 by providing a more robust http range handling. |
104 |
This transport can be significantly faster than the builtin
|
105 |
Python client. Advantages include: DNS caching.
|
|
1540.3.3
by Martin Pool
Review updates of pycurl transport |
106 |
"""
|
107 |
||
2485.8.59
by Vincent Ladeuil
Update from review comments. |
108 |
def __init__(self, base, _from_transport=None):
    """Create a pycurl-backed transport rooted at ``base``.

    :param base: Base URL, handed to the parent class constructor.
    :param _from_transport: Optional transport handed through to the parent
        class constructor unchanged.
    :raises errors.DependencyNotPresent: when the scheme is https and
        pycurl does not list https among its supported protocols.
    """
    super(PyCurlTransport, self).__init__(
        base, 'pycurl', _from_transport=_from_transport)
    # Entry 8 of pycurl.version_info() is read as the supported protocols;
    # it is only consulted for https URLs.
    if (self._unqualified_scheme == 'https'
            and 'https' not in pycurl.version_info()[8]):
        raise errors.DependencyNotPresent('pycurl', 'no https support')
    self.cabundle = ca_bundle.get_ca_path()
2485.8.41
by Vincent Ladeuil
Finish http refactoring. Test suite passing. |
118 |
|
119 |
def _get_curl(self):
    """Return the Curl object used as connection, creating it if needed.

    :return: The object returned by ``self._get_connection()`` when there is
        one, otherwise a new ``pycurl.Curl`` that has just been registered
        through ``self._set_connection()``.
    """
    handle = self._get_connection()
    if handle is not None:
        return handle
    # First connection ever. There are no credentials for pycurl: either the
    # password was embedded in the URL or it's not needed. The connection
    # for pycurl is just the Curl object; it will not connect to the http
    # server until the first request (which has just called us).
    handle = pycurl.Curl()
    # First request, initialize credentials.
    credentials = self._create_auth()
    # Proxy handling is out of reach, so we punt
    self._set_connection(handle, credentials)
    return handle
1540.3.1
by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib. |
133 |
|
5247.2.12
by Vincent Ladeuil
Ensure that all transports close their underlying connection. |
134 |
def disconnect(self):
    """Close the current connection object, if there is one."""
    handle = self._get_connection()
    if handle is None:
        return
    handle.close()
|
138 |
||
1540.3.3
by Martin Pool
Review updates of pycurl transport |
139 |
def has(self, relpath):
    """See Transport.has()"""
    # _get_full sets NO BODY back to 0, so it is safe to re-use the same
    # curl object here.
    curl = self._get_curl()
    url = self._remote_path(relpath)
    curl.setopt(pycurl.URL, url)
    self._set_curl_options(curl)
    curl.setopt(pycurl.HTTPGET, 1)
    # We don't want the body, i.e. just do a HEAD request.  NOBODY means
    # "no body", not 'nobody'.
    curl.setopt(pycurl.NOBODY, 1)
    # The headers are still needed to handle redirections.
    header_sink = StringIO()
    curl.setopt(pycurl.HEADERFUNCTION, header_sink.write)
    # In some erroneous cases pycurl emits text on stdout unless we catch
    # it (see the InvalidStatus tests for one such occurrence).
    discarded_body = StringIO()
    curl.setopt(pycurl.WRITEFUNCTION, discarded_body.write)
    self._curl_perform(curl, header_sink)
    status = curl.getinfo(pycurl.HTTP_CODE)
    if status in (200, 404):
        # 200 is "ok", 404 is "not found"
        return status == 200
    self._raise_curl_http_error(curl)
2000.3.1
by v.ladeuil+lp at free
Better connection sharing by using only one curl object. |
167 |
|
2520.2.1
by Vincent Ladeuil
First step to fix #115209 use _coalesce_offsets like other transports. |
168 |
def _get(self, relpath, offsets, tail_amount=0): |
1786.1.27
by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration. |
169 |
# This just switches based on the type of request
|
2520.2.1
by Vincent Ladeuil
First step to fix #115209 use _coalesce_offsets like other transports. |
170 |
if offsets is not None or tail_amount not in (0, None): |
171 |
return self._get_ranged(relpath, offsets, tail_amount=tail_amount) |
|
1786.1.27
by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration. |
172 |
else: |
2164.2.5
by v.ladeuil+lp at free
Simpler implementation using inspect. 'hints' is a kwargs. |
173 |
return self._get_full(relpath) |
2000.3.1
by v.ladeuil+lp at free
Better connection sharing by using only one curl object. |
174 |
|
1786.1.27
by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration. |
175 |
def _setup_get_request(self, curl, relpath):
    """Configure ``curl`` for a GET of ``relpath``.

    :return: The (abspath, data, header) tuple from ``_setup_request``.
    """
    # Make sure we do a GET request. Versions > 7.14.1 also reset the
    # NO BODY flag when HTTPGET is set, but we do it ourselves in case this
    # is an older pycurl version.
    for option, value in ((pycurl.NOBODY, 0), (pycurl.HTTPGET, 1)):
        curl.setopt(option, value)
    return self._setup_request(curl, relpath)
|
182 |
||
183 |
def _setup_request(self, curl, relpath):
    """Do the setup that is common to every request.

    :param curl: The curl object to place the request on
    :param relpath: The relative path that we want to get
    :return: (abspath, data, header)
        abspath: full url
        data: file that will be filled with the body
        header: file that will be filled with the headers
    """
    url = self._remote_path(relpath)
    curl.setopt(pycurl.URL, url)
    self._set_curl_options(curl)

    # Body and headers are captured in two separate in-memory files.
    body_sink = StringIO()
    header_sink = StringIO()
    curl.setopt(pycurl.WRITEFUNCTION, body_sink.write)
    curl.setopt(pycurl.HEADERFUNCTION, header_sink.write)

    return url, body_sink, header_sink
|
203 |
||
2164.2.5
by v.ladeuil+lp at free
Simpler implementation using inspect. 'hints' is a kwargs. |
204 |
def _get_full(self, relpath):
    """Make a request for the entire file.

    :param relpath: The relative path to fetch.
    :return: (code, data) where code is the http status and data is the
        body file, rewound to its start.
    :raises errors.NoSuchFile: on a 404 status.
    """
    curl = self._get_curl()
    url, body, headers = self._setup_get_request(curl, relpath)
    self._curl_perform(curl, headers)

    status = curl.getinfo(pycurl.HTTP_CODE)
    body.seek(0)

    if status == 404:
        raise errors.NoSuchFile(url)
    elif status != 200:
        self._raise_curl_http_error(
            curl, 'expected 200 or 404 for full response.')

    return status, body
|
220 |
||
3059.2.11
by Vincent Ladeuil
Fix typos mentioned by spiv. |
221 |
# The parent class uses 0 to minimize the number of requests. pycurl cannot
# hand over results as soon as they are received (pycurl limitation), so we
# had better issue more requests and provide a more responsive UI, at the
# price of more latency costs.
# If you modify this, think about modifying the comment in http/__init__.py
# too.
_get_max_size = 4 * 1024 ** 2
3059.2.7
by Vincent Ladeuil
Allow pycurl users to watch the blinkenlights and fix a bug when ranges are contiguous. |
228 |
|
2520.2.1
by Vincent Ladeuil
First step to fix #115209 use _coalesce_offsets like other transports. |
229 |
def _get_ranged(self, relpath, offsets, tail_amount):
    """Make a request for just part of the file.

    :param relpath: The relative path to fetch.
    :param offsets: Offsets handed to ``_attempted_range_header``.
    :param tail_amount: Tail size handed to ``_attempted_range_header``.
    :return: (code, response) where response comes from
        ``response.handle_response``; or the result of ``_get_full`` when no
        range header can be attempted.
    :raises errors.NoSuchFile: on a 404 status.
    :raises errors.InvalidHttpRange: on a 400 or 416 status.
    """
    curl = self._get_curl()
    url, body, headers = self._setup_get_request(curl, relpath)

    byte_ranges = self._attempted_range_header(offsets, tail_amount)
    if byte_ranges is None:
        # Forget ranges, the server can't handle them
        return self._get_full(relpath)

    self._curl_perform(curl, headers, ['Range: bytes=%s' % byte_ranges])
    body.seek(0)

    status = curl.getinfo(pycurl.HTTP_CODE)

    if status == 404:  # not found
        raise errors.NoSuchFile(url)
    if status in (400, 416):
        # We don't know which, but one of the ranges we specified was
        # wrong.
        reason = 'Server return code %d' % curl.getinfo(pycurl.HTTP_CODE)
        raise errors.InvalidHttpRange(url, byte_ranges, reason)
    message = self._parse_headers(headers)
    return status, response.handle_response(url, status, message, body)
3059.2.2
by Vincent Ladeuil
Read http responses on demand without buffering the whole body |
254 |
|
255 |
def _parse_headers(self, status_and_headers):
    """Turn the headers captured from curl into an HTTPMessage.

    :param status_and_headers: Seekable file holding the status line
        followed by the headers.
    :return: An ``httplib.HTTPMessage`` built from the file once the status
        line has been consumed.
    """
    status_and_headers.seek(0)
    # The first line is the status line: read it and throw it away.
    status_and_headers.readline()
    return httplib.HTTPMessage(status_and_headers)
|
1786.1.4
by John Arbash Meinel
Adding HEADERFUNCTION which lets us get any response codes we want. |
262 |
|
2018.2.6
by Andrew Bennetts
HTTP client starting to work (pycurl for the moment). |
263 |
def _post(self, body_bytes):
    """POST ``body_bytes`` to the '.bzr/smart' path.

    :param body_bytes: The request body.
    :return: (code, response) where response comes from
        ``response.handle_response``.
    """
    curl = self._get_curl()
    url, body, headers = self._setup_request(curl, '.bzr/smart')
    curl.setopt(pycurl.POST, 1)
    request_body = StringIO(body_bytes)
    curl.setopt(pycurl.POSTFIELDSIZE, len(body_bytes))
    curl.setopt(pycurl.READFUNCTION, request_body.read)
    # We override the Expect: header so that pycurl will send the POST
    # body immediately.
    extra_headers = ['Expect: ',
                     'Content-Type: application/octet-stream']
    try:
        self._curl_perform(curl, headers, extra_headers)
    except pycurl.error as e:
        if e.args[0] != CURLE_SEND_ERROR:
            # Anything else is re-raised untouched.
            raise
        # When talking to an HTTP/1.0 server, getting a 400+ error code
        # triggers a bug in some combinations of curl/kernel in rare
        # occurrences. Basically, the server closes the connection after
        # sending the error but the client (having received and parsed the
        # response) still tries to send the request body (see bug #225020
        # and its upstream associated bug). Since the error code and the
        # headers are known to be available, we just swallow the exception
        # and let the upper levels handle the 400+ error.
        trace.mutter('got pycurl error in POST: %s, %s, %s, url: %s ',
                     e.args[0], e.args[1], e, url)
    body.seek(0)
    status = curl.getinfo(pycurl.HTTP_CODE)
    message = self._parse_headers(headers)
    return status, response.handle_response(url, status, message, body)
3956.2.2
by John Arbash Meinel
Start using report_activity for HTTP (pycurl + urllib) |
296 |
|
2018.2.6
by Andrew Bennetts
HTTP client starting to work (pycurl for the moment). |
297 |
|
4912.2.2
by Martin Pool
Include rough unhtml in pycurl error messages |
298 |
def _raise_curl_http_error(self, curl, info=None, body=None):
    """Common curl->bzrlib error translation.

    Some methods may choose to override this for particular cases.

    The URL and code are automatically included as appropriate.

    :param curl: The curl object whose HTTP code and effective URL are
        reported.
    :param info: Extra information to include in the message.
    :param body: File-like object from which the body of the page can be
        read.
    :raises errors.TransportError: when the HTTP code is 403.
    :raises errors.InvalidHttpResponse: for every other HTTP code.
    """
    status = curl.getinfo(pycurl.HTTP_CODE)
    effective_url = curl.getinfo(pycurl.EFFECTIVE_URL)
    if body is None:
        page_text = ''
    else:
        page_text = unhtml_roughly(body.read())
    if status == 403:
        raise errors.TransportError(
            'Server refuses to fulfill the request (403 Forbidden)'
            ' for %s: %s' % (effective_url, page_text))
    if info is None:
        extra = ''
    else:
        extra = ': ' + info
    raise errors.InvalidHttpResponse(
        effective_url, 'Unable to handle http code %d%s: %s'
        % (status, extra, page_text))
1540.3.1
by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib. |
330 |
|
3945.1.6
by Vincent Ladeuil
Fix debug handling for pycurl and implement pycurl http activity |
331 |
def _debug_cb(self, kind, text):
    """Debug callback handed to curl: report activity and log for -Dhttp.

    Incoming and outgoing headers/data are reported through
    ``self._report_activity``.  When 'http' is in ``debug.debug_flags``,
    headers, informational text and SSL data are also written to the trace
    log, with credentials masked.

    :param kind: One of the ``pycurl.INFOTYPE_*`` values.
    :param text: The text that goes with ``kind``.
    """
    log_http = 'http' in debug.debug_flags
    if kind in (pycurl.INFOTYPE_HEADER_IN, pycurl.INFOTYPE_DATA_IN):
        self._report_activity(len(text), 'read')
        if kind == pycurl.INFOTYPE_HEADER_IN and log_http:
            trace.mutter('< %s' % (text.rstrip(),))
    elif kind in (pycurl.INFOTYPE_HEADER_OUT, pycurl.INFOTYPE_DATA_OUT):
        self._report_activity(len(text), 'write')
        if kind == pycurl.INFOTYPE_HEADER_OUT and log_http:
            lines = []
            for line in text.rstrip().splitlines():
                # People are often told to paste -Dhttp output to help
                # debug. Don't compromise credentials.  Header names are
                # case-insensitive, so compare them lower-cased; otherwise
                # an 'authorization:' line would be logged unmasked.
                name, colon, _details = line.partition(':')
                name = name.strip()
                if colon and name.lower() in ('authorization',
                                              'proxy-authorization'):
                    line = '%s: <masked>' % (name,)
                lines.append(line)
            trace.mutter('> ' + '\n> '.join(lines))
    elif not log_http:
        # Everything below is logging only.
        return
    elif kind == pycurl.INFOTYPE_TEXT:
        trace.mutter('* %s' % text.rstrip())
    elif kind in (pycurl.INFOTYPE_SSL_DATA_IN,
                  pycurl.INFOTYPE_SSL_DATA_OUT):
        trace.mutter('* %s' % text)
3945.1.6
by Vincent Ladeuil
Fix debug handling for pycurl and implement pycurl http activity |
359 |
|
1540.3.13
by Martin Pool
Curl should follow http redirects, the same as urllib |
360 |
def _set_curl_options(self, curl):
    """Set the options shared by all requests on ``curl``."""
    user_agent = 'bzr/%s (pycurl: %s)' % (bzrlib.__version__, pycurl.version)
    curl.setopt(pycurl.USERAGENT, user_agent)
    # Route curl's debug output through our own callback.
    curl.setopt(pycurl.VERBOSE, 1)
    curl.setopt(pycurl.DEBUGFUNCTION, self._debug_cb)
    if self.cabundle:
        curl.setopt(pycurl.CAINFO, self.cabundle)
    # Set accepted auth methods
    for auth_option in (pycurl.HTTPAUTH, pycurl.PROXYAUTH):
        curl.setopt(auth_option, pycurl.HTTPAUTH_ANY)
    credentials = self._get_credentials()
    user = credentials.get('user', None)
    password = credentials.get('password', None)
    if user is not None:
        if password is None:
            userpass = user + ':'
        else:
            # '' is a valid password
            userpass = user + ':' + password
        curl.setopt(pycurl.USERPWD, userpass)
|
1540.3.3
by Martin Pool
Review updates of pycurl transport |
380 |
|
2164.2.16
by Vincent Ladeuil
Add tests. |
381 |
def _curl_perform(self, curl, header, more_headers=None):
    """Perform curl operation and translate exceptions.

    :param curl: The pycurl handle to run. Its HTTPHEADER option is
        overwritten here before the request is performed.
    :param header: Passed to ``self._parse_headers`` when the server
        answers with a redirect (presumably the collected response
        headers -- confirm against the callers).
    :param more_headers: Optional sequence of extra request header lines
        appended to the default ones. ``None`` means no extra headers.
    :raises errors.ConnectionError: when curl could not resolve the host
        or proxy, could not connect, got nothing back, or failed on the
        CA certificate or CA bundle file.
    :raises errors.ConnectionReset: when curl reports a receive error.
    :raises errors.ShortReadvError: when curl reports a partial file.
    :raises errors.RedirectRequested: when the HTTP status is 301, 302,
        303 or 307.
    :raises pycurl.error: for every other curl failure (re-raised as is).
    """
    # A None sentinel avoids sharing one mutable default list between
    # calls.
    if more_headers is None:
        more_headers = []
    try:
        # There's no way in http/1.0 to say "must
        # revalidate"; we don't want to force it to always
        # retrieve.  so just turn off the default Pragma
        # provided by Curl.
        # NOTE(review): the list below still sends 'Pragma: no-cache',
        # which does not obviously match the comment above -- confirm the
        # intent before touching it.
        headers = ['Cache-control: max-age=0',
                   'Pragma: no-cache',
                   'Connection: Keep-Alive']
        curl.setopt(pycurl.HTTPHEADER, headers + list(more_headers))
        curl.perform()
    except pycurl.error as e:
        # The code below relies on pycurl.error carrying
        # (curl error code, message) in its args.
        err_code, err_msg = e.args[0], e.args[1]
        url = curl.getinfo(pycurl.EFFECTIVE_URL)
        trace.mutter('got pycurl error: %s, %s, %s, url: %s ',
                     err_code, err_msg, e, url)
        if err_code in (CURLE_COULDNT_RESOLVE_HOST,
                        CURLE_COULDNT_RESOLVE_PROXY,
                        CURLE_COULDNT_CONNECT,
                        CURLE_GOT_NOTHING,
                        CURLE_SSL_CACERT,
                        CURLE_SSL_CACERT_BADFILE,
                        ):
            raise errors.ConnectionError(
                'curl connection error (%s)\non %s' % (err_msg, url))
        elif err_code == CURLE_RECV_ERROR:
            raise errors.ConnectionReset(
                'curl connection error (%s)\non %s' % (err_msg, url))
        elif err_code == CURLE_PARTIAL_FILE:
            # Pycurl itself has detected a short read. We do not have all
            # the information for the ShortReadvError, but that should be
            # enough
            raise errors.ShortReadvError(url,
                                         offset='unknown', length='unknown',
                                         actual='unknown',
                                         extra='Server aborted the request')
        # Anything else is not translated: let the pycurl error propagate.
        raise
    code = curl.getinfo(pycurl.HTTP_CODE)
    if code in (301, 302, 303, 307):
        url = curl.getinfo(pycurl.EFFECTIVE_URL)
        msg = self._parse_headers(header)
        redirected_to = msg.getheader('location')
        raise errors.RedirectRequested(url,
                                       redirected_to,
                                       is_permanent=(code == 301))
1540.3.1
by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib. |
426 |
|
1540.3.10
by Martin Pool
[broken] keep hooking pycurl into test framework |
427 |
|
428 |
def get_test_permutations():
    """Return the permutations to be used in testing.

    The result is a list of ``(transport class, test server class)``
    pairs. The plain HTTP pair is always present; an HTTPS pair is added
    only when ``features.HTTPSServerFeature`` reports itself available.
    """
    # Test-only modules are imported lazily, inside the function.
    from bzrlib.tests import features, http_server

    scenarios = [(PyCurlTransport, http_server.HttpServer_PyCurl)]
    if not features.HTTPSServerFeature.available():
        return scenarios

    from bzrlib.tests import https_server, ssl_certs

    # Variant of the transport whose CA bundle is the 'ca.crt' file
    # located through bzrlib.tests.ssl_certs.
    class HTTPS_pycurl_transport(PyCurlTransport):

        def __init__(self, base, _from_transport=None):
            super(HTTPS_pycurl_transport, self).__init__(
                base, _from_transport)
            ca_path = ssl_certs.build_path('ca.crt')
            self.cabundle = str(ca_path)

    https_pair = (HTTPS_pycurl_transport, https_server.HTTPSServer_PyCurl)
    scenarios.append(https_pair)
    return scenarios