1
# Copyright (C) 2006-2010 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
"""Implementation of urllib2 tailored to bzr needs

This file complements the urllib2 class hierarchy with custom classes.

For instance, we create a new HTTPConnection and HTTPSConnection that inherit
from the original urllib2.HTTP(s)Connection objects, but also have a new base
which implements a custom getresponse and cleanup_pipe handlers.

And then we implement custom HTTPHandler and HTTPSHandler classes, that use
the custom HTTPConnection classes.

We have a custom Response class, which lets us maintain a keep-alive
connection even for requests that urllib2 doesn't expect to contain body data.

And a custom Request class that lets us track redirections, and
handle authentication schemes.

For coherency with python libraries, we use capitalized header names throughout
the code, even if the header names will be titled just before sending the
request (see AbstractHTTPHandler.do_open).
"""

# FIXME: Oversimplifying, two kind of exceptions should be
# raised, once a request is issued: URLError before we have been
# able to process the response, HTTPError after that. Process the
# response means we are able to leave the socket clean, so if we
# are not able to do that, we should close the connection. The
# actual code more or less do that, tests should be written to
# ensure that.
65
import errno
import httplib
import re
import socket
import sys
import urllib
import urllib2
import urlparse

from bzrlib import (
    debug,
    errors,
    trace,
    transport,
    )
from bzrlib import __version__ as bzrlib_version
78
class _ReportingFileSocket(object):
80
def __init__(self, filesock, report_activity=None):
81
self.filesock = filesock
82
self._report_activity = report_activity
84
def report_activity(self, size, direction):
85
if self._report_activity:
86
self._report_activity(size, direction)
88
def read(self, size=1):
89
s = self.filesock.read(size)
90
self.report_activity(len(s), 'read')
94
# This should be readline(self, size=-1), but httplib in python 2.4 and
95
# 2.5 defines a SSLFile wrapper whose readline method lacks the size
96
# parameter. So until we drop support for 2.4 and 2.5 and since we
97
# don't *need* the size parameter we'll stay with readline(self)
99
s = self.filesock.readline()
100
self.report_activity(len(s), 'read')
103
def __getattr__(self, name):
104
return getattr(self.filesock, name)
107
class _ReportingSocket(object):
109
def __init__(self, sock, report_activity=None):
111
self._report_activity = report_activity
113
def report_activity(self, size, direction):
114
if self._report_activity:
115
self._report_activity(size, direction)
117
def sendall(self, s, *args):
118
self.sock.sendall(s, *args)
119
self.report_activity(len(s), 'write')
121
def recv(self, *args):
122
s = self.sock.recv(*args)
123
self.report_activity(len(s), 'read')
126
def makefile(self, mode='r', bufsize=-1):
127
# httplib creates a fileobject that doesn't do buffering, which
128
# makes fp.readline() very expensive because it only reads one byte
129
# at a time. So we wrap the socket in an object that forces
130
# sock.makefile to make a buffered file.
131
fsock = self.sock.makefile(mode, 65536)
132
# And wrap that into a reporting kind of fileobject
133
return _ReportingFileSocket(fsock, self._report_activity)
135
def __getattr__(self, name):
136
return getattr(self.sock, name)
139
# We define our own Response class to keep our httplib pipe clean
140
class Response(httplib.HTTPResponse):
141
"""Custom HTTPResponse, to avoid the need to decorate.
143
httplib prefers to decorate the returned objects, rather
144
than using a custom object.
147
# Some responses have bodies in which we have no interest
148
_body_ignored_responses = [301,302, 303, 307, 401, 403, 404]
150
# in finish() below, we may have to discard several MB in the worst
151
# case. To avoid buffering that much, we read and discard by chunks
152
# instead. The underlying file is either a socket or a StringIO, so reading
153
# 8k chunks should be fine.
154
_discarded_buf_size = 8192
157
"""Begin to read the response from the server.
159
httplib assumes that some responses get no content and do
160
not even attempt to read the body in that case, leaving
161
the body in the socket, blocking the next request. Let's
162
try to workaround that.
164
httplib.HTTPResponse.begin(self)
165
if self.status in self._body_ignored_responses:
166
if self.debuglevel >= 2:
167
print "For status: [%s]," % self.status,
168
print "will ready body, length: %s" % self.length
169
if not (self.length is None or self.will_close):
170
# In some cases, we just can't read the body not
171
# even try or we may encounter a 104, 'Connection
172
# reset by peer' error if there is indeed no body
173
# and the server closed the connection just after
174
# having issued the response headers (even if the
175
# headers indicate a Content-Type...)
176
body = self.read(self.length)
177
if self.debuglevel >= 9:
178
# This one can be huge and is generally not interesting
179
print "Consumed body: [%s]" % body
181
elif self.status == 200:
182
# Whatever the request is, it went ok, so we surely don't want to
183
# close the connection. Some cases are not correctly detected by
184
# httplib.HTTPConnection.getresponse (called by
185
# httplib.HTTPResponse.begin). The CONNECT response for the https
186
# through proxy case is one. Note: the 'will_close' below refers
187
# to the "true" socket between us and the server, whereas the
188
# 'close()' above refers to the copy of that socket created by
189
# httplib for the response itself. So, in the if above we close the
190
# socket to indicate that we are done with the response whereas
191
# below we keep the socket with the server opened.
192
self.will_close = False
195
"""Finish reading the body.
197
In some cases, the client may have left some bytes to read in the
198
body. That will block the next request to succeed if we use a
199
persistent connection. If we don't use a persistent connection, well,
200
nothing will block the next request since a new connection will be
203
:return: the number of bytes left on the socket (may be None)
206
if not self.isclosed():
207
# Make sure nothing was left to be read on the socket
210
while data and self.length:
211
# read() will update self.length
212
data = self.read(min(self.length, self._discarded_buf_size))
215
trace.mutter("%s bytes left on the HTTP socket", pending)
220
# Not inheriting from 'object' because httplib.HTTPConnection doesn't.
class AbstractHTTPConnection:
    """A custom HTTP(S) Connection, which can reset itself on a bad response"""

    response_class = Response

    # When we detect a server responding with the whole file to range requests,
    # we want to warn. But not below a given thresold.
    _range_warning_thresold = 1024 * 1024

    def __init__(self, report_activity=None):
        self._response = None
        self._report_activity = report_activity
        # None means "no 200-for-ranges warning emitted yet"
        self._ranges_received_whole_file = None

    def _mutter_connect(self):
        netloc = '%s:%s' % (self.host, self.port)
        if self.proxied_host is not None:
            netloc += '(proxy for %s)' % self.proxied_host
        trace.mutter('* About to connect() to %s' % netloc)

    def getresponse(self):
        """Capture the response to be able to cleanup"""
        self._response = httplib.HTTPConnection.getresponse(self)
        return self._response

    def cleanup_pipe(self):
        """Read the remaining bytes of the last response if any."""
        if self._response is not None:
            pending = self._response.finish()
            # Warn the user (once)
            if (self._ranges_received_whole_file is None
                and self._response.status == 200
                and pending and pending > self._range_warning_thresold
                ):
                self._ranges_received_whole_file = True
                trace.warning(
                    'Got a 200 response when asking for multiple ranges,'
                    ' does your server at %s:%s support range requests?',
                    self.host, self.port)
            self._response = None

    def close(self):
        """Close the connection while keeping our wrapped socket alive."""
        # Preserve our preciousss
        sock = self.sock
        self.sock = None
        # Let httplib.HTTPConnection do its housekeeping
        httplib.HTTPConnection.close(self)
        # Restore our preciousss
        self.sock = sock

    def _wrap_socket_for_reporting(self, sock):
        """Wrap the socket before anybody use it."""
        self.sock = _ReportingSocket(sock, self._report_activity)
class HTTPConnection(AbstractHTTPConnection, httplib.HTTPConnection):
    """Plain HTTP connection with activity reporting and pipe cleanup."""

    # XXX: Needs refactoring at the caller level.
    def __init__(self, host, port=None, proxied_host=None,
                 report_activity=None):
        AbstractHTTPConnection.__init__(self, report_activity=report_activity)
        # Use strict=True since we don't support HTTP/0.9
        httplib.HTTPConnection.__init__(self, host, port, strict=True)
        self.proxied_host = proxied_host

    def connect(self):
        if 'http' in debug.debug_flags:
            self._mutter_connect()
        httplib.HTTPConnection.connect(self)
        self._wrap_socket_for_reporting(self.sock)
# Build the appropriate socket wrapper for ssl
try:
    # python 2.6 introduced a better ssl package
    import ssl
    _ssl_wrap_socket = ssl.wrap_socket
except ImportError:
    # python versions prior to 2.6 don't have ssl and ssl.wrap_socket instead
    # they use httplib.FakeSocket
    def _ssl_wrap_socket(sock, key_file, cert_file):
        ssl_sock = socket.ssl(sock, key_file, cert_file)
        return httplib.FakeSocket(sock, ssl_sock)
class HTTPSConnection(AbstractHTTPConnection, httplib.HTTPSConnection):
    """HTTPS connection, optionally tunneled through a proxy via CONNECT."""

    def __init__(self, host, port=None, key_file=None, cert_file=None,
                 proxied_host=None,
                 report_activity=None):
        AbstractHTTPConnection.__init__(self, report_activity=report_activity)
        # Use strict=True since we don't support HTTP/0.9
        httplib.HTTPSConnection.__init__(self, host, port,
                                         key_file, cert_file, strict=True)
        self.proxied_host = proxied_host

    def connect(self):
        if 'http' in debug.debug_flags:
            self._mutter_connect()
        # Establish the raw TCP link; the ssl handshake is done separately in
        # connect_to_origin so that the proxy CONNECT dance can happen first.
        httplib.HTTPConnection.connect(self)
        self._wrap_socket_for_reporting(self.sock)
        if self.proxied_host is None:
            self.connect_to_origin()

    def connect_to_origin(self):
        ssl_sock = _ssl_wrap_socket(self.sock, self.key_file, self.cert_file)
        # Wrap the ssl socket before anybody use it
        self._wrap_socket_for_reporting(ssl_sock)
class Request(urllib2.Request):
    """A custom Request object.

    urllib2 determines the request method heuristically (based on
    the presence or absence of data). We set the method
    statically.

    The Request object tracks:
    - the connection the request will be made on.
    - the authentication parameters needed to preventively set
      the authentication header once a first authentication have
      been made.
    """

    def __init__(self, method, url, data=None, headers={},
                 origin_req_host=None, unverifiable=False,
                 connection=None, parent=None,
                 accepted_errors=None):
        urllib2.Request.__init__(self, url, data, headers,
                                 origin_req_host, unverifiable)
        self.method = method
        self.connection = connection
        self.accepted_errors = accepted_errors
        # To handle redirections
        self.parent = parent
        self.redirected_to = None
        # Unless told otherwise, redirections are not followed
        self.follow_redirections = False
        # auth and proxy_auth are dicts containing, at least
        # (scheme, host, port, realm, user, password, protocol, path).
        # The dict entries are mostly handled by the AuthHandler.
        # Some authentication schemes may add more entries.
        self.auth = {}
        self.proxy_auth = {}
        self.proxied_host = None

    def get_method(self):
        return self.method

    def set_proxy(self, proxy, type):
        """Set the proxy and remember the proxied host."""
        host, port = urllib.splitport(self.get_host())
        if port is None:
            # We need to set the default port ourselves way before it gets set
            # in the HTTP[S]Connection object at build time.
            if self.type == 'https':
                conn_class = HTTPSConnection
            else:
                conn_class = HTTPConnection
            port = conn_class.default_port
        self.proxied_host = '%s:%s' % (host, port)
        urllib2.Request.set_proxy(self, proxy, type)
        # When urllib2 makes a https request with our wrapper code and a proxy,
        # it sets Host to the https proxy, not the host we want to talk to.
        # I'm fairly sure this is our fault, but what is the cause is an open
        # question. -- Robert Collins May 8 2010.
        self.add_unredirected_header('Host', self.proxied_host)
class _ConnectRequest(Request):
    """A CONNECT request issued to a proxy to establish an SSL tunnel."""

    def __init__(self, request):
        """Constructor

        :param request: the first request sent to the proxied host, already
            processed by the opener (i.e. proxied_host is already set).
        """
        # We give a fake url and redefine get_selector or urllib2 will be
        # confused
        Request.__init__(self, 'CONNECT', request.get_full_url(),
                         connection=request.connection)
        if request.proxied_host is None:
            raise AssertionError()
        self.proxied_host = request.proxied_host

    def get_selector(self):
        return self.proxied_host

    def set_proxy(self, proxy, type):
        """Set the proxy without remembering the proxied host.

        We already know the proxied host by definition, the CONNECT request
        occurs only when the connection goes through a proxy. The usual
        processing (masquerade the request so that the connection is done to
        the proxy while the request is targeted at another host) does not apply
        here. In fact, the connection is already established with proxy and we
        just want to enable the SSL tunneling.
        """
        urllib2.Request.set_proxy(self, proxy, type)
class ConnectionHandler(urllib2.BaseHandler):
421
"""Provides connection-sharing by pre-processing requests.
423
urllib2 provides no way to access the HTTPConnection object
424
internally used. But we need it in order to achieve
425
connection sharing. So, we add it to the request just before
426
it is processed, and then we override the do_open method for
427
http[s] requests in AbstractHTTPHandler.
430
handler_order = 1000 # after all pre-processings
432
def __init__(self, report_activity=None):
433
self._report_activity = report_activity
435
def create_connection(self, request, http_connection_class):
436
host = request.get_host()
438
# Just a bit of paranoia here, this should have been
439
# handled in the higher levels
440
raise errors.InvalidURL(request.get_full_url(), 'no host given.')
442
# We create a connection (but it will not connect until the first
445
connection = http_connection_class(
446
host, proxied_host=request.proxied_host,
447
report_activity=self._report_activity)
448
except httplib.InvalidURL, exception:
449
# There is only one occurrence of InvalidURL in httplib
450
raise errors.InvalidURL(request.get_full_url(),
451
extra='nonnumeric port')
455
def capture_connection(self, request, http_connection_class):
456
"""Capture or inject the request connection.
459
- the request have no connection: create a new one,
461
- the request have a connection: this one have been used
462
already, let's capture it, so that we can give it to
463
another transport to be reused. We don't do that
464
ourselves: the Transport object get the connection from
465
a first request and then propagate it, from request to
466
request or to cloned transports.
468
connection = request.connection
469
if connection is None:
471
connection = self.create_connection(request, http_connection_class)
472
request.connection = connection
474
# All connections will pass here, propagate debug level
475
connection.set_debuglevel(DEBUG)
478
def http_request(self, request):
479
return self.capture_connection(request, HTTPConnection)
481
def https_request(self, request):
482
return self.capture_connection(request, HTTPSConnection)
485
class AbstractHTTPHandler(urllib2.AbstractHTTPHandler):
486
"""A custom handler for HTTP(S) requests.
488
We overrive urllib2.AbstractHTTPHandler to get a better
489
control of the connection, the ability to implement new
490
request types and return a response able to cope with
491
persistent connections.
494
# We change our order to be before urllib2 HTTP[S]Handlers
495
# and be chosen instead of them (the first http_open called
499
_default_headers = {'Pragma': 'no-cache',
500
'Cache-control': 'max-age=0',
501
'Connection': 'Keep-Alive',
502
'User-agent': 'bzr/%s (urllib)' % bzrlib_version,
507
urllib2.AbstractHTTPHandler.__init__(self, debuglevel=DEBUG)
509
def http_request(self, request):
510
"""Common headers setting"""
512
request.headers.update(self._default_headers.copy())
513
# FIXME: We may have to add the Content-Length header if
514
# we have data to send.
517
def retry_or_raise(self, http_class, request, first_try):
518
"""Retry the request (once) or raise the exception.
520
urllib2 raises exception of application level kind, we
521
just have to translate them.
523
httplib can raise exceptions of transport level (badly
524
formatted dialog, loss of connexion or socket level
525
problems). In that case we should issue the request again
526
(httplib will close and reopen a new connection if
529
# When an exception occurs, we give back the original
530
# Traceback or the bugs are hard to diagnose.
531
exc_type, exc_val, exc_tb = sys.exc_info()
532
if exc_type == socket.gaierror:
533
# No need to retry, that will not help
534
raise errors.ConnectionError("Couldn't resolve host '%s'"
535
% request.get_origin_req_host(),
537
elif isinstance(exc_val, httplib.ImproperConnectionState):
538
# The httplib pipeline is in incorrect state, it's a bug in our
540
raise exc_type, exc_val, exc_tb
543
if self._debuglevel >= 2:
544
print 'Received exception: [%r]' % exc_val
545
print ' On connection: [%r]' % request.connection
546
method = request.get_method()
547
url = request.get_full_url()
548
print ' Will retry, %s %r' % (method, url)
549
request.connection.close()
550
response = self.do_open(http_class, request, False)
552
if self._debuglevel >= 2:
553
print 'Received second exception: [%r]' % exc_val
554
print ' On connection: [%r]' % request.connection
555
if exc_type in (httplib.BadStatusLine, httplib.UnknownProtocol):
556
# httplib.BadStatusLine and
557
# httplib.UnknownProtocol indicates that a
558
# bogus server was encountered or a bad
559
# connection (i.e. transient errors) is
560
# experimented, we have already retried once
561
# for that request so we raise the exception.
562
my_exception = errors.InvalidHttpResponse(
563
request.get_full_url(),
564
'Bad status line received',
566
elif (isinstance(exc_val, socket.error) and len(exc_val.args)
567
and exc_val.args[0] in (errno.ECONNRESET, 10054)):
568
raise errors.ConnectionReset(
569
"Connection lost while sending request.")
571
# All other exception are considered connection related.
573
# socket errors generally occurs for reasons
574
# far outside our scope, so closing the
575
# connection and retrying is the best we can
578
my_exception = errors.ConnectionError(
579
msg= 'while sending %s %s:' % (request.get_method(),
580
request.get_selector()),
583
if self._debuglevel >= 2:
584
print 'On connection: [%r]' % request.connection
585
method = request.get_method()
586
url = request.get_full_url()
587
print ' Failed again, %s %r' % (method, url)
588
print ' Will raise: [%r]' % my_exception
589
raise my_exception, None, exc_tb
592
def do_open(self, http_class, request, first_try=True):
593
"""See urllib2.AbstractHTTPHandler.do_open for the general idea.
595
The request will be retried once if it fails.
597
connection = request.connection
598
if connection is None:
599
raise AssertionError(
600
'Cannot process a request without a connection')
602
# Get all the headers
604
headers.update(request.header_items())
605
headers.update(request.unredirected_hdrs)
606
# Some servers or proxies will choke on headers not properly
607
# cased. httplib/urllib/urllib2 all use capitalize to get canonical
608
# header names, but only python2.5 urllib2 use title() to fix them just
609
# before sending the request. And not all versions of python 2.5 do
610
# that. Since we replace urllib2.AbstractHTTPHandler.do_open we do it
612
headers = dict((name.title(), val) for name, val in headers.iteritems())
615
method = request.get_method()
616
url = request.get_selector()
617
connection._send_request(method, url,
618
# FIXME: implements 100-continue
619
#None, # We don't send the body yet
622
if 'http' in debug.debug_flags:
623
trace.mutter('> %s %s' % (method, url))
624
hdrs = ['%s: %s' % (k, v) for k,v in headers.items()]
625
trace.mutter('> ' + '\n> '.join(hdrs) + '\n')
626
if self._debuglevel >= 1:
627
print 'Request sent: [%r] from (%s)' \
628
% (request, request.connection.sock.getsockname())
629
response = connection.getresponse()
630
convert_to_addinfourl = True
631
except (socket.gaierror, httplib.BadStatusLine, httplib.UnknownProtocol,
632
socket.error, httplib.HTTPException):
633
response = self.retry_or_raise(http_class, request, first_try)
634
convert_to_addinfourl = False
636
# FIXME: HTTPConnection does not fully support 100-continue (the
637
# server responses are just ignored)
640
# mutter('Will send the body')
641
# # We can send the body now
642
# body = request.get_data()
644
# raise URLError("No data given")
645
# connection.send(body)
646
# response = connection.getresponse()
648
if self._debuglevel >= 2:
649
print 'Receives response: %r' % response
650
print ' For: %r(%r)' % (request.get_method(),
651
request.get_full_url())
653
if convert_to_addinfourl:
654
# Shamelessly copied from urllib2
658
fp = socket._fileobject(r, bufsize=65536)
659
resp = urllib2.addinfourl(fp, r.msg, req.get_full_url())
662
resp.version = r.version
663
if self._debuglevel >= 2:
664
print 'Create addinfourl: %r' % resp
665
print ' For: %r(%r)' % (request.get_method(),
666
request.get_full_url())
667
if 'http' in debug.debug_flags:
668
version = 'HTTP/%d.%d'
670
version = version % (resp.version / 10,
673
version = 'HTTP/%r' % resp.version
674
trace.mutter('< %s %s %s' % (version, resp.code,
676
# Use the raw header lines instead of treating resp.info() as a
677
# dict since we may miss duplicated headers otherwise.
678
hdrs = [h.rstrip('\r\n') for h in resp.info().headers]
679
trace.mutter('< ' + '\n< '.join(hdrs) + '\n')
685
class HTTPHandler(AbstractHTTPHandler):
    """A custom handler that just thunks into HTTPConnection"""

    def http_open(self, request):
        return self.do_open(HTTPConnection, request)
class HTTPSHandler(AbstractHTTPHandler):
    """A custom handler that just thunks into HTTPSConnection"""

    https_request = AbstractHTTPHandler.http_request

    def https_open(self, request):
        connection = request.connection
        if connection.sock is None and \
            connection.proxied_host is not None and \
            request.get_method() != 'CONNECT' : # Don't loop
            # FIXME: We need a gazillion connection tests here, but we still
            # miss a https server :-( :
            # - with and without proxy
            # - with and without certificate
            # - with self-signed certificate
            # - with and without authentication
            # - with good and bad credentials (especially the proxy auth around
            #   CONNECT)
            # - with basic and digest schemes
            # - reconnection on errors
            # - connection persistence behaviour (including reconnection)

            # We are about to connect for the first time via a proxy, we must
            # issue a CONNECT request first to establish the encrypted link
            connect = _ConnectRequest(request)
            response = self.parent.open(connect)
            if response.code != 200:
                # NOTE(review): self.host looks dubious here — handlers don't
                # define 'host'; confirm the intended attribute on failure path.
                raise errors.ConnectionError("Can't connect to %s via proxy %s" % (
                        connect.proxied_host, self.host))
            # Housekeeping
            connection.cleanup_pipe()
            # Establish the connection encryption
            connection.connect_to_origin()
            # Propagate the connection to the original request
            request.connection = connection
        return self.do_open(HTTPSConnection, request)
class HTTPRedirectHandler(urllib2.HTTPRedirectHandler):
730
"""Handles redirect requests.
732
We have to implement our own scheme because we use a specific
733
Request object and because we want to implement a specific
737
# RFC2616 says that only read requests should be redirected
738
# without interacting with the user. But bzr use some
739
# shortcuts to optimize against roundtrips which can leads to
740
# write requests being issued before read requests of
741
# containing dirs can be redirected. So we redirect write
742
# requests in the same way which seems to respect the spirit
743
# of the RFC if not its letter.
745
def redirect_request(self, req, fp, code, msg, headers, newurl):
746
"""See urllib2.HTTPRedirectHandler.redirect_request"""
747
# We would have preferred to update the request instead
748
# of creating a new one, but the urllib2.Request object
749
# has a too complicated creation process to provide a
750
# simple enough equivalent update process. Instead, when
751
# redirecting, we only update the following request in
752
# the redirect chain with a reference to the parent
755
# Some codes make no sense in our context and are treated
758
# 300: Multiple choices for different representations of
759
# the URI. Using that mechanisn with bzr will violate the
760
# protocol neutrality of Transport.
762
# 304: Not modified (SHOULD only occurs with conditional
763
# GETs which are not used by our implementation)
765
# 305: Use proxy. I can't imagine this one occurring in
766
# our context-- vila/20060909
768
# 306: Unused (if the RFC says so...)
770
# If the code is 302 and the request is HEAD, some may
771
# think that it is a sufficent hint that the file exists
772
# and that we MAY avoid following the redirections. But
773
# if we want to be sure, we MUST follow them.
775
if code in (301, 302, 303, 307):
776
return Request(req.get_method(),newurl,
777
headers = req.headers,
778
origin_req_host = req.get_origin_req_host(),
780
# TODO: It will be nice to be able to
781
# detect virtual hosts sharing the same
782
# IP address, that will allow us to
783
# share the same connection...
788
raise urllib2.HTTPError(req.get_full_url(), code, msg, headers, fp)
790
def http_error_302(self, req, fp, code, msg, headers):
791
"""Requests the redirected to URI.
793
Copied from urllib2 to be able to clean the pipe of the associated
794
connection, *before* issuing the redirected request but *after* having
795
eventually raised an error.
797
# Some servers (incorrectly) return multiple Location headers
798
# (so probably same goes for URI). Use first header.
800
# TODO: Once we get rid of addinfourl objects, the
801
# following will need to be updated to use correct case
803
if 'location' in headers:
804
newurl = headers.getheaders('location')[0]
805
elif 'uri' in headers:
806
newurl = headers.getheaders('uri')[0]
809
if self._debuglevel >= 1:
810
print 'Redirected to: %s (followed: %r)' % (newurl,
811
req.follow_redirections)
812
if req.follow_redirections is False:
813
req.redirected_to = newurl
816
newurl = urlparse.urljoin(req.get_full_url(), newurl)
818
# This call succeeds or raise an error. urllib2 returns
819
# if redirect_request returns None, but our
820
# redirect_request never returns None.
821
redirected_req = self.redirect_request(req, fp, code, msg, headers,
825
# .redirect_dict has a key url if url was previously visited.
826
if hasattr(req, 'redirect_dict'):
827
visited = redirected_req.redirect_dict = req.redirect_dict
828
if (visited.get(newurl, 0) >= self.max_repeats or
829
len(visited) >= self.max_redirections):
830
raise urllib2.HTTPError(req.get_full_url(), code,
831
self.inf_msg + msg, headers, fp)
833
visited = redirected_req.redirect_dict = req.redirect_dict = {}
834
visited[newurl] = visited.get(newurl, 0) + 1
836
# We can close the fp now that we are sure that we won't
837
# use it with HTTPError.
839
# We have all we need already in the response
840
req.connection.cleanup_pipe()
842
return self.parent.open(redirected_req)
844
http_error_301 = http_error_303 = http_error_307 = http_error_302
847
class ProxyHandler(urllib2.ProxyHandler):
848
"""Handles proxy setting.
850
Copied and modified from urllib2 to be able to modify the request during
851
the request pre-processing instead of modifying it at _open time. As we
852
capture (or create) the connection object during request processing, _open
855
The main task is to modify the request so that the connection is done to
856
the proxy while the request still refers to the destination host.
858
Note: the proxy handling *may* modify the protocol used; the request may be
859
against an https server proxied through an http proxy. So, https_request
860
will be called, but later it's really http_open that will be called. This
861
explains why we don't have to call self.parent.open as the urllib2 did.
864
# Proxies must be in front
868
def __init__(self, proxies=None):
869
urllib2.ProxyHandler.__init__(self, proxies)
870
# First, let's get rid of urllib2 implementation
871
for type, proxy in self.proxies.items():
872
if self._debuglevel >= 3:
873
print 'Will unbind %s_open for %r' % (type, proxy)
874
delattr(self, '%s_open' % type)
876
def bind_scheme_request(proxy, scheme):
879
scheme_request = scheme + '_request'
880
if self._debuglevel >= 3:
881
print 'Will bind %s for %r' % (scheme_request, proxy)
882
setattr(self, scheme_request,
883
lambda request: self.set_proxy(request, scheme))
884
# We are interested only by the http[s] proxies
885
http_proxy = self.get_proxy_env_var('http')
886
bind_scheme_request(http_proxy, 'http')
887
https_proxy = self.get_proxy_env_var('https')
888
bind_scheme_request(https_proxy, 'https')
890
def get_proxy_env_var(self, name, default_to='all'):
891
"""Get a proxy env var.
893
Note that we indirectly rely on
894
urllib.getproxies_environment taking into account the
895
uppercased values for proxy variables.
898
return self.proxies[name.lower()]
900
if default_to is not None:
901
# Try to get the alternate environment variable
903
return self.proxies[default_to]
908
def proxy_bypass(self, host):
909
"""Check if host should be proxied or not"""
910
no_proxy = self.get_proxy_env_var('no', default_to=None)
913
hhost, hport = urllib.splitport(host)
914
# Does host match any of the domains mentioned in
915
# no_proxy ? The rules about what is authorized in no_proxy
916
# are fuzzy (to say the least). We try to allow most
917
# commonly seen values.
918
for domain in no_proxy.split(','):
919
dhost, dport = urllib.splitport(domain)
920
if hport == dport or dport is None:
922
dhost = dhost.replace(".", r"\.")
923
dhost = dhost.replace("*", r".*")
924
dhost = dhost.replace("?", r".")
925
if re.match(dhost, hhost, re.IGNORECASE):
927
# Nevertheless, there are platform-specific ways to
929
return urllib.proxy_bypass(host)
931
def set_proxy(self, request, type):
932
if self.proxy_bypass(request.get_host()):
935
proxy = self.get_proxy_env_var(type)
936
if self._debuglevel >= 3:
937
print 'set_proxy %s_request for %r' % (type, proxy)
938
# FIXME: python 2.5 urlparse provides a better _parse_proxy which can
939
# grok user:password@host:port as well as
940
# http://user:password@host:port
942
(scheme, user, password,
943
host, port, path) = transport.ConnectedTransport._split_url(proxy)
945
raise errors.InvalidURL(proxy, 'No host component')
947
if request.proxy_auth == {}:
948
# No proxy auth parameter are available, we are handling the first
949
# proxied request, intialize. scheme (the authentication scheme)
950
# and realm will be set by the AuthHandler
951
request.proxy_auth = {
952
'host': host, 'port': port,
953
'user': user, 'password': password,
955
# We ignore path since we connect to a proxy
960
phost = host + ':%d' % port
961
request.set_proxy(phost, type)
962
if self._debuglevel >= 3:
963
print 'set_proxy: proxy set to %s://%s' % (type, phost)
967
class AbstractAuthHandler(urllib2.BaseHandler):
    """A custom abstract authentication handler for all http authentications.

    Provides the meat to handle authentication errors and
    preventively set authentication headers after the first
    successful authentication.

    This can be used for http and proxy, as well as for basic, negotiate and
    digest authentications.

    This provides an unified interface for all authentication handlers
    (urllib2 provides far too many with different policies).

    The interaction between this handler and the urllib2
    framework is not obvious, it works as follow:

    opener.open(request) is called:

    - that may trigger http_request which will add an authentication header
      (self.build_header) if enough info is available.

    - the request is sent to the server,

    - if an authentication error is received self.auth_required is called,
      we acquire the authentication info in the error headers and call
      self.auth_match to check that we are able to try the
      authentication and complete the authentication parameters,

    - we call parent.open(request), that may trigger http_request
      and will add a header (self.build_header), but here we have
      all the required info (keep in mind that the request and
      authentication used in the recursive calls are really (and must be)
      the *same* objects).

    - if the call returns a response, the authentication have been
      successful and the request authentication parameters have been updated.
    """

    scheme = None
    """The scheme as it appears in the server header (lower cased)"""

    _max_retry = 3
    """We don't want to retry authenticating endlessly"""

    requires_username = True
    """Whether the auth mechanism requires a username."""

    # The following attributes should be defined by daughter
    # classes:
    # - auth_required_header: the header received from the server
    # - auth_header: the header sent in the request

    def __init__(self):
        # We want to know when we enter into an try/fail cycle of
        # authentications so we initialize to None to indicate that we aren't
        # in such a cycle by default.
        self._retry_count = None

    def _parse_auth_header(self, server_header):
        """Parse the authentication header.

        :param server_header: The value of the header sent by the server
            describing the authenticaion request.

        :return: A tuple (scheme, remainder) scheme being the first word in the
            given header (lower cased), remainder may be None.
        """
        try:
            scheme, remainder = server_header.split(None, 1)
        except ValueError:
            # Header contained only the scheme, no parameters
            scheme = server_header
            remainder = None
        return (scheme.lower(), remainder)

    def update_auth(self, auth, key, value):
        """Update a value in auth marking the auth as modified if needed"""
        old_value = auth.get(key, None)
        if old_value != value:
            auth[key] = value
            auth['modified'] = True

    def auth_required(self, request, headers):
        """Retry the request if the auth scheme is ours.

        :param request: The request needing authentication.
        :param headers: The headers for the authentication error response.
        :return: None or the response for the authenticated request.
        """
        # Don't try to authenticate endlessly
        if self._retry_count is None:
            # The retry being recusrsive calls, None identify the first retry
            self._retry_count = 1
        else:
            self._retry_count += 1
            if self._retry_count > self._max_retry:
                # Let's be ready for next round
                self._retry_count = None
                return None
        server_headers = headers.getheaders(self.auth_required_header)
        if not server_headers:
            # The http error MUST have the associated
            # header. This must never happen in production code.
            raise KeyError('%s not found' % self.auth_required_header)

        auth = self.get_auth(request)
        auth['modified'] = False
        # Put some common info in auth if the caller didn't
        if auth.get('path', None) is None:
            (protocol, _, _,
             host, port, path) = urlutils.parse_url(request.get_full_url())
            self.update_auth(auth, 'protocol', protocol)
            self.update_auth(auth, 'host', host)
            self.update_auth(auth, 'port', port)
            self.update_auth(auth, 'path', path)
        # FIXME: the auth handler should be selected at a single place instead
        # of letting all handlers try to match all headers, but the current
        # design doesn't allow a simple implementation.
        for server_header in server_headers:
            # Several schemes can be proposed by the server, try to match each
            # one in turn
            matching_handler = self.auth_match(server_header, auth)
            if matching_handler:
                # auth_match may have modified auth (by adding the
                # password or changing the realm, for example)
                if (request.get_header(self.auth_header, None) is not None
                    and not auth['modified']):
                    # We already tried that, give up
                    return None

                # Only the most secure scheme proposed by the server should be
                # used, since the handlers use 'handler_order' to describe that
                # property, the first handler tried takes precedence, the
                # others should not attempt to authenticate if the best one
                # failed.
                best_scheme = auth.get('best_scheme', None)
                if best_scheme is None:
                    # At that point, if current handler should doesn't succeed
                    # the credentials are wrong (or incomplete), but we know
                    # that the associated scheme should be used.
                    best_scheme = auth['best_scheme'] = self.scheme
                if best_scheme != self.scheme:
                    # A more secure scheme was already selected
                    continue

                if self.requires_username and auth.get('user', None) is None:
                    # Without a known user, we can't authenticate
                    return None

                # Housekeeping
                request.connection.cleanup_pipe()
                # Retry the request with an authentication header added
                response = self.parent.open(request)
                if response:
                    self.auth_successful(request, response)
                return response
        # We are not qualified to handle the authentication.
        # Note: the authentication error handling will try all
        # available handlers. If one of them authenticates
        # successfully, a response will be returned. If none of
        # them succeeds, None will be returned and the error
        # handler will raise the 401 'Unauthorized' or the 407
        # 'Proxy Authentication Required' error.
        return None

    def add_auth_header(self, request, header):
        """Add the authentication header to the request"""
        request.add_unredirected_header(self.auth_header, header)

    def auth_match(self, header, auth):
        """Check that we are able to handle that authentication scheme.

        The request authentication parameters may need to be
        updated with info from the server. Some of these
        parameters, when combined, are considered to be the
        authentication key, if one of them change the
        authentication result may change. 'user' and 'password'
        are exampls, but some auth schemes may have others
        (digest's nonce is an example, digest's nonce_count is a
        *counter-example*). Such parameters must be updated by
        using the update_auth() method.

        :param header: The authentication header sent by the server.
        :param auth: The auth parameters already known. They may be
            updated.
        :returns: True if we can try to handle the authentication.
        """
        raise NotImplementedError(self.auth_match)

    def build_auth_header(self, auth, request):
        """Build the value of the header used to authenticate.

        :param auth: The auth parameters needed to build the header.
        :param request: The request needing authentication.

        :return: None or header.
        """
        raise NotImplementedError(self.build_auth_header)

    def auth_successful(self, request, response):
        """The authentification was successful for the request.

        Additional infos may be available in the response.

        :param request: The succesfully authenticated request.
        :param response: The server response (may contain auth info).
        """
        # It may happen that we need to reconnect later, let's be ready
        self._retry_count = None

    def get_user_password(self, auth):
        """Ask user for a password if none is already available.

        :param auth: authentication info gathered so far (from the initial url
            and then during dialog with the server).
        :return: A (user, password) tuple; either may be None.
        """
        auth_conf = config.AuthenticationConfig()
        user = auth.get('user', None)
        password = auth.get('password', None)
        realm = auth['realm']

        if user is None:
            user = auth_conf.get_user(auth['protocol'], auth['host'],
                                      port=auth['port'], path=auth['path'],
                                      realm=realm, ask=True,
                                      prompt=self.build_username_prompt(auth))
        if user is not None and password is None:
            password = auth_conf.get_password(
                auth['protocol'], auth['host'], user, port=auth['port'],
                path=auth['path'], realm=realm,
                prompt=self.build_password_prompt(auth))

        return user, password

    def _build_password_prompt(self, auth):
        """Build a prompt taking the protocol used into account.

        The AuthHandler is used by http and https, we want that information in
        the prompt, so we build the prompt from the authentication dict which
        contains all the needed parts.

        Also, http and proxy AuthHandlers present different prompts to the
        user. The daughter classes should implements a public
        build_password_prompt using this method.
        """
        prompt = '%s' % auth['protocol'].upper() + ' %(user)s@%(host)s'
        realm = auth['realm']
        if realm is not None:
            prompt += ", Realm: '%s'" % realm
        prompt += ' password'
        return prompt

    def _build_username_prompt(self, auth):
        """Build a prompt taking the protocol used into account.

        The AuthHandler is used by http and https, we want that information in
        the prompt, so we build the prompt from the authentication dict which
        contains all the needed parts.

        Also, http and proxy AuthHandlers present different prompts to the
        user. The daughter classes should implements a public
        build_username_prompt using this method.
        """
        prompt = '%s' % auth['protocol'].upper() + ' %(host)s'
        realm = auth['realm']
        if realm is not None:
            prompt += ", Realm: '%s'" % realm
        prompt += ' username'
        return prompt

    def http_request(self, request):
        """Insert an authentication header if information is available"""
        auth = self.get_auth(request)
        if self.auth_params_reusable(auth):
            self.add_auth_header(request, self.build_auth_header(auth, request))
        return request

    https_request = http_request # FIXME: Need test
class NegotiateAuthHandler(AbstractAuthHandler):
    """A authentication handler that handles WWW-Authenticate: Negotiate.

    At the moment this handler supports just Kerberos. In the future,
    NTLM support may also be added.
    """

    scheme = 'negotiate'
    # NOTE(review): reconstructed -- tried before digest/basic as negotiate
    # is considered more secure; confirm value against upstream.
    handler_order = 480
    requires_username = False

    def auth_match(self, header, auth):
        scheme, raw_auth = self._parse_auth_header(header)
        if scheme != self.scheme:
            return False
        self.update_auth(auth, 'scheme', scheme)
        resp = self._auth_match_kerberos(auth)
        if resp is None:
            return False
        # Optionally should try to authenticate using NTLM here
        self.update_auth(auth, 'negotiate_response', resp)
        return True

    def _auth_match_kerberos(self, auth):
        """Try to create a GSSAPI response for authenticating against a host."""
        if not have_kerberos:
            return None
        ret, vc = kerberos.authGSSClientInit("HTTP@%(host)s" % auth)
        if ret < 1:
            trace.warning('Unable to create GSSAPI context for %s: %d',
                auth['host'], ret)
            return None
        ret = kerberos.authGSSClientStep(vc, "")
        if ret < 0:
            trace.mutter('authGSSClientStep failed: %d', ret)
            return None
        return kerberos.authGSSClientResponse(vc)

    def build_auth_header(self, auth, request):
        return "Negotiate %s" % auth['negotiate_response']

    def auth_params_reusable(self, auth):
        # If the auth scheme is known, it means a previous
        # authentication was successful, all information is
        # available, no further checks are needed.
        return (auth.get('scheme', None) == 'negotiate' and
                auth.get('negotiate_response', None) is not None)
class BasicAuthHandler(AbstractAuthHandler):
    """A custom basic authentication handler."""

    scheme = 'basic'
    handler_order = 500
    # Capture the realm value from e.g. 'Basic realm="Foo"'
    auth_regexp = re.compile('realm="([^"]*)"', re.I)

    def build_auth_header(self, auth, request):
        raw = '%s:%s' % (auth['user'], auth['password'])
        auth_header = 'Basic ' + raw.encode('base64').strip()
        return auth_header

    def extract_realm(self, header_value):
        match = self.auth_regexp.search(header_value)
        realm = None
        if match:
            realm = match.group(1)
        return match, realm

    def auth_match(self, header, auth):
        scheme, raw_auth = self._parse_auth_header(header)
        if scheme != self.scheme:
            return False

        match, realm = self.extract_realm(raw_auth)
        if match:
            # Put useful info into auth
            self.update_auth(auth, 'scheme', scheme)
            self.update_auth(auth, 'realm', realm)
            if (auth.get('user', None) is None
                or auth.get('password', None) is None):
                user, password = self.get_user_password(auth)
                self.update_auth(auth, 'user', user)
                self.update_auth(auth, 'password', password)
        return match is not None

    def auth_params_reusable(self, auth):
        # If the auth scheme is known, it means a previous
        # authentication was successful, all information is
        # available, no further checks are needed.
        return auth.get('scheme', None) == 'basic'
def get_digest_algorithm_impls(algorithm):
    """Return the (H, KD) digest helpers for an RFC 2617 algorithm name.

    :param algorithm: 'MD5' or 'SHA' (other values are unsupported).
    :return: A (H, KD) tuple of callables, or (None, None) if the
        algorithm is not supported.
    """
    H = None
    KD = None
    if algorithm == 'MD5':
        H = lambda x: osutils.md5(x).hexdigest()
    elif algorithm == 'SHA':
        H = lambda x: osutils.sha(x).hexdigest()
    if H is not None:
        # KD as defined by RFC 2617: digest of secret concatenated with data
        KD = lambda secret, data: H("%s:%s" % (secret, data))
    return H, KD
def get_new_cnonce(nonce, nonce_count):
    """Build a new client nonce for digest authentication.

    Mixes the server nonce, the use count, the current time and some
    random bytes so each cnonce is unique and unpredictable.
    """
    raw = '%s:%d:%s:%s' % (nonce, nonce_count, time.ctime(),
                           urllib2.randombytes(8))
    return osutils.sha(raw).hexdigest()[:16]
class DigestAuthHandler(AbstractAuthHandler):
    """A custom digest authentication handler."""

    scheme = 'digest'
    # Before basic as digest is a bit more secure and should be preferred
    handler_order = 490

    def auth_params_reusable(self, auth):
        # If the auth scheme is known, it means a previous
        # authentication was successful, all information is
        # available, no further checks are needed.
        return auth.get('scheme', None) == 'digest'

    def auth_match(self, header, auth):
        scheme, raw_auth = self._parse_auth_header(header)
        if scheme != self.scheme:
            return False

        # Put the requested authentication info into a dict
        req_auth = urllib2.parse_keqv_list(urllib2.parse_http_list(raw_auth))

        # Check that we can handle that authentication
        qop = req_auth.get('qop', None)
        if qop != 'auth': # No auth-int so far
            return False
        H, KD = get_digest_algorithm_impls(req_auth.get('algorithm', 'MD5'))
        if H is None:
            # Unsupported digest algorithm
            return False

        realm = req_auth.get('realm', None)
        # Put useful info into auth
        self.update_auth(auth, 'scheme', scheme)
        self.update_auth(auth, 'realm', realm)
        if auth.get('user', None) is None or auth.get('password', None) is None:
            user, password = self.get_user_password(auth)
            self.update_auth(auth, 'user', user)
            self.update_auth(auth, 'password', password)

        try:
            if req_auth.get('algorithm', None) is not None:
                self.update_auth(auth, 'algorithm', req_auth.get('algorithm'))
            nonce = req_auth['nonce']
            if auth.get('nonce', None) != nonce:
                # A new nonce, never used
                self.update_auth(auth, 'nonce_count', 0)
            self.update_auth(auth, 'nonce', nonce)
            self.update_auth(auth, 'qop', qop)
            auth['opaque'] = req_auth.get('opaque', None)
        except KeyError:
            # Some required field is not there
            return False

        return True

    def build_auth_header(self, auth, request):
        url_scheme, url_selector = urllib.splittype(request.get_selector())
        sel_host, uri = urllib.splithost(url_selector)

        A1 = '%s:%s:%s' % (auth['user'], auth['realm'], auth['password'])
        A2 = '%s:%s' % (request.get_method(), uri)

        nonce = auth['nonce']
        qop = auth['qop']

        nonce_count = auth['nonce_count'] + 1
        ncvalue = '%08x' % nonce_count
        cnonce = get_new_cnonce(nonce, nonce_count)

        H, KD = get_digest_algorithm_impls(auth.get('algorithm', 'MD5'))
        nonce_data = '%s:%s:%s:%s:%s' % (nonce, ncvalue, cnonce, qop, H(A2))
        request_digest = KD(H(A1), nonce_data)

        header = 'Digest '
        header += 'username="%s", realm="%s", nonce="%s"' % (auth['user'],
                                                             auth['realm'],
                                                             nonce)
        header += ', uri="%s"' % uri
        header += ', cnonce="%s", nc=%s' % (cnonce, ncvalue)
        header += ', qop="%s"' % qop
        header += ', response="%s"' % request_digest
        # Append the optional fields
        opaque = auth.get('opaque', None)
        if opaque:
            header += ', opaque="%s"' % opaque
        if auth.get('algorithm', None):
            header += ', algorithm="%s"' % auth.get('algorithm')

        # We have used the nonce once more, update the count
        auth['nonce_count'] = nonce_count

        return header
class HTTPAuthHandler(AbstractAuthHandler):
    """Custom http authentication handler.

    Send the authentication preventively to avoid the roundtrip
    associated with the 401 error and keep the revelant info in
    the auth request attribute.
    """

    auth_required_header = 'www-authenticate'
    auth_header = 'Authorization'

    def get_auth(self, request):
        """Get the auth params from the request"""
        return request.auth

    def set_auth(self, request, auth):
        """Set the auth params for the request"""
        request.auth = auth

    def build_password_prompt(self, auth):
        return self._build_password_prompt(auth)

    def build_username_prompt(self, auth):
        return self._build_username_prompt(auth)

    def http_error_401(self, req, fp, code, msg, headers):
        return self.auth_required(req, headers)
class ProxyAuthHandler(AbstractAuthHandler):
    """Custom proxy authentication handler.

    Send the authentication preventively to avoid the roundtrip
    associated with the 407 error and keep the revelant info in
    the proxy_auth request attribute..
    """

    auth_required_header = 'proxy-authenticate'
    # FIXME: the correct capitalization is Proxy-Authorization,
    # but python-2.4 urllib2.Request insist on using capitalize()
    # instead of title().
    auth_header = 'Proxy-authorization'

    def get_auth(self, request):
        """Get the auth params from the request"""
        return request.proxy_auth

    def set_auth(self, request, auth):
        """Set the auth params for the request"""
        request.proxy_auth = auth

    def build_password_prompt(self, auth):
        prompt = self._build_password_prompt(auth)
        prompt = 'Proxy ' + prompt
        return prompt

    def build_username_prompt(self, auth):
        prompt = self._build_username_prompt(auth)
        prompt = 'Proxy ' + prompt
        return prompt

    def http_error_407(self, req, fp, code, msg, headers):
        return self.auth_required(req, headers)
class HTTPBasicAuthHandler(BasicAuthHandler, HTTPAuthHandler):
    """Custom http basic authentication handler"""
class ProxyBasicAuthHandler(BasicAuthHandler, ProxyAuthHandler):
    """Custom proxy basic authentication handler"""
class HTTPDigestAuthHandler(DigestAuthHandler, HTTPAuthHandler):
    """Custom http digest authentication handler"""
class ProxyDigestAuthHandler(DigestAuthHandler, ProxyAuthHandler):
    """Custom proxy digest authentication handler"""
class HTTPNegotiateAuthHandler(NegotiateAuthHandler, HTTPAuthHandler):
    """Custom http negotiate authentication handler"""
class ProxyNegotiateAuthHandler(NegotiateAuthHandler, ProxyAuthHandler):
    """Custom proxy negotiate authentication handler"""
class HTTPErrorProcessor(urllib2.HTTPErrorProcessor):
    """Process HTTP error responses.

    We don't really process the errors, quite the contrary
    instead, we leave our Transport handle them.
    """

    # NOTE(review): reconstructed list -- 200 and 206 are certain, 404 is
    # presumed from the dropped lines; confirm against upstream.
    accepted_errors = [200, # Ok
                       206, # Partial content
                       404, # Not found
                       ]
    """The error codes the caller will handle.

    This can be specialized in the request on a case-by case basis, but the
    common cases are covered here.
    """

    def http_response(self, request, response):
        code, msg, hdrs = response.code, response.msg, response.info()

        accepted_errors = request.accepted_errors
        if accepted_errors is None:
            accepted_errors = self.accepted_errors

        if code not in accepted_errors:
            response = self.parent.error('http', request, response,
                                         code, msg, hdrs)
        return response

    https_response = http_response
class HTTPDefaultErrorHandler(urllib2.HTTPDefaultErrorHandler):
    """Translate common errors into bzr Exceptions"""

    def http_error_default(self, req, fp, code, msg, hdrs):
        if code == 403:
            raise errors.TransportError(
                'Server refuses to fulfill the request (403 Forbidden)'
                ' for %s' % req.get_full_url())
        else:
            raise errors.InvalidHttpResponse(req.get_full_url(),
                                             'Unable to handle http code %d: %s'
                                             % (code, msg))
class Opener(object):
    """A wrapper around urllib2.build_opener

    Daughter classes can override to build their own specific opener
    """
    # TODO: Provides hooks for daughter classes.

    def __init__(self,
                 connection=ConnectionHandler,
                 redirect=HTTPRedirectHandler,
                 error=HTTPErrorProcessor,
                 report_activity=None):
        # NOTE(review): handler list partially reconstructed from the
        # dropped lines (redirect/error, ProxyHandler, HTTP(S)Handler);
        # confirm ordering against upstream.
        self._opener = urllib2.build_opener(
            connection(report_activity=report_activity),
            redirect, error,
            ProxyHandler(),
            HTTPBasicAuthHandler(),
            HTTPDigestAuthHandler(),
            HTTPNegotiateAuthHandler(),
            ProxyBasicAuthHandler(),
            ProxyDigestAuthHandler(),
            ProxyNegotiateAuthHandler(),
            HTTPHandler,
            HTTPSHandler,
            HTTPDefaultErrorHandler,
            )

        self.open = self._opener.open
        if DEBUG >= 9:
            # When dealing with handler order, it's easy to mess
            # things up, the following will help understand which
            # handler is used, when and for what.
            import pprint
            pprint.pprint(self._opener.__dict__)