~bzr-pqm/bzr/bzr.dev

2004.1.1 by vila
Connection sharing, with redirection. without authentification.
1
# Copyright (C) 2006 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
2004.2.1 by John Arbash Meinel
Cleanup of urllib functions
17
"""Implementaion of urllib2 tailored to bzr needs
18
2363.4.7 by Vincent Ladeuil
Deeper tests, prepare the auth setting that will avoid the
19
This file complements the urllib2 class hierarchy with custom classes.
2004.2.1 by John Arbash Meinel
Cleanup of urllib functions
20
21
For instance, we create a new HTTPConnection and HTTPSConnection that inherit
22
from the original urllib2.HTTP(s)Connection objects, but also have a new base
23
which implements a custom getresponse and fake_close handlers.
24
25
And then we implement custom HTTPHandler and HTTPSHandler classes, that use
26
the custom HTTPConnection classes.
27
28
We have a custom Response class, which lets us maintain a keep-alive
29
connection even for requests that urllib2 doesn't expect to contain body data.
30
2363.4.10 by Vincent Ladeuil
Complete tests.
31
And a custom Request class that lets us track redirections, and
2363.4.12 by Vincent Ladeuil
Take jam's review comments into account. Fix typos, give better
32
handle authentication schemes.
2004.2.1 by John Arbash Meinel
Cleanup of urllib functions
33
"""
34
2004.1.1 by vila
Connection sharing, with redirection. without authentification.
35
# Debug level handed to the connections and handlers defined in this
# module; their debug output is only printed when the level is above 0.
DEBUG = 0
36
2004.1.9 by vila
Takes jam's remarks into account when possible, add TODOs for the rest.
37
# TODO: It may be possible to share the password_manager across
38
# all transports by prefixing the realm by the protocol used
2004.1.1 by vila
Connection sharing, with redirection. without authentification.
39
# (especially if other protocols do not use realms). See
40
# PasswordManager below.
41
42
# FIXME: Oversimplifying, two kinds of exceptions should be
43
# raised, once a request is issued: URLError before we have been
44
# able to process the response, HTTPError after that. Process the
45
# response means we are able to leave the socket clean, so if we
46
# are not able to do that, we should close the connection. The
47
# current code more or less does that, tests should be written to
2004.1.16 by v.ladeuil+lp at free
Add tests against erroneous http status lines.
48
# ensure that.
2004.1.1 by vila
Connection sharing, with redirection. without authentification.
49
50
import httplib
2420.1.11 by Vincent Ladeuil
Implement digest authentication. Test suite passes. Tested against apache-2.x.
51
import md5
52
import sha
2004.1.1 by vila
Connection sharing, with redirection. without authentification.
53
import socket
54
import urllib
55
import urllib2
56
import urlparse
2167.3.4 by v.ladeuil+lp at free
Better fix for #74759, but still not tests.
57
import re
2004.1.16 by v.ladeuil+lp at free
Add tests against erroneous http status lines.
58
import sys
2420.1.11 by Vincent Ladeuil
Implement digest authentication. Test suite passes. Tested against apache-2.x.
59
import time
2004.1.1 by vila
Connection sharing, with redirection. without authentification.
60
2004.1.25 by v.ladeuil+lp at free
Shuffle http related test code. Hopefully it ends up at the right place :)
61
from bzrlib import __version__ as bzrlib_version
2420.1.5 by Vincent Ladeuil
Refactor http and proxy authentication. Tests passing. proxy password can be prompted too.
62
from bzrlib import (
63
    errors,
64
    ui,
65
    )
66
2004.1.1 by vila
Connection sharing, with redirection. without authentification.
67
2004.2.1 by John Arbash Meinel
Cleanup of urllib functions
68
2004.1.1 by vila
Connection sharing, with redirection. without authentification.
69
# We define our own Response class to keep our httplib pipe clean
70
class Response(httplib.HTTPResponse):
2004.1.16 by v.ladeuil+lp at free
Add tests against erroneous http status lines.
71
    """Custom HTTPResponse, to avoid the need to decorate.
2004.1.1 by vila
Connection sharing, with redirection. without authentification.
72
73
    httplib prefers to decorate the returned objects, rather
74
    than using a custom object.
75
    """
76
2004.1.7 by vila
Better handling of passwords (user should be queried only once).
77
    # Some responses have bodies in which we have no interest
2004.1.37 by v.ladeuil+lp at free
Small refactoring.
78
    _body_ignored_responses = [301,302, 303, 307, 401, 403, 404]
2004.1.7 by vila
Better handling of passwords (user should be queried only once).
79
2004.1.1 by vila
Connection sharing, with redirection. without authentification.
80
    def __init__(self, *args, **kwargs):
81
        httplib.HTTPResponse.__init__(self, *args, **kwargs)
82
83
    def begin(self):
84
        """Begin to read the response from the server.
85
86
        httplib assumes that some responses get no content and do
87
        not even attempt to read the body in that case, leaving
88
        the body in the socket, blocking the next request. Let's
89
        try to workaround that.
90
        """
2004.1.2 by vila
Implements a BasicAuthManager.
91
        httplib.HTTPResponse.begin(self)
2004.1.7 by vila
Better handling of passwords (user should be queried only once).
92
        if self.status in self._body_ignored_responses:
2004.1.16 by v.ladeuil+lp at free
Add tests against erroneous http status lines.
93
            if self.debuglevel > 0:
94
                print "For status: [%s]," % self.status,
95
                print "will ready body, length: ",
96
                if  self.length is not None:
97
                    print "[%d]" % self.length
98
                else:
99
                    print "None"
100
            if not (self.length is None or self.will_close):
101
                # In some cases, we just can't read the body not
102
                # even try or we may encounter a 104, 'Connection
103
                # reset by peer' error if there is indeed no body
104
                # and the server closed the connection just after
105
                # having issued the response headers (even if the
2004.1.37 by v.ladeuil+lp at free
Small refactoring.
106
                # headers indicate a Content-Type...)
2004.1.16 by v.ladeuil+lp at free
Add tests against erroneous http status lines.
107
                body = self.fp.read(self.length)
108
                if self.debuglevel > 0:
109
                    print "Consumed body: [%s]" % body
2004.1.1 by vila
Connection sharing, with redirection. without authentification.
110
            self.close()
111
2004.1.2 by vila
Implements a BasicAuthManager.
112
2004.2.1 by John Arbash Meinel
Cleanup of urllib functions
113
# Not inheriting from 'object' because httplib.HTTPConnection doesn't.
2004.1.1 by vila
Connection sharing, with redirection. without authentification.
114
class AbstractHTTPConnection:
    """A custom HTTP(S) Connection, which can reset itself on a bad response"""

    # We don't support HTTP/0.9
    strict = 1
    response_class = Response

    def fake_close(self):
        """Make the connection believe the response has been fully handled.

        That keeps httplib.HTTPConnection happy, while our socket
        reference survives the call to close().
        """
        # Hide the socket while close() runs, then put it back
        kept_socket, self.sock = self.sock, None
        self.close()
        self.sock = kept_socket
130
131
132
class HTTPConnection(AbstractHTTPConnection, httplib.HTTPConnection):
    """httplib.HTTPConnection with the AbstractHTTPConnection behavior mixed in."""
134
135
136
class HTTPSConnection(AbstractHTTPConnection, httplib.HTTPSConnection):
    """httplib.HTTPSConnection with the AbstractHTTPConnection behavior mixed in."""
138
139
140
class Request(urllib2.Request):
    """A custom Request object.

    urllib2 determines the request method heuristically (based on
    the presence or absence of data). We set the method
    statically.

    The Request object tracks:
    - the connection the request will be made on.
    - the authentication parameters needed to preventively set
      the authentication header once a first authentication has
      been made.
    """

    def __init__(self, method, url, data=None, headers=None,
                 origin_req_host=None, unverifiable=False,
                 connection=None, parent=None):
        """Build a request with an explicit HTTP method.

        :param method: the HTTP method, returned as is by get_method().
        :param url: the url the request is issued for.
        :param data: the request body, if any.
        :param headers: a dict of headers; None (the default) means no
            headers.
        :param origin_req_host: passed through to urllib2.Request.
        :param unverifiable: passed through to urllib2.Request.
        :param connection: the connection the request will be made on,
            or None if there is none yet.
        :param parent: the request this one was redirected from, if any.
        """
        # Never share a mutable default between calls: build a fresh
        # dict each time the caller gives no headers.
        if headers is None:
            headers = {}
        urllib2.Request.__init__(self, url, data, headers,
                                 origin_req_host, unverifiable)
        self.method = method
        self.connection = connection
        # To handle redirections
        self.parent = parent
        self.redirected_to = None
        # Unless told otherwise, redirections are not followed
        self.follow_redirections = False
        # auth and proxy_auth are dicts containing, at least
        # (scheme, url, realm, user, password).
        # The dict entries are mostly handled by the AuthHandler.
        # Some authentication schemes may add more entries.
        self.auth = {}
        self.proxy_auth = {}

    def get_method(self):
        """Return the method given at construction time."""
        return self.method
175
176
2420.1.3 by Vincent Ladeuil
Implement http proxy basic authentication.
177
def extract_credentials(url):
    """Extracts credentials information from url.

    Get user and password from url of the form: http://user:pass@host/path

    The user and the password are url-unquoted. When the url carries
    no credentials both are None; when it carries a user without a
    password, only the password is None.

    :param url: the url to process.
    :returns: (clean_url, user, password), clean_url being url without
        the credentials.
    """
    scheme, netloc, path, query, fragment = urlparse.urlsplit(url)

    user = password = None
    if '@' in netloc:
        # Split on the *last* '@': a host can not contain one, whereas
        # a sloppily quoted password can. Splitting on the first one
        # would put the end of the password into the host.
        auth, netloc = netloc.rsplit('@', 1)
        if ':' in auth:
            user, password = auth.split(':', 1)
            password = urllib.unquote(password)
        else:
            user = auth
        user = urllib.unquote(user)

    # Build the clean url
    clean_url = urlparse.urlunsplit((scheme, netloc, path, query, fragment))

    return clean_url, user, password
202
2420.1.5 by Vincent Ladeuil
Refactor http and proxy authentication. Tests passing. proxy password can be prompted too.
203
def extract_authentication_uri(url):
    """Reduce any url to its authentication uri.

    For bzr purposes the authentication uri is simplified to
    scheme://host. During the transport lifetime only one user by
    realm is allowed on a given host, i.e. handling several users for
    different paths in the same realm should be done at a higher
    level.

    :param url: the url to process.
    :returns: the scheme and network location of url, joined by '://'.
    """
    parts = urlparse.urlsplit(url)
    return '%s://%s' % (parts[0], parts[1])
214
2420.1.3 by Vincent Ladeuil
Implement http proxy basic authentication.
215
2363.4.12 by Vincent Ladeuil
Take jam's review comments into account. Fix typos, give better
216
# The urllib2.xxxAuthHandler classes handle the authentication of the
2004.1.1 by vila
Connection sharing, with redirection. without authentification.
217
# requests, to do that, they need an urllib2 PasswordManager *at
2420.1.5 by Vincent Ladeuil
Refactor http and proxy authentication. Tests passing. proxy password can be prompted too.
218
# build time*. We also need one to reuse the passwords entered by
219
# the user.
2004.1.1 by vila
Connection sharing, with redirection. without authentification.
220
class PasswordManager(urllib2.HTTPPasswordMgrWithDefaultRealm):
    """The password manager used to remember the credentials in use.

    It adds nothing yet to urllib2.HTTPPasswordMgrWithDefaultRealm.
    """

    def __init__(self):
        """Delegate the whole setup to the urllib2 base class."""
        base_class = urllib2.HTTPPasswordMgrWithDefaultRealm
        base_class.__init__(self)
2004.1.1 by vila
Connection sharing, with redirection. without authentification.
224
2004.2.1 by John Arbash Meinel
Cleanup of urllib functions
225
2004.1.1 by vila
Connection sharing, with redirection. without authentification.
226
class ConnectionHandler(urllib2.BaseHandler):
    """Provides connection-sharing by pre-processing requests.

    urllib2 gives no access to the HTTPConnection object it uses
    internally, yet we need it to share connections. So we attach the
    connection to the request just before it is processed, and
    AbstractHTTPHandler overrides do_open for http[s] requests to use
    it.
    """

    handler_order = 1000 # after all pre-processings

    def create_connection(self, request, http_connection_class):
        """Build a (not yet connected) connection for the request host.

        :param request: the request providing the host.
        :param http_connection_class: the class to instantiate with
            that host.
        :returns: the new connection.
        :raises errors.InvalidURL: if the request has no host or if
            httplib rejects the host as an invalid url.
        """
        host = request.get_host()
        if not host:
            # Just a bit of paranoia here, this should have been
            # handled in the higher levels
            raise errors.InvalidURL(request.get_full_url(), 'no host given.')

        # We create a connection (but it will not connect yet)
        try:
            return http_connection_class(host)
        except httplib.InvalidURL:
            # There is only one occurrence of InvalidURL in httplib
            raise errors.InvalidURL(request.get_full_url(),
                                    extra='nonnumeric port')

    def capture_connection(self, request, http_connection_class):
        """Capture or inject the request connection.

        Two cases:
        - the request has no connection: a new one is created and
          attached to it,

        - the request has a connection: it has been used already and
          we leave it in place, so that it can be given to another
          transport to be reused. We don't do that ourselves: the
          Transport object gets the connection from a first request
          and then propagates it, from request to request or to
          cloned transports.

        :returns: the request, whose connection is now set.
        """
        if request.connection is None:
            request.connection = self.create_connection(
                request, http_connection_class)

        # Every connection comes through here, propagate the debug level
        request.connection.set_debuglevel(DEBUG)
        return request

    def http_request(self, request):
        """Give http requests an HTTPConnection if they have none."""
        return self.capture_connection(request, HTTPConnection)

    def https_request(self, request):
        """Give https requests an HTTPSConnection if they have none."""
        return self.capture_connection(request, HTTPSConnection)
283
284
285
class AbstractHTTPHandler(urllib2.AbstractHTTPHandler):
    """A custom handler for HTTP(S) requests.

    We override urllib2.AbstractHTTPHandler to get a better
    control of the connection, the ability to implement new
    request types and return a response able to cope with
    persistent connections.
    """

    # We change our order to be before urllib2 HTTP[S]Handlers
    # and be chosen instead of them (the first http_open called
    # wins).
    handler_order = 400

    # Headers set on every request going through http_request
    _default_headers = {'Pragma': 'no-cache',
                        'Cache-control': 'max-age=0',
                        'Connection': 'Keep-Alive',
                        # FIXME: Spell it User-*A*gent once we
                        # know how to properly avoid bogus
                        # urllib2 using capitalize() for headers
                        # instead of title(sp?).
                        'User-agent': 'bzr/%s (urllib)' % bzrlib_version,
                        'Accept': '*/*',
                        }

    def __init__(self):
        """Initialize the urllib2 handler with the module DEBUG level."""
        urllib2.AbstractHTTPHandler.__init__(self, debuglevel=DEBUG)

    def http_request(self, request):
        """Common headers setting.

        The default headers are merged into request.headers with
        dict.update, so they replace any entry already stored there
        under the same key.

        :param request: the request to update.
        :returns: the same request object.
        """

        request.headers.update(self._default_headers.copy())
        # FIXME: We may have to add the Content-Length header if
        # we have data to send.
        return request

    def retry_or_raise(self, http_class, request, first_try):
        """Retry the request (once) or raise the exception.

        urllib2 raises exception of application level kind, we
        just have to translate them.

        httplib can raise exceptions of transport level (badly
        formatted dialog, loss of connection or socket level
        problems). In that case we should issue the request again
        (httplib will close and reopen a new connection if
        needed).

        This reads the exception being handled from sys.exc_info(),
        so it has to be called from an except clause.

        :param http_class: passed through to do_open for the retry.
        :param request: the request that failed.
        :param first_try: True if the request has not been retried yet.
        :returns: a (response, convert_to_addinfourl) tuple. It is only
            reached after a retry, whose result comes from do_open, so
            the second item is always False.
        :raises errors.ConnectionError: if the host can not be resolved
            (without retrying), or if the retry failed for any reason
            not listed below.
        :raises errors.InvalidHttpResponse: if the retry failed with
            httplib.BadStatusLine or httplib.UnknownProtocol.
        """
        # When an exception occurs, we give back the original
        # Traceback or the bugs are hard to diagnose.
        exc_type, exc_val, exc_tb = sys.exc_info()
        if exc_type == socket.gaierror:
            # No need to retry, that will not help
            raise errors.ConnectionError("Couldn't resolve host '%s'"
                                         % request.get_origin_req_host(),
                                         orig_error=exc_val)
        else:
            if first_try:
                if self._debuglevel > 0:
                    print 'Received exception: [%r]' % exc_val
                    print '  On connection: [%r]' % request.connection
                    method = request.get_method()
                    url = request.get_full_url()
                    print '  Will retry, %s %r' % (method, url)
                request.connection.close()
                # first_try=False: a second failure will raise
                response = self.do_open(http_class, request, False)
                # do_open has already built the final response object
                convert_to_addinfourl = False
            else:
                if self._debuglevel > 0:
                    print 'Received second exception: [%r]' % exc_val
                    print '  On connection: [%r]' % request.connection
                if exc_type in (httplib.BadStatusLine, httplib.UnknownProtocol):
                    # httplib.BadStatusLine and
                    # httplib.UnknownProtocol indicate that a
                    # bogus server was encountered or a bad
                    # connection (i.e. transient errors) is
                    # experienced, we have already retried once
                    # for that request so we raise the exception.
                    my_exception = errors.InvalidHttpResponse(
                        request.get_full_url(),
                        'Bad status line received',
                        orig_error=exc_val)
                else:
                    # All other exceptions are considered connection related.

                    # httplib.HTTPException should indicate a bug
                    # in the urllib implementation, somehow the
                    # httplib pipeline is in an incorrect state,
                    # we retry in hope that this will correct the
                    # problem but that may need investigation
                    # (note that no such bug is known as of
                    # 20061005 --vila).

                    # socket errors generally occur for reasons
                    # far outside our scope, so closing the
                    # connection and retrying is the best we can
                    # do.

                    # FIXME: and then there is HTTPError raised by:
                    # - HTTPDefaultErrorHandler (we define our own)
                    # - HTTPRedirectHandler.redirect_request
                    # - AbstractDigestAuthHandler.http_error_auth_reqed

                    my_exception = errors.ConnectionError(
                        msg= 'while sending %s %s:' % (request.get_method(),
                                                       request.get_selector()),
                        orig_error=exc_val)

                if self._debuglevel > 0:
                    print 'On connection: [%r]' % request.connection
                    method = request.get_method()
                    url = request.get_full_url()
                    print '  Failed again, %s %r' % (method, url)
                    print '  Will raise: [%r]' % my_exception
                # Raise our exception with the traceback of the original one
                raise my_exception, None, exc_tb
        return response, convert_to_addinfourl

    def do_open(self, http_class, request, first_try=True):
        """See urllib2.AbstractHTTPHandler.do_open for the general idea.

        The request will be retried once if it fails.

        :param http_class: not used to create a connection here (the
            request must already carry one), only handed to
            retry_or_raise.
        :param request: the request to send, its connection attribute
            must be set.
        :param first_try: False when called for a retry.
        :returns: a urllib2.addinfourl wrapping the httplib response,
            with the code and msg attributes set from its status and
            reason.
        """
        connection = request.connection
        assert connection is not None, \
            'Cannot process a request without a connection'

        # Get all the headers; the unredirected ones are applied last
        # and so win over regular headers with the same key
        headers = {}
        headers.update(request.header_items())
        headers.update(request.unredirected_hdrs)

        try:
            connection._send_request(request.get_method(),
                                     request.get_selector(),
                                     # FIXME: implements 100-continue
                                     #None, # We don't send the body yet
                                     request.get_data(),
                                     headers)
            if self._debuglevel > 0:
                print 'Request sent: [%r]' % request
            response = connection.getresponse()
            convert_to_addinfourl = True
        except (socket.gaierror, httplib.BadStatusLine, httplib.UnknownProtocol,
                socket.error, httplib.HTTPException):
            # Either raises or gives back the result of the retry
            response, convert_to_addinfourl = self.retry_or_raise(http_class,
                                                                  request,
                                                                  first_try)

# FIXME: HTTPConnection does not fully support 100-continue (the
# server responses are just ignored)

#        if code == 100:
#            mutter('Will send the body')
#            # We can send the body now
#            body = request.get_data()
#            if body is None:
#                raise URLError("No data given")
#            connection.send(body)
#            response = connection.getresponse()

        if self._debuglevel > 0:
            print 'Receives response: %r' % response
            print '  For: %r(%r)' % (request.get_method(),
                                     request.get_full_url())

        if convert_to_addinfourl:
            # Shamelessly copied from urllib2
            req = request
            r = response
            r.recv = r.read
            fp = socket._fileobject(r)
            resp = urllib2.addinfourl(fp, r.msg, req.get_full_url())
            resp.code = r.status
            resp.msg = r.reason
            if self._debuglevel > 0:
                print 'Create addinfourl: %r' % resp
                print '  For: %r(%r)' % (request.get_method(),
                                         request.get_full_url())
        else:
            # The response comes from a retry and is already in its
            # final form
            resp = response
        return resp
466
467
#       # we need titled headers in a dict but
468
#       # response.getheaders returns a list of (lower(header).
469
#       # Let's title that because most of bzr handle titled
470
#       # headers, but maybe we should switch to lowercased
471
#       # headers...
472
#        # jam 20060908: I think we actually expect the headers to
473
#        #       be similar to mimetools.Message object, which uses
474
#        #       case insensitive keys. It lowers() all requests.
475
#        #       My concern is that the code may not do perfect title case.
476
#        #       For example, it may use Content-type rather than Content-Type
477
#
478
#        # When we get rid of addinfourl, we must ensure that bzr
479
#        # always use titled headers and that any header received
2004.1.9 by vila
Takes jam's remarks into account when possible, add TODOs for the rest.
480
#        # from server is also titled.
2004.1.1 by vila
Connection sharing, with redirection. without authentification.
481
#
482
#        headers = {}
483
#        for header, value in (response.getheaders()):
484
#            headers[header.title()] = value
485
#        # FIXME: Implements a secured .read method
486
#        response.code = response.status
487
#        response.headers = headers
488
#        return response
489
490
491
class HTTPHandler(AbstractHTTPHandler):
    """A custom handler that just thunks into HTTPConnection"""

    def http_open(self, request):
        """Open request through do_open, with the HTTPConnection class."""
        connection_class = HTTPConnection
        return self.do_open(connection_class, request)
496
497
498
class HTTPSHandler(AbstractHTTPHandler):
    """A custom handler that just thunks into HTTPSConnection"""

    # urllib2 looks the pre-processing hooks up by protocol name
    # (<protocol>_request), so the inherited http_request is never
    # called for https requests. Alias it, or https requests miss the
    # default headers.
    https_request = AbstractHTTPHandler.http_request

    def https_open(self, request):
        """Open request through do_open, with the HTTPSConnection class."""
        return self.do_open(HTTPSConnection, request)
503
504
505
class HTTPRedirectHandler(urllib2.HTTPRedirectHandler):
    """Handles redirect requests.

    We need our own implementation because we use a specific Request
    object and because we want to apply a specific redirection policy.
    """
    _debuglevel = DEBUG
    # RFC2616 says that only read requests should be redirected
    # without interacting with the user. But bzr uses some
    # shortcuts to optimize against roundtrips, which can lead to
    # write requests being issued before read requests of the
    # containing dirs can be redirected. So we redirect write
    # requests in the same way, which seems to respect the spirit
    # of the RFC if not its letter.

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """See urllib2.HTTPRedirectHandler.redirect_request"""
        # We would have preferred to update the request instead
        # of creating a new one, but the urllib2.Request object
        # has a too complicated creation process to provide a
        # simple enough equivalent update process. Instead, when
        # redirecting, we only give the following request in the
        # redirect chain a reference to its parent request.

        # Some codes make no sense in our context and are treated
        # as errors:

        # 300: Multiple choices for different representations of
        #      the URI. Using that mechanism with bzr would violate
        #      the protocol neutrality of Transport.

        # 304: Not modified (SHOULD only occur with conditional
        #      GETs, which are not used by our implementation)

        # 305: Use proxy. I can't imagine this one occurring in
        #      our context-- vila/20060909

        # 306: Unused (if the RFC says so...)

        # If the code is 302 and the request is HEAD, some may
        # think that it is a sufficient hint that the file exists
        # and that we MAY avoid following the redirections. But
        # if we want to be sure, we MUST follow them.

        if code not in (301, 302, 303, 307):
            raise urllib2.HTTPError(req.get_full_url(), code, msg, headers, fp)

        return Request(req.get_method(), newurl,
                       headers=req.headers,
                       origin_req_host=req.get_origin_req_host(),
                       unverifiable=True,
                       # TODO: It will be nice to be able to
                       # detect virtual hosts sharing the same
                       # IP address, that will allow us to
                       # share the same connection...
                       connection=None,
                       parent=req,
                       )

    def http_error_302(self, req, fp, code, msg, headers):
        """Request the URI we are redirected to.

        Copied from urllib2 to be able to fake_close the
        associated connection, *before* issuing the redirected
        request but *after* having possibly raised an error.
        """
        # Some servers (incorrectly) return multiple Location headers
        # (so probably same goes for URI).  Use first header.

        # TODO: Once we get rid of addinfourl objects, the
        # following will need to be updated to use correct case
        # for headers.
        for header_name in ('location', 'uri'):
            if header_name in headers:
                newurl = headers.getheaders(header_name)[0]
                break
        else:
            # Neither header is present: nothing to follow.
            return

        if self._debuglevel > 0:
            print('Redirected to: %s (followed: %r)'
                  % (newurl, req.follow_redirections))

        if req.follow_redirections is False:
            # The caller asked not to follow: record the target as
            # received and hand back the response untouched.
            req.redirected_to = newurl
            return fp

        newurl = urlparse.urljoin(req.get_full_url(), newurl)

        # This call succeeds or raises an error. urllib2 returns
        # if redirect_request returns None, but our
        # redirect_request never returns None.
        redirected_req = self.redirect_request(req, fp, code, msg, headers,
                                               newurl)

        # Loop detection: .redirect_dict maps each url already
        # visited in this redirect chain to its number of visits.
        if not hasattr(req, 'redirect_dict'):
            visited = {}
            redirected_req.redirect_dict = visited
            req.redirect_dict = visited
        else:
            visited = req.redirect_dict
            redirected_req.redirect_dict = visited
            if (visited.get(newurl, 0) >= self.max_repeats
                or len(visited) >= self.max_redirections):
                raise urllib2.HTTPError(req.get_full_url(), code,
                                        self.inf_msg + msg, headers, fp)
        visited[newurl] = visited.get(newurl, 0) + 1

        # We can close the fp now that we are sure that we won't
        # use it with HTTPError.
        fp.close()
        # We have all we need already in the response
        req.connection.fake_close()

        return self.parent.open(redirected_req)

    http_error_301 = http_error_303 = http_error_307 = http_error_302
2004.1.1 by vila
Connection sharing, with redirection. without authentification.
621
622
2167.3.1 by v.ladeuil+lp at free
Fix bug #74759.
623
class ProxyHandler(urllib2.ProxyHandler):
    """Handles proxy setting.

    Copied and modified from urllib2 to be able to modify the
    request during the request pre-processing instead of
    modifying it at _open time. As we capture (or create) the
    connection object during request processing, _open time was
    too late.

    Note that the proxy handling *may* modify the protocol used;
    the request may be against an https server proxied through an
    http proxy. So, https_request will be called, but later it's
    really http_open that will be called. This explains why we
    don't have to call self.parent.open as urllib2 did.
    """

    # Proxies must be in front
    handler_order = 100
    _debuglevel = DEBUG

    def __init__(self, password_manager, proxies=None):
        """Set up the request pre-processors for the configured proxies.

        :param password_manager: Where credentials found in proxy urls
            are stored (through its add_password method).
        :param proxies: Passed as is to urllib2.ProxyHandler.__init__.
        """
        urllib2.ProxyHandler.__init__(self, proxies)
        self.password_manager = password_manager
        # First, let's get rid of urllib2 implementation
        for scheme, proxy in self.proxies.items():
            if self._debuglevel > 0:
                print('Will unbind %s_open for %r' % (scheme, proxy))
            delattr(self, '%s_open' % scheme)

        # We are interested only by the http[s] proxies
        http_proxy = self.get_proxy_env_var('http')
        https_proxy = self.get_proxy_env_var('https')

        if http_proxy is not None:
            if self._debuglevel > 0:
                print('Will bind http_request for %r' % http_proxy)
            self.http_request = lambda request: self.set_proxy(request,
                                                               'http')

        if https_proxy is not None:
            if self._debuglevel > 0:
                print('Will bind https_request for %r' % https_proxy)
            self.https_request = lambda request: self.set_proxy(request,
                                                                'https')

    def get_proxy_env_var(self, name, default_to='all'):
        """Get a proxy env var.

        Note that we indirectly rely on
        urllib.getproxies_environment taking into account the
        uppercased values for proxy variables.

        :param name: The proxy kind; it is looked up lowercased in
            self.proxies.
        :param default_to: Alternate key tried when ``name`` is not
            found, or None to disable the fallback.
        :return: The proxy value or None if none was found.
        """
        for key in (name.lower(), default_to):
            if key is None:
                # No alternate variable requested
                continue
            try:
                return self.proxies[key]
            except KeyError:
                pass
        return None

    def proxy_bypass(self, host):
        """Check if host should be proxied or not.

        :param host: The host, optionally followed by ':port'.
        :return: True if the proxy should be bypassed for ``host``.
        """
        no_proxy = self.get_proxy_env_var('no', None)
        if no_proxy is None:
            return False
        hhost, hport = urllib.splitport(host)
        # Does host match any of the domains mentioned in
        # no_proxy ? The rules about what is authorized in no_proxy
        # are fuzzy (to say the least). We try to allow most
        # commonly seen values.
        for domain in no_proxy.split(','):
            # Users commonly write 'a, b' or leave a trailing comma.
            domain = domain.strip()
            if not domain:
                # An empty entry would become an empty regexp, which
                # matches every host and disables proxying entirely.
                continue
            dhost, dport = urllib.splitport(domain)
            if hport == dport or dport is None:
                # Protect glob chars
                dhost = dhost.replace(".", r"\.")
                dhost = dhost.replace("*", r".*")
                dhost = dhost.replace("?", r".")
                if re.match(dhost, hhost, re.IGNORECASE):
                    return True
        # Nevertheless, there are platform-specific ways to
        # ignore proxies...
        return urllib.proxy_bypass(host)

    def set_proxy(self, request, type):
        """Redirect ``request`` to the proxy configured for ``type``.

        :param request: The request to pre-process.
        :param type: The proxy kind ('http' or 'https').
        :return: The request, modified unless its host bypasses proxies.
        :raises errors.InvalidURL: If no host can be extracted from the
            proxy value.
        """
        if self.proxy_bypass(request.get_host()):
            return request

        proxy = self.get_proxy_env_var(type)
        if self._debuglevel > 0:
            print('set_proxy %s_request for %r' % (type, proxy))
        # Extract credentials from the url and store them in the
        # password manager so that the proxy AuthHandler can use
        # them later.
        proxy, user, password = extract_credentials(proxy)
        if request.proxy_auth == {}:
            # No proxy auth parameters are available, we are
            # handling the first proxied request, initialize.
            # scheme and realm will be set by the AuthHandler
            authuri = extract_authentication_uri(proxy)
            request.proxy_auth = {'user': user, 'password': password,
                                  'authuri': authuri}
            if user and password is not None: # '' is a valid password
                # We default to a realm of None to catch them all.
                self.password_manager.add_password(None, authuri,
                                                   user, password)
        # NOTE(review): the result was never used by the original code;
        # the call is kept in case it has side effects on the request --
        # confirm before removing.
        request.get_type()
        scheme, r_scheme = urllib.splittype(proxy)
        if self._debuglevel > 0:
            print('scheme: %s, r_scheme: %s' % (scheme, r_scheme))
        host, unused_path = urllib.splithost(r_scheme)
        if host is None:
            raise errors.InvalidURL(proxy,
                                    'Invalid syntax in proxy env variable')
        host = urllib.unquote(host)
        request.set_proxy(host, type)
        if self._debuglevel > 0:
            print('set_proxy: proxy set to %s://%s' % (type, host))
        return request
744
745
2420.1.5 by Vincent Ladeuil
Refactor http and proxy authentication. Tests passing. proxy password can be prompted too.
746
class AbstractAuthHandler(urllib2.BaseHandler):
    """A custom abstract authentication handler for all http authentications.

    Provides the meat to handle authentication errors and
    preventively set authentication headers after the first
    successful authentication.

    This can be used for http and proxy, as well as for basic and
    digest authentications.

    This provides a unified interface for all authentication handlers
    (urllib2 provides far too many with different policies).

    The interaction between this handler and the urllib2
    framework is not obvious, it works as follows:

    opener.open(request) is called:

    - that may trigger http_request which will add an authentication header
      (self.build_auth_header) if enough info is available.

    - the request is sent to the server,

    - if an authentication error is received self.auth_required is called,
      we acquire the authentication info in the error headers and call
      self.auth_match to check that we are able to try the
      authentication and complete the authentication parameters,

    - we call parent.open(request), that may trigger http_request
      and will add a header (self.build_auth_header), but here we have
      all the required info (keep in mind that the request and
      authentication used in the recursive calls are really (and must be)
      the *same* objects).

    - if the call returns a response, the authentication has been
      successful and the request authentication parameters have been updated.
    """

    # The following attributes should be defined by daughter
    # classes:
    # - auth_required_header:  the header received from the server
    # - auth_header: the header sent in the request

    def __init__(self, password_manager):
        """Keep the password manager and shortcuts to its methods."""
        self.password_manager = password_manager
        self.find_user_password = password_manager.find_user_password
        self.add_password = password_manager.add_password

    def update_auth(self, auth, key, value):
        """Update a value in auth marking the auth as modified if needed"""
        if auth.get(key, None) != value:
            auth[key] = value
            auth['modified'] = True

    def auth_required(self, request, headers):
        """Retry the request if the auth scheme is ours.

        :param request: The request needing authentication.
        :param headers: The headers for the authentication error response.
        :return: None or the response for the authenticated request.
        """
        server_header = headers.get(self.auth_required_header, None)
        if server_header is None:
            # The http error MUST have the associated
            # header. This must never happen in production code.
            raise KeyError('%s not found' % self.auth_required_header)

        auth = self.get_auth(request)
        if auth.get('user', None) is None:
            # Without a known user, we can't authenticate
            return None

        auth['modified'] = False
        if not self.auth_match(server_header, auth):
            # We are not qualified to handle the authentication.
            # Note: the authentication error handling will try all
            # available handlers. If one of them authenticates
            # successfully, a response will be returned. If none of
            # them succeeds, None will be returned and the error
            # handler will raise the 401 'Unauthorized' or the 407
            # 'Proxy Authentication Required' error.
            return None

        # auth_match may have modified auth (by adding the
        # password or changing the realm, for example)
        already_sent = request.get_header(self.auth_header, None) is not None
        if already_sent and not auth['modified']:
            # We already tried that, give up
            return None

        response = self.parent.open(request)
        if response:
            self.auth_successful(request, response)
        return response

    def add_auth_header(self, request, header):
        """Add the authentication header to the request"""
        request.add_unredirected_header(self.auth_header, header)

    def auth_match(self, header, auth):
        """Check that we are able to handle that authentication scheme.

        The request authentication parameters may need to be
        updated with info from the server. Some of these
        parameters, when combined, are considered to be the
        authentication key: if one of them changes, the
        authentication result may change. 'user' and 'password'
        are examples, but some auth schemes may have others
        (digest's nonce is an example, digest's nonce_count is a
        *counter-example*). Such parameters must be updated by
        using the update_auth() method.

        :param header: The authentication header sent by the server.
        :param auth: The auth parameters already known. They may be
             updated.
        :returns: True if we can try to handle the authentication.
        """
        raise NotImplementedError(self.auth_match)

    def build_auth_header(self, auth, request):
        """Build the value of the header used to authenticate.

        :param auth: The auth parameters needed to build the header.
        :param request: The request needing authentication.

        :return: None or header.
        """
        raise NotImplementedError(self.build_auth_header)

    def auth_successful(self, request, response):
        """The authentication was successful for the request.

        Additional info may be available in the response. The default
        implementation does nothing.

        :param request: The successfully authenticated request.
        :param response: The server response (may contain auth info).
        """

    def get_password(self, user, authuri, realm=None):
        """Ask user for a password if none is already available."""
        user_found, password = self.find_user_password(realm, authuri)
        # FIXME: write a test for the case where the stored user is
        # not the one we are asked about.
        if user_found == user and password is not None:
            return password

        # Prompt user only if we can't find a password
        realm_prompt = ''
        if realm:
            realm_prompt = " Realm: '%s'" % realm
        scheme, host, path, query, fragment = urlparse.urlsplit(authuri)
        password = ui.ui_factory.get_password(prompt=self.password_prompt,
                                              user=user, host=host,
                                              realm=realm_prompt)
        if password is not None:
            self.add_password(realm, authuri, user, password)
        return password

    def http_request(self, request):
        """Insert an authentication header if information is available"""
        auth = self.get_auth(request)
        if self.auth_params_reusable(auth):
            header = self.build_auth_header(auth, request)
            self.add_auth_header(request, header)
        return request

    https_request = http_request # FIXME: Need test
914
915
2420.1.11 by Vincent Ladeuil
Implement digest authentication. Test suite passes. Tested against apache-2.x.
916
class BasicAuthHandler(AbstractAuthHandler):
    """A custom basic authentication handler."""

    handler_order = 500

    # Extracts the realm value from the parameters of a challenge
    auth_regexp = re.compile('realm="([^"]*)"', re.I)

    def build_auth_header(self, auth, request):
        """Build the value of the header used for basic authentication.

        :param auth: The auth parameters, 'user' and 'password' are used.
        :param request: The request needing authentication (unused here).
        :return: 'Basic ' followed by the base64 of 'user:password'.
        """
        raw = '%s:%s' % (auth['user'], auth['password'])
        # The 'base64' codec emits a newline every 76 characters, not only
        # at the end: remove them all or long credentials would produce a
        # header value spanning several lines.
        encoded = raw.encode('base64').replace('\n', '')
        return 'Basic ' + encoded

    def auth_match(self, header, auth):
        """Check that the server asks for basic authentication.

        On success, 'scheme', 'realm' and, if it was missing, 'password'
        are updated in ``auth``.

        :param header: The authentication header sent by the server.
        :param auth: The auth parameters already known. They may be
             updated.
        :returns: True if we can try to handle the authentication.
        """
        try:
            scheme, raw_auth = header.split(None, 1)
        except ValueError:
            # A challenge made of a bare scheme (or nothing at all)
            # carries no realm, so it cannot be a basic one we can answer.
            return False
        scheme = scheme.lower()
        if scheme != 'basic':
            return False

        match = self.auth_regexp.search(raw_auth)
        if match is None:
            return False

        # group(1) is the realm itself; groups() would give a 1-tuple
        realm = match.group(1)
        # Put useful info into auth
        self.update_auth(auth, 'scheme', scheme)
        self.update_auth(auth, 'realm', realm)
        if auth.get('password', None) is None:
            password = self.get_password(auth['user'], auth['authuri'],
                                         auth['realm'])
            self.update_auth(auth, 'password', password)
        return True

    def auth_params_reusable(self, auth):
        """Tell whether ``auth`` can be used to preventively authenticate."""
        # If the auth scheme is known, it means a previous
        # authentication was successful, all information is
        # available, no further checks are needed.
        return auth.get('scheme', None) == 'basic'
954
955
956
def get_digest_algorithm_impls(algorithm):
    """Return the hashing helpers used to compute a digest response.

    :param algorithm: the algorithm name; only the exact strings 'MD5'
        and 'SHA' are recognized.
    :return: a (H, KD) tuple. H(x) gives the hexadecimal digest of x and
        KD(secret, data) gives H("secret:data"). (None, None) is
        returned when the algorithm is not recognized.
    """
    if algorithm == 'MD5':
        def H(x):
            return md5.new(x).hexdigest()
    elif algorithm == 'SHA':
        def H(x):
            return sha.new(x).hexdigest()
    else:
        # Unknown algorithm: no implementation to offer
        return None, None

    def KD(secret, data):
        return H("%s:%s" % (secret, data))

    return H, KD
966
967
2420.1.14 by Vincent Ladeuil
Tested against squid-2.6.5 with digest authentication.
968
def get_new_cnonce(nonce, nonce_count):
    """Build a fresh client nonce.

    The value is derived from the server nonce, the nonce count, the
    current time and 8 random bytes.

    :param nonce: the nonce received from the server.
    :param nonce_count: the number of uses of that nonce (an integer).
    :return: the first 16 characters of the hexadecimal sha digest of
        the mix described above.
    """
    ingredients = (nonce, nonce_count, time.ctime(), urllib2.randombytes(8))
    hexdigest = sha.new('%s:%d:%s:%s' % ingredients).hexdigest()
    return hexdigest[:16]
972
973
2420.1.11 by Vincent Ladeuil
Implement digest authentication. Test suite passes. Tested against apache-2.x.
974
class DigestAuthHandler(AbstractAuthHandler):
    """A custom digest authentication handler.

    Only the 'auth' quality of protection is implemented (no
    'auth-int'), with the algorithms known to
    get_digest_algorithm_impls.
    """

    # Before basic as digest is a bit more secure
    handler_order = 490

    def auth_params_reusable(self, auth):
        """Tell whether the recorded auth parameters can be sent as is.

        :param auth: the dict holding the authentication parameters.
        :return: True when the scheme recorded in auth is 'digest'.
        """
        # If the auth scheme is known, it means a previous
        # authentication was successful, all information is
        # available, no further checks are needed.
        return auth.get('scheme', None) == 'digest'

    def auth_match(self, header, auth):
        """Check whether the server challenge is a digest one we can answer.

        On success the parameters needed to build the response (scheme,
        algorithm, realm, nonce, nonce_count, qop, opaque and, if it was
        not known yet, the password) are recorded into auth.

        :param header: the value of the authentication header sent by
            the server.
        :param auth: the dict holding the authentication parameters;
            it is left untouched when False is returned.
        :return: True if the challenge can be handled, False otherwise.
        """
        # Some schemes send a challenge made of the scheme name only
        # (no parameters); unpacking the split blindly would raise
        # ValueError instead of letting another handler try.
        parts = header.split(None, 1)
        if len(parts) != 2:
            return False
        scheme, raw_auth = parts
        scheme = scheme.lower()
        if scheme != 'digest':
            return False

        # Put the requested authentication info into a dict
        req_auth = urllib2.parse_keqv_list(urllib2.parse_http_list(raw_auth))

        # Check that we can handle that authentication. The server may
        # offer several comma-separated qop values, we only need
        # 'auth' to be one of them (no auth-int so far).
        offered_qop = req_auth.get('qop', None)
        if offered_qop is None:
            return False
        if 'auth' not in [q.strip() for q in offered_qop.split(',')]:
            return False
        qop = 'auth'

        H, KD = get_digest_algorithm_impls(req_auth.get('algorithm', 'MD5'))
        if H is None:
            return False

        # The nonce is required: check it before prompting the user or
        # modifying auth so that a failure has no side effect.
        nonce = req_auth.get('nonce', None)
        if nonce is None:
            return False

        realm = req_auth.get('realm', None)
        if auth.get('password', None) is None:
            password = self.get_password(auth['user'], auth['authuri'],
                                         realm)
            # Go through update_auth like for the other parameters
            self.update_auth(auth, 'password', password)

        # Put useful info into auth
        self.update_auth(auth, 'scheme', scheme)
        if req_auth.get('algorithm', None) is not None:
            self.update_auth(auth, 'algorithm', req_auth.get('algorithm'))
        self.update_auth(auth, 'realm', realm)
        if auth.get('nonce', None) != nonce:
            # A new nonce, never used
            self.update_auth(auth, 'nonce_count', 0)
        self.update_auth(auth, 'nonce', nonce)
        self.update_auth(auth, 'qop', qop)
        self.update_auth(auth, 'opaque', req_auth.get('opaque', None))

        return True

    def build_auth_header(self, auth, request):
        """Build the digest authentication header value for a request.

        The nonce count recorded in auth is incremented as a side
        effect since the nonce is used once more.

        :param auth: the dict holding the authentication parameters as
            recorded by auth_match ('user', 'realm', 'password', 'nonce',
            'qop' and 'nonce_count' are required).
        :param request: the request to authenticate; its method and
            selector are part of the digest.
        :return: the header value, starting with 'Digest '.
        """
        # Only the part of the selector following the host is digested
        url_scheme, url_selector = urllib.splittype(request.get_selector())
        sel_host, uri = urllib.splithost(url_selector)

        A1 = '%s:%s:%s' % (auth['user'], auth['realm'], auth['password'])
        A2 = '%s:%s' % (request.get_method(), uri)

        nonce = auth['nonce']
        qop = auth['qop']

        nonce_count = auth['nonce_count'] + 1
        ncvalue = '%08x' % nonce_count
        cnonce = get_new_cnonce(nonce, nonce_count)

        H, KD = get_digest_algorithm_impls(auth.get('algorithm', 'MD5'))
        nonce_data = '%s:%s:%s:%s:%s' % (nonce, ncvalue, cnonce, qop, H(A2))
        request_digest = KD(H(A1), nonce_data)

        fields = ['username="%s"' % auth['user'],
                  'realm="%s"' % auth['realm'],
                  'nonce="%s"' % nonce,
                  'uri="%s"' % uri,
                  'cnonce="%s"' % cnonce,
                  'nc=%s' % ncvalue,
                  'qop="%s"' % qop,
                  'response="%s"' % request_digest,
                  ]
        # Append the optional fields
        opaque = auth.get('opaque', None)
        if opaque:
            fields.append('opaque="%s"' % opaque)
        algorithm = auth.get('algorithm', None)
        if algorithm:
            fields.append('algorithm="%s"' % algorithm)
        header = 'Digest ' + ', '.join(fields)

        # We have used the nonce once more, update the count
        auth['nonce_count'] = nonce_count

        return header
1065
1066
1067
class HTTPAuthHandler(AbstractAuthHandler):
    """Custom http authentication handler.

    Send the authentication preventively to avoid the roundtrip
    associated with the 401 error and keep the relevant info in
    the auth request attribute.
    """

    auth_required_header = 'www-authenticate'
    auth_header = 'Authorization'
    password_prompt = 'HTTP %(user)s@%(host)s%(realm)s password'

    def get_auth(self, request):
        """Return the auth params stored in the request 'auth' attribute"""
        return request.auth

    def set_auth(self, request, auth):
        """Store the auth params into the request 'auth' attribute"""
        request.auth = auth

    def http_error_401(self, req, fp, code, msg, headers):
        """Hand a 401 response over to auth_required.

        Only the request and the response headers are used.
        """
        return self.auth_required(req, headers)
1089
1090
2420.1.11 by Vincent Ladeuil
Implement digest authentication. Test suite passes. Tested against apache-2.x.
1091
class ProxyAuthHandler(AbstractAuthHandler):
    """Custom proxy authentication handler.

    Send the authentication preventively to avoid the roundtrip
    associated with the 407 error and keep the relevant info in
    the proxy_auth request attribute.
    """

    auth_required_header = 'proxy-authenticate'
    # FIXME: the correct capitalization is Proxy-Authorization,
    # but python-2.4 urllib2.Request insist on using capitalize()
    # instead of title().
    auth_header = 'Proxy-authorization'
    password_prompt = 'Proxy %(user)s@%(host)s%(realm)s password'

    def get_auth(self, request):
        """Return the auth params stored in the request 'proxy_auth' attribute"""
        return request.proxy_auth

    def set_auth(self, request, auth):
        """Store the auth params into the request 'proxy_auth' attribute"""
        request.proxy_auth = auth

    def http_error_407(self, req, fp, code, msg, headers):
        """Hand a 407 response over to auth_required.

        Only the request and the response headers are used.
        """
        return self.auth_required(req, headers)
2420.1.3 by Vincent Ladeuil
Implement http proxy basic authentication.
1116
1117
2420.1.11 by Vincent Ladeuil
Implement digest authentication. Test suite passes. Tested against apache-2.x.
1118
class HTTPBasicAuthHandler(BasicAuthHandler, HTTPAuthHandler):
    """Basic authentication handler for http servers.

    Combines BasicAuthHandler with HTTPAuthHandler, nothing is added.
    """
1120
1121
1122
class ProxyBasicAuthHandler(BasicAuthHandler, ProxyAuthHandler):
    """Basic authentication handler for proxies.

    Combines BasicAuthHandler with ProxyAuthHandler, nothing is added.
    """
1124
1125
1126
class HTTPDigestAuthHandler(DigestAuthHandler, HTTPAuthHandler):
    """Custom http digest authentication handler.

    Combines DigestAuthHandler with HTTPAuthHandler, nothing is added.
    """
1128
1129
1130
class ProxyDigestAuthHandler(DigestAuthHandler, ProxyAuthHandler):
    """Custom proxy digest authentication handler.

    Combines DigestAuthHandler with ProxyAuthHandler, nothing is added.
    """
1132
2420.1.3 by Vincent Ladeuil
Implement http proxy basic authentication.
1133
2004.1.1 by vila
Connection sharing, with redirection. without authentification.
1134
class HTTPErrorProcessor(urllib2.HTTPErrorProcessor):
    """Process HTTP error responses.

    We don't really process the errors, quite the contrary
    instead, we leave our Transport handle them.
    """

    def http_response(self, request, response):
        """Let some responses go through, send the others to the error chain.

        :param request: the request that was sent.
        :param response: the response received for it.
        :return: the response itself for the 200, 206 and 404 codes,
            otherwise whatever self.parent.error returns.
        """
        status = response.code
        reason = response.msg
        headers = response.info()

        # 200: Ok, 206: Partial content, 404: Not found
        if status in (200, 206, 404):
            return response
        return self.parent.error('http', request, response,
                                 status, reason, headers)

    https_response = http_response
1153
1154
1155
class HTTPDefaultErrorHandler(urllib2.HTTPDefaultErrorHandler):
    """Translate common errors into bzr Exceptions"""

    def http_error_default(self, req, fp, code, msg, hdrs):
        """Raise the bzr exception matching the http error code.

        :param req: the request that failed.
        :param fp: the response body, attached to the 404 error.
        :param code: the http status code.
        :param msg: the http status message.
        :param hdrs: the response headers, attached to the 404 error.
        :raises NoSuchFile: for a 404.
        :raises TransportError: for a 403.
        :raises InvalidRange: for a 416.
        :raises InvalidHttpResponse: for any other code.
        """
        if code == 404:
            http_error = HTTPError(req.get_full_url(), code, msg, hdrs, fp)
            raise errors.NoSuchFile(req.get_selector(), extra=http_error)

        if code == 403:
            raise errors.TransportError('Server refuses to fullfil the request')

        if code == 416:
            # We don't know which, but one of the ranges we
            # specified was wrong. So we raise with 0 for a lack
            # of a better magic value.
            raise errors.InvalidRange(req.get_full_url(), 0)

        explanation = 'Unable to handle http code %d: %s' % (code, msg)
        raise errors.InvalidHttpResponse(req.get_full_url(), explanation)
2004.2.1 by John Arbash Meinel
Cleanup of urllib functions
1175
2004.1.1 by vila
Connection sharing, with redirection. without authentification.
1176
class Opener(object):
    """A wrapper around urllib2.build_opener

    Daughter classes can override to build their own specific opener
    """
    # TODO: Provides hooks for daughter classes.

    def __init__(self,
                 connection=ConnectionHandler,
                 redirect=HTTPRedirectHandler,
                 error=HTTPErrorProcessor,):
        """Build the opener and expose its open method.

        :param connection: the handler taking care of the connections.
        :param redirect: the handler taking care of the redirections.
        :param error: the handler processing the error responses.
        """
        passwords = PasswordManager()
        self.password_manager = passwords
        # The same password manager is shared by the proxy handler and
        # all the authentication handlers.
        handlers = [
            connection, redirect, error,
            ProxyHandler(passwords),
            HTTPBasicAuthHandler(passwords),
            HTTPDigestAuthHandler(passwords),
            ProxyBasicAuthHandler(passwords),
            ProxyDigestAuthHandler(passwords),
            HTTPHandler,
            HTTPSHandler,
            HTTPDefaultErrorHandler,
            ]
        self._opener = urllib2.build_opener(*handlers)
        self.open = self._opener.open
        if DEBUG >= 2:
            # When dealing with handler order, it's easy to mess
            # things up, the following will help understand which
            # handler is used, when and for what.
            import pprint
            pprint.pprint(self._opener.__dict__)