612
class ProxyHandler(urllib2.ProxyHandler):
    """Handles proxy setting.

    Copied and modified from urllib2 to be able to modify the
    request during the request pre-processing instead of
    modifying it at _open time. As we capture (or create) the
    connection object during request processing, _open time was
    too late.

    Note that the proxy handling *may* modify the protocol used;
    the request may be against an https server proxied through an
    http proxy. So, https_request will be called, but later it's
    really http_open that will be called. This explains why we
    don't have to call self.parent.open as the urllib2 did.
    """

    # Proxies must be in front
    handler_order = 100
    # NOTE(review): the class-level debug default is not visible in the
    # excerpt under review; 0 (silent) is assumed here -- confirm against
    # the module's debug setting.
    _debuglevel = 0

    def __init__(self, proxies=None):
        """Replace urllib2's *_open hooks by request pre-processors.

        :param proxies: Passed unchanged to urllib2.ProxyHandler.__init__,
            which stores the scheme -> proxy URL mapping in self.proxies.
        """
        urllib2.ProxyHandler.__init__(self, proxies)
        # First, let's get rid of urllib2 implementation
        for scheme, proxy in self.proxies.items():
            if self._debuglevel > 0:
                print('Will unbind %s_open for %r' % (scheme, proxy))
            delattr(self, '%s_open' % scheme)

        # We are interested only by the http[s] proxies
        http_proxy = self.get_proxy_env_var('http')
        https_proxy = self.get_proxy_env_var('https')

        if http_proxy is not None:
            if self._debuglevel > 0:
                print('Will bind http_request for %r' % http_proxy)
            self.http_request = (
                lambda request: self.set_proxy(request, 'http'))

        if https_proxy is not None:
            if self._debuglevel > 0:
                print('Will bind https_request for %r' % https_proxy)
            self.https_request = (
                lambda request: self.set_proxy(request, 'https'))

    def get_proxy_env_var(self, name, default_to='all'):
        """Get a proxy env var.

        Note that we indirectly rely on
        urllib.getproxies_environment taking into account the
        uppercased values for proxy variables.

        :param name: Key looked up in self.proxies (e.g. 'http', 'https',
            'no'); it is lower-cased before the lookup.
        :param default_to: Alternate key tried when name is not defined,
            or None to disable the fallback.
        :return: The configured value, or None if neither key is defined.
        """
        try:
            return self.proxies[name.lower()]
        except KeyError:
            pass
        if default_to is not None:
            # Try to get the alternate environment variable
            return self.proxies.get(default_to)
        return None

    def proxy_bypass(self, host):
        """Check if host should be proxied or not.

        :param host: Request target, as 'host' or 'host:port'.
        :return: True if the proxy must be bypassed for host.
        """
        no_proxy = self.get_proxy_env_var('no', None)
        if no_proxy is None:
            # No exclusion list is configured: do not bypass.
            return False
        hhost, hport = urllib.splitport(host)
        # Does host match any of the domains mentioned in
        # no_proxy ? The rules about what is authorized in no_proxy
        # are fuzzy (to say the least). We try to allow most
        # commonly seen values.
        for domain in no_proxy.split(','):
            domain = domain.strip()
            if not domain:
                # An empty entry (e.g. a trailing comma) would turn into
                # an empty regex, which matches every host.
                continue
            dhost, dport = urllib.splitport(domain)
            if hport == dport or dport is None:
                # Turn the glob-like entry into a regex: dots are
                # literal, '*' is any run of characters, '?' is any
                # single character.
                dhost = dhost.replace(".", r"\.")
                dhost = dhost.replace("*", r".*")
                dhost = dhost.replace("?", r".")
                # re.match anchors at the start of hhost only.
                if re.match(dhost, hhost, re.IGNORECASE):
                    return True
        # Nevertheless, there are platform-specific ways to
        # ignore proxies.
        return urllib.proxy_bypass(host)

    def set_proxy(self, request, type):
        """Point request at the proxy configured for type.

        :param request: The request being pre-processed. It is modified in
            place (set_proxy, and add_header when the proxy URL carries
            'user:password@' credentials).
        :param type: Key used to select the proxy ('http' or 'https').
        :return: request, so this can be used as a *_request hook.
        """
        # Function-scope import: the file's import block is not part of
        # the excerpt under review.
        import base64

        if self.proxy_bypass(request.get_host()):
            return request

        proxy = self.get_proxy_env_var(type)
        if self._debuglevel > 0:
            print('set_proxy %s_request for %r' % (type, proxy))
        # NOTE(review): orig_type is never read below; the call is kept
        # in case request.get_type() is needed for its side effects --
        # confirm and drop if not.
        orig_type = request.get_type()
        proxy_scheme, r_type = urllib.splittype(proxy)
        host, _ = urllib.splithost(r_type)
        if '@' in host:
            user_pass, host = host.split('@', 1)
            if ':' in user_pass:
                user, password = user_pass.split(':', 1)
                user_pass = '%s:%s' % (urllib.unquote(user),
                                       urllib.unquote(password))
                # b64encode, unlike the 'base64' codec, never inserts
                # line breaks, so the header value stays on one line
                # whatever the length of the credentials.
                user_pass = base64.b64encode(user_pass)
                request.add_header('Proxy-authorization',
                                   'Basic ' + user_pass)
        host = urllib.unquote(host)
        request.set_proxy(host, proxy_scheme)
        if self._debuglevel > 0:
            print('set_proxy: proxy set to %r://%r' % (proxy_scheme, host))
        return request
611
722
class HTTPBasicAuthHandler(urllib2.HTTPBasicAuthHandler):
612
723
"""Custom basic authentification handler.