~bzr-pqm/bzr/bzr.dev

1540.3.1 by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib.
1
# Copyright (C) 2006 Canonical Ltd
1540.3.18 by Martin Pool
Style review fixes (thanks robertc)
2
#
1540.3.1 by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib.
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
1540.3.18 by Martin Pool
Style review fixes (thanks robertc)
7
#
1540.3.1 by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
1540.3.18 by Martin Pool
Style review fixes (thanks robertc)
12
#
1540.3.1 by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""http/https transport using pycurl"""
18
19
# TODO: test reporting of http errors
20
1616.1.9 by Martin Pool
Set Cache-control: max-age=0 and Pragma: no-cache
21
# TODO: Transport option to control caching of particular requests; broadly we
22
# would want to offer "caching allowed" or "must revalidate", depending on
23
# whether we expect a particular file will be modified after it's committed.
24
# It's probably safer to just always revalidate.  mbp 20060321
25
1612.1.1 by Martin Pool
Raise errors correctly on pycurl connection failure
26
import os
1540.3.1 by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib.
27
from StringIO import StringIO
1540.3.5 by Martin Pool
Raise exception if unicode is passed to transport; formatting fixes
28
1540.3.15 by Martin Pool
[merge] large merge to sync with bzr.dev
29
import bzrlib
1540.3.5 by Martin Pool
Raise exception if unicode is passed to transport; formatting fixes
30
from bzrlib.errors import (TransportNotPossible, NoSuchFile,
1540.3.7 by Martin Pool
Prepare to select a transport depending on what dependencies can be satisfied.
31
                           TransportError, ConnectionError,
32
                           DependencyNotPresent)
1540.3.18 by Martin Pool
Style review fixes (thanks robertc)
33
from bzrlib.trace import mutter
1636.1.2 by Robert Collins
More review fixen to the relpath at '/' fixes.
34
from bzrlib.transport import register_urlparse_netloc_protocol
1540.3.10 by Martin Pool
[broken] keep hooking pycurl into test framework
35
from bzrlib.transport.http import HttpTransportBase, extract_auth, HttpServer
1540.3.1 by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib.
36
1540.3.7 by Martin Pool
Prepare to select a transport depending on what dependencies can be satisfied.
37
try:
38
    import pycurl
39
except ImportError, e:
40
    mutter("failed to import pycurl: %s", e)
41
    raise DependencyNotPresent('pycurl', e)
42
1684.1.5 by Martin Pool
(patch) check that pycurl will actuall initialize as well as load (Alexander)
43
try:
44
    # see if we can actually initialize PyCurl - sometimes it will load but
45
    # fail to start up due to this bug:
46
    #  
47
    #   32. (At least on Windows) If libcurl is built with c-ares and there's
48
    #   no DNS server configured in the system, the ares_init() call fails and
49
    #   thus curl_easy_init() fails as well. This causes weird effects for
50
    #   people who use numerical IP addresses only.
51
    #
52
    # reported by Alexander Belchenko, 2006-04-26
53
    pycurl.Curl()
54
except pycurl.error, e:
55
    mutter("failed to initialize pycurl: %s", e)
56
    raise DependencyNotPresent('pycurl', e)
57
1540.3.7 by Martin Pool
Prepare to select a transport depending on what dependencies can be satisfied.
58
1636.1.2 by Robert Collins
More review fixen to the relpath at '/' fixes.
59
register_urlparse_netloc_protocol('http+pycurl')
1636.1.1 by Robert Collins
Fix calling relpath() and abspath() on transports at their root.
60
61
1540.3.1 by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib.
62
class PyCurlTransport(HttpTransportBase):
1540.3.3 by Martin Pool
Review updates of pycurl transport
63
    """http client transport using pycurl
64
65
    PyCurl is a Python binding to the C "curl" multiprotocol client.
66
67
    This transport can be significantly faster than the builtin Python client. 
68
    Advantages include: DNS caching, connection keepalive, and ability to 
69
    set headers to allow caching.
70
    """
71
1540.3.1 by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib.
72
    def __init__(self, base):
73
        super(PyCurlTransport, self).__init__(base)
1540.3.7 by Martin Pool
Prepare to select a transport depending on what dependencies can be satisfied.
74
        mutter('using pycurl %s' % pycurl.version)
1540.3.1 by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib.
75
1540.3.10 by Martin Pool
[broken] keep hooking pycurl into test framework
76
    def should_cache(self):
77
        """Return True if the data pulled across should be cached locally.
78
        """
79
        return True
80
1540.3.3 by Martin Pool
Review updates of pycurl transport
81
    def has(self, relpath):
1540.3.14 by Martin Pool
[pycurl] Make Curl instance a local variable not a long-lived object.
82
        curl = pycurl.Curl()
1540.3.24 by Martin Pool
Add new protocol 'http+pycurl' that always uses PyCurl.
83
        abspath = self._real_abspath(relpath)
1540.3.14 by Martin Pool
[pycurl] Make Curl instance a local variable not a long-lived object.
84
        curl.setopt(pycurl.URL, abspath)
85
        curl.setopt(pycurl.FOLLOWLOCATION, 1) # follow redirect responses
86
        self._set_curl_options(curl)
1540.3.3 by Martin Pool
Review updates of pycurl transport
87
        # don't want the body - ie just do a HEAD request
1540.3.14 by Martin Pool
[pycurl] Make Curl instance a local variable not a long-lived object.
88
        curl.setopt(pycurl.NOBODY, 1)
89
        self._curl_perform(curl)
90
        code = curl.getinfo(pycurl.HTTP_CODE)
91
        if code == 404: # not found
92
            return False
93
        elif code in (200, 302): # "ok", "found"
94
            return True
1612.1.1 by Martin Pool
Raise errors correctly on pycurl connection failure
95
        elif code == 0:
96
            self._raise_curl_connection_error(curl)
1540.3.14 by Martin Pool
[pycurl] Make Curl instance a local variable not a long-lived object.
97
        else:
1612.1.1 by Martin Pool
Raise errors correctly on pycurl connection failure
98
            self._raise_curl_http_error(curl)
1540.3.3 by Martin Pool
Review updates of pycurl transport
99
        
1540.3.26 by Martin Pool
[merge] bzr.dev; pycurl not updated for readv yet
100
    def _get(self, relpath, ranges):
1540.3.14 by Martin Pool
[pycurl] Make Curl instance a local variable not a long-lived object.
101
        curl = pycurl.Curl()
1540.3.24 by Martin Pool
Add new protocol 'http+pycurl' that always uses PyCurl.
102
        abspath = self._real_abspath(relpath)
1540.3.1 by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib.
103
        sio = StringIO()
1540.3.14 by Martin Pool
[pycurl] Make Curl instance a local variable not a long-lived object.
104
        curl.setopt(pycurl.URL, abspath)
105
        self._set_curl_options(curl)
106
        curl.setopt(pycurl.WRITEFUNCTION, sio.write)
107
        curl.setopt(pycurl.NOBODY, 0)
1540.3.27 by Martin Pool
Integrate http range support for pycurl
108
        if ranges is not None:
109
            assert len(ranges) == 1
110
            # multiple ranges not supported yet because we can't decode the
111
            # response
112
            curl.setopt(pycurl.RANGE, '%d-%d' % ranges[0])
1540.3.14 by Martin Pool
[pycurl] Make Curl instance a local variable not a long-lived object.
113
        self._curl_perform(curl)
114
        code = curl.getinfo(pycurl.HTTP_CODE)
1540.3.1 by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib.
115
        if code == 404:
116
            raise NoSuchFile(abspath)
1540.3.13 by Martin Pool
Curl should follow http redirects, the same as urllib
117
        elif code == 200:
118
            sio.seek(0)
1540.3.27 by Martin Pool
Integrate http range support for pycurl
119
            return code, sio
120
        elif code == 206 and (ranges is not None):
121
            sio.seek(0)
122
            return code, sio
1612.1.1 by Martin Pool
Raise errors correctly on pycurl connection failure
123
        elif code == 0:
124
            self._raise_curl_connection_error(curl)
1540.3.13 by Martin Pool
Curl should follow http redirects, the same as urllib
125
        else:
1612.1.1 by Martin Pool
Raise errors correctly on pycurl connection failure
126
            self._raise_curl_http_error(curl)
127
128
    def _raise_curl_connection_error(self, curl):
129
        curl_errno = curl.getinfo(pycurl.OS_ERRNO)
130
        url = curl.getinfo(pycurl.EFFECTIVE_URL)
131
        raise ConnectionError('curl connection error (%s) on %s'
132
                              % (os.strerror(curl_errno), url))
133
134
    def _raise_curl_http_error(self, curl):
135
        code = curl.getinfo(pycurl.HTTP_CODE)
136
        url = curl.getinfo(pycurl.EFFECTIVE_URL)
137
        raise TransportError('http error %d probing for %s' %
138
                             (code, url))
1540.3.1 by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib.
139
1540.3.13 by Martin Pool
Curl should follow http redirects, the same as urllib
140
    def _set_curl_options(self, curl):
141
        """Set options for all requests"""
1540.3.11 by Martin Pool
doc
142
        # There's no way in http/1.0 to say "must revalidate"; we don't want
143
        # to force it to always retrieve.  so just turn off the default Pragma
144
        # provided by Curl.
1616.1.9 by Martin Pool
Set Cache-control: max-age=0 and Pragma: no-cache
145
        headers = ['Cache-control: max-age=0',
146
                   'Pragma: no-cache']
1540.3.14 by Martin Pool
[pycurl] Make Curl instance a local variable not a long-lived object.
147
        ## curl.setopt(pycurl.VERBOSE, 1)
1616.1.9 by Martin Pool
Set Cache-control: max-age=0 and Pragma: no-cache
148
        # TODO: maybe include a summary of the pycurl version
149
        ua_str = 'bzr/%s (pycurl)' % (bzrlib.__version__)
1540.3.15 by Martin Pool
[merge] large merge to sync with bzr.dev
150
        curl.setopt(pycurl.USERAGENT, ua_str)
1540.3.13 by Martin Pool
Curl should follow http redirects, the same as urllib
151
        curl.setopt(pycurl.HTTPHEADER, headers)
152
        curl.setopt(pycurl.FOLLOWLOCATION, 1) # follow redirect responses
1540.3.3 by Martin Pool
Review updates of pycurl transport
153
1540.3.14 by Martin Pool
[pycurl] Make Curl instance a local variable not a long-lived object.
154
    def _curl_perform(self, curl):
1540.3.3 by Martin Pool
Review updates of pycurl transport
155
        """Perform curl operation and translate exceptions."""
156
        try:
1540.3.14 by Martin Pool
[pycurl] Make Curl instance a local variable not a long-lived object.
157
            curl.perform()
1540.3.3 by Martin Pool
Review updates of pycurl transport
158
        except pycurl.error, e:
159
            # XXX: There seem to be no symbolic constants for these values.
160
            if e[0] == 6:
161
                # couldn't resolve host
1540.3.14 by Martin Pool
[pycurl] Make Curl instance a local variable not a long-lived object.
162
                raise NoSuchFile(curl.getinfo(pycurl.EFFECTIVE_URL), e)
1540.3.1 by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib.
163
1540.3.10 by Martin Pool
[broken] keep hooking pycurl into test framework
164
1540.3.24 by Martin Pool
Add new protocol 'http+pycurl' that always uses PyCurl.
165
class HttpServer_PyCurl(HttpServer):
    """HttpServer variant that hands out http+pycurl urls.

    Meant for the test suite: clients connecting to this server are routed
    through pycurl wherever that is possible.
    """

    # the url scheme of this server demands the pycurl client implementation
    _url_protocol = 'http+pycurl'
174
175
1540.3.10 by Martin Pool
[broken] keep hooking pycurl into test framework
176
def get_test_permutations():
    """Return the (transport class, server class) pairs to be tested."""
    permutations = [(PyCurlTransport, HttpServer_PyCurl)]
    return permutations