~bzr-pqm/bzr/bzr.dev

1185.11.19 by John Arbash Meinel
Testing put and append, also testing agaist file-like objects as well as strings.
1
# Copyright (C) 2005 Canonical Ltd
2
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
"""Implementation of Transport over http.
907.1.21 by John Arbash Meinel
Adding http transport as a valid transport protocol.
17
"""
18
1393.2.3 by John Arbash Meinel
Fixing typos, updating stores, getting tests to pass.
19
from bzrlib.transport import Transport, register_transport
20
from bzrlib.errors import (TransportNotPossible, NoSuchFile, 
1185.31.44 by John Arbash Meinel
Cleaned up Exceptions for all transports.
21
                           TransportError, ConnectionError)
907.1.57 by John Arbash Meinel
Trying to get pipelined http library working + tests.
22
import os, errno
907.1.21 by John Arbash Meinel
Adding http transport as a valid transport protocol.
23
from cStringIO import StringIO
1185.40.20 by Robey Pointer
allow user:pass@ info in http urls to be used for auth; this should be easily expandable later to use auth config files
24
import urllib, urllib2
1185.11.6 by John Arbash Meinel
Made HttpTransport handle a request for a parent directory differently.
25
import urlparse
907.1.21 by John Arbash Meinel
Adding http transport as a valid transport protocol.
26
1185.11.1 by John Arbash Meinel
(broken) Transport work is merged in. Tests do not pass yet.
27
from bzrlib.errors import BzrError, BzrCheckError
1393.2.3 by John Arbash Meinel
Fixing typos, updating stores, getting tests to pass.
28
from bzrlib.branch import Branch
1185.11.1 by John Arbash Meinel
(broken) Transport work is merged in. Tests do not pass yet.
29
from bzrlib.trace import mutter
907.1.57 by John Arbash Meinel
Trying to get pipelined http library working + tests.
30
31
1185.40.20 by Robey Pointer
allow user:pass@ info in http urls to be used for auth; this should be easily expandable later to use auth config files
32
def extract_auth(url, password_manager):
    """Strip credentials from an HTTP/HTTPS url and register them.

    urllib2 is confused by a ``user:pass@`` prefix in the url it is asked to
    open, so the credentials are removed from the url and, when a password
    is present, handed to the password manager instead.

    :param url: A url starting with 'http://' or 'https://'.
    :param password_manager: Object providing
        ``add_password(realm, uri, user, passwd)``.  It is only touched
        when the url carries both a user name and a password.
    :return: The url with any ``user[:pass]@`` part removed.
    """
    assert url.startswith('http://') or url.startswith('https://')
    scheme, host = url.split('//', 1)
    if '/' in host:
        host, path = host.split('/', 1)
        path = '/' + path
    else:
        path = ''
    port = ''
    if '@' in host:
        # A host name can never contain '@', so the credentials end at the
        # *last* '@'.  Splitting at the first one would cut a user name or
        # password containing a literal '@' in half and leave the remainder
        # glued onto the host.
        auth, host = host.rsplit('@', 1)
        if ':' in auth:
            username, password = auth.split(':', 1)
        else:
            username, password = auth, None
        if ':' in host:
            # The password is registered for the bare host; the port is
            # only put back into the returned url.
            host, port = host.split(':', 1)
            port = ':' + port
        # FIXME: if password isn't given, should we ask for it?
        if password is not None:
            username = urllib.unquote(username)
            password = urllib.unquote(password)
            password_manager.add_password(None, host, username, password)
    url = scheme + '//' + host + port + path
    return url
62
    
1185.11.14 by John Arbash Meinel
Working on getting tests to run. TestFetch only works if named runTest
63
def get_url(url):
    """Open url over http(s) and return the file-like response object.

    Any ``user:pass@`` credentials embedded in the url are moved into a
    basic-auth handler by extract_auth() before the request is made.

    :param url: An http:// or https:// url, optionally with credentials.
    :return: The response object returned by the urllib2 opener.
    """
    manager = urllib2.HTTPPasswordMgrWithDefaultRealm()
    url = extract_auth(url, manager)
    # Log only after the credentials have been stripped, so that a password
    # given in the url never ends up in the trace output.
    mutter("get_url %s", url)
    auth_handler = urllib2.HTTPBasicAuthHandler(manager)
    opener = urllib2.build_opener(auth_handler)
    url_f = opener.open(url)
    return url_f
72
907.1.21 by John Arbash Meinel
Adding http transport as a valid transport protocol.
73
class HttpTransport(Transport):
907.1.36 by John Arbash Meinel
Moving the multi-get functionality higher up into the Branch class.
74
    """This is the transport agent for http:// access.
75
    
76
    TODO: Implement pipelined versions of all of the *_multi() functions.
77
    """
907.1.21 by John Arbash Meinel
Adding http transport as a valid transport protocol.
78
79
    def __init__(self, base):
        """Remember the base url and cache its parsed components.

        :param base: Url of the transport root; it must start with
            'http://' or 'https://'.
        """
        assert base.startswith('http://') or base.startswith('https://')
        super(HttpTransport, self).__init__(base)
        # In the future we might actually connect to the remote host
        # rather than using get_url
        # self._connection = None
        url_parts = urlparse.urlparse(self.base)
        (self._proto, self._host, self._path,
            self._parameters, self._query, self._fragment) = url_parts
907.1.21 by John Arbash Meinel
Adding http transport as a valid transport protocol.
89
907.1.32 by John Arbash Meinel
Renaming is_remote to should_cache as it is more appropriate.
90
    def should_cache(self):
        """Tell callers whether data pulled across should be cached locally.

        :return: Always True for this transport.
        """
        return True
94
907.1.21 by John Arbash Meinel
Adding http transport as a valid transport protocol.
95
    def clone(self, offset=None):
        """Return a new HttpTransport rooted at self.base + offset.

        HttpTransport does not hold a connection, so cloning simply
        constructs a fresh HttpTransport object.

        :param offset: Path relative to this transport's base, or None to
            clone at the same location.
        """
        if offset is None:
            target = self.base
        else:
            target = self.abspath(offset)
        return HttpTransport(target)
104
105
    def abspath(self, relpath):
106
        """Return the full url to the given relative path.
107
        This can be supplied with a string or a list
108
        """
1469 by Robert Collins
Change Transport.* to work with URL's.
109
        assert isinstance(relpath, basestring)
907.1.21 by John Arbash Meinel
Adding http transport as a valid transport protocol.
110
        if isinstance(relpath, basestring):
1185.16.68 by Martin Pool
- http url fixes suggested by Robey Pointer, and tests
111
            relpath_parts = relpath.split('/')
112
        else:
113
            # TODO: Don't call this with an array - no magic interfaces
114
            relpath_parts = relpath[:]
115
        if len(relpath_parts) > 1:
116
            if relpath_parts[0] == '':
117
                raise ValueError("path %r within branch %r seems to be absolute"
118
                                 % (relpath, self._path))
119
            if relpath_parts[-1] == '':
120
                raise ValueError("path %r within branch %r seems to be a directory"
121
                                 % (relpath, self._path))
1185.11.6 by John Arbash Meinel
Made HttpTransport handle a request for a parent directory differently.
122
        basepath = self._path.split('/')
1185.11.8 by John Arbash Meinel
Fixed typo
123
        if len(basepath) > 0 and basepath[-1] == '':
1185.11.6 by John Arbash Meinel
Made HttpTransport handle a request for a parent directory differently.
124
            basepath = basepath[:-1]
1185.16.68 by Martin Pool
- http url fixes suggested by Robey Pointer, and tests
125
        for p in relpath_parts:
1185.11.6 by John Arbash Meinel
Made HttpTransport handle a request for a parent directory differently.
126
            if p == '..':
1185.16.68 by Martin Pool
- http url fixes suggested by Robey Pointer, and tests
127
                if len(basepath) == 0:
1185.11.7 by John Arbash Meinel
HttpTransport just returns root when parent is requested.
128
                    # In most filesystems, a request for the parent
129
                    # of root, just returns root.
130
                    continue
1185.16.68 by Martin Pool
- http url fixes suggested by Robey Pointer, and tests
131
                basepath.pop()
132
            elif p == '.' or p == '':
1185.11.6 by John Arbash Meinel
Made HttpTransport handle a request for a parent directory differently.
133
                continue # No-op
134
            else:
135
                basepath.append(p)
136
        # Possibly, we could use urlparse.urljoin() here, but
137
        # I'm concerned about when it chooses to strip the last
138
        # portion of the path, and when it doesn't.
139
        path = '/'.join(basepath)
1185.11.9 by John Arbash Meinel
Most tests pass, some problems with unavailable socket recv
140
        return urlparse.urlunparse((self._proto,
141
                self._host, path, '', '', ''))
907.1.24 by John Arbash Meinel
Remote functionality work.
142
907.1.21 by John Arbash Meinel
Adding http transport as a valid transport protocol.
143
    def has(self, relpath):
907.1.35 by John Arbash Meinel
updating TODO for http_transport.
144
        """Does the target location exist?
145
146
        TODO: HttpTransport.has() should use a HEAD request,
147
        not a full GET request.
1185.11.15 by John Arbash Meinel
Got HttpTransport tests to pass. Check for EAGAIN, pass permit_failure around, etc
148
149
        TODO: This should be changed so that we don't use
150
        urllib2 and get an exception, the code path would be
151
        cleaner if we just do an http HEAD request, and parse
152
        the return code.
907.1.35 by John Arbash Meinel
updating TODO for http_transport.
153
        """
1185.50.15 by John Arbash Meinel
Added some extra logging when we get http exceptions.
154
        path = relpath
907.1.57 by John Arbash Meinel
Trying to get pipelined http library working + tests.
155
        try:
1185.50.15 by John Arbash Meinel
Added some extra logging when we get http exceptions.
156
            path = self.abspath(relpath)
157
            f = get_url(path)
1185.11.15 by John Arbash Meinel
Got HttpTransport tests to pass. Check for EAGAIN, pass permit_failure around, etc
158
            # Without the read and then close()
159
            # we tend to have busy sockets.
160
            f.read()
161
            f.close()
907.1.21 by John Arbash Meinel
Adding http transport as a valid transport protocol.
162
            return True
1468 by Robert Collins
The HTTP transport would return NoSuchFile inappropriately.
163
        except urllib2.URLError, e:
1185.50.15 by John Arbash Meinel
Added some extra logging when we get http exceptions.
164
            mutter('url error code: %s for has url: %r', e.code, path)
1468 by Robert Collins
The HTTP transport would return NoSuchFile inappropriately.
165
            if e.code == 404:
166
                return False
167
            raise
907.1.57 by John Arbash Meinel
Trying to get pipelined http library working + tests.
168
        except IOError, e:
1185.50.15 by John Arbash Meinel
Added some extra logging when we get http exceptions.
169
            mutter('io error: %s %s for has url: %r', 
170
                e.errno, errno.errorcode.get(e.errno), path)
907.1.57 by John Arbash Meinel
Trying to get pipelined http library working + tests.
171
            if e.errno == errno.ENOENT:
172
                return False
1185.31.44 by John Arbash Meinel
Cleaned up Exceptions for all transports.
173
            raise TransportError(orig_error=e)
907.1.55 by John Arbash Meinel
Adding pipelined http support.
174
907.1.57 by John Arbash Meinel
Trying to get pipelined http library working + tests.
175
    def get(self, relpath, decode=False):
907.1.21 by John Arbash Meinel
Adding http transport as a valid transport protocol.
176
        """Get the file at the given relative path.
177
178
        :param relpath: The relative path to the file
179
        """
1185.50.15 by John Arbash Meinel
Added some extra logging when we get http exceptions.
180
        path = relpath
907.1.57 by John Arbash Meinel
Trying to get pipelined http library working + tests.
181
        try:
1185.50.15 by John Arbash Meinel
Added some extra logging when we get http exceptions.
182
            path = self.abspath(relpath)
183
            return get_url(path)
1185.25.4 by Aaron Bentley
Switched to specific error; not all URLErrors have a .code member
184
        except urllib2.HTTPError, e:
1185.50.15 by John Arbash Meinel
Added some extra logging when we get http exceptions.
185
            mutter('url error code: %s for has url: %r', e.code, path)
1468 by Robert Collins
The HTTP transport would return NoSuchFile inappropriately.
186
            if e.code == 404:
1185.50.15 by John Arbash Meinel
Added some extra logging when we get http exceptions.
187
                raise NoSuchFile(path, extra=e)
1468 by Robert Collins
The HTTP transport would return NoSuchFile inappropriately.
188
            raise
189
        except (BzrError, IOError), e:
1185.50.15 by John Arbash Meinel
Added some extra logging when we get http exceptions.
190
            if hasattr(e, 'errno'):
191
                mutter('io error: %s %s for has url: %r', 
192
                    e.errno, errno.errorcode.get(e.errno), path)
193
                if e.errno == errno.ENOENT:
194
                    raise NoSuchFile(path, extra=e)
1185.35.31 by Aaron Bentley
Throw ConnectionError instead of NoSuchFile except when we get a 404
195
            raise ConnectionError(msg = "Error retrieving %s: %s" 
1185.16.2 by Martin Pool
- try to get better reporting of http errors
196
                             % (self.abspath(relpath), str(e)),
1428 by Robert Collins
report the url location on failed http requests
197
                             orig_error=e)
907.1.21 by John Arbash Meinel
Adding http transport as a valid transport protocol.
198
1185.58.2 by John Arbash Meinel
Added mode to the appropriate transport functions, and tests to make sure they work.
199
    def put(self, relpath, f, mode=None):
        """Refuse to store f at relpath; this transport cannot write.

        :param relpath: Location to put the contents, relative to base.
        :param f:       File-like or string object.
        :param mode:    Ignored.
        :raises TransportNotPossible: Always.
        """
        raise TransportNotPossible('http PUT not supported')
907.1.21 by John Arbash Meinel
Adding http transport as a valid transport protocol.
206
1185.58.2 by John Arbash Meinel
Added mode to the appropriate transport functions, and tests to make sure they work.
207
    def mkdir(self, relpath, mode=None):
        """Refuse to create a directory at relpath.

        :raises TransportNotPossible: Always.
        """
        raise TransportNotPossible('http does not support mkdir()')
907.1.21 by John Arbash Meinel
Adding http transport as a valid transport protocol.
210
907.1.50 by John Arbash Meinel
Removed encode/decode from Transport.put/get, added more exceptions that can be thrown.
211
    def append(self, relpath, f):
        """Refuse to append the contents of f to the file at relpath.

        :raises TransportNotPossible: Always.
        """
        raise TransportNotPossible('http does not support append()')
907.1.21 by John Arbash Meinel
Adding http transport as a valid transport protocol.
216
217
    def copy(self, rel_from, rel_to):
        """Refuse to copy rel_from to rel_to.

        :raises TransportNotPossible: Always.
        """
        raise TransportNotPossible('http does not support copy()')
907.1.21 by John Arbash Meinel
Adding http transport as a valid transport protocol.
220
1185.58.2 by John Arbash Meinel
Added mode to the appropriate transport functions, and tests to make sure they work.
221
    def copy_to(self, relpaths, other, mode=None, pb=None):
        """Copy a set of entries from self into another Transport.

        :param relpaths: A list/generator of entries to be copied.
        :param other: The destination transport; it must not be an
            HttpTransport.
        :param mode: Passed through to the base class copy_to().
        :param pb: Passed through to the base class copy_to().
        :raises TransportNotPossible: If other is an HttpTransport.

        TODO: if other is LocalTransport, is it possible to
              do better than put(get())?
        """
        # At this point HttpTransport might be able to check and see if
        # the remote location is the same, and rather than download, and
        # then upload, it could just issue a remote copy_this command.
        if not isinstance(other, HttpTransport):
            return super(HttpTransport, self).copy_to(relpaths, other,
                                                      mode=mode, pb=pb)
        raise TransportNotPossible('http cannot be the target of copy_to()')
907.1.28 by John Arbash Meinel
Added pb to function that were missing, implemented a basic double-dispatch copy_to function.
236
907.1.21 by John Arbash Meinel
Adding http transport as a valid transport protocol.
237
    def move(self, rel_from, rel_to):
        """Refuse to move rel_from to rel_to.

        :raises TransportNotPossible: Always.
        """
        raise TransportNotPossible('http does not support move()')
907.1.21 by John Arbash Meinel
Adding http transport as a valid transport protocol.
240
241
    def delete(self, relpath):
        """Refuse to delete the item at relpath.

        :raises TransportNotPossible: Always.
        """
        raise TransportNotPossible('http does not support delete()')
907.1.21 by John Arbash Meinel
Adding http transport as a valid transport protocol.
244
1400.1.1 by Robert Collins
implement a basic test for the ui branch command from http servers
245
    def listable(self):
        """See Transport.listable.

        :return: Always False for this transport.
        """
        return False
907.1.21 by John Arbash Meinel
Adding http transport as a valid transport protocol.
248
249
    def stat(self, relpath):
        """Refuse to return stat information for the file at relpath.

        :raises TransportNotPossible: Always.
        """
        raise TransportNotPossible('http does not support stat()')
907.1.21 by John Arbash Meinel
Adding http transport as a valid transport protocol.
253
907.1.24 by John Arbash Meinel
Remote functionality work.
254
    def lock_read(self, relpath):
        """Lock the given file for shared (read) access.

        No real locking is performed; the returned object only remembers
        relpath and has an unlock() that does nothing.

        :return: A lock object, which should be passed to Transport.unlock()
        """
        # The old RemoteBranch ignore lock for reading, so we will
        # continue that tradition and return a bogus lock object.
        class BogusLock(object):

            def __init__(self, path):
                self.path = path

            def unlock(self):
                pass

        lock = BogusLock(relpath)
        return lock
266
267
    def lock_write(self, relpath):
        """Refuse to lock the given file for exclusive (write) access.

        WARNING: many transports do not support this, so try to avoid
        using it.

        :raises TransportNotPossible: Always.
        """
        raise TransportNotPossible('http does not support lock_write()')