~bzr-pqm/bzr/bzr.dev

1540.3.3 by Martin Pool
Review updates of pycurl transport
1
# Copyright (C) 2005, 2006 Canonical Ltd
1540.3.18 by Martin Pool
Style review fixes (thanks robertc)
2
#
1185.11.19 by John Arbash Meinel
Testing put and append, also testing agaist file-like objects as well as strings.
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
1540.3.18 by Martin Pool
Style review fixes (thanks robertc)
7
#
1185.11.19 by John Arbash Meinel
Testing put and append, also testing agaist file-like objects as well as strings.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
1540.3.18 by Martin Pool
Style review fixes (thanks robertc)
12
#
1185.11.19 by John Arbash Meinel
Testing put and append, also testing agaist file-like objects as well as strings.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
1540.3.3 by Martin Pool
Review updates of pycurl transport
16
17
"""Base implementation of Transport over http.
18
19
There are separate implementation modules for each http client implementation.
907.1.21 by John Arbash Meinel
Adding http transport as a valid transport protocol.
20
"""
21
1711.4.14 by John Arbash Meinel
Custom HttpRequestHandler which treats all paths as utf8 encoded
22
from cStringIO import StringIO
1540.3.3 by Martin Pool
Review updates of pycurl transport
23
import errno
1786.1.25 by John Arbash Meinel
Test that we can extract headers properly.
24
import mimetools
1540.3.6 by Martin Pool
[merge] update from bzr.dev
25
import os
1711.4.14 by John Arbash Meinel
Custom HttpRequestHandler which treats all paths as utf8 encoded
26
import posixpath
1540.3.23 by Martin Pool
Allow urls like http+pycurl://host/ to use a particular impl
27
import re
1711.4.15 by John Arbash Meinel
Only interpret HTTP paths as utf8 on win32
28
import sys
1540.3.3 by Martin Pool
Review updates of pycurl transport
29
import urlparse
30
import urllib
1530.1.11 by Robert Collins
Push the transport permutations list into each transport module allowing for automatic testing of new modules that are registered as transports.
31
from warnings import warn
1540.3.3 by Martin Pool
Review updates of pycurl transport
32
1786.1.6 by John Arbash Meinel
Missed a couple of imports
33
# TODO: load these only when running http tests
34
import BaseHTTPServer, SimpleHTTPServer, socket, time
35
import threading
36
1786.1.42 by John Arbash Meinel
Update _extract_headers, make it less generic, and non recursive.
37
from bzrlib import errors
1540.3.6 by Martin Pool
[merge] update from bzr.dev
38
from bzrlib.errors import (TransportNotPossible, NoSuchFile,
1685.1.8 by John Arbash Meinel
Re-allow a couple more tests, fix a bug in http, non_ascii tests still fail.
39
                           TransportError, ConnectionError, InvalidURL)
1393.2.3 by John Arbash Meinel
Fixing typos, updating stores, getting tests to pass.
40
from bzrlib.branch import Branch
1185.11.1 by John Arbash Meinel
(broken) Transport work is merged in. Tests do not pass yet.
41
from bzrlib.trace import mutter
1786.1.6 by John Arbash Meinel
Missed a couple of imports
42
from bzrlib.transport import Transport, register_transport, Server
1786.1.8 by John Arbash Meinel
[merge] Johan Rydberg test updates
43
from bzrlib.transport.http.response import (HttpMultipartRangeResponse,
44
                                            HttpRangeResponse)
1540.2.1 by Röbey Pointer
change http url parsing to use urlparse, and use the ui_factory to ask for a password if necessary
45
from bzrlib.ui import ui_factory
1540.3.6 by Martin Pool
[merge] update from bzr.dev
46
907.1.57 by John Arbash Meinel
Trying to get pipelined http library working + tests.
47
1185.40.20 by Robey Pointer
allow user:pass@ info in http urls to be used for auth; this should be easily expandable later to use auth config files
48
def extract_auth(url, password_manager):
    """Strip auth parameters from an HTTP/HTTPS url and register them.

    Any ``user[:password]@`` prefix in the network location is removed and
    handed to ``password_manager``.  The url is returned without those auth
    parameters (which confuse urllib2).

    :param url: An absolute http or https url; an implementation qualifier
        such as ``http+urllib://`` is accepted.
    :param password_manager: Object providing
        ``add_password(realm, host, username, password)``.  It is only
        called when the url carries credentials.
    :return: The url with the ``user[:password]@`` portion removed.  A url
        without credentials is passed through urlsplit/urlunsplit unchanged.
    """
    assert re.match(r'^(https?)(\+\w+)?://', url), \
            'invalid absolute url %r' % url
    scheme, netloc, path, query, fragment = urlparse.urlsplit(url)

    if '@' in netloc:
        auth, netloc = netloc.split('@', 1)
        if ':' in auth:
            username, password = auth.split(':', 1)
        else:
            username, password = auth, None
        # Credentials are registered against the bare host name, without
        # any port number.
        if ':' in netloc:
            host = netloc.split(':', 1)[0]
        else:
            host = netloc
        username = urllib.unquote(username)
        if password is not None:
            password = urllib.unquote(password)
        else:
            # No password in the url: ask the user interface for one.
            # The prompt placeholders need an explicit 's' conversion;
            # '%(user)@' is not a valid format and fails as soon as the
            # prompt is %-expanded with the user/host keyword arguments
            # (presumably done inside get_password -- confirm against
            # bzrlib.ui).
            password = ui_factory.get_password(
                prompt='HTTP %(user)s@%(host)s password',
                user=username, host=host)
        password_manager.add_password(None, host, username, password)
    url = urlparse.urlunsplit((scheme, netloc, path, query, fragment))
    return url
1553.1.5 by James Henstridge
Make HTTP transport has() method do HEAD requests, and update test to
76
1185.50.83 by John Arbash Meinel
[merge] James Henstridge: Set Agent string in http headers, add tests for it.
77
1786.1.42 by John Arbash Meinel
Update _extract_headers, make it less generic, and non recursive.
78
def _extract_headers(header_text, url):
    """Extract the mapping for an rfc2822 header

    This is a helper function for the test suite and for _pycurl.
    (urllib already parses the headers for us)

    In the case that there are multiple headers inside the file,
    the last one is returned.

    :param header_text: A string of header information.
        This expects that the first line of a header will always be HTTP ...
    :param url: The url we are parsing, so we can raise nice errors
    :return: mimetools.Message object, which basically acts like a case
        insensitive dictionary.
    :raises errors.InvalidHttpResponse: if header_text is empty, or if its
        very first line does not start with 'HTTP'.
    """
    if not header_text:
        raise errors.InvalidHttpResponse(url, 'Empty headers')

    first_header = True
    remaining = header_text

    while remaining:
        header_file = StringIO(remaining)
        first_line = header_file.readline()
        if not first_line.startswith('HTTP'):
            if first_header:
                # The first header *must* start with HTTP
                raise errors.InvalidHttpResponse(url,
                    'Opening header line did not start with HTTP: %s'
                    % (first_line,))
            # A later chunk that is not a status line means the headers
            # are over; keep the last one parsed.
            break
        first_header = False
        m = mimetools.Message(header_file)

        # mimetools.Message parses the first header up to a blank line
        # So while there is remaining data, it probably means there is
        # another header to be parsed.
        # Get rid of any preceding whitespace, which if it is all whitespace
        # will get rid of everything.
        remaining = header_file.read().lstrip()
    return m
120
121
1540.3.1 by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib.
122
class HttpTransportBase(Transport):
    """Base class for http implementations.

    Does URL parsing, etc, but not any network IO.

    The protocol can be given as e.g. http+urllib://host/ to use a particular
    implementation.

    This transport is read-only: every mutating operation raises
    TransportNotPossible.
    """

    # _proto: "http" or "https"
    # _qualified_proto: may have "+pycurl", etc

    def __init__(self, base):
        """Set the base path where files will be stored.

        :param base: An absolute http or https url, optionally carrying an
            implementation qualifier (e.g. ``http+pycurl://``).  A trailing
            '/' is appended when missing.
        :raises AssertionError: if base is not an http(s) url.
        """
        proto_match = re.match(r'^(https?)(\+\w+)?://', base)
        if not proto_match:
            raise AssertionError("not a http url: %r" % base)
        self._proto = proto_match.group(1)
        impl_name = proto_match.group(2)
        if impl_name:
            # Drop the leading '+' of the qualifier
            impl_name = impl_name[1:]
        self._impl_name = impl_name
        if base[-1] != '/':
            base = base + '/'
        super(HttpTransportBase, self).__init__(base)
        # In the future we might actually connect to the remote host
        # rather than using get_url
        (apparent_proto, self._host,
            self._path, self._parameters,
            self._query, self._fragment) = urlparse.urlparse(self.base)
        self._qualified_proto = apparent_proto

    def abspath(self, relpath):
        """Return the full url to the given relative path.

        The URL returned always has the protocol scheme originally used to
        construct the transport, even if that includes an explicit
        implementation qualifier.

        :param relpath: A plain (non-unicode) string, relative to the
            transport base.  '.' and empty segments are ignored and '..'
            moves up one level, stopping at the root.
        :raises InvalidURL: if relpath is a unicode object.
        :raises ValueError: if a multi-segment relpath starts or ends
            with '/'.
        """
        assert isinstance(relpath, basestring)
        if isinstance(relpath, unicode):
            raise InvalidURL(relpath, 'paths must not be unicode.')
        if isinstance(relpath, basestring):
            relpath_parts = relpath.split('/')
        else:
            # Only reachable when assertions are disabled.
            # TODO: Don't call this with an array - no magic interfaces
            relpath_parts = relpath[:]
        if len(relpath_parts) > 1:
            if relpath_parts[0] == '':
                raise ValueError("path %r within branch %r seems to be absolute"
                                 % (relpath, self._path))
            if relpath_parts[-1] == '':
                raise ValueError("path %r within branch %r seems to be a directory"
                                 % (relpath, self._path))
        basepath = self._path.split('/')
        if len(basepath) > 0 and basepath[-1] == '':
            basepath = basepath[:-1]
        for p in relpath_parts:
            if p == '..':
                if len(basepath) == 0:
                    # In most filesystems, a request for the parent
                    # of root, just returns root.
                    continue
                basepath.pop()
            elif p == '.' or p == '':
                continue # No-op
            else:
                basepath.append(p)
        # Possibly, we could use urlparse.urljoin() here, but
        # I'm concerned about when it chooses to strip the last
        # portion of the path, and when it doesn't.
        path = '/'.join(basepath)
        if path == '':
            path = '/'
        result = urlparse.urlunparse((self._qualified_proto,
                                    self._host, path, '', '', ''))
        return result

    def _real_abspath(self, relpath):
        """Produce absolute path, adjusting protocol if needed

        The implementation qualifier (e.g. "+pycurl") is removed from the
        scheme, and the result is always a plain str.
        """
        abspath = self.abspath(relpath)
        qualified = self._qualified_proto
        if qualified != self._proto:
            # Swap the qualified scheme prefix for the bare protocol
            abspath = self._proto + abspath[len(qualified):]
        if not isinstance(abspath, str):
            # escaping must be done at a higher level
            abspath = abspath.encode('ascii')
        return abspath

    def has(self, relpath):
        """Abstract; always raises NotImplementedError in this base class."""
        raise NotImplementedError("has() is abstract on %r" % self)

    def get(self, relpath):
        """Get the file at the given relative path.

        :param relpath: The relative path to the file
        :return: The file object produced by self._get for the whole file.
        """
        code, response_file = self._get(relpath, None)
        return response_file

    def _get(self, relpath, ranges):
        """Get a file, or part of a file.

        :param relpath: Path relative to transport base URL
        :param ranges: None to get the whole file;
            or [(start,end)] to fetch parts of a file.

        :returns: (http_code, result_file)

        Note that the current http implementations can only fetch one range at
        a time through this call.

        Abstract here; always raises NotImplementedError.
        """
        raise NotImplementedError(self._get)

    def readv(self, relpath, offsets):
        """Get parts of the file at the given relative path.

        :param relpath: The relative path to the file
        :param offsets: A list of (offset, size) tuples.
        :return: A generator of (offset, data) tuples, in the order the
            offsets were given.
        """
        ranges = self.offsets_to_ranges(offsets)
        mutter('http readv of %s collapsed %s offsets => %s',
                relpath, len(offsets), ranges)
        code, f = self._get(relpath, ranges)
        for start, size in offsets:
            # A negative start seeks relative to the end of the file
            # (whence 2); tell() then gives the absolute position.
            f.seek(start, (start < 0) and 2 or 0)
            start = f.tell()
            data = f.read(size)
            assert len(data) == size
            yield start, data

    @staticmethod
    def offsets_to_ranges(offsets):
        """Turn a list of offsets and sizes into a list of byte ranges.

        :param offsets: A list of tuples of (start, size).  They are sorted
            before processing; an empty list yields an empty result.
        :return: a list of inclusive byte ranges [start, end], sorted by
            start.  Adjacent and overlapping ranges will be combined.
        """
        combined = []

        # Make sure we process sorted offsets
        for start, size in sorted(offsets):
            end = start + size - 1
            if combined and start <= combined[-1][1] + 1:
                # Touches or overlaps the current range: extend it, but
                # never shrink it.  A range fully contained in the previous
                # one must not cut off the bytes already requested.
                if end > combined[-1][1]:
                    combined[-1][1] = end
            else:
                combined.append([start, end])

        return combined

    def put(self, relpath, f, mode=None):
        """Copy the file-like or string object into the location.

        Not supported over http; always raises TransportNotPossible.

        :param relpath: Location to put the contents, relative to base.
        :param f:       File-like or string object.
        """
        raise TransportNotPossible('http PUT not supported')

    def mkdir(self, relpath, mode=None):
        """Create a directory at the given path.

        Not supported over http; always raises TransportNotPossible.
        """
        raise TransportNotPossible('http does not support mkdir()')

    def rmdir(self, relpath):
        """See Transport.rmdir.

        Not supported over http; always raises TransportNotPossible.
        """
        raise TransportNotPossible('http does not support rmdir()')

    def append(self, relpath, f):
        """Append the text in the file-like object into the final
        location.

        Not supported over http; always raises TransportNotPossible.
        """
        raise TransportNotPossible('http does not support append()')

    def copy(self, rel_from, rel_to):
        """Copy the item at rel_from to the location at rel_to

        Not supported over http; always raises TransportNotPossible.
        """
        raise TransportNotPossible('http does not support copy()')

    def copy_to(self, relpaths, other, mode=None, pb=None):
        """Copy a set of entries from self into another Transport.

        :param relpaths: A list/generator of entries to be copied.
        :param other: The destination transport.  Another http transport
            is rejected with TransportNotPossible.
        :param mode: Passed through to the base class copy_to.
        :param pb: Passed through to the base class copy_to.

        TODO: if other is LocalTransport, is it possible to
              do better than put(get())?
        """
        # At this point HttpTransport might be able to check and see if
        # the remote location is the same, and rather than download, and
        # then upload, it could just issue a remote copy_this command.
        if isinstance(other, HttpTransportBase):
            raise TransportNotPossible('http cannot be the target of copy_to()')
        else:
            return super(HttpTransportBase, self).\
                    copy_to(relpaths, other, mode=mode, pb=pb)

    def move(self, rel_from, rel_to):
        """Move the item at rel_from to the location at rel_to

        Not supported over http; always raises TransportNotPossible.
        """
        raise TransportNotPossible('http does not support move()')

    def delete(self, relpath):
        """Delete the item at relpath

        Not supported over http; always raises TransportNotPossible.
        """
        raise TransportNotPossible('http does not support delete()')

    def is_readonly(self):
        """See Transport.is_readonly."""
        return True

    def listable(self):
        """See Transport.listable."""
        return False

    def stat(self, relpath):
        """Return the stat information for a file.

        Not supported over http; always raises TransportNotPossible.
        """
        raise TransportNotPossible('http does not support stat()')

    def lock_read(self, relpath):
        """Lock the given file for shared (read) access.

        :return: A lock object, which should be passed to Transport.unlock()
        """
        # The old RemoteBranch ignore lock for reading, so we will
        # continue that tradition and return a bogus lock object.
        class BogusLock(object):
            def __init__(self, path):
                self.path = path
            def unlock(self):
                pass
        return BogusLock(relpath)

    def lock_write(self, relpath):
        """Lock the given file for exclusive (write) access.

        WARNING: many transports do not support this, so try to avoid
        using it.  Over http it always raises TransportNotPossible.

        :return: A lock object, which should be passed to Transport.unlock()
        """
        raise TransportNotPossible('http does not support lock_write()')

    def clone(self, offset=None):
        """Return a new HttpTransportBase with root at self.base + offset

        For now HttpTransportBase does not actually connect, so just return
        a new object of the same class.

        :param offset: Path relative to this transport, or None to clone
            at the same base.
        """
        if offset is None:
            return self.__class__(self.base)
        else:
            return self.__class__(self.abspath(offset))

    @staticmethod
    def range_header(ranges, tail_amount):
        """Turn a list of bytes ranges into a HTTP Range header value.

        :param ranges: A list of byte ranges, (start, end).
        :param tail_amount: If true (non-zero), a final '-N' suffix range
            is appended to the value.

        :return: HTTP range header string: the comma-joined ranges, without
            any leading 'bytes=' (pycurl expects only the ranges).
        """
        strings = ['%d-%d' % (start, end) for start, end in ranges]

        if tail_amount:
            strings.append('-%d' % tail_amount)

        return ','.join(strings)
1750.1.2 by Michael Ellerman
Add support for HTTP multipart ranges and hook it into http+urllib.
396
397
1530.1.3 by Robert Collins
transport implementations now tested consistently.
398
#---------------- test server facilities ----------------
1540.3.6 by Martin Pool
[merge] update from bzr.dev
399
# TODO: load these only when running tests
1530.1.3 by Robert Collins
transport implementations now tested consistently.
400
1636.1.1 by Robert Collins
Fix calling relpath() and abspath() on transports at their root.
401
1530.1.3 by Robert Collins
transport implementations now tested consistently.
402
class WebserverNotAvailable(Exception):
    """Plain Exception subclass used by the test server facilities.

    Adds no behaviour of its own.
    """
404
405
406
class BadWebserverPath(ValueError):
    """ValueError whose two constructor arguments are a path and a location.

    The string form reports that the first is not inside the second.
    """

    def __str__(self):
        # self.args supplies both values for the two placeholders
        template = 'path %s is not in %s'
        return template % self.args
409
410
411
class TestingHTTPRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):
412
413
    def log_message(self, format, *args):
1553.1.3 by James Henstridge
Make bzrlib.transport.http.HttpServer output referer and user agent as in
414
        self.server.test_case.log('webserver - %s - - [%s] %s "%s" "%s"',
1530.1.3 by Robert Collins
transport implementations now tested consistently.
415
                                  self.address_string(),
416
                                  self.log_date_time_string(),
1553.1.3 by James Henstridge
Make bzrlib.transport.http.HttpServer output referer and user agent as in
417
                                  format % args,
418
                                  self.headers.get('referer', '-'),
419
                                  self.headers.get('user-agent', '-'))
1530.1.3 by Robert Collins
transport implementations now tested consistently.
420
421
    def handle_one_request(self):
422
        """Handle a single HTTP request.
423
424
        You normally don't need to override this method; see the class
425
        __doc__ string for information on how to handle specific HTTP
426
        commands such as GET and POST.
427
428
        """
429
        for i in xrange(1,11): # Don't try more than 10 times
430
            try:
431
                self.raw_requestline = self.rfile.readline()
432
            except socket.error, e:
433
                if e.args[0] in (errno.EAGAIN, errno.EWOULDBLOCK):
434
                    # omitted for now because some tests look at the log of
435
                    # the server and expect to see no errors.  see recent
436
                    # email thread. -- mbp 20051021. 
437
                    ## self.log_message('EAGAIN (%d) while reading from raw_requestline' % i)
438
                    time.sleep(0.01)
439
                    continue
440
                raise
441
            else:
442
                break
443
        if not self.raw_requestline:
444
            self.close_connection = 1
445
            return
446
        if not self.parse_request(): # An error code has been sent, just exit
447
            return
448
        mname = 'do_' + self.command
449
        if not hasattr(self, mname):
450
            self.send_error(501, "Unsupported method (%r)" % self.command)
451
            return
452
        method = getattr(self, mname)
453
        method()
454
1711.4.15 by John Arbash Meinel
Only interpret HTTP paths as utf8 on win32
455
    if sys.platform == 'win32':
456
        # On win32 you cannot access non-ascii filenames without
457
        # decoding them into unicode first.
458
        # However, under Linux, you can access bytestream paths
459
        # without any problems. If this function was always active
460
        # it would probably break tests when LANG=C was set
461
        def translate_path(self, path):
462
            """Translate a /-separated PATH to the local filename syntax.
1711.4.14 by John Arbash Meinel
Custom HttpRequestHandler which treats all paths as utf8 encoded
463
1711.4.15 by John Arbash Meinel
Only interpret HTTP paths as utf8 on win32
464
            For bzr, all url paths are considered to be utf8 paths.
465
            On Linux, you can access these paths directly over the bytestream
466
            request, but on win32, you must decode them, and access them
467
            as Unicode files.
468
            """
469
            # abandon query parameters
470
            path = urlparse.urlparse(path)[2]
471
            path = posixpath.normpath(urllib.unquote(path))
472
            path = path.decode('utf-8')
473
            words = path.split('/')
474
            words = filter(None, words)
475
            path = os.getcwdu()
476
            for word in words:
477
                drive, word = os.path.splitdrive(word)
478
                head, word = os.path.split(word)
479
                if word in (os.curdir, os.pardir): continue
480
                path = os.path.join(path, word)
481
            return path
1711.4.14 by John Arbash Meinel
Custom HttpRequestHandler which treats all paths as utf8 encoded
482
1185.50.83 by John Arbash Meinel
[merge] James Henstridge: Set Agent string in http headers, add tests for it.
483
1530.1.3 by Robert Collins
transport implementations now tested consistently.
484
class TestingHTTPServer(BaseHTTPServer.HTTPServer):
    """HTTPServer that remembers the test case it is serving.

    Request handlers reach it through ``self.server.test_case``.
    """

    def __init__(self, server_address, RequestHandlerClass, test_case):
        """Create the server and record ``test_case`` on it.

        :param server_address: (host, port) pair passed to HTTPServer.
        :param RequestHandlerClass: Handler class passed to HTTPServer.
        :param test_case: Object stored as ``self.test_case``.
        """
        BaseHTTPServer.HTTPServer.__init__(
            self, server_address, RequestHandlerClass)
        self.test_case = test_case
489
1786.1.27 by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration.
490
1530.1.3 by Robert Collins
transport implementations now tested consistently.
491
class HttpServer(Server):
    """A test server for http transports.

    setUp() starts a daemon thread running a TestingHTTPServer on an
    arbitrary free port of localhost; tearDown() stops it. Log lines
    produced by the request handler are collected in ``self.logs``.
    """

    # used to form the url that connects to this server
    _url_protocol = 'http'

    def _http_start(self):
        """Thread body: serve requests until ``_http_running`` is cleared.

        Publishes ``_http_base_url`` and then releases ``_http_starting``
        so that _get_remote_url() can proceed. The listening socket is
        closed when the loop ends.
        """
        httpd = TestingHTTPServer(('localhost', 0),
                                  TestingHTTPRequestHandler,
                                  self)
        try:
            port = httpd.socket.getsockname()[1]
            self._http_base_url = '%s://localhost:%s/' % (self._url_protocol,
                                                          port)
            self._http_starting.release()
            # Time out regularly so that _http_running is re-checked even
            # when no request arrives.
            httpd.socket.settimeout(0.1)

            while self._http_running:
                try:
                    httpd.handle_request()
                except socket.timeout:
                    pass
        finally:
            # Do not leave the listening socket open after shutdown.
            httpd.server_close()

    def _get_remote_url(self, path):
        """Return the URL at which the local ``path`` is served.

        :param path: A local path. A relative path is used as is; an
            absolute one must lie inside the directory recorded by setUp().
        :raises BadWebserverPath: If ``path`` is absolute but is not below
            that directory.
        """
        path_parts = path.split(os.path.sep)
        if os.path.isabs(path):
            prefix_len = len(self._local_path_parts)
            if path_parts[:prefix_len] != self._local_path_parts:
                # _local_path_parts is derived from _home_dir, so that is
                # the directory to report.
                raise BadWebserverPath(path, self._home_dir)
            path_parts = path_parts[prefix_len:]
        remote_path = '/'.join(path_parts)

        # setUp() takes this lock and _http_start() releases it once the
        # base url is known, so this pair simply waits for the server.
        self._http_starting.acquire()
        self._http_starting.release()
        return self._http_base_url + remote_path

    def log(self, format, *args):
        """Capture Server log output."""
        self.logs.append(format % args)

    def setUp(self):
        """See bzrlib.transport.Server.setUp."""
        self._home_dir = os.getcwdu()
        self._local_path_parts = self._home_dir.split(os.path.sep)
        # Created before the thread starts so log() always has a target.
        self.logs = []
        self._http_starting = threading.Lock()
        self._http_starting.acquire()
        self._http_running = True
        self._http_base_url = None
        self._http_thread = threading.Thread(target=self._http_start)
        self._http_thread.setDaemon(True)
        self._http_thread.start()
        # Remove http_proxy from the environment for the lifetime of the
        # server; tearDown() puts it back.
        self._http_proxy = os.environ.get("http_proxy")
        if self._http_proxy is not None:
            del os.environ["http_proxy"]

    def tearDown(self):
        """See bzrlib.transport.Server.tearDown."""
        self._http_running = False
        self._http_thread.join()
        if self._http_proxy is not None:
            os.environ["http_proxy"] = self._http_proxy

    def get_url(self):
        """See bzrlib.transport.Server.get_url."""
        return self._get_remote_url(self._home_dir)

    def get_bogus_url(self):
        """See bzrlib.transport.Server.get_bogus_url."""
        # this is chosen to try to prevent trouble with proxies, weird dns,
        # etc
        return 'http://127.0.0.1:1/'
1530.1.9 by Robert Collins
Test bogus urls with http in the new infrastructure.
564