~bzr-pqm/bzr/bzr.dev

1540.3.3 by Martin Pool
Review updates of pycurl transport
1
# Copyright (C) 2005, 2006 Canonical Ltd
1540.3.18 by Martin Pool
Style review fixes (thanks robertc)
2
#
1185.11.19 by John Arbash Meinel
Testing put and append, also testing agaist file-like objects as well as strings.
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
1540.3.18 by Martin Pool
Style review fixes (thanks robertc)
7
#
1185.11.19 by John Arbash Meinel
Testing put and append, also testing agaist file-like objects as well as strings.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
1540.3.18 by Martin Pool
Style review fixes (thanks robertc)
12
#
1185.11.19 by John Arbash Meinel
Testing put and append, also testing agaist file-like objects as well as strings.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
1540.3.3 by Martin Pool
Review updates of pycurl transport
16
17
"""Base implementation of Transport over http.
18
19
There are separate implementation modules for each http client implementation.
907.1.21 by John Arbash Meinel
Adding http transport as a valid transport protocol.
20
"""
21
1711.4.14 by John Arbash Meinel
Custom HttpRequestHandler which treats all paths as utf8 encoded
22
from collections import deque
23
from cStringIO import StringIO
1540.3.3 by Martin Pool
Review updates of pycurl transport
24
import errno
1540.3.6 by Martin Pool
[merge] update from bzr.dev
25
import os
1711.4.14 by John Arbash Meinel
Custom HttpRequestHandler which treats all paths as utf8 encoded
26
import posixpath
1540.3.23 by Martin Pool
Allow urls like http+pycurl://host/ to use a particular impl
27
import re
1711.4.15 by John Arbash Meinel
Only interpret HTTP paths as utf8 on win32
28
import sys
1540.3.3 by Martin Pool
Review updates of pycurl transport
29
import urlparse
30
import urllib
1530.1.11 by Robert Collins
Push the transport permutations list into each transport module allowing for automatic testing of new modules that are registered as transports.
31
from warnings import warn
1540.3.3 by Martin Pool
Review updates of pycurl transport
32
1540.3.6 by Martin Pool
[merge] update from bzr.dev
33
from bzrlib.transport import Transport, register_transport, Server
34
from bzrlib.errors import (TransportNotPossible, NoSuchFile,
1685.1.8 by John Arbash Meinel
Re-allow a couple more tests, fix a bug in http, non_ascii tests still fail.
35
                           TransportError, ConnectionError, InvalidURL)
1393.2.3 by John Arbash Meinel
Fixing typos, updating stores, getting tests to pass.
36
from bzrlib.branch import Branch
1185.11.1 by John Arbash Meinel
(broken) Transport work is merged in. Tests do not pass yet.
37
from bzrlib.trace import mutter
1540.3.6 by Martin Pool
[merge] update from bzr.dev
38
# TODO: load these only when running http tests
39
import BaseHTTPServer, SimpleHTTPServer, socket, time
40
import threading
1540.2.1 by Röbey Pointer
change http url parsing to use urlparse, and use the ui_factory to ask for a password if necessary
41
from bzrlib.ui import ui_factory
1540.3.6 by Martin Pool
[merge] update from bzr.dev
42
907.1.57 by John Arbash Meinel
Trying to get pipelined http library working + tests.
43
1185.40.20 by Robey Pointer
allow user:pass@ info in http urls to be used for auth; this should be easily expandable later to use auth config files
44
def extract_auth(url, password_manager):
    """Strip auth parameters from an HTTP/HTTPS url and register them.

    A ``user[:password]@`` prefix in the network location is removed from
    the url and passed to ``password_manager.add_password()``, keyed by the
    host name without its port.  If a user is given without a password, the
    password is obtained from ``ui_factory.get_password()``.

    :param url: Absolute url whose scheme is http or https, optionally
        qualified with an implementation name (e.g. ``http+pycurl``).
    :param password_manager: Object providing
        ``add_password(realm, host, username, password)``.
    :return: The url, minus the auth parameters (which confuse urllib2).
    :raises AssertionError: if url is not an absolute http(s) url.
    """
    # Raised explicitly (not via an assert statement) so the check is still
    # performed when python runs with -O.
    if not re.match(r'^(https?)(\+\w+)?://', url):
        raise AssertionError('invalid absolute url %r' % url)
    scheme, netloc, path, query, fragment = urlparse.urlsplit(url)

    if '@' in netloc:
        # A host name never contains '@', so split on the last one; this
        # keeps a password containing an unquoted '@' in one piece.
        auth, netloc = netloc.rsplit('@', 1)
        if ':' in auth:
            username, password = auth.split(':', 1)
        else:
            username, password = auth, None
        # Credentials are registered per host, ignoring any port.
        host = netloc.split(':', 1)[0]
        username = urllib.unquote(username)
        if password is not None:
            password = urllib.unquote(password)
        else:
            # Each mapping key needs its 's' conversion type; '%(user)@' is
            # not a valid %-format specifier.
            password = ui_factory.get_password(
                prompt='HTTP %(user)s@%(host)s password',
                user=username, host=host)
        password_manager.add_password(None, host, username, password)
    url = urlparse.urlunsplit((scheme, netloc, path, query, fragment))
    return url
1553.1.5 by James Henstridge
Make HTTP transport has() method do HEAD requests, and update test to
72
1185.50.83 by John Arbash Meinel
[merge] James Henstridge: Set Agent string in http headers, add tests for it.
73
1540.3.1 by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib.
74
class HttpTransportBase(Transport):
    """Base class for http implementations.

    Does URL parsing, etc, but not any network IO.  Subclasses supply the
    actual fetching by implementing has() and _get().

    The protocol can be given as e.g. http+urllib://host/ to use a particular
    implementation.
    """

    # _proto: "http" or "https"
    # _qualified_proto: may have "+pycurl", etc

    def __init__(self, base):
        """Set the base path where files will be stored.

        :param base: Absolute http or https url, optionally qualified with an
            implementation name; a trailing '/' is appended when missing.
        :raises AssertionError: if base is not an http(s) url.
        """
        proto_match = re.match(r'^(https?)(\+\w+)?://', base)
        if not proto_match:
            raise AssertionError("not a http url: %r" % base)
        self._proto = proto_match.group(1)
        impl_name = proto_match.group(2)
        if impl_name:
            # Drop the leading '+' of the qualifier.
            impl_name = impl_name[1:]
        self._impl_name = impl_name
        if base[-1] != '/':
            base = base + '/'
        super(HttpTransportBase, self).__init__(base)
        # In the future we might actually connect to the remote host
        # rather than using get_url
        # self._connection = None
        (apparent_proto, self._host,
            self._path, self._parameters,
            self._query, self._fragment) = urlparse.urlparse(self.base)
        self._qualified_proto = apparent_proto

    def abspath(self, relpath):
        """Return the full url to the given relative path.

        relpath is expected to be a (non-unicode) string.  The list handling
        below is only reachable when assertions are disabled.

        The URL returned always has the protocol scheme originally used to
        construct the transport, even if that includes an explicit
        implementation qualifier.

        :raises InvalidURL: if relpath is a unicode string.
        :raises ValueError: if a multi-segment relpath starts or ends
            with '/'.
        """
        assert isinstance(relpath, basestring)
        if isinstance(relpath, unicode):
            raise InvalidURL(relpath, 'paths must not be unicode.')
        if isinstance(relpath, basestring):
            relpath_parts = relpath.split('/')
        else:
            # TODO: Don't call this with an array - no magic interfaces
            relpath_parts = relpath[:]
        if len(relpath_parts) > 1:
            if relpath_parts[0] == '':
                raise ValueError("path %r within branch %r seems to be absolute"
                                 % (relpath, self._path))
            if relpath_parts[-1] == '':
                raise ValueError("path %r within branch %r seems to be a directory"
                                 % (relpath, self._path))
        basepath = self._path.split('/')
        if len(basepath) > 0 and basepath[-1] == '':
            basepath = basepath[:-1]
        for p in relpath_parts:
            if p == '..':
                if len(basepath) == 0:
                    # In most filesystems, a request for the parent
                    # of root, just returns root.
                    continue
                basepath.pop()
            elif p == '.' or p == '':
                continue # No-op
            else:
                basepath.append(p)
        # Possibly, we could use urlparse.urljoin() here, but
        # I'm concerned about when it chooses to strip the last
        # portion of the path, and when it doesn't.
        path = '/'.join(basepath)
        if path == '':
            path = '/'
        result = urlparse.urlunparse((self._qualified_proto,
                                    self._host, path, '', '', ''))
        return result

    def _real_abspath(self, relpath):
        """Produce absolute path, adjusting protocol if needed.

        The implementation qualifier (e.g. "+pycurl") is replaced by the
        plain "http"/"https" scheme, and the result is returned as a str.
        """
        abspath = self.abspath(relpath)
        qp = self._qualified_proto
        rp = self._proto
        if self._qualified_proto != self._proto:
            abspath = rp + abspath[len(qp):]
        if not isinstance(abspath, str):
            # escaping must be done at a higher level
            abspath = abspath.encode('ascii')
        return abspath

    def has(self, relpath):
        """Abstract: must be provided by the concrete implementation."""
        raise NotImplementedError("has() is abstract on %r" % self)

    def get(self, relpath):
        """Get the file at the given relative path.

        :param relpath: The relative path to the file
        :return: The file-like object produced by _get() for the whole file.
        """
        code, response_file = self._get(relpath, None)
        return response_file

    def _get(self, relpath, ranges):
        """Get a file, or part of a file.

        :param relpath: Path relative to transport base URL
        :param ranges: None to get the whole file;
            or [(start,end)] to fetch parts of a file.

        :returns: (http_code, result_file)

        Note that the current http implementations can only fetch one range at
        a time through this call.
        """
        raise NotImplementedError(self._get)

    def readv(self, relpath, offsets):
        """Get parts of the file at the given relative path.

        :param relpath: Path relative to transport base URL.
        :param offsets: A list of (offset, size) tuples.
        :return: A generator of (offset, data) tuples, in request order.
        :raises TransportError: if the response carries fewer bytes than
            requested, or a status other than 200 or 206.
        """
        # Ideally we would pass one big request asking for all the ranges in
        # one go; however then the server will give a multipart mime response
        # back, and we can't parse them yet.  So instead we just get one range
        # per region, and try to coalesce the regions as much as possible.
        #
        # The read-coalescing code is not quite regular enough to have a
        # single driver routine and
        # helper method in Transport.
        def do_combined_read(combined_offsets):
            # read one coalesced block
            total_size = 0
            for offset, size in combined_offsets:
                total_size += size
            mutter('readv coalesced %d reads.', len(combined_offsets))
            offset = combined_offsets[0][0]
            byte_range = (offset, offset + total_size - 1)
            code, result_file = self._get(relpath, [byte_range])
            if code == 206:
                # The body holds exactly the requested range.
                for off, size in combined_offsets:
                    result_bytes = result_file.read(size)
                    # Checked explicitly: an assert would vanish under -O
                    # and let truncated data through.
                    if len(result_bytes) != size:
                        raise TransportError(
                            'short read from %r: wanted %d bytes at offset'
                            ' %d, got %d'
                            % (relpath, size, off, len(result_bytes)))
                    yield off, result_bytes
            elif code == 200:
                # The whole file came back; cut the wanted region out of it.
                data = result_file.read(offset + total_size)[offset:offset + total_size]
                if len(data) != total_size:
                    raise TransportError(
                        'short read from %r: wanted %d bytes at offset'
                        ' %d, got %d'
                        % (relpath, total_size, offset, len(data)))
                pos = 0
                for offset, size in combined_offsets:
                    yield offset, data[pos:pos + size]
                    pos += size
                del data
            else:
                # Fail loudly instead of silently yielding nothing.
                raise TransportError(
                    'unexpected http status %r reading ranges of %r'
                    % (code, relpath))
        if not len(offsets):
            return
        pending_offsets = deque(offsets)
        combined_offsets = []
        while len(pending_offsets):
            offset, size = pending_offsets.popleft()
            if not combined_offsets:
                combined_offsets = [[offset, size]]
            else:
                if (len (combined_offsets) < 500 and
                    combined_offsets[-1][0] + combined_offsets[-1][1] == offset):
                    # compatible offset: directly follows the previous one
                    combined_offsets.append([offset, size])
                else:
                    # incompatible, or over the threshold issue a read and yield
                    pending_offsets.appendleft((offset, size))
                    for result in do_combined_read(combined_offsets):
                        yield result
                    combined_offsets = []
        # whatever is left is a single coalesced request
        if len(combined_offsets):
            for result in do_combined_read(combined_offsets):
                yield result

    def put(self, relpath, f, mode=None):
        """Copy the file-like or string object into the location.

        Always raises TransportNotPossible.

        :param relpath: Location to put the contents, relative to base.
        :param f:       File-like or string object.
        """
        raise TransportNotPossible('http PUT not supported')

    def mkdir(self, relpath, mode=None):
        """Create a directory at the given path (always refused)."""
        raise TransportNotPossible('http does not support mkdir()')

    def rmdir(self, relpath):
        """See Transport.rmdir (always refused)."""
        raise TransportNotPossible('http does not support rmdir()')

    def append(self, relpath, f):
        """Append the text in the file-like object into the final
        location (always refused).
        """
        raise TransportNotPossible('http does not support append()')

    def copy(self, rel_from, rel_to):
        """Copy the item at rel_from to the location at rel_to
        (always refused).
        """
        raise TransportNotPossible('http does not support copy()')

    def copy_to(self, relpaths, other, mode=None, pb=None):
        """Copy a set of entries from self into another Transport.

        :param relpaths: A list/generator of entries to be copied.
        :param other: Destination transport; must not be an http transport.
        :raises TransportNotPossible: if other is an HttpTransportBase.

        TODO: if other is LocalTransport, is it possible to
              do better than put(get())?
        """
        # At this point HttpTransport might be able to check and see if
        # the remote location is the same, and rather than download, and
        # then upload, it could just issue a remote copy_this command.
        if isinstance(other, HttpTransportBase):
            raise TransportNotPossible('http cannot be the target of copy_to()')
        else:
            return super(HttpTransportBase, self).\
                    copy_to(relpaths, other, mode=mode, pb=pb)

    def move(self, rel_from, rel_to):
        """Move the item at rel_from to the location at rel_to
        (always refused).
        """
        raise TransportNotPossible('http does not support move()')

    def delete(self, relpath):
        """Delete the item at relpath (always refused)."""
        raise TransportNotPossible('http does not support delete()')

    def is_readonly(self):
        """See Transport.is_readonly."""
        return True

    def listable(self):
        """See Transport.listable."""
        return False

    def stat(self, relpath):
        """Return the stat information for a file (always refused)."""
        raise TransportNotPossible('http does not support stat()')

    def lock_read(self, relpath):
        """Lock the given file for shared (read) access.

        :return: A lock object, which should be passed to Transport.unlock()
        """
        # The old RemoteBranch ignore lock for reading, so we will
        # continue that tradition and return a bogus lock object.
        class BogusLock(object):
            def __init__(self, path):
                self.path = path
            def unlock(self):
                pass
        return BogusLock(relpath)

    def lock_write(self, relpath):
        """Lock the given file for exclusive (write) access.

        WARNING: many transports do not support this, so try to avoid using
        it.  This transport always raises TransportNotPossible.

        :return: A lock object, which should be passed to Transport.unlock()
        """
        raise TransportNotPossible('http does not support lock_write()')

    def clone(self, offset=None):
        """Return a new HttpTransportBase with root at self.base + offset

        For now HttpTransportBase does not actually connect, so just return
        a new object of the same class.
        """
        if offset is None:
            return self.__class__(self.base)
        else:
            return self.__class__(self.abspath(offset))
1530.1.1 by Robert Collins
Minimal infrastructure to test TransportTestProviderAdapter.
345
1530.1.3 by Robert Collins
transport implementations now tested consistently.
346
#---------------- test server facilities ----------------
1540.3.6 by Martin Pool
[merge] update from bzr.dev
347
# TODO: load these only when running tests
1530.1.3 by Robert Collins
transport implementations now tested consistently.
348
1636.1.1 by Robert Collins
Fix calling relpath() and abspath() on transports at their root.
349
1530.1.3 by Robert Collins
transport implementations now tested consistently.
350
class WebserverNotAvailable(Exception):
    """Exception type for reporting that no test web server is available."""
352
353
354
class BadWebserverPath(ValueError):
    """A path is not inside the directory tree it was checked against.

    Expects exactly two constructor arguments: the path, then the directory.
    """

    def __str__(self):
        # self.args supplies both %s values.
        template = 'path %s is not in %s'
        return template % self.args
357
358
359
class TestingHTTPRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):
    """Request handler used by the test http server.

    Log lines are sent to the server's test case, reads of the request line
    are retried when the socket reports EAGAIN/EWOULDBLOCK, and on win32
    url paths are decoded as utf8.
    """

    def log_message(self, format, *args):
        """Forward a log line to the test case attached to the server.

        The line also carries the request's referer and user-agent headers
        ('-' when absent).
        """
        self.server.test_case.log('webserver - %s - - [%s] %s "%s" "%s"',
                                  self.address_string(),
                                  self.log_date_time_string(),
                                  format % args,
                                  self.headers.get('referer', '-'),
                                  self.headers.get('user-agent', '-'))

    def handle_one_request(self):
        """Handle a single HTTP request.

        You normally don't need to override this method; see the class
        __doc__ string for information on how to handle specific HTTP
        commands such as GET and POST.

        Reading the request line is retried up to 10 times on
        EAGAIN/EWOULDBLOCK.  If every attempt fails that way, the connection
        is closed.
        """
        # Start from an empty request line so that exhausting the retries
        # never leaves the attribute unset, or holding the previous
        # request's line.
        self.raw_requestline = ''
        for attempt in range(1, 11): # Don't try more than 10 times
            try:
                self.raw_requestline = self.rfile.readline()
            except socket.error:
                # sys.exc_info() avoids version-specific except syntax.
                e = sys.exc_info()[1]
                if e.args[0] in (errno.EAGAIN, errno.EWOULDBLOCK):
                    # omitted for now because some tests look at the log of
                    # the server and expect to see no errors.  see recent
                    # email thread. -- mbp 20051021.
                    ## self.log_message('EAGAIN (%d) while reading from raw_requestline' % attempt)
                    time.sleep(0.01)
                    continue
                raise
            else:
                break
        if not self.raw_requestline:
            self.close_connection = 1
            return
        if not self.parse_request(): # An error code has been sent, just exit
            return
        mname = 'do_' + self.command
        if not hasattr(self, mname):
            self.send_error(501, "Unsupported method (%r)" % self.command)
            return
        method = getattr(self, mname)
        method()

    if sys.platform == 'win32':
        # On win32 you cannot access non-ascii filenames without
        # decoding them into unicode first.
        # However, under Linux, you can access bytestream paths
        # without any problems. If this function was always active
        # it would probably break tests when LANG=C was set
        def translate_path(self, path):
            """Translate a /-separated PATH to the local filename syntax.

            For bzr, all url paths are considered to be utf8 paths.
            On Linux, you can access these paths directly over the bytestream
            request, but on win32, you must decode them, and access them
            as Unicode files.

            :return: A unicode path below the current working directory.
            """
            # abandon query parameters
            path = urlparse.urlparse(path)[2]
            path = posixpath.normpath(urllib.unquote(path))
            path = path.decode('utf-8')
            words = path.split('/')
            words = filter(None, words)
            path = os.getcwdu()
            for word in words:
                # Keep only the final component of each word, so drive
                # letters and embedded separators are discarded.
                drive, word = os.path.splitdrive(word)
                head, word = os.path.split(word)
                if word in (os.curdir, os.pardir): continue
                path = os.path.join(path, word)
            return path
1711.4.14 by John Arbash Meinel
Custom HttpRequestHandler which treats all paths as utf8 encoded
430
1185.50.83 by John Arbash Meinel
[merge] James Henstridge: Set Agent string in http headers, add tests for it.
431
1530.1.3 by Robert Collins
transport implementations now tested consistently.
432
class TestingHTTPServer(BaseHTTPServer.HTTPServer):
    """An HTTPServer that remembers the test case object it was given."""

    def __init__(self, server_address, RequestHandlerClass, test_case):
        """Create the server and record test_case on it.

        :param server_address: passed through to HTTPServer.__init__.
        :param RequestHandlerClass: passed through to HTTPServer.__init__.
        :param test_case: stored as self.test_case.
        """
        base_init = BaseHTTPServer.HTTPServer.__init__
        base_init(self, server_address, RequestHandlerClass)
        self.test_case = test_case
437
438
class HttpServer(Server):
1530.1.1 by Robert Collins
Minimal infrastructure to test TransportTestProviderAdapter.
439
    """A test server for http transports."""
1530.1.3 by Robert Collins
transport implementations now tested consistently.
440
1540.3.24 by Martin Pool
Add new protocol 'http+pycurl' that always uses PyCurl.
441
    # used to form the url that connects to this server
442
    _url_protocol = 'http'
443
1530.1.3 by Robert Collins
transport implementations now tested consistently.
444
    def _http_start(self):
        """Serve http requests until self._http_running becomes false.

        This is the target of the thread started by setUp.  It binds a
        TestingHTTPServer to a free port on localhost, publishes the
        resulting url in self._http_base_url, releases the
        self._http_starting lock and then handles requests one at a time.
        """
        try:
            httpd = TestingHTTPServer(('localhost', 0),
                                      TestingHTTPRequestHandler,
                                      self)
            # port 0 was requested, so ask the socket which port it got
            port = httpd.socket.getsockname()[1]
            self._http_base_url = '%s://localhost:%s/' % (self._url_protocol,
                                                          port)
        finally:
            # Release the lock even if the server could not be created,
            # otherwise anything waiting on it in _get_remote_url would
            # block forever.
            self._http_starting.release()
        # a short timeout lets the loop notice that _http_running has been
        # cleared instead of blocking in accept() indefinitely
        httpd.socket.settimeout(0.1)

        try:
            while self._http_running:
                try:
                    httpd.handle_request()
                except socket.timeout:
                    pass
        finally:
            # do not leak the listening socket once serving has stopped
            httpd.server_close()
459
460
    def _get_remote_url(self, path):
461
        path_parts = path.split(os.path.sep)
462
        if os.path.isabs(path):
463
            if path_parts[:len(self._local_path_parts)] != \
464
                   self._local_path_parts:
465
                raise BadWebserverPath(path, self.test_dir)
466
            remote_path = '/'.join(path_parts[len(self._local_path_parts):])
467
        else:
468
            remote_path = '/'.join(path_parts)
469
470
        self._http_starting.acquire()
471
        self._http_starting.release()
472
        return self._http_base_url + remote_path
473
1553.1.3 by James Henstridge
Make bzrlib.transport.http.HttpServer output referer and user agent as in
474
    def log(self, format, *args):
1530.1.3 by Robert Collins
transport implementations now tested consistently.
475
        """Capture Server log output."""
1553.1.3 by James Henstridge
Make bzrlib.transport.http.HttpServer output referer and user agent as in
476
        self.logs.append(format % args)
1530.1.3 by Robert Collins
transport implementations now tested consistently.
477
478
    def setUp(self):
        """See bzrlib.transport.Server.setUp."""
        home = os.getcwdu()
        self._home_dir = home
        self._local_path_parts = home.split(os.path.sep)
        # The lock is created held; the server thread releases it from
        # _http_start.
        startup_lock = threading.Lock()
        startup_lock.acquire()
        self._http_starting = startup_lock
        self._http_running = True
        self._http_base_url = None
        server_thread = threading.Thread(target=self._http_start)
        server_thread.setDaemon(True)
        self._http_thread = server_thread
        server_thread.start()
        # Remember any http_proxy setting and take it out of the
        # environment; tearDown puts it back.
        proxy = os.environ.get("http_proxy")
        self._http_proxy = proxy
        if proxy is not None:
            del os.environ["http_proxy"]
        self.logs = []
1530.1.3 by Robert Collins
transport implementations now tested consistently.
493
494
    def tearDown(self):
495
        """See bzrlib.transport.Server.tearDown."""
496
        self._http_running = False
497
        self._http_thread.join()
498
        if self._http_proxy is not None:
499
            import os
500
            os.environ["http_proxy"] = self._http_proxy
501
502
    def get_url(self):
503
        """See bzrlib.transport.Server.get_url."""
504
        return self._get_remote_url(self._home_dir)
1530.1.9 by Robert Collins
Test bogus urls with http in the new infrastructure.
505
        
506
    def get_bogus_url(self):
        """See bzrlib.transport.Server.get_bogus_url."""
        # This url is chosen to try to prevent trouble with proxies,
        # weird dns, etc.
        bogus_url = 'http://127.0.0.1:1/'
        return bogus_url
1530.1.9 by Robert Collins
Test bogus urls with http in the new infrastructure.
511