~bzr-pqm/bzr/bzr.dev

1540.3.3 by Martin Pool
Review updates of pycurl transport
1
# Copyright (C) 2005, 2006 Canonical Ltd
1540.3.18 by Martin Pool
Style review fixes (thanks robertc)
2
#
1185.11.19 by John Arbash Meinel
Testing put and append, also testing agaist file-like objects as well as strings.
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
1540.3.18 by Martin Pool
Style review fixes (thanks robertc)
7
#
1185.11.19 by John Arbash Meinel
Testing put and append, also testing agaist file-like objects as well as strings.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
1540.3.18 by Martin Pool
Style review fixes (thanks robertc)
12
#
1185.11.19 by John Arbash Meinel
Testing put and append, also testing agaist file-like objects as well as strings.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
1540.3.3 by Martin Pool
Review updates of pycurl transport
16
17
"""Base implementation of Transport over http.
18
19
There are separate implementation modules for each http client implementation.
907.1.21 by John Arbash Meinel
Adding http transport as a valid transport protocol.
20
"""
21
1540.3.3 by Martin Pool
Review updates of pycurl transport
22
import errno
1540.3.6 by Martin Pool
[merge] update from bzr.dev
23
import os
1594.3.4 by Robert Collins
Change urllib ranges implementation to be one coalesced range per http request.
24
from collections import deque
1540.3.3 by Martin Pool
Review updates of pycurl transport
25
from cStringIO import StringIO
1540.3.23 by Martin Pool
Allow urls like http+pycurl://host/ to use a particular impl
26
import re
1540.3.3 by Martin Pool
Review updates of pycurl transport
27
import urlparse
28
import urllib
1530.1.11 by Robert Collins
Push the transport permutations list into each transport module allowing for automatic testing of new modules that are registered as transports.
29
from warnings import warn
1540.3.3 by Martin Pool
Review updates of pycurl transport
30
1540.3.6 by Martin Pool
[merge] update from bzr.dev
31
from bzrlib.transport import Transport, register_transport, Server
32
from bzrlib.errors import (TransportNotPossible, NoSuchFile,
1185.31.44 by John Arbash Meinel
Cleaned up Exceptions for all transports.
33
                           TransportError, ConnectionError)
1185.11.1 by John Arbash Meinel
(broken) Transport work is merged in. Tests do not pass yet.
34
from bzrlib.errors import BzrError, BzrCheckError
1393.2.3 by John Arbash Meinel
Fixing typos, updating stores, getting tests to pass.
35
from bzrlib.branch import Branch
1185.11.1 by John Arbash Meinel
(broken) Transport work is merged in. Tests do not pass yet.
36
from bzrlib.trace import mutter
1540.3.6 by Martin Pool
[merge] update from bzr.dev
37
# TODO: load these only when running http tests
38
import BaseHTTPServer, SimpleHTTPServer, socket, time
39
import threading
1540.2.1 by Röbey Pointer
change http url parsing to use urlparse, and use the ui_factory to ask for a password if necessary
40
from bzrlib.ui import ui_factory
1540.3.6 by Martin Pool
[merge] update from bzr.dev
41
907.1.57 by John Arbash Meinel
Trying to get pipelined http library working + tests.
42
1185.40.20 by Robey Pointer
allow user:pass@ info in http urls to be used for auth; this should be easily expandable later to use auth config files
43
def extract_auth(url, password_manager):
    """Strip user:password credentials from an HTTP/HTTPS url.

    Credentials found in the url are handed to password_manager; if the url
    names a user but no password, the password is requested through
    ui_factory.get_password().

    :param url: Absolute url whose scheme is http or https, optionally
        qualified with an implementation suffix (e.g. http+pycurl://).
    :param password_manager: Object providing
        add_password(realm, host, username, password).
    :return: The url minus the auth parameters (which confuse urllib2).
    :raises AssertionError: if url is not an absolute http(s) url.
    """
    if not re.match(r'^(https?)(\+\w+)?://', url):
        # Raised explicitly rather than through an assert statement so the
        # check is still performed under "python -O".
        raise AssertionError('invalid absolute url %r' % url)
    scheme, netloc, path, query, fragment = urlparse.urlsplit(url)

    if '@' in netloc:
        auth, netloc = netloc.split('@', 1)
        if ':' in auth:
            username, password = auth.split(':', 1)
        else:
            username, password = auth, None
        # The password is registered against the bare host, without any port.
        host = netloc.split(':', 1)[0]
        username = urllib.unquote(username)
        if password is not None:
            password = urllib.unquote(password)
        else:
            # Both placeholders need the 's' conversion; without it the
            # prompt cannot be formatted with the user/host keywords.
            password = ui_factory.get_password(
                prompt='HTTP %(user)s@%(host)s password',
                user=username, host=host)
        password_manager.add_password(None, host, username, password)
    return urlparse.urlunsplit((scheme, netloc, path, query, fragment))
1553.1.5 by James Henstridge
Make HTTP transport has() method do HEAD requests, and update test to
71
1185.50.83 by John Arbash Meinel
[merge] James Henstridge: Set Agent string in http headers, add tests for it.
72
1540.3.1 by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib.
73
class HttpTransportBase(Transport):
    """Base class for http implementations.

    Does URL parsing, etc, but not any network IO.

    The protocol can be given as e.g. http+urllib://host/ to use a particular
    implementation.
    """

    # _proto: "http" or "https"
    # _qualified_proto: may have "+pycurl", etc

    # Maximum number of adjacent (offset, size) ranges that readv() folds
    # into a single http request.
    _max_readv_combine = 500

    def __init__(self, base):
        """Set the base path where files will be stored.

        :param base: Absolute http or https url, optionally qualified with
            an implementation name (e.g. http+pycurl://host/).  A trailing
            '/' is appended when missing.
        :raises AssertionError: if base does not look like a http(s) url.
        """
        proto_match = re.match(r'^(https?)(\+\w+)?://', base)
        if not proto_match:
            raise AssertionError("not a http url: %r" % base)
        self._proto = proto_match.group(1)
        impl_name = proto_match.group(2)
        if impl_name:
            # group(2) still carries the leading '+'
            impl_name = impl_name[1:]
        self._impl_name = impl_name
        if base[-1] != '/':
            base = base + '/'
        super(HttpTransportBase, self).__init__(base)
        # In the future we might actually connect to the remote host
        # rather than using get_url
        # self._connection = None
        (apparent_proto, self._host,
            self._path, self._parameters,
            self._query, self._fragment) = urlparse.urlparse(self.base)
        self._qualified_proto = apparent_proto

    def abspath(self, relpath):
        """Return the full url to the given relative path.

        The URL returned always has the protocol scheme originally used to
        construct the transport, even if that includes an explicit
        implementation qualifier.

        :param relpath: '/'-separated path, relative to the transport base.
            Must be a string.
        :raises ValueError: if a multi-component relpath starts with '/'
            (absolute) or ends with '/' (directory).
        """
        assert isinstance(relpath, basestring)
        relpath_parts = relpath.split('/')
        if len(relpath_parts) > 1:
            if relpath_parts[0] == '':
                raise ValueError("path %r within branch %r seems to be absolute"
                                 % (relpath, self._path))
            if relpath_parts[-1] == '':
                raise ValueError("path %r within branch %r seems to be a directory"
                                 % (relpath, self._path))
        basepath = self._path.split('/')
        if basepath and basepath[-1] == '':
            # drop the empty component produced by the trailing '/'
            basepath = basepath[:-1]
        for p in relpath_parts:
            if p == '..':
                if len(basepath) == 0:
                    # In most filesystems, a request for the parent
                    # of root, just returns root.
                    continue
                basepath.pop()
            elif p == '.' or p == '':
                continue # No-op
            else:
                basepath.append(p)
        # Possibly, we could use urlparse.urljoin() here, but
        # I'm concerned about when it chooses to strip the last
        # portion of the path, and when it doesn't.
        path = '/'.join(basepath)
        if path == '':
            path = '/'
        return urlparse.urlunparse((self._qualified_proto,
                                    self._host, path, '', '', ''))

    def _real_abspath(self, relpath):
        """Produce absolute path, adjusting protocol if needed.

        The implementation qualifier (e.g. "+pycurl") is removed from the
        scheme, and the result is always a plain str.
        """
        abspath = self.abspath(relpath)
        qualified = self._qualified_proto
        if qualified != self._proto:
            abspath = self._proto + abspath[len(qualified):]
        if not isinstance(abspath, str):
            # escaping must be done at a higher level
            abspath = abspath.encode('ascii')
        return abspath

    def has(self, relpath):
        """Abstract here; always raises NotImplementedError."""
        raise NotImplementedError("has() is abstract on %r" % self)

    def get(self, relpath):
        """Get the file at the given relative path.

        :param relpath: The relative path to the file
        :return: The file-like object returned by _get().
        """
        code, response_file = self._get(relpath, None)
        return response_file

    def _get(self, relpath, ranges):
        """Get a file, or part of a file.

        :param relpath: Path relative to transport base URL
        :param ranges: None to get the whole file;
            or [(start,end)] to fetch parts of a file.

        :returns: (http_code, result_file)

        Note that the current http implementations can only fetch one range at
        a time through this call.
        """
        raise NotImplementedError(self._get)

    def readv(self, relpath, offsets):
        """Get parts of the file at the given relative path.

        :param offsets: An iterable of (offset, size) tuples.
        :return: A generator of (offset, data) tuples
        """
        # Ideally we would pass one big request asking for all the ranges in
        # one go; however then the server will give a multipart mime response
        # back, and we can't parse them yet.  So instead we just get one range
        # per region, and try to coalesce the regions as much as possible.
        combined = []
        for offset, size in offsets:
            if combined:
                last_offset, last_size = combined[-1]
                if (len(combined) >= self._max_readv_combine
                    or last_offset + last_size != offset):
                    # incompatible, or over the threshold: issue a read
                    # and start a new run with the current range
                    for result in self._read_combined(relpath, combined):
                        yield result
                    combined = []
            combined.append((offset, size))
        # whatever is left is a single coalesced request
        if combined:
            for result in self._read_combined(relpath, combined):
                yield result

    def _read_combined(self, relpath, combined_offsets):
        """Fetch one run of adjacent ranges with a single _get() call.

        :param combined_offsets: Non-empty list of (offset, size) pairs, each
            starting where the previous one ends.
        :return: A generator of (offset, data) tuples, one per input pair.
        :raises TransportError: if _get() reports a status other than 200
            or 206.
        """
        total_size = sum([size for _, size in combined_offsets])
        mutter('readv coalesced %d reads.', len(combined_offsets))
        start = combined_offsets[0][0]
        # the end of the byte range is inclusive
        byte_range = (start, start + total_size - 1)
        code, result_file = self._get(relpath, [byte_range])
        if code == 206:
            # partial content: the body starts at the requested offset
            for offset, size in combined_offsets:
                result_bytes = result_file.read(size)
                assert len(result_bytes) == size
                yield offset, result_bytes
        elif code == 200:
            # The whole file came back (presumably the range was ignored),
            # so skip the bytes in front of the wanted region.
            data = result_file.read(start + total_size)[start:]
            pos = 0
            for offset, size in combined_offsets:
                yield offset, data[pos:pos + size]
                pos += size
            del data
        else:
            # Previously such a response produced no results at all.
            raise TransportError('unexpected http status %r reading ranges'
                                 ' of %r' % (code, relpath))

    def put(self, relpath, f, mode=None):
        """Copy the file-like or string object into the location.

        Not supported over http: always raises TransportNotPossible.

        :param relpath: Location to put the contents, relative to base.
        :param f:       File-like or string object.
        """
        raise TransportNotPossible('http PUT not supported')

    def mkdir(self, relpath, mode=None):
        """Create a directory at the given path.

        Not supported over http: always raises TransportNotPossible.
        """
        raise TransportNotPossible('http does not support mkdir()')

    def rmdir(self, relpath):
        """See Transport.rmdir.  Always raises TransportNotPossible."""
        raise TransportNotPossible('http does not support rmdir()')

    def append(self, relpath, f):
        """Append the text in the file-like object into the final
        location.

        Not supported over http: always raises TransportNotPossible.
        """
        raise TransportNotPossible('http does not support append()')

    def copy(self, rel_from, rel_to):
        """Copy the item at rel_from to the location at rel_to.

        Not supported over http: always raises TransportNotPossible.
        """
        raise TransportNotPossible('http does not support copy()')

    def copy_to(self, relpaths, other, mode=None, pb=None):
        """Copy a set of entries from self into another Transport.

        :param relpaths: A list/generator of entries to be copied.
        :raises TransportNotPossible: if other is itself a http transport.

        TODO: if other is LocalTransport, is it possible to
              do better than put(get())?
        """
        # At this point HttpTransport might be able to check and see if
        # the remote location is the same, and rather than download, and
        # then upload, it could just issue a remote copy_this command.
        if isinstance(other, HttpTransportBase):
            raise TransportNotPossible('http cannot be the target of copy_to()')
        else:
            return super(HttpTransportBase, self).\
                    copy_to(relpaths, other, mode=mode, pb=pb)

    def move(self, rel_from, rel_to):
        """Move the item at rel_from to the location at rel_to.

        Not supported over http: always raises TransportNotPossible.
        """
        raise TransportNotPossible('http does not support move()')

    def delete(self, relpath):
        """Delete the item at relpath.

        Not supported over http: always raises TransportNotPossible.
        """
        raise TransportNotPossible('http does not support delete()')

    def is_readonly(self):
        """See Transport.is_readonly."""
        return True

    def listable(self):
        """See Transport.listable."""
        return False

    def stat(self, relpath):
        """Return the stat information for a file.

        Not supported over http: always raises TransportNotPossible.
        """
        raise TransportNotPossible('http does not support stat()')

    def lock_read(self, relpath):
        """Lock the given file for shared (read) access.
        :return: A lock object, which should be passed to Transport.unlock()
        """
        # The old RemoteBranch ignore lock for reading, so we will
        # continue that tradition and return a bogus lock object.
        class BogusLock(object):
            def __init__(self, path):
                self.path = path
            def unlock(self):
                pass
        return BogusLock(relpath)

    def lock_write(self, relpath):
        """Lock the given file for exclusive (write) access.
        WARNING: many transports do not support this, so trying avoid using it

        Not supported over http: always raises TransportNotPossible.

        :return: A lock object, which should be passed to Transport.unlock()
        """
        raise TransportNotPossible('http does not support lock_write()')

    def clone(self, offset=None):
        """Return a new HttpTransportBase with root at self.base + offset
        For now HttpTransportBase does not actually connect, so just return
        a new HttpTransportBase object.
        """
        if offset is None:
            return self.__class__(self.base)
        else:
            return self.__class__(self.abspath(offset))
1530.1.1 by Robert Collins
Minimal infrastructure to test TransportTestProviderAdapter.
342
1530.1.3 by Robert Collins
transport implementations now tested consistently.
343
#---------------- test server facilities ----------------
1540.3.6 by Martin Pool
[merge] update from bzr.dev
344
# TODO: load these only when running tests
1530.1.3 by Robert Collins
transport implementations now tested consistently.
345
1636.1.1 by Robert Collins
Fix calling relpath() and abspath() on transports at their root.
346
1530.1.3 by Robert Collins
transport implementations now tested consistently.
347
class WebserverNotAvailable(Exception):
    """Exception type signalling that the test web server is not available."""
349
350
351
class BadWebserverPath(ValueError):
    """A path that is not inside the expected directory.

    Construct with two arguments, (path, directory); both are substituted
    into the message produced by str().
    """

    def __str__(self):
        template = 'path %s is not in %s'
        return template % self.args
354
355
356
class TestingHTTPRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):
357
358
    def log_message(self, format, *args):
1553.1.3 by James Henstridge
Make bzrlib.transport.http.HttpServer output referer and user agent as in
359
        self.server.test_case.log('webserver - %s - - [%s] %s "%s" "%s"',
1530.1.3 by Robert Collins
transport implementations now tested consistently.
360
                                  self.address_string(),
361
                                  self.log_date_time_string(),
1553.1.3 by James Henstridge
Make bzrlib.transport.http.HttpServer output referer and user agent as in
362
                                  format % args,
363
                                  self.headers.get('referer', '-'),
364
                                  self.headers.get('user-agent', '-'))
1530.1.3 by Robert Collins
transport implementations now tested consistently.
365
366
    def handle_one_request(self):
367
        """Handle a single HTTP request.
368
369
        You normally don't need to override this method; see the class
370
        __doc__ string for information on how to handle specific HTTP
371
        commands such as GET and POST.
372
373
        """
374
        for i in xrange(1,11): # Don't try more than 10 times
375
            try:
376
                self.raw_requestline = self.rfile.readline()
377
            except socket.error, e:
378
                if e.args[0] in (errno.EAGAIN, errno.EWOULDBLOCK):
379
                    # omitted for now because some tests look at the log of
380
                    # the server and expect to see no errors.  see recent
381
                    # email thread. -- mbp 20051021. 
382
                    ## self.log_message('EAGAIN (%d) while reading from raw_requestline' % i)
383
                    time.sleep(0.01)
384
                    continue
385
                raise
386
            else:
387
                break
388
        if not self.raw_requestline:
389
            self.close_connection = 1
390
            return
391
        if not self.parse_request(): # An error code has been sent, just exit
392
            return
393
        mname = 'do_' + self.command
394
        if not hasattr(self, mname):
395
            self.send_error(501, "Unsupported method (%r)" % self.command)
396
            return
397
        method = getattr(self, mname)
398
        method()
399
1185.50.83 by John Arbash Meinel
[merge] James Henstridge: Set Agent string in http headers, add tests for it.
400
1530.1.3 by Robert Collins
transport implementations now tested consistently.
401
class TestingHTTPServer(BaseHTTPServer.HTTPServer):
    """HTTPServer that keeps a reference to the object receiving its logs."""

    def __init__(self, server_address, RequestHandlerClass, test_case):
        """Create the server, then remember test_case on the instance.

        :param server_address: Passed through to HTTPServer.
        :param RequestHandlerClass: Passed through to HTTPServer.
        :param test_case: Stored as self.test_case.
        """
        base_init = BaseHTTPServer.HTTPServer.__init__
        base_init(self, server_address, RequestHandlerClass)
        self.test_case = test_case
406
407
class HttpServer(Server):
    """A test server for http transports."""

    # used to form the url that connects to this server
    _url_protocol = 'http'

    def _http_start(self):
        """Body of the server thread.

        Binds a TestingHTTPServer to a free localhost port, publishes its
        base url, then handles requests until _http_running is cleared.
        """
        try:
            httpd = TestingHTTPServer(('localhost', 0),
                                      TestingHTTPRequestHandler,
                                      self)
            host, port = httpd.socket.getsockname()
            self._http_base_url = '%s://localhost:%s/' % (self._url_protocol, port)
        finally:
            # Always let waiters in _get_remote_url() proceed, even when the
            # server could not be created; otherwise they block forever.
            self._http_starting.release()
        try:
            # A short timeout lets the loop notice _http_running changing.
            httpd.socket.settimeout(0.1)
            while self._http_running:
                try:
                    httpd.handle_request()
                except socket.timeout:
                    pass
        finally:
            # release the listening socket once the server stops
            httpd.server_close()

    def _get_remote_url(self, path):
        """Return the url served for a local path.

        :param path: Local path; when absolute it must be inside the
            directory recorded by setUp().
        :raises BadWebserverPath: if an absolute path is outside that
            directory.
        :raises WebserverNotAvailable: if the server thread did not manage
            to publish a base url.
        """
        path_parts = path.split(os.path.sep)
        if os.path.isabs(path):
            prefix_len = len(self._local_path_parts)
            if path_parts[:prefix_len] != self._local_path_parts:
                raise BadWebserverPath(path, self._home_dir)
            remote_path = '/'.join(path_parts[prefix_len:])
        else:
            remote_path = '/'.join(path_parts)

        # Block until the server thread has finished starting up.
        self._http_starting.acquire()
        self._http_starting.release()
        if self._http_base_url is None:
            raise WebserverNotAvailable('http test server failed to start')
        return self._http_base_url + remote_path

    def log(self, format, *args):
        """Capture Server log output."""
        self.logs.append(format % args)

    def setUp(self):
        """See bzrlib.transport.Server.setUp."""
        # Created before the thread starts so log() can always append.
        self.logs = []
        self._home_dir = os.getcwdu()
        self._local_path_parts = self._home_dir.split(os.path.sep)
        self._http_starting = threading.Lock()
        # Held until the server thread has published its url.
        self._http_starting.acquire()
        self._http_running = True
        self._http_base_url = None
        self._http_thread = threading.Thread(target=self._http_start)
        self._http_thread.setDaemon(True)
        self._http_thread.start()
        # Remove any http_proxy setting while the server is up; tearDown()
        # puts it back.
        self._http_proxy = os.environ.get("http_proxy")
        if self._http_proxy is not None:
            del os.environ["http_proxy"]

    def tearDown(self):
        """See bzrlib.transport.Server.tearDown."""
        self._http_running = False
        self._http_thread.join()
        if self._http_proxy is not None:
            os.environ["http_proxy"] = self._http_proxy

    def get_url(self):
        """See bzrlib.transport.Server.get_url."""
        return self._get_remote_url(self._home_dir)

    def get_bogus_url(self):
        """See bzrlib.transport.Server.get_bogus_url."""
        # this is chosen to try to prevent trouble with proxies, weird dns,
        # etc
        return 'http://127.0.0.1:1/'
1530.1.9 by Robert Collins
Test bogus urls with http in the new infrastructure.
480