~bzr-pqm/bzr/bzr.dev

1540.3.3 by Martin Pool
Review updates of pycurl transport
1
# Copyright (C) 2005, 2006 Canonical Ltd
1540.3.18 by Martin Pool
Style review fixes (thanks robertc)
2
#
1185.11.19 by John Arbash Meinel
Testing put and append, also testing agaist file-like objects as well as strings.
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
1540.3.18 by Martin Pool
Style review fixes (thanks robertc)
7
#
1185.11.19 by John Arbash Meinel
Testing put and append, also testing agaist file-like objects as well as strings.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
1540.3.18 by Martin Pool
Style review fixes (thanks robertc)
12
#
1185.11.19 by John Arbash Meinel
Testing put and append, also testing agaist file-like objects as well as strings.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
1540.3.3 by Martin Pool
Review updates of pycurl transport
16
17
"""Base implementation of Transport over http.
18
19
There are separate implementation modules for each http client implementation.
907.1.21 by John Arbash Meinel
Adding http transport as a valid transport protocol.
20
"""
21
1711.4.14 by John Arbash Meinel
Custom HttpRequestHandler which treats all paths as utf8 encoded
22
from cStringIO import StringIO
1540.3.3 by Martin Pool
Review updates of pycurl transport
23
import errno
1786.1.25 by John Arbash Meinel
Test that we can extract headers properly.
24
import mimetools
1540.3.6 by Martin Pool
[merge] update from bzr.dev
25
import os
1711.4.14 by John Arbash Meinel
Custom HttpRequestHandler which treats all paths as utf8 encoded
26
import posixpath
1540.3.23 by Martin Pool
Allow urls like http+pycurl://host/ to use a particular impl
27
import re
1711.4.15 by John Arbash Meinel
Only interpret HTTP paths as utf8 on win32
28
import sys
1540.3.3 by Martin Pool
Review updates of pycurl transport
29
import urlparse
30
import urllib
1530.1.11 by Robert Collins
Push the transport permutations list into each transport module allowing for automatic testing of new modules that are registered as transports.
31
from warnings import warn
1540.3.3 by Martin Pool
Review updates of pycurl transport
32
1786.1.6 by John Arbash Meinel
Missed a couple of imports
33
# TODO: load these only when running http tests
34
import BaseHTTPServer, SimpleHTTPServer, socket, time
35
import threading
36
1786.1.42 by John Arbash Meinel
Update _extract_headers, make it less generic, and non recursive.
37
from bzrlib import errors
1540.3.6 by Martin Pool
[merge] update from bzr.dev
38
from bzrlib.errors import (TransportNotPossible, NoSuchFile,
1685.1.8 by John Arbash Meinel
Re-allow a couple more tests, fix a bug in http, non_ascii tests still fail.
39
                           TransportError, ConnectionError, InvalidURL)
1393.2.3 by John Arbash Meinel
Fixing typos, updating stores, getting tests to pass.
40
from bzrlib.branch import Branch
1185.11.1 by John Arbash Meinel
(broken) Transport work is merged in. Tests do not pass yet.
41
from bzrlib.trace import mutter
2018.2.2 by Andrew Bennetts
Implement HTTP smart server.
42
from bzrlib.transport import (
43
    get_transport,
44
    register_transport,
45
    Server,
46
    smart,
47
    Transport,
48
    )
1786.1.8 by John Arbash Meinel
[merge] Johan Rydberg test updates
49
from bzrlib.transport.http.response import (HttpMultipartRangeResponse,
50
                                            HttpRangeResponse)
1540.2.1 by Röbey Pointer
change http url parsing to use urlparse, and use the ui_factory to ask for a password if necessary
51
from bzrlib.ui import ui_factory
1540.3.6 by Martin Pool
[merge] update from bzr.dev
52
907.1.57 by John Arbash Meinel
Trying to get pipelined http library working + tests.
53
1185.40.20 by Robey Pointer
allow user:pass@ info in http urls to be used for auth; this should be easily expandable later to use auth config files
54
def extract_auth(url, password_manager):
    """Strip auth parameters from an HTTP/HTTPS url and register them.

    A ``user[:password]@`` prefix in the network location is removed from
    the url (it confuses urllib2) and handed to the given password manager.
    When the url names a user but no password, the password is requested
    through ``ui_factory.get_password``.

    :param url: Absolute url whose scheme is http or https, optionally
        qualified with an implementation name (e.g. ``http+urllib://``).
    :param password_manager: Object providing
        ``add_password(realm, host, username, password)``.  It is only
        called when the url carries auth parameters, and always with a
        realm of None.
    :return: The url, minus the auth parameters.
    :raises AssertionError: if url is not an absolute http(s) url.
    """
    if not re.match(r'^(https?)(\+\w+)?://', url):
        # Raised explicitly instead of via ``assert`` so that the check is
        # still performed when python runs with -O.
        raise AssertionError('invalid absolute url %r' % url)
    scheme, netloc, path, query, fragment = urlparse.urlsplit(url)

    if '@' in netloc:
        auth, netloc = netloc.split('@', 1)
        if ':' in auth:
            username, password = auth.split(':', 1)
        else:
            username, password = auth, None
        # Credentials are registered against the bare host, without a port.
        host = netloc.split(':', 1)[0]
        username = urllib.unquote(username)
        if password is not None:
            password = urllib.unquote(password)
        else:
            # The prompt is a %-style template for the user/host keyword
            # arguments (presumably interpolated by ui_factory -- confirm).
            # Each placeholder needs its 's' conversion character,
            # otherwise the interpolation raises ValueError.
            password = ui_factory.get_password(
                prompt='HTTP %(user)s@%(host)s password',
                user=username, host=host)
        password_manager.add_password(None, host, username, password)
    url = urlparse.urlunsplit((scheme, netloc, path, query, fragment))
    return url
1553.1.5 by James Henstridge
Make HTTP transport has() method do HEAD requests, and update test to
82
1185.50.83 by John Arbash Meinel
[merge] James Henstridge: Set Agent string in http headers, add tests for it.
83
1786.1.42 by John Arbash Meinel
Update _extract_headers, make it less generic, and non recursive.
84
def _extract_headers(header_text, url):
    """Extract the mapping for an rfc2822 header

    This is a helper function for the test suite and for _pycurl.
    (urllib already parses the headers for us)

    In the case that there are multiple headers inside the file,
    the last one is returned.

    :param header_text: A string of header information.
        This expects that the first line of a header will always be HTTP ...
    :param url: The url we are parsing, so we can raise nice errors
    :return: mimetools.Message object, which basically acts like a case
        insensitive dictionary.
    :raises errors.InvalidHttpResponse: if header_text is empty, or if its
        first line does not start with 'HTTP'.
    """
    if not header_text:
        raise errors.InvalidHttpResponse(url, 'Empty headers')

    remaining = header_text
    first_header = True
    while remaining:
        header_file = StringIO(remaining)
        first_line = header_file.readline()
        if not first_line.startswith('HTTP'):
            if first_header:
                # The first header *must* start with HTTP
                raise errors.InvalidHttpResponse(url,
                    'Opening header line did not start with HTTP: %s'
                    % (first_line,))
            # Anything after a complete header that is not another status
            # line is not a header: we are done parsing.
            break
        first_header = False
        m = mimetools.Message(header_file)

        # mimetools.Message parses the first header up to a blank line
        # So while there is remaining data, it probably means there is
        # another header to be parsed.
        # Get rid of any preceding whitespace, which if it is all whitespace
        # will get rid of everything.
        remaining = header_file.read().lstrip()
    return m
126
127
2018.2.3 by Andrew Bennetts
Starting factoring out the smart server client "medium" from the protocol.
128
class HttpTransportBase(Transport, smart.SmartClientMedium):
    """Base class for http implementations.

    Does URL parsing, etc, but not any network IO.

    The protocol can be given as e.g. http+urllib://host/ to use a particular
    implementation.

    Implementations are expected to provide has(), _get() and _post(); every
    operation that would modify the remote side raises TransportNotPossible.
    """

    # _proto: "http" or "https"
    # _qualified_proto: may have "+pycurl", etc

    def __init__(self, base, from_transport=None):
        """Set the base path where files will be stored.

        :param base: An http or https url, optionally qualified with an
            implementation name (e.g. ``http+pycurl://host/``).  A trailing
            '/' is added when missing.
        :param from_transport: Optional transport this one is cloned from.
            clone() always passes it; this base class ignores it, daughter
            classes may use it as a hint.
        :raises AssertionError: if base is not an http(s) url.
        """
        proto_match = re.match(r'^(https?)(\+\w+)?://', base)
        if not proto_match:
            raise AssertionError("not a http url: %r" % base)
        self._proto = proto_match.group(1)
        impl_name = proto_match.group(2)
        if impl_name:
            # Drop the leading '+'
            impl_name = impl_name[1:]
        self._impl_name = impl_name
        if base[-1] != '/':
            base = base + '/'
        super(HttpTransportBase, self).__init__(base)
        # In the future we might actually connect to the remote host
        # rather than using get_url
        # self._connection = None
        (apparent_proto, self._host,
            self._path, self._parameters,
            self._query, self._fragment) = urlparse.urlparse(self.base)
        self._qualified_proto = apparent_proto

    def abspath(self, relpath):
        """Return the full url to the given relative path.

        The URL returned always has the protocol scheme originally used to
        construct the transport, even if that includes an explicit
        implementation qualifier.

        :param relpath: A plain (non unicode) string.  A leading '/' makes
            it relative to the root of the host rather than to the
            transport base.  '.' and empty segments are ignored, '..'
            removes the previous segment (the parent of the root being the
            root itself).
        :raises InvalidURL: if relpath is a unicode string.
        :raises ValueError: if a non absolute relpath ends with a '/'.
        """
        assert isinstance(relpath, basestring)
        if isinstance(relpath, unicode):
            raise InvalidURL(relpath, 'paths must not be unicode.')
        relpath_parts = relpath.split('/')
        if relpath.startswith('/'):
            basepath = []
        else:
            # Except for the root, no trailing slashes are allowed
            if len(relpath_parts) > 1 and relpath_parts[-1] == '':
                raise ValueError("path %r within branch %r seems to be a directory"
                                 % (relpath, self._path))
            basepath = self._path.split('/')
            if len(basepath) > 0 and basepath[-1] == '':
                basepath = basepath[:-1]

        for p in relpath_parts:
            if p == '..':
                if len(basepath) == 0:
                    # In most filesystems, a request for the parent
                    # of root, just returns root.
                    continue
                basepath.pop()
            elif p == '.' or p == '':
                continue # No-op
            else:
                basepath.append(p)
        # Possibly, we could use urlparse.urljoin() here, but
        # I'm concerned about when it chooses to strip the last
        # portion of the path, and when it doesn't.
        path = '/'.join(basepath)
        if path == '':
            path = '/'
        result = urlparse.urlunparse((self._qualified_proto,
                                    self._host, path, '', '', ''))
        return result

    def _real_abspath(self, relpath):
        """Produce absolute path, adjusting protocol if needed

        The implementation qualifier (e.g. "+pycurl") is removed from the
        scheme, and the result is always a plain str.
        """
        abspath = self.abspath(relpath)
        qp = self._qualified_proto
        rp = self._proto
        if self._qualified_proto != self._proto:
            abspath = rp + abspath[len(qp):]
        if not isinstance(abspath, str):
            # escaping must be done at a higher level
            abspath = abspath.encode('ascii')
        return abspath

    def has(self, relpath):
        """Abstract here: always raises NotImplementedError."""
        raise NotImplementedError("has() is abstract on %r" % self)

    def get(self, relpath):
        """Get the file at the given relative path.

        :param relpath: The relative path to the file
        :return: The file-like object produced by _get() for the whole file.
        """
        code, response_file = self._get(relpath, None)
        return response_file

    def _get(self, relpath, ranges):
        """Get a file, or part of a file.

        :param relpath: Path relative to transport base URL
        :param ranges: None to get the whole file;
            or [(start,end)] to fetch parts of a file.

        :returns: (http_code, result_file)

        Note that the current http implementations can only fetch one range at
        a time through this call.
        """
        raise NotImplementedError(self._get)

    def get_request(self):
        """Return a new SmartClientHTTPMediumRequest bound to this medium."""
        return SmartClientHTTPMediumRequest(self)

    def get_smart_medium(self):
        """See Transport.get_smart_medium.

        HttpTransportBase directly implements the minimal interface of
        SmartMediumClient, so this returns self.
        """
        return self

    def readv(self, relpath, offsets):
        """Get parts of the file at the given relative path.

        This is a generator: nothing is requested before the first item is
        asked for.

        :param offsets: A list of (offset, size) tuples.
        :return: A generator of (offset, data) tuples, in the order of
            offsets.
        :raises errors.ShortReadvError: if fewer than size bytes could be
            read for one of the offsets.
        """
        ranges = self.offsets_to_ranges(offsets)
        mutter('http readv of %s collapsed %s offsets => %s',
                relpath, len(offsets), ranges)
        code, f = self._get(relpath, ranges)
        for start, size in offsets:
            # A negative start is relative to the end of the file
            # (whence 2), anything else to its beginning (whence 0).
            if start < 0:
                whence = 2
            else:
                whence = 0
            f.seek(start, whence)
            start = f.tell()
            data = f.read(size)
            if len(data) != size:
                raise errors.ShortReadvError(relpath, start, size,
                                             actual=len(data))
            yield start, data

    @staticmethod
    def offsets_to_ranges(offsets):
        """Turn a list of offsets and sizes into a list of byte ranges.

        :param offsets: A list of tuples of (start, size).  An empty list
            is not accepted.
        :return: a list of inclusive byte ranges [start, end], sorted by
            start.  Adjacent and overlapping ranges will be combined.
        """
        combined = []

        # Make sure we process sorted offsets
        for start, size in sorted(offsets):
            end = start + size - 1
            if combined and start <= combined[-1][1] + 1:
                # Touches or overlaps the range being built: extend it, but
                # never shrink it when the new range is nested inside.
                if end > combined[-1][1]:
                    combined[-1][1] = end
            else:
                combined.append([start, end])

        return combined

    def _post(self, body_bytes):
        """POST body_bytes to .bzr/smart on this transport.

        :returns: (response code, response body file-like object).
        """
        # TODO: Requiring all the body_bytes to be available at the beginning of
        # the POST may require large client buffers.  It would be nice to have
        # an interface that allows streaming via POST when possible (and
        # degrades to a local buffer when not).
        raise NotImplementedError(self._post)

    def put_file(self, relpath, f, mode=None):
        """Copy the file-like object into the location.

        Not supported over http: always raises TransportNotPossible.

        :param relpath: Location to put the contents, relative to base.
        :param f:       File-like object.
        """
        raise TransportNotPossible('http PUT not supported')

    def mkdir(self, relpath, mode=None):
        """Create a directory at the given path.

        Not supported over http: always raises TransportNotPossible.
        """
        raise TransportNotPossible('http does not support mkdir()')

    def rmdir(self, relpath):
        """See Transport.rmdir.

        Not supported over http: always raises TransportNotPossible.
        """
        raise TransportNotPossible('http does not support rmdir()')

    def append_file(self, relpath, f, mode=None):
        """Append the text in the file-like object into the final
        location.

        Not supported over http: always raises TransportNotPossible.
        """
        raise TransportNotPossible('http does not support append()')

    def copy(self, rel_from, rel_to):
        """Copy the item at rel_from to the location at rel_to

        Not supported over http: always raises TransportNotPossible.
        """
        raise TransportNotPossible('http does not support copy()')

    def copy_to(self, relpaths, other, mode=None, pb=None):
        """Copy a set of entries from self into another Transport.

        :param relpaths: A list/generator of entries to be copied.
        :param other: The target transport.
        :raises TransportNotPossible: if other is an http transport.

        TODO: if other is LocalTransport, is it possible to
              do better than put(get())?
        """
        # At this point HttpTransport might be able to check and see if
        # the remote location is the same, and rather than download, and
        # then upload, it could just issue a remote copy_this command.
        if isinstance(other, HttpTransportBase):
            raise TransportNotPossible('http cannot be the target of copy_to()')
        else:
            return super(HttpTransportBase, self).\
                    copy_to(relpaths, other, mode=mode, pb=pb)

    def move(self, rel_from, rel_to):
        """Move the item at rel_from to the location at rel_to

        Not supported over http: always raises TransportNotPossible.
        """
        raise TransportNotPossible('http does not support move()')

    def delete(self, relpath):
        """Delete the item at relpath

        Not supported over http: always raises TransportNotPossible.
        """
        raise TransportNotPossible('http does not support delete()')

    def is_readonly(self):
        """See Transport.is_readonly."""
        return True

    def listable(self):
        """See Transport.listable."""
        return False

    def stat(self, relpath):
        """Return the stat information for a file.

        Not supported over http: always raises TransportNotPossible.
        """
        raise TransportNotPossible('http does not support stat()')

    def lock_read(self, relpath):
        """Lock the given file for shared (read) access.

        :return: A lock object, which should be passed to Transport.unlock()
        """
        # The old RemoteBranch ignore lock for reading, so we will
        # continue that tradition and return a bogus lock object.
        class BogusLock(object):
            def __init__(self, path):
                self.path = path
            def unlock(self):
                pass
        return BogusLock(relpath)

    def lock_write(self, relpath):
        """Lock the given file for exclusive (write) access.

        WARNING: many transports do not support this, so try to avoid using
        it.  Over http it always raises TransportNotPossible.

        :return: A lock object, which should be passed to Transport.unlock()
        """
        raise TransportNotPossible('http does not support lock_write()')

    def clone(self, offset=None):
        """Return a new HttpTransportBase with root at self.base + offset

        We leave the daughter classes take advantage of the hint
        that it's a cloning not a raw creation.

        :param offset: Path relative to this transport, resolved with
            abspath(); None to clone at the same base.
        """
        if offset is None:
            return self.__class__(self.base, self)
        else:
            return self.__class__(self.abspath(offset), self)

    @staticmethod
    def range_header(ranges, tail_amount):
        """Turn a list of bytes ranges into a HTTP Range header value.

        :param ranges: A list of byte ranges, (start, end).
        :param tail_amount: When true, a final "-N" item is added to the
            value, asking for the last N bytes.

        :return: HTTP range header string (without any "bytes=" prefix).
        """
        strings = ['%d-%d' % (start, end) for start, end in ranges]

        if tail_amount:
            strings.append('-%d' % tail_amount)

        return ','.join(strings)

    def send_http_smart_request(self, bytes):
        """POST a smart request and return the response body.

        :param bytes: The request body, handed to _post().
        :return: The file-like response body.
        :raises AssertionError: if the response code is not 200.
        """
        code, body_filelike = self._post(bytes)
        if code != 200:
            # Raised explicitly instead of via ``assert`` so that a failed
            # request is still detected when python runs with -O.
            raise AssertionError('unexpected HTTP response code %r' % (code,))
        return body_filelike
433
434
435
class SmartClientHTTPMediumRequest(smart.SmartClientMediumRequest):
    """A SmartClientMediumRequest that works with an HTTP medium.

    Written bytes are accumulated in memory and handed to the medium in one
    send_http_smart_request call when writing is finished; the response is
    then read from the body object that call returned.
    """

    def __init__(self, medium):
        smart.SmartClientMediumRequest.__init__(self, medium)
        # Everything accepted so far; sent by _finished_writing.
        self._buffer = ''

    def _accept_bytes(self, bytes):
        """Append bytes to the pending request; nothing is sent yet."""
        self._buffer = self._buffer + bytes

    def _finished_writing(self):
        """Send the buffered request and keep the response body."""
        # self._medium is not assigned in this class; presumably the base
        # class __init__ stores the medium there.
        self._response_body = self._medium.send_http_smart_request(
            self._buffer)

    def _read_bytes(self, count):
        """Return the result of read(count) on the stored response body."""
        return self._response_body.read(count)

    def _finished_reading(self):
        """See SmartClientMediumRequest._finished_reading."""
455
        
1750.1.2 by Michael Ellerman
Add support for HTTP multipart ranges and hook it into http+urllib.
456
1530.1.3 by Robert Collins
transport implementations now tested consistently.
457
#---------------- test server facilities ----------------
1540.3.6 by Martin Pool
[merge] update from bzr.dev
458
# TODO: load these only when running tests
1530.1.3 by Robert Collins
transport implementations now tested consistently.
459
1636.1.1 by Robert Collins
Fix calling relpath() and abspath() on transports at their root.
460
1530.1.3 by Robert Collins
transport implementations now tested consistently.
461
class WebserverNotAvailable(Exception):
    """Signals that no web server is available.

    Nothing in this part of the file raises it; presumably test
    infrastructure elsewhere does.
    """
463
464
465
class BadWebserverPath(ValueError):
    """A local path that is not below the directory a web server serves.

    Construct it with two arguments: the offending path and the directory
    it was expected to be in.
    """

    def __str__(self):
        template = 'path %s is not in %s'
        return template % self.args
468
469
470
class TestingHTTPRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):
471
472
    def log_message(self, format, *args):
1553.1.3 by James Henstridge
Make bzrlib.transport.http.HttpServer output referer and user agent as in
473
        self.server.test_case.log('webserver - %s - - [%s] %s "%s" "%s"',
1530.1.3 by Robert Collins
transport implementations now tested consistently.
474
                                  self.address_string(),
475
                                  self.log_date_time_string(),
1553.1.3 by James Henstridge
Make bzrlib.transport.http.HttpServer output referer and user agent as in
476
                                  format % args,
477
                                  self.headers.get('referer', '-'),
478
                                  self.headers.get('user-agent', '-'))
1530.1.3 by Robert Collins
transport implementations now tested consistently.
479
480
    def handle_one_request(self):
481
        """Handle a single HTTP request.
482
483
        You normally don't need to override this method; see the class
484
        __doc__ string for information on how to handle specific HTTP
485
        commands such as GET and POST.
486
487
        """
488
        for i in xrange(1,11): # Don't try more than 10 times
489
            try:
490
                self.raw_requestline = self.rfile.readline()
491
            except socket.error, e:
492
                if e.args[0] in (errno.EAGAIN, errno.EWOULDBLOCK):
493
                    # omitted for now because some tests look at the log of
494
                    # the server and expect to see no errors.  see recent
495
                    # email thread. -- mbp 20051021. 
496
                    ## self.log_message('EAGAIN (%d) while reading from raw_requestline' % i)
497
                    time.sleep(0.01)
498
                    continue
499
                raise
500
            else:
501
                break
502
        if not self.raw_requestline:
503
            self.close_connection = 1
504
            return
505
        if not self.parse_request(): # An error code has been sent, just exit
506
            return
507
        mname = 'do_' + self.command
1963.2.6 by Robey Pointer
pychecker is on crack; go back to using 'is None'.
508
        if getattr(self, mname, None) is None:
1530.1.3 by Robert Collins
transport implementations now tested consistently.
509
            self.send_error(501, "Unsupported method (%r)" % self.command)
510
            return
511
        method = getattr(self, mname)
512
        method()
513
1711.4.15 by John Arbash Meinel
Only interpret HTTP paths as utf8 on win32
514
    if sys.platform == 'win32':
515
        # On win32 you cannot access non-ascii filenames without
516
        # decoding them into unicode first.
517
        # However, under Linux, you can access bytestream paths
518
        # without any problems. If this function was always active
519
        # it would probably break tests when LANG=C was set
520
        def translate_path(self, path):
521
            """Translate a /-separated PATH to the local filename syntax.
1711.4.14 by John Arbash Meinel
Custom HttpRequestHandler which treats all paths as utf8 encoded
522
1711.4.15 by John Arbash Meinel
Only interpret HTTP paths as utf8 on win32
523
            For bzr, all url paths are considered to be utf8 paths.
524
            On Linux, you can access these paths directly over the bytestream
525
            request, but on win32, you must decode them, and access them
526
            as Unicode files.
527
            """
528
            # abandon query parameters
529
            path = urlparse.urlparse(path)[2]
530
            path = posixpath.normpath(urllib.unquote(path))
531
            path = path.decode('utf-8')
532
            words = path.split('/')
533
            words = filter(None, words)
534
            path = os.getcwdu()
535
            for word in words:
536
                drive, word = os.path.splitdrive(word)
537
                head, word = os.path.split(word)
538
                if word in (os.curdir, os.pardir): continue
539
                path = os.path.join(path, word)
540
            return path
1711.4.14 by John Arbash Meinel
Custom HttpRequestHandler which treats all paths as utf8 encoded
541
1185.50.83 by John Arbash Meinel
[merge] James Henstridge: Set Agent string in http headers, add tests for it.
542
1530.1.3 by Robert Collins
transport implementations now tested consistently.
543
class TestingHTTPServer(BaseHTTPServer.HTTPServer):
    """An HTTPServer that keeps a reference to the object that owns it.

    Request handlers reach that object as self.server.test_case, for
    example to pass on their log lines.
    """

    def __init__(self, server_address, RequestHandlerClass, test_case):
        BaseHTTPServer.HTTPServer.__init__(
            self, server_address, RequestHandlerClass)
        self.test_case = test_case
548
1786.1.27 by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration.
549
1530.1.3 by Robert Collins
transport implementations now tested consistently.
550
class HttpServer(Server):
    """A test server for http transports.

    setUp starts a daemon thread that runs a TestingHTTPServer on a
    localhost port chosen by the OS; tearDown stops that thread. Absolute
    paths are mapped to URLs relative to the directory that was current
    when setUp ran. Log lines from the request handlers are collected in
    self.logs.
    """

    # used to form the url that connects to this server
    _url_protocol = 'http'

    # Subclasses can provide a specific request handler
    def __init__(self, request_handler=TestingHTTPRequestHandler):
        Server.__init__(self)
        self.request_handler = request_handler

    def _get_httpd(self):
        """Create the HTTP server object, bound to ('localhost', 0)."""
        return TestingHTTPServer(('localhost', 0),
                                  self.request_handler,
                                  self)

    def _http_start(self):
        """Thread body: publish the base URL, then serve until stopped."""
        try:
            httpd = self._get_httpd()
            port = httpd.socket.getsockname()[1]
            self._http_base_url = '%s://localhost:%s/' % (
                self._url_protocol, port)
        finally:
            # Release even when the server could not be created, so that
            # _get_remote_url fails instead of blocking forever.
            self._http_starting.release()
        # A short timeout lets the loop notice _http_running going False.
        httpd.socket.settimeout(0.1)
        try:
            while self._http_running:
                try:
                    httpd.handle_request()
                except socket.timeout:
                    pass
        finally:
            # Do not leave the listening socket open once serving stops.
            httpd.server_close()

    def _get_remote_url(self, path):
        """Return the URL on this server that corresponds to a local path.

        :param path: A local path. An absolute path must be below the
            directory recorded by setUp; a relative path is used as is.
        :raises BadWebserverPath: If an absolute path is not below that
            directory.
        """
        path_parts = path.split(os.path.sep)
        if os.path.isabs(path):
            prefix_len = len(self._local_path_parts)
            if path_parts[:prefix_len] != self._local_path_parts:
                raise BadWebserverPath(path, self._home_dir)
            remote_path = '/'.join(path_parts[prefix_len:])
        else:
            remote_path = '/'.join(path_parts)

        # Wait until the server thread has published its base url.
        self._http_starting.acquire()
        self._http_starting.release()
        return self._http_base_url + remote_path

    def log(self, format, *args):
        """Capture Server log output."""
        self.logs.append(format % args)

    def setUp(self):
        """See bzrlib.transport.Server.setUp."""
        self._home_dir = os.getcwdu()
        self._local_path_parts = self._home_dir.split(os.path.sep)
        # Created before the thread starts so that log() can always append.
        self.logs = []
        # Held until the server thread has set _http_base_url.
        self._http_starting = threading.Lock()
        self._http_starting.acquire()
        self._http_running = True
        self._http_base_url = None
        self._http_thread = threading.Thread(target=self._http_start)
        self._http_thread.setDaemon(True)
        self._http_thread.start()
        # Remove http_proxy from the environment while the server runs;
        # tearDown puts it back.
        self._http_proxy = os.environ.get("http_proxy")
        if self._http_proxy is not None:
            del os.environ["http_proxy"]

    def tearDown(self):
        """See bzrlib.transport.Server.tearDown."""
        self._http_running = False
        self._http_thread.join()
        if self._http_proxy is not None:
            # NOTE(review): this local import looks redundant with the
            # module-level os used by setUp; kept as found - confirm
            # whether it can go.
            import os
            os.environ["http_proxy"] = self._http_proxy

    def get_url(self):
        """See bzrlib.transport.Server.get_url."""
        return self._get_remote_url(self._home_dir)

    def get_bogus_url(self):
        """See bzrlib.transport.Server.get_bogus_url."""
        # this is chosen to try to prevent trouble with proxies, weird dns,
        # etc
        return 'http://127.0.0.1:1/'
1530.1.9 by Robert Collins
Test bogus urls with http in the new infrastructure.
630
2018.2.2 by Andrew Bennetts
Implement HTTP smart server.
631
632
class HTTPServerWithSmarts(HttpServer):
    """An HttpServer that also answers POST requests.

    Each POST triggers a smart server that executes with a transport rooted
    at the rootdir of the HTTP server.
    """

    def __init__(self):
        # SmartRequestHandler is the handler that provides do_POST.
        HttpServer.__init__(self, request_handler=SmartRequestHandler)
640
641
642
class SmartRequestHandler(TestingHTTPRequestHandler):
    """Extend TestingHTTPRequestHandler to support smart client POSTs."""

    def do_POST(self):
        """Feed the POST body to a smart server and send back its output.

        The response is always sent with status 200. The request body, whose
        size comes from the Content-Length header, is passed to a
        SmartServerRequestProtocolOne in one piece, and whatever the
        protocol writes becomes the response body.
        """
        self.send_response(200)
        self.send_header("Content-type", "application/octet-stream")
        backing_transport = get_transport(self.server.test_case._home_dir)
        # TODO: We might like to support streaming responses.  1.0 allows no
        # Content-length in this case, so for integrity we should perform our
        # own chunking within the stream.
        # 1.1 allows chunked responses, and in this case we could chunk using
        # the HTTP chunking as this will allow HTTP persistence safely, even if
        # we have to stop early due to error, but we would also have to use the
        # HTTP trailer facility which may not be widely available.
        response_buffer = StringIO()
        protocol = smart.SmartServerRequestProtocolOne(
                backing_transport, response_buffer.write)
        # if this fails, we should return 400 bad request, but failure is
        # failure for now - RBC 20060919
        request_length = int(self.headers['Content-Length'])
        # Perhaps there should be a SmartServerHTTPMedium that takes care of
        # feeding the bytes in the http request to the protocol, but for now
        # it's simpler to just feed the bytes directly.
        request_bytes = self.rfile.read(request_length)
        protocol.accept_bytes(request_bytes)
        assert protocol.next_read_size() == 0, (
            "not finished reading, but all data sent to protocol.")
        response_bytes = response_buffer.getvalue()
        self.send_header("Content-Length", str(len(response_bytes)))
        self.end_headers()
        self.wfile.write(response_bytes)
672