1
# Copyright (C) 2005, 2006 Canonical Ltd
1
# Copyright (C) 2005 Canonical Ltd
3
3
# This program is free software; you can redistribute it and/or modify
4
4
# it under the terms of the GNU General Public License as published by
5
5
# the Free Software Foundation; either version 2 of the License, or
6
6
# (at your option) any later version.
8
8
# This program is distributed in the hope that it will be useful,
9
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
11
# GNU General Public License for more details.
13
13
# You should have received a copy of the GNU General Public License
14
14
# along with this program; if not, write to the Free Software
15
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
"""Base implementation of Transport over http.
19
There are separate implementation modules for each http client implementation.
16
"""Implementation of Transport over http.
24
from collections import deque
19
from bzrlib.transport import Transport, register_transport
20
from bzrlib.errors import (TransportNotPossible, NoSuchFile,
21
NonRelativePath, TransportError)
25
23
from cStringIO import StringIO
29
from warnings import warn
31
from bzrlib.transport import Transport, register_transport, Server
32
from bzrlib.errors import (TransportNotPossible, NoSuchFile,
33
TransportError, ConnectionError, InvalidURL)
27
from bzrlib.errors import BzrError, BzrCheckError
34
28
from bzrlib.branch import Branch
35
29
from bzrlib.trace import mutter
36
# TODO: load these only when running http tests
37
import BaseHTTPServer, SimpleHTTPServer, socket, time
39
from bzrlib.ui import ui_factory
42
def extract_auth(url, password_manager):
43
"""Extract auth parameters from am HTTP/HTTPS url and add them to the given
44
password manager. Return the url, minus those auth parameters (which
47
assert re.match(r'^(https?)(\+\w+)?://', url), \
48
'invalid absolute url %r' % url
49
scheme, netloc, path, query, fragment = urlparse.urlsplit(url)
31
# velocitynet.com.au transparently proxies connections and thereby
32
# breaks keep-alive -- sucks!
37
mutter("get_url %s" % url)
38
url_f = urllib2.urlopen(url)
41
class HttpTransportError(TransportError):
44
class HttpTransport(Transport):
45
"""This is the transport agent for http:// access.
52
auth, netloc = netloc.split('@', 1)
54
username, password = auth.split(':', 1)
56
username, password = auth, None
58
host = netloc.split(':', 1)[0]
61
username = urllib.unquote(username)
62
if password is not None:
63
password = urllib.unquote(password)
65
password = ui_factory.get_password(prompt='HTTP %(user)@%(host) password',
66
user=username, host=host)
67
password_manager.add_password(None, host, username, password)
68
url = urlparse.urlunsplit((scheme, netloc, path, query, fragment))
72
class HttpTransportBase(Transport):
73
"""Base class for http implementations.
75
Does URL parsing, etc, but not any network IO.
77
The protocol can be given as e.g. http+urllib://host/ to use a particular
47
TODO: Implement pipelined versions of all of the *_multi() functions.
81
# _proto: "http" or "https"
82
# _qualified_proto: may have "+pycurl", etc
84
50
def __init__(self, base):
85
51
"""Set the base path where files will be stored."""
86
proto_match = re.match(r'^(https?)(\+\w+)?://', base)
88
raise AssertionError("not a http url: %r" % base)
89
self._proto = proto_match.group(1)
90
impl_name = proto_match.group(2)
92
impl_name = impl_name[1:]
93
self._impl_name = impl_name
96
super(HttpTransportBase, self).__init__(base)
52
assert base.startswith('http://') or base.startswith('https://')
53
super(HttpTransport, self).__init__(base)
97
54
# In the future we might actually connect to the remote host
98
55
# rather than using get_url
99
56
# self._connection = None
100
(apparent_proto, self._host,
57
(self._proto, self._host,
101
58
self._path, self._parameters,
102
59
self._query, self._fragment) = urlparse.urlparse(self.base)
103
self._qualified_proto = apparent_proto
61
def should_cache(self):
62
"""Return True if the data pulled across should be cached locally.
66
def clone(self, offset=None):
67
"""Return a new HttpTransport with root at self.base + offset
68
For now HttpTransport does not actually connect, so just return
69
a new HttpTransport object.
72
return HttpTransport(self.base)
74
return HttpTransport(self.abspath(offset))
105
76
def abspath(self, relpath):
106
77
"""Return the full url to the given relative path.
108
This can be supplied with a string or a list.
110
The URL returned always has the protocol scheme originally used to
111
construct the transport, even if that includes an explicit
112
implementation qualifier.
78
This can be supplied with a string or a list
114
assert isinstance(relpath, basestring)
115
if isinstance(relpath, unicode):
116
raise InvalidURL(relpath, 'paths must not be unicode.')
117
80
if isinstance(relpath, basestring):
118
relpath_parts = relpath.split('/')
120
# TODO: Don't call this with an array - no magic interfaces
121
relpath_parts = relpath[:]
122
if len(relpath_parts) > 1:
123
if relpath_parts[0] == '':
124
raise ValueError("path %r within branch %r seems to be absolute"
125
% (relpath, self._path))
126
if relpath_parts[-1] == '':
127
raise ValueError("path %r within branch %r seems to be a directory"
128
% (relpath, self._path))
129
82
basepath = self._path.split('/')
130
83
if len(basepath) > 0 and basepath[-1] == '':
131
84
basepath = basepath[:-1]
132
for p in relpath_parts:
134
if len(basepath) == 0:
135
89
# In most filesystems, a request for the parent
136
90
# of root, just returns root.
139
elif p == '.' or p == '':
142
97
basepath.append(p)
143
99
# Possibly, we could use urlparse.urljoin() here, but
144
100
# I'm concerned about when it chooses to strip the last
145
101
# portion of the path, and when it doesn't.
146
102
path = '/'.join(basepath)
149
result = urlparse.urlunparse((self._qualified_proto,
150
self._host, path, '', '', ''))
103
return urlparse.urlunparse((self._proto,
104
self._host, path, '', '', ''))
153
def _real_abspath(self, relpath):
154
"""Produce absolute path, adjusting protocol if needed"""
155
abspath = self.abspath(relpath)
156
qp = self._qualified_proto
158
if self._qualified_proto != self._proto:
159
abspath = rp + abspath[len(qp):]
160
if not isinstance(abspath, str):
161
# escaping must be done at a higher level
162
abspath = abspath.encode('ascii')
106
def relpath(self, abspath):
107
if not abspath.startswith(self.base):
108
raise NonRelativePath('path %r is not under base URL %r'
109
% (abspath, self.base))
111
return abspath[pl:].lstrip('/')
165
113
def has(self, relpath):
166
raise NotImplementedError("has() is abstract on %r" % self)
168
def get(self, relpath):
114
"""Does the target location exist?
116
TODO: HttpTransport.has() should use a HEAD request,
117
not a full GET request.
119
TODO: This should be changed so that we don't use
120
urllib2 and get an exception, the code path would be
121
cleaner if we just do an http HEAD request, and parse
125
f = get_url(self.abspath(relpath))
126
# Without the read and then close()
127
# we tend to have busy sockets.
133
except urllib2.URLError:
136
if e.errno == errno.ENOENT:
138
raise HttpTransportError(orig_error=e)
140
def get(self, relpath, decode=False):
169
141
"""Get the file at the given relative path.
171
143
:param relpath: The relative path to the file
173
code, response_file = self._get(relpath, None)
176
def _get(self, relpath, ranges):
177
"""Get a file, or part of a file.
179
:param relpath: Path relative to transport base URL
180
:param ranges: None to get the whole file;
181
or [(start,end)] to fetch parts of a file.
183
:returns: (http_code, result_file)
185
Note that the current http implementations can only fetch one range at
186
a time through this call.
188
raise NotImplementedError(self._get)
190
def readv(self, relpath, offsets):
191
"""Get parts of the file at the given relative path.
193
:param offsets: A list of (offset, size) tuples.
194
:return: A list or generator of (offset, data) tuples
196
# Ideally we would pass one big request asking for all the ranges in
197
# one go; however then the server will give a multipart mime response
198
# back, and we can't parse them yet. So instead we just get one range
199
# per region, and try to coalesce the regions as much as possible.
201
# The read-coalescing code is not quite regular enough to have a
202
# single driver routine and
203
# helper method in Transport.
204
def do_combined_read(combined_offsets):
205
# read one coalesced block
207
for offset, size in combined_offsets:
209
mutter('readv coalesced %d reads.', len(combined_offsets))
210
offset = combined_offsets[0][0]
211
byte_range = (offset, offset + total_size - 1)
212
code, result_file = self._get(relpath, [byte_range])
214
for off, size in combined_offsets:
215
result_bytes = result_file.read(size)
216
assert len(result_bytes) == size
217
yield off, result_bytes
219
data = result_file.read(offset + total_size)[offset:offset + total_size]
221
for offset, size in combined_offsets:
222
yield offset, data[pos:pos + size]
227
pending_offsets = deque(offsets)
228
combined_offsets = []
229
while len(pending_offsets):
230
offset, size = pending_offsets.popleft()
231
if not combined_offsets:
232
combined_offsets = [[offset, size]]
234
if (len (combined_offsets) < 500 and
235
combined_offsets[-1][0] + combined_offsets[-1][1] == offset):
237
combined_offsets.append([offset, size])
239
# incompatible, or over the threshold issue a read and yield
240
pending_offsets.appendleft((offset, size))
241
for result in do_combined_read(combined_offsets):
243
combined_offsets = []
244
# whatever is left is a single coalesced request
245
if len(combined_offsets):
246
for result in do_combined_read(combined_offsets):
249
def put(self, relpath, f, mode=None):
146
return get_url(self.abspath(relpath))
147
except (BzrError, urllib2.URLError, IOError), e:
148
raise NoSuchFile(orig_error=e)
150
raise HttpTransportError(orig_error=e)
152
def get_partial(self, relpath, start, length=None):
153
"""Get just part of a file.
155
:param relpath: Path to the file, relative to base
156
:param start: The starting position to read from
157
:param length: The length to read. A length of None indicates
158
read to the end of the file.
159
:return: A file-like object containing at least the specified bytes.
160
Some implementations may return objects which can be read
161
past this length, but this is not guaranteed.
163
# TODO: You can make specialized http requests for just
164
# a portion of the file. Figure out how to do that.
165
# For now, urllib2 returns files that cannot seek() so
166
# we just read bytes off the beginning, until we
167
# get to the point that we care about.
168
f = self.get(relpath)
169
# TODO: read in smaller chunks, in case things are
170
# buffered internally.
174
def put(self, relpath, f):
250
175
"""Copy the file-like or string object into the location.
252
177
:param relpath: Location to put the contents, relative to base.
332
248
raise TransportNotPossible('http does not support lock_write()')
334
def clone(self, offset=None):
335
"""Return a new HttpTransportBase with root at self.base + offset
336
For now HttpTransportBase does not actually connect, so just return
337
a new HttpTransportBase object.
340
return self.__class__(self.base)
342
return self.__class__(self.abspath(offset))
344
#---------------- test server facilities ----------------
345
# TODO: load these only when running tests
348
class WebserverNotAvailable(Exception):
352
class BadWebserverPath(ValueError):
354
return 'path %s is not in %s' % self.args
357
class TestingHTTPRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):
359
def log_message(self, format, *args):
360
self.server.test_case.log('webserver - %s - - [%s] %s "%s" "%s"',
361
self.address_string(),
362
self.log_date_time_string(),
364
self.headers.get('referer', '-'),
365
self.headers.get('user-agent', '-'))
367
def handle_one_request(self):
368
"""Handle a single HTTP request.
370
You normally don't need to override this method; see the class
371
__doc__ string for information on how to handle specific HTTP
372
commands such as GET and POST.
375
for i in xrange(1,11): # Don't try more than 10 times
377
self.raw_requestline = self.rfile.readline()
378
except socket.error, e:
379
if e.args[0] in (errno.EAGAIN, errno.EWOULDBLOCK):
380
# omitted for now because some tests look at the log of
381
# the server and expect to see no errors. see recent
382
# email thread. -- mbp 20051021.
383
## self.log_message('EAGAIN (%d) while reading from raw_requestline' % i)
389
if not self.raw_requestline:
390
self.close_connection = 1
392
if not self.parse_request(): # An error code has been sent, just exit
394
mname = 'do_' + self.command
395
if not hasattr(self, mname):
396
self.send_error(501, "Unsupported method (%r)" % self.command)
398
method = getattr(self, mname)
402
class TestingHTTPServer(BaseHTTPServer.HTTPServer):
403
def __init__(self, server_address, RequestHandlerClass, test_case):
404
BaseHTTPServer.HTTPServer.__init__(self, server_address,
406
self.test_case = test_case
408
class HttpServer(Server):
409
"""A test server for http transports."""
411
# used to form the url that connects to this server
412
_url_protocol = 'http'
414
def _http_start(self):
416
httpd = TestingHTTPServer(('localhost', 0),
417
TestingHTTPRequestHandler,
419
host, port = httpd.socket.getsockname()
420
self._http_base_url = '%s://localhost:%s/' % (self._url_protocol, port)
421
self._http_starting.release()
422
httpd.socket.settimeout(0.1)
424
while self._http_running:
426
httpd.handle_request()
427
except socket.timeout:
430
def _get_remote_url(self, path):
431
path_parts = path.split(os.path.sep)
432
if os.path.isabs(path):
433
if path_parts[:len(self._local_path_parts)] != \
434
self._local_path_parts:
435
raise BadWebserverPath(path, self.test_dir)
436
remote_path = '/'.join(path_parts[len(self._local_path_parts):])
438
remote_path = '/'.join(path_parts)
440
self._http_starting.acquire()
441
self._http_starting.release()
442
return self._http_base_url + remote_path
444
def log(self, format, *args):
445
"""Capture Server log output."""
446
self.logs.append(format % args)
449
"""See bzrlib.transport.Server.setUp."""
450
self._home_dir = os.getcwdu()
451
self._local_path_parts = self._home_dir.split(os.path.sep)
452
self._http_starting = threading.Lock()
453
self._http_starting.acquire()
454
self._http_running = True
455
self._http_base_url = None
456
self._http_thread = threading.Thread(target=self._http_start)
457
self._http_thread.setDaemon(True)
458
self._http_thread.start()
459
self._http_proxy = os.environ.get("http_proxy")
460
if self._http_proxy is not None:
461
del os.environ["http_proxy"]
465
"""See bzrlib.transport.Server.tearDown."""
466
self._http_running = False
467
self._http_thread.join()
468
if self._http_proxy is not None:
470
os.environ["http_proxy"] = self._http_proxy
473
"""See bzrlib.transport.Server.get_url."""
474
return self._get_remote_url(self._home_dir)
476
def get_bogus_url(self):
477
"""See bzrlib.transport.Server.get_bogus_url."""
478
# this is chosen to try to prevent trouble with proxies, weird dns,
480
return 'http://127.0.0.1:1/'
250
register_transport('http://', HttpTransport)
251
register_transport('https://', HttpTransport)