1
# Copyright (C) 2005, 2006 Canonical Ltd
1
# Copyright (C) 2005 Canonical Ltd
3
3
# This program is free software; you can redistribute it and/or modify
4
4
# it under the terms of the GNU General Public License as published by
5
5
# the Free Software Foundation; either version 2 of the License, or
6
6
# (at your option) any later version.
8
8
# This program is distributed in the hope that it will be useful,
9
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
11
# GNU General Public License for more details.
13
13
# You should have received a copy of the GNU General Public License
14
14
# along with this program; if not, write to the Free Software
15
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
"""Base implementation of Transport over http.
19
There are separate implementation modules for each http client implementation.
16
"""Implementation of Transport over http.
24
from collections import deque
19
from bzrlib.transport import Transport, register_transport
20
from bzrlib.errors import (TransportNotPossible, NoSuchFile,
21
NonRelativePath, TransportError)
25
23
from cStringIO import StringIO
29
from warnings import warn
31
from bzrlib.transport import Transport, register_transport, Server
32
from bzrlib.errors import (TransportNotPossible, NoSuchFile,
33
TransportError, ConnectionError)
34
27
from bzrlib.errors import BzrError, BzrCheckError
35
28
from bzrlib.branch import Branch
36
29
from bzrlib.trace import mutter
37
# TODO: load these only when running http tests
38
import BaseHTTPServer, SimpleHTTPServer, socket, time
40
from bzrlib.ui import ui_factory
43
def extract_auth(url, password_manager):
44
"""Extract auth parameters from am HTTP/HTTPS url and add them to the given
45
password manager. Return the url, minus those auth parameters (which
48
assert re.match(r'^(https?)(\+\w+)?://', url), \
49
'invalid absolute url %r' % url
50
scheme, netloc, path, query, fragment = urlparse.urlsplit(url)
31
# velocitynet.com.au transparently proxies connections and thereby
32
# breaks keep-alive -- sucks!
37
mutter("get_url %s" % url)
38
url_f = urllib2.urlopen(url)
41
class HttpTransportError(TransportError):
44
class HttpTransport(Transport):
45
"""This is the transport agent for http:// access.
53
auth, netloc = netloc.split('@', 1)
55
username, password = auth.split(':', 1)
57
username, password = auth, None
59
host = netloc.split(':', 1)[0]
62
username = urllib.unquote(username)
63
if password is not None:
64
password = urllib.unquote(password)
66
password = ui_factory.get_password(prompt='HTTP %(user)@%(host) password',
67
user=username, host=host)
68
password_manager.add_password(None, host, username, password)
69
url = urlparse.urlunsplit((scheme, netloc, path, query, fragment))
73
class HttpTransportBase(Transport):
74
"""Base class for http implementations.
76
Does URL parsing, etc, but not any network IO.
78
The protocol can be given as e.g. http+urllib://host/ to use a particular
47
TODO: Implement pipelined versions of all of the *_multi() functions.
82
# _proto: "http" or "https"
83
# _qualified_proto: may have "+pycurl", etc
85
50
def __init__(self, base):
86
51
"""Set the base path where files will be stored."""
87
proto_match = re.match(r'^(https?)(\+\w+)?://', base)
89
raise AssertionError("not a http url: %r" % base)
90
self._proto = proto_match.group(1)
91
impl_name = proto_match.group(2)
93
impl_name = impl_name[1:]
94
self._impl_name = impl_name
97
super(HttpTransportBase, self).__init__(base)
52
assert base.startswith('http://') or base.startswith('https://')
53
super(HttpTransport, self).__init__(base)
98
54
# In the future we might actually connect to the remote host
99
55
# rather than using get_url
100
56
# self._connection = None
101
(apparent_proto, self._host,
57
(self._proto, self._host,
102
58
self._path, self._parameters,
103
59
self._query, self._fragment) = urlparse.urlparse(self.base)
104
self._qualified_proto = apparent_proto
61
def should_cache(self):
62
"""Return True if the data pulled across should be cached locally.
66
def clone(self, offset=None):
67
"""Return a new HttpTransport with root at self.base + offset
68
For now HttpTransport does not actually connect, so just return
69
a new HttpTransport object.
72
return HttpTransport(self.base)
74
return HttpTransport(self.abspath(offset))
106
76
def abspath(self, relpath):
107
77
"""Return the full url to the given relative path.
109
This can be supplied with a string or a list.
111
The URL returned always has the protocol scheme originally used to
112
construct the transport, even if that includes an explicit
113
implementation qualifier.
78
This can be supplied with a string or a list
115
assert isinstance(relpath, basestring)
116
80
if isinstance(relpath, basestring):
117
relpath_parts = relpath.split('/')
119
# TODO: Don't call this with an array - no magic interfaces
120
relpath_parts = relpath[:]
121
if len(relpath_parts) > 1:
122
if relpath_parts[0] == '':
123
raise ValueError("path %r within branch %r seems to be absolute"
124
% (relpath, self._path))
125
if relpath_parts[-1] == '':
126
raise ValueError("path %r within branch %r seems to be a directory"
127
% (relpath, self._path))
128
82
basepath = self._path.split('/')
129
83
if len(basepath) > 0 and basepath[-1] == '':
130
84
basepath = basepath[:-1]
131
for p in relpath_parts:
133
if len(basepath) == 0:
134
89
# In most filesystems, a request for the parent
135
90
# of root, just returns root.
138
elif p == '.' or p == '':
141
96
basepath.append(p)
142
98
# Possibly, we could use urlparse.urljoin() here, but
143
99
# I'm concerned about when it chooses to strip the last
144
100
# portion of the path, and when it doesn't.
145
101
path = '/'.join(basepath)
148
result = urlparse.urlunparse((self._qualified_proto,
149
self._host, path, '', '', ''))
102
return urlparse.urlunparse((self._proto,
103
self._host, path, '', '', ''))
152
def _real_abspath(self, relpath):
153
"""Produce absolute path, adjusting protocol if needed"""
154
abspath = self.abspath(relpath)
155
qp = self._qualified_proto
157
if self._qualified_proto != self._proto:
158
abspath = rp + abspath[len(qp):]
159
if not isinstance(abspath, str):
160
# escaping must be done at a higher level
161
abspath = abspath.encode('ascii')
105
def relpath(self, abspath):
106
if not abspath.startswith(self.base):
107
raise NonRelativePath('path %r is not under base URL %r'
108
% (abspath, self.base))
110
return abspath[pl:].lstrip('/')
164
112
def has(self, relpath):
165
raise NotImplementedError("has() is abstract on %r" % self)
167
def get(self, relpath):
113
"""Does the target location exist?
115
TODO: HttpTransport.has() should use a HEAD request,
116
not a full GET request.
118
TODO: This should be changed so that we don't use
119
urllib2 and get an exception, the code path would be
120
cleaner if we just do an http HEAD request, and parse
124
f = get_url(self.abspath(relpath))
125
# Without the read and then close()
126
# we tend to have busy sockets.
132
except urllib2.URLError:
135
if e.errno == errno.ENOENT:
137
raise HttpTransportError(orig_error=e)
139
def get(self, relpath, decode=False):
168
140
"""Get the file at the given relative path.
170
142
:param relpath: The relative path to the file
172
code, response_file = self._get(relpath, None)
175
def _get(self, relpath, ranges):
176
"""Get a file, or part of a file.
178
:param relpath: Path relative to transport base URL
179
:param ranges: None to get the whole file;
180
or [(start,end)] to fetch parts of a file.
182
:returns: (http_code, result_file)
184
Note that the current http implementations can only fetch one range at
185
a time through this call.
187
raise NotImplementedError(self._get)
189
def readv(self, relpath, offsets):
190
"""Get parts of the file at the given relative path.
192
:param offsets: A list of (offset, size) tuples.
193
:return: A list or generator of (offset, data) tuples
195
# Ideally we would pass one big request asking for all the ranges in
196
# one go; however then the server will give a multipart mime response
197
# back, and we can't parse them yet. So instead we just get one range
198
# per region, and try to coalesce the regions as much as possible.
200
# The read-coalescing code is not quite regular enough to have a
201
# single driver routine and
202
# helper method in Transport.
203
def do_combined_read(combined_offsets):
204
# read one coalesced block
206
for offset, size in combined_offsets:
208
mutter('readv coalesced %d reads.', len(combined_offsets))
209
offset = combined_offsets[0][0]
210
byte_range = (offset, offset + total_size - 1)
211
code, result_file = self._get(relpath, [byte_range])
213
for off, size in combined_offsets:
214
result_bytes = result_file.read(size)
215
assert len(result_bytes) == size
216
yield off, result_bytes
218
data = result_file.read(offset + total_size)[offset:offset + total_size]
220
for offset, size in combined_offsets:
221
yield offset, data[pos:pos + size]
226
pending_offsets = deque(offsets)
227
combined_offsets = []
228
while len(pending_offsets):
229
offset, size = pending_offsets.popleft()
230
if not combined_offsets:
231
combined_offsets = [[offset, size]]
233
if (len (combined_offsets) < 500 and
234
combined_offsets[-1][0] + combined_offsets[-1][1] == offset):
236
combined_offsets.append([offset, size])
238
# incompatible, or over the threshold issue a read and yield
239
pending_offsets.appendleft((offset, size))
240
for result in do_combined_read(combined_offsets):
242
combined_offsets = []
243
# whatever is left is a single coalesced request
244
if len(combined_offsets):
245
for result in do_combined_read(combined_offsets):
248
def put(self, relpath, f, mode=None):
145
return get_url(self.abspath(relpath))
146
except (BzrError, urllib2.URLError, IOError), e:
147
raise NoSuchFile(orig_error=e)
149
raise HttpTransportError(orig_error=e)
151
def get_partial(self, relpath, start, length=None):
152
"""Get just part of a file.
154
:param relpath: Path to the file, relative to base
155
:param start: The starting position to read from
156
:param length: The length to read. A length of None indicates
157
read to the end of the file.
158
:return: A file-like object containing at least the specified bytes.
159
Some implementations may return objects which can be read
160
past this length, but this is not guaranteed.
162
# TODO: You can make specialized http requests for just
163
# a portion of the file. Figure out how to do that.
164
# For now, urllib2 returns files that cannot seek() so
165
# we just read bytes off the beginning, until we
166
# get to the point that we care about.
167
f = self.get(relpath)
168
# TODO: read in smaller chunks, in case things are
169
# buffered internally.
173
def put(self, relpath, f):
249
174
"""Copy the file-like or string object into the location.
251
176
:param relpath: Location to put the contents, relative to base.
331
250
raise TransportNotPossible('http does not support lock_write()')
333
def clone(self, offset=None):
334
"""Return a new HttpTransportBase with root at self.base + offset
335
For now HttpTransportBase does not actually connect, so just return
336
a new HttpTransportBase object.
339
return self.__class__(self.base)
341
return self.__class__(self.abspath(offset))
343
#---------------- test server facilities ----------------
344
# TODO: load these only when running tests
347
class WebserverNotAvailable(Exception):
351
class BadWebserverPath(ValueError):
353
return 'path %s is not in %s' % self.args
356
class TestingHTTPRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):
358
def log_message(self, format, *args):
359
self.server.test_case.log('webserver - %s - - [%s] %s "%s" "%s"',
360
self.address_string(),
361
self.log_date_time_string(),
363
self.headers.get('referer', '-'),
364
self.headers.get('user-agent', '-'))
366
def handle_one_request(self):
367
"""Handle a single HTTP request.
369
You normally don't need to override this method; see the class
370
__doc__ string for information on how to handle specific HTTP
371
commands such as GET and POST.
374
for i in xrange(1,11): # Don't try more than 10 times
376
self.raw_requestline = self.rfile.readline()
377
except socket.error, e:
378
if e.args[0] in (errno.EAGAIN, errno.EWOULDBLOCK):
379
# omitted for now because some tests look at the log of
380
# the server and expect to see no errors. see recent
381
# email thread. -- mbp 20051021.
382
## self.log_message('EAGAIN (%d) while reading from raw_requestline' % i)
388
if not self.raw_requestline:
389
self.close_connection = 1
391
if not self.parse_request(): # An error code has been sent, just exit
393
mname = 'do_' + self.command
394
if not hasattr(self, mname):
395
self.send_error(501, "Unsupported method (%r)" % self.command)
397
method = getattr(self, mname)
401
class TestingHTTPServer(BaseHTTPServer.HTTPServer):
402
def __init__(self, server_address, RequestHandlerClass, test_case):
403
BaseHTTPServer.HTTPServer.__init__(self, server_address,
405
self.test_case = test_case
407
class HttpServer(Server):
408
"""A test server for http transports."""
410
# used to form the url that connects to this server
411
_url_protocol = 'http'
413
def _http_start(self):
415
httpd = TestingHTTPServer(('localhost', 0),
416
TestingHTTPRequestHandler,
418
host, port = httpd.socket.getsockname()
419
self._http_base_url = '%s://localhost:%s/' % (self._url_protocol, port)
420
self._http_starting.release()
421
httpd.socket.settimeout(0.1)
423
while self._http_running:
425
httpd.handle_request()
426
except socket.timeout:
429
def _get_remote_url(self, path):
430
path_parts = path.split(os.path.sep)
431
if os.path.isabs(path):
432
if path_parts[:len(self._local_path_parts)] != \
433
self._local_path_parts:
434
raise BadWebserverPath(path, self.test_dir)
435
remote_path = '/'.join(path_parts[len(self._local_path_parts):])
437
remote_path = '/'.join(path_parts)
439
self._http_starting.acquire()
440
self._http_starting.release()
441
return self._http_base_url + remote_path
443
def log(self, format, *args):
444
"""Capture Server log output."""
445
self.logs.append(format % args)
448
"""See bzrlib.transport.Server.setUp."""
449
self._home_dir = os.getcwdu()
450
self._local_path_parts = self._home_dir.split(os.path.sep)
451
self._http_starting = threading.Lock()
452
self._http_starting.acquire()
453
self._http_running = True
454
self._http_base_url = None
455
self._http_thread = threading.Thread(target=self._http_start)
456
self._http_thread.setDaemon(True)
457
self._http_thread.start()
458
self._http_proxy = os.environ.get("http_proxy")
459
if self._http_proxy is not None:
460
del os.environ["http_proxy"]
464
"""See bzrlib.transport.Server.tearDown."""
465
self._http_running = False
466
self._http_thread.join()
467
if self._http_proxy is not None:
469
os.environ["http_proxy"] = self._http_proxy
472
"""See bzrlib.transport.Server.get_url."""
473
return self._get_remote_url(self._home_dir)
475
def get_bogus_url(self):
476
"""See bzrlib.transport.Server.get_bogus_url."""
477
# this is chosen to try to prevent trouble with proxies, weird dns,
479
return 'http://127.0.0.1:1/'
252
register_transport('http://', HttpTransport)
253
register_transport('https://', HttpTransport)