1
# Copyright (C) 2005, 2006 Canonical Ltd
1
# Copyright (C) 2005 Canonical Ltd
3
3
# This program is free software; you can redistribute it and/or modify
4
4
# it under the terms of the GNU General Public License as published by
5
5
# the Free Software Foundation; either version 2 of the License, or
6
6
# (at your option) any later version.
8
8
# This program is distributed in the hope that it will be useful,
9
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
11
# GNU General Public License for more details.
13
13
# You should have received a copy of the GNU General Public License
14
14
# along with this program; if not, write to the Free Software
15
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
"""Base implementation of Transport over http.
19
There are separate implementation modules for each http client implementation.
16
"""Implementation of Transport over http.
24
from collections import deque
19
from bzrlib.transport import Transport, register_transport
20
from bzrlib.errors import (TransportNotPossible, NoSuchFile,
21
TransportError, ConnectionError)
25
23
from cStringIO import StringIO
24
import urllib, urllib2
29
from warnings import warn
31
from bzrlib.transport import Transport, register_transport, Server
32
from bzrlib.errors import (TransportNotPossible, NoSuchFile,
33
TransportError, ConnectionError, InvalidURL)
27
from bzrlib.errors import BzrError, BzrCheckError
34
28
from bzrlib.branch import Branch
35
29
from bzrlib.trace import mutter
36
# TODO: load these only when running http tests
37
import BaseHTTPServer, SimpleHTTPServer, socket, time
39
from bzrlib.ui import ui_factory
42
32
def extract_auth(url, password_manager):
43
"""Extract auth parameters from am HTTP/HTTPS url and add them to the given
34
Extract auth parameters from an HTTP/HTTPS url and add them to the given
44
35
password manager. Return the url, minus those auth parameters (which
47
assert re.match(r'^(https?)(\+\w+)?://', url), \
48
'invalid absolute url %r' % url
49
scheme, netloc, path, query, fragment = urlparse.urlsplit(url)
52
auth, netloc = netloc.split('@', 1)
38
assert url.startswith('http://') or url.startswith('https://')
39
scheme, host = url.split('//', 1)
41
host, path = host.split('/', 1)
47
auth, host = host.split('@', 1)
54
49
username, password = auth.split(':', 1)
56
51
username, password = auth, None
58
host = netloc.split(':', 1)[0]
61
username = urllib.unquote(username)
53
host, port = host.split(':', 1)
55
# FIXME: if password isn't given, should we ask for it?
62
56
if password is not None:
57
username = urllib.unquote(username)
63
58
password = urllib.unquote(password)
65
password = ui_factory.get_password(prompt='HTTP %(user)@%(host) password',
66
user=username, host=host)
67
password_manager.add_password(None, host, username, password)
68
url = urlparse.urlunsplit((scheme, netloc, path, query, fragment))
59
password_manager.add_password(None, host, username, password)
60
url = scheme + '//' + host + port + path
72
class HttpTransportBase(Transport):
73
"""Base class for http implementations.
75
Does URL parsing, etc, but not any network IO.
77
The protocol can be given as e.g. http+urllib://host/ to use a particular
65
mutter("get_url %s" % url)
66
manager = urllib2.HTTPPasswordMgrWithDefaultRealm()
67
url = extract_auth(url, manager)
68
auth_handler = urllib2.HTTPBasicAuthHandler(manager)
69
opener = urllib2.build_opener(auth_handler)
70
url_f = opener.open(url)
73
class HttpTransport(Transport):
74
"""This is the transport agent for http:// access.
76
TODO: Implement pipelined versions of all of the *_multi() functions.
81
# _proto: "http" or "https"
82
# _qualified_proto: may have "+pycurl", etc
84
79
def __init__(self, base):
85
80
"""Set the base path where files will be stored."""
86
proto_match = re.match(r'^(https?)(\+\w+)?://', base)
88
raise AssertionError("not a http url: %r" % base)
89
self._proto = proto_match.group(1)
90
impl_name = proto_match.group(2)
92
impl_name = impl_name[1:]
93
self._impl_name = impl_name
96
super(HttpTransportBase, self).__init__(base)
81
assert base.startswith('http://') or base.startswith('https://')
82
super(HttpTransport, self).__init__(base)
97
83
# In the future we might actually connect to the remote host
98
84
# rather than using get_url
99
85
# self._connection = None
100
(apparent_proto, self._host,
86
(self._proto, self._host,
101
87
self._path, self._parameters,
102
88
self._query, self._fragment) = urlparse.urlparse(self.base)
103
self._qualified_proto = apparent_proto
90
def should_cache(self):
91
"""Return True if the data pulled across should be cached locally.
95
def clone(self, offset=None):
96
"""Return a new HttpTransport with root at self.base + offset
97
For now HttpTransport does not actually connect, so just return
98
a new HttpTransport object.
101
return HttpTransport(self.base)
103
return HttpTransport(self.abspath(offset))
105
105
def abspath(self, relpath):
106
106
"""Return the full url to the given relative path.
108
This can be supplied with a string or a list.
110
The URL returned always has the protocol scheme originally used to
111
construct the transport, even if that includes an explicit
112
implementation qualifier.
107
This can be supplied with a string or a list
114
109
assert isinstance(relpath, basestring)
115
if isinstance(relpath, unicode):
116
raise InvalidURL(relpath, 'paths must not be unicode.')
117
110
if isinstance(relpath, basestring):
118
111
relpath_parts = relpath.split('/')
144
137
# I'm concerned about when it chooses to strip the last
145
138
# portion of the path, and when it doesn't.
146
139
path = '/'.join(basepath)
149
result = urlparse.urlunparse((self._qualified_proto,
150
self._host, path, '', '', ''))
153
def _real_abspath(self, relpath):
154
"""Produce absolute path, adjusting protocol if needed"""
155
abspath = self.abspath(relpath)
156
qp = self._qualified_proto
158
if self._qualified_proto != self._proto:
159
abspath = rp + abspath[len(qp):]
160
if not isinstance(abspath, str):
161
# escaping must be done at a higher level
162
abspath = abspath.encode('ascii')
140
return urlparse.urlunparse((self._proto,
141
self._host, path, '', '', ''))
165
143
def has(self, relpath):
166
raise NotImplementedError("has() is abstract on %r" % self)
168
def get(self, relpath):
144
"""Does the target location exist?
146
TODO: HttpTransport.has() should use a HEAD request,
147
not a full GET request.
149
TODO: This should be changed so that we don't use
150
urllib2 and get an exception, the code path would be
151
cleaner if we just do an http HEAD request, and parse
156
path = self.abspath(relpath)
158
# Without the read and then close()
159
# we tend to have busy sockets.
163
except urllib2.URLError, e:
164
mutter('url error code: %s for has url: %r', e.code, path)
169
mutter('io error: %s %s for has url: %r',
170
e.errno, errno.errorcode.get(e.errno), path)
171
if e.errno == errno.ENOENT:
173
raise TransportError(orig_error=e)
175
def get(self, relpath, decode=False):
169
176
"""Get the file at the given relative path.
171
178
:param relpath: The relative path to the file
173
code, response_file = self._get(relpath, None)
176
def _get(self, relpath, ranges):
177
"""Get a file, or part of a file.
179
:param relpath: Path relative to transport base URL
180
:param ranges: None to get the whole file;
181
or [(start,end)] to fetch parts of a file.
183
:returns: (http_code, result_file)
185
Note that the current http implementations can only fetch one range at
186
a time through this call.
188
raise NotImplementedError(self._get)
190
def readv(self, relpath, offsets):
191
"""Get parts of the file at the given relative path.
193
:param offsets: A list of (offset, size) tuples.
194
:return: A list or generator of (offset, data) tuples
196
# Ideally we would pass one big request asking for all the ranges in
197
# one go; however then the server will give a multipart mime response
198
# back, and we can't parse them yet. So instead we just get one range
199
# per region, and try to coalesce the regions as much as possible.
201
# The read-coalescing code is not quite regular enough to have a
202
# single driver routine and
203
# helper method in Transport.
204
def do_combined_read(combined_offsets):
205
# read one coalesced block
207
for offset, size in combined_offsets:
209
mutter('readv coalesced %d reads.', len(combined_offsets))
210
offset = combined_offsets[0][0]
211
byte_range = (offset, offset + total_size - 1)
212
code, result_file = self._get(relpath, [byte_range])
214
for off, size in combined_offsets:
215
result_bytes = result_file.read(size)
216
assert len(result_bytes) == size
217
yield off, result_bytes
219
data = result_file.read(offset + total_size)[offset:offset + total_size]
221
for offset, size in combined_offsets:
222
yield offset, data[pos:pos + size]
227
pending_offsets = deque(offsets)
228
combined_offsets = []
229
while len(pending_offsets):
230
offset, size = pending_offsets.popleft()
231
if not combined_offsets:
232
combined_offsets = [[offset, size]]
234
if (len (combined_offsets) < 500 and
235
combined_offsets[-1][0] + combined_offsets[-1][1] == offset):
237
combined_offsets.append([offset, size])
239
# incompatible, or over the threshold issue a read and yield
240
pending_offsets.appendleft((offset, size))
241
for result in do_combined_read(combined_offsets):
243
combined_offsets = []
244
# whatever is left is a single coalesced request
245
if len(combined_offsets):
246
for result in do_combined_read(combined_offsets):
249
def put(self, relpath, f, mode=None):
182
path = self.abspath(relpath)
184
except urllib2.HTTPError, e:
185
mutter('url error code: %s for has url: %r', e.code, path)
187
raise NoSuchFile(path, extra=e)
189
except (BzrError, IOError), e:
190
if hasattr(e, 'errno'):
191
mutter('io error: %s %s for has url: %r',
192
e.errno, errno.errorcode.get(e.errno), path)
193
if e.errno == errno.ENOENT:
194
raise NoSuchFile(path, extra=e)
195
raise ConnectionError(msg = "Error retrieving %s: %s"
196
% (self.abspath(relpath), str(e)),
199
def put(self, relpath, f):
250
200
"""Copy the file-like or string object into the location.
252
202
:param relpath: Location to put the contents, relative to base.
330
271
:return: A lock object, which should be passed to Transport.unlock()
332
273
raise TransportNotPossible('http does not support lock_write()')
334
def clone(self, offset=None):
335
"""Return a new HttpTransportBase with root at self.base + offset
336
For now HttpTransportBase does not actually connect, so just return
337
a new HttpTransportBase object.
340
return self.__class__(self.base)
342
return self.__class__(self.abspath(offset))
344
#---------------- test server facilities ----------------
345
# TODO: load these only when running tests
348
class WebserverNotAvailable(Exception):
352
class BadWebserverPath(ValueError):
354
return 'path %s is not in %s' % self.args
357
class TestingHTTPRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):
359
def log_message(self, format, *args):
360
self.server.test_case.log('webserver - %s - - [%s] %s "%s" "%s"',
361
self.address_string(),
362
self.log_date_time_string(),
364
self.headers.get('referer', '-'),
365
self.headers.get('user-agent', '-'))
367
def handle_one_request(self):
368
"""Handle a single HTTP request.
370
You normally don't need to override this method; see the class
371
__doc__ string for information on how to handle specific HTTP
372
commands such as GET and POST.
375
for i in xrange(1,11): # Don't try more than 10 times
377
self.raw_requestline = self.rfile.readline()
378
except socket.error, e:
379
if e.args[0] in (errno.EAGAIN, errno.EWOULDBLOCK):
380
# omitted for now because some tests look at the log of
381
# the server and expect to see no errors. see recent
382
# email thread. -- mbp 20051021.
383
## self.log_message('EAGAIN (%d) while reading from raw_requestline' % i)
389
if not self.raw_requestline:
390
self.close_connection = 1
392
if not self.parse_request(): # An error code has been sent, just exit
394
mname = 'do_' + self.command
395
if not hasattr(self, mname):
396
self.send_error(501, "Unsupported method (%r)" % self.command)
398
method = getattr(self, mname)
402
class TestingHTTPServer(BaseHTTPServer.HTTPServer):
403
def __init__(self, server_address, RequestHandlerClass, test_case):
404
BaseHTTPServer.HTTPServer.__init__(self, server_address,
406
self.test_case = test_case
408
class HttpServer(Server):
409
"""A test server for http transports."""
411
# used to form the url that connects to this server
412
_url_protocol = 'http'
414
def _http_start(self):
416
httpd = TestingHTTPServer(('localhost', 0),
417
TestingHTTPRequestHandler,
419
host, port = httpd.socket.getsockname()
420
self._http_base_url = '%s://localhost:%s/' % (self._url_protocol, port)
421
self._http_starting.release()
422
httpd.socket.settimeout(0.1)
424
while self._http_running:
426
httpd.handle_request()
427
except socket.timeout:
430
def _get_remote_url(self, path):
431
path_parts = path.split(os.path.sep)
432
if os.path.isabs(path):
433
if path_parts[:len(self._local_path_parts)] != \
434
self._local_path_parts:
435
raise BadWebserverPath(path, self.test_dir)
436
remote_path = '/'.join(path_parts[len(self._local_path_parts):])
438
remote_path = '/'.join(path_parts)
440
self._http_starting.acquire()
441
self._http_starting.release()
442
return self._http_base_url + remote_path
444
def log(self, format, *args):
445
"""Capture Server log output."""
446
self.logs.append(format % args)
449
"""See bzrlib.transport.Server.setUp."""
450
self._home_dir = os.getcwdu()
451
self._local_path_parts = self._home_dir.split(os.path.sep)
452
self._http_starting = threading.Lock()
453
self._http_starting.acquire()
454
self._http_running = True
455
self._http_base_url = None
456
self._http_thread = threading.Thread(target=self._http_start)
457
self._http_thread.setDaemon(True)
458
self._http_thread.start()
459
self._http_proxy = os.environ.get("http_proxy")
460
if self._http_proxy is not None:
461
del os.environ["http_proxy"]
465
"""See bzrlib.transport.Server.tearDown."""
466
self._http_running = False
467
self._http_thread.join()
468
if self._http_proxy is not None:
470
os.environ["http_proxy"] = self._http_proxy
473
"""See bzrlib.transport.Server.get_url."""
474
return self._get_remote_url(self._home_dir)
476
def get_bogus_url(self):
477
"""See bzrlib.transport.Server.get_bogus_url."""
478
# this is chosen to try to prevent trouble with proxies, weird dns,
480
return 'http://127.0.0.1:1/'