1
# Copyright (C) 2005, 2006 Canonical Ltd
1
# Copyright (C) 2005 Canonical Ltd
3
3
# This program is free software; you can redistribute it and/or modify
4
4
# it under the terms of the GNU General Public License as published by
5
5
# the Free Software Foundation; either version 2 of the License, or
6
6
# (at your option) any later version.
8
8
# This program is distributed in the hope that it will be useful,
9
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
11
# GNU General Public License for more details.
13
13
# You should have received a copy of the GNU General Public License
14
14
# along with this program; if not, write to the Free Software
15
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
"""Base implementation of Transport over http.
19
There are separate implementation modules for each http client implementation.
16
"""Implementation of Transport over http.
22
20
from cStringIO import StringIO
21
import urllib, urllib2
31
23
from warnings import warn
33
# TODO: load these only when running http tests
34
import BaseHTTPServer, SimpleHTTPServer, socket, time
37
from bzrlib import errors
38
from bzrlib.errors import (TransportNotPossible, NoSuchFile,
39
TransportError, ConnectionError, InvalidURL)
25
from bzrlib.transport import Transport, Server
26
from bzrlib.errors import (TransportNotPossible, NoSuchFile,
27
TransportError, ConnectionError)
28
from bzrlib.errors import BzrError, BzrCheckError
40
29
from bzrlib.branch import Branch
41
30
from bzrlib.trace import mutter
42
from bzrlib.transport import Transport, register_transport, Server
43
from bzrlib.transport.http.response import (HttpMultipartRangeResponse,
45
from bzrlib.ui import ui_factory
48
33
def extract_auth(url, password_manager):
49
"""Extract auth parameters from an HTTP/HTTPS url and add them to the given
35
Extract auth parameters from an HTTP/HTTPS url and add them to the given
50
36
password manager. Return the url, minus those auth parameters (which
53
assert re.match(r'^(https?)(\+\w+)?://', url), \
54
'invalid absolute url %r' % url
55
scheme, netloc, path, query, fragment = urlparse.urlsplit(url)
58
auth, netloc = netloc.split('@', 1)
39
assert url.startswith('http://') or url.startswith('https://')
40
scheme, host = url.split('//', 1)
42
host, path = host.split('/', 1)
48
auth, host = host.split('@', 1)
60
50
username, password = auth.split(':', 1)
62
52
username, password = auth, None
64
host = netloc.split(':', 1)[0]
67
username = urllib.unquote(username)
54
host, port = host.split(':', 1)
56
# FIXME: if password isn't given, should we ask for it?
68
57
if password is not None:
58
username = urllib.unquote(username)
69
59
password = urllib.unquote(password)
71
password = ui_factory.get_password(prompt='HTTP %(user)@%(host) password',
72
user=username, host=host)
73
password_manager.add_password(None, host, username, password)
74
url = urlparse.urlunsplit((scheme, netloc, path, query, fragment))
60
password_manager.add_password(None, host, username, password)
61
url = scheme + '//' + host + port + path
78
def _extract_headers(header_text, url):
79
"""Extract the mapping for an rfc2822 header
81
This is a helper function for the test suite and for _pycurl.
82
(urllib already parses the headers for us)
84
In the case that there are multiple headers inside the file,
85
the last one is returned.
87
:param header_text: A string of header information.
88
This expects that the first line of a header will always be HTTP ...
89
:param url: The url we are parsing, so we can raise nice errors
90
:return: mimetools.Message object, which basically acts like a case
91
insensitive dictionary.
94
remaining = header_text
97
raise errors.InvalidHttpResponse(url, 'Empty headers')
100
header_file = StringIO(remaining)
101
first_line = header_file.readline()
102
if not first_line.startswith('HTTP'):
103
if first_header: # The first header *must* start with HTTP
104
raise errors.InvalidHttpResponse(url,
105
'Opening header line did not start with HTTP: %s'
107
assert False, 'Opening header line was not HTTP'
109
break # We are done parsing
111
m = mimetools.Message(header_file)
113
# mimetools.Message parses the first header up to a blank line
114
# So while there is remaining data, it probably means there is
115
# another header to be parsed.
116
# Get rid of any preceding whitespace, which if it is all whitespace
117
# will get rid of everything.
118
remaining = header_file.read().lstrip()
122
class HttpTransportBase(Transport):
123
"""Base class for http implementations.
125
Does URL parsing, etc, but not any network IO.
127
The protocol can be given as e.g. http+urllib://host/ to use a particular
131
# _proto: "http" or "https"
132
# _qualified_proto: may have "+pycurl", etc
66
mutter("get_url %s" % url)
67
manager = urllib2.HTTPPasswordMgrWithDefaultRealm()
68
url = extract_auth(url, manager)
69
auth_handler = urllib2.HTTPBasicAuthHandler(manager)
70
opener = urllib2.build_opener(auth_handler)
71
url_f = opener.open(url)
74
class HttpTransport(Transport):
75
"""This is the transport agent for http:// access.
77
TODO: Implement pipelined versions of all of the *_multi() functions.
134
80
def __init__(self, base):
135
81
"""Set the base path where files will be stored."""
136
proto_match = re.match(r'^(https?)(\+\w+)?://', base)
138
raise AssertionError("not a http url: %r" % base)
139
self._proto = proto_match.group(1)
140
impl_name = proto_match.group(2)
142
impl_name = impl_name[1:]
143
self._impl_name = impl_name
82
assert base.startswith('http://') or base.startswith('https://')
144
83
if base[-1] != '/':
146
super(HttpTransportBase, self).__init__(base)
85
super(HttpTransport, self).__init__(base)
147
86
# In the future we might actually connect to the remote host
148
87
# rather than using get_url
149
88
# self._connection = None
150
(apparent_proto, self._host,
89
(self._proto, self._host,
151
90
self._path, self._parameters,
152
91
self._query, self._fragment) = urlparse.urlparse(self.base)
153
self._qualified_proto = apparent_proto
93
def should_cache(self):
94
"""Return True if the data pulled across should be cached locally.
98
def clone(self, offset=None):
99
"""Return a new HttpTransport with root at self.base + offset
100
For now HttpTransport does not actually connect, so just return
101
a new HttpTransport object.
104
return HttpTransport(self.base)
106
return HttpTransport(self.abspath(offset))
155
108
def abspath(self, relpath):
156
109
"""Return the full url to the given relative path.
158
This can be supplied with a string or a list.
160
The URL returned always has the protocol scheme originally used to
161
construct the transport, even if that includes an explicit
162
implementation qualifier.
110
This can be supplied with a string or a list
164
112
assert isinstance(relpath, basestring)
165
if isinstance(relpath, unicode):
166
raise InvalidURL(relpath, 'paths must not be unicode.')
167
113
if isinstance(relpath, basestring):
168
114
relpath_parts = relpath.split('/')
194
140
# I'm concerned about when it chooses to strip the last
195
141
# portion of the path, and when it doesn't.
196
142
path = '/'.join(basepath)
199
result = urlparse.urlunparse((self._qualified_proto,
200
self._host, path, '', '', ''))
203
def _real_abspath(self, relpath):
204
"""Produce absolute path, adjusting protocol if needed"""
205
abspath = self.abspath(relpath)
206
qp = self._qualified_proto
208
if self._qualified_proto != self._proto:
209
abspath = rp + abspath[len(qp):]
210
if not isinstance(abspath, str):
211
# escaping must be done at a higher level
212
abspath = abspath.encode('ascii')
143
return urlparse.urlunparse((self._proto,
144
self._host, path, '', '', ''))
215
146
def has(self, relpath):
216
raise NotImplementedError("has() is abstract on %r" % self)
218
def get(self, relpath):
147
"""Does the target location exist?
149
TODO: HttpTransport.has() should use a HEAD request,
150
not a full GET request.
152
TODO: This should be changed so that we don't use
153
urllib2 and get an exception, the code path would be
154
cleaner if we just do an http HEAD request, and parse
159
path = self.abspath(relpath)
161
# Without the read and then close()
162
# we tend to have busy sockets.
166
except urllib2.URLError, e:
167
mutter('url error code: %s for has url: %r', e.code, path)
172
mutter('io error: %s %s for has url: %r',
173
e.errno, errno.errorcode.get(e.errno), path)
174
if e.errno == errno.ENOENT:
176
raise TransportError(orig_error=e)
178
def get(self, relpath, decode=False):
219
179
"""Get the file at the given relative path.
221
181
:param relpath: The relative path to the file
223
code, response_file = self._get(relpath, None)
226
def _get(self, relpath, ranges):
227
"""Get a file, or part of a file.
229
:param relpath: Path relative to transport base URL
230
:param ranges: None to get the whole file;
231
or [(start,end)] to fetch parts of a file.
233
:returns: (http_code, result_file)
235
Note that the current http implementations can only fetch one range at
236
a time through this call.
238
raise NotImplementedError(self._get)
240
def readv(self, relpath, offsets):
241
"""Get parts of the file at the given relative path.
243
:param offsets: A list of (offset, size) tuples.
244
:return: A list or generator of (offset, data) tuples
246
ranges = self.offsets_to_ranges(offsets)
247
mutter('http readv of %s collapsed %s offsets => %s',
248
relpath, len(offsets), ranges)
249
code, f = self._get(relpath, ranges)
250
for start, size in offsets:
251
f.seek(start, (start < 0) and 2 or 0)
254
assert len(data) == size
258
def offsets_to_ranges(offsets):
259
"""Turn a list of offsets and sizes into a list of byte ranges.
261
:param offsets: A list of tuples of (start, size). An empty list
263
:return: a list of inclusive byte ranges (start, end)
264
Adjacent ranges will be combined.
266
# Make sure we process sorted offsets
267
offsets = sorted(offsets)
272
for start, size in offsets:
273
end = start + size - 1
275
combined.append([start, end])
276
elif start <= prev_end + 1:
277
combined[-1][1] = end
279
combined.append([start, end])
185
path = self.abspath(relpath)
187
except urllib2.HTTPError, e:
188
mutter('url error code: %s for has url: %r', e.code, path)
190
raise NoSuchFile(path, extra=e)
192
except (BzrError, IOError), e:
193
if hasattr(e, 'errno'):
194
mutter('io error: %s %s for has url: %r',
195
e.errno, errno.errorcode.get(e.errno), path)
196
if e.errno == errno.ENOENT:
197
raise NoSuchFile(path, extra=e)
198
raise ConnectionError(msg = "Error retrieving %s: %s"
199
% (self.abspath(relpath), str(e)),
284
202
def put(self, relpath, f, mode=None):
285
203
"""Copy the file-like or string object into the location.
452
340
method = getattr(self, mname)
455
if sys.platform == 'win32':
456
# On win32 you cannot access non-ascii filenames without
457
# decoding them into unicode first.
458
# However, under Linux, you can access bytestream paths
459
# without any problems. If this function was always active
460
# it would probably break tests when LANG=C was set
461
def translate_path(self, path):
462
"""Translate a /-separated PATH to the local filename syntax.
464
For bzr, all url paths are considered to be utf8 paths.
465
On Linux, you can access these paths directly over the bytestream
466
request, but on win32, you must decode them, and access them
469
# abandon query parameters
470
path = urlparse.urlparse(path)[2]
471
path = posixpath.normpath(urllib.unquote(path))
472
path = path.decode('utf-8')
473
words = path.split('/')
474
words = filter(None, words)
477
drive, word = os.path.splitdrive(word)
478
head, word = os.path.split(word)
479
if word in (os.curdir, os.pardir): continue
480
path = os.path.join(path, word)
484
343
class TestingHTTPServer(BaseHTTPServer.HTTPServer):
485
344
def __init__(self, server_address, RequestHandlerClass, test_case):
486
345
BaseHTTPServer.HTTPServer.__init__(self, server_address,