# Copyright (C) 2005, 2006 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""Base implementation of Transport over http.

There are separate implementation modules for each http client implementation.
"""

from cStringIO import StringIO
import mimetools
import re
import sys
import urllib
import urlparse

from bzrlib import errors, ui
from bzrlib.smart import medium
from bzrlib.trace import mutter
from bzrlib.transport import (
    Transport,
    )


# TODO: This is not used anymore by HttpTransport_urllib
# (extracting the auth info and prompting the user for a password
# have been split), only the tests still use it. It should be
# deleted and the tests rewritten ASAP to stay in sync.
def extract_auth(url, password_manager):
    """Extract auth parameters from an HTTP/HTTPS url and add them to the given
    password manager.  Return the url, minus those auth parameters (which
    confuse urllib2).
    """
    assert re.match(r'^(https?)(\+\w+)?://', url), \
            'invalid absolute url %r' % url
    scheme, netloc, path, query, fragment = urlparse.urlsplit(url)

    if '@' in netloc:
        auth, netloc = netloc.split('@', 1)
        if ':' in auth:
            username, password = auth.split(':', 1)
        else:
            username, password = auth, None
        if ':' in netloc:
            host = netloc.split(':', 1)[0]
        else:
            host = netloc
        username = urllib.unquote(username)
        if password is not None:
            password = urllib.unquote(password)
        else:
            password = ui.ui_factory.get_password(
                prompt='HTTP %(user)s@%(host)s password',
                user=username, host=host)
        password_manager.add_password(None, host, username, password)
    url = urlparse.urlunsplit((scheme, netloc, path, query, fragment))
    return url
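
# Illustrative sketch of extract_auth (hypothetical URL and urllib2 password
# manager); the credentials are stripped from the URL and registered instead:
#
#   import urllib2
#   pm = urllib2.HTTPPasswordMgrWithDefaultRealm()
#   clean = extract_auth('http://joe:secret@example.com/repo', pm)
#   # clean == 'http://example.com/repo', and pm now holds joe/secret
#   # for host 'example.com'.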


def _extract_headers(header_text, url):
    """Extract the mapping for an rfc2822 header

    This is a helper function for the test suite and for _pycurl.
    (urllib already parses the headers for us)

    In the case that there are multiple headers inside the file,
    the last one is returned.

    :param header_text: A string of header information.
        This expects that the first line of a header will always be HTTP ...
    :param url: The url we are parsing, so we can raise nice errors
    :return: mimetools.Message object, which basically acts like a case
        insensitive dictionary.
    """
    first_header = True
    remaining = header_text

    if not remaining:
        raise errors.InvalidHttpResponse(url, 'Empty headers')

    while remaining:
        header_file = StringIO(remaining)
        first_line = header_file.readline()
        if not first_line.startswith('HTTP'):
            if first_header: # The first header *must* start with HTTP
                raise errors.InvalidHttpResponse(url,
                    'Opening header line did not start with HTTP: %s'
                    % (first_line,))
            else:
                break # We are done parsing
        first_header = False
        m = mimetools.Message(header_file)

        # mimetools.Message parses the first header up to a blank line.
        # So while there is remaining data, it probably means there is
        # another header to be parsed.
        # Get rid of any preceding whitespace, which if it is all whitespace
        # will get rid of everything.
        remaining = header_file.read().lstrip()
    return m
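
# Illustrative sketch of _extract_headers (hypothetical response text): only
# the last header block is returned, as a case-insensitive mapping.
#
#   msg = _extract_headers('HTTP/1.1 200 OK\r\nContent-Length: 42\r\n\r\n',
#                          'http://example.com/foo')
#   # msg['content-length'] == '42'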


class HttpTransportBase(Transport, medium.SmartClientMedium):
    """Base class for http implementations.

    Does URL parsing, etc, but not any network IO.

    The protocol can be given as e.g. http+urllib://host/ to use a particular
    implementation.

    TODO: Implement pipelined versions of all of the *_multi() functions.
    """

    # _proto: "http" or "https"
    # _qualified_proto: may have "+pycurl", etc

    def __init__(self, base, from_transport=None):
        """Set the base path where files will be stored."""
        proto_match = re.match(r'^(https?)(\+\w+)?://', base)
        if not proto_match:
            raise AssertionError("not a http url: %r" % base)
        self._proto = proto_match.group(1)
        impl_name = proto_match.group(2)
        if impl_name:
            impl_name = impl_name[1:]
        self._impl_name = impl_name
        if base[-1] != '/':
            base = base + '/'
        super(HttpTransportBase, self).__init__(base)
        (apparent_proto, self._host,
         self._path, self._parameters,
         self._query, self._fragment) = urlparse.urlparse(self.base)
        self._qualified_proto = apparent_proto
        # range hint is handled dynamically throughout the life
        # of the transport object. We start by trying multi-range
        # requests and if the server returns bogus results, we
        # retry with single range requests and, finally, we
        # forget about range if the server really can't
        # understand. Once acquired, this piece of info is
        # propagated to clones.
        if from_transport is not None:
            self._range_hint = from_transport._range_hint
        else:
            self._range_hint = 'multi'
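
    # Illustrative sketch (hypothetical URL): a qualified scheme such as
    # 'http+pycurl://example.com/repo' yields _proto == 'http',
    # _impl_name == 'pycurl' and _qualified_proto == 'http+pycurl'; the plain
    # scheme is what ends up on the wire.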

    def abspath(self, relpath):
        """Return the full url to the given relative path.

        This can be supplied with a string or a list.

        The URL returned always has the protocol scheme originally used to
        construct the transport, even if that includes an explicit
        implementation qualifier.
        """
        assert isinstance(relpath, basestring)
        if isinstance(relpath, unicode):
            raise errors.InvalidURL(relpath, 'paths must not be unicode.')
        if isinstance(relpath, basestring):
            relpath_parts = relpath.split('/')
        else:
            # TODO: Don't call this with an array - no magic interfaces
            relpath_parts = relpath[:]
        if relpath.startswith('/'):
            basepath = []
        else:
            # Except for the root, no trailing slashes are allowed
            if len(relpath_parts) > 1 and relpath_parts[-1] == '':
                raise ValueError(
                    "path %r within branch %r seems to be a directory"
                    % (relpath, self._path))
            basepath = self._path.split('/')
            if len(basepath) > 0 and basepath[-1] == '':
                basepath = basepath[:-1]

        for p in relpath_parts:
            if p == '..':
                if len(basepath) == 0:
                    # In most filesystems, a request for the parent
                    # of root just returns root.
                    continue
                basepath.pop()
            elif p == '.' or p == '':
                continue # No-op
            else:
                basepath.append(p)
        # Possibly, we could use urlparse.urljoin() here, but
        # I'm concerned about when it chooses to strip the last
        # portion of the path, and when it doesn't.
        path = '/'.join(basepath)
        if path == '':
            path = '/'
        result = urlparse.urlunparse((self._qualified_proto,
                                      self._host, path, '', '', ''))
        return result
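
    # Illustrative sketch (hypothetical transport rooted at
    # 'http+urllib://example.com/repo/'):
    #
    #   t.abspath('foo/bar')   # -> 'http+urllib://example.com/repo/foo/bar'
    #   t.abspath('../other')  # -> 'http+urllib://example.com/other'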

    def _real_abspath(self, relpath):
        """Produce absolute path, adjusting protocol if needed"""
        abspath = self.abspath(relpath)
        qp = self._qualified_proto
        rp = self._proto
        if self._qualified_proto != self._proto:
            abspath = rp + abspath[len(qp):]
        if not isinstance(abspath, str):
            # escaping must be done at a higher level
            abspath = abspath.encode('ascii')
        return abspath

    def has(self, relpath):
        raise NotImplementedError("has() is abstract on %r" % self)

    def get(self, relpath):
        """Get the file at the given relative path.

        :param relpath: The relative path to the file
        """
        code, response_file = self._get(relpath, None)
        return response_file

    def _get(self, relpath, ranges, tail_amount=0):
        """Get a file, or part of a file.

        :param relpath: Path relative to transport base URL
        :param ranges: None to get the whole file;
            or [(start,end)+], a list of tuples to fetch parts of a file.
        :param tail_amount: The amount to get from the end of the file.

        :returns: (http_code, result_file)
        """
        raise NotImplementedError(self._get)

    def get_request(self):
        return SmartClientHTTPMediumRequest(self)

    def get_smart_medium(self):
        """See Transport.get_smart_medium.

        HttpTransportBase directly implements the minimal interface of
        SmartMediumClient, so this returns self.
        """
        return self

    def _retry_get(self, relpath, ranges, exc_info):
        """A GET request has failed, let's retry with a simpler request."""

        try_again = False
        # The server does not give us enough data or gives us
        # a bogus-looking result, let's try again with
        # a simpler request if possible.
        if self._range_hint == 'multi':
            self._range_hint = 'single'
            mutter('Retry %s with single range request' % relpath)
            try_again = True
        elif self._range_hint == 'single':
            self._range_hint = None
            mutter('Retry %s without ranges' % relpath)
            try_again = True

        if try_again:
            # Note that since the offsets and the ranges may not
            # be in the same order, we don't try to calculate a
            # restricted single range encompassing unprocessed
            # offsets.
            code, f = self._get(relpath, ranges)
            return try_again, code, f
        else:
            # We tried all the tricks, but nothing worked. We
            # re-raise the original exception; the 'mutter' calls
            # above will indicate that further tries were
            # unsuccessful.
            raise exc_info[0], exc_info[1], exc_info[2]

    def readv(self, relpath, offsets):
        """Get parts of the file at the given relative path.

        :param offsets: A list of (offset, size) tuples.
        :return: A list or generator of (offset, data) tuples
        """
        ranges = self.offsets_to_ranges(offsets)
        mutter('http readv of %s collapsed %s offsets => %s',
                relpath, len(offsets), ranges)

        try_again = True
        while try_again:
            try_again = False
            try:
                code, f = self._get(relpath, ranges)
            except (errors.InvalidRange, errors.ShortReadvError), e:
                try_again, code, f = self._retry_get(relpath, ranges,
                                                     sys.exc_info())

        for start, size in offsets:
            try_again = True
            while try_again:
                try_again = False
                f.seek(start, (start < 0) and 2 or 0)
                start = f.tell()
                try:
                    data = f.read(size)
                    if len(data) != size:
                        raise errors.ShortReadvError(relpath, start, size,
                                                     actual=len(data))
                except (errors.InvalidRange, errors.ShortReadvError), e:
                    # Note that we replace 'f' here and that it
                    # may need cleaning one day before being
                    # thrown that way.
                    try_again, code, f = self._retry_get(relpath, ranges,
                                                         sys.exc_info())
            # After one or more tries, we get the data.
            yield start, data
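
    # Illustrative sketch (hypothetical transport 't' and file name): readv
    # yields one (offset, data) pair per requested (offset, size), retrying
    # with simpler range requests if the server mishandles them.
    #
    #   for offset, data in t.readv('inventory.knit', [(0, 100), (200, 50)]):
    #       # data is exactly the requested number of bytes at 'offset'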

    @staticmethod
    def offsets_to_ranges(offsets):
        """Turn a list of offsets and sizes into a list of byte ranges.

        :param offsets: A list of tuples of (start, size).  An empty list
            is not accepted.
        :return: a list of inclusive byte ranges (start, end).
            Adjacent ranges will be combined.
        """
        # Make sure we process sorted offsets
        offsets = sorted(offsets)

        prev_end = None
        combined = []

        for start, size in offsets:
            end = start + size - 1
            if prev_end is None:
                combined.append([start, end])
            elif start <= prev_end + 1:
                combined[-1][1] = end
            else:
                combined.append([start, end])
            prev_end = end

        return combined
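
    # Illustrative sketch (hypothetical offsets): adjacent or contiguous
    # requests collapse into a single inclusive byte range.
    #
    #   HttpTransportBase.offsets_to_ranges([(0, 10), (10, 10), (30, 5)])
    #   # -> [[0, 19], [30, 34]]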

    def _post(self, body_bytes):
        """POST body_bytes to .bzr/smart on this transport.

        :returns: (response code, response body file-like object).
        """
        # TODO: Requiring all the body_bytes to be available at the beginning
        # of the POST may require large client buffers. It would be nice to
        # have an interface that allows streaming via POST when possible (and
        # degrades to a local buffer when not).
        raise NotImplementedError(self._post)

    def put_file(self, relpath, f, mode=None):
        """Copy the file-like object into the location.

        :param relpath: Location to put the contents, relative to base.
        :param f:       File-like object.
        """
        raise errors.TransportNotPossible('http PUT not supported')

    def mkdir(self, relpath, mode=None):
        """Create a directory at the given path."""
        raise errors.TransportNotPossible('http does not support mkdir()')

    def rmdir(self, relpath):
        """See Transport.rmdir."""
        raise errors.TransportNotPossible('http does not support rmdir()')

    def append_file(self, relpath, f, mode=None):
        """Append the text in the file-like object into the final
        location.
        """
        raise errors.TransportNotPossible('http does not support append()')

    def copy(self, rel_from, rel_to):
        """Copy the item at rel_from to the location at rel_to"""
        raise errors.TransportNotPossible('http does not support copy()')

    def copy_to(self, relpaths, other, mode=None, pb=None):
        """Copy a set of entries from self into another Transport.

        :param relpaths: A list/generator of entries to be copied.

        TODO: if other is LocalTransport, is it possible to
              do better than put(get())?
        """
        # At this point HttpTransport might be able to check and see if
        # the remote location is the same, and rather than download and
        # then upload, it could just issue a remote copy_this command.
        if isinstance(other, HttpTransportBase):
            raise errors.TransportNotPossible(
                'http cannot be the target of copy_to()')
        else:
            return super(HttpTransportBase, self).\
                    copy_to(relpaths, other, mode=mode, pb=pb)

    def lock_write(self, relpath):
        """Lock the given file for exclusive access.

        :return: A lock object, which should be passed to Transport.unlock()
        """
        raise errors.TransportNotPossible('http does not support lock_write()')

    def clone(self, offset=None):
        """Return a new HttpTransportBase with root at self.base + offset.

        We let the daughter classes take advantage of the hint
        that it's a cloning, not a raw creation.
        """
        if offset is None:
            return self.__class__(self.base, self)
        else:
            return self.__class__(self.abspath(offset), self)

    def attempted_range_header(self, ranges, tail_amount):
        """Prepare an HTTP Range header at a level the server should accept"""

        if self._range_hint == 'multi':
            return self.range_header(ranges, tail_amount)
        elif self._range_hint == 'single':
            # Combine all the requested ranges into a single
            # encompassing one
            if len(ranges) > 0:
                start, ignored = ranges[0]
                ignored, end = ranges[-1]
                if tail_amount not in (0, None):
                    # Nothing we can do here to combine ranges
                    # with tail_amount, just return None. The
                    # whole file should be downloaded.
                    return None
                else:
                    return self.range_header([(start, end)], 0)
            else:
                # Only tail_amount requested, let range_header
                # do its work
                return self.range_header(ranges, tail_amount)
        else:
            return None

    @staticmethod
    def range_header(ranges, tail_amount):
        """Turn a list of byte ranges into an HTTP Range header value.

        :param ranges: A list of byte ranges, (start, end).
        :param tail_amount: The amount to get from the end of the file.

        :return: HTTP range header string.

        At least one non-empty range *or* a tail_amount must be
        provided.
        """
        strings = []
        for start, end in ranges:
            strings.append('%d-%d' % (start, end))

        if tail_amount:
            strings.append('-%d' % tail_amount)

        return ','.join(strings)
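
    # Illustrative sketch (hypothetical ranges): the returned string is the
    # byte-range part of an HTTP/1.1 Range header, e.g. 'Range: bytes=...'.
    #
    #   HttpTransportBase.range_header([(0, 9), (100, 199)], 50)
    #   # -> '0-9,100-199,-50'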

    def send_http_smart_request(self, bytes):
        code, body_filelike = self._post(bytes)
        assert code == 200, 'unexpected HTTP response code %r' % (code,)
        return body_filelike


class SmartClientHTTPMediumRequest(medium.SmartClientMediumRequest):
    """A SmartClientMediumRequest that works with an HTTP medium."""

    def __init__(self, client_medium):
        medium.SmartClientMediumRequest.__init__(self, client_medium)
        self._buffer = ''

    def _accept_bytes(self, bytes):
        self._buffer += bytes

    def _finished_writing(self):
        data = self._medium.send_http_smart_request(self._buffer)
        self._response_body = data

    def _read_bytes(self, count):
        return self._response_body.read(count)

    def _finished_reading(self):
        """See SmartClientMediumRequest._finished_reading."""