1
# Copyright (C) 2005, 2006 Canonical Ltd
1
# Copyright (C) 2005 Canonical Ltd
3
3
# This program is free software; you can redistribute it and/or modify
4
4
# it under the terms of the GNU General Public License as published by
5
5
# the Free Software Foundation; either version 2 of the License, or
6
6
# (at your option) any later version.
8
8
# This program is distributed in the hope that it will be useful,
9
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
11
# GNU General Public License for more details.
13
13
# You should have received a copy of the GNU General Public License
14
14
# along with this program; if not, write to the Free Software
15
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
"""Base implementation of Transport over http.
19
There are separate implementation modules for each http client implementation.
16
"""Implementation of Transport over http.
19
from bzrlib.transport import Transport, register_transport
20
from bzrlib.errors import (TransportNotPossible, NoSuchFile,
21
NonRelativePath, TransportError)
22
23
from cStringIO import StringIO
29
from bzrlib import errors, ui
27
from bzrlib.errors import BzrError, BzrCheckError
28
from bzrlib.branch import Branch
30
29
from bzrlib.trace import mutter
31
from bzrlib.transport import (
37
# TODO: This is not used anymore by HttpTransport_urllib
38
# (extracting the auth info and prompting the user for a password
39
# have been split), only the tests still use it. It should be
40
# deleted and the tests rewritten ASAP to stay in sync.
41
def extract_auth(url, password_manager):
42
"""Extract auth parameters from an HTTP/HTTPS url and add them to the given
43
password manager. Return the url, minus those auth parameters (which
46
assert re.match(r'^(https?)(\+\w+)?://', url), \
47
'invalid absolute url %r' % url
48
scheme, netloc, path, query, fragment = urlparse.urlsplit(url)
51
auth, netloc = netloc.split('@', 1)
53
username, password = auth.split(':', 1)
55
username, password = auth, None
57
host = netloc.split(':', 1)[0]
60
username = urllib.unquote(username)
61
if password is not None:
62
password = urllib.unquote(password)
64
password = ui.ui_factory.get_password(
65
prompt='HTTP %(user)s@%(host)s password',
66
user=username, host=host)
67
password_manager.add_password(None, host, username, password)
68
url = urlparse.urlunsplit((scheme, netloc, path, query, fragment))
72
def _extract_headers(header_text, url):
73
"""Extract the mapping for an rfc2822 header
75
This is a helper function for the test suite and for _pycurl.
76
(urllib already parses the headers for us)
78
In the case that there are multiple headers inside the file,
79
the last one is returned.
81
:param header_text: A string of header information.
82
This expects that the first line of a header will always be HTTP ...
83
:param url: The url we are parsing, so we can raise nice errors
84
:return: mimetools.Message object, which basically acts like a case
85
insensitive dictionary.
88
remaining = header_text
91
raise errors.InvalidHttpResponse(url, 'Empty headers')
94
header_file = StringIO(remaining)
95
first_line = header_file.readline()
96
if not first_line.startswith('HTTP'):
97
if first_header: # The first header *must* start with HTTP
98
raise errors.InvalidHttpResponse(url,
99
'Opening header line did not start with HTTP: %s'
101
assert False, 'Opening header line was not HTTP'
103
break # We are done parsing
105
m = mimetools.Message(header_file)
107
# mimetools.Message parses the first header up to a blank line
108
# So while there is remaining data, it probably means there is
109
# another header to be parsed.
110
# Get rid of any preceding whitespace, which if it is all whitespace
111
# will get rid of everything.
112
remaining = header_file.read().lstrip()
116
class HttpTransportBase(Transport, smart.SmartClientMedium):
117
"""Base class for http implementations.
119
Does URL parsing, etc, but not any network IO.
121
The protocol can be given as e.g. http+urllib://host/ to use a particular
125
# _proto: "http" or "https"
126
# _qualified_proto: may have "+pycurl", etc
128
def __init__(self, base, from_transport=None):
31
# velocitynet.com.au transparently proxies connections and thereby
32
# breaks keep-alive -- sucks!
37
mutter("get_url %s" % url)
38
url_f = urllib2.urlopen(url)
41
class HttpTransportError(TransportError):
44
class HttpTransport(Transport):
45
"""This is the transport agent for http:// access.
47
TODO: Implement pipelined versions of all of the *_multi() functions.
50
def __init__(self, base):
129
51
"""Set the base path where files will be stored."""
130
proto_match = re.match(r'^(https?)(\+\w+)?://', base)
132
raise AssertionError("not a http url: %r" % base)
133
self._proto = proto_match.group(1)
134
impl_name = proto_match.group(2)
136
impl_name = impl_name[1:]
137
self._impl_name = impl_name
140
super(HttpTransportBase, self).__init__(base)
141
(apparent_proto, self._host,
52
assert base.startswith('http://') or base.startswith('https://')
53
super(HttpTransport, self).__init__(base)
54
# In the future we might actually connect to the remote host
55
# rather than using get_url
56
# self._connection = None
57
(self._proto, self._host,
142
58
self._path, self._parameters,
143
59
self._query, self._fragment) = urlparse.urlparse(self.base)
144
self._qualified_proto = apparent_proto
145
# range hint is handled dynamically throughout the life
146
# of the object. We start by trying multi-range requests
147
# and if the server returns bogus results, we retry with
148
# single range requests and, finally, we forget about
149
# range if the server really can't understand. Once
150
# acquired, this piece of info is propagated to clones.
151
if from_transport is not None:
152
self._range_hint = from_transport._range_hint
61
def should_cache(self):
62
"""Return True if the data pulled across should be cached locally.
66
def clone(self, offset=None):
67
"""Return a new HttpTransport with root at self.base + offset
68
For now HttpTransport does not actually connect, so just return
69
a new HttpTransport object.
72
return HttpTransport(self.base)
154
self._range_hint = 'multi'
74
return HttpTransport(self.abspath(offset))
156
76
def abspath(self, relpath):
157
77
"""Return the full url to the given relative path.
159
This can be supplied with a string or a list.
161
The URL returned always has the protocol scheme originally used to
162
construct the transport, even if that includes an explicit
163
implementation qualifier.
78
This can be supplied with a string or a list
165
assert isinstance(relpath, basestring)
166
if isinstance(relpath, unicode):
167
raise errors.InvalidURL(relpath, 'paths must not be unicode.')
168
80
if isinstance(relpath, basestring):
169
relpath_parts = relpath.split('/')
171
# TODO: Don't call this with an array - no magic interfaces
172
relpath_parts = relpath[:]
173
if relpath.startswith('/'):
176
# Except for the root, no trailing slashes are allowed
177
if len(relpath_parts) > 1 and relpath_parts[-1] == '':
179
"path %r within branch %r seems to be a directory"
180
% (relpath, self._path))
181
basepath = self._path.split('/')
182
if len(basepath) > 0 and basepath[-1] == '':
183
basepath = basepath[:-1]
82
basepath = self._path.split('/')
83
if len(basepath) > 0 and basepath[-1] == '':
84
basepath = basepath[:-1]
185
for p in relpath_parts:
187
if len(basepath) == 0:
188
89
# In most filesystems, a request for the parent
189
90
# of root, just returns root.
192
elif p == '.' or p == '':
195
96
basepath.append(p)
196
98
# Possibly, we could use urlparse.urljoin() here, but
197
99
# I'm concerned about when it chooses to strip the last
198
100
# portion of the path, and when it doesn't.
199
101
path = '/'.join(basepath)
202
result = urlparse.urlunparse((self._qualified_proto,
203
self._host, path, '', '', ''))
102
return urlparse.urlunparse((self._proto,
103
self._host, path, '', '', ''))
206
def _real_abspath(self, relpath):
207
"""Produce absolute path, adjusting protocol if needed"""
208
abspath = self.abspath(relpath)
209
qp = self._qualified_proto
211
if self._qualified_proto != self._proto:
212
abspath = rp + abspath[len(qp):]
213
if not isinstance(abspath, str):
214
# escaping must be done at a higher level
215
abspath = abspath.encode('ascii')
105
def relpath(self, abspath):
106
if not abspath.startswith(self.base):
107
raise NonRelativePath('path %r is not under base URL %r'
108
% (abspath, self.base))
110
return abspath[pl:].lstrip('/')
218
112
def has(self, relpath):
219
raise NotImplementedError("has() is abstract on %r" % self)
221
def get(self, relpath):
113
"""Does the target location exist?
115
TODO: HttpTransport.has() should use a HEAD request,
116
not a full GET request.
118
TODO: This should be changed so that we don't use
119
urllib2 and get an exception, the code path would be
120
cleaner if we just do an http HEAD request, and parse
124
f = get_url(self.abspath(relpath))
125
# Without the read and then close()
126
# we tend to have busy sockets.
132
except urllib2.URLError:
135
if e.errno == errno.ENOENT:
137
raise HttpTransportError(orig_error=e)
139
def get(self, relpath, decode=False):
222
140
"""Get the file at the given relative path.
224
142
:param relpath: The relative path to the file
226
code, response_file = self._get(relpath, None)
229
def _get(self, relpath, ranges):
230
"""Get a file, or part of a file.
232
:param relpath: Path relative to transport base URL
233
:param ranges: None to get the whole file;
234
or [(start,end)] to fetch parts of a file.
236
:returns: (http_code, result_file)
238
Note that the current http implementations can only fetch one range at
239
a time through this call.
241
raise NotImplementedError(self._get)
243
def get_request(self):
244
return SmartClientHTTPMediumRequest(self)
246
def get_smart_medium(self):
247
"""See Transport.get_smart_medium.
249
HttpTransportBase directly implements the minimal interface of
250
SmartClientMedium, so this returns self.
254
def _retry_get(self, relpath, ranges, exc_info):
255
"""A GET request has failed, let's retry with a simpler request."""
258
# The server does not give us enough data or
259
# bogus-looking result, let's try again with
260
# a simpler request if possible.
261
if self._range_hint == 'multi':
262
self._range_hint = 'single'
263
mutter('Retry %s with single range request' % relpath)
265
elif self._range_hint == 'single':
266
self._range_hint = None
267
mutter('Retry %s without ranges' % relpath)
270
# Note that since the offsets and the ranges may not
271
# be in the same order, we don't try to calculate a
272
# restricted single range encompassing unprocessed
274
code, f = self._get(relpath, ranges)
275
return try_again, code, f
277
# We tried all the tricks, but nothing worked. We
278
# re-raise original exception; the 'mutter' calls
279
# above will indicate that further tries were
281
raise exc_info[0], exc_info[1], exc_info[2]
283
def readv(self, relpath, offsets):
284
"""Get parts of the file at the given relative path.
286
:param offsets: A list of (offset, size) tuples.
287
:return: A list or generator of (offset, data) tuples
289
ranges = self.offsets_to_ranges(offsets)
290
mutter('http readv of %s collapsed %s offsets => %s',
291
relpath, len(offsets), ranges)
297
code, f = self._get(relpath, ranges)
298
except (errors.InvalidRange, errors.ShortReadvError), e:
299
try_again, code, f = self._retry_get(relpath, ranges,
302
for start, size in offsets:
306
f.seek(start, (start < 0) and 2 or 0)
310
if len(data) != size:
311
raise errors.ShortReadvError(relpath, start, size,
313
except (errors.InvalidRange, errors.ShortReadvError), e:
314
# Note that we replace 'f' here and that it
315
# may need cleaning one day before being
317
try_again, code, f = self._retry_get(relpath, ranges,
319
# After one or more tries, we get the data.
323
def offsets_to_ranges(offsets):
324
"""Turn a list of offsets and sizes into a list of byte ranges.
326
:param offsets: A list of tuples of (start, size). An empty list
328
:return: a list of inclusive byte ranges (start, end)
329
Adjacent ranges will be combined.
331
# Make sure we process sorted offsets
332
offsets = sorted(offsets)
337
for start, size in offsets:
338
end = start + size - 1
340
combined.append([start, end])
341
elif start <= prev_end + 1:
342
combined[-1][1] = end
344
combined.append([start, end])
349
def _post(self, body_bytes):
350
"""POST body_bytes to .bzr/smart on this transport.
352
:returns: (response code, response body file-like object).
354
# TODO: Requiring all the body_bytes to be available at the beginning of
355
# the POST may require large client buffers. It would be nice to have
356
# an interface that allows streaming via POST when possible (and
357
# degrades to a local buffer when not).
358
raise NotImplementedError(self._post)
360
def put_file(self, relpath, f, mode=None):
361
"""Copy the file-like object into the location.
145
return get_url(self.abspath(relpath))
146
except (BzrError, urllib2.URLError, IOError), e:
147
raise NoSuchFile(orig_error=e)
149
raise HttpTransportError(orig_error=e)
151
def get_partial(self, relpath, start, length=None):
152
"""Get just part of a file.
154
:param relpath: Path to the file, relative to base
155
:param start: The starting position to read from
156
:param length: The length to read. A length of None indicates
157
read to the end of the file.
158
:return: A file-like object containing at least the specified bytes.
159
Some implementations may return objects which can be read
160
past this length, but this is not guaranteed.
162
# TODO: You can make specialized http requests for just
163
# a portion of the file. Figure out how to do that.
164
# For now, urllib2 returns files that cannot seek() so
165
# we just read bytes off the beginning, until we
166
# get to the point that we care about.
167
f = self.get(relpath)
168
# TODO: read in smaller chunks, in case things are
169
# buffered internally.
173
def put(self, relpath, f):
174
"""Copy the file-like or string object into the location.
363
176
:param relpath: Location to put the contents, relative to base.
364
:param f: File-like object.
177
:param f: File-like or string object.
366
raise errors.TransportNotPossible('http PUT not supported')
179
raise TransportNotPossible('http PUT not supported')
368
def mkdir(self, relpath, mode=None):
181
def mkdir(self, relpath):
369
182
"""Create a directory at the given path."""
370
raise errors.TransportNotPossible('http does not support mkdir()')
372
def rmdir(self, relpath):
373
"""See Transport.rmdir."""
374
raise errors.TransportNotPossible('http does not support rmdir()')
376
def append_file(self, relpath, f, mode=None):
183
raise TransportNotPossible('http does not support mkdir()')
185
def append(self, relpath, f):
377
186
"""Append the text in the file-like object into the final
380
raise errors.TransportNotPossible('http does not support append()')
189
raise TransportNotPossible('http does not support append()')
382
191
def copy(self, rel_from, rel_to):
383
192
"""Copy the item at rel_from to the location at rel_to"""
384
raise errors.TransportNotPossible('http does not support copy()')
193
raise TransportNotPossible('http does not support copy()')
386
def copy_to(self, relpaths, other, mode=None, pb=None):
195
def copy_to(self, relpaths, other, pb=None):
387
196
"""Copy a set of entries from self into another Transport.
389
198
:param relpaths: A list/generator of entries to be copied.