1
# Copyright (C) 2005, 2006 Canonical Ltd
1
# Copyright (C) 2005 Canonical Ltd
3
3
# This program is free software; you can redistribute it and/or modify
4
4
# it under the terms of the GNU General Public License as published by
5
5
# the Free Software Foundation; either version 2 of the License, or
6
6
# (at your option) any later version.
8
8
# This program is distributed in the hope that it will be useful,
9
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
11
# GNU General Public License for more details.
13
13
# You should have received a copy of the GNU General Public License
14
14
# along with this program; if not, write to the Free Software
15
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
"""Base implementation of Transport over http.
19
There are separate implementation modules for each http client implementation.
16
"""Implementation of Transport over http.
22
from collections import deque
23
20
from cStringIO import StringIO
21
import urllib, urllib2
32
23
from warnings import warn
34
# TODO: load these only when running http tests
35
import BaseHTTPServer, SimpleHTTPServer, socket, time
38
from bzrlib.errors import (TransportNotPossible, NoSuchFile,
39
TransportError, ConnectionError, InvalidURL)
26
from bzrlib.transport import Transport, Server
27
from bzrlib.errors import (TransportNotPossible, NoSuchFile,
28
TransportError, ConnectionError)
29
from bzrlib.errors import BzrError, BzrCheckError
40
30
from bzrlib.branch import Branch
41
31
from bzrlib.trace import mutter
42
from bzrlib.transport import Transport, register_transport, Server
43
from bzrlib.transport.http.response import (HttpMultipartRangeResponse,
45
32
from bzrlib.ui import ui_factory
48
35
def extract_auth(url, password_manager):
    """Extract auth parameters from an HTTP/HTTPS url and add them to the
    given password manager.  Return the url, minus those auth parameters
    (which confuse urllib2).
    """
    assert re.match(r'^(https?)(\+\w+)?://', url), \
            'invalid absolute url %r' % url
    scheme, netloc, path, query, fragment = urlparse.urlsplit(url)
    assert (scheme == 'http') or (scheme == 'https')

    if '@' in netloc:
        # NOTE(review): everything inside this branch is missing from the
        # garbled source and has been reconstructed; verify against the
        # project history before relying on it.
        auth, netloc = netloc.split('@', 1)
        if ':' in auth:
            username, password = auth.split(':', 1)
        else:
            username, password = auth, None
        # host without any trailing :port, for the password manager realm
        if ':' in netloc:
            host = netloc.split(':', 1)[0]
        else:
            host = netloc
        username = urllib.unquote(username)
        if password is not None:
            password = urllib.unquote(password)
        password_manager.add_password(None, host, username, password)
    # Rebuild the url without the user:password@ part so urllib2 accepts it.
    url = urlparse.urlunsplit((scheme, netloc, path, query, fragment))
    return url
78
def _extract_headers(header_file, skip_first=True):
    """Extract the mapping for an rfc822 header.

    This is a helper function for the test suite, and for _pycurl.
    (urllib already parses the headers for us)

    :param header_file: A file-like object to read.
    :param skip_first: HTTP headers start with the HTTP response as
        the first line. Skip this line while parsing.
    :return: mimetools.Message object
    """
    header_file.seek(0, 0)
    if skip_first:
        # Discard the "HTTP/1.x NNN reason" status line before handing the
        # stream to mimetools, which only understands header lines.
        header_file.readline()
    # NOTE(review): the skip_first guard and the return are missing from
    # the garbled source and were reconstructed from the docstring.
    m = mimetools.Message(header_file)
    return m
96
class HttpTransportBase(Transport):
97
"""Base class for http implementations.
99
Does URL parsing, etc, but not any network IO.
101
The protocol can be given as e.g. http+urllib://host/ to use a particular
105
# _proto: "http" or "https"
106
# _qualified_proto: may have "+pycurl", etc
65
class Request(urllib2.Request):
66
"""Request object for urllib2 that allows the method to be overridden."""
71
if self.method is not None:
74
return urllib2.Request.get_method(self)
77
def get_url(url, method=None):
    """Open *url* via urllib2, honouring any auth embedded in the url.

    :param url: Absolute http/https URL, possibly containing
        user:password@ credentials.
    :param method: Optional HTTP method override (e.g. 'HEAD'); None
        means urllib2's default (GET, or POST with data).
    :return: The response file-like object returned by the opener.
    """
    mutter("get_url %s", url)
    manager = urllib2.HTTPPasswordMgrWithDefaultRealm()
    # Strip user:password@ out of the url and register it with the manager.
    url = extract_auth(url, manager)
    auth_handler = urllib2.HTTPBasicAuthHandler(manager)
    opener = urllib2.build_opener(auth_handler)
    request = Request(url)
    request.method = method
    request.add_header('User-Agent', 'bzr/%s' % bzrlib.__version__)
    response = opener.open(request)
    # The garbled source drops this line; without the return, callers such
    # as has() and get() would receive None instead of the response.
    return response
92
class HttpTransport(Transport):
93
"""This is the transport agent for http:// access.
95
TODO: Implement pipelined versions of all of the *_multi() functions.
108
98
def __init__(self, base):
109
99
"""Set the base path where files will be stored."""
110
proto_match = re.match(r'^(https?)(\+\w+)?://', base)
112
raise AssertionError("not a http url: %r" % base)
113
self._proto = proto_match.group(1)
114
impl_name = proto_match.group(2)
116
impl_name = impl_name[1:]
117
self._impl_name = impl_name
100
assert base.startswith('http://') or base.startswith('https://')
118
101
if base[-1] != '/':
119
102
base = base + '/'
120
super(HttpTransportBase, self).__init__(base)
103
super(HttpTransport, self).__init__(base)
121
104
# In the future we might actually connect to the remote host
122
105
# rather than using get_url
123
106
# self._connection = None
124
(apparent_proto, self._host,
107
(self._proto, self._host,
125
108
self._path, self._parameters,
126
109
self._query, self._fragment) = urlparse.urlparse(self.base)
127
self._qualified_proto = apparent_proto
111
def should_cache(self):
112
"""Return True if the data pulled across should be cached locally.
116
def clone(self, offset=None):
117
"""Return a new HttpTransport with root at self.base + offset
118
For now HttpTransport does not actually connect, so just return
119
a new HttpTransport object.
122
return HttpTransport(self.base)
124
return HttpTransport(self.abspath(offset))
129
126
def abspath(self, relpath):
130
127
"""Return the full url to the given relative path.
132
This can be supplied with a string or a list.
134
The URL returned always has the protocol scheme originally used to
135
construct the transport, even if that includes an explicit
136
implementation qualifier.
128
This can be supplied with a string or a list
138
130
assert isinstance(relpath, basestring)
139
if isinstance(relpath, unicode):
140
raise InvalidURL(relpath, 'paths must not be unicode.')
141
131
if isinstance(relpath, basestring):
142
132
relpath_parts = relpath.split('/')
168
158
# I'm concerned about when it chooses to strip the last
169
159
# portion of the path, and when it doesn't.
170
160
path = '/'.join(basepath)
173
result = urlparse.urlunparse((self._qualified_proto,
174
self._host, path, '', '', ''))
177
def _real_abspath(self, relpath):
178
"""Produce absolute path, adjusting protocol if needed"""
179
abspath = self.abspath(relpath)
180
qp = self._qualified_proto
182
if self._qualified_proto != self._proto:
183
abspath = rp + abspath[len(qp):]
184
if not isinstance(abspath, str):
185
# escaping must be done at a higher level
186
abspath = abspath.encode('ascii')
161
return urlparse.urlunparse((self._proto,
162
self._host, path, '', '', ''))
189
164
def has(self, relpath):
190
raise NotImplementedError("has() is abstract on %r" % self)
192
def get(self, relpath):
165
"""Does the target location exist?
167
TODO: This should be changed so that we don't use
168
urllib2 and get an exception, the code path would be
169
cleaner if we just do an http HEAD request, and parse
174
path = self.abspath(relpath)
175
f = get_url(path, method='HEAD')
176
# Without the read and then close()
177
# we tend to have busy sockets.
181
except urllib2.URLError, e:
182
mutter('url error code: %s for has url: %r', e.code, path)
187
mutter('io error: %s %s for has url: %r',
188
e.errno, errno.errorcode.get(e.errno), path)
189
if e.errno == errno.ENOENT:
191
raise TransportError(orig_error=e)
193
def get(self, relpath, decode=False):
193
194
"""Get the file at the given relative path.
195
196
:param relpath: The relative path to the file
197
code, response_file = self._get(relpath, None)
200
def _get(self, relpath, ranges, tail_amount=0):
201
"""Get a file, or part of a file.
203
:param relpath: Path relative to transport base URL
204
:param byte_range: None to get the whole file;
205
or [(start,end)] to fetch parts of a file.
206
:param tail_amount: How much data to fetch from the tail of
209
:returns: (http_code, result_file)
211
Note that the current http implementations can only fetch one range at
212
a time through this call.
214
raise NotImplementedError(self._get)
216
def readv(self, relpath, offsets):
    """Get parts of the file at the given relative path.

    :param relpath: Path relative to the transport base URL.
    :param offsets: A list of (offset, size) tuples.
    :return: A generator of (offset, data) tuples.
    """
    ranges, tail_amount = self.offsets_to_ranges(offsets)
    mutter('readv of %s %s => %s tail:%s',
            relpath, offsets, ranges, tail_amount)
    code, f = self._get(relpath, ranges, tail_amount)
    for start, size in offsets:
        # Negative offsets are measured from the end of the file:
        # seek whence=2 (SEEK_END); otherwise whence=0 (SEEK_SET).
        f.seek(start, (start < 0) and 2 or 0)
        data = f.read(size)
        # NOTE(review): the read and yield lines are missing from the
        # garbled source and were reconstructed; a short read here would
        # mean the server ignored or truncated our Range request.
        assert len(data) == size
        yield start, data
234
def offsets_to_ranges(offsets, fudge_factor=0):
    """Turn a list of offsets and sizes into a list of byte ranges.

    :param offsets: A list of tuples of (start, size). An empty list
        is allowed. A negative start requests data from the tail of
        the file.
    :param fudge_factor: Fudge together ranges that are fudge_factor
        bytes apart.
    :return: a list of inclusive byte ranges (start, end) and the
        amount of data to fetch from the tail of the file.
        Adjacent ranges will be combined.
    """
    # We need a copy of the offsets, as the caller might expect it to
    # remain unsorted. This doesn't seem expensive for memory at least.
    offsets = sorted(offsets)

    # NOTE(review): the initialisations and the if/else skeleton below are
    # missing from the garbled source and were reconstructed around the
    # surviving elif/append/return lines.
    prev_end = None
    combined = []
    max_negative = 0
    for start, size in offsets:
        if start < 0:
            # Tail request: remember the deepest negative offset so the
            # caller knows how much tail data to fetch.
            max_negative = min(start, max_negative)
        else:
            end = start + size - 1
            if prev_end is None:
                combined.append([start, end])
            elif start <= prev_end + 1 + fudge_factor:
                # Close enough to the previous range: widen it rather than
                # issuing another HTTP byte range.
                combined[-1][1] = end
            else:
                combined.append([start, end])
            prev_end = end
    return combined, -max_negative
200
path = self.abspath(relpath)
202
except urllib2.HTTPError, e:
203
mutter('url error code: %s for has url: %r', e.code, path)
205
raise NoSuchFile(path, extra=e)
207
except (BzrError, IOError), e:
208
if hasattr(e, 'errno'):
209
mutter('io error: %s %s for has url: %r',
210
e.errno, errno.errorcode.get(e.errno), path)
211
if e.errno == errno.ENOENT:
212
raise NoSuchFile(path, extra=e)
213
raise ConnectionError(msg = "Error retrieving %s: %s"
214
% (self.abspath(relpath), str(e)),
269
217
def put(self, relpath, f, mode=None):
270
218
"""Copy the file-like or string object into the location.
352
299
raise TransportNotPossible('http does not support lock_write()')
354
def clone(self, offset=None):
355
"""Return a new HttpTransportBase with root at self.base + offset
356
For now HttpTransportBase does not actually connect, so just return
357
a new HttpTransportBase object.
360
return self.__class__(self.base)
362
return self.__class__(self.abspath(offset))
365
def range_header(ranges, tail_amount):
    """Turn a list of byte ranges into an HTTP Range header value.

    :param ranges: A list of inclusive byte ranges, (start, end).
        An empty list is allowed.
    :param tail_amount: Number of bytes to fetch from the tail of the
        file, or 0 for none.
    :return: HTTP range header string, e.g. 'bytes=0-9,20-29,-50'.
    """
    strings = []
    for start, end in ranges:
        strings.append('%d-%d' % (start, end))

    # Only emit a suffix-range when there is actually tail data to fetch;
    # an unconditional '-0' would be an invalid byte-range-spec.
    if tail_amount:
        strings.append('-%d' % tail_amount)

    return 'bytes=' + ','.join(strings)
383
302
#---------------- test server facilities ----------------
384
# TODO: load these only when running tests
303
import BaseHTTPServer, SimpleHTTPServer, socket, time
387
307
class WebserverNotAvailable(Exception):
437
357
method = getattr(self, mname)
440
if sys.platform == 'win32':
441
# On win32 you cannot access non-ascii filenames without
442
# decoding them into unicode first.
443
# However, under Linux, you can access bytestream paths
444
# without any problems. If this function was always active
445
# it would probably break tests when LANG=C was set
446
def translate_path(self, path):
447
"""Translate a /-separated PATH to the local filename syntax.
449
For bzr, all url paths are considered to be utf8 paths.
450
On Linux, you can access these paths directly over the bytestream
451
request, but on win32, you must decode them, and access them
454
# abandon query parameters
455
path = urlparse.urlparse(path)[2]
456
path = posixpath.normpath(urllib.unquote(path))
457
path = path.decode('utf-8')
458
words = path.split('/')
459
words = filter(None, words)
462
drive, word = os.path.splitdrive(word)
463
head, word = os.path.split(word)
464
if word in (os.curdir, os.pardir): continue
465
path = os.path.join(path, word)
469
361
class TestingHTTPServer(BaseHTTPServer.HTTPServer):
470
362
def __init__(self, server_address, RequestHandlerClass, test_case):