1
# Copyright (C) 2005 Canonical Ltd
1
# Copyright (C) 2005, 2006 Canonical Ltd
3
3
# This program is free software; you can redistribute it and/or modify
4
4
# it under the terms of the GNU General Public License as published by
5
5
# the Free Software Foundation; either version 2 of the License, or
6
6
# (at your option) any later version.
8
8
# This program is distributed in the hope that it will be useful,
9
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
11
# GNU General Public License for more details.
13
13
# You should have received a copy of the GNU General Public License
14
14
# along with this program; if not, write to the Free Software
15
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
16
"""Implementation of Transport over http.
17
"""Base implementation of Transport over http.
19
There are separate implementation modules for each http client implementation.
20
24
from collections import deque
21
25
from cStringIO import StringIO
22
import urllib, urllib2
24
29
from warnings import warn
27
from bzrlib.transport import Transport, Server
28
from bzrlib.errors import (TransportNotPossible, NoSuchFile,
31
from bzrlib.transport import Transport, register_transport, Server
32
from bzrlib.errors import (TransportNotPossible, NoSuchFile,
29
33
TransportError, ConnectionError)
30
34
from bzrlib.errors import BzrError, BzrCheckError
31
35
from bzrlib.branch import Branch
32
36
from bzrlib.trace import mutter
37
# TODO: load these only when running http tests
38
import BaseHTTPServer, SimpleHTTPServer, socket, time
33
40
from bzrlib.ui import ui_factory
36
43
def extract_auth(url, password_manager):
38
Extract auth parameters from an HTTP/HTTPS url and add them to the given
44
"""Extract auth parameters from an HTTP/HTTPS url and add them to the given
39
45
password manager. Return the url, minus those auth parameters (which
48
assert re.match(r'^(https?)(\+\w+)?://', url), \
49
'invalid absolute url %r' % url
42
50
scheme, netloc, path, query, fragment = urlparse.urlsplit(url)
43
assert (scheme == 'http') or (scheme == 'https')
46
53
auth, netloc = netloc.split('@', 1)
66
class Request(urllib2.Request):
67
"""Request object for urllib2 that allows the method to be overridden."""
72
if self.method is not None:
75
return urllib2.Request.get_method(self)
78
def get_url(url, method=None, ranges=None):
84
mutter("get_url %s [%s]", url, rangestring)
85
manager = urllib2.HTTPPasswordMgrWithDefaultRealm()
86
url = extract_auth(url, manager)
87
auth_handler = urllib2.HTTPBasicAuthHandler(manager)
88
opener = urllib2.build_opener(auth_handler)
90
request = Request(url)
91
request.method = method
92
request.add_header('User-Agent', 'bzr/%s' % bzrlib.__version__)
94
request.add_header('Range', ranges)
95
response = opener.open(request)
99
class HttpTransport(Transport):
100
"""This is the transport agent for http:// access.
102
TODO: Implement pipelined versions of all of the *_multi() functions.
73
class HttpTransportBase(Transport):
74
"""Base class for http implementations.
76
Does URL parsing, etc, but not any network IO.
78
The protocol can be given as e.g. http+urllib://host/ to use a particular
82
# _proto: "http" or "https"
83
# _qualified_proto: may have "+pycurl", etc
105
85
def __init__(self, base):
106
86
"""Set the base path where files will be stored."""
107
assert base.startswith('http://') or base.startswith('https://')
87
proto_match = re.match(r'^(https?)(\+\w+)?://', base)
89
raise AssertionError("not a http url: %r" % base)
90
self._proto = proto_match.group(1)
91
impl_name = proto_match.group(2)
93
impl_name = impl_name[1:]
94
self._impl_name = impl_name
108
95
if base[-1] != '/':
110
super(HttpTransport, self).__init__(base)
97
super(HttpTransportBase, self).__init__(base)
111
98
# In the future we might actually connect to the remote host
112
99
# rather than using get_url
113
100
# self._connection = None
114
(self._proto, self._host,
101
(apparent_proto, self._host,
115
102
self._path, self._parameters,
116
103
self._query, self._fragment) = urlparse.urlparse(self.base)
118
def should_cache(self):
119
"""Return True if the data pulled across should be cached locally.
123
def clone(self, offset=None):
124
"""Return a new HttpTransport with root at self.base + offset
125
For now HttpTransport does not actually connect, so just return
126
a new HttpTransport object.
129
return HttpTransport(self.base)
131
return HttpTransport(self.abspath(offset))
104
self._qualified_proto = apparent_proto
133
106
def abspath(self, relpath):
134
107
"""Return the full url to the given relative path.
135
This can be supplied with a string or a list
109
This can be supplied with a string or a list.
111
The URL returned always has the protocol scheme originally used to
112
construct the transport, even if that includes an explicit
113
implementation qualifier.
137
115
assert isinstance(relpath, basestring)
138
116
if isinstance(relpath, basestring):
165
143
# I'm concerned about when it chooses to strip the last
166
144
# portion of the path, and when it doesn't.
167
145
path = '/'.join(basepath)
168
return urlparse.urlunparse((self._proto,
169
self._host, path, '', '', ''))
146
return urlparse.urlunparse((self._qualified_proto,
147
self._host, path, '', '', ''))
149
def _real_abspath(self, relpath):
150
"""Produce absolute path, adjusting protocol if needed"""
151
abspath = self.abspath(relpath)
152
qp = self._qualified_proto
154
if self._qualified_proto != self._proto:
155
abspath = rp + abspath[len(qp):]
156
if not isinstance(abspath, str):
157
# escaping must be done at a higher level
158
abspath = abspath.encode('ascii')
171
161
def has(self, relpath):
172
"""Does the target location exist?
174
TODO: This should be changed so that we don't use
175
urllib2 and get an exception, the code path would be
176
cleaner if we just do an http HEAD request, and parse
181
path = self.abspath(relpath)
182
f = get_url(path, method='HEAD')
183
# Without the read and then close()
184
# we tend to have busy sockets.
188
except urllib2.HTTPError, e:
189
mutter('url error code: %s for has url: %r', e.code, path)
194
mutter('io error: %s %s for has url: %r',
195
e.errno, errno.errorcode.get(e.errno), path)
196
if e.errno == errno.ENOENT:
198
raise TransportError(orig_error=e)
200
def _get(self, relpath, decode=False, ranges=None):
203
path = self.abspath(relpath)
204
return get_url(path, ranges=ranges)
205
except urllib2.HTTPError, e:
206
mutter('url error code: %s for has url: %r', e.code, path)
208
raise NoSuchFile(path, extra=e)
210
except (BzrError, IOError), e:
211
if hasattr(e, 'errno'):
212
mutter('io error: %s %s for has url: %r',
213
e.errno, errno.errorcode.get(e.errno), path)
214
if e.errno == errno.ENOENT:
215
raise NoSuchFile(path, extra=e)
216
raise ConnectionError(msg = "Error retrieving %s: %s"
217
% (self.abspath(relpath), str(e)),
220
def get(self, relpath, decode=False):
162
raise NotImplementedError("has() is abstract on %r" % self)
164
def get(self, relpath):
221
165
"""Get the file at the given relative path.
223
167
:param relpath: The relative path to the file
225
return self._get(relpath, decode=decode)
169
code, response_file = self._get(relpath, None)
172
def _get(self, relpath, ranges):
173
"""Get a file, or part of a file.
175
:param relpath: Path relative to transport base URL
176
:param ranges: None to get the whole file;
177
or [(start,end)] to fetch parts of a file.
179
:returns: (http_code, result_file)
181
Note that the current http implementations can only fetch one range at
182
a time through this call.
184
raise NotImplementedError(self._get)
227
186
def readv(self, relpath, offsets):
228
187
"""Get parts of the file at the given relative path.
230
:offsets: A list of (offset, size) tuples.
231
:return: A list or generator of (offset, data) tuples
189
:param offsets: A list of (offset, size) tuples.
190
:return: A list or generator of (offset, data) tuples
233
# this is not quite regular enough to have a single driver routine and
192
# Ideally we would pass one big request asking for all the ranges in
193
# one go; however then the server will give a multipart mime response
194
# back, and we can't parse them yet. So instead we just get one range
195
# per region, and try to coalesce the regions as much as possible.
197
# The read-coalescing code is not quite regular enough to have a
198
# single driver routine and
234
199
# helper method in Transport.
235
200
def do_combined_read(combined_offsets):
236
201
# read one coalesced block
239
204
total_size += size
240
205
mutter('readv coalesced %d reads.', len(combined_offsets))
241
206
offset = combined_offsets[0][0]
242
ranges = 'bytes=%d-%d' % (offset, offset + total_size - 1)
243
response = self._get(relpath, ranges=ranges)
244
if response.code == 206:
207
byte_range = (offset, offset + total_size - 1)
208
code, result_file = self._get(relpath, [byte_range])
245
210
for off, size in combined_offsets:
246
yield off, response.read(size)
247
elif response.code == 200:
248
data = response.read(offset + total_size)[offset:offset + total_size]
211
result_bytes = result_file.read(size)
212
assert len(result_bytes) == size
213
yield off, result_bytes
215
data = result_file.read(offset + total_size)[offset:offset + total_size]
250
217
for offset, size in combined_offsets:
251
218
yield offset, data[pos:pos + size]
255
221
if not len(offsets):
257
223
pending_offsets = deque(offsets)
313
279
# At this point HttpTransport might be able to check and see if
314
280
# the remote location is the same, and rather than download, and
315
281
# then upload, it could just issue a remote copy_this command.
316
if isinstance(other, HttpTransport):
282
if isinstance(other, HttpTransportBase):
317
283
raise TransportNotPossible('http cannot be the target of copy_to()')
319
return super(HttpTransport, self).copy_to(relpaths, other, mode=mode, pb=pb)
285
return super(HttpTransportBase, self).\
286
copy_to(relpaths, other, mode=mode, pb=pb)
321
288
def move(self, rel_from, rel_to):
322
289
"""Move the item at rel_from to the location at rel_to"""