# Copyright (C) 2006 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
import BaseHTTPServer
import errno
import os
import posixpath
import random
import re
import socket
import sys
import threading
import time
import urllib
import urlparse
from SimpleHTTPServer import SimpleHTTPRequestHandler

from bzrlib.transport import Server
from bzrlib.transport.local import LocalURLServer
class WebserverNotAvailable(Exception):
    """Raised to report that no web server is available."""
class BadWebserverPath(ValueError):
    """A path that does not live under the directory being served.

    Instantiate with exactly two positional arguments, ``(path, directory)``;
    ``__str__`` interpolates them, in that order, into the message.
    """

    def __str__(self):
        return 'path %s is not in %s' % self.args
class TestingHTTPRequestHandler(SimpleHTTPRequestHandler):
    """Request handler used by the test HTTP server.

    On top of SimpleHTTPRequestHandler it:

    * routes access-log lines to the owning test server (``log_message``);
    * retries reads of the request line that fail with EAGAIN/EWOULDBLOCK
      (``handle_one_request``);
    * answers GET requests carrying a ``Range`` header (``do_GET``).
    """

    def log_message(self, format, *args):
        """Forward one access-log line to ``self.server.test_case_server``.

        The line holds the client address, the timestamp, ``format % args``
        and the Referer and User-Agent request headers ('-' when absent).
        """
        tcs = self.server.test_case_server
        tcs.log('webserver - %s - - [%s] %s "%s" "%s"',
                self.address_string(),
                self.log_date_time_string(),
                format % args,
                self.headers.get('referer', '-'),
                self.headers.get('user-agent', '-'))

    def handle_one_request(self):
        """Handle a single HTTP request.

        You normally don't need to override this method; see the class
        __doc__ string for information on how to handle specific HTTP
        commands such as GET and POST.

        This override retries the read of the request line up to 10 times
        when the socket reports EAGAIN/EWOULDBLOCK; any other socket error
        propagates. If every attempt fails that way the connection is
        closed instead of acting on a stale or missing request line.
        """
        for i in range(1, 11): # Don't try more than 10 times
            try:
                self.raw_requestline = self.rfile.readline()
            except socket.error:
                # sys.exc_info() instead of "except socket.error, e" keeps
                # this parseable by both old and new Python versions.
                e = sys.exc_info()[1]
                if e.args[0] in (errno.EAGAIN, errno.EWOULDBLOCK):
                    # omitted for now because some tests look at the log of
                    # the server and expect to see no errors. see recent
                    # email thread. -- mbp 20051021.
                    ## self.log_message('EAGAIN (%d) while reading from raw_requestline' % i)
                    # NOTE(review): pause length between retries is a
                    # small arbitrary value -- confirm against upstream.
                    time.sleep(0.01)
                    continue
                raise
            else:
                break
        else:
            # All attempts hit EAGAIN: nothing was read for this request.
            self.close_connection = 1
            return
        if not self.raw_requestline:
            self.close_connection = 1
            return
        if not self.parse_request(): # An error code has been sent, just exit
            return
        mname = 'do_' + self.command
        method = getattr(self, mname, None)
        if method is None:
            self.send_error(501, "Unsupported method (%r)" % self.command)
            return
        method()

    _range_regexp = re.compile(r'^(?P<start>\d+)-(?P<end>\d+)$')
    _tail_regexp = re.compile(r'^-(?P<tail>\d+)$')

    def parse_ranges(self, ranges_header):
        """Parse the range header value and returns ranges and tail.

        RFC2616 14.35 says that syntactically invalid range
        specifiers MUST be ignored. In that case, we return 0 for
        tail and [] for ranges.

        :param ranges_header: the raw value of the Range header.
        :return: ``(tail, ranges)`` where ``tail`` is the length of the
            last ``-N`` specifier seen (0 if none) and ``ranges`` is a list
            of inclusive ``(start, end)`` pairs.
        """
        tail = 0
        ranges = []
        if not ranges_header.startswith('bytes='):
            # Syntactically invalid header
            return 0, []

        ranges_header = ranges_header[len('bytes='):]
        for range_str in ranges_header.split(','):
            # FIXME: RFC2616 says end is optional and default to file_size
            range_match = self._range_regexp.match(range_str)
            if range_match is not None:
                start = int(range_match.group('start'))
                end = int(range_match.group('end'))
                if start > end:
                    # Syntactically invalid range
                    return 0, []
                ranges.append((start, end))
            else:
                tail_match = self._tail_regexp.match(range_str)
                if tail_match is not None:
                    tail = int(tail_match.group('tail'))
                else:
                    # Syntactically invalid range
                    return 0, []
        return tail, ranges

    def _resolve_ranges(self, tail, ranges, file_size):
        """Map parsed range specifiers onto a file of ``file_size`` bytes.

        :param tail: suffix length from parse_ranges (0 means no suffix).
        :param ranges: list of inclusive ``(start, end)`` pairs.
        :return: a list of inclusive ``(start, end)`` pairs that lie inside
            the file, or None when the request is unsatisfiable (no range
            at all, or any range starting at or past the end of the file).
        """
        spans = list(ranges)
        if tail != 0:
            # A suffix longer than the file selects the whole file; clamp
            # so the start offset can never go negative.
            spans.append((max(0, file_size - tail), file_size))
        if not spans:
            return None
        resolved = []
        for start, end in spans:
            # RFC2616 14.35, ranges are invalid if start >= file_size
            if start >= file_size:
                return None
            # RFC2616 14.35, end values should be truncated
            # to file_size -1 if they exceed it
            resolved.append((start, min(end, file_size - 1)))
        return resolved

    def send_range_content(self, file, start, length):
        """Write ``length`` bytes of ``file``, from offset ``start``."""
        file.seek(start)
        self.wfile.write(file.read(length))

    def get_single_range(self, file, file_size, start, end):
        """Send a 206 response carrying the single range start-end."""
        self.send_response(206)
        length = end - start + 1
        self.send_header('Accept-Ranges', 'bytes')
        self.send_header("Content-Length", "%d" % length)

        self.send_header("Content-Type", 'application/octet-stream')
        self.send_header("Content-Range", "bytes %d-%d/%d" % (start,
                                                              end,
                                                              file_size))
        self.end_headers()
        self.send_range_content(file, start, length)

    def get_multiple_ranges(self, file, file_size, ranges):
        """Send a 206 multipart/byteranges response, one part per range."""
        self.send_response(206)
        self.send_header('Accept-Ranges', 'bytes')
        boundary = "%d" % random.randint(0,0x7FFFFFFF)
        self.send_header("Content-Type",
                         "multipart/byteranges; boundary=%s" % boundary)
        self.end_headers()
        for (start, end) in ranges:
            self.wfile.write("--%s\r\n" % boundary)
            self.send_header("Content-type", 'application/octet-stream')
            self.send_header("Content-Range", "bytes %d-%d/%d" % (start,
                                                                  end,
                                                                  file_size))
            self.end_headers()
            self.send_range_content(file, start, end - start + 1)
            self.wfile.write("--%s\r\n" % boundary)

    def do_GET(self):
        """Serve a GET request.

        Handles the Range header. Requests without one, and requests for
        directories, are delegated to SimpleHTTPRequestHandler. Otherwise
        the answer is 404 if the file cannot be opened, 416 if the ranges
        cannot be satisfied, and 206 with the requested bytes if they can.
        """
        path = self.translate_path(self.path)
        ranges_header_value = self.headers.get('Range')
        if ranges_header_value is None or os.path.isdir(path):
            # Let the mother class handle most cases
            return SimpleHTTPRequestHandler.do_GET(self)

        try:
            # Always read in binary mode. Opening files in text
            # mode may cause newline translations, making the
            # actual size of the content transmitted *less* than
            # the content-length!
            f = open(path, 'rb')
        except IOError:
            self.send_error(404, "File not found")
            return None

        try:
            file_size = os.fstat(f.fileno())[6]
            tail, ranges = self.parse_ranges(ranges_header_value)
            # Normalize tail into ranges and clip everything to the file
            ranges = self._resolve_ranges(tail, ranges, file_size)
            self._satisfiable_ranges = ranges is not None

            if not self._satisfiable_ranges:
                # RFC2616 14.16 and 14.35 says that when a server
                # encounters unsatisfiable range specifiers, it
                # SHOULD return a 416.
                # FIXME: We SHOULD send a Content-Range header too,
                # but the implementation of send_error does not
                # allows that. So far.
                self.send_error(416, "Requested range not satisfiable")
                return

            if len(ranges) == 1:
                (start, end) = ranges[0]
                self.get_single_range(f, file_size, start, end)
            else:
                self.get_multiple_ranges(f, file_size, ranges)
        finally:
            # Close the file even if writing the response fails.
            f.close()

    if sys.platform == 'win32':
        # On win32 you cannot access non-ascii filenames without
        # decoding them into unicode first.
        # However, under Linux, you can access bytestream paths
        # without any problems. If this function was always active
        # it would probably break tests when LANG=C was set
        def translate_path(self, path):
            """Translate a /-separated PATH to the local filename syntax.

            For bzr, all url paths are considered to be utf8 paths.
            On Linux, you can access these paths directly over the bytestream
            request, but on win32, you must decode them, and access them
            as Unicode files.
            """
            # abandon query parameters
            path = urlparse.urlparse(path)[2]
            path = posixpath.normpath(urllib.unquote(path))
            path = path.decode('utf-8')
            words = [word for word in path.split('/') if word]
            path = os.getcwdu()
            for word in words:
                drive, word = os.path.splitdrive(word)
                head, word = os.path.split(word)
                if word in (os.curdir, os.pardir):
                    continue
                path = os.path.join(path, word)
            return path
class TestingHTTPServer(BaseHTTPServer.HTTPServer):
    """HTTPServer that keeps a reference to the test server that owns it."""

    def __init__(self, server_address, RequestHandlerClass,
                 test_case_server):
        """Create the server.

        :param server_address: passed through to HTTPServer.
        :param RequestHandlerClass: passed through to HTTPServer.
        :param test_case_server: object stored as ``self.test_case_server``.
        """
        BaseHTTPServer.HTTPServer.__init__(self, server_address,
                                           RequestHandlerClass)
        # test_case_server can be used to communicate between the
        # tests and the server (or the request handler and the
        # server), allowing dynamic behaviors to be defined from
        # the test cases.
        self.test_case_server = test_case_server
class HttpServer(Server):
    """A test server for http transports.

    Subclasses can provide a specific request handler.
    """

    # used to form the url that connects to this server
    _url_protocol = 'http'

    # Subclasses can provide a specific request handler
    def __init__(self, request_handler=TestingHTTPRequestHandler):
        """Record the handler class; no socket is opened until setUp()."""
        Server.__init__(self)
        self.request_handler = request_handler
        self.host = 'localhost'
        # Port 0 asks for any free port; the real one is read back in
        # _get_httpd() once the socket is bound.
        self.port = 0
        self._httpd = None

    def _get_httpd(self):
        """Return the TestingHTTPServer, creating it on first use."""
        if self._httpd is None:
            self._httpd = TestingHTTPServer((self.host, self.port),
                                            self.request_handler,
                                            self)
            # Index rather than unpack: only the port is needed.
            self.port = self._httpd.socket.getsockname()[1]
        return self._httpd

    def _http_start(self):
        """Body of the server thread: serve until _http_running is false."""
        httpd = self._get_httpd()
        self._http_base_url = '%s://%s:%s/' % (self._url_protocol,
                                               self.host,
                                               self.port)
        # Lets setUp(), which is blocked on this lock, carry on.
        self._http_starting.release()
        # A short timeout makes the loop re-check _http_running regularly.
        httpd.socket.settimeout(0.1)

        while self._http_running:
            try:
                httpd.handle_request()
            except socket.timeout:
                pass

    def _get_remote_url(self, path):
        """Return the URL on this server for the local ``path``.

        Absolute paths must live under the directory recorded by setUp();
        relative paths are used as they are, with '/' as the separator.

        :raises BadWebserverPath: for an absolute path outside that
            directory.
        """
        path_parts = path.split(os.path.sep)
        if os.path.isabs(path):
            local_parts = self._local_path_parts
            if path_parts[:len(local_parts)] != local_parts:
                # Report the directory the path was compared against.
                raise BadWebserverPath(path, self._home_dir)
            remote_path = '/'.join(path_parts[len(local_parts):])
        else:
            remote_path = '/'.join(path_parts)

        return self._http_base_url + remote_path

    def log(self, format, *args):
        """Capture Server log output."""
        self.logs.append(format % args)

    def setUp(self, backing_transport_server=None):
        """See bzrlib.transport.Server.setUp.

        :param backing_transport_server: The transport that requests over this
            protocol should be forwarded to. Note that this is currently not
            supported for HTTP: only None or a LocalURLServer is accepted.
        """
        # XXX: TODO: make the server back onto vfs_server rather than local
        # disk.
        assert backing_transport_server is None or \
            isinstance(backing_transport_server, LocalURLServer), \
            "HTTPServer currently assumes local transport, got %s" % \
            backing_transport_server
        self._home_dir = os.getcwdu()
        self._local_path_parts = self._home_dir.split(os.path.sep)
        # Created before the thread starts so log() can always append.
        self.logs = []
        self._http_starting = threading.Lock()
        self._http_starting.acquire()
        self._http_running = True
        self._http_base_url = None
        self._http_thread = threading.Thread(target=self._http_start)
        self._http_thread.setDaemon(True)
        self._http_thread.start()
        # Wait for the server thread to start (i.e release the lock)
        self._http_starting.acquire()
        self._http_starting.release()

    def tearDown(self):
        """See bzrlib.transport.Server.tearDown."""
        self._http_running = False
        self._http_thread.join()

    def get_url(self):
        """See bzrlib.transport.Server.get_url."""
        return self._get_remote_url(self._home_dir)

    def get_bogus_url(self):
        """See bzrlib.transport.Server.get_bogus_url."""
        # this is chosen to try to prevent trouble with proxies, weird dns,
        # etc
        return 'http://127.0.0.1:1/'
class HttpServer_urllib(HttpServer):
    """Subclass of HttpServer that gives http+urllib urls.

    This is for use in testing: connections to this server will always go
    through urllib where possible.
    """

    # urls returned by this server should require the urllib client impl
    _url_protocol = 'http+urllib'
class HttpServer_PyCurl(HttpServer):
    """Subclass of HttpServer that gives http+pycurl urls.

    This is for use in testing: connections to this server will always go
    through pycurl where possible.
    """

    # We don't care about checking the pycurl availability as
    # this server will be required only when pycurl is present

    # urls returned by this server should require the pycurl client impl
    _url_protocol = 'http+pycurl'