1
# Copyright (C) 2006-2010 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24
import SimpleHTTPServer
38
from bzrlib.tests import test_server
39
from bzrlib.transport import local
42
class BadWebserverPath(ValueError):
44
return 'path %s is not in %s' % self.args
47
class TestingHTTPRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):
48
"""Handles one request.
50
A TestingHTTPRequestHandler is instantiated for every request received by
51
the associated server. Note that 'request' here is inherited from the base
52
TCPServer class, for the HTTP server it is really a connection which itself
53
will handle one or several HTTP requests.
55
# Default protocol version
56
protocol_version = 'HTTP/1.1'
58
# The Message-like class used to parse the request headers
59
MessageClass = httplib.HTTPMessage
62
SimpleHTTPServer.SimpleHTTPRequestHandler.setup(self)
63
self._cwd = self.server._home_dir
64
tcs = self.server.test_case_server
65
if tcs.protocol_version is not None:
66
# If the test server forced a protocol version, use it
67
self.protocol_version = tcs.protocol_version
69
def log_message(self, format, *args):
    """Record one request log entry in the test case server's log.

    The entry carries the client address, the log timestamp, the formatted
    message and the ``referer``/``user-agent`` request headers (``-`` is
    used when a header is absent).

    :param format: A %-style format string describing the event.
    :param args: The values interpolated into ``format``.
    """
    entry = (
        self.address_string(),
        self.log_date_time_string(),
        format % args,
        self.headers.get('referer', '-'),
        self.headers.get('user-agent', '-'),
    )
    self.server.test_case_server.log(
        'webserver - %s - - [%s] %s "%s" "%s"', *entry)
79
SimpleHTTPServer.SimpleHTTPRequestHandler.handle(self)
80
# Some clients (pycurl, I'm looking at you) are more picky than others
81
# and require that the socket itself is closed
82
# (SocketServer.StreamRequestHandler only closes the two associated
84
self.connection.close()
86
def handle_one_request(self):
87
"""Handle a single HTTP request.
89
We catch all socket errors occurring when the client close the
90
connection early to avoid polluting the test results.
93
self._handle_one_request()
94
except socket.error, e:
95
# Any socket error should close the connection, but some errors are
96
# due to the client closing early and we don't want to pollute test
97
# results, so we raise only the others.
98
self.close_connection = 1
100
or e.args[0] not in (errno.EPIPE, errno.ECONNRESET,
101
errno.ECONNABORTED, errno.EBADF)):
104
error_content_type = 'text/plain'
105
error_message_format = '''\
106
Error code: %(code)s.
107
Message: %(message)s.
110
def send_error(self, code, message=None):
111
"""Send and log an error reply.
113
We redefine the python-provided version to be able to set a
114
``Content-Length`` header as some http/1.1 clients complain otherwise
117
:param code: The HTTP error code.
119
:param message: The explanation of the error code, Defaults to a short
125
message = self.responses[code][0]
128
self.log_error("code %d, message %s", code, message)
129
content = (self.error_message_format %
130
{'code': code, 'message': message})
131
self.send_response(code, message)
132
self.send_header("Content-Type", self.error_content_type)
133
self.send_header("Content-Length", "%d" % len(content))
134
self.send_header('Connection', 'close')
136
if self.command != 'HEAD' and code >= 200 and code not in (204, 304):
137
self.wfile.write(content)
139
def _handle_one_request(self):
    """Process a single request with the stock SimpleHTTPRequestHandler.

    This is the unguarded implementation that ``handle_one_request`` wraps
    with its socket error handling.
    """
    base_handler = SimpleHTTPServer.SimpleHTTPRequestHandler
    base_handler.handle_one_request(self)
142
_range_regexp = re.compile(r'^(?P<start>\d+)-(?P<end>\d+)$')
143
_tail_regexp = re.compile(r'^-(?P<tail>\d+)$')
145
def parse_ranges(self, ranges_header):
146
"""Parse the range header value and returns ranges and tail.
148
RFC2616 14.35 says that syntactically invalid range
149
specifiers MUST be ignored. In that case, we return 0 for
150
tail and [] for ranges.
154
if not ranges_header.startswith('bytes='):
155
# Syntactically invalid header
158
ranges_header = ranges_header[len('bytes='):]
159
for range_str in ranges_header.split(','):
160
# FIXME: RFC2616 says end is optional and default to file_size
161
range_match = self._range_regexp.match(range_str)
162
if range_match is not None:
163
start = int(range_match.group('start'))
164
end = int(range_match.group('end'))
166
# Syntactically invalid range
168
ranges.append((start, end))
170
tail_match = self._tail_regexp.match(range_str)
171
if tail_match is not None:
172
tail = int(tail_match.group('tail'))
174
# Syntactically invalid range
178
def _header_line_length(self, keyword, value):
179
header_line = '%s: %s\r\n' % (keyword, value)
180
return len(header_line)
183
"""Overrides base implementation to work around a bug in python2.5."""
184
path = self.translate_path(self.path)
185
if os.path.isdir(path) and not self.path.endswith('/'):
186
# redirect browser - doing basically what apache does when
187
# DirectorySlash option is On which is quite common (braindead, but
189
self.send_response(301)
190
self.send_header("Location", self.path + "/")
191
# Indicates that the body is empty for HTTP/1.1 clients
192
self.send_header('Content-Length', '0')
196
return SimpleHTTPServer.SimpleHTTPRequestHandler.send_head(self)
198
def send_range_content(self, file, start, length):
    """Write a slice of ``file`` to the response stream.

    :param file: A seekable file-like object to read from.
    :param start: The offset of the first byte to send.
    :param length: The number of bytes to read and send.
    """
    file.seek(start)
    chunk = file.read(length)
    self.wfile.write(chunk)
202
def get_single_range(self, file, file_size, start, end):
203
self.send_response(206)
204
length = end - start + 1
205
self.send_header('Accept-Ranges', 'bytes')
206
self.send_header("Content-Length", "%d" % length)
208
self.send_header("Content-Type", 'application/octet-stream')
209
self.send_header("Content-Range", "bytes %d-%d/%d" % (start,
213
self.send_range_content(file, start, length)
215
def get_multiple_ranges(self, file, file_size, ranges):
216
self.send_response(206)
217
self.send_header('Accept-Ranges', 'bytes')
218
boundary = '%d' % random.randint(0,0x7FFFFFFF)
219
self.send_header('Content-Type',
220
'multipart/byteranges; boundary=%s' % boundary)
221
boundary_line = '--%s\r\n' % boundary
222
# Calculate the Content-Length
224
for (start, end) in ranges:
225
content_length += len(boundary_line)
226
content_length += self._header_line_length(
227
'Content-type', 'application/octet-stream')
228
content_length += self._header_line_length(
229
'Content-Range', 'bytes %d-%d/%d' % (start, end, file_size))
230
content_length += len('\r\n') # end headers
231
content_length += end - start + 1
232
content_length += len(boundary_line)
233
self.send_header('Content-length', content_length)
236
# Send the multipart body
237
for (start, end) in ranges:
238
self.wfile.write(boundary_line)
239
self.send_header('Content-type', 'application/octet-stream')
240
self.send_header('Content-Range', 'bytes %d-%d/%d'
241
% (start, end, file_size))
243
self.send_range_content(file, start, end - start + 1)
245
self.wfile.write(boundary_line)
248
"""Serve a GET request.
250
Handles the Range header.
253
self.server.test_case_server.GET_request_nb += 1
255
path = self.translate_path(self.path)
256
ranges_header_value = self.headers.get('Range')
257
if ranges_header_value is None or os.path.isdir(path):
258
# Let the mother class handle most cases
259
return SimpleHTTPServer.SimpleHTTPRequestHandler.do_GET(self)
262
# Always read in binary mode. Opening files in text
263
# mode may cause newline translations, making the
264
# actual size of the content transmitted *less* than
265
# the content-length!
268
self.send_error(404, "File not found")
271
file_size = os.fstat(f.fileno())[6]
272
tail, ranges = self.parse_ranges(ranges_header_value)
273
# Normalize tail into ranges
275
ranges.append((file_size - tail, file_size))
277
self._satisfiable_ranges = True
279
self._satisfiable_ranges = False
281
def check_range(range_specifier):
282
start, end = range_specifier
283
# RFC2616 14.35, ranges are invalid if start >= file_size
284
if start >= file_size:
285
self._satisfiable_ranges = False # Side-effect !
287
# RFC2616 14.35, end values should be truncated
288
# to file_size -1 if they exceed it
289
end = min(end, file_size - 1)
292
ranges = map(check_range, ranges)
294
if not self._satisfiable_ranges:
295
# RFC2616 14.16 and 14.35 says that when a server
296
# encounters unsatisfiable range specifiers, it
297
# SHOULD return a 416.
299
# FIXME: We SHOULD send a Content-Range header too,
300
# but the implementation of send_error does not
301
# allows that. So far.
302
self.send_error(416, "Requested range not satisfiable")
306
(start, end) = ranges[0]
307
self.get_single_range(f, file_size, start, end)
309
self.get_multiple_ranges(f, file_size, ranges)
312
def translate_path(self, path):
313
"""Translate a /-separated PATH to the local filename syntax.
315
If the server requires it, proxy the path before the usual translation
317
if self.server.test_case_server.proxy_requests:
318
# We need to act as a proxy and accept absolute urls,
319
# which SimpleHTTPRequestHandler (parent) is not
320
# ready for. So we just drop the protocol://host:port
321
# part in front of the request-url (because we know
322
# we would not forward the request to *another*
325
# So we do what SimpleHTTPRequestHandler.translate_path
326
# do beginning with python 2.4.3: abandon query
327
# parameters, scheme, host port, etc (which ensure we
328
# provide the right behaviour on all python versions).
329
path = urlparse.urlparse(path)[2]
330
# And now, we can apply *our* trick to proxy files
333
return self._translate_path(path)
335
def _translate_path(self, path):
336
"""Translate a /-separated PATH to the local filename syntax.
338
Note that we're translating http URLs here, not file URLs.
339
The URL root location is the server's startup directory.
340
Components that mean special things to the local file system
341
(e.g. drive or directory names) are ignored. (XXX They should
342
probably be diagnosed.)
344
Override from python standard library to stop it calling os.getcwd()
346
# abandon query parameters
347
path = urlparse.urlparse(path)[2]
348
path = posixpath.normpath(urllib.unquote(path))
349
path = path.decode('utf-8')
350
words = path.split('/')
351
words = filter(None, words)
353
for num, word in enumerate(words):
355
drive, word = os.path.splitdrive(word)
356
head, word = os.path.split(word)
357
if word in (os.curdir, os.pardir): continue
358
path = os.path.join(path, word)
362
class TestingHTTPServerMixin:
    """Mixin tying an HTTP server to the test case server that owns it."""

    def __init__(self, test_case_server):
        # Request handlers read the home directory from the server they
        # are attached to, so keep a copy of it on the server itself.
        self._home_dir = test_case_server._home_dir
        # The test case server is the channel used to communicate between
        # the tests and the server (or the request handler and the
        # server), allowing dynamic behaviors to be defined from the
        # tests.
        self.test_case_server = test_case_server
373
class TestingHTTPServer(test_server.TestingTCPServer, TestingHTTPServerMixin):
375
def __init__(self, server_address, request_handler_class,
377
test_server.TestingTCPServer.__init__(self, server_address,
378
request_handler_class)
379
TestingHTTPServerMixin.__init__(self, test_case_server)
382
class TestingThreadingHTTPServer(test_server.TestingThreadingTCPServer,
383
TestingHTTPServerMixin):
384
"""A threading HTTP test server for HTTP 1.1.
386
Since tests can initiate several concurrent connections to the same http
387
server, we need an independent connection for each of them. We achieve that
388
by spawning a new thread for each connection.
390
def __init__(self, server_address, request_handler_class,
392
test_server.TestingThreadingTCPServer.__init__(self, server_address,
393
request_handler_class)
394
TestingHTTPServerMixin.__init__(self, test_case_server)
397
class HttpServer(test_server.TestingTCPServerInAThread):
398
"""A test server for http transports.
400
Subclasses can provide a specific request handler.
403
# The real servers depending on the protocol
404
http_server_class = {'HTTP/1.0': TestingHTTPServer,
405
'HTTP/1.1': TestingThreadingHTTPServer,
408
# Whether or not we proxy the requests (see
409
# TestingHTTPRequestHandler.translate_path).
410
proxy_requests = False
412
# used to form the url that connects to this server
413
_url_protocol = 'http'
415
def __init__(self, request_handler=TestingHTTPRequestHandler,
416
protocol_version=None):
419
:param request_handler: a class that will be instantiated to handle an
420
http connection (one or several requests).
422
:param protocol_version: if specified, will override the protocol
423
version of the request handler.
425
# Depending on the protocol version, we will create the appropriate
427
if protocol_version is None:
428
# Use the request handler one
429
proto_vers = request_handler.protocol_version
431
# Use our own, it will be used to override the request handler
433
proto_vers = protocol_version
434
# Get the appropriate server class for the required protocol
435
serv_cls = self.http_server_class.get(proto_vers, None)
437
raise httplib.UnknownProtocol(proto_vers)
438
self.host = 'localhost'
440
super(HttpServer, self).__init__((self.host, self.port),
443
self.protocol_version = proto_vers
444
# Allows tests to verify number of GET requests issued
445
self.GET_request_nb = 0
446
self._http_base_url = None
449
def create_server(self):
    """Instantiate the server class selected for this test server.

    ``self.server_class`` is called with the ``(host, port)`` address, the
    request handler class and this object itself as third argument.

    :return: The newly created server object.
    """
    address = (self.host, self.port)
    return self.server_class(address, self.request_handler_class, self)
453
def _get_remote_url(self, path):
454
path_parts = path.split(os.path.sep)
455
if os.path.isabs(path):
456
if path_parts[:len(self._local_path_parts)] != \
457
self._local_path_parts:
458
raise BadWebserverPath(path, self.test_dir)
459
remote_path = '/'.join(path_parts[len(self._local_path_parts):])
461
remote_path = '/'.join(path_parts)
463
return self._http_base_url + remote_path
465
def log(self, format, *args):
    """Capture Server log output.

    The message is formatted right away and appended to ``self.logs``.

    :param format: A %-style format string.
    :param args: The values interpolated into ``format``.
    """
    entries = self.logs
    entries.append(format % args)
469
def start_server(self, backing_transport_server=None):
470
"""See bzrlib.transport.Server.start_server.
472
:param backing_transport_server: The transport that requests over this
473
protocol should be forwarded to. Note that this is currently not
476
# XXX: TODO: make the server back onto vfs_server rather than local
478
if not (backing_transport_server is None
479
or isinstance(backing_transport_server,
480
test_server.LocalURLServer)):
481
raise AssertionError(
482
"HTTPServer currently assumes local transport, got %s" %
483
backing_transport_server)
484
self._home_dir = os.getcwdu()
485
self._local_path_parts = self._home_dir.split(os.path.sep)
488
super(HttpServer, self).start_server()
489
self._http_base_url = '%s://%s:%s/' % (
490
self._url_protocol, self.host, self.port)
493
"""See bzrlib.transport.Server.get_url."""
494
return self._get_remote_url(self._home_dir)
496
def get_bogus_url(self):
    """See bzrlib.transport.Server.get_bogus_url."""
    # Port 1 on the loopback address: chosen to try to prevent trouble
    # with proxies and weird dns.
    return ''.join([self._url_protocol, '://127.0.0.1:1/'])
503
class HttpServer_urllib(HttpServer):
    """An HttpServer handing out http+urllib urls.

    This is for use in testing: connections to this server will always go
    through urllib where possible.
    """

    # The scheme makes the urls returned by this server require the urllib
    # client implementation.
    _url_protocol = 'http+urllib'
514
class HttpServer_PyCurl(HttpServer):
    """An HttpServer handing out http+pycurl urls.

    This is for use in testing: connections to this server will always go
    through pycurl where possible.
    """

    # No check for pycurl availability is made here: this server will be
    # required only when pycurl is present.

    # The scheme makes the urls returned by this server require the pycurl
    # client implementation.
    _url_protocol = 'http+pycurl'