1
# Copyright (C) 2006-2011 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23
import SimpleHTTPServer
28
from bzrlib.tests import test_server
31
class BadWebserverPath(ValueError):
    """Raised when a local path is not below the directory being served.

    Raised with two arguments, ``(path, served_directory)``, which are
    interpolated into the message.
    """

    # NOTE(review): the method header was lost from the source; ``__str__``
    # is assumed because the surviving body formats the exception arguments.
    def __str__(self):
        if len(self.args) != 2:
            # Not the expected (path, directory) pair: fall back to the
            # default rendering instead of failing inside the formatting.
            return ValueError.__str__(self)
        return 'path %s is not in %s' % self.args
36
class TestingHTTPRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):
    """Handles one request.

    A TestingHTTPRequestHandler is instantiated for every request received by
    the associated server. Note that 'request' here is inherited from the base
    TCPServer class; for the HTTP server it is really a connection which
    itself will handle one or several HTTP requests.
    """

    # Default protocol version
    protocol_version = 'HTTP/1.1'

    # The Message-like class used to parse the request headers
    MessageClass = httplib.HTTPMessage
51
# NOTE(review): the ``def`` line was lost from the source; the name is taken
# from the base-class call in the body.
def setup(self):
    """Prepare the handler for a new connection.

    Runs the base-class setup, records the directory to serve files from
    (the server's ``_home_dir``) and applies the protocol version forced by
    the test server, if it forced one.
    """
    SimpleHTTPServer.SimpleHTTPRequestHandler.setup(self)
    self._cwd = self.server._home_dir
    tcs = self.server.test_case_server
    if tcs.protocol_version is not None:
        # If the test server forced a protocol version, use it
        self.protocol_version = tcs.protocol_version
58
def log_message(self, format, *args):
    """Record one log line through the test server's ``log`` method.

    :param format: A %-style format string for the message.
    :param args: The values interpolated into ``format``.
    """
    tcs = self.server.test_case_server
    tcs.log('webserver - %s - - [%s] %s "%s" "%s"',
            self.address_string(),
            self.log_date_time_string(),
            # NOTE(review): this argument was lost from the source; the
            # template takes five values and only four survived.
            format % args,
            self.headers.get('referer', '-'),
            self.headers.get('user-agent', '-'))
68
# NOTE(review): the ``def`` line was lost from the source; the name is taken
# from the base-class call in the body.
def handle(self):
    """Serve the connection, then close the underlying socket."""
    SimpleHTTPServer.SimpleHTTPRequestHandler.handle(self)
    # Some clients (pycurl, I'm looking at you) are pickier than others
    # and require that the socket itself is closed
    # (SocketServer.StreamRequestHandler only closes the two file objects
    # associated with it).
    self.connection.close()
75
def handle_one_request(self):
    """Handle a single HTTP request.

    We catch all socket errors occurring when the client closes the
    connection early, to avoid polluting the test results.

    :raises socket.error: for any socket error that is not one of the
        "client went away" errnos listed below.
    """
    try:
        self._handle_one_request()
    except socket.error as e:
        # Any socket error should close the connection, but some errors are
        # due to the client closing early and we don't want to pollute test
        # results, so we raise only the others.
        self.close_connection = 1
        # NOTE(review): the first operand was lost from the source; it is
        # restored as a guard for the ``e.args[0]`` access that survived.
        if (len(e.args) == 0
            or e.args[0] not in (errno.EPIPE, errno.ECONNRESET,
                                 errno.ECONNABORTED, errno.EBADF)):
            raise
93
error_content_type = 'text/plain'
94
error_message_format = '''\
99
def send_error(self, code, message=None):
    """Send and log an error reply.

    We redefine the python-provided version to be able to set a
    ``Content-Length`` header, as some http/1.1 clients complain otherwise.

    :param code: The HTTP error code.

    :param message: The explanation of the error code. Defaults to the short
        entry of ``self.responses`` for that code.
    """
    if message is None:
        # NOTE(review): this guard was lost from the source. It is restored
        # after the standard library's send_error, which falls back to
        # '???' for unknown codes -- confirm the fallback text.
        try:
            message = self.responses[code][0]
        except KeyError:
            message = '???'
    self.log_error("code %d, message %s", code, message)
    content = (self.error_message_format %
               {'code': code, 'message': message})
    self.send_response(code, message)
    self.send_header("Content-Type", self.error_content_type)
    self.send_header("Content-Length", "%d" % len(content))
    self.send_header('Connection', 'close')
    # NOTE(review): restored; the header section must be terminated before
    # any body is written.
    self.end_headers()
    # No body for HEAD requests nor for statuses that forbid one
    if self.command != 'HEAD' and code >= 200 and code not in (204, 304):
        self.wfile.write(content)
128
def _handle_one_request(self):
    """Process one request with the base-class implementation.

    Kept separate so that ``handle_one_request`` can wrap it with its
    socket error handling.
    """
    SimpleHTTPServer.SimpleHTTPRequestHandler.handle_one_request(self)
131
# A single "start-end" byte range
_range_regexp = re.compile(r'^(?P<start>\d+)-(?P<end>\d+)$')
# A suffix range, "-N": the last N bytes
_tail_regexp = re.compile(r'^-(?P<tail>\d+)$')

def parse_ranges(self, ranges_header):
    """Parse the range header value and return the tail and the ranges.

    RFC2616 14.35 says that syntactically invalid range specifiers MUST be
    ignored. In that case, we return 0 for tail and [] for ranges.

    :param ranges_header: The value of the ``Range`` header.

    :return: A ``(tail, ranges)`` tuple where ``tail`` is the length of the
        requested suffix (0 if none) and ``ranges`` a list of
        ``(start, end)`` tuples.
    """
    # NOTE(review): the initialisations, the ``start > end`` test, the
    # ``else`` branches and the ``return`` statements were lost from the
    # source. They are restored from the docstring and the surviving
    # "Syntactically invalid" comments.
    if not ranges_header.startswith('bytes='):
        # Syntactically invalid header
        return 0, []

    tail = 0
    ranges = []
    ranges_header = ranges_header[len('bytes='):]
    for range_str in ranges_header.split(','):
        # FIXME: RFC2616 says end is optional and default to file_size
        range_match = self._range_regexp.match(range_str)
        if range_match is not None:
            start = int(range_match.group('start'))
            end = int(range_match.group('end'))
            if start > end:
                # Syntactically invalid range
                return 0, []
            ranges.append((start, end))
            continue
        tail_match = self._tail_regexp.match(range_str)
        if tail_match is None:
            # Syntactically invalid range
            return 0, []
        tail = int(tail_match.group('tail'))
    return tail, ranges
167
def _header_line_length(self, keyword, value):
168
header_line = '%s: %s\r\n' % (keyword, value)
169
return len(header_line)
# NOTE(review): the ``def`` line was lost from the source; the name is taken
# from the base-class call at the end of the body.
def send_head(self):
    """Overrides base implementation to work around a bug in python2.5.

    :return: None when a redirect has been sent, otherwise whatever the
        base-class ``send_head`` returns.
    """
    path = self.translate_path(self.path)
    if os.path.isdir(path) and not self.path.endswith('/'):
        # redirect browser - doing basically what apache does when
        # DirectorySlash option is On which is quite common (braindead, but
        # widespread).
        self.send_response(301)
        self.send_header("Location", self.path + "/")
        # Indicates that the body is empty for HTTP/1.1 clients
        self.send_header('Content-Length', '0')
        # NOTE(review): the two statements below were lost from the source;
        # the headers have to be terminated and nothing is left to serve.
        self.end_headers()
        return None

    return SimpleHTTPServer.SimpleHTTPRequestHandler.send_head(self)
187
def send_range_content(self, file, start, length):
    """Write ``length`` bytes of ``file``, read from offset ``start``.

    :param file: A seekable file object opened for reading.
    :param start: The offset of the first byte to send.
    :param length: The number of bytes to send.
    """
    # NOTE(review): the seek was lost from the source; without it ``start``
    # would be unused.
    file.seek(start)
    self.wfile.write(file.read(length))
191
def get_single_range(self, file, file_size, start, end):
    """Answer with a 206 response carrying the bytes ``start`` to ``end``.

    :param file: The open file to read the content from.
    :param file_size: The total size of the file, reported in Content-Range.
    :param start: The offset of the first byte to send.
    :param end: The offset of the last byte to send (inclusive).
    """
    self.send_response(206)
    length = end - start + 1
    self.send_header('Accept-Ranges', 'bytes')
    self.send_header("Content-Length", "%d" % length)

    self.send_header("Content-Type", 'application/octet-stream')
    self.send_header("Content-Range", "bytes %d-%d/%d" % (start,
                                                          end,
                                                          file_size))
    # NOTE(review): restored; the source lost the end of the header section.
    self.end_headers()
    self.send_range_content(file, start, length)
204
def get_multiple_ranges(self, file, file_size, ranges):
    """Answer with a 206 ``multipart/byteranges`` response.

    :param file: The open file to read the content from.
    :param file_size: The total size of the file, reported in each part's
        Content-Range header.
    :param ranges: A list of ``(start, end)`` tuples, ``end`` inclusive.
    """
    self.send_response(206)
    self.send_header('Accept-Ranges', 'bytes')
    boundary = '%d' % random.randint(0,0x7FFFFFFF)
    self.send_header('Content-Type',
                     'multipart/byteranges; boundary=%s' % boundary)
    boundary_line = '--%s\r\n' % boundary
    # Calculate the Content-Length. It has to count exactly what the second
    # loop writes: boundary, part headers, blank line and data for each
    # range, plus the final boundary.
    content_length = 0
    for (start, end) in ranges:
        content_length += len(boundary_line)
        content_length += self._header_line_length(
            'Content-type', 'application/octet-stream')
        content_length += self._header_line_length(
            'Content-Range', 'bytes %d-%d/%d' % (start, end, file_size))
        content_length += len('\r\n') # end headers
        content_length += end - start + 1
    content_length += len(boundary_line)
    self.send_header('Content-length', content_length)
    # NOTE(review): restored; the source lost the end of the header section
    # here and after each part's headers.
    self.end_headers()

    # Send the multipart body
    for (start, end) in ranges:
        self.wfile.write(boundary_line)
        self.send_header('Content-type', 'application/octet-stream')
        self.send_header('Content-Range', 'bytes %d-%d/%d'
                         % (start, end, file_size))
        self.end_headers()
        self.send_range_content(file, start, end - start + 1)
    # Final boundary
    self.wfile.write(boundary_line)
# NOTE(review): the ``def`` line and several short statements (the ``open``
# call, ``except``/``else`` lines, tests on ``tail`` and ``ranges``, returns)
# were lost from the source. They are restored from the surviving comments
# and calls -- confirm against the pristine file.
def do_GET(self):
    """Serve a GET request.

    Handles the Range header: requests without one, and requests for
    directories, are left to the base class. Otherwise the requested ranges
    are checked against the file size and answered with a single-range or a
    multipart 206 response, or with a 416 if they cannot be satisfied.
    """
    # Lets tests check how many GET requests were issued
    self.server.test_case_server.GET_request_nb += 1

    path = self.translate_path(self.path)
    ranges_header_value = self.headers.get('Range')
    if ranges_header_value is None or os.path.isdir(path):
        # Let the mother class handle most cases
        return SimpleHTTPServer.SimpleHTTPRequestHandler.do_GET(self)

    try:
        # Always read in binary mode. Opening files in text
        # mode may cause newline translations, making the
        # actual size of the content transmitted *less* than
        # the content-length!
        f = open(path, 'rb')
    except IOError:
        self.send_error(404, "File not found")
        return

    # The file is closed on every path out of here, including errors
    # raised while the response is being written.
    try:
        file_size = os.fstat(f.fileno())[6]
        tail, ranges = self.parse_ranges(ranges_header_value)
        # Normalize tail into ranges
        if tail != 0:
            # A suffix longer than the file selects the whole file, so the
            # start offset is never allowed to go negative.
            ranges.append((max(0, file_size - tail), file_size))

        # No range at all is as unsatisfiable as a range out of the file
        self._satisfiable_ranges = len(ranges) != 0
        checked_ranges = []
        for start, end in ranges:
            # RFC2616 14.35, ranges are invalid if start >= file_size
            if start >= file_size:
                self._satisfiable_ranges = False
                checked_ranges.append((0, 0))
            else:
                # RFC2616 14.35, end values should be truncated
                # to file_size -1 if they exceed it
                checked_ranges.append((start, min(end, file_size - 1)))
        ranges = checked_ranges

        if not self._satisfiable_ranges:
            # RFC2616 14.16 and 14.35 says that when a server
            # encounters unsatisfiable range specifiers, it
            # SHOULD return a 416.
            # FIXME: We SHOULD send a Content-Range header too,
            # but the implementation of send_error does not
            # allows that. So far.
            self.send_error(416, "Requested range not satisfiable")
            return

        if len(ranges) == 1:
            (start, end) = ranges[0]
            self.get_single_range(f, file_size, start, end)
        else:
            self.get_multiple_ranges(f, file_size, ranges)
    finally:
        f.close()
301
def translate_path(self, path):
302
"""Translate a /-separated PATH to the local filename syntax.
304
If the server requires it, proxy the path before the usual translation
306
if self.server.test_case_server.proxy_requests:
307
# We need to act as a proxy and accept absolute urls,
308
# which SimpleHTTPRequestHandler (parent) is not
309
# ready for. So we just drop the protocol://host:port
310
# part in front of the request-url (because we know
311
# we would not forward the request to *another*
314
# So we do what SimpleHTTPRequestHandler.translate_path
315
# do beginning with python 2.4.3: abandon query
316
# parameters, scheme, host port, etc (which ensure we
317
# provide the right behaviour on all python versions).
318
path = urlparse.urlparse(path)[2]
319
# And now, we can apply *our* trick to proxy files
322
return self._translate_path(path)
324
def _translate_path(self, path):
    """Translate a /-separated PATH to the local filename syntax.

    Note that we're translating http URLs here, not file URLs.
    The URL root location is the server's startup directory.
    Components that mean special things to the local file system
    (e.g. drive or directory names) are ignored.  (XXX They should
    probably be diagnosed.)

    Override from python standard library to stop it calling os.getcwd()

    :param path: The request path, possibly with query parameters.
    :return: The corresponding local path below ``self._cwd``.
    """
    # abandon query parameters
    path = urlparse.urlparse(path)[2]
    path = posixpath.normpath(urllib.unquote(path))
    path = path.decode('utf-8')
    words = [word for word in path.split('/') if word]
    # NOTE(review): this assignment, the ``num == 0`` test and the final
    # ``return`` were lost from the source. ``self._cwd`` is assumed to be
    # the base directory since the docstring says os.getcwd() is avoided.
    path = self._cwd
    for num, word in enumerate(words):
        if num == 0:
            # Only the first component can carry a drive specification
            drive, word = os.path.splitdrive(word)
        head, word = os.path.split(word)
        if word in (os.curdir, os.pardir):
            continue
        path = os.path.join(path, word)
    return path
351
class TestingHTTPServerMixin:
    """State shared by the HTTP test servers.

    Keeps a reference to the test case server and a copy of its home
    directory.
    """

    def __init__(self, test_case_server):
        """Remember ``test_case_server`` and its ``_home_dir``.

        :param test_case_server: The object used to communicate between the
            tests and the server (or the request handler and the server).
        """
        # test_case_server can be used to communicate between the
        # tests and the server (or the request handler and the
        # server), allowing dynamic behaviors to be defined from
        # the tests.
        self.test_case_server = test_case_server
        self._home_dir = test_case_server._home_dir
362
class TestingHTTPServer(test_server.TestingTCPServer, TestingHTTPServerMixin):
    """An HTTP test server built on ``test_server.TestingTCPServer``."""

    # NOTE(review): the end of the signature was lost from the source. The
    # third parameter is taken from its use below; its ``None`` default is
    # an assumption -- confirm.
    def __init__(self, server_address, request_handler_class,
                 test_case_server=None):
        """Initialise the TCP server, then the HTTP mixin.

        :param server_address: Passed on to the TCP server.
        :param request_handler_class: Passed on to the TCP server.
        :param test_case_server: Passed on to ``TestingHTTPServerMixin``.
        """
        test_server.TestingTCPServer.__init__(self, server_address,
                                              request_handler_class)
        TestingHTTPServerMixin.__init__(self, test_case_server)
371
class TestingThreadingHTTPServer(test_server.TestingThreadingTCPServer,
                                 TestingHTTPServerMixin):
    """A threading HTTP test server for HTTP 1.1.

    Since tests can initiate several concurrent connections to the same http
    server, we need an independent connection for each of them. We achieve that
    by spawning a new thread for each connection.
    """

    # NOTE(review): the end of the signature was lost from the source. The
    # third parameter is taken from its use below; its ``None`` default is
    # an assumption -- confirm.
    def __init__(self, server_address, request_handler_class,
                 test_case_server=None):
        """Initialise the threading TCP server, then the HTTP mixin.

        :param server_address: Passed on to the TCP server.
        :param request_handler_class: Passed on to the TCP server.
        :param test_case_server: Passed on to ``TestingHTTPServerMixin``.
        """
        test_server.TestingThreadingTCPServer.__init__(self, server_address,
                                                       request_handler_class)
        TestingHTTPServerMixin.__init__(self, test_case_server)
386
class HttpServer(test_server.TestingTCPServerInAThread):
    """A test server for http transports.

    Subclasses can provide a specific request handler.
    """

    # The real servers depending on the protocol
    http_server_class = {'HTTP/1.0': TestingHTTPServer,
                         'HTTP/1.1': TestingThreadingHTTPServer,
                         }

    # Whether or not we proxy the requests (see
    # TestingHTTPRequestHandler.translate_path).
    proxy_requests = False

    # used to form the url that connects to this server
    _url_protocol = 'http'
404
def __init__(self, request_handler=TestingHTTPRequestHandler,
405
protocol_version=None):
408
:param request_handler: a class that will be instantiated to handle an
409
http connection (one or several requests).
411
:param protocol_version: if specified, will override the protocol
412
version of the request handler.
414
# Depending on the protocol version, we will create the appropriate
416
if protocol_version is None:
417
# Use the request handler one
418
proto_vers = request_handler.protocol_version
420
# Use our own, it will be used to override the request handler
422
proto_vers = protocol_version
423
# Get the appropriate server class for the required protocol
424
serv_cls = self.http_server_class.get(proto_vers, None)
426
raise httplib.UnknownProtocol(proto_vers)
427
self.host = 'localhost'
429
super(HttpServer, self).__init__((self.host, self.port),
432
self.protocol_version = proto_vers
433
# Allows tests to verify number of GET requests issued
434
self.GET_request_nb = 0
435
self._http_base_url = None
438
def create_server(self):
    """Instantiate ``self.server_class`` for this test server.

    :return: The object built from the ``(host, port)`` address, the
        request handler class and this object as the test case server.
    """
    address = (self.host, self.port)
    return self.server_class(address, self.request_handler_class, self)
442
def _get_remote_url(self, path):
443
path_parts = path.split(os.path.sep)
444
if os.path.isabs(path):
445
if path_parts[:len(self._local_path_parts)] != \
446
self._local_path_parts:
447
raise BadWebserverPath(path, self.test_dir)
448
remote_path = '/'.join(path_parts[len(self._local_path_parts):])
450
remote_path = '/'.join(path_parts)
452
return self._http_base_url + remote_path
454
def log(self, format, *args):
    """Capture Server log output.

    :param format: A %-style format string.
    :param args: The values interpolated into ``format``; the resulting
        line is appended to ``self.logs``.
    """
    line = format % args
    self.logs.append(line)
458
def start_server(self, backing_transport_server=None):
459
"""See bzrlib.transport.Server.start_server.
461
:param backing_transport_server: The transport that requests over this
462
protocol should be forwarded to. Note that this is currently not
465
# XXX: TODO: make the server back onto vfs_server rather than local
467
if not (backing_transport_server is None
468
or isinstance(backing_transport_server,
469
test_server.LocalURLServer)):
470
raise AssertionError(
471
"HTTPServer currently assumes local transport, got %s" %
472
backing_transport_server)
473
self._home_dir = os.getcwdu()
474
self._local_path_parts = self._home_dir.split(os.path.sep)
477
super(HttpServer, self).start_server()
478
self._http_base_url = '%s://%s:%s/' % (
479
self._url_protocol, self.host, self.port)
# NOTE(review): the ``def`` line was lost from the source; the name is taken
# from the docstring.
def get_url(self):
    """See bzrlib.transport.Server.get_url.

    :return: The remote URL corresponding to ``self._home_dir``.
    """
    return self._get_remote_url(self._home_dir)
485
def get_bogus_url(self):
    """See bzrlib.transport.Server.get_bogus_url.

    :return: A URL using this server's protocol and pointing at port 1 of
        127.0.0.1.
    """
    # this is chosen to try to prevent trouble with proxies, weird dns,
    # etc
    return '%s://127.0.0.1:1/' % (self._url_protocol,)
492
class HttpServer_urllib(HttpServer):
    """Subclass of HttpServer that gives http+urllib urls.

    This is for use in testing: connections to this server will always go
    through urllib where possible.
    """

    # urls returned by this server should require the urllib client impl
    _url_protocol = 'http+urllib'
503
class HttpServer_PyCurl(HttpServer):
    """Subclass of HttpServer that gives http+pycurl urls.

    This is for use in testing: connections to this server will always go
    through pycurl where possible.
    """

    # We don't care about checking the pycurl availability as
    # this server will be required only when pycurl is present

    # urls returned by this server should require the pycurl client impl
    _url_protocol = 'http+pycurl'