# Copyright (C) 2006, 2007 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
import errno
import httplib
import os
import posixpath
import random
import re
import SimpleHTTPServer
import socket
import SocketServer
import sys
import threading
import urllib
import urlparse

from bzrlib import transport
from bzrlib.transport import local
class WebserverNotAvailable(Exception):
    """Exception type adding nothing to ``Exception``.

    NOTE(review): nothing in this module raises it; presumably callers use it
    to report that no test web server can be provided -- confirm.
    """
class BadWebserverPath(ValueError):
    """A path that is not located below the directory being served.

    Instances are built with two arguments, the offending path and the
    directory it was expected to be in; both end up in ``self.args``.
    """

    # NOTE(review): the method header was missing from the reviewed text; the
    # surviving ``return`` formats a message, so ``__str__`` is assumed.
    def __str__(self):
        """Return the message naming the path and the expected directory."""
        return 'path %s is not in %s' % self.args
class TestingHTTPRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):
    """Handles one request.

    A TestingHTTPRequestHandler is instantiated for every request received by
    the associated server. Note that 'request' here is inherited from the base
    TCPServer class, for the HTTP server it is really a connection which itself
    will handle one or several HTTP requests.
    """

    # Default protocol version
    protocol_version = 'HTTP/1.1'

    # The Message-like class used to parse the request headers
    MessageClass = httplib.HTTPMessage

    # 'first-last' byte range specifier
    _range_regexp = re.compile(r'^(?P<start>\d+)-(?P<end>\d+)$')
    # '-length' suffix specifier (the last 'length' bytes)
    _tail_regexp = re.compile(r'^-(?P<tail>\d+)$')

    def setup(self):
        """Run the base setup, then pick up the settings of the test server.

        Records the served directory in ``self._cwd`` and, when the test
        server forces one, overrides ``self.protocol_version``.
        """
        SimpleHTTPServer.SimpleHTTPRequestHandler.setup(self)
        self._cwd = self.server._home_dir
        tcs = self.server.test_case_server
        if tcs.protocol_version is not None:
            # If the test server forced a protocol version, use it
            self.protocol_version = tcs.protocol_version

    def log_message(self, format, *args):
        """Send the log line to the test server instead of stderr.

        :param format: a %-format string.
        :param args: the values interpolated into ``format``.
        """
        tcs = self.server.test_case_server
        tcs.log('webserver - %s - - [%s] %s "%s" "%s"',
                self.address_string(),
                self.log_date_time_string(),
                format % args,
                self.headers.get('referer', '-'),
                self.headers.get('user-agent', '-'))

    def handle_one_request(self):
        """Handle a single HTTP request.

        We catch all socket errors occurring when the client closes the
        connection early to avoid polluting the test results.
        """
        try:
            SimpleHTTPServer.SimpleHTTPRequestHandler.handle_one_request(self)
        except socket.error:
            # The exception is fetched with sys.exc_info() rather than bound
            # in the except clause so that this parses with any python.
            e = sys.exc_info()[1]
            # Any socket error should close the connection, but some errors are
            # due to the client closing early and we don't want to pollute test
            # results, so we raise only the others.
            self.close_connection = 1
            if (len(e.args) == 0
                or e.args[0] not in (errno.EPIPE, errno.ECONNRESET,
                                     errno.ECONNABORTED, errno.EBADF)):
                raise

    def parse_ranges(self, ranges_header):
        """Parse the range header value and returns ranges and tail.

        RFC2616 14.35 says that syntactically invalid range
        specifiers MUST be ignored. In that case, we return 0 for
        tail and [] for ranges.

        :param ranges_header: the value of the Range header.
        :return: a (tail, ranges) tuple where tail is the length of the
            requested suffix (0 if none) and ranges a list of inclusive
            (start, end) offsets.
        """
        tail = 0
        ranges = []
        if not ranges_header.startswith('bytes='):
            # Syntactically invalid header
            return 0, []

        ranges_header = ranges_header[len('bytes='):]
        for range_str in ranges_header.split(','):
            # FIXME: RFC2616 says end is optional and default to file_size
            range_match = self._range_regexp.match(range_str)
            if range_match is not None:
                start = int(range_match.group('start'))
                end = int(range_match.group('end'))
                if start > end:
                    # Syntactically invalid range
                    return 0, []
                ranges.append((start, end))
            else:
                tail_match = self._tail_regexp.match(range_str)
                if tail_match is not None:
                    tail = int(tail_match.group('tail'))
                else:
                    # Syntactically invalid range
                    return 0, []
        return tail, ranges

    def _header_line_length(self, keyword, value):
        """Return the size of the 'keyword: value' header line, CRLF included."""
        header_line = '%s: %s\r\n' % (keyword, value)
        return len(header_line)

    def send_head(self):
        """Overrides base implementation to work around a bug in python2.5."""
        path = self.translate_path(self.path)
        if os.path.isdir(path) and not self.path.endswith('/'):
            # redirect browser - doing basically what apache does when
            # DirectorySlash option is On which is quite common (braindead, but
            # common)
            self.send_response(301)
            self.send_header("Location", self.path + "/")
            # Indicates that the body is empty for HTTP/1.1 clients
            self.send_header('Content-Length', '0')
            self.end_headers()
            return None

        return SimpleHTTPServer.SimpleHTTPRequestHandler.send_head(self)

    def send_range_content(self, file, start, length):
        """Write ``length`` bytes of ``file``, read from offset ``start``."""
        file.seek(start)
        self.wfile.write(file.read(length))

    def get_single_range(self, file, file_size, start, end):
        """Answer 206 with the bytes of a single inclusive range.

        :param file: the opened file to read from.
        :param file_size: total size of the file, reported in Content-Range.
        :param start: offset of the first byte sent.
        :param end: offset of the last byte sent.
        """
        self.send_response(206)
        length = end - start + 1
        self.send_header('Accept-Ranges', 'bytes')
        self.send_header("Content-Length", "%d" % length)

        self.send_header("Content-Type", 'application/octet-stream')
        self.send_header("Content-Range", "bytes %d-%d/%d" % (start,
                                                              end,
                                                              file_size))
        self.end_headers()
        self.send_range_content(file, start, length)

    def get_multiple_ranges(self, file, file_size, ranges):
        """Answer 206 with a multipart/byteranges body, one part per range.

        :param file: the opened file to read from.
        :param file_size: total size of the file, reported in Content-Range.
        :param ranges: a list of inclusive (start, end) offsets.
        """
        self.send_response(206)
        self.send_header('Accept-Ranges', 'bytes')
        boundary = '%d' % random.randint(0, 0x7FFFFFFF)
        self.send_header('Content-Type',
                         'multipart/byteranges; boundary=%s' % boundary)
        boundary_line = '--%s\r\n' % boundary
        # Calculate the Content-Length, it has to match exactly what the loop
        # below writes
        content_length = 0
        for (start, end) in ranges:
            content_length += len(boundary_line)
            content_length += self._header_line_length(
                'Content-type', 'application/octet-stream')
            content_length += self._header_line_length(
                'Content-Range', 'bytes %d-%d/%d' % (start, end, file_size))
            content_length += len('\r\n') # end headers
            # Ranges are inclusive: end - start + 1 bytes are sent below
            content_length += end - start + 1
        content_length += len(boundary_line)
        self.send_header('Content-length', content_length)
        self.end_headers()

        # Send the multipart body
        for (start, end) in ranges:
            self.wfile.write(boundary_line)
            self.send_header('Content-type', 'application/octet-stream')
            self.send_header('Content-Range', 'bytes %d-%d/%d'
                             % (start, end, file_size))
            self.end_headers()
            self.send_range_content(file, start, end - start + 1)
        # Final boundary
        self.wfile.write(boundary_line)

    def do_GET(self):
        """Serve a GET request.

        Handles the Range header.
        """
        # Update statistics
        self.server.test_case_server.GET_request_nb += 1

        path = self.translate_path(self.path)
        ranges_header_value = self.headers.get('Range')
        if ranges_header_value is None or os.path.isdir(path):
            # Let the mother class handle most cases
            return SimpleHTTPServer.SimpleHTTPRequestHandler.do_GET(self)

        try:
            # Always read in binary mode. Opening files in text
            # mode may cause newline translations, making the
            # actual size of the content transmitted *less* than
            # the content-length!
            file = open(path, 'rb')
        except IOError:
            self.send_error(404, "File not found")
            return

        # Whatever happens while answering, do not leak the file
        try:
            file_size = os.fstat(file.fileno())[6]
            tail, ranges = self.parse_ranges(ranges_header_value)
            # Normalize tail into ranges. RFC2616 14.35.1: a suffix longer
            # than the file selects the whole file, hence the clamping.
            if tail != 0:
                ranges.append((max(0, file_size - tail), file_size))

            self._satisfiable_ranges = len(ranges) > 0
            checked_ranges = []
            for (start, end) in ranges:
                # RFC2616 14.35, ranges are invalid if start >= file_size
                if start >= file_size:
                    self._satisfiable_ranges = False
                    break
                # RFC2616 14.35, end values should be truncated
                # to file_size -1 if they exceed it
                checked_ranges.append((start, min(end, file_size - 1)))

            if not self._satisfiable_ranges:
                # RFC2616 14.16 and 14.35 says that when a server
                # encounters unsatisfiable range specifiers, it
                # SHOULD return a 416.
                # FIXME: We SHOULD send a Content-Range header too,
                # but the implementation of send_error does not
                # allows that. So far.
                self.send_error(416, "Requested range not satisfiable")
                return

            if len(checked_ranges) == 1:
                (start, end) = checked_ranges[0]
                self.get_single_range(file, file_size, start, end)
            else:
                self.get_multiple_ranges(file, file_size, checked_ranges)
        finally:
            file.close()

    def translate_path(self, path):
        """Translate a /-separated PATH to the local filename syntax.

        If the server requires it, proxy the path before the usual translation
        """
        if self.server.test_case_server.proxy_requests:
            # We need to act as a proxy and accept absolute urls,
            # which SimpleHTTPRequestHandler (parent) is not
            # ready for. So we just drop the protocol://host:port
            # part in front of the request-url (because we know
            # we would not forward the request to *another*
            # proxy).

            # So we do what SimpleHTTPRequestHandler.translate_path
            # do beginning with python 2.4.3: abandon query
            # parameters, scheme, host port, etc (which ensure we
            # provide the right behaviour on all python versions).
            path = urlparse.urlparse(path)[2]
            # And now, we can apply *our* trick to proxy files
            # NOTE(review): the statement implementing the trick was missing
            # from the reviewed text; the '-proxied' suffix is assumed --
            # confirm against the proxy tests.
            path += '-proxied'

        return self._translate_path(path)

    def _translate_path(self, path):
        """Translate a /-separated PATH to the local filename syntax.

        Note that we're translating http URLs here, not file URLs.
        The URL root location is the server's startup directory.
        Components that mean special things to the local file system
        (e.g. drive or directory names) are ignored. (XXX They should
        probably be diagnosed.)

        Override from python standard library to stop it calling os.getcwd()
        """
        # abandon query parameters
        path = urlparse.urlparse(path)[2]
        path = posixpath.normpath(urllib.unquote(path))
        path = path.decode('utf-8')
        words = [word for word in path.split('/') if word]
        path = self._cwd
        for num, word in enumerate(words):
            # NOTE(review): the guard on 'num' was missing from the reviewed
            # text; stripping the drive from the first component only is
            # assumed -- confirm.
            if num == 0:
                drive, word = os.path.splitdrive(word)
            head, word = os.path.split(word)
            if word in (os.curdir, os.pardir):
                continue
            path = os.path.join(path, word)
        return path
class TestingHTTPServerMixin:
    """State and clean-up shared by the testing HTTP servers.

    The class mixing this in is expected to provide ``socket`` and
    ``server_close()`` (the SocketServer servers do).
    """

    def __init__(self, test_case_server):
        """Remember the test server and the directory it serves.

        :param test_case_server: the object owning this server; it must
            provide a ``_home_dir`` attribute.
        """
        # test_case_server can be used to communicate between the
        # tests and the server (or the request handler and the
        # server), allowing dynamic behaviors to be defined from
        # the tests cases.
        self.test_case_server = test_case_server
        self._home_dir = test_case_server._home_dir

    def tearDown(self):
        """Called to clean-up the server.

        Since the server may be (surely is, even) in a blocking listen, we
        shutdown its socket before closing it.
        """
        # Note that this is executed as part of the implicit tear down in the
        # main thread while the server runs in its own thread. The clean way
        # to tear down the server is to instruct him to stop accepting
        # connections and wait for the current connection(s) to end
        # naturally. To end the connection naturally, the http transports
        # should close their socket when they do not need to talk to the
        # server anymore. This happens naturally during the garbage collection
        # phase of the test transport objects (the server clients), so we
        # don't have to worry about them. So, for the server, we must tear
        # down here, from the main thread, when the test have ended. Note
        # that since the server is in a blocking operation and since python
        # use select internally, shutting down the socket is reliable.
        try:
            self.socket.shutdown(socket.SHUT_RDWR)
        except socket.error:
            # The exception is fetched with sys.exc_info() rather than bound
            # in the except clause so that this parses with any python.
            e = sys.exc_info()[1]
            # WSAENOTCONN (10057) 'Socket is not connected' is harmless on
            # windows (occurs before the first connection attempt)
            if not len(e.args) or e.args[0] != 10057:
                raise
        # Let the server properly close the socket
        self.server_close()
class TestingHTTPServer(SocketServer.TCPServer, TestingHTTPServerMixin):
    """A TCPServer based test server carrying the testing mixin state."""

    def __init__(self, server_address, request_handler_class,
                 test_case_server):
        """Initialize the mixin state first, then the TCP server.

        :param server_address: the (host, port) passed to TCPServer.
        :param request_handler_class: the class handling the connections.
        :param test_case_server: the object owning this server.
        """
        TestingHTTPServerMixin.__init__(self, test_case_server)
        SocketServer.TCPServer.__init__(self, server_address,
                                        request_handler_class)
class TestingThreadingHTTPServer(SocketServer.ThreadingTCPServer,
                                 TestingHTTPServerMixin):
    """A threading HTTP test server for HTTP 1.1.

    Since tests can initiate several concurrent connections to the same http
    server, we need an independent connection for each of them. We achieve that
    by spawning a new thread for each connection.
    """

    def __init__(self, server_address, request_handler_class,
                 test_case_server):
        """Initialize the mixin state first, then the threading TCP server.

        :param server_address: the (host, port) passed to ThreadingTCPServer.
        :param request_handler_class: the class handling the connections.
        :param test_case_server: the object owning this server.
        """
        TestingHTTPServerMixin.__init__(self, test_case_server)
        SocketServer.ThreadingTCPServer.__init__(self, server_address,
                                                 request_handler_class)
        # Decides how threads will act upon termination of the main
        # process. This is prophylactic as we should not leave the threads
        # lying around.
        self.daemon_threads = True
384
class HttpServer(transport.Server):
385
"""A test server for http transports.
387
Subclasses can provide a specific request handler.
390
# The real servers depending on the protocol
391
http_server_class = {'HTTP/1.0': TestingHTTPServer,
392
'HTTP/1.1': TestingThreadingHTTPServer,
395
# Whether or not we proxy the requests (see
396
# TestingHTTPRequestHandler.translate_path).
397
proxy_requests = False
399
# used to form the url that connects to this server
400
_url_protocol = 'http'
402
def __init__(self, request_handler=TestingHTTPRequestHandler,
403
protocol_version=None):
406
:param request_handler: a class that will be instantiated to handle an
407
http connection (one or several requests).
409
:param protocol_version: if specified, will override the protocol
410
version of the request handler.
412
transport.Server.__init__(self)
413
self.request_handler = request_handler
414
self.host = 'localhost'
417
self.protocol_version = protocol_version
418
# Allows tests to verify number of GET requests issued
419
self.GET_request_nb = 0
421
def _get_httpd(self):
422
if self._httpd is None:
423
rhandler = self.request_handler
424
# Depending on the protocol version, we will create the approriate
426
if self.protocol_version is None:
427
# Use the request handler one
428
proto_vers = rhandler.protocol_version
430
# Use our own, it will be used to override the request handler
432
proto_vers = self.protocol_version
433
# Create the appropriate server for the required protocol
434
serv_cls = self.http_server_class.get(proto_vers, None)
436
raise httplib.UnknownProtocol(proto_vers)
438
self._httpd = serv_cls((self.host, self.port), rhandler, self)
439
host, self.port = self._httpd.socket.getsockname()
442
def _http_start(self):
443
"""Server thread main entry point. """
444
self._http_running = False
447
httpd = self._get_httpd()
448
self._http_base_url = '%s://%s:%s/' % (self._url_protocol,
449
self.host, self.port)
450
self._http_running = True
452
# Whatever goes wrong, we save the exception for the main
453
# thread. Note that since we are running in a thread, no signal
454
# can be received, so we don't care about KeyboardInterrupt.
455
self._http_exception = sys.exc_info()
457
# Release the lock or the main thread will block and the whole
459
self._http_starting.release()
461
# From now on, exceptions are taken care of by the
462
# SocketServer.BaseServer or the request handler.
463
while self._http_running:
465
# Really an HTTP connection but the python framework is generic
466
# and call them requests
467
httpd.handle_request()
468
except socket.timeout:
471
def _get_remote_url(self, path):
472
path_parts = path.split(os.path.sep)
473
if os.path.isabs(path):
474
if path_parts[:len(self._local_path_parts)] != \
475
self._local_path_parts:
476
raise BadWebserverPath(path, self.test_dir)
477
remote_path = '/'.join(path_parts[len(self._local_path_parts):])
479
remote_path = '/'.join(path_parts)
481
return self._http_base_url + remote_path
483
def log(self, format, *args):
484
"""Capture Server log output."""
485
self.logs.append(format % args)
487
def setUp(self, backing_transport_server=None):
488
"""See bzrlib.transport.Server.setUp.
490
:param backing_transport_server: The transport that requests over this
491
protocol should be forwarded to. Note that this is currently not
494
# XXX: TODO: make the server back onto vfs_server rather than local
496
if not (backing_transport_server is None or \
497
isinstance(backing_transport_server, local.LocalURLServer)):
498
raise AssertionError(
499
"HTTPServer currently assumes local transport, got %s" % \
500
backing_transport_server)
501
self._home_dir = os.getcwdu()
502
self._local_path_parts = self._home_dir.split(os.path.sep)
503
self._http_base_url = None
505
# Create the server thread
506
self._http_starting = threading.Lock()
507
self._http_starting.acquire()
508
self._http_thread = threading.Thread(target=self._http_start)
509
self._http_thread.setDaemon(True)
510
self._http_exception = None
511
self._http_thread.start()
513
# Wait for the server thread to start (i.e release the lock)
514
self._http_starting.acquire()
516
if self._http_exception is not None:
517
# Something went wrong during server start
518
exc_class, exc_value, exc_tb = self._http_exception
519
raise exc_class, exc_value, exc_tb
520
self._http_starting.release()
524
"""See bzrlib.transport.Server.tearDown."""
525
self._httpd.tearDown()
526
self._http_running = False
527
# We don't need to 'self._http_thread.join()' here since the thread is
528
# a daemonic one and will be garbage collected anyway. Joining just
529
# slows us down for no added benefit.
532
"""See bzrlib.transport.Server.get_url."""
533
return self._get_remote_url(self._home_dir)
535
def get_bogus_url(self):
536
"""See bzrlib.transport.Server.get_bogus_url."""
537
# this is chosen to try to prevent trouble with proxies, weird dns,
539
return self._url_protocol + '://127.0.0.1:1/'
class HttpServer_urllib(HttpServer):
    """Subclass of HttpServer that gives http+urllib urls.

    This is for use in testing: connections to this server will always go
    through urllib where possible.
    """

    # urls returned by this server should require the urllib client impl
    _url_protocol = 'http+urllib'
class HttpServer_PyCurl(HttpServer):
    """Subclass of HttpServer that gives http+pycurl urls.

    This is for use in testing: connections to this server will always go
    through pycurl where possible.
    """

    # We don't care about checking the pycurl availability as
    # this server will be required only when pycurl is present

    # urls returned by this server should require the pycurl client impl
    _url_protocol = 'http+pycurl'