1
# Copyright (C) 2006, 2007 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24
import SimpleHTTPServer
33
from bzrlib import transport
34
from bzrlib.transport import local
37
class WebserverNotAvailable(Exception):
    """Error type for reporting that a web server is not available."""
    pass
41
class BadWebserverPath(ValueError):
    """Raised for a path that is not located under the served directory.

    The exception is built from two arguments: the offending path and the
    directory it was expected to be in.
    """

    def __str__(self):
        # self.args is the (path, directory) pair given to the constructor.
        return 'path %s is not in %s' % self.args
46
class TestingHTTPRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):
47
"""Handles one request.
49
A TestingHTTPRequestHandler is instantiated for every request received by
50
the associated server. Note that 'request' here is inherited from the base
51
TCPServer class, for the HTTP server it is really a connection which itself
52
will handle one or several HTTP requests.
54
# Default protocol version
55
protocol_version = 'HTTP/1.1'
57
# The Message-like class used to parse the request headers
58
MessageClass = httplib.HTTPMessage
61
SimpleHTTPServer.SimpleHTTPRequestHandler.setup(self)
62
self._cwd = self.server._home_dir
63
tcs = self.server.test_case_server
64
if tcs.protocol_version is not None:
65
# If the test server forced a protocol version, use it
66
self.protocol_version = tcs.protocol_version
68
def log_message(self, format, *args):
    """Forward a log entry to the test case server instead of stderr.

    :param format: %-style format string describing the event.
    :param args: values interpolated into ``format``.
    """
    tcs = self.server.test_case_server
    # Five placeholders: client address, date, the formatted message,
    # then the Referer and User-Agent request headers ('-' when absent).
    tcs.log('webserver - %s - - [%s] %s "%s" "%s"',
            self.address_string(),
            self.log_date_time_string(),
            format % args,
            self.headers.get('referer', '-'),
            self.headers.get('user-agent', '-'))
77
def handle_one_request(self):
    """Handle a single HTTP request.

    We catch all socket errors occurring when the client closes the
    connection early to avoid polluting the test results. Any other socket
    error is re-raised. In both cases the connection is flagged for closing.
    """
    try:
        SimpleHTTPServer.SimpleHTTPRequestHandler.handle_one_request(self)
    except socket.error:
        # The exception is fetched through sys.exc_info() rather than bound
        # in the except clause so that this parses on every Python version.
        e = sys.exc_info()[1]
        # Any socket error should close the connection, but some errors are
        # due to the client closing early and we don't want to pollute test
        # results, so we raise only the others.
        self.close_connection = 1
        if (len(e.args) == 0
            or e.args[0] not in (errno.EPIPE, errno.ECONNRESET,
                                 errno.ECONNABORTED, errno.EBADF)):
            raise
95
_range_regexp = re.compile(r'^(?P<start>\d+)-(?P<end>\d+)$')
96
_tail_regexp = re.compile(r'^-(?P<tail>\d+)$')
98
def parse_ranges(self, ranges_header):
    """Parse the Range header value and return the tail and the ranges.

    RFC2616 14.35 says that syntactically invalid range
    specifiers MUST be ignored. In that case, we return 0 for
    tail and [] for ranges.

    :param ranges_header: the raw header value, e.g. 'bytes=0-9,-20'.
    :return: a (tail, ranges) tuple. ``ranges`` is the list of
        (start, end) pairs found, ``tail`` the size given by the last
        '-N' specifier (0 when there is none).
    """
    tail = 0
    ranges = []
    if not ranges_header.startswith('bytes='):
        # Syntactically invalid header
        return 0, []

    ranges_header = ranges_header[len('bytes='):]
    for range_str in ranges_header.split(','):
        # FIXME: RFC2616 says end is optional and defaults to file_size
        range_match = self._range_regexp.match(range_str)
        if range_match is not None:
            start = int(range_match.group('start'))
            end = int(range_match.group('end'))
            if start > end:
                # Syntactically invalid range
                return 0, []
            ranges.append((start, end))
        else:
            tail_match = self._tail_regexp.match(range_str)
            if tail_match is not None:
                tail = int(tail_match.group('tail'))
            else:
                # Syntactically invalid range
                return 0, []
    return tail, ranges
131
def _header_line_length(self, keyword, value):
    """Return the length of the header line 'keyword: value' plus its CRLF."""
    header_line = '%s: %s\r\n' % (keyword, value)
    return len(header_line)
136
"""Overrides base implementation to work around a bug in python2.5."""
137
path = self.translate_path(self.path)
138
if os.path.isdir(path) and not self.path.endswith('/'):
139
# redirect browser - doing basically what apache does when
140
# DirectorySlash option is On which is quite common (braindead, but
142
self.send_response(301)
143
self.send_header("Location", self.path + "/")
144
# Indicates that the body is empty for HTTP/1.1 clients
145
self.send_header('Content-Length', '0')
149
return SimpleHTTPServer.SimpleHTTPRequestHandler.send_head(self)
151
def send_range_content(self, file, start, length):
    """Write ``length`` bytes of ``file``, read from offset ``start``.

    :param file: a seekable file object holding the content to serve.
    :param start: offset of the first byte to send.
    :param length: number of bytes to read and send.
    """
    # Position the file first, otherwise the read would begin wherever a
    # previous range left the file pointer.
    file.seek(start)
    self.wfile.write(file.read(length))
155
def get_single_range(self, file, file_size, start, end):
156
self.send_response(206)
157
length = end - start + 1
158
self.send_header('Accept-Ranges', 'bytes')
159
self.send_header("Content-Length", "%d" % length)
161
self.send_header("Content-Type", 'application/octet-stream')
162
self.send_header("Content-Range", "bytes %d-%d/%d" % (start,
166
self.send_range_content(file, start, length)
168
def get_multiple_ranges(self, file, file_size, ranges):
169
self.send_response(206)
170
self.send_header('Accept-Ranges', 'bytes')
171
boundary = '%d' % random.randint(0,0x7FFFFFFF)
172
self.send_header('Content-Type',
173
'multipart/byteranges; boundary=%s' % boundary)
174
boundary_line = '--%s\r\n' % boundary
175
# Calculate the Content-Length
177
for (start, end) in ranges:
178
content_length += len(boundary_line)
179
content_length += self._header_line_length(
180
'Content-type', 'application/octet-stream')
181
content_length += self._header_line_length(
182
'Content-Range', 'bytes %d-%d/%d' % (start, end, file_size))
183
content_length += len('\r\n') # end headers
184
content_length += end - start # + 1
185
content_length += len(boundary_line)
186
self.send_header('Content-length', content_length)
189
# Send the multipart body
190
for (start, end) in ranges:
191
self.wfile.write(boundary_line)
192
self.send_header('Content-type', 'application/octet-stream')
193
self.send_header('Content-Range', 'bytes %d-%d/%d'
194
% (start, end, file_size))
196
self.send_range_content(file, start, end - start + 1)
198
self.wfile.write(boundary_line)
201
"""Serve a GET request.
203
Handles the Range header.
206
self.server.test_case_server.GET_request_nb += 1
208
path = self.translate_path(self.path)
209
ranges_header_value = self.headers.get('Range')
210
if ranges_header_value is None or os.path.isdir(path):
211
# Let the mother class handle most cases
212
return SimpleHTTPServer.SimpleHTTPRequestHandler.do_GET(self)
215
# Always read in binary mode. Opening files in text
216
# mode may cause newline translations, making the
217
# actual size of the content transmitted *less* than
218
# the content-length!
219
file = open(path, 'rb')
221
self.send_error(404, "File not found")
224
file_size = os.fstat(file.fileno())[6]
225
tail, ranges = self.parse_ranges(ranges_header_value)
226
# Normalize tail into ranges
228
ranges.append((file_size - tail, file_size))
230
self._satisfiable_ranges = True
232
self._satisfiable_ranges = False
234
def check_range(range_specifier):
235
start, end = range_specifier
236
# RFC2616 14.35, ranges are invalid if start >= file_size
237
if start >= file_size:
238
self._satisfiable_ranges = False # Side-effect !
240
# RFC2616 14.35, end values should be truncated
241
# to file_size -1 if they exceed it
242
end = min(end, file_size - 1)
245
ranges = map(check_range, ranges)
247
if not self._satisfiable_ranges:
248
# RFC2616 14.16 and 14.35 say that when a server
249
# encounters unsatisfiable range specifiers, it
250
# SHOULD return a 416.
252
# FIXME: We SHOULD send a Content-Range header too,
253
# but the implementation of send_error does not
254
# allow that. So far.
255
self.send_error(416, "Requested range not satisfiable")
259
(start, end) = ranges[0]
260
self.get_single_range(file, file_size, start, end)
262
self.get_multiple_ranges(file, file_size, ranges)
265
def translate_path(self, path):
266
"""Translate a /-separated PATH to the local filename syntax.
268
If the server requires it, proxy the path before the usual translation
270
if self.server.test_case_server.proxy_requests:
271
# We need to act as a proxy and accept absolute urls,
272
# which SimpleHTTPRequestHandler (parent) is not
273
# ready for. So we just drop the protocol://host:port
274
# part in front of the request-url (because we know
275
# we would not forward the request to *another*
278
# So we do what SimpleHTTPRequestHandler.translate_path
279
# does beginning with python 2.4.3: abandon query
280
# parameters, scheme, host port, etc (which ensure we
281
# provide the right behaviour on all python versions).
282
path = urlparse.urlparse(path)[2]
283
# And now, we can apply *our* trick to proxy files
286
return self._translate_path(path)
288
def _translate_path(self, path):
289
"""Translate a /-separated PATH to the local filename syntax.
291
Note that we're translating http URLs here, not file URLs.
292
The URL root location is the server's startup directory.
293
Components that mean special things to the local file system
294
(e.g. drive or directory names) are ignored. (XXX They should
295
probably be diagnosed.)
297
Override from python standard library to stop it calling os.getcwd()
299
# abandon query parameters
300
path = urlparse.urlparse(path)[2]
301
path = posixpath.normpath(urllib.unquote(path))
302
path = path.decode('utf-8')
303
words = path.split('/')
304
words = filter(None, words)
306
for num, word in enumerate(words):
308
drive, word = os.path.splitdrive(word)
309
head, word = os.path.split(word)
310
if word in (os.curdir, os.pardir): continue
311
path = os.path.join(path, word)
315
class TestingHTTPServerMixin:
    """State and clean-up shared by the testing HTTP server classes."""

    def __init__(self, test_case_server):
        # test_case_server can be used to communicate between the
        # tests and the server (or the request handler and the
        # server), allowing dynamic behaviors to be defined from
        # the tests.
        self.test_case_server = test_case_server
        self._home_dir = test_case_server._home_dir

    def tearDown(self):
        """Called to clean-up the server.

        Since the server may be (surely is, even) in a blocking listen, we
        shutdown its socket before closing it.
        """
        # Note that this is executed as part of the implicit tear down in the
        # main thread while the server runs in its own thread. The clean way
        # to tear down the server is to instruct it to stop accepting
        # connections and wait for the current connection(s) to end
        # naturally. To end the connection naturally, the http transports
        # should close their socket when they do not need to talk to the
        # server anymore. This happens naturally during the garbage collection
        # phase of the test transport objects (the server clients), so we
        # don't have to worry about them. So, for the server, we must tear
        # down here, from the main thread, when the tests have ended. Note
        # that since the server is in a blocking operation and since python
        # uses select internally, shutting down the socket is reliable.
        try:
            self.socket.shutdown(socket.SHUT_RDWR)
        except socket.error:
            # Fetched through sys.exc_info() so that the except clause parses
            # on every Python version.
            e = sys.exc_info()[1]
            # WSAENOTCONN (10057) 'Socket is not connected' is harmless on
            # windows (occurs before the first connection attempt).

            # 'Socket is not connected' can also occur on OSX, with a
            # "regular" ENOTCONN (when something went wrong during test case
            # setup leading to self.setUp() *not* being called but
            # self.tearDown() still being called) -- vila20081106
            if not len(e.args) or e.args[0] not in (errno.ENOTCONN, 10057):
                raise
        # Let the server properly close the socket
        self.server_close()
361
class TestingHTTPServer(SocketServer.TCPServer, TestingHTTPServerMixin):
    """A TCPServer combined with the testing mixin.

    :param server_address: the address handed to SocketServer.TCPServer.
    :param request_handler_class: the class instantiated for each connection.
    :param test_case_server: the object the mixin keeps for communicating
        with the tests.
    """

    def __init__(self, server_address, request_handler_class,
                 test_case_server):
        # The mixin state is set first, then the socket server is created.
        TestingHTTPServerMixin.__init__(self, test_case_server)
        SocketServer.TCPServer.__init__(self, server_address,
                                        request_handler_class)
370
class TestingThreadingHTTPServer(SocketServer.ThreadingTCPServer,
                                 TestingHTTPServerMixin):
    """A threading HTTP test server for HTTP 1.1.

    Since tests can initiate several concurrent connections to the same http
    server, we need an independent connection for each of them. We achieve that
    by spawning a new thread for each connection.
    """

    def __init__(self, server_address, request_handler_class,
                 test_case_server):
        TestingHTTPServerMixin.__init__(self, test_case_server)
        SocketServer.ThreadingTCPServer.__init__(self, server_address,
                                                 request_handler_class)
        # Decides how threads will act upon termination of the main
        # process. This is prophylactic as no thread should be left behind
        # anyway.
        self.daemon_threads = True
390
class HttpServer(transport.Server):
391
"""A test server for http transports.
393
Subclasses can provide a specific request handler.
396
# The real servers depending on the protocol
397
http_server_class = {'HTTP/1.0': TestingHTTPServer,
398
'HTTP/1.1': TestingThreadingHTTPServer,
401
# Whether or not we proxy the requests (see
402
# TestingHTTPRequestHandler.translate_path).
403
proxy_requests = False
405
# used to form the url that connects to this server
406
_url_protocol = 'http'
408
def __init__(self, request_handler=TestingHTTPRequestHandler,
409
protocol_version=None):
412
:param request_handler: a class that will be instantiated to handle an
413
http connection (one or several requests).
415
:param protocol_version: if specified, will override the protocol
416
version of the request handler.
418
transport.Server.__init__(self)
419
self.request_handler = request_handler
420
self.host = 'localhost'
423
self.protocol_version = protocol_version
424
# Allows tests to verify number of GET requests issued
425
self.GET_request_nb = 0
428
return "%s(%s:%s)" % \
429
(self.__class__.__name__, self.host, self.port)
431
def _get_httpd(self):
    """Return the underlying server, creating it on the first call.

    The server class is looked up in ``http_server_class`` using this
    object's protocol version when one was given, the request handler's
    otherwise. Once created, the server's actual port is recorded in
    ``self.port``.

    :raises httplib.UnknownProtocol: if no server class is registered for
        the protocol version.
    """
    if self._httpd is None:
        rhandler = self.request_handler
        # Depending on the protocol version, we will create the appropriate
        # server
        if self.protocol_version is None:
            # Use the request handler one
            proto_vers = rhandler.protocol_version
        else:
            # Use our own, it will be used to override the request handler
            # one too.
            proto_vers = self.protocol_version
        # Create the appropriate server for the required protocol
        serv_cls = self.http_server_class.get(proto_vers, None)
        if serv_cls is None:
            raise httplib.UnknownProtocol(proto_vers)
        self._httpd = serv_cls((self.host, self.port), rhandler, self)
        # Only the port is of interest: read it from the bound address.
        self.port = self._httpd.socket.getsockname()[1]
    return self._httpd
452
def _http_start(self):
453
"""Server thread main entry point. """
454
self._http_running = False
457
httpd = self._get_httpd()
458
self._http_base_url = '%s://%s:%s/' % (self._url_protocol,
459
self.host, self.port)
460
self._http_running = True
462
# Whatever goes wrong, we save the exception for the main
463
# thread. Note that since we are running in a thread, no signal
464
# can be received, so we don't care about KeyboardInterrupt.
465
self._http_exception = sys.exc_info()
467
# Release the lock or the main thread will block and the whole
469
self._http_starting.release()
471
# From now on, exceptions are taken care of by the
472
# SocketServer.BaseServer or the request handler.
473
while self._http_running:
475
# Really an HTTP connection but the python framework is generic
476
# and calls them requests
477
httpd.handle_request()
478
except socket.timeout:
480
except (socket.error, select.error), e:
481
if e[0] == errno.EBADF:
482
# Starting with python-2.6, handle_request may raise socket
483
# or select exceptions when the server is shut down (as we
489
def _get_remote_url(self, path):
    """Return the URL this server uses for a local path.

    :param path: a local path. An absolute path must start with the
        components stored in ``_local_path_parts``; a relative path is
        used as is.
    :raises BadWebserverPath: if an absolute path does not start with
        those components.
    """
    path_parts = path.split(os.path.sep)
    if os.path.isabs(path):
        prefix_len = len(self._local_path_parts)
        if path_parts[:prefix_len] != self._local_path_parts:
            # NOTE(review): 'test_dir' is not set anywhere in the visible
            # code; fall back to the directory the comparison above is
            # really made against so that the intended error is raised.
            base_dir = getattr(self, 'test_dir', None)
            if base_dir is None:
                base_dir = os.path.sep.join(self._local_path_parts)
            raise BadWebserverPath(path, base_dir)
        remote_path = '/'.join(path_parts[prefix_len:])
    else:
        remote_path = '/'.join(path_parts)

    return self._http_base_url + remote_path
501
def log(self, format, *args):
    """Capture Server log output.

    :param format: %-style format string.
    :param args: values interpolated into ``format``; the resulting string
        is appended to ``self.logs``.
    """
    self.logs.append(format % args)
505
def setUp(self, backing_transport_server=None):
506
"""See bzrlib.transport.Server.setUp.
508
:param backing_transport_server: The transport that requests over this
509
protocol should be forwarded to. Note that this is currently not
512
# XXX: TODO: make the server back onto vfs_server rather than local
514
if not (backing_transport_server is None or \
515
isinstance(backing_transport_server, local.LocalURLServer)):
516
raise AssertionError(
517
"HTTPServer currently assumes local transport, got %s" % \
518
backing_transport_server)
519
self._home_dir = os.getcwdu()
520
self._local_path_parts = self._home_dir.split(os.path.sep)
521
self._http_base_url = None
523
# Create the server thread
524
self._http_starting = threading.Lock()
525
self._http_starting.acquire()
526
self._http_thread = threading.Thread(target=self._http_start)
527
self._http_thread.setDaemon(True)
528
self._http_exception = None
529
self._http_thread.start()
531
# Wait for the server thread to start (i.e release the lock)
532
self._http_starting.acquire()
534
if self._http_exception is not None:
535
# Something went wrong during server start
536
exc_class, exc_value, exc_tb = self._http_exception
537
raise exc_class, exc_value, exc_tb
538
self._http_starting.release()
542
"""See bzrlib.transport.Server.tearDown."""
543
self._httpd.tearDown()
544
self._http_running = False
545
# We don't need to 'self._http_thread.join()' here since the thread is
546
# a daemonic one and will be garbage collected anyway. Joining just
547
# slows us down for no added benefit.
550
"""See bzrlib.transport.Server.get_url."""
551
return self._get_remote_url(self._home_dir)
553
def get_bogus_url(self):
    """See bzrlib.transport.Server.get_bogus_url."""
    # this is chosen to try to prevent trouble with proxies, weird dns,
    # etc.
    return self._url_protocol + '://127.0.0.1:1/'
560
class HttpServer_urllib(HttpServer):
    """Subclass of HttpServer that gives http+urllib urls.

    This is for use in testing: connections to this server will always go
    through urllib where possible.
    """

    # urls returned by this server should require the urllib client impl
    _url_protocol = 'http+urllib'
571
class HttpServer_PyCurl(HttpServer):
    """Subclass of HttpServer that gives http+pycurl urls.

    This is for use in testing: connections to this server will always go
    through pycurl where possible.
    """

    # We don't care about checking the pycurl availability as
    # this server will be required only when pycurl is present

    # urls returned by this server should require the pycurl client impl
    _url_protocol = 'http+pycurl'