1
# Copyright (C) 2006, 2007 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24
import SimpleHTTPServer
33
from bzrlib import transport
34
from bzrlib.transport import local
37
class WebserverNotAvailable(Exception):
41
class BadWebserverPath(ValueError):
43
return 'path %s is not in %s' % self.args
46
class TestingHTTPRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):
47
"""Handles one request.
49
A TestingHTTPRequestHandler is instantiated for every request received by
50
the associated server. Note that 'request' here is inherited from the base
51
TCPServer class, for the HTTP server it is really a connection which itself
52
will handle one or several HTTP requests.
54
# Default protocol version
55
protocol_version = 'HTTP/1.1'
57
# The Message-like class used to parse the request headers
58
MessageClass = httplib.HTTPMessage
61
SimpleHTTPServer.SimpleHTTPRequestHandler.setup(self)
62
self._cwd = self.server._home_dir
63
tcs = self.server.test_case_server
64
if tcs.protocol_version is not None:
65
# If the test server forced a protocol version, use it
66
self.protocol_version = tcs.protocol_version
68
def log_message(self, format, *args):
69
tcs = self.server.test_case_server
70
tcs.log('webserver - %s - - [%s] %s "%s" "%s"',
71
self.address_string(),
72
self.log_date_time_string(),
74
self.headers.get('referer', '-'),
75
self.headers.get('user-agent', '-'))
77
def handle_one_request(self):
78
"""Handle a single HTTP request.
80
We catch all socket errors occurring when the client close the
81
connection early to avoid polluting the test results.
84
SimpleHTTPServer.SimpleHTTPRequestHandler.handle_one_request(self)
85
except socket.error, e:
86
# Any socket error should close the connection, but some errors are
87
# due to the client closing early and we don't want to pollute test
88
# results, so we raise only the others.
89
self.close_connection = 1
91
or e.args[0] not in (errno.EPIPE, errno.ECONNRESET,
92
errno.ECONNABORTED, errno.EBADF)):
95
_range_regexp = re.compile(r'^(?P<start>\d+)-(?P<end>\d+)$')
96
_tail_regexp = re.compile(r'^-(?P<tail>\d+)$')
98
def parse_ranges(self, ranges_header):
99
"""Parse the range header value and returns ranges and tail.
101
RFC2616 14.35 says that syntactically invalid range
102
specifiers MUST be ignored. In that case, we return 0 for
103
tail and [] for ranges.
107
if not ranges_header.startswith('bytes='):
108
# Syntactically invalid header
111
ranges_header = ranges_header[len('bytes='):]
112
for range_str in ranges_header.split(','):
113
# FIXME: RFC2616 says end is optional and default to file_size
114
range_match = self._range_regexp.match(range_str)
115
if range_match is not None:
116
start = int(range_match.group('start'))
117
end = int(range_match.group('end'))
119
# Syntactically invalid range
121
ranges.append((start, end))
123
tail_match = self._tail_regexp.match(range_str)
124
if tail_match is not None:
125
tail = int(tail_match.group('tail'))
127
# Syntactically invalid range
131
def _header_line_length(self, keyword, value):
132
header_line = '%s: %s\r\n' % (keyword, value)
133
return len(header_line)
136
"""Overrides base implementation to work around a bug in python2.5."""
137
path = self.translate_path(self.path)
138
if os.path.isdir(path) and not self.path.endswith('/'):
139
# redirect browser - doing basically what apache does when
140
# DirectorySlash option is On which is quite common (braindead, but
142
self.send_response(301)
143
self.send_header("Location", self.path + "/")
144
# Indicates that the body is empty for HTTP/1.1 clients
145
self.send_header('Content-Length', '0')
149
return SimpleHTTPServer.SimpleHTTPRequestHandler.send_head(self)
151
def send_range_content(self, file, start, length):
153
self.wfile.write(file.read(length))
155
def get_single_range(self, file, file_size, start, end):
156
self.send_response(206)
157
length = end - start + 1
158
self.send_header('Accept-Ranges', 'bytes')
159
self.send_header("Content-Length", "%d" % length)
161
self.send_header("Content-Type", 'application/octet-stream')
162
self.send_header("Content-Range", "bytes %d-%d/%d" % (start,
166
self.send_range_content(file, start, length)
168
def get_multiple_ranges(self, file, file_size, ranges):
169
self.send_response(206)
170
self.send_header('Accept-Ranges', 'bytes')
171
boundary = '%d' % random.randint(0,0x7FFFFFFF)
172
self.send_header('Content-Type',
173
'multipart/byteranges; boundary=%s' % boundary)
174
boundary_line = '--%s\r\n' % boundary
175
# Calculate the Content-Length
177
for (start, end) in ranges:
178
content_length += len(boundary_line)
179
content_length += self._header_line_length(
180
'Content-type', 'application/octet-stream')
181
content_length += self._header_line_length(
182
'Content-Range', 'bytes %d-%d/%d' % (start, end, file_size))
183
content_length += len('\r\n') # end headers
184
content_length += end - start # + 1
185
content_length += len(boundary_line)
186
self.send_header('Content-length', content_length)
189
# Send the multipart body
190
for (start, end) in ranges:
191
self.wfile.write(boundary_line)
192
self.send_header('Content-type', 'application/octet-stream')
193
self.send_header('Content-Range', 'bytes %d-%d/%d'
194
% (start, end, file_size))
196
self.send_range_content(file, start, end - start + 1)
198
self.wfile.write(boundary_line)
201
"""Serve a GET request.
203
Handles the Range header.
206
self.server.test_case_server.GET_request_nb += 1
208
path = self.translate_path(self.path)
209
ranges_header_value = self.headers.get('Range')
210
if ranges_header_value is None or os.path.isdir(path):
211
# Let the mother class handle most cases
212
return SimpleHTTPServer.SimpleHTTPRequestHandler.do_GET(self)
215
# Always read in binary mode. Opening files in text
216
# mode may cause newline translations, making the
217
# actual size of the content transmitted *less* than
218
# the content-length!
219
file = open(path, 'rb')
221
self.send_error(404, "File not found")
224
file_size = os.fstat(file.fileno())[6]
225
tail, ranges = self.parse_ranges(ranges_header_value)
226
# Normalize tail into ranges
228
ranges.append((file_size - tail, file_size))
230
self._satisfiable_ranges = True
232
self._satisfiable_ranges = False
234
def check_range(range_specifier):
235
start, end = range_specifier
236
# RFC2616 14.35, ranges are invalid if start >= file_size
237
if start >= file_size:
238
self._satisfiable_ranges = False # Side-effect !
240
# RFC2616 14.35, end values should be truncated
241
# to file_size -1 if they exceed it
242
end = min(end, file_size - 1)
245
ranges = map(check_range, ranges)
247
if not self._satisfiable_ranges:
248
# RFC2616 14.16 and 14.35 say that when a server
249
# encounters unsatisfiable range specifiers, it
250
# SHOULD return a 416.
252
# FIXME: We SHOULD send a Content-Range header too,
253
# but the implementation of send_error does not
254
# allow that. So far.
255
self.send_error(416, "Requested range not satisfiable")
259
(start, end) = ranges[0]
260
self.get_single_range(file, file_size, start, end)
262
self.get_multiple_ranges(file, file_size, ranges)
265
def translate_path(self, path):
266
"""Translate a /-separated PATH to the local filename syntax.
268
If the server requires it, proxy the path before the usual translation
270
if self.server.test_case_server.proxy_requests:
271
# We need to act as a proxy and accept absolute urls,
272
# which SimpleHTTPRequestHandler (parent) is not
273
# ready for. So we just drop the protocol://host:port
274
# part in front of the request-url (because we know
275
# we would not forward the request to *another*
278
# So we do what SimpleHTTPRequestHandler.translate_path
279
# does beginning with python 2.4.3: abandon query
280
# parameters, scheme, host port, etc (which ensure we
281
# provide the right behaviour on all python versions).
282
path = urlparse.urlparse(path)[2]
283
# And now, we can apply *our* trick to proxy files
286
return self._translate_path(path)
288
def _translate_path(self, path):
289
"""Translate a /-separated PATH to the local filename syntax.
291
Note that we're translating http URLs here, not file URLs.
292
The URL root location is the server's startup directory.
293
Components that mean special things to the local file system
294
(e.g. drive or directory names) are ignored. (XXX They should
295
probably be diagnosed.)
297
Override from python standard library to stop it calling os.getcwd()
299
# abandon query parameters
300
path = urlparse.urlparse(path)[2]
301
path = posixpath.normpath(urllib.unquote(path))
302
path = path.decode('utf-8')
303
words = path.split('/')
304
words = filter(None, words)
306
for num, word in enumerate(words):
308
drive, word = os.path.splitdrive(word)
309
head, word = os.path.split(word)
310
if word in (os.curdir, os.pardir): continue
311
path = os.path.join(path, word)
315
class TestingHTTPServerMixin:
317
def __init__(self, test_case_server):
318
# test_case_server can be used to communicate between the
319
# tests and the server (or the request handler and the
320
# server), allowing dynamic behaviors to be defined from
322
self.test_case_server = test_case_server
323
self._home_dir = test_case_server._home_dir
326
"""Called to clean-up the server.
328
Since the server may be (surely is, even) in a blocking listen, we
329
shutdown its socket before closing it.
331
# Note that this is executed as part of the implicit tear down in the
332
# main thread while the server runs in its own thread. The clean way
333
# to tear down the server is to instruct him to stop accepting
334
# connections and wait for the current connection(s) to end
335
# naturally. To end the connection naturally, the http transports
336
# should close their socket when they do not need to talk to the
337
# server anymore. This happens naturally during the garbage collection
338
# phase of the test transport objects (the server clients), so we
339
# don't have to worry about them. So, for the server, we must tear
340
# down here, from the main thread, when the test have ended. Note
341
# that since the server is in a blocking operation and since python
342
# uses select internally, shutting down the socket is reliable and
345
self.socket.shutdown(socket.SHUT_RDWR)
346
except socket.error, e:
347
# WSAENOTCONN (10057) 'Socket is not connected' is harmless on
348
# windows (occurs before the first connection attempt
350
if not len(e.args) or e.args[0] != 10057:
352
# Let the server properly close the socket
356
class TestingHTTPServer(SocketServer.TCPServer, TestingHTTPServerMixin):
358
def __init__(self, server_address, request_handler_class,
360
TestingHTTPServerMixin.__init__(self, test_case_server)
361
SocketServer.TCPServer.__init__(self, server_address,
362
request_handler_class)
365
class TestingThreadingHTTPServer(SocketServer.ThreadingTCPServer,
366
TestingHTTPServerMixin):
367
"""A threading HTTP test server for HTTP 1.1.
369
Since tests can initiate several concurrent connections to the same http
370
server, we need an independent connection for each of them. We achieve that
371
by spawning a new thread for each connection.
374
def __init__(self, server_address, request_handler_class,
376
TestingHTTPServerMixin.__init__(self, test_case_server)
377
SocketServer.ThreadingTCPServer.__init__(self, server_address,
378
request_handler_class)
379
# Decides how threads will act upon termination of the main
380
# process. This is prophylactic as we should not leave the threads
382
self.daemon_threads = True
385
class HttpServer(transport.Server):
386
"""A test server for http transports.
388
Subclasses can provide a specific request handler.
391
# The real servers depending on the protocol
392
http_server_class = {'HTTP/1.0': TestingHTTPServer,
393
'HTTP/1.1': TestingThreadingHTTPServer,
396
# Whether or not we proxy the requests (see
397
# TestingHTTPRequestHandler.translate_path).
398
proxy_requests = False
400
# used to form the url that connects to this server
401
_url_protocol = 'http'
403
def __init__(self, request_handler=TestingHTTPRequestHandler,
404
protocol_version=None):
407
:param request_handler: a class that will be instantiated to handle an
408
http connection (one or several requests).
410
:param protocol_version: if specified, will override the protocol
411
version of the request handler.
413
transport.Server.__init__(self)
414
self.request_handler = request_handler
415
self.host = 'localhost'
418
self.protocol_version = protocol_version
419
# Allows tests to verify number of GET requests issued
420
self.GET_request_nb = 0
423
return "%s(%s:%s)" % \
424
(self.__class__.__name__, self.host, self.port)
426
def _get_httpd(self):
427
if self._httpd is None:
428
rhandler = self.request_handler
429
# Depending on the protocol version, we will create the appropriate
431
if self.protocol_version is None:
432
# Use the request handler one
433
proto_vers = rhandler.protocol_version
435
# Use our own, it will be used to override the request handler
437
proto_vers = self.protocol_version
438
# Create the appropriate server for the required protocol
439
serv_cls = self.http_server_class.get(proto_vers, None)
441
raise httplib.UnknownProtocol(proto_vers)
443
self._httpd = serv_cls((self.host, self.port), rhandler, self)
444
host, self.port = self._httpd.socket.getsockname()
447
def _http_start(self):
448
"""Server thread main entry point. """
449
self._http_running = False
452
httpd = self._get_httpd()
453
self._http_base_url = '%s://%s:%s/' % (self._url_protocol,
454
self.host, self.port)
455
self._http_running = True
457
# Whatever goes wrong, we save the exception for the main
458
# thread. Note that since we are running in a thread, no signal
459
# can be received, so we don't care about KeyboardInterrupt.
460
self._http_exception = sys.exc_info()
462
# Release the lock or the main thread will block and the whole
464
self._http_starting.release()
466
# From now on, exceptions are taken care of by the
467
# SocketServer.BaseServer or the request handler.
468
while self._http_running:
470
# Really an HTTP connection but the python framework is generic
471
# and call them requests
472
httpd.handle_request()
473
except socket.timeout:
475
except (socket.error, select.error), e:
476
if e[0] == errno.EBADF:
477
# Starting with python-2.6, handle_request may raise socket
478
# or select exceptions when the server is shut down (as we
484
def _get_remote_url(self, path):
485
path_parts = path.split(os.path.sep)
486
if os.path.isabs(path):
487
if path_parts[:len(self._local_path_parts)] != \
488
self._local_path_parts:
489
raise BadWebserverPath(path, self.test_dir)
490
remote_path = '/'.join(path_parts[len(self._local_path_parts):])
492
remote_path = '/'.join(path_parts)
494
return self._http_base_url + remote_path
496
def log(self, format, *args):
    """Capture server log output.

    The message is appended to ``self.logs`` rather than written anywhere.

    :param format: a %-style format string.
    :param args: the values interpolated into ``format``.
    """
    message = format % args
    self.logs.append(message)
500
def setUp(self, backing_transport_server=None):
501
"""See bzrlib.transport.Server.setUp.
503
:param backing_transport_server: The transport that requests over this
504
protocol should be forwarded to. Note that this is currently not
507
# XXX: TODO: make the server back onto vfs_server rather than local
509
if not (backing_transport_server is None or \
510
isinstance(backing_transport_server, local.LocalURLServer)):
511
raise AssertionError(
512
"HTTPServer currently assumes local transport, got %s" % \
513
backing_transport_server)
514
self._home_dir = os.getcwdu()
515
self._local_path_parts = self._home_dir.split(os.path.sep)
516
self._http_base_url = None
518
# Create the server thread
519
self._http_starting = threading.Lock()
520
self._http_starting.acquire()
521
self._http_thread = threading.Thread(target=self._http_start)
522
self._http_thread.setDaemon(True)
523
self._http_exception = None
524
self._http_thread.start()
526
# Wait for the server thread to start (i.e release the lock)
527
self._http_starting.acquire()
529
if self._http_exception is not None:
530
# Something went wrong during server start
531
exc_class, exc_value, exc_tb = self._http_exception
532
raise exc_class, exc_value, exc_tb
533
self._http_starting.release()
537
"""See bzrlib.transport.Server.tearDown."""
538
self._httpd.tearDown()
539
self._http_running = False
540
# We don't need to 'self._http_thread.join()' here since the thread is
541
# a daemonic one and will be garbage collected anyway. Joining just
542
# slows us down for no added benefit.
545
"""See bzrlib.transport.Server.get_url."""
546
return self._get_remote_url(self._home_dir)
548
def get_bogus_url(self):
    """See bzrlib.transport.Server.get_bogus_url.

    :return: a URL using this server's protocol that points at port 1 of
        the loopback address.
    """
    # This address is chosen to try to prevent trouble with proxies,
    # weird DNS, etc.
    return self._url_protocol + '://127.0.0.1:1/'
555
class HttpServer_urllib(HttpServer):
    """Subclass of HttpServer that gives http+urllib urls.

    This is for use in testing: connections to this server will always go
    through urllib where possible.
    """

    # URLs returned by this server should require the urllib client
    # implementation.
    _url_protocol = 'http+urllib'
566
class HttpServer_PyCurl(HttpServer):
    """Subclass of HttpServer that gives http+pycurl urls.

    This is for use in testing: connections to this server will always go
    through pycurl where possible.
    """

    # We don't care about checking the pycurl availability as
    # this server will be required only when pycurl is present.

    # URLs returned by this server should require the pycurl client
    # implementation.
    _url_protocol = 'http+pycurl'