1
# Copyright (C) 2006-2010 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24
import SimpleHTTPServer
33
from bzrlib import transport
34
from bzrlib.tests import test_server
35
from bzrlib.transport import local
38
class BadWebserverPath(ValueError):
    """Raised when a local path cannot be mapped to a URL of the test server.

    Instances are built with two positional arguments, the offending path
    and the directory it was expected to live in.
    """

    def __str__(self):
        # self.args must hold exactly the two values the message expects.
        return 'path %s is not in %s' % self.args
43
class TestingHTTPRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):
44
"""Handles one request.
46
A TestingHTTPRequestHandler is instantiated for every request received by
47
the associated server. Note that 'request' here is inherited from the base
48
TCPServer class, for the HTTP server it is really a connection which itself
49
will handle one or several HTTP requests.
51
# Default protocol version
52
protocol_version = 'HTTP/1.1'
54
# The Message-like class used to parse the request headers
55
MessageClass = httplib.HTTPMessage
58
SimpleHTTPServer.SimpleHTTPRequestHandler.setup(self)
59
self._cwd = self.server._home_dir
60
tcs = self.server.test_case_server
61
if tcs.protocol_version is not None:
62
# If the test server forced a protocol version, use it
63
self.protocol_version = tcs.protocol_version
65
def log_message(self, format, *args):
    """Forward a request-log entry to the test case server's log.

    :param format: A %-style format string describing the event.
    :param args: The values interpolated into ``format``.
    """
    tcs = self.server.test_case_server
    # The template has five placeholders: client address, timestamp, the
    # formatted message itself, referer and user-agent.
    tcs.log('webserver - %s - - [%s] %s "%s" "%s"',
            self.address_string(),
            self.log_date_time_string(),
            format % args,
            self.headers.get('referer', '-'),
            self.headers.get('user-agent', '-'))
74
def handle_one_request(self):
75
"""Handle a single HTTP request.
77
We catch all socket errors occurring when the client closes the
78
connection early to avoid polluting the test results.
81
SimpleHTTPServer.SimpleHTTPRequestHandler.handle_one_request(self)
82
except socket.error, e:
83
# Any socket error should close the connection, but some errors are
84
# due to the client closing early and we don't want to pollute test
85
# results, so we raise only the others.
86
self.close_connection = 1
88
or e.args[0] not in (errno.EPIPE, errno.ECONNRESET,
89
errno.ECONNABORTED, errno.EBADF)):
92
error_content_type = 'text/plain'
93
error_message_format = '''\
98
def send_error(self, code, message=None):
99
"""Send and log an error reply.
101
We redefine the python-provided version to be able to set a
102
``Content-Length`` header as some http/1.1 clients complain otherwise
105
:param code: The HTTP error code.
107
:param message: The explanation of the error code. Defaults to a short
113
message = self.responses[code][0]
116
self.log_error("code %d, message %s", code, message)
117
content = (self.error_message_format %
118
{'code': code, 'message': message})
119
self.send_response(code, message)
120
self.send_header("Content-Type", self.error_content_type)
121
self.send_header("Content-Length", "%d" % len(content))
122
self.send_header('Connection', 'close')
124
if self.command != 'HEAD' and code >= 200 and code not in (204, 304):
125
self.wfile.write(content)
127
_range_regexp = re.compile(r'^(?P<start>\d+)-(?P<end>\d+)$')
128
_tail_regexp = re.compile(r'^-(?P<tail>\d+)$')
130
def parse_ranges(self, ranges_header):
131
"""Parse the range header value and returns ranges and tail.
133
RFC2616 14.35 says that syntactically invalid range
134
specifiers MUST be ignored. In that case, we return 0 for
135
tail and [] for ranges.
139
if not ranges_header.startswith('bytes='):
140
# Syntactically invalid header
143
ranges_header = ranges_header[len('bytes='):]
144
for range_str in ranges_header.split(','):
145
# FIXME: RFC2616 says end is optional and default to file_size
146
range_match = self._range_regexp.match(range_str)
147
if range_match is not None:
148
start = int(range_match.group('start'))
149
end = int(range_match.group('end'))
151
# Syntactically invalid range
153
ranges.append((start, end))
155
tail_match = self._tail_regexp.match(range_str)
156
if tail_match is not None:
157
tail = int(tail_match.group('tail'))
159
# Syntactically invalid range
163
def _header_line_length(self, keyword, value):
    """Return the length of the header line ``keyword: value``.

    The count includes the ``': '`` separator and the trailing CRLF.

    :param keyword: The header name.
    :param value: The header value.
    """
    header_line = '%s: %s\r\n' % (keyword, value)
    return len(header_line)
168
"""Overrides base implementation to work around a bug in python2.5."""
169
path = self.translate_path(self.path)
170
if os.path.isdir(path) and not self.path.endswith('/'):
171
# redirect browser - doing basically what apache does when
172
# DirectorySlash option is On which is quite common (braindead, but
174
self.send_response(301)
175
self.send_header("Location", self.path + "/")
176
# Indicates that the body is empty for HTTP/1.1 clients
177
self.send_header('Content-Length', '0')
181
return SimpleHTTPServer.SimpleHTTPRequestHandler.send_head(self)
183
def send_range_content(self, file, start, length):
    """Write ``length`` bytes of ``file``, read from offset ``start``.

    :param file: A seekable file-like object opened for reading.
    :param start: The offset of the first byte to send.
    :param length: The number of bytes to send.
    """
    # Position the file on the requested range before reading; without
    # this the data would come from wherever the file currently points.
    file.seek(start)
    self.wfile.write(file.read(length))
187
def get_single_range(self, file, file_size, start, end):
188
self.send_response(206)
189
length = end - start + 1
190
self.send_header('Accept-Ranges', 'bytes')
191
self.send_header("Content-Length", "%d" % length)
193
self.send_header("Content-Type", 'application/octet-stream')
194
self.send_header("Content-Range", "bytes %d-%d/%d" % (start,
198
self.send_range_content(file, start, length)
200
def get_multiple_ranges(self, file, file_size, ranges):
201
self.send_response(206)
202
self.send_header('Accept-Ranges', 'bytes')
203
boundary = '%d' % random.randint(0,0x7FFFFFFF)
204
self.send_header('Content-Type',
205
'multipart/byteranges; boundary=%s' % boundary)
206
boundary_line = '--%s\r\n' % boundary
207
# Calculate the Content-Length
209
for (start, end) in ranges:
210
content_length += len(boundary_line)
211
content_length += self._header_line_length(
212
'Content-type', 'application/octet-stream')
213
content_length += self._header_line_length(
214
'Content-Range', 'bytes %d-%d/%d' % (start, end, file_size))
215
content_length += len('\r\n') # end headers
216
content_length += end - start + 1
217
content_length += len(boundary_line)
218
self.send_header('Content-length', content_length)
221
# Send the multipart body
222
for (start, end) in ranges:
223
self.wfile.write(boundary_line)
224
self.send_header('Content-type', 'application/octet-stream')
225
self.send_header('Content-Range', 'bytes %d-%d/%d'
226
% (start, end, file_size))
228
self.send_range_content(file, start, end - start + 1)
230
self.wfile.write(boundary_line)
233
"""Serve a GET request.
235
Handles the Range header.
238
self.server.test_case_server.GET_request_nb += 1
240
path = self.translate_path(self.path)
241
ranges_header_value = self.headers.get('Range')
242
if ranges_header_value is None or os.path.isdir(path):
243
# Let the mother class handle most cases
244
return SimpleHTTPServer.SimpleHTTPRequestHandler.do_GET(self)
247
# Always read in binary mode. Opening files in text
248
# mode may cause newline translations, making the
249
# actual size of the content transmitted *less* than
250
# the content-length!
251
file = open(path, 'rb')
253
self.send_error(404, "File not found")
256
file_size = os.fstat(file.fileno())[6]
257
tail, ranges = self.parse_ranges(ranges_header_value)
258
# Normalize tail into ranges
260
ranges.append((file_size - tail, file_size))
262
self._satisfiable_ranges = True
264
self._satisfiable_ranges = False
266
def check_range(range_specifier):
267
start, end = range_specifier
268
# RFC2616 14.35, ranges are invalid if start >= file_size
269
if start >= file_size:
270
self._satisfiable_ranges = False # Side-effect !
272
# RFC2616 14.35, end values should be truncated
273
# to file_size -1 if they exceed it
274
end = min(end, file_size - 1)
277
ranges = map(check_range, ranges)
279
if not self._satisfiable_ranges:
280
# RFC2616 14.16 and 14.35 say that when a server
281
# encounters unsatisfiable range specifiers, it
282
# SHOULD return a 416.
284
# FIXME: We SHOULD send a Content-Range header too,
285
# but the implementation of send_error does not
286
# allow that. So far.
287
self.send_error(416, "Requested range not satisfiable")
291
(start, end) = ranges[0]
292
self.get_single_range(file, file_size, start, end)
294
self.get_multiple_ranges(file, file_size, ranges)
297
def translate_path(self, path):
298
"""Translate a /-separated PATH to the local filename syntax.
300
If the server requires it, proxy the path before the usual translation
302
if self.server.test_case_server.proxy_requests:
303
# We need to act as a proxy and accept absolute urls,
304
# which SimpleHTTPRequestHandler (parent) is not
305
# ready for. So we just drop the protocol://host:port
306
# part in front of the request-url (because we know
307
# we would not forward the request to *another*
310
# So we do what SimpleHTTPRequestHandler.translate_path
311
# does beginning with python 2.4.3: abandon query
312
# parameters, scheme, host port, etc (which ensure we
313
# provide the right behaviour on all python versions).
314
path = urlparse.urlparse(path)[2]
315
# And now, we can apply *our* trick to proxy files
318
return self._translate_path(path)
320
def _translate_path(self, path):
321
"""Translate a /-separated PATH to the local filename syntax.
323
Note that we're translating http URLs here, not file URLs.
324
The URL root location is the server's startup directory.
325
Components that mean special things to the local file system
326
(e.g. drive or directory names) are ignored. (XXX They should
327
probably be diagnosed.)
329
Override from python standard library to stop it calling os.getcwd()
331
# abandon query parameters
332
path = urlparse.urlparse(path)[2]
333
path = posixpath.normpath(urllib.unquote(path))
334
path = path.decode('utf-8')
335
words = path.split('/')
336
words = filter(None, words)
338
for num, word in enumerate(words):
340
drive, word = os.path.splitdrive(word)
341
head, word = os.path.split(word)
342
if word in (os.curdir, os.pardir): continue
343
path = os.path.join(path, word)
347
class TestingHTTPServerMixin:
349
def __init__(self, test_case_server):
    """Remember the test case server this HTTP server works for.

    :param test_case_server: The object shared with the tests; it must
        expose a ``_home_dir`` attribute, which is copied onto this server.
    """
    # test_case_server can be used to communicate between the tests and
    # the server (or the request handler and the server), allowing dynamic
    # behaviors to be defined from the tests.
    self.test_case_server = test_case_server
    self._home_dir = test_case_server._home_dir
357
def stop_server(self):
358
"""Called to clean-up the server.
360
Since the server may be (surely is, even) in a blocking listen, we
361
shutdown its socket before closing it.
363
# Note that this is executed as part of the implicit tear down in the
364
# main thread while the server runs in its own thread. The clean way
365
# to tear down the server is to instruct it to stop accepting
366
# connections and wait for the current connection(s) to end
367
# naturally. To end the connection naturally, the http transports
368
# should close their socket when they do not need to talk to the
369
# server anymore. This happens naturally during the garbage collection
370
# phase of the test transport objects (the server clients), so we
371
# don't have to worry about them. So, for the server, we must tear
372
# down here, from the main thread, when the tests have ended. Note
373
# that since the server is in a blocking operation and since python
374
# uses select internally, shutting down the socket is reliable and
377
self.socket.shutdown(socket.SHUT_RDWR)
378
except socket.error, e:
379
# WSAENOTCONN (10057) 'Socket is not connected' is harmless on
380
# windows (occurs before the first connection attempt
383
# 'Socket is not connected' can also occur on OSX, with a
384
# "regular" ENOTCONN (when something went wrong during test case
385
# setup leading to self.setUp() *not* being called but
386
# self.stop_server() still being called -- vila20081106
387
if not len(e.args) or e.args[0] not in (errno.ENOTCONN, 10057):
389
# Let the server properly close the socket
393
class TestingHTTPServer(SocketServer.TCPServer, TestingHTTPServerMixin):
395
def __init__(self, server_address, request_handler_class,
397
TestingHTTPServerMixin.__init__(self, test_case_server)
398
SocketServer.TCPServer.__init__(self, server_address,
399
request_handler_class)
402
class TestingThreadingHTTPServer(SocketServer.ThreadingTCPServer,
403
TestingHTTPServerMixin):
404
"""A threading HTTP test server for HTTP 1.1.
406
Since tests can initiate several concurrent connections to the same http
407
server, we need an independent connection for each of them. We achieve that
408
by spawning a new thread for each connection.
411
def __init__(self, server_address, request_handler_class,
413
TestingHTTPServerMixin.__init__(self, test_case_server)
414
SocketServer.ThreadingTCPServer.__init__(self, server_address,
415
request_handler_class)
416
# Decides how threads will act upon termination of the main
417
# process. This is prophylactic as we should not leave the threads
419
self.daemon_threads = True
421
def process_request_thread(self, request, client_address):
422
SocketServer.ThreadingTCPServer.process_request_thread(
423
self, request, client_address)
424
# Under some circumstances (as in bug #383920), we need to force the
425
# shutdown as python delays it until gc occurs otherwise and the client
428
# The request process has been completed, the thread is about to
429
# die, let's shutdown the socket if we can.
430
request.shutdown(socket.SHUT_RDWR)
431
except (socket.error, select.error), e:
432
if e[0] in (errno.EBADF, errno.ENOTCONN):
433
# Right, the socket is already down
439
class HttpServer(transport.Server):
440
"""A test server for http transports.
442
Subclasses can provide a specific request handler.
445
# The real servers depending on the protocol
446
http_server_class = {'HTTP/1.0': TestingHTTPServer,
447
'HTTP/1.1': TestingThreadingHTTPServer,
450
# Whether or not we proxy the requests (see
451
# TestingHTTPRequestHandler.translate_path).
452
proxy_requests = False
454
# used to form the url that connects to this server
455
_url_protocol = 'http'
457
def __init__(self, request_handler=TestingHTTPRequestHandler,
458
protocol_version=None):
461
:param request_handler: a class that will be instantiated to handle an
462
http connection (one or several requests).
464
:param protocol_version: if specified, will override the protocol
465
version of the request handler.
467
transport.Server.__init__(self)
468
self.request_handler = request_handler
469
self.host = 'localhost'
472
self.protocol_version = protocol_version
473
# Allows tests to verify number of GET requests issued
474
self.GET_request_nb = 0
476
def create_httpd(self, serv_cls, rhandler_cls):
    """Instantiate the underlying HTTP server.

    :param serv_cls: The server class (or any factory) to call.
    :param rhandler_cls: Not used here: the handler handed to ``serv_cls``
        is always ``self.request_handler``.
    :return: ``serv_cls((self.host, self.port), self.request_handler, self)``.
    """
    return serv_cls((self.host, self.port), self.request_handler, self)
480
return "%s(%s:%s)" % \
481
(self.__class__.__name__, self.host, self.port)
483
def _get_httpd(self):
484
if self._httpd is None:
485
rhandler = self.request_handler
486
# Depending on the protocol version, we will create the appropriate
488
if self.protocol_version is None:
489
# Use the request handler one
490
proto_vers = rhandler.protocol_version
492
# Use our own, it will be used to override the request handler
494
proto_vers = self.protocol_version
495
# Create the appropriate server for the required protocol
496
serv_cls = self.http_server_class.get(proto_vers, None)
498
raise httplib.UnknownProtocol(proto_vers)
500
self._httpd = self.create_httpd(serv_cls, rhandler)
501
self.host, self.port = self._httpd.socket.getsockname()
504
def _http_start(self):
505
"""Server thread main entry point. """
506
self._http_running = False
509
httpd = self._get_httpd()
510
self._http_base_url = '%s://%s:%s/' % (self._url_protocol,
511
self.host, self.port)
512
self._http_running = True
514
# Whatever goes wrong, we save the exception for the main
515
# thread. Note that since we are running in a thread, no signal
516
# can be received, so we don't care about KeyboardInterrupt.
517
self._http_exception = sys.exc_info()
519
# Release the lock or the main thread will block and the whole
521
self._http_starting.release()
523
# From now on, exceptions are taken care of by the
524
# SocketServer.BaseServer or the request handler.
525
while self._http_running:
527
# Really an HTTP connection but the python framework is generic
528
# and call them requests
529
httpd.handle_request()
530
except socket.timeout:
532
except (socket.error, select.error), e:
533
if (e[0] == errno.EBADF
534
or (sys.platform == 'win32' and e[0] == 10038)):
535
# Starting with python-2.6, handle_request may raise socket
536
# or select exceptions when the server is shut down (as we
538
# 10038 = WSAENOTSOCK
539
# http://msdn.microsoft.com/en-us/library/ms740668%28VS.85%29.aspx
544
def _get_remote_url(self, path):
    """Return the URL under which the local ``path`` is served.

    :param path: A local path using ``os.path.sep`` as separator. An
        absolute path must start with the components stored in
        ``self._local_path_parts``; a relative path is used as is.
    :return: ``self._http_base_url`` followed by the '/'-joined remaining
        path components.
    :raises BadWebserverPath: if ``path`` is absolute but does not start
        with ``self._local_path_parts``.
    """
    path_parts = path.split(os.path.sep)
    if os.path.isabs(path):
        prefix_len = len(self._local_path_parts)
        if path_parts[:prefix_len] != self._local_path_parts:
            # NOTE(review): test_dir is not assigned anywhere in the code
            # visible here -- confirm it is provided elsewhere.
            raise BadWebserverPath(path, self.test_dir)
        remote_path = '/'.join(path_parts[prefix_len:])
    else:
        # Only relative paths are used untouched; this branch must not
        # override the result computed for absolute paths.
        remote_path = '/'.join(path_parts)

    return self._http_base_url + remote_path
556
def log(self, format, *args):
    """Capture Server log output.

    :param format: A %-style format string.
    :param args: The values interpolated into ``format``; the resulting
        string is appended to ``self.logs``.
    """
    self.logs.append(format % args)
560
def start_server(self, backing_transport_server=None):
561
"""See bzrlib.transport.Server.start_server.
563
:param backing_transport_server: The transport that requests over this
564
protocol should be forwarded to. Note that this is currently not
567
# XXX: TODO: make the server back onto vfs_server rather than local
569
if not (backing_transport_server is None
570
or isinstance(backing_transport_server,
571
test_server.LocalURLServer)):
572
raise AssertionError(
573
"HTTPServer currently assumes local transport, got %s" % \
574
backing_transport_server)
575
self._home_dir = os.getcwdu()
576
self._local_path_parts = self._home_dir.split(os.path.sep)
577
self._http_base_url = None
579
# Create the server thread
580
self._http_starting = threading.Lock()
581
self._http_starting.acquire()
582
self._http_thread = threading.Thread(target=self._http_start)
583
self._http_thread.setDaemon(True)
584
self._http_exception = None
585
self._http_thread.start()
587
# Wait for the server thread to start (i.e release the lock)
588
self._http_starting.acquire()
590
if self._http_exception is not None:
591
# Something went wrong during server start
592
exc_class, exc_value, exc_tb = self._http_exception
593
raise exc_class, exc_value, exc_tb
594
self._http_starting.release()
597
def stop_server(self):
    """Stop the underlying HTTP server and clear the running flag."""
    self._httpd.stop_server()
    self._http_running = False
    # We don't need to 'self._http_thread.join()' here since the thread is
    # a daemonic one and will be garbage collected anyway. Joining just
    # slows us down for no added benefit.
def get_url(self):
    """See bzrlib.transport.Server.get_url."""
    # The server's URL is the remote URL of its own home directory.
    return self._get_remote_url(self._home_dir)
608
def get_bogus_url(self):
    """See bzrlib.transport.Server.get_bogus_url."""
    # This address is chosen to try to prevent trouble with proxies or
    # weird dns.
    return self._url_protocol + '://127.0.0.1:1/'
615
class HttpServer_urllib(HttpServer):
616
"""Subclass of HttpServer that gives http+urllib urls.
618
This is for use in testing: connections to this server will always go
619
through urllib where possible.
622
# urls returned by this server should require the urllib client impl
623
_url_protocol = 'http+urllib'
626
class HttpServer_PyCurl(HttpServer):
627
"""Subclass of HttpServer that gives http+pycurl urls.
629
This is for use in testing: connections to this server will always go
630
through pycurl where possible.
633
# We don't care about checking the pycurl availability as
634
# this server will be required only when pycurl is present
636
# urls returned by this server should require the pycurl client impl
637
_url_protocol = 'http+pycurl'