# Copyright (C) 2006, 2007 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

import errno
import httplib
import os
import posixpath
import random
import re
import select
import SimpleHTTPServer
import socket
import SocketServer
import sys
import threading
import urllib
import urlparse

from bzrlib import transport
from bzrlib.transport import local


class BadWebserverPath(ValueError):
    """Raised when a local path cannot be mapped to a URL of the web server.

    The exception is built with two arguments: the offending path and the
    directory it was expected to live in.
    """

    def __str__(self):
        # self.args is the (path, directory) pair given to the constructor.
        return 'path %s is not in %s' % self.args


class TestingHTTPRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):
43
"""Handles one request.
45
A TestingHTTPRequestHandler is instantiated for every request received by
46
the associated server. Note that 'request' here is inherited from the base
47
TCPServer class, for the HTTP server it is really a connection which itself
48
will handle one or several HTTP requests.
50
# Default protocol version
51
protocol_version = 'HTTP/1.1'
53
# The Message-like class used to parse the request headers
54
MessageClass = httplib.HTTPMessage
57
SimpleHTTPServer.SimpleHTTPRequestHandler.setup(self)
58
self._cwd = self.server._home_dir
59
tcs = self.server.test_case_server
60
if tcs.protocol_version is not None:
61
# If the test server forced a protocol version, use it
62
self.protocol_version = tcs.protocol_version
64
def log_message(self, format, *args):
65
tcs = self.server.test_case_server
66
tcs.log('webserver - %s - - [%s] %s "%s" "%s"',
67
self.address_string(),
68
self.log_date_time_string(),
70
self.headers.get('referer', '-'),
71
self.headers.get('user-agent', '-'))
73
def handle_one_request(self):
74
"""Handle a single HTTP request.
76
We catch all socket errors occurring when the client close the
77
connection early to avoid polluting the test results.
80
SimpleHTTPServer.SimpleHTTPRequestHandler.handle_one_request(self)
81
except socket.error, e:
82
# Any socket error should close the connection, but some errors are
83
# due to the client closing early and we don't want to pollute test
84
# results, so we raise only the others.
85
self.close_connection = 1
87
or e.args[0] not in (errno.EPIPE, errno.ECONNRESET,
88
errno.ECONNABORTED, errno.EBADF)):
91
_range_regexp = re.compile(r'^(?P<start>\d+)-(?P<end>\d+)$')
92
_tail_regexp = re.compile(r'^-(?P<tail>\d+)$')
94
def parse_ranges(self, ranges_header):
95
"""Parse the range header value and returns ranges and tail.
97
RFC2616 14.35 says that syntactically invalid range
98
specifiers MUST be ignored. In that case, we return 0 for
99
tail and [] for ranges.
103
if not ranges_header.startswith('bytes='):
104
# Syntactically invalid header
107
ranges_header = ranges_header[len('bytes='):]
108
for range_str in ranges_header.split(','):
109
# FIXME: RFC2616 says end is optional and default to file_size
110
range_match = self._range_regexp.match(range_str)
111
if range_match is not None:
112
start = int(range_match.group('start'))
113
end = int(range_match.group('end'))
115
# Syntactically invalid range
117
ranges.append((start, end))
119
tail_match = self._tail_regexp.match(range_str)
120
if tail_match is not None:
121
tail = int(tail_match.group('tail'))
123
# Syntactically invalid range
127
def _header_line_length(self, keyword, value):
128
header_line = '%s: %s\r\n' % (keyword, value)
129
return len(header_line)
132
"""Overrides base implementation to work around a bug in python2.5."""
133
path = self.translate_path(self.path)
134
if os.path.isdir(path) and not self.path.endswith('/'):
135
# redirect browser - doing basically what apache does when
136
# DirectorySlash option is On which is quite common (braindead, but
138
self.send_response(301)
139
self.send_header("Location", self.path + "/")
140
# Indicates that the body is empty for HTTP/1.1 clients
141
self.send_header('Content-Length', '0')
145
return SimpleHTTPServer.SimpleHTTPRequestHandler.send_head(self)
147
def send_range_content(self, file, start, length):
149
self.wfile.write(file.read(length))
151
def get_single_range(self, file, file_size, start, end):
152
self.send_response(206)
153
length = end - start + 1
154
self.send_header('Accept-Ranges', 'bytes')
155
self.send_header("Content-Length", "%d" % length)
157
self.send_header("Content-Type", 'application/octet-stream')
158
self.send_header("Content-Range", "bytes %d-%d/%d" % (start,
162
self.send_range_content(file, start, length)
164
def get_multiple_ranges(self, file, file_size, ranges):
165
self.send_response(206)
166
self.send_header('Accept-Ranges', 'bytes')
167
boundary = '%d' % random.randint(0,0x7FFFFFFF)
168
self.send_header('Content-Type',
169
'multipart/byteranges; boundary=%s' % boundary)
170
boundary_line = '--%s\r\n' % boundary
171
# Calculate the Content-Length
173
for (start, end) in ranges:
174
content_length += len(boundary_line)
175
content_length += self._header_line_length(
176
'Content-type', 'application/octet-stream')
177
content_length += self._header_line_length(
178
'Content-Range', 'bytes %d-%d/%d' % (start, end, file_size))
179
content_length += len('\r\n') # end headers
180
content_length += end - start + 1
181
content_length += len(boundary_line)
182
self.send_header('Content-length', content_length)
185
# Send the multipart body
186
for (start, end) in ranges:
187
self.wfile.write(boundary_line)
188
self.send_header('Content-type', 'application/octet-stream')
189
self.send_header('Content-Range', 'bytes %d-%d/%d'
190
% (start, end, file_size))
192
self.send_range_content(file, start, end - start + 1)
194
self.wfile.write(boundary_line)
197
"""Serve a GET request.
199
Handles the Range header.
202
self.server.test_case_server.GET_request_nb += 1
204
path = self.translate_path(self.path)
205
ranges_header_value = self.headers.get('Range')
206
if ranges_header_value is None or os.path.isdir(path):
207
# Let the mother class handle most cases
208
return SimpleHTTPServer.SimpleHTTPRequestHandler.do_GET(self)
211
# Always read in binary mode. Opening files in text
212
# mode may cause newline translations, making the
213
# actual size of the content transmitted *less* than
214
# the content-length!
215
file = open(path, 'rb')
217
self.send_error(404, "File not found")
220
file_size = os.fstat(file.fileno())[6]
221
tail, ranges = self.parse_ranges(ranges_header_value)
222
# Normalize tail into ranges
224
ranges.append((file_size - tail, file_size))
226
self._satisfiable_ranges = True
228
self._satisfiable_ranges = False
230
def check_range(range_specifier):
231
start, end = range_specifier
232
# RFC2616 14.35, ranges are invalid if start >= file_size
233
if start >= file_size:
234
self._satisfiable_ranges = False # Side-effect !
236
# RFC2616 14.35, end values should be truncated
237
# to file_size -1 if they exceed it
238
end = min(end, file_size - 1)
241
ranges = map(check_range, ranges)
243
if not self._satisfiable_ranges:
244
# RFC2616 14.16 and 14.35 says that when a server
245
# encounters unsatisfiable range specifiers, it
246
# SHOULD return a 416.
248
# FIXME: We SHOULD send a Content-Range header too,
249
# but the implementation of send_error does not
250
# allows that. So far.
251
self.send_error(416, "Requested range not satisfiable")
255
(start, end) = ranges[0]
256
self.get_single_range(file, file_size, start, end)
258
self.get_multiple_ranges(file, file_size, ranges)
261
def translate_path(self, path):
262
"""Translate a /-separated PATH to the local filename syntax.
264
If the server requires it, proxy the path before the usual translation
266
if self.server.test_case_server.proxy_requests:
267
# We need to act as a proxy and accept absolute urls,
268
# which SimpleHTTPRequestHandler (parent) is not
269
# ready for. So we just drop the protocol://host:port
270
# part in front of the request-url (because we know
271
# we would not forward the request to *another*
274
# So we do what SimpleHTTPRequestHandler.translate_path
275
# do beginning with python 2.4.3: abandon query
276
# parameters, scheme, host port, etc (which ensure we
277
# provide the right behaviour on all python versions).
278
path = urlparse.urlparse(path)[2]
279
# And now, we can apply *our* trick to proxy files
282
return self._translate_path(path)
284
def _translate_path(self, path):
285
"""Translate a /-separated PATH to the local filename syntax.
287
Note that we're translating http URLs here, not file URLs.
288
The URL root location is the server's startup directory.
289
Components that mean special things to the local file system
290
(e.g. drive or directory names) are ignored. (XXX They should
291
probably be diagnosed.)
293
Override from python standard library to stop it calling os.getcwd()
295
# abandon query parameters
296
path = urlparse.urlparse(path)[2]
297
path = posixpath.normpath(urllib.unquote(path))
298
path = path.decode('utf-8')
299
words = path.split('/')
300
words = filter(None, words)
302
for num, word in enumerate(words):
304
drive, word = os.path.splitdrive(word)
305
head, word = os.path.split(word)
306
if word in (os.curdir, os.pardir): continue
307
path = os.path.join(path, word)
311
class TestingHTTPServerMixin:
313
def __init__(self, test_case_server):
314
# test_case_server can be used to communicate between the
315
# tests and the server (or the request handler and the
316
# server), allowing dynamic behaviors to be defined from
318
self.test_case_server = test_case_server
319
self._home_dir = test_case_server._home_dir
322
"""Called to clean-up the server.
324
Since the server may be (surely is, even) in a blocking listen, we
325
shutdown its socket before closing it.
327
# Note that is this executed as part of the implicit tear down in the
328
# main thread while the server runs in its own thread. The clean way
329
# to tear down the server is to instruct him to stop accepting
330
# connections and wait for the current connection(s) to end
331
# naturally. To end the connection naturally, the http transports
332
# should close their socket when they do not need to talk to the
333
# server anymore. This happens naturally during the garbage collection
334
# phase of the test transport objetcs (the server clients), so we
335
# don't have to worry about them. So, for the server, we must tear
336
# down here, from the main thread, when the test have ended. Note
337
# that since the server is in a blocking operation and since python
338
# use select internally, shutting down the socket is reliable and
341
self.socket.shutdown(socket.SHUT_RDWR)
342
except socket.error, e:
343
# WSAENOTCONN (10057) 'Socket is not connected' is harmless on
344
# windows (occurs before the first connection attempt
347
# 'Socket is not connected' can also occur on OSX, with a
348
# "regular" ENOTCONN (when something went wrong during test case
349
# setup leading to self.setUp() *not* being called but
350
# self.tearDown() still being called -- vila20081106
351
if not len(e.args) or e.args[0] not in (errno.ENOTCONN, 10057):
353
# Let the server properly close the socket
357
class TestingHTTPServer(SocketServer.TCPServer, TestingHTTPServerMixin):
    """A single-threaded HTTP test server."""

    def __init__(self, server_address, request_handler_class,
                 test_case_server):
        """Create the server.

        :param server_address: the (host, port) tuple to bind to.
        :param request_handler_class: the class instantiated for each
            connection.
        :param test_case_server: the object shared between the tests, the
            server and the request handlers.
        """
        # The mixin must be set up first: TCPServer.__init__ binds the socket.
        TestingHTTPServerMixin.__init__(self, test_case_server)
        SocketServer.TCPServer.__init__(self, server_address,
                                        request_handler_class)


class TestingThreadingHTTPServer(SocketServer.ThreadingTCPServer,
367
TestingHTTPServerMixin):
368
"""A threading HTTP test server for HTTP 1.1.
370
Since tests can initiate several concurrent connections to the same http
371
server, we need an independent connection for each of them. We achieve that
372
by spawning a new thread for each connection.
375
def __init__(self, server_address, request_handler_class,
377
TestingHTTPServerMixin.__init__(self, test_case_server)
378
SocketServer.ThreadingTCPServer.__init__(self, server_address,
379
request_handler_class)
380
# Decides how threads will act upon termination of the main
381
# process. This is prophylactic as we should not leave the threads
383
self.daemon_threads = True
385
def process_request_thread(self, request, client_address):
386
SocketServer.ThreadingTCPServer.process_request_thread(
387
self, request, client_address)
388
# Under some circumstances (as in bug #383920), we need to force the
389
# shutdown as python delays it until gc occur otherwise and the client
392
# The request process has been completed, the thread is about to
393
# die, let's shutdown the socket if we can.
394
request.shutdown(socket.SHUT_RDWR)
395
except (socket.error, select.error), e:
396
if e[0] in (errno.EBADF, errno.ENOTCONN):
397
# Right, the socket is already down
403
class HttpServer(transport.Server):
404
"""A test server for http transports.
406
Subclasses can provide a specific request handler.
409
# The real servers depending on the protocol
410
http_server_class = {'HTTP/1.0': TestingHTTPServer,
411
'HTTP/1.1': TestingThreadingHTTPServer,
414
# Whether or not we proxy the requests (see
415
# TestingHTTPRequestHandler.translate_path).
416
proxy_requests = False
418
# used to form the url that connects to this server
419
_url_protocol = 'http'
421
def __init__(self, request_handler=TestingHTTPRequestHandler,
422
protocol_version=None):
425
:param request_handler: a class that will be instantiated to handle an
426
http connection (one or several requests).
428
:param protocol_version: if specified, will override the protocol
429
version of the request handler.
431
transport.Server.__init__(self)
432
self.request_handler = request_handler
433
self.host = 'localhost'
436
self.protocol_version = protocol_version
437
# Allows tests to verify number of GET requests issued
438
self.GET_request_nb = 0
440
def create_httpd(self, serv_cls, rhandler_cls):
441
return serv_cls((self.host, self.port), self.request_handler, self)
444
return "%s(%s:%s)" % \
445
(self.__class__.__name__, self.host, self.port)
447
def _get_httpd(self):
448
if self._httpd is None:
449
rhandler = self.request_handler
450
# Depending on the protocol version, we will create the approriate
452
if self.protocol_version is None:
453
# Use the request handler one
454
proto_vers = rhandler.protocol_version
456
# Use our own, it will be used to override the request handler
458
proto_vers = self.protocol_version
459
# Create the appropriate server for the required protocol
460
serv_cls = self.http_server_class.get(proto_vers, None)
462
raise httplib.UnknownProtocol(proto_vers)
464
self._httpd = self.create_httpd(serv_cls, rhandler)
465
host, self.port = self._httpd.socket.getsockname()
468
def _http_start(self):
469
"""Server thread main entry point. """
470
self._http_running = False
473
httpd = self._get_httpd()
474
self._http_base_url = '%s://%s:%s/' % (self._url_protocol,
475
self.host, self.port)
476
self._http_running = True
478
# Whatever goes wrong, we save the exception for the main
479
# thread. Note that since we are running in a thread, no signal
480
# can be received, so we don't care about KeyboardInterrupt.
481
self._http_exception = sys.exc_info()
483
# Release the lock or the main thread will block and the whole
485
self._http_starting.release()
487
# From now on, exceptions are taken care of by the
488
# SocketServer.BaseServer or the request handler.
489
while self._http_running:
491
# Really an HTTP connection but the python framework is generic
492
# and call them requests
493
httpd.handle_request()
494
except socket.timeout:
496
except (socket.error, select.error), e:
497
if e[0] == errno.EBADF:
498
# Starting with python-2.6, handle_request may raise socket
499
# or select exceptions when the server is shut down (as we
505
def _get_remote_url(self, path):
506
path_parts = path.split(os.path.sep)
507
if os.path.isabs(path):
508
if path_parts[:len(self._local_path_parts)] != \
509
self._local_path_parts:
510
raise BadWebserverPath(path, self.test_dir)
511
remote_path = '/'.join(path_parts[len(self._local_path_parts):])
513
remote_path = '/'.join(path_parts)
515
return self._http_base_url + remote_path
517
def log(self, format, *args):
518
"""Capture Server log output."""
519
self.logs.append(format % args)
521
def setUp(self, backing_transport_server=None):
522
"""See bzrlib.transport.Server.setUp.
524
:param backing_transport_server: The transport that requests over this
525
protocol should be forwarded to. Note that this is currently not
528
# XXX: TODO: make the server back onto vfs_server rather than local
530
if not (backing_transport_server is None or \
531
isinstance(backing_transport_server, local.LocalURLServer)):
532
raise AssertionError(
533
"HTTPServer currently assumes local transport, got %s" % \
534
backing_transport_server)
535
self._home_dir = os.getcwdu()
536
self._local_path_parts = self._home_dir.split(os.path.sep)
537
self._http_base_url = None
539
# Create the server thread
540
self._http_starting = threading.Lock()
541
self._http_starting.acquire()
542
self._http_thread = threading.Thread(target=self._http_start)
543
self._http_thread.setDaemon(True)
544
self._http_exception = None
545
self._http_thread.start()
547
# Wait for the server thread to start (i.e release the lock)
548
self._http_starting.acquire()
550
if self._http_exception is not None:
551
# Something went wrong during server start
552
exc_class, exc_value, exc_tb = self._http_exception
553
raise exc_class, exc_value, exc_tb
554
self._http_starting.release()
558
"""See bzrlib.transport.Server.tearDown."""
559
self._httpd.tearDown()
560
self._http_running = False
561
# We don't need to 'self._http_thread.join()' here since the thread is
562
# a daemonic one and will be garbage collected anyway. Joining just
563
# slows us down for no added benefit.
566
"""See bzrlib.transport.Server.get_url."""
567
return self._get_remote_url(self._home_dir)
569
def get_bogus_url(self):
570
"""See bzrlib.transport.Server.get_bogus_url."""
571
# this is chosen to try to prevent trouble with proxies, weird dns,
573
return self._url_protocol + '://127.0.0.1:1/'
576
class HttpServer_urllib(HttpServer):
    """Subclass of HttpServer that gives http+urllib urls.

    This is for use in testing: connections to this server will always go
    through urllib where possible.
    """

    # urls returned by this server should require the urllib client impl
    _url_protocol = 'http+urllib'


class HttpServer_PyCurl(HttpServer):
    """Subclass of HttpServer that gives http+pycurl urls.

    This is for use in testing: connections to this server will always go
    through pycurl where possible.
    """

    # We don't care about checking the pycurl availability as
    # this server will be required only when pycurl is present

    # urls returned by this server should require the pycurl client impl
    _url_protocol = 'http+pycurl'