1
# Copyright (C) 2006, 2007 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23
import SimpleHTTPServer
32
from bzrlib import transport
33
from bzrlib.transport import local
36
class WebserverNotAvailable(Exception):
    """Signals that a web server needed by the tests is not available.

    NOTE(review): the purpose is inferred from the class name; nothing in
    the visible part of this file raises it -- confirm against the callers.
    """
40
class BadWebserverPath(ValueError):
    """Raised when a local path is not below the directory being served.

    Instances are created with two positional arguments -- the offending
    path and the directory it was expected to live in -- which are kept in
    ``self.args`` and used to build the message.
    """

    def __str__(self):
        """Return the human readable 'path X is not in Y' message."""
        # self.args is the (path, directory) pair given to the constructor.
        return 'path %s is not in %s' % self.args
45
class TestingHTTPRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):
46
"""Handles one request.
48
A TestingHTTPRequestHandler is instantiated for every request received by
49
the associated server. Note that 'request' here is inherited from the base
50
TCPServer class, for the HTTP server it is really a connection which itself
51
will handle one or several HTTP requests.
53
# Default protocol version
54
protocol_version = 'HTTP/1.1'
56
# The Message-like class used to parse the request headers
57
MessageClass = httplib.HTTPMessage
60
SimpleHTTPServer.SimpleHTTPRequestHandler.setup(self)
61
tcs = self.server.test_case_server
62
if tcs.protocol_version is not None:
63
# If the test server forced a protocol version, use it
64
self.protocol_version = tcs.protocol_version
66
def log_message(self, format, *args):
    """Record one access-log style line in the test server's log.

    The line is handed to the ``log`` method of the test case server
    attached to the HTTP server instead of being printed.

    :param format: %-style format string describing the logged event.
    :param args: values interpolated into ``format``.
    """
    tcs = self.server.test_case_server
    # Five fields: client address, timestamp, the formatted message, then
    # the Referer and User-Agent request headers ('-' when absent).
    tcs.log('webserver - %s - - [%s] %s "%s" "%s"',
            self.address_string(),
            self.log_date_time_string(),
            format % args,
            self.headers.get('referer', '-'),
            self.headers.get('user-agent', '-'))
75
def handle_one_request(self):
76
"""Handle a single HTTP request.
78
We catch all socket errors occurring when the client close the
79
connection early to avoid polluting the test results.
82
SimpleHTTPServer.SimpleHTTPRequestHandler.handle_one_request(self)
83
except socket.error, e:
84
# Any socket error should close the connection, but some errors are
85
# due to the client closing early and we don't want to pollute test
86
# results, so we raise only the others.
87
self.close_connection = 1
89
or e.args[0] not in (errno.EPIPE, errno.ECONNRESET,
90
errno.ECONNABORTED, errno.EBADF)):
93
_range_regexp = re.compile(r'^(?P<start>\d+)-(?P<end>\d+)$')
94
_tail_regexp = re.compile(r'^-(?P<tail>\d+)$')
96
def parse_ranges(self, ranges_header):
    """Parse the range header value and returns ranges and tail.

    RFC2616 14.35 says that syntactically invalid range
    specifiers MUST be ignored. In that case, we return 0 for
    tail and [] for ranges.

    :param ranges_header: the raw value of the Range header, for example
        'bytes=0-9,20-29' or 'bytes=-100'.
    :return: a ``(tail, ranges)`` tuple where ``tail`` is the number of
        trailing bytes requested (0 if none) and ``ranges`` is a list of
        ``(start, end)`` integer pairs in the order they were given.
    """
    tail = 0
    ranges = []
    if not ranges_header.startswith('bytes='):
        # Syntactically invalid header
        return 0, []

    ranges_header = ranges_header[len('bytes='):]
    for range_str in ranges_header.split(','):
        # FIXME: RFC2616 says end is optional and default to file_size
        range_match = self._range_regexp.match(range_str)
        if range_match is not None:
            start = int(range_match.group('start'))
            end = int(range_match.group('end'))
            # NOTE(review): RFC2616 14.35.1 makes a spec whose last byte
            # precedes its first byte syntactically invalid -- confirm this
            # is the check intended here.
            if start > end:
                # Syntactically invalid range
                return 0, []
            ranges.append((start, end))
        else:
            # Not a 'start-end' spec, try the '-N' (last N bytes) form
            tail_match = self._tail_regexp.match(range_str)
            if tail_match is not None:
                tail = int(tail_match.group('tail'))
            else:
                # Syntactically invalid range
                return 0, []
    return tail, ranges
129
def _header_line_length(self, keyword, value):
    """Return the length of the header line for ``keyword`` and ``value``.

    The line is counted as it is written on the wire: 'keyword: value'
    followed by CRLF.

    :param keyword: the header name.
    :param value: the header value (anything accepted by '%s').
    :return: the number of characters in the complete header line.
    """
    header_line = '%s: %s\r\n' % (keyword, value)
    return len(header_line)
134
"""Overrides base implementation to work around a bug in python2.5."""
135
path = self.translate_path(self.path)
136
if os.path.isdir(path) and not self.path.endswith('/'):
137
# redirect browser - doing basically what apache does when
138
# DirectorySlash option is On which is quite common (braindead, but
140
self.send_response(301)
141
self.send_header("Location", self.path + "/")
142
# Indicates that the body is empty for HTTP/1.1 clients
143
self.send_header('Content-Length', '0')
147
return SimpleHTTPServer.SimpleHTTPRequestHandler.send_head(self)
149
def send_range_content(self, file, start, length):
    """Write ``length`` bytes of ``file`` starting at ``start`` to the client.

    :param file: a seekable file object opened in binary mode.
    :param start: offset of the first byte to send.
    :param length: number of bytes to send.
    """
    # Position the file at the beginning of the requested range
    file.seek(start)
    self.wfile.write(file.read(length))
153
def get_single_range(self, file, file_size, start, end):
    """Answer a request for a single byte range with a 206 response.

    :param file: the opened file being served.
    :param file_size: total size of the file, reported in Content-Range.
    :param start: offset of the first byte to send.
    :param end: offset of the last byte to send (inclusive).
    """
    self.send_response(206)
    # 'end' is inclusive, hence the + 1
    length = end - start + 1
    self.send_header('Accept-Ranges', 'bytes')
    self.send_header("Content-Length", "%d" % length)

    self.send_header("Content-Type", 'application/octet-stream')
    self.send_header("Content-Range", "bytes %d-%d/%d" % (start,
                                                          end,
                                                          file_size))
    # The header section must be terminated before the body is written
    self.end_headers()
    self.send_range_content(file, start, length)
166
def get_multiple_ranges(self, file, file_size, ranges):
167
self.send_response(206)
168
self.send_header('Accept-Ranges', 'bytes')
169
boundary = '%d' % random.randint(0,0x7FFFFFFF)
170
self.send_header('Content-Type',
171
'multipart/byteranges; boundary=%s' % boundary)
172
boundary_line = '--%s\r\n' % boundary
173
# Calculate the Content-Length
175
for (start, end) in ranges:
176
content_length += len(boundary_line)
177
content_length += self._header_line_length(
178
'Content-type', 'application/octet-stream')
179
content_length += self._header_line_length(
180
'Content-Range', 'bytes %d-%d/%d' % (start, end, file_size))
181
content_length += len('\r\n') # end headers
182
content_length += end - start # + 1
183
content_length += len(boundary_line)
184
self.send_header('Content-length', content_length)
187
# Send the multipart body
188
for (start, end) in ranges:
189
self.wfile.write(boundary_line)
190
self.send_header('Content-type', 'application/octet-stream')
191
self.send_header('Content-Range', 'bytes %d-%d/%d'
192
% (start, end, file_size))
194
self.send_range_content(file, start, end - start + 1)
196
self.wfile.write(boundary_line)
199
"""Serve a GET request.
201
Handles the Range header.
204
self.server.test_case_server.GET_request_nb += 1
206
path = self.translate_path(self.path)
207
ranges_header_value = self.headers.get('Range')
208
if ranges_header_value is None or os.path.isdir(path):
209
# Let the mother class handle most cases
210
return SimpleHTTPServer.SimpleHTTPRequestHandler.do_GET(self)
213
# Always read in binary mode. Opening files in text
214
# mode may cause newline translations, making the
215
# actual size of the content transmitted *less* than
216
# the content-length!
217
file = open(path, 'rb')
219
self.send_error(404, "File not found")
222
file_size = os.fstat(file.fileno())[6]
223
tail, ranges = self.parse_ranges(ranges_header_value)
224
# Normalize tail into ranges
226
ranges.append((file_size - tail, file_size))
228
self._satisfiable_ranges = True
230
self._satisfiable_ranges = False
232
def check_range(range_specifier):
233
start, end = range_specifier
234
# RFC2616 14.35, ranges are invalid if start >= file_size
235
if start >= file_size:
236
self._satisfiable_ranges = False # Side-effect !
238
# RFC2616 14.35, end values should be truncated
239
# to file_size -1 if they exceed it
240
end = min(end, file_size - 1)
243
ranges = map(check_range, ranges)
245
if not self._satisfiable_ranges:
246
# RFC2616 14.16 and 14.35 says that when a server
247
# encounters unsatisfiable range specifiers, it
248
# SHOULD return a 416.
250
# FIXME: We SHOULD send a Content-Range header too,
251
# but the implementation of send_error does not
252
# allows that. So far.
253
self.send_error(416, "Requested range not satisfiable")
257
(start, end) = ranges[0]
258
self.get_single_range(file, file_size, start, end)
260
self.get_multiple_ranges(file, file_size, ranges)
263
def translate_path(self, path):
264
"""Translate a /-separated PATH to the local filename syntax.
266
If the server requires it, proxy the path before the usual translation
268
if self.server.test_case_server.proxy_requests:
269
# We need to act as a proxy and accept absolute urls,
270
# which SimpleHTTPRequestHandler (parent) is not
271
# ready for. So we just drop the protocol://host:port
272
# part in front of the request-url (because we know
273
# we would not forward the request to *another*
276
# So we do what SimpleHTTPRequestHandler.translate_path
277
# do beginning with python 2.4.3: abandon query
278
# parameters, scheme, host port, etc (which ensure we
279
# provide the right behaviour on all python versions).
280
path = urlparse.urlparse(path)[2]
281
# And now, we can apply *our* trick to proxy files
284
return self._translate_path(path)
286
def _translate_path(self, path):
    """Translate ``path`` with the stock SimpleHTTPRequestHandler logic.

    translate_path() delegates here once it has preprocessed the request
    path; keeping the real translation in its own method lets a platform
    specific definition take its place.

    :param path: the /-separated path of the request.
    :return: whatever SimpleHTTPRequestHandler.translate_path returns for
        ``path``.
    """
    return SimpleHTTPServer.SimpleHTTPRequestHandler.translate_path(
        self, path)
290
if sys.platform == 'win32':
291
# On win32 you cannot access non-ascii filenames without
292
# decoding them into unicode first.
293
# However, under Linux, you can access bytestream paths
294
# without any problems. If this function was always active
295
# it would probably break tests when LANG=C was set
296
def _translate_path(self, path):
297
"""Translate a /-separated PATH to the local filename syntax.
299
For bzr, all url paths are considered to be utf8 paths.
300
On Linux, you can access these paths directly over the bytestream
301
request, but on win32, you must decode them, and access them
304
# abandon query parameters
305
path = urlparse.urlparse(path)[2]
306
path = posixpath.normpath(urllib.unquote(path))
307
path = path.decode('utf-8')
308
words = path.split('/')
309
words = filter(None, words)
312
drive, word = os.path.splitdrive(word)
313
head, word = os.path.split(word)
314
if word in (os.curdir, os.pardir): continue
315
path = os.path.join(path, word)
319
class TestingHTTPServerMixin:
321
def __init__(self, test_case_server):
322
# test_case_server can be used to communicate between the
323
# tests and the server (or the request handler and the
324
# server), allowing dynamic behaviors to be defined from
326
self.test_case_server = test_case_server
329
"""Called to clean-up the server.
331
Since the server may be (surely is, even) in a blocking listen, we
332
shutdown its socket before closing it.
334
# Note that this is executed as part of the implicit tear down in the
335
# main thread while the server runs in its own thread. The clean way
336
# to tear down the server is to instruct him to stop accepting
337
# connections and wait for the current connection(s) to end
338
# naturally. To end the connection naturally, the http transports
339
# should close their socket when they do not need to talk to the
340
# server anymore. This happens naturally during the garbage collection
341
# phase of the test transport objects (the server clients), so we
342
# don't have to worry about them. So, for the server, we must tear
343
# down here, from the main thread, when the test have ended. Note
344
# that since the server is in a blocking operation and since python
345
# use select internally, shutting down the socket is reliable and
348
self.socket.shutdown(socket.SHUT_RDWR)
349
except socket.error, e:
350
# WSAENOTCONN (10057) 'Socket is not connected' is harmless on
351
# windows (occurs before the first connection attempt
353
if not len(e.args) or e.args[0] != 10057:
355
# Let the server properly close the socket
358
class TestingHTTPServer(SocketServer.TCPServer, TestingHTTPServerMixin):
    """A plain (non threading) TCP based HTTP test server.

    This is the server class HttpServer selects for 'HTTP/1.0'.
    """

    def __init__(self, server_address, request_handler_class,
                 test_case_server):
        """Create the server.

        :param server_address: the (host, port) pair to bind to.
        :param request_handler_class: class instantiated for each connection.
        :param test_case_server: object shared between the tests, the server
            and the request handlers.
        """
        # Record the test case server first so that it is already available
        # when the TCP server is initialized.
        TestingHTTPServerMixin.__init__(self, test_case_server)
        SocketServer.TCPServer.__init__(self, server_address,
                                        request_handler_class)
367
class TestingThreadingHTTPServer(SocketServer.ThreadingTCPServer,
                                 TestingHTTPServerMixin):
    """A threading HTTP test server for HTTP 1.1.

    Since tests can initiate several concurrent connections to the same http
    server, we need an independent connection for each of them. We achieve that
    by spawning a new thread for each connection.
    """

    def __init__(self, server_address, request_handler_class,
                 test_case_server):
        """Create the server.

        :param server_address: the (host, port) pair to bind to.
        :param request_handler_class: class instantiated for each connection.
        :param test_case_server: object shared between the tests, the server
            and the request handlers.
        """
        TestingHTTPServerMixin.__init__(self, test_case_server)
        SocketServer.ThreadingTCPServer.__init__(self, server_address,
                                                 request_handler_class)
        # Decides how threads will act upon termination of the main
        # process. This is prophylactic: connection threads should not be
        # left running, but if one is, it must not keep the process alive.
        self.daemon_threads = True
387
class HttpServer(transport.Server):
388
"""A test server for http transports.
390
Subclasses can provide a specific request handler.
393
# The real servers depending on the protocol
394
http_server_class = {'HTTP/1.0': TestingHTTPServer,
395
'HTTP/1.1': TestingThreadingHTTPServer,
398
# Whether or not we proxy the requests (see
399
# TestingHTTPRequestHandler.translate_path).
400
proxy_requests = False
402
# used to form the url that connects to this server
403
_url_protocol = 'http'
405
def __init__(self, request_handler=TestingHTTPRequestHandler,
406
protocol_version=None):
409
:param request_handler: a class that will be instantiated to handle an
410
http connection (one or several requests).
412
:param protocol_version: if specified, will override the protocol
413
version of the request handler.
415
transport.Server.__init__(self)
416
self.request_handler = request_handler
417
self.host = 'localhost'
420
self.protocol_version = protocol_version
421
# Allows tests to verify number of GET requests issued
422
self.GET_request_nb = 0
424
def _get_httpd(self):
425
if self._httpd is None:
426
rhandler = self.request_handler
427
# Depending on the protocol version, we will create the appropriate
429
if self.protocol_version is None:
430
# Use the request handler one
431
proto_vers = rhandler.protocol_version
433
# Use our own, it will be used to override the request handler
435
proto_vers = self.protocol_version
436
# Create the appropriate server for the required protocol
437
serv_cls = self.http_server_class.get(proto_vers, None)
439
raise httplib.UnknownProtocol(proto_vers)
441
self._httpd = serv_cls((self.host, self.port), rhandler, self)
442
host, self.port = self._httpd.socket.getsockname()
445
def _http_start(self):
446
"""Server thread main entry point. """
447
self._http_running = False
450
httpd = self._get_httpd()
451
self._http_base_url = '%s://%s:%s/' % (self._url_protocol,
452
self.host, self.port)
453
self._http_running = True
455
# Whatever goes wrong, we save the exception for the main
456
# thread. Note that since we are running in a thread, no signal
457
# can be received, so we don't care about KeyboardInterrupt.
458
self._http_exception = sys.exc_info()
460
# Release the lock or the main thread will block and the whole
462
self._http_starting.release()
464
# From now on, exceptions are taken care of by the
465
# SocketServer.BaseServer or the request handler.
466
while self._http_running:
468
# Really an HTTP connection but the python framework is generic
469
# and call them requests
470
httpd.handle_request()
471
except socket.timeout:
474
def _get_remote_url(self, path):
    """Return the url under which this server exposes the local ``path``.

    :param path: a local path, using the platform separator. An absolute
        path must be below the directory recorded in
        ``self._local_path_parts``; a relative path is used as is.
    :return: the base url of the server followed by the /-separated
        remote path.
    :raises BadWebserverPath: if ``path`` is absolute but not below the
        served directory.
    """
    path_parts = path.split(os.path.sep)
    if os.path.isabs(path):
        if path_parts[:len(self._local_path_parts)] != \
               self._local_path_parts:
            raise BadWebserverPath(path, self.test_dir)
        # Drop the leading components shared with the served directory
        remote_path = '/'.join(path_parts[len(self._local_path_parts):])
    else:
        remote_path = '/'.join(path_parts)

    return self._http_base_url + remote_path
486
def log(self, format, *args):
    """Capture Server log output.

    :param format: %-style format string.
    :param args: values interpolated into ``format``; the resulting string
        is appended to ``self.logs``.
    """
    self.logs.append(format % args)
490
def setUp(self, backing_transport_server=None):
491
"""See bzrlib.transport.Server.setUp.
493
:param backing_transport_server: The transport that requests over this
494
protocol should be forwarded to. Note that this is currently not
497
# XXX: TODO: make the server back onto vfs_server rather than local
499
assert backing_transport_server is None or \
500
isinstance(backing_transport_server, local.LocalURLServer), \
501
"HTTPServer currently assumes local transport, got %s" % \
502
backing_transport_server
503
self._home_dir = os.getcwdu()
504
self._local_path_parts = self._home_dir.split(os.path.sep)
505
self._http_base_url = None
507
# Create the server thread
508
self._http_starting = threading.Lock()
509
self._http_starting.acquire()
510
self._http_thread = threading.Thread(target=self._http_start)
511
self._http_thread.setDaemon(True)
512
self._http_exception = None
513
self._http_thread.start()
515
# Wait for the server thread to start (i.e release the lock)
516
self._http_starting.acquire()
518
if self._http_exception is not None:
519
# Something went wrong during server start
520
exc_class, exc_value, exc_tb = self._http_exception
521
raise exc_class, exc_value, exc_tb
522
self._http_starting.release()
526
def tearDown(self):
    """See bzrlib.transport.Server.tearDown."""
    # Ask the underlying http server to clean up, then clear the flag
    # tested by the serving loop.
    self._httpd.tearDown()
    self._http_running = False
    # We don't need to 'self._http_thread.join()' here since the thread is
    # a daemonic one and will be garbage collected anyway. Joining just
    # slows us down for no added benefit.
534
def get_url(self):
    """See bzrlib.transport.Server.get_url."""
    # The url of the server is the remote url of its home directory
    return self._get_remote_url(self._home_dir)
537
def get_bogus_url(self):
    """See bzrlib.transport.Server.get_bogus_url."""
    # this is chosen to try to prevent trouble with proxies, weird dns,
    # etc: a numeric loopback address and port 1.
    return self._url_protocol + '://127.0.0.1:1/'
544
class HttpServer_urllib(HttpServer):
    """Subclass of HttpServer that gives http+urllib urls.

    This is for use in testing: connections to this server will always go
    through urllib where possible.
    """

    # urls returned by this server should require the urllib client impl
    _url_protocol = 'http+urllib'
555
class HttpServer_PyCurl(HttpServer):
    """Subclass of HttpServer that gives http+pycurl urls.

    This is for use in testing: connections to this server will always go
    through pycurl where possible.
    """

    # We don't care about checking the pycurl availability as
    # this server will be required only when pycurl is present

    # urls returned by this server should require the pycurl client impl
    _url_protocol = 'http+pycurl'