1
# Copyright (C) 2006-2010 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24
import SimpleHTTPServer
33
from bzrlib import transport
34
from bzrlib.tests import test_server
35
from bzrlib.transport import local
38
class BadWebserverPath(ValueError):
    """Raised when a local path is not below the directory served over HTTP.

    Expects exactly two positional arguments, in this order: the offending
    path and the directory it was expected to be in.
    """

    # NOTE(review): the ``def`` line was not present in the reviewed copy of
    # this file; ``__str__`` is inferred from the body -- confirm upstream.
    def __str__(self):
        return 'path %s is not in %s' % self.args
43
class TestingHTTPRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):
44
"""Handles one request.
46
A TestingHTTPRequestHandler is instantiated for every request received by
47
the associated server. Note that 'request' here is inherited from the base
48
TCPServer class, for the HTTP server it is really a connection which itself
49
will handle one or several HTTP requests.
51
# Default protocol version
52
protocol_version = 'HTTP/1.1'
54
# The Message-like class used to parse the request headers
55
MessageClass = httplib.HTTPMessage
58
SimpleHTTPServer.SimpleHTTPRequestHandler.setup(self)
59
self._cwd = self.server._home_dir
60
tcs = self.server.test_case_server
61
if tcs.protocol_version is not None:
62
# If the test server forced a protocol version, use it
63
self.protocol_version = tcs.protocol_version
65
def log_message(self, format, *args):
66
tcs = self.server.test_case_server
67
tcs.log('webserver - %s - - [%s] %s "%s" "%s"',
68
self.address_string(),
69
self.log_date_time_string(),
71
self.headers.get('referer', '-'),
72
self.headers.get('user-agent', '-'))
74
def handle_one_request(self):
75
"""Handle a single HTTP request.
77
We catch all socket errors occurring when the client close the
78
connection early to avoid polluting the test results.
81
SimpleHTTPServer.SimpleHTTPRequestHandler.handle_one_request(self)
82
except socket.error, e:
83
# Any socket error should close the connection, but some errors are
84
# due to the client closing early and we don't want to pollute test
85
# results, so we raise only the others.
86
self.close_connection = 1
88
or e.args[0] not in (errno.EPIPE, errno.ECONNRESET,
89
errno.ECONNABORTED, errno.EBADF)):
92
_range_regexp = re.compile(r'^(?P<start>\d+)-(?P<end>\d+)$')
93
_tail_regexp = re.compile(r'^-(?P<tail>\d+)$')
95
def parse_ranges(self, ranges_header):
96
"""Parse the range header value and returns ranges and tail.
98
RFC2616 14.35 says that syntactically invalid range
99
specifiers MUST be ignored. In that case, we return 0 for
100
tail and [] for ranges.
104
if not ranges_header.startswith('bytes='):
105
# Syntactically invalid header
108
ranges_header = ranges_header[len('bytes='):]
109
for range_str in ranges_header.split(','):
110
# FIXME: RFC2616 says end is optional and default to file_size
111
range_match = self._range_regexp.match(range_str)
112
if range_match is not None:
113
start = int(range_match.group('start'))
114
end = int(range_match.group('end'))
116
# Syntactically invalid range
118
ranges.append((start, end))
120
tail_match = self._tail_regexp.match(range_str)
121
if tail_match is not None:
122
tail = int(tail_match.group('tail'))
124
# Syntactically invalid range
128
def _header_line_length(self, keyword, value):
129
header_line = '%s: %s\r\n' % (keyword, value)
130
return len(header_line)
133
"""Overrides base implementation to work around a bug in python2.5."""
134
path = self.translate_path(self.path)
135
if os.path.isdir(path) and not self.path.endswith('/'):
136
# redirect browser - doing basically what apache does when
137
# DirectorySlash option is On which is quite common (braindead, but
139
self.send_response(301)
140
self.send_header("Location", self.path + "/")
141
# Indicates that the body is empty for HTTP/1.1 clients
142
self.send_header('Content-Length', '0')
146
return SimpleHTTPServer.SimpleHTTPRequestHandler.send_head(self)
148
def send_range_content(self, file, start, length):
150
self.wfile.write(file.read(length))
152
def get_single_range(self, file, file_size, start, end):
153
self.send_response(206)
154
length = end - start + 1
155
self.send_header('Accept-Ranges', 'bytes')
156
self.send_header("Content-Length", "%d" % length)
158
self.send_header("Content-Type", 'application/octet-stream')
159
self.send_header("Content-Range", "bytes %d-%d/%d" % (start,
163
self.send_range_content(file, start, length)
165
def get_multiple_ranges(self, file, file_size, ranges):
166
self.send_response(206)
167
self.send_header('Accept-Ranges', 'bytes')
168
boundary = '%d' % random.randint(0,0x7FFFFFFF)
169
self.send_header('Content-Type',
170
'multipart/byteranges; boundary=%s' % boundary)
171
boundary_line = '--%s\r\n' % boundary
172
# Calculate the Content-Length
174
for (start, end) in ranges:
175
content_length += len(boundary_line)
176
content_length += self._header_line_length(
177
'Content-type', 'application/octet-stream')
178
content_length += self._header_line_length(
179
'Content-Range', 'bytes %d-%d/%d' % (start, end, file_size))
180
content_length += len('\r\n') # end headers
181
content_length += end - start + 1
182
content_length += len(boundary_line)
183
self.send_header('Content-length', content_length)
186
# Send the multipart body
187
for (start, end) in ranges:
188
self.wfile.write(boundary_line)
189
self.send_header('Content-type', 'application/octet-stream')
190
self.send_header('Content-Range', 'bytes %d-%d/%d'
191
% (start, end, file_size))
193
self.send_range_content(file, start, end - start + 1)
195
self.wfile.write(boundary_line)
198
"""Serve a GET request.
200
Handles the Range header.
203
self.server.test_case_server.GET_request_nb += 1
205
path = self.translate_path(self.path)
206
ranges_header_value = self.headers.get('Range')
207
if ranges_header_value is None or os.path.isdir(path):
208
# Let the mother class handle most cases
209
return SimpleHTTPServer.SimpleHTTPRequestHandler.do_GET(self)
212
# Always read in binary mode. Opening files in text
213
# mode may cause newline translations, making the
214
# actual size of the content transmitted *less* than
215
# the content-length!
216
file = open(path, 'rb')
218
self.send_error(404, "File not found")
221
file_size = os.fstat(file.fileno())[6]
222
tail, ranges = self.parse_ranges(ranges_header_value)
223
# Normalize tail into ranges
225
ranges.append((file_size - tail, file_size))
227
self._satisfiable_ranges = True
229
self._satisfiable_ranges = False
231
def check_range(range_specifier):
232
start, end = range_specifier
233
# RFC2616 14.35, ranges are invalid if start >= file_size
234
if start >= file_size:
235
self._satisfiable_ranges = False # Side-effect !
237
# RFC2616 14.35, end values should be truncated
238
# to file_size -1 if they exceed it
239
end = min(end, file_size - 1)
242
ranges = map(check_range, ranges)
244
if not self._satisfiable_ranges:
245
# RFC2616 14.16 and 14.35 says that when a server
246
# encounters unsatisfiable range specifiers, it
247
# SHOULD return a 416.
249
# FIXME: We SHOULD send a Content-Range header too,
250
# but the implementation of send_error does not
251
# allows that. So far.
252
self.send_error(416, "Requested range not satisfiable")
256
(start, end) = ranges[0]
257
self.get_single_range(file, file_size, start, end)
259
self.get_multiple_ranges(file, file_size, ranges)
262
def translate_path(self, path):
263
"""Translate a /-separated PATH to the local filename syntax.
265
If the server requires it, proxy the path before the usual translation
267
if self.server.test_case_server.proxy_requests:
268
# We need to act as a proxy and accept absolute urls,
269
# which SimpleHTTPRequestHandler (parent) is not
270
# ready for. So we just drop the protocol://host:port
271
# part in front of the request-url (because we know
272
# we would not forward the request to *another*
275
# So we do what SimpleHTTPRequestHandler.translate_path
276
# do beginning with python 2.4.3: abandon query
277
# parameters, scheme, host port, etc (which ensure we
278
# provide the right behaviour on all python versions).
279
path = urlparse.urlparse(path)[2]
280
# And now, we can apply *our* trick to proxy files
283
return self._translate_path(path)
285
def _translate_path(self, path):
286
"""Translate a /-separated PATH to the local filename syntax.
288
Note that we're translating http URLs here, not file URLs.
289
The URL root location is the server's startup directory.
290
Components that mean special things to the local file system
291
(e.g. drive or directory names) are ignored. (XXX They should
292
probably be diagnosed.)
294
Override from python standard library to stop it calling os.getcwd()
296
# abandon query parameters
297
path = urlparse.urlparse(path)[2]
298
path = posixpath.normpath(urllib.unquote(path))
299
path = path.decode('utf-8')
300
words = path.split('/')
301
words = filter(None, words)
303
for num, word in enumerate(words):
305
drive, word = os.path.splitdrive(word)
306
head, word = os.path.split(word)
307
if word in (os.curdir, os.pardir): continue
308
path = os.path.join(path, word)
312
class TestingHTTPServerMixin:
314
def __init__(self, test_case_server):
315
# test_case_server can be used to communicate between the
316
# tests and the server (or the request handler and the
317
# server), allowing dynamic behaviors to be defined from
319
self.test_case_server = test_case_server
320
self._home_dir = test_case_server._home_dir
322
def stop_server(self):
323
"""Called to clean-up the server.
325
Since the server may be (surely is, even) in a blocking listen, we
326
shutdown its socket before closing it.
328
# Note that is this executed as part of the implicit tear down in the
329
# main thread while the server runs in its own thread. The clean way
330
# to tear down the server is to instruct him to stop accepting
331
# connections and wait for the current connection(s) to end
332
# naturally. To end the connection naturally, the http transports
333
# should close their socket when they do not need to talk to the
334
# server anymore. This happens naturally during the garbage collection
335
# phase of the test transport objetcs (the server clients), so we
336
# don't have to worry about them. So, for the server, we must tear
337
# down here, from the main thread, when the test have ended. Note
338
# that since the server is in a blocking operation and since python
339
# use select internally, shutting down the socket is reliable and
342
self.socket.shutdown(socket.SHUT_RDWR)
343
except socket.error, e:
344
# WSAENOTCONN (10057) 'Socket is not connected' is harmless on
345
# windows (occurs before the first connection attempt
348
# 'Socket is not connected' can also occur on OSX, with a
349
# "regular" ENOTCONN (when something went wrong during test case
350
# setup leading to self.setUp() *not* being called but
351
# self.stop_server() still being called -- vila20081106
352
if not len(e.args) or e.args[0] not in (errno.ENOTCONN, 10057):
354
# Let the server properly close the socket
358
class TestingHTTPServer(SocketServer.TCPServer, TestingHTTPServerMixin):
    """A single-threaded test HTTP server (used for HTTP/1.0)."""

    def __init__(self, server_address, request_handler_class,
                 test_case_server):
        """Bind the server.

        :param server_address: the (host, port) tuple to listen on.
        :param request_handler_class: instantiated for each connection.
        :param test_case_server: the object tests use to talk to the server
            and its request handlers.
        """
        # The mixin goes first so that test_case_server and _home_dir are
        # already set when TCPServer.__init__ runs.
        TestingHTTPServerMixin.__init__(self, test_case_server)
        SocketServer.TCPServer.__init__(self, server_address,
                                        request_handler_class)
367
class TestingThreadingHTTPServer(SocketServer.ThreadingTCPServer,
368
TestingHTTPServerMixin):
369
"""A threading HTTP test server for HTTP 1.1.
371
Since tests can initiate several concurrent connections to the same http
372
server, we need an independent connection for each of them. We achieve that
373
by spawning a new thread for each connection.
376
def __init__(self, server_address, request_handler_class,
378
TestingHTTPServerMixin.__init__(self, test_case_server)
379
SocketServer.ThreadingTCPServer.__init__(self, server_address,
380
request_handler_class)
381
# Decides how threads will act upon termination of the main
382
# process. This is prophylactic as we should not leave the threads
384
self.daemon_threads = True
386
def process_request_thread(self, request, client_address):
387
SocketServer.ThreadingTCPServer.process_request_thread(
388
self, request, client_address)
389
# Under some circumstances (as in bug #383920), we need to force the
390
# shutdown as python delays it until gc occur otherwise and the client
393
# The request process has been completed, the thread is about to
394
# die, let's shutdown the socket if we can.
395
request.shutdown(socket.SHUT_RDWR)
396
except (socket.error, select.error), e:
397
if e[0] in (errno.EBADF, errno.ENOTCONN):
398
# Right, the socket is already down
404
class HttpServer(transport.Server):
405
"""A test server for http transports.
407
Subclasses can provide a specific request handler.
410
# The real servers depending on the protocol
411
http_server_class = {'HTTP/1.0': TestingHTTPServer,
412
'HTTP/1.1': TestingThreadingHTTPServer,
415
# Whether or not we proxy the requests (see
416
# TestingHTTPRequestHandler.translate_path).
417
proxy_requests = False
419
# used to form the url that connects to this server
420
_url_protocol = 'http'
422
def __init__(self, request_handler=TestingHTTPRequestHandler,
423
protocol_version=None):
426
:param request_handler: a class that will be instantiated to handle an
427
http connection (one or several requests).
429
:param protocol_version: if specified, will override the protocol
430
version of the request handler.
432
transport.Server.__init__(self)
433
self.request_handler = request_handler
434
self.host = 'localhost'
437
self.protocol_version = protocol_version
438
# Allows tests to verify number of GET requests issued
439
self.GET_request_nb = 0
441
def create_httpd(self, serv_cls, rhandler_cls):
442
return serv_cls((self.host, self.port), self.request_handler, self)
445
return "%s(%s:%s)" % \
446
(self.__class__.__name__, self.host, self.port)
448
def _get_httpd(self):
449
if self._httpd is None:
450
rhandler = self.request_handler
451
# Depending on the protocol version, we will create the approriate
453
if self.protocol_version is None:
454
# Use the request handler one
455
proto_vers = rhandler.protocol_version
457
# Use our own, it will be used to override the request handler
459
proto_vers = self.protocol_version
460
# Create the appropriate server for the required protocol
461
serv_cls = self.http_server_class.get(proto_vers, None)
463
raise httplib.UnknownProtocol(proto_vers)
465
self._httpd = self.create_httpd(serv_cls, rhandler)
466
self.host, self.port = self._httpd.socket.getsockname()
469
def _http_start(self):
470
"""Server thread main entry point. """
471
self._http_running = False
474
httpd = self._get_httpd()
475
self._http_base_url = '%s://%s:%s/' % (self._url_protocol,
476
self.host, self.port)
477
self._http_running = True
479
# Whatever goes wrong, we save the exception for the main
480
# thread. Note that since we are running in a thread, no signal
481
# can be received, so we don't care about KeyboardInterrupt.
482
self._http_exception = sys.exc_info()
484
# Release the lock or the main thread will block and the whole
486
self._http_starting.release()
488
# From now on, exceptions are taken care of by the
489
# SocketServer.BaseServer or the request handler.
490
while self._http_running:
492
# Really an HTTP connection but the python framework is generic
493
# and call them requests
494
httpd.handle_request()
495
except socket.timeout:
497
except (socket.error, select.error), e:
498
if (e[0] == errno.EBADF
499
or (sys.platform == 'win32' and e[0] == 10038)):
500
# Starting with python-2.6, handle_request may raise socket
501
# or select exceptions when the server is shut down (as we
503
# 10038 = WSAENOTSOCK
504
# http://msdn.microsoft.com/en-us/library/ms740668%28VS.85%29.aspx
509
def _get_remote_url(self, path):
510
path_parts = path.split(os.path.sep)
511
if os.path.isabs(path):
512
if path_parts[:len(self._local_path_parts)] != \
513
self._local_path_parts:
514
raise BadWebserverPath(path, self.test_dir)
515
remote_path = '/'.join(path_parts[len(self._local_path_parts):])
517
remote_path = '/'.join(path_parts)
519
return self._http_base_url + remote_path
521
def log(self, format, *args):
522
"""Capture Server log output."""
523
self.logs.append(format % args)
525
def start_server(self, backing_transport_server=None):
526
"""See bzrlib.transport.Server.start_server.
528
:param backing_transport_server: The transport that requests over this
529
protocol should be forwarded to. Note that this is currently not
532
# XXX: TODO: make the server back onto vfs_server rather than local
534
if not (backing_transport_server is None
535
or isinstance(backing_transport_server,
536
test_server.LocalURLServer)):
537
raise AssertionError(
538
"HTTPServer currently assumes local transport, got %s" % \
539
backing_transport_server)
540
self._home_dir = os.getcwdu()
541
self._local_path_parts = self._home_dir.split(os.path.sep)
542
self._http_base_url = None
544
# Create the server thread
545
self._http_starting = threading.Lock()
546
self._http_starting.acquire()
547
self._http_thread = threading.Thread(target=self._http_start)
548
self._http_thread.setDaemon(True)
549
self._http_exception = None
550
self._http_thread.start()
552
# Wait for the server thread to start (i.e release the lock)
553
self._http_starting.acquire()
555
if self._http_exception is not None:
556
# Something went wrong during server start
557
exc_class, exc_value, exc_tb = self._http_exception
558
raise exc_class, exc_value, exc_tb
559
self._http_starting.release()
562
def stop_server(self):
563
self._httpd.stop_server()
564
self._http_running = False
565
# We don't need to 'self._http_thread.join()' here since the thread is
566
# a daemonic one and will be garbage collected anyway. Joining just
567
# slows us down for no added benefit.
570
"""See bzrlib.transport.Server.get_url."""
571
return self._get_remote_url(self._home_dir)
573
def get_bogus_url(self):
574
"""See bzrlib.transport.Server.get_bogus_url."""
575
# this is chosen to try to prevent trouble with proxies, weird dns,
577
return self._url_protocol + '://127.0.0.1:1/'
580
class HttpServer_urllib(HttpServer):
    """Subclass of HttpServer that gives http+urllib urls.

    This is for use in testing: connections to this server will always go
    through urllib where possible.
    """

    # urls returned by this server should require the urllib client impl
    _url_protocol = 'http+urllib'
591
class HttpServer_PyCurl(HttpServer):
    """Subclass of HttpServer that gives http+pycurl urls.

    This is for use in testing: connections to this server will always go
    through pycurl where possible.
    """

    # We don't care about checking the pycurl availability as
    # this server will be required only when pycurl is present

    # urls returned by this server should require the pycurl client impl
    _url_protocol = 'http+pycurl'