# Copyright (C) 2006 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

import BaseHTTPServer
import errno
import os
import posixpath
import random
import re
import socket
import sys
import threading
import urllib
import urlparse
from SimpleHTTPServer import SimpleHTTPRequestHandler

from bzrlib.transport import Server
from bzrlib.transport.local import LocalURLServer


class WebserverNotAvailable(Exception):
    """Exception type named for the case where no web server is available.

    NOTE(review): the class body is absent from this listing; the class is
    assumed to add nothing to Exception -- confirm against version control.
    """


class BadWebserverPath(ValueError):
    """ValueError for a path that lies outside the served directory.

    Instances must be created with exactly two arguments, the offending
    path and the directory it was expected to be in; the message
    interpolates them in that order.
    """

    # NOTE(review): the ``def`` line is absent from this listing; a bare
    # ``return`` building a message from self.args is assumed to be the
    # body of __str__ -- confirm against version control.
    def __str__(self):
        return 'path %s is not in %s' % self.args


class TestingHTTPRequestHandler(SimpleHTTPRequestHandler):
    """Handles one request.

    A TestingHTTPRequestHandler is instantiated for every request
    received by the associated server.

    On top of SimpleHTTPRequestHandler it sends its log lines to the test
    case server, tolerates clients that close the connection early, serves
    ``Range`` requests and can translate proxied (absolute) request urls.
    """

    def log_message(self, format, *args):
        """Send a log line to the test case server's ``log`` method.

        :param format: %-style format of the message.
        :param args: Values interpolated into ``format``.
        """
        tcs = self.server.test_case_server
        tcs.log('webserver - %s - - [%s] %s "%s" "%s"',
                self.address_string(),
                self.log_date_time_string(),
                # NOTE(review): this argument is absent from the listing;
                # the format has five placeholders and ``format``/``args``
                # are otherwise unused, so the message itself is assumed.
                format % args,
                self.headers.get('referer', '-'),
                self.headers.get('user-agent', '-'))

    def handle_one_request(self):
        """Handle a single HTTP request.

        We catch all socket errors occurring when the client closes the
        connection early to avoid polluting the test results.
        """
        try:
            SimpleHTTPRequestHandler.handle_one_request(self)
        except socket.error:
            # Fetch the exception through sys.exc_info() so that this
            # method is valid syntax on every Python version.
            e = sys.exc_info()[1]
            if (len(e.args) > 0
                and e.args[0] in (errno.EPIPE, errno.ECONNRESET,
                                  errno.ECONNABORTED,)):
                # The client went away: just stop serving this connection.
                self.close_connection = 1
            else:
                # NOTE(review): this branch is absent from the listing;
                # unrelated socket errors are assumed to propagate.
                raise

    _range_regexp = re.compile(r'^(?P<start>\d+)-(?P<end>\d+)$')
    _tail_regexp = re.compile(r'^-(?P<tail>\d+)$')

    def parse_ranges(self, ranges_header):
        """Parse the range header value and return ranges and tail.

        RFC2616 14.35 says that syntactically invalid range
        specifiers MUST be ignored. In that case, we return 0 for
        tail and [] for ranges.

        :param ranges_header: The value of the ``Range`` header.
        :return: A ``(tail, ranges)`` tuple where ``tail`` is the requested
            suffix length (0 if none) and ``ranges`` a list of
            ``(start, end)`` tuples with inclusive bounds.
        """
        tail = 0
        ranges = []
        if not ranges_header.startswith('bytes='):
            # Syntactically invalid header
            return 0, []

        ranges_header = ranges_header[len('bytes='):]
        for range_str in ranges_header.split(','):
            # FIXME: RFC2616 says end is optional and defaults to file_size
            range_match = self._range_regexp.match(range_str)
            if range_match is not None:
                start = int(range_match.group('start'))
                end = int(range_match.group('end'))
                # NOTE(review): the condition is absent from the listing;
                # RFC2616 14.35.1 makes a range invalid when its last byte
                # position is smaller than its first one.
                if start > end:
                    # Syntactically invalid range
                    return 0, []
                ranges.append((start, end))
            else:
                tail_match = self._tail_regexp.match(range_str)
                if tail_match is not None:
                    tail = int(tail_match.group('tail'))
                else:
                    # Syntactically invalid range
                    return 0, []
        return tail, ranges

    def send_range_content(self, file, start, length):
        """Write ``length`` bytes of ``file``, starting at ``start``."""
        # NOTE(review): the seek is absent from the listing; it is assumed
        # because ``start`` is otherwise unused.
        file.seek(start)
        self.wfile.write(file.read(length))

    def get_single_range(self, file, file_size, start, end):
        """Send a 206 response carrying the single range ``start``-``end``.

        :param file: Binary file object to read the content from.
        :param file_size: Total size of the file, for Content-Range.
        :param start: First byte offset (inclusive).
        :param end: Last byte offset (inclusive).
        """
        self.send_response(206)
        length = end - start + 1
        self.send_header('Accept-Ranges', 'bytes')
        self.send_header("Content-Length", "%d" % length)

        self.send_header("Content-Type", 'application/octet-stream')
        self.send_header("Content-Range", "bytes %d-%d/%d" % (start,
                                                              end,
                                                              file_size))
        self.end_headers()
        self.send_range_content(file, start, length)

    def get_multiple_ranges(self, file, file_size, ranges):
        """Send a 206 multipart/byteranges response, one part per range.

        :param file: Binary file object to read the content from.
        :param file_size: Total size of the file, for Content-Range.
        :param ranges: List of ``(start, end)`` tuples, inclusive bounds.
        """
        self.send_response(206)
        self.send_header('Accept-Ranges', 'bytes')
        boundary = "%d" % random.randint(0, 0x7FFFFFFF)
        self.send_header("Content-Type",
                         "multipart/byteranges; boundary=%s" % boundary)
        self.end_headers()
        for (start, end) in ranges:
            self.wfile.write("--%s\r\n" % boundary)
            self.send_header("Content-type", 'application/octet-stream')
            self.send_header("Content-Range", "bytes %d-%d/%d" % (start,
                                                                  end,
                                                                  file_size))
            self.end_headers()
            self.send_range_content(file, start, end - start + 1)
        # NOTE(review): indentation is lost in the listing; this last write
        # is assumed to be the closing boundary, emitted once after all the
        # parts. It is kept byte-for-byte although RFC2046 spells the
        # closing delimiter "--boundary--" -- confirm what clients expect.
        self.wfile.write("--%s\r\n" % boundary)

    def do_GET(self):
        """Serve a GET request.

        Handles the Range header. Requests without a Range header, or for
        a directory, are left to SimpleHTTPRequestHandler. A missing file
        yields a 404 and unsatisfiable or invalid ranges a 416.
        """
        path = self.translate_path(self.path)
        ranges_header_value = self.headers.get('Range')
        if ranges_header_value is None or os.path.isdir(path):
            # Let the mother class handle most cases
            return SimpleHTTPRequestHandler.do_GET(self)

        try:
            # Always read in binary mode. Opening files in text
            # mode may cause newline translations, making the
            # actual size of the content transmitted *less* than
            # the content-length!
            file = open(path, 'rb')
        except IOError:
            self.send_error(404, "File not found")
            return None

        # Whatever happens while answering, do not leak the file.
        try:
            file_size = os.fstat(file.fileno())[6]  # st_size
            tail, ranges = self.parse_ranges(ranges_header_value)
            # Normalize tail into ranges
            if tail != 0:
                # RFC2616 14.35.1: when the entity is shorter than the
                # requested suffix, the whole entity is used, so never
                # start before offset 0.
                ranges.append((max(0, file_size - tail), file_size))

            self._satisfiable_ranges = True
            if len(ranges) == 0:
                self._satisfiable_ranges = False
            else:
                def check_range(range_specifier):
                    start, end = range_specifier
                    # RFC2616 14.35, ranges are invalid if start >= file_size
                    if start >= file_size:
                        self._satisfiable_ranges = False # Side-effect !
                        # The value is irrelevant: a 416 is sent below.
                        return 0, 0
                    # RFC2616 14.35, end values should be truncated
                    # to file_size -1 if they exceed it
                    end = min(end, file_size - 1)
                    return start, end

                # A real list is needed: it is measured and indexed below.
                ranges = [check_range(r) for r in ranges]

            if not self._satisfiable_ranges:
                # RFC2616 14.16 and 14.35 say that when a server
                # encounters unsatisfiable range specifiers, it
                # SHOULD return a 416.

                # FIXME: We SHOULD send a Content-Range header too,
                # but the implementation of send_error does not
                # allow that. So far.
                self.send_error(416, "Requested range not satisfiable")
                return

            if len(ranges) == 1:
                (start, end) = ranges[0]
                self.get_single_range(file, file_size, start, end)
            else:
                self.get_multiple_ranges(file, file_size, ranges)
        finally:
            file.close()

    def translate_path(self, path):
        """Translate a /-separated PATH to the local filename syntax.

        If the server requires it, proxy the path before the usual translation
        """
        if self.server.test_case_server.proxy_requests:
            # We need to act as a proxy and accept absolute urls,
            # which SimpleHTTPRequestHandler (parent) is not
            # ready for. So we just drop the protocol://host:port
            # part in front of the request-url (because we know
            # we would not forward the request to *another*
            # proxy).

            # So we do what SimpleHTTPRequestHandler.translate_path
            # does beginning with python 2.4.3: abandon query
            # parameters, scheme, host port, etc (which ensures we
            # provide the right behaviour on all python versions).
            path = urlparse.urlparse(path)[2]
            # And now, we can apply *our* trick to proxy files
            # TODO(review): the statement applying that trick to ``path``
            # is absent from this listing -- restore it from version
            # control.

        return self._translate_path(path)

    def _translate_path(self, path):
        """Delegate to SimpleHTTPRequestHandler.translate_path."""
        return SimpleHTTPRequestHandler.translate_path(self, path)

    if sys.platform == 'win32':
        # On win32 you cannot access non-ascii filenames without
        # decoding them into unicode first.
        # However, under Linux, you can access bytestream paths
        # without any problems. If this function was always active
        # it would probably break tests when LANG=C was set
        def _translate_path(self, path):
            """Translate a /-separated PATH to the local filename syntax.

            For bzr, all url paths are considered to be utf8 paths.
            On Linux, you can access these paths directly over the bytestream
            request, but on win32, you must decode them, and access them
            as unicode paths.
            """
            # abandon query parameters
            path = urlparse.urlparse(path)[2]
            path = posixpath.normpath(urllib.unquote(path))
            path = path.decode('utf-8')
            words = path.split('/')
            words = filter(None, words)
            # NOTE(review): the loop header and its starting directory are
            # absent from the listing; the unicode current directory is
            # assumed, mirroring the stdlib translate_path.
            path = os.getcwdu()
            for word in words:
                drive, word = os.path.splitdrive(word)
                head, word = os.path.split(word)
                if word in (os.curdir, os.pardir):
                    continue
                path = os.path.join(path, word)
            return path


class TestingHTTPServer(BaseHTTPServer.HTTPServer):
    """HTTPServer that keeps a reference to the test case server."""

    def __init__(self, server_address, RequestHandlerClass,
                 test_case_server):
        """Create the server.

        :param server_address: ``(host, port)`` passed to HTTPServer.
        :param RequestHandlerClass: Handler class passed to HTTPServer.
        :param test_case_server: Object stored as ``self.test_case_server``.
        """
        BaseHTTPServer.HTTPServer.__init__(self, server_address,
                                           RequestHandlerClass)
        # test_case_server can be used to communicate between the
        # tests and the server (or the request handler and the
        # server), allowing dynamic behaviors to be defined from
        # the test cases.
        self.test_case_server = test_case_server

    def server_close(self):
        """Called to clean-up the server.

        Since the server may be in a blocking read, we shutdown the socket
        before closing it.
        """
        self.socket.shutdown(socket.SHUT_RDWR)
        BaseHTTPServer.HTTPServer.server_close(self)


class HttpServer(Server):
    """A test server for http transports.

    Subclasses can provide a specific request handler.

    The http server runs in a daemon thread started by setUp and stopped
    by tearDown; the lines logged by the request handler are collected in
    ``self.logs``.
    """

    # Whether or not we proxy the requests (see
    # TestingHTTPRequestHandler.translate_path).
    proxy_requests = False

    # used to form the url that connects to this server
    _url_protocol = 'http'

    # Subclasses can provide a specific request handler
    def __init__(self, request_handler=TestingHTTPRequestHandler):
        """Create the server object, without starting it.

        :param request_handler: Handler class given to TestingHTTPServer.
        """
        Server.__init__(self)
        self.request_handler = request_handler
        self.host = 'localhost'
        # NOTE(review): these two assignments are absent from the listing.
        # _get_httpd() reads both attributes before setting them; port 0
        # (let the OS choose) is assumed since the real port is read back
        # from the socket.
        self.port = 0
        self._httpd = None

    def _get_httpd(self):
        """Return the TestingHTTPServer, creating it on first use."""
        if self._httpd is None:
            self._httpd = TestingHTTPServer((self.host, self.port),
                                            self.request_handler,
                                            self)
            # Record the port the socket is actually bound to.
            host, self.port = self._httpd.socket.getsockname()
        return self._httpd

    def _http_start(self):
        """Thread body: publish the base url, then serve requests."""
        httpd = self._get_httpd()
        self._http_base_url = '%s://%s:%s/' % (self._url_protocol,
                                               self.host,
                                               self.port)
        # Tell setUp() that the base url is available.
        self._http_starting.release()
        # NOTE(review): this statement is absent from the listing; some
        # socket timeout is implied by the ``except socket.timeout`` below
        # but the 0.1s value is an assumption -- confirm.
        httpd.socket.settimeout(0.1)

        while self._http_running:
            try:
                httpd.handle_request()
            except socket.timeout:
                pass

    def _get_remote_url(self, path):
        """Return the url under which the local ``path`` is served.

        :param path: A path relative to the served directory, or an
            absolute path located below it.
        :raises BadWebserverPath: If an absolute ``path`` is not below the
            served directory.
        """
        path_parts = path.split(os.path.sep)
        if os.path.isabs(path):
            if path_parts[:len(self._local_path_parts)] != \
                   self._local_path_parts:
                # NOTE(review): ``test_dir`` is not assigned anywhere in
                # this listing; verify it exists or use ``_home_dir``.
                raise BadWebserverPath(path, self.test_dir)
            remote_path = '/'.join(path_parts[len(self._local_path_parts):])
        else:
            remote_path = '/'.join(path_parts)

        return self._http_base_url + remote_path

    def log(self, format, *args):
        """Capture Server log output."""
        self.logs.append(format % args)

    def setUp(self, backing_transport_server=None):
        """See bzrlib.transport.Server.setUp.

        :param backing_transport_server: The transport that requests over this
            protocol should be forwarded to. Note that this is currently not
            used: the current working directory is always what is served.
        """
        # XXX: TODO: make the server back onto vfs_server rather than local
        # disk.
        assert backing_transport_server is None or \
            isinstance(backing_transport_server, LocalURLServer), \
            "HTTPServer currently assumes local transport, got %s" % \
            backing_transport_server
        self._home_dir = os.getcwdu()
        self._local_path_parts = self._home_dir.split(os.path.sep)
        self._http_starting = threading.Lock()
        self._http_starting.acquire()
        self._http_running = True
        self._http_base_url = None
        self._http_thread = threading.Thread(target=self._http_start)
        self._http_thread.setDaemon(True)
        self._http_thread.start()
        # Wait for the server thread to start (i.e. release the lock)
        self._http_starting.acquire()
        self._http_starting.release()
        # NOTE(review): this assignment is absent from the listing; log()
        # appends to ``self.logs``, which nothing else visible creates.
        self.logs = []

    def tearDown(self):
        """See bzrlib.transport.Server.tearDown."""
        self._httpd.server_close()
        self._http_running = False
        self._http_thread.join()

    def get_url(self):
        """See bzrlib.transport.Server.get_url."""
        return self._get_remote_url(self._home_dir)

    def get_bogus_url(self):
        """See bzrlib.transport.Server.get_bogus_url."""
        # this is chosen to try to prevent trouble with proxies, weird dns,
        # etc
        return 'http://127.0.0.1:1/'


class HttpServer_urllib(HttpServer):
    """Subclass of HttpServer that gives http+urllib urls.

    This is for use in testing: connections to this server will always go
    through urllib where possible.
    """

    # urls returned by this server should require the urllib client impl
    _url_protocol = 'http+urllib'


class HttpServer_PyCurl(HttpServer):
    """Subclass of HttpServer that gives http+pycurl urls.

    This is for use in testing: connections to this server will always go
    through pycurl where possible.
    """

    # We don't care about checking the pycurl availability as
    # this server will be required only when pycurl is present

    # urls returned by this server should require the pycurl client impl
    _url_protocol = 'http+pycurl'