1
# Copyright (C) 2006 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20
from SimpleHTTPServer import SimpleHTTPRequestHandler
31
from bzrlib.transport import Server
32
from bzrlib.transport.local import LocalURLServer
35
# Exception subclass; judging by its name it signals that no web server
# could be used (no raise site is visible in this listing).
# NOTE(review): the class body is not visible here - confirm it against
# the original file.
class WebserverNotAvailable(Exception):
39
# ValueError subclass whose message is built from the exception arguments.
# HttpServer._get_remote_url raises it with two arguments: the offending
# path and the directory it was expected to be in.
class BadWebserverPath(ValueError):
41
# The format string has two placeholders, so self.args must hold exactly
# two items or the formatting itself raises TypeError.
# NOTE(review): the header of the enclosing method (presumably __str__)
# is not visible in this listing - confirm.
return 'path %s is not in %s' % self.args
44
# Request handler for the test HTTP server. It extends
# SimpleHTTPRequestHandler with: logging routed to the owning test server
# (log_message), tolerance for clients that drop the connection
# (handle_one_request), HTTP Range support (do_GET and helpers) and
# optional proxy-style URL handling (translate_path).
class TestingHTTPRequestHandler(SimpleHTTPRequestHandler):
45
"""Handles one request.
47
A TestingHTTPRequestHandler is instantiated for every request
48
received by the associated server.
51
# Hand each request log line to the test server object
# (self.server.test_case_server.log) so tests can inspect it.
# The line carries the client address, a timestamp and the Referer and
# User-Agent request headers ('-' when a header is absent).
def log_message(self, format, *args):
52
tcs = self.server.test_case_server
53
tcs.log('webserver - %s - - [%s] %s "%s" "%s"',
54
self.address_string(),
55
self.log_date_time_string(),
57
# NOTE(review): the format string has five placeholders but only four
# arguments are visible in this listing; the missing one (presumably the
# formatted message, `format % args`) should be confirmed.
self.headers.get('referer', '-'),
58
self.headers.get('user-agent', '-'))
60
# Wrapper around _handle_one_request() that deals with socket errors
# caused by the client closing the connection early.
# NOTE(review): the `try:` that pairs with the `except` below, the start
# of the `if (...` condition and any else/re-raise branch are not visible
# in this listing - confirm against the original file.
def handle_one_request(self):
61
"""Handle a single HTTP request.
63
We catch all socket errors occurring when the client close the
64
connection early to avoid polluting the test results.
67
self._handle_one_request()
68
except socket.error, e:
70
# Only these errno values are treated as "client went away".
and e.args[0] in (errno.EPIPE, errno.ECONNRESET,
71
errno.ECONNABORTED,)):
72
# Tell the base server loop not to reuse this connection.
self.close_connection = 1
77
# Read one request line, parse it and look up the matching do_<COMMAND>
# method, replying 501 when the handler has no such method.
# Reading the request line is attempted up to 10 times; EAGAIN and
# EWOULDBLOCK socket errors are singled out in the except branch.
# NOTE(review): the `try:`, the statements that end each retry iteration,
# the early `return`s and the final call of `method` are not visible in
# this listing - confirm against the original file.
def _handle_one_request(self):
79
Request handling as defined in the base class.
81
You normally don't need to override this method; see the class
82
__doc__ string for information on how to handle specific HTTP
83
commands such as GET and POST.
85
On some platforms, notably OS X, a lot of EAGAIN (resource temporary
86
unavailable) occur. We retry silently at most 10 times.
88
for i in xrange(1,11): # Don't try more than 10 times
90
self.raw_requestline = self.rfile.readline()
91
except socket.error, e:
92
if e.args[0] in (errno.EAGAIN, errno.EWOULDBLOCK):
93
# omitted for now because some tests look at the log of
94
# the server and expect to see no errors.  see recent
95
# email thread. -- mbp 20051021.
96
## self.log_message('EAGAIN (%d) while reading from raw_requestline' % i)
102
# An empty request line means the peer closed the connection.
if not self.raw_requestline:
103
self.close_connection = 1
105
if not self.parse_request(): # An error code has been sent, just exit
107
# Dispatch by HTTP verb: GET -> do_GET, HEAD -> do_HEAD, ...
mname = 'do_' + self.command
108
if getattr(self, mname, None) is None:
109
self.send_error(501, "Unsupported method (%r)" % self.command)
111
method = getattr(self, mname)
114
# Patterns applied by parse_ranges() to each comma-separated range
# specifier of a Range header.
# "<start>-<end>": both byte positions are required decimal numbers, so an
# open-ended "<start>-" does not match.
_range_regexp = re.compile(r'^(?P<start>\d+)-(?P<end>\d+)$')
115
# "-<tail>": suffix form; do_GET turns it into a range that starts at
# file_size - tail.
_tail_regexp = re.compile(r'^-(?P<tail>\d+)$')
117
# Parse the value of a Range request header.
#   ranges_header: raw header value, expected to start with 'bytes='.
# Each comma-separated specifier is matched against _range_regexp
# ("start-end", appended to `ranges` as an (int, int) tuple) or against
# _tail_regexp ("-N", stored in `tail`).
# The caller (do_GET) unpacks the result as `tail, ranges`.
# NOTE(review): the initialisation of `tail`/`ranges`, the `else:` lines,
# the validity check guarding the first "Syntactically invalid range"
# comment and every `return` are not visible in this listing - confirm
# against the original file.
def parse_ranges(self, ranges_header):
118
"""Parse the range header value and returns ranges and tail.
120
RFC2616 14.35 says that syntactically invalid range
121
specifiers MUST be ignored. In that case, we return 0 for
122
tail and [] for ranges.
126
if not ranges_header.startswith('bytes='):
127
# Syntactically invalid header
130
# Strip the 'bytes=' unit prefix, leaving "spec,spec,...".
ranges_header = ranges_header[len('bytes='):]
131
# Specifiers are not stripped, so whitespace around a comma makes the
# anchored regexps fail to match.
for range_str in ranges_header.split(','):
132
# FIXME: RFC2616 says end is optional and default to file_size
133
range_match = self._range_regexp.match(range_str)
134
if range_match is not None:
135
start = int(range_match.group('start'))
136
end = int(range_match.group('end'))
138
# Syntactically invalid range
140
ranges.append((start, end))
142
tail_match = self._tail_regexp.match(range_str)
143
if tail_match is not None:
144
# A later "-N" specifier overwrites an earlier one: only one tail is kept.
tail = int(tail_match.group('tail'))
146
# Syntactically invalid range
150
# Write `length` bytes read from `file` to the response stream.
#   file:   open file object to read from
#   start:  offset of the first byte of the range
#   length: number of bytes to send
# NOTE(review): `start` is not used by the only statement visible in this
# listing; presumably the file is positioned (seek) on a line that is not
# shown - confirm.
def send_range_content(self, file, start, length):
152
self.wfile.write(file.read(length))
154
# Answer a request for exactly one byte range with a 206 response.
#   file:       open file object holding the content
#   file_size:  total size of the file
#   start, end: inclusive byte positions of the range
# NOTE(review): the tail of the Content-Range header call and the call
# that terminates the header section are not visible in this listing.
def get_single_range(self, file, file_size, start, end):
155
self.send_response(206)
156
# Both bounds are inclusive, hence the +1.
length = end - start + 1
157
self.send_header('Accept-Ranges', 'bytes')
158
self.send_header("Content-Length", "%d" % length)
160
self.send_header("Content-Type", 'application/octet-stream')
161
self.send_header("Content-Range", "bytes %d-%d/%d" % (start,
165
self.send_range_content(file, start, length)
167
# Answer a request for several byte ranges with a 206
# multipart/byteranges response.
#   file:      open file object holding the content
#   file_size: total size of the file
#   ranges:    iterable of (start, end) tuples, both bounds inclusive
# Each part is introduced by a "--<boundary>" line followed by its own
# Content-type and Content-Range headers and then the bytes of the range.
# NOTE(review): the end of the Content-Range call and the calls that
# terminate each header section are not visible in this listing.
def get_multiple_ranges(self, file, file_size, ranges):
168
self.send_response(206)
169
self.send_header('Accept-Ranges', 'bytes')
170
# Random decimal number used as the multipart boundary string.
boundary = "%d" % random.randint(0,0x7FFFFFFF)
171
self.send_header("Content-Type",
172
"multipart/byteranges; boundary=%s" % boundary)
174
for (start, end) in ranges:
175
self.wfile.write("--%s\r\n" % boundary)
176
self.send_header("Content-type", 'application/octet-stream')
177
self.send_header("Content-Range", "bytes %d-%d/%d" % (start,
181
self.send_range_content(file, start, end - start + 1)
182
# NOTE(review): with indentation lost it cannot be told from this listing
# whether this write is inside the loop or after it. Either way it emits
# an ordinary delimiter; RFC 2046 specifies "--<boundary>--" as the
# closing delimiter - confirm what the clients under test expect.
self.wfile.write("--%s\r\n" % boundary)
# GET handler with support for the Range request header.
# NOTE(review): the `def` line is not visible in this listing; the name
# do_GET is inferred from the docstring and from the delegation to
# SimpleHTTPRequestHandler.do_GET below.
# Flow: requests without a Range header, or for a directory, go to the
# base class. Otherwise the file is opened, the Range header is parsed,
# each range is checked against the file size, and the reply is 416
# (unsatisfiable), a single-range 206 or a multipart 206.
# NOTE(review): the try/except around open(), the conditions in front of
# the two _satisfiable_ranges assignments, the `return`s, the
# single/multiple range test and any file.close() are not visible here.
185
"""Serve a GET request.
187
Handles the Range header.
190
path = self.translate_path(self.path)
191
ranges_header_value = self.headers.get('Range')
192
if ranges_header_value is None or os.path.isdir(path):
193
# Let the mother class handle most cases
194
return SimpleHTTPRequestHandler.do_GET(self)
197
# Always read in binary mode. Opening files in text
198
# mode may cause newline translations, making the
199
# actual size of the content transmitted *less* than
200
# the content-length!
201
file = open(path, 'rb')
203
self.send_error(404, "File not found")
206
# Index 6 of the stat result is st_size.
file_size = os.fstat(file.fileno())[6]
207
tail, ranges = self.parse_ranges(ranges_header_value)
208
# Normalize tail into ranges
210
# The end value is one past the last byte; check_range truncates it to
# file_size - 1 below.
# NOTE(review): no clamping is visible here, so a tail larger than the
# file would give a negative start - confirm how that case is handled.
ranges.append((file_size - tail, file_size))
212
self._satisfiable_ranges = True
214
self._satisfiable_ranges = False
216
# Validates one (start, end) tuple against file_size; it reports an
# unsatisfiable range through self._satisfiable_ranges.
def check_range(range_specifier):
217
start, end = range_specifier
218
# RFC2616 14.35, ranges are invalid if start >= file_size
219
if start >= file_size:
220
self._satisfiable_ranges = False # Side-effect !
222
# RFC2616 14.35, end values should be truncated
223
# to file_size -1 if they exceed it
224
end = min(end, file_size - 1)
227
# Python 2 map() is eager, so every range has been checked (and the flag
# updated) by the time the next statement runs.
ranges = map(check_range, ranges)
229
if not self._satisfiable_ranges:
230
# RFC2616 14.16 and 14.35 says that when a server
231
# encounters unsatisfiable range specifiers, it
232
# SHOULD return a 416.
234
# FIXME: We SHOULD send a Content-Range header too,
235
# but the implementation of send_error does not
236
# allow that. So far.
237
self.send_error(416, "Requested range not satisfiable")
241
(start, end) = ranges[0]
242
self.get_single_range(file, file_size, start, end)
244
self.get_multiple_ranges(file, file_size, ranges)
247
# Map the request path to a local file name.
# When the owning test server has proxy_requests set, the request target
# may be an absolute URL; only its path component is kept before the
# normal translation done by _translate_path().
# NOTE(review): the statement that implements the "trick to proxy files"
# announced by the last comment is not visible in this listing - confirm.
def translate_path(self, path):
248
"""Translate a /-separated PATH to the local filename syntax.
250
If the server requires it, proxy the path before the usual translation
252
if self.server.test_case_server.proxy_requests:
253
# We need to act as a proxy and accept absolute urls,
254
# which SimpleHTTPRequestHandler (parent) is not
255
# ready for. So we just drop the protocol://host:port
256
# part in front of the request-url (because we know
257
# we would not forward the request to *another*
260
# So we do what SimpleHTTPRequestHandler.translate_path
261
# do beginning with python 2.4.3: abandon query
262
# parameters, scheme, host port, etc (which ensure we
263
# provide the right behaviour on all python versions).
264
# Element 2 of the urlparse result is the path component.
path = urlparse.urlparse(path)[2]
265
# And now, we can apply *our* trick to proxy files
268
return self._translate_path(path)
270
# Default path translation: delegate unchanged to the base class.
# It is kept as a separate hook so it can be replaced; the win32-only
# definition further down in this class does exactly that.
def _translate_path(self, path):
271
return SimpleHTTPRequestHandler.translate_path(self, path)
273
# win32-only replacement for _translate_path: the URL path is unquoted,
# normalised and decoded from UTF-8 to unicode before the local path is
# assembled component by component.
# NOTE(review): the closing lines of the docstring, the initial value of
# `path` before the join loop, the `for word in words:` header and the
# final `return` are not visible in this listing - confirm.
if sys.platform == 'win32':
274
# On win32 you cannot access non-ascii filenames without
275
# decoding them into unicode first.
276
# However, under Linux, you can access bytestream paths
277
# without any problems. If this function was always active
278
# it would probably break tests when LANG=C was set
279
def _translate_path(self, path):
280
"""Translate a /-separated PATH to the local filename syntax.
282
For bzr, all url paths are considered to be utf8 paths.
283
On Linux, you can access these paths directly over the bytestream
284
request, but on win32, you must decode them, and access them
287
# abandon query parameters
288
path = urlparse.urlparse(path)[2]
289
path = posixpath.normpath(urllib.unquote(path))
290
path = path.decode('utf-8')
291
words = path.split('/')
292
# Drop the empty components produced by leading or doubled slashes.
words = filter(None, words)
295
# Keep only the last component of each word: any drive letter and
# directory part are discarded, and '.' / '..' are skipped.
drive, word = os.path.splitdrive(word)
296
head, word = os.path.split(word)
297
if word in (os.curdir, os.pardir): continue
298
path = os.path.join(path, word)
302
# HTTPServer subclass that keeps a reference to the test server object
# driving it; request handlers reach it as self.server.test_case_server.
# NOTE(review): the continuation lines of the __init__ signature and of
# the base-class __init__ call are not visible in this listing.
class TestingHTTPServer(BaseHTTPServer.HTTPServer):
304
def __init__(self, server_address, RequestHandlerClass,
306
BaseHTTPServer.HTTPServer.__init__(self, server_address,
308
# test_case_server can be used to communicate between the
309
# tests and the server (or the request handler and the
310
# server), allowing dynamic behaviors to be defined from
312
self.test_case_server = test_case_server
315
# Test server that serves over HTTP from a background thread.
# setUp() starts the thread and tearDown() stops it.
# The two class attributes below are the knobs subclasses override.
class HttpServer(Server):
316
"""A test server for http transports.
318
Subclasses can provide a specific request handler.
321
# Whether or not we proxy the requests (see
322
# TestingHTTPRequestHandler.translate_path).
323
proxy_requests = False
325
# used to form the url that connects to this server
326
_url_protocol = 'http'
328
# Subclasses can provide a specific request handler
329
#   request_handler: handler class that _get_httpd() passes to
#                    TestingHTTPServer.
# NOTE(review): _get_httpd() reads self.port and self._httpd, but their
# initialisation is not visible in this listing - confirm.
def __init__(self, request_handler=TestingHTTPRequestHandler):
330
Server.__init__(self)
331
self.request_handler = request_handler
332
self.host = 'localhost'
336
# Create the TestingHTTPServer on first use and cache it in self._httpd.
# NOTE(review): the last argument of the constructor call and the `return`
# are not visible in this listing; _http_start() uses the return value.
def _get_httpd(self):
337
if self._httpd is None:
338
self._httpd = TestingHTTPServer((self.host, self.port),
339
self.request_handler,
341
# Read the port actually bound back from the socket. The host part of
# the socket name goes into a local and self.host is left unchanged.
host, self.port = self._httpd.socket.getsockname()
344
# Body of the server thread started by setUp(): publish the base URL,
# signal readiness, then serve requests until _http_running is cleared.
# NOTE(review): the rest of the base-URL tuple, the `try:` paired with the
# `except` and the body of the except clause are not visible here.
def _http_start(self):
345
httpd = self._get_httpd()
346
self._http_base_url = '%s://%s:%s/' % (self._url_protocol,
349
# Short timeout so handle_request() returns regularly and the loop can
# re-check _http_running.
httpd.socket.settimeout(0.1)
350
# Unblocks setUp(), which is waiting on this lock.
self._http_starting.release()
352
while self._http_running:
354
httpd.handle_request()
355
except socket.timeout:
358
# Turn a local path into the URL that serves it from this server.
#   path: local path; when absolute it must lie under the directory
#         recorded by setUp() (self._local_path_parts).
# Returns self._http_base_url followed by the '/'-joined remote path.
# Raises BadWebserverPath for an absolute path outside that directory.
# NOTE(review): the `else:` separating the two remote_path assignments is
# not visible in this listing.
def _get_remote_url(self, path):
359
path_parts = path.split(os.path.sep)
360
if os.path.isabs(path):
361
if path_parts[:len(self._local_path_parts)] != \
362
self._local_path_parts:
363
# NOTE(review): self.test_dir is not assigned anywhere in the visible
# code (setUp stores self._home_dir) - confirm the attribute exists.
raise BadWebserverPath(path, self.test_dir)
364
remote_path = '/'.join(path_parts[len(self._local_path_parts):])
366
remote_path = '/'.join(path_parts)
368
return self._http_base_url + remote_path
370
# Append the %-formatted message to self.logs.
# TestingHTTPRequestHandler.log_message calls this for each request.
# NOTE(review): the initialisation of self.logs is not visible in this
# listing - confirm where the list is created.
def log(self, format, *args):
371
"""Capture Server log output."""
372
self.logs.append(format % args)
374
# Start the HTTP server thread and wait until it is ready.
#   backing_transport_server: must be None or a LocalURLServer; anything
#                             else trips the assertion below.
# The current working directory becomes the served root (self._home_dir).
def setUp(self, backing_transport_server=None):
375
"""See bzrlib.transport.Server.setUp.
377
:param backing_transport_server: The transport that requests over this
378
protocol should be forwarded to. Note that this is currently not
381
# XXX: TODO: make the server back onto vfs_server rather than local
383
# NOTE(review): assert statements are removed when Python runs with -O,
# so this check does not run in optimised mode.
assert backing_transport_server is None or \
384
isinstance(backing_transport_server, LocalURLServer), \
385
"HTTPServer currently assumes local transport, got %s" % \
386
backing_transport_server
387
self._home_dir = os.getcwdu()
388
# Path components that _get_remote_url() uses for its prefix test.
self._local_path_parts = self._home_dir.split(os.path.sep)
389
# The lock is taken here and released by the server thread in
# _http_start() once the base URL is set.
self._http_starting = threading.Lock()
390
self._http_starting.acquire()
391
self._http_running = True
392
self._http_base_url = None
393
self._http_thread = threading.Thread(target=self._http_start)
394
self._http_thread.setDaemon(True)
395
self._http_thread.start()
396
# Wait for the server thread to start (i.e release the lock)
397
self._http_starting.acquire()
398
self._http_starting.release()
# Stop the server: close the listening socket, clear the flag polled by
# the loop in _http_start(), then wait for the server thread to finish.
# NOTE(review): the `def` line is not visible in this listing; the name
# tearDown is taken from the docstring.
402
"""See bzrlib.transport.Server.tearDown."""
403
self._httpd.server_close()
404
self._http_running = False
405
self._http_thread.join()
# Return the URL that maps to the directory recorded by setUp(), i.e.
# the server's base URL.
# NOTE(review): the `def` line is not visible in this listing; the name
# get_url is taken from the docstring.
408
"""See bzrlib.transport.Server.get_url."""
409
return self._get_remote_url(self._home_dir)
411
# Return a fixed URL on the loopback address, port 1. It is a constant and
# does not depend on this server's state.
def get_bogus_url(self):
412
"""See bzrlib.transport.Server.get_bogus_url."""
413
# this is chosen to try to prevent trouble with proxies, weird dns,
415
return 'http://127.0.0.1:1/'
418
# HttpServer variant that differs only in the URL scheme it advertises.
# _url_protocol is what HttpServer._http_start puts in the base URL.
class HttpServer_urllib(HttpServer):
419
"""Subclass of HttpServer that gives http+urllib urls.
421
This is for use in testing: connections to this server will always go
422
through urllib where possible.
425
# urls returned by this server should require the urllib client impl
426
_url_protocol = 'http+urllib'
429
# HttpServer variant that differs only in the URL scheme it advertises.
# _url_protocol is what HttpServer._http_start puts in the base URL.
class HttpServer_PyCurl(HttpServer):
430
"""Subclass of HttpServer that gives http+pycurl urls.
432
This is for use in testing: connections to this server will always go
433
through pycurl where possible.
436
# We don't care about checking the pycurl availability as
437
# this server will be required only when pycurl is present
439
# urls returned by this server should require the pycurl client impl
440
_url_protocol = 'http+pycurl'