# Copyright (C) 2006, 2007 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

import BaseHTTPServer
import errno
import os
import posixpath
import random
import re
from SimpleHTTPServer import SimpleHTTPRequestHandler
import socket
import sys
import threading
import urllib
import urlparse

from bzrlib.transport import Server
from bzrlib.transport.local import LocalURLServer


class BadWebserverPath(ValueError):
    """Raised when a local path cannot be mapped to a URL of the server.

    Instances are built with two arguments, ``(path, directory)``: the
    offending path and the directory it was expected to live under.
    """

    def __str__(self):
        # self.args is the (path, directory) pair given to the constructor.
        return 'path %s is not in %s' % self.args


class TestingHTTPRequestHandler(SimpleHTTPRequestHandler):
    """Handles one request.

    A TestingHTTPRequestHandler is instantiated for every request
    received by the associated server.

    Compared to SimpleHTTPRequestHandler it:

    - sends its log lines to the test case server instead of stderr,
    - ignores socket errors caused by clients closing the connection early,
    - serves ``Range`` requests (single and multiple byte ranges),
    - optionally accepts absolute URLs, acting as a (fake) proxy.
    """

    def log_message(self, format, *args):
        """Send an access-log style line to the test case server.

        :param format: %-style format string of the message.
        :param args: Values interpolated into ``format``.
        """
        tcs = self.server.test_case_server
        tcs.log('webserver - %s - - [%s] %s "%s" "%s"',
                self.address_string(),
                self.log_date_time_string(),
                format % args,
                self.headers.get('referer', '-'),
                self.headers.get('user-agent', '-'))

    def handle_one_request(self):
        """Handle a single HTTP request.

        We catch all socket errors occurring when the client closes the
        connection early to avoid polluting the test results.  Any other
        socket error is re-raised.
        """
        try:
            SimpleHTTPRequestHandler.handle_one_request(self)
        except socket.error:
            # Fetch the exception through sys.exc_info() so that the code
            # does not depend on either the 'except X, e' or the
            # 'except X as e' syntax.
            e = sys.exc_info()[1]
            if (len(e.args) > 0
                and e.args[0] in (errno.EPIPE, errno.ECONNRESET,
                                  errno.ECONNABORTED,)):
                # The client went away: stop serving this connection.
                self.close_connection = 1
            else:
                raise

    _range_regexp = re.compile(r'^(?P<start>\d+)-(?P<end>\d+)$')
    _tail_regexp = re.compile(r'^-(?P<tail>\d+)$')

    def parse_ranges(self, ranges_header):
        """Parse the range header value and return the tail and the ranges.

        RFC2616 14.35 says that syntactically invalid range
        specifiers MUST be ignored. In that case, we return 0 for
        tail and [] for ranges.

        :param ranges_header: The value of the ``Range`` header.
        :return: A ``(tail, ranges)`` tuple where ``tail`` is the length of
            the requested suffix (0 if none) and ``ranges`` is a list of
            ``(start, end)`` tuples, both offsets being inclusive.
        """
        tail = 0
        ranges = []
        if not ranges_header.startswith('bytes='):
            # Syntactically invalid header
            return 0, []

        ranges_header = ranges_header[len('bytes='):]
        for range_str in ranges_header.split(','):
            # FIXME: RFC2616 says end is optional and default to file_size
            range_match = self._range_regexp.match(range_str)
            if range_match is not None:
                start = int(range_match.group('start'))
                end = int(range_match.group('end'))
                # NOTE(review): condition reconstructed from RFC2616
                # 14.35.1 (last-byte-pos must be >= first-byte-pos).
                if start > end:
                    # Syntactically invalid range
                    return 0, []
                ranges.append((start, end))
            else:
                tail_match = self._tail_regexp.match(range_str)
                if tail_match is not None:
                    tail = int(tail_match.group('tail'))
                else:
                    # Syntactically invalid range
                    return 0, []
        return tail, ranges

    def send_range_content(self, file, start, length):
        """Write ``length`` bytes of ``file``, starting at offset ``start``."""
        file.seek(start)
        self.wfile.write(file.read(length))

    def get_single_range(self, file, file_size, start, end):
        """Send a 206 response containing a single byte range.

        :param file: The opened file to serve from.
        :param file_size: The total size of the file.
        :param start: Offset of the first byte to send.
        :param end: Offset of the last byte to send (inclusive).
        """
        self.send_response(206)
        length = end - start + 1
        self.send_header('Accept-Ranges', 'bytes')
        self.send_header("Content-Length", "%d" % length)

        self.send_header("Content-Type", 'application/octet-stream')
        self.send_header("Content-Range", "bytes %d-%d/%d" % (start,
                                                              end,
                                                              file_size))
        self.end_headers()
        self.send_range_content(file, start, length)

    def get_multiple_ranges(self, file, file_size, ranges):
        """Send a 206 multipart/byteranges response.

        :param file: The opened file to serve from.
        :param file_size: The total size of the file.
        :param ranges: A list of ``(start, end)`` tuples (inclusive offsets).
        """
        self.send_response(206)
        self.send_header('Accept-Ranges', 'bytes')
        boundary = "%d" % random.randint(0, 0x7FFFFFFF)
        self.send_header("Content-Type",
                         "multipart/byteranges; boundary=%s" % boundary)
        self.end_headers()
        for (start, end) in ranges:
            self.wfile.write("--%s\r\n" % boundary)
            self.send_header("Content-type", 'application/octet-stream')
            self.send_header("Content-Range", "bytes %d-%d/%d" % (start,
                                                                  end,
                                                                  file_size))
            self.end_headers()
            self.send_range_content(file, start, end - start + 1)
        # Final boundary.
        # NOTE(review): a strict MIME close delimiter would be
        # "--boundary--"; the bytes sent are deliberately left unchanged
        # since clients are tested against them. Confirm that this line
        # belongs after the loop and not inside it.
        self.wfile.write("--%s\r\n" % boundary)

    def _check_ranges(self, ranges, file_size):
        """Check ranges against the file size and truncate their ends.

        :param ranges: A list of ``(start, end)`` tuples.
        :param file_size: The total size of the file.
        :return: The list of checked ranges, or None if the ranges are not
            satisfiable (no range at all, or a range starting at or after
            the end of the file).
        """
        if not ranges:
            return None
        checked = []
        for start, end in ranges:
            # RFC2616 14.35, ranges are invalid if start >= file_size
            if start >= file_size:
                return None
            # RFC2616 14.35, end values should be truncated
            # to file_size -1 if they exceed it
            checked.append((start, min(end, file_size - 1)))
        return checked

    def do_GET(self):
        """Serve a GET request.

        Handles the Range header.  Requests without a Range header and
        requests for directories are delegated to SimpleHTTPRequestHandler.
        """
        path = self.translate_path(self.path)
        ranges_header_value = self.headers.get('Range')
        if ranges_header_value is None or os.path.isdir(path):
            # Let the mother class handle most cases
            return SimpleHTTPRequestHandler.do_GET(self)

        try:
            # Always read in binary mode. Opening files in text
            # mode may cause newline translations, making the
            # actual size of the content transmitted *less* than
            # the content-length!
            file = open(path, 'rb')
        except IOError:
            self.send_error(404, "File not found")
            return None

        # From here on the file must be closed whatever happens.
        try:
            file_size = os.fstat(file.fileno())[6]
            tail, ranges = self.parse_ranges(ranges_header_value)
            # Normalize tail into ranges
            if tail != 0:
                # A suffix longer than the file selects the whole file
                # (RFC2616 14.35.1), never a negative offset.
                ranges.append((max(0, file_size - tail), file_size))

            ranges = self._check_ranges(ranges, file_size)
            self._satisfiable_ranges = ranges is not None

            if not self._satisfiable_ranges:
                # RFC2616 14.16 and 14.35 says that when a server
                # encounters unsatisfiable range specifiers, it
                # SHOULD return a 416.
                # FIXME: We SHOULD send a Content-Range header too,
                # but the implementation of send_error does not
                # allow that. So far.
                self.send_error(416, "Requested range not satisfiable")
                return

            if len(ranges) == 1:
                (start, end) = ranges[0]
                self.get_single_range(file, file_size, start, end)
            else:
                self.get_multiple_ranges(file, file_size, ranges)
        finally:
            file.close()

    def translate_path(self, path):
        """Translate a /-separated PATH to the local filename syntax.

        If the server requires it, proxy the path before the usual translation
        """
        if self.server.test_case_server.proxy_requests:
            # We need to act as a proxy and accept absolute urls,
            # which SimpleHTTPRequestHandler (parent) is not
            # ready for. So we just drop the protocol://host:port
            # part in front of the request-url (because we know
            # we would not forward the request to *another*
            # proxy).

            # So we do what SimpleHTTPRequestHandler.translate_path
            # does beginning with python 2.4.3: abandon query
            # parameters, scheme, host port, etc (which ensures we
            # provide the right behaviour on all python versions).
            path = urlparse.urlparse(path)[2]
            # And now, we can apply *our* trick to proxy files
            # NOTE(review): the suffix below is reconstructed; confirm it
            # matches the file names the proxy tests create.
            path += '-proxied'

        return self._translate_path(path)

    def _translate_path(self, path):
        """Default translation: defer to SimpleHTTPRequestHandler."""
        return SimpleHTTPRequestHandler.translate_path(self, path)

    if sys.platform == 'win32':
        # On win32 you cannot access non-ascii filenames without
        # decoding them into unicode first.
        # However, under Linux, you can access bytestream paths
        # without any problems. If this function was always active
        # it would probably break tests when LANG=C was set
        def _translate_path(self, path):
            """Translate a /-separated PATH to the local filename syntax.

            For bzr, all url paths are considered to be utf8 paths.
            On Linux, you can access these paths directly over the bytestream
            request, but on win32, you must decode them, and access them
            as Unicode files.
            """
            # abandon query parameters
            path = urlparse.urlparse(path)[2]
            path = posixpath.normpath(urllib.unquote(path))
            path = path.decode('utf-8')
            words = [word for word in path.split('/') if word]
            path = os.getcwdu()
            for word in words:
                # Keep only the last component so that a request can never
                # name a drive or a directory outside the served tree.
                drive, word = os.path.splitdrive(word)
                head, word = os.path.split(word)
                if word in (os.curdir, os.pardir):
                    continue
                path = os.path.join(path, word)
            return path


class TestingHTTPServer(BaseHTTPServer.HTTPServer):
    """An HTTPServer that keeps a reference to its test case server."""

    def __init__(self, server_address, request_handler_class,
                 test_case_server):
        """Create the server.

        :param server_address: The ``(host, port)`` pair to bind to.
        :param request_handler_class: The class instantiated for every
            request.
        :param test_case_server: The object the request handlers reach
            through ``self.server.test_case_server``.
        """
        BaseHTTPServer.HTTPServer.__init__(self, server_address,
                                           request_handler_class)
        # test_case_server can be used to communicate between the
        # tests and the server (or the request handler and the
        # server), allowing dynamic behaviors to be defined from
        # the test cases.
        self.test_case_server = test_case_server

    def server_close(self):
        """Called to clean-up the server.

        Since the server may be in a blocking read, we shutdown the socket
        before closing it.
        """
        self.socket.shutdown(socket.SHUT_RDWR)
        BaseHTTPServer.HTTPServer.server_close(self)


class HttpServer(Server):
    """A test server for http transports.

    Subclasses can provide a specific request handler.
    """

    # Whether or not we proxy the requests (see
    # TestingHTTPRequestHandler.translate_path).
    proxy_requests = False

    # used to form the url that connects to this server
    _url_protocol = 'http'

    # Subclasses can provide a specific request handler
    def __init__(self, request_handler=TestingHTTPRequestHandler):
        """Create the (not yet started) server.

        :param request_handler: The class instantiated to handle each
            request.
        """
        Server.__init__(self)
        self.request_handler = request_handler
        self.host = 'localhost'
        # Port 0 lets the system pick a free port; the real one is read
        # back in _get_httpd.
        self.port = 0
        self._httpd = None

    def _get_httpd(self):
        """Return the underlying TestingHTTPServer, creating it if needed."""
        if self._httpd is None:
            self._httpd = TestingHTTPServer((self.host, self.port),
                                            self.request_handler,
                                            self)
            # Record the port actually bound.
            self.port = self._httpd.socket.getsockname()[1]
        return self._httpd

    def _http_start(self):
        """Serve requests until told to stop; runs in the server thread."""
        httpd = self._get_httpd()
        self._http_base_url = '%s://%s:%s/' % (self._url_protocol,
                                               self.host,
                                               self.port)
        # Tell setUp that the base url is available.
        self._http_starting.release()

        while self._http_running:
            try:
                httpd.handle_request()
            except socket.timeout:
                pass

    def _get_remote_url(self, path):
        """Return the url under which the server exposes a local path.

        :param path: A local path, either relative or located below the
            directory the server was started in.
        :raises BadWebserverPath: If ``path`` is absolute and not below
            that directory.
        """
        path_parts = path.split(os.path.sep)
        if os.path.isabs(path):
            if path_parts[:len(self._local_path_parts)] != \
                   self._local_path_parts:
                # Report the directory the path was compared against.
                raise BadWebserverPath(path, self._home_dir)
            remote_path = '/'.join(path_parts[len(self._local_path_parts):])
        else:
            remote_path = '/'.join(path_parts)

        return self._http_base_url + remote_path

    def log(self, format, *args):
        """Capture Server log output."""
        self.logs.append(format % args)

    def setUp(self, backing_transport_server=None):
        """See bzrlib.transport.Server.setUp.

        :param backing_transport_server: The transport that requests over this
            protocol should be forwarded to. Note that this is currently not
            supported for HTTP.
        """
        # XXX: TODO: make the server back onto vfs_server rather than local
        # disk.
        assert backing_transport_server is None or \
            isinstance(backing_transport_server, LocalURLServer), \
            "HTTPServer currently assumes local transport, got %s" % \
            backing_transport_server
        self._home_dir = os.getcwdu()
        self._local_path_parts = self._home_dir.split(os.path.sep)
        # Create the log before the thread starts so that a request handler
        # can never find it missing.
        self.logs = []
        self._http_starting = threading.Lock()
        self._http_starting.acquire()
        self._http_running = True
        self._http_base_url = None
        self._http_thread = threading.Thread(target=self._http_start)
        self._http_thread.setDaemon(True)
        self._http_thread.start()
        # Wait for the server thread to start (i.e release the lock)
        self._http_starting.acquire()
        self._http_starting.release()

    def tearDown(self):
        """See bzrlib.transport.Server.tearDown."""
        # Clear the flag first: once the socket is closed the serving
        # thread leaves handle_request() and must not loop again.
        self._http_running = False
        self._httpd.server_close()
        self._http_thread.join()

    def get_url(self):
        """See bzrlib.transport.Server.get_url."""
        return self._get_remote_url(self._home_dir)

    def get_bogus_url(self):
        """See bzrlib.transport.Server.get_bogus_url."""
        # this is chosen to try to prevent trouble with proxies, weird dns,
        # etc
        return self._url_protocol + '://127.0.0.1:1/'


class HttpServer_urllib(HttpServer):
    """Subclass of HttpServer that gives http+urllib urls.

    This is for use in testing: connections to this server will always go
    through urllib where possible.
    """

    # urls returned by this server should require the urllib client impl
    _url_protocol = 'http+urllib'


class HttpServer_PyCurl(HttpServer):
    """Subclass of HttpServer that gives http+pycurl urls.

    This is for use in testing: connections to this server will always go
    through pycurl where possible.
    """

    # We don't care about checking the pycurl availability as
    # this server will be required only when pycurl is present

    # urls returned by this server should require the pycurl client impl
    _url_protocol = 'http+pycurl'