# Copyright (C) 2006 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

import BaseHTTPServer
import errno
import os
import posixpath
import random
import re
from SimpleHTTPServer import SimpleHTTPRequestHandler
import socket
import sys
import threading
import time
import urllib
import urlparse

from bzrlib.transport import Server
from bzrlib.transport.local import LocalURLServer


class WebserverNotAvailable(Exception):
    """Raised when a test web server cannot be provided."""
    # NOTE(review): nothing in this file raises it; the description above is
    # taken from the class name only -- confirm against callers.
    pass


class BadWebserverPath(ValueError):
    """A local path that does not live under the directory being served.

    Instances are expected to be built with exactly two arguments,
    ``(path, served_directory)``; both are interpolated by ``__str__``.
    """

    def __str__(self):
        return 'path %s is not in %s' % self.args


44
class TestingHTTPRequestHandler(SimpleHTTPRequestHandler):
45
"""Handles one request.
47
A TestingHTTPRequestHandler is instantiated for every request
48
received by the associated server.
51
def log_message(self, format, *args):
52
tcs = self.server.test_case_server
53
tcs.log('webserver - %s - - [%s] %s "%s" "%s"',
54
self.address_string(),
55
self.log_date_time_string(),
57
self.headers.get('referer', '-'),
58
self.headers.get('user-agent', '-'))
60
def handle_one_request(self):
61
"""Handle a single HTTP request.
63
You normally don't need to override this method; see the class
64
__doc__ string for information on how to handle specific HTTP
65
commands such as GET and POST.
68
for i in xrange(1,11): # Don't try more than 10 times
70
self.raw_requestline = self.rfile.readline()
71
except socket.error, e:
72
if e.args[0] in (errno.EAGAIN, errno.EWOULDBLOCK):
73
# omitted for now because some tests look at the log of
74
# the server and expect to see no errors. see recent
75
# email thread. -- mbp 20051021.
76
## self.log_message('EAGAIN (%d) while reading from raw_requestline' % i)
82
if not self.raw_requestline:
83
self.close_connection = 1
85
if not self.parse_request(): # An error code has been sent, just exit
87
mname = 'do_' + self.command
88
if getattr(self, mname, None) is None:
89
self.send_error(501, "Unsupported method (%r)" % self.command)
91
method = getattr(self, mname)
94
_range_regexp = re.compile(r'^(?P<start>\d+)-(?P<end>\d+)$')
95
_tail_regexp = re.compile(r'^-(?P<tail>\d+)$')
97
def parse_ranges(self, ranges_header):
98
"""Parse the range header value and returns ranges and tail.
100
RFC2616 14.35 says that syntactically invalid range
101
specifiers MUST be ignored. In that case, we return 0 for
102
tail and [] for ranges.
106
if not ranges_header.startswith('bytes='):
107
# Syntactically invalid header
110
ranges_header = ranges_header[len('bytes='):]
111
for range_str in ranges_header.split(','):
112
# FIXME: RFC2616 says end is optional and default to file_size
113
range_match = self._range_regexp.match(range_str)
114
if range_match is not None:
115
start = int(range_match.group('start'))
116
end = int(range_match.group('end'))
118
# Syntactically invalid range
120
ranges.append((start, end))
122
tail_match = self._tail_regexp.match(range_str)
123
if tail_match is not None:
124
tail = int(tail_match.group('tail'))
126
# Syntactically invalid range
130
def send_range_content(self, file, start, length):
132
self.wfile.write(file.read(length))
134
def get_single_range(self, file, file_size, start, end):
135
self.send_response(206)
136
length = end - start + 1
137
self.send_header('Accept-Ranges', 'bytes')
138
self.send_header("Content-Length", "%d" % length)
140
self.send_header("Content-Type", 'application/octet-stream')
141
self.send_header("Content-Range", "bytes %d-%d/%d" % (start,
145
self.send_range_content(file, start, length)
147
def get_multiple_ranges(self, file, file_size, ranges):
148
self.send_response(206)
149
self.send_header('Accept-Ranges', 'bytes')
150
boundary = "%d" % random.randint(0,0x7FFFFFFF)
151
self.send_header("Content-Type",
152
"multipart/byteranges; boundary=%s" % boundary)
154
for (start, end) in ranges:
155
self.wfile.write("--%s\r\n" % boundary)
156
self.send_header("Content-type", 'application/octet-stream')
157
self.send_header("Content-Range", "bytes %d-%d/%d" % (start,
161
self.send_range_content(file, start, end - start + 1)
162
self.wfile.write("--%s\r\n" % boundary)
165
"""Serve a GET request.
167
Handles the Range header.
170
path = self.translate_path(self.path)
171
ranges_header_value = self.headers.get('Range')
172
if ranges_header_value is None or os.path.isdir(path):
173
# Let the mother class handle most cases
174
return SimpleHTTPRequestHandler.do_GET(self)
177
# Always read in binary mode. Opening files in text
178
# mode may cause newline translations, making the
179
# actual size of the content transmitted *less* than
180
# the content-length!
181
file = open(path, 'rb')
183
self.send_error(404, "File not found")
186
file_size = os.fstat(file.fileno())[6]
187
tail, ranges = self.parse_ranges(ranges_header_value)
188
# Normalize tail into ranges
190
ranges.append((file_size - tail, file_size))
192
self._satisfiable_ranges = True
194
self._satisfiable_ranges = False
196
def check_range(range_specifier):
197
start, end = range_specifier
198
# RFC2616 14.35, ranges are invalid if start >= file_size
199
if start >= file_size:
200
self._satisfiable_ranges = False # Side-effect !
202
# RFC2616 14.35, end values should be truncated
203
# to file_size -1 if they exceed it
204
end = min(end, file_size - 1)
207
ranges = map(check_range, ranges)
209
if not self._satisfiable_ranges:
210
# RFC2616 14.16 and 14.35 says that when a server
211
# encounters unsatisfiable range specifiers, it
212
# SHOULD return a 416.
214
# FIXME: We SHOULD send a Content-Range header too,
215
# but the implementation of send_error does not
216
# allows that. So far.
217
self.send_error(416, "Requested range not satisfiable")
221
(start, end) = ranges[0]
222
self.get_single_range(file, file_size, start, end)
224
self.get_multiple_ranges(file, file_size, ranges)
227
def translate_path(self, path):
228
"""Translate a /-separated PATH to the local filename syntax.
230
If the server requires it, proxy the path before the usual translation
232
if self.server.test_case_server.proxy_requests:
233
# We need to act as a proxy and accept absolute urls,
234
# which SimpleHTTPRequestHandler (parent) is not
235
# ready for. So we just drop the protocol://host:port
236
# part in front of the request-url (because we know
237
# we would not forward the request to *another*
240
# So we do what SimpleHTTPRequestHandler.translate_path
241
# do beginning with python 2.4.3: abandon query
242
# parameters, scheme, host port, etc (which ensure we
243
# provide the right behaviour on all python versions).
244
path = urlparse.urlparse(path)[2]
245
# And now, we can apply *our* trick to proxy files
248
return self._translate_path(path)
250
def _translate_path(self, path):
251
return SimpleHTTPRequestHandler.translate_path(self, path)
253
if sys.platform == 'win32':
254
# On win32 you cannot access non-ascii filenames without
255
# decoding them into unicode first.
256
# However, under Linux, you can access bytestream paths
257
# without any problems. If this function was always active
258
# it would probably break tests when LANG=C was set
259
def _translate_path(self, path):
260
"""Translate a /-separated PATH to the local filename syntax.
262
For bzr, all url paths are considered to be utf8 paths.
263
On Linux, you can access these paths directly over the bytestream
264
request, but on win32, you must decode them, and access them
267
# abandon query parameters
268
path = urlparse.urlparse(path)[2]
269
path = posixpath.normpath(urllib.unquote(path))
270
path = path.decode('utf-8')
271
words = path.split('/')
272
words = filter(None, words)
275
drive, word = os.path.splitdrive(word)
276
head, word = os.path.split(word)
277
if word in (os.curdir, os.pardir): continue
278
path = os.path.join(path, word)
282
class TestingHTTPServer(BaseHTTPServer.HTTPServer):
284
def __init__(self, server_address, RequestHandlerClass,
286
BaseHTTPServer.HTTPServer.__init__(self, server_address,
288
# test_case_server can be used to communicate between the
289
# tests and the server (or the request handler and the
290
# server), allowing dynamic behaviors to be defined from
292
self.test_case_server = test_case_server
class HttpServer(Server):
    """A test server for http transports.

    Subclasses can provide a specific request handler.
    """

    # Whether or not we proxy the requests (see
    # TestingHTTPRequestHandler.translate_path).
    proxy_requests = False

    # used to form the url that connects to this server
    _url_protocol = 'http'

    # Subclasses can provide a specific request handler
    def __init__(self, request_handler=TestingHTTPRequestHandler):
        """Remember the handler class; the socket is created lazily."""
        Server.__init__(self)
        self.request_handler = request_handler
        self.host = 'localhost'
        # Port 0 until the listening socket is bound; _get_httpd then
        # replaces it with the port reported by the socket.
        self.port = 0
        self._httpd = None

    def _get_httpd(self):
        """Return the TestingHTTPServer, creating it on first use."""
        if self._httpd is None:
            self._httpd = TestingHTTPServer((self.host, self.port),
                                            self.request_handler,
                                            self)
            self.port = self._httpd.socket.getsockname()[1]
        return self._httpd

    def _http_start(self):
        """Thread body: publish the base url, then serve until stopped."""
        httpd = self._get_httpd()
        self._http_base_url = '%s://%s:%s/' % (self._url_protocol,
                                               self.host,
                                               self.port)
        # Unblocks setUp, which waits on this lock for the url to be ready.
        self._http_starting.release()
        # Short timeout so the loop notices _http_running going False.
        httpd.socket.settimeout(0.1)

        while self._http_running:
            try:
                httpd.handle_request()
            except socket.timeout:
                pass

    def _get_remote_url(self, path):
        """Return the url under which local ``path`` is served.

        :param path: a local path; absolute paths must be located under
            the directory recorded by setUp.
        :raises BadWebserverPath: if an absolute ``path`` is outside that
            directory.
        """
        path_parts = path.split(os.path.sep)
        if os.path.isabs(path):
            if path_parts[:len(self._local_path_parts)] != \
                   self._local_path_parts:
                # This class never sets `test_dir`; fall back to the served
                # directory so the intended error is raised rather than an
                # AttributeError.
                served_dir = getattr(self, 'test_dir', self._home_dir)
                raise BadWebserverPath(path, served_dir)
            remote_path = '/'.join(path_parts[len(self._local_path_parts):])
        else:
            remote_path = '/'.join(path_parts)

        return self._http_base_url + remote_path

    def log(self, format, *args):
        """Capture Server log output."""
        self.logs.append(format % args)

    def setUp(self, backing_transport_server=None):
        """See bzrlib.transport.Server.setUp.

        :param backing_transport_server: The transport that requests over this
            protocol should be forwarded to. Note that this is currently not
            supported for HTTP.
        """
        # XXX: TODO: make the server back onto vfs_server rather than local
        # disk.
        assert backing_transport_server is None or \
            isinstance(backing_transport_server, LocalURLServer), \
            "HTTPServer currently assumes local transport, got %s" % \
            backing_transport_server
        self._home_dir = os.getcwdu()
        self._local_path_parts = self._home_dir.split(os.path.sep)
        self._http_starting = threading.Lock()
        self._http_starting.acquire()
        self._http_running = True
        self._http_base_url = None
        self._http_thread = threading.Thread(target=self._http_start)
        self._http_thread.setDaemon(True)
        self._http_thread.start()
        # Wait for the server thread to start (i.e release the lock)
        self._http_starting.acquire()
        self._http_starting.release()
        self.logs = []

    def tearDown(self):
        """See bzrlib.transport.Server.tearDown."""
        self._http_running = False
        self._http_thread.join()

    def get_url(self):
        """See bzrlib.transport.Server.get_url."""
        return self._get_remote_url(self._home_dir)

    def get_bogus_url(self):
        """See bzrlib.transport.Server.get_bogus_url."""
        # this is chosen to try to prevent trouble with proxies, weird dns,
        # etc
        return 'http://127.0.0.1:1/'


class HttpServer_urllib(HttpServer):
    """Subclass of HttpServer that gives http+urllib urls.

    This is for use in testing: connections to this server will always go
    through urllib where possible.
    """

    # urls returned by this server should require the urllib client impl
    _url_protocol = 'http+urllib'


class HttpServer_PyCurl(HttpServer):
    """Subclass of HttpServer that gives http+pycurl urls.

    This is for use in testing: connections to this server will always go
    through pycurl where possible.
    """

    # We don't care about checking the pycurl availability as
    # this server will be required only when pycurl is present

    # urls returned by this server should require the pycurl client impl
    _url_protocol = 'http+pycurl'