~bzr-pqm/bzr/bzr.dev : contents of bzrlib/tests/HttpServer.py at revision 2178.4.1

~bzr-pqm/bzr/bzr.dev : (revision 2178.4.1)

# Copyright (C) 2006 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

import BaseHTTPServer
import errno
import os
from SimpleHTTPServer import SimpleHTTPRequestHandler
import socket
import posixpath
import random
import re
import sys
import threading
import time
import urllib
import urlparse

from bzrlib.transport import Server


class WebserverNotAvailable(Exception):
    """Exception signalling that no web server is available."""


class BadWebserverPath(ValueError):
    """A path that is not located below the expected directory.

    The exception must be built with exactly two arguments, the path and the
    directory, since both are interpolated into the message.
    """

    def __str__(self):
        """Return the message built from the two constructor arguments."""
        message = 'path %s is not in %s'
        return message % self.args


class TestingHTTPRequestHandler(SimpleHTTPRequestHandler):
    """Request handler used by the test http server.

    On top of SimpleHTTPRequestHandler it:

    - sends log messages to the ``test_case`` attribute of the server,
    - retries reading the request line when the socket reports
      EAGAIN/EWOULDBLOCK,
    - honours the Range header of GET requests (single range, multiple
      ranges and 'last N bytes' tail ranges).
    """

    def log_message(self, format, *args):
        """Forward a log message to the test case attached to the server.

        :param format: printf-style format string.
        :param args: values interpolated into ``format``.
        """
        # self.headers may not exist yet when the message being logged is
        # about a request whose headers were never parsed.
        headers = getattr(self, 'headers', None)
        if headers is None:
            referer = user_agent = '-'
        else:
            referer = headers.get('referer', '-')
            user_agent = headers.get('user-agent', '-')
        self.server.test_case.log('webserver - %s - - [%s] %s "%s" "%s"',
                                  self.address_string(),
                                  self.log_date_time_string(),
                                  format % args,
                                  referer,
                                  user_agent)

    def handle_one_request(self):
        """Handle a single HTTP request.

        The request line is read, parsed and dispatched to the matching
        ``do_<COMMAND>`` method. Reading is retried up to 10 times (with a
        10ms pause) when the socket reports EAGAIN/EWOULDBLOCK; if every
        attempt fails the connection is closed without handling anything.
        """
        for attempt in range(10): # Don't try more than 10 times
            try:
                self.raw_requestline = self.rfile.readline()
            except socket.error:
                # Fetched this way so the syntax is valid for every python
                # version.
                e = sys.exc_info()[1]
                if e.args[0] not in (errno.EAGAIN, errno.EWOULDBLOCK):
                    raise
                # The retry is deliberately not logged: some tests look at
                # the log of the server and expect to see no errors.
                time.sleep(0.01)
            else:
                break
        else:
            # No attempt succeeded: raw_requestline is either missing or
            # left over from a previous request, so it must not be used.
            self.close_connection = 1
            return
        if not self.raw_requestline:
            self.close_connection = 1
            return
        if not self.parse_request(): # An error code has been sent, just exit
            return
        mname = 'do_' + self.command
        method = getattr(self, mname, None)
        if method is None:
            self.send_error(501, "Unsupported method (%r)" % self.command)
            return
        method()

    # 'start-end' byte range
    _range_regexp = re.compile(r'^(?P<start>\d+)-(?P<end>\d+)$')
    # '-N' range: the last N bytes
    _tail_regexp = re.compile(r'^-(?P<tail>\d+)$')

    def parse_ranges(self, ranges_header):
        """Parse the range header value and returns ranges and tail

        :param ranges_header: value of the Range header, e.g.
            'bytes=0-9,20-29,-5'.
        :return: a (tail, ranges) tuple. ``tail`` is the size of the last
            '-N' specifier found (0 if none), ``ranges`` the list of
            (start, end) integer tuples of the 'start-end' specifiers.
            Specifiers matching neither form are skipped. A header which
            does not use the 'bytes' unit yields (0, []).
        """
        tail = 0
        ranges = []
        unit_prefix = 'bytes='
        if not ranges_header.startswith(unit_prefix):
            # The header comes from the client: report "no usable range"
            # instead of failing on an assertion.
            return tail, ranges
        for range_str in ranges_header[len(unit_prefix):].split(','):
            # Tolerate blanks around the separating commas
            range_str = range_str.strip()
            range_match = self._range_regexp.match(range_str)
            if range_match is not None:
                ranges.append((int(range_match.group('start')),
                               int(range_match.group('end'))))
                continue
            tail_match = self._tail_regexp.match(range_str)
            if tail_match is not None:
                tail = int(tail_match.group('tail'))
        return tail, ranges

    def _get_valid_ranges(self, ranges_header_value, file_size):
        """Return the ranges to serve, or [] if the header must be ignored.

        :param ranges_header_value: value of the Range header.
        :param file_size: size in bytes of the requested file.
        :return: list of (start, end) tuples, both bounds inclusive and
            inside the file.
        """
        tail, ranges = self.parse_ranges(ranges_header_value)
        if tail != 0:
            # Normalize tail into ranges. Bounds are inclusive, so the last
            # byte of the file is at file_size - 1.
            ranges.append((max(0, file_size - tail), file_size - 1))
        for (start, end) in ranges:
            if start > end or start >= file_size or end >= file_size:
                return []
        return ranges

    def send_range_content(self, file, start, length):
        """Write ``length`` bytes of ``file``, read from offset ``start``."""
        file.seek(start)
        self.wfile.write(file.read(length))

    def get_single_range(self, file, file_size, start, end):
        """Send a 206 response containing a single byte range.

        :param file: file object opened in binary mode.
        :param file_size: total size of the file, used in Content-Range.
        :param start: offset of the first byte to send.
        :param end: offset of the last byte to send (inclusive).
        """
        self.send_response(206)
        length = end - start + 1
        self.send_header('Accept-Ranges', 'bytes')
        self.send_header("Content-Length", "%d" % length)

        self.send_header("Content-Type", 'application/octet-stream')
        self.send_header("Content-Range", "bytes %d-%d/%d" % (start,
                                                              end,
                                                              file_size))
        self.end_headers()
        self.send_range_content(file, start, length)

    def get_multiple_ranges(self, file, file_size, ranges):
        """Send a 206 multipart/byteranges response.

        Each range is sent as one body part; the body is terminated by a
        close-delimiter.

        :param file: file object opened in binary mode.
        :param file_size: total size of the file, used in Content-Range.
        :param ranges: list of (start, end) tuples, both bounds inclusive.
        """
        self.send_response(206)
        self.send_header('Accept-Ranges', 'bytes')
        boundary = "%d" % random.randint(0,0x7FFFFFFF)
        self.send_header("Content-Type",
                         "multipart/byteranges; boundary=%s" % boundary)
        self.end_headers()
        delimiter = "--%s\r\n" % boundary
        for (start, end) in ranges:
            self.wfile.write(delimiter)
            self.send_header("Content-type", 'application/octet-stream')
            self.send_header("Content-Range", "bytes %d-%d/%d" % (start,
                                                                  end,
                                                                  file_size))
            self.end_headers()
            self.send_range_content(file, start, end - start + 1)
            # Every delimiter but the first starts with the line break
            # ending the data of the previous part.
            delimiter = "\r\n--%s\r\n" % boundary
        # Close-delimiter: no other part follows
        self.wfile.write("\r\n--%s--\r\n" % boundary)

    def do_GET(self):
        """Serve a GET request.

        Handles the Range header. Requests without a Range header, for a
        directory, or with a Range header which can not be honoured are
        served by SimpleHTTPRequestHandler.do_GET.
        """

        path = self.translate_path(self.path)
        ranges_header_value = self.headers.get('Range')
        if ranges_header_value is None or os.path.isdir(path):
            # Let the mother class handle most cases
            return SimpleHTTPRequestHandler.do_GET(self)

        try:
            # Always read in binary mode. Opening files in text
            # mode may cause newline translations, making the
            # actual size of the content transmitted *less* than
            # the content-length!
            f = open(path, 'rb')
        except IOError:
            self.send_error(404, "File not found")
            return None

        try:
            file_size = os.fstat(f.fileno()).st_size
            ranges = self._get_valid_ranges(ranges_header_value, file_size)
            if ranges:
                if len(ranges) == 1:
                    (start, end) = ranges[0]
                    self.get_single_range(f, file_size, start, end)
                else:
                    self.get_multiple_ranges(f, file_size, ranges)
                return None
        finally:
            # Closed whatever happens; the fallback below reopens the file
            f.close()

        # RFC2616 14-16 says that invalid Range headers
        # should be ignored and in that case, the whole file
        # should be returned as if no Range header was
        # present
        return SimpleHTTPRequestHandler.do_GET(self)

    if sys.platform == 'win32':
        # On win32 you cannot access non-ascii filenames without
        # decoding them into unicode first.
        # However, under Linux, you can access bytestream paths
        # without any problems. If this function was always active
        # it would probably break tests when LANG=C was set
        def translate_path(self, path):
            """Translate a /-separated PATH to the local filename syntax.

            For bzr, all url paths are considered to be utf8 paths.
            On Linux, you can access these paths directly over the bytestream
            request, but on win32, you must decode them, and access them
            as Unicode files.

            :param path: path part of the requested url, still quoted.
            :return: unicode path below the current working directory.
            """
            # abandon query parameters
            path = urlparse.urlparse(path)[2]
            path = posixpath.normpath(urllib.unquote(path))
            path = path.decode('utf-8')
            words = path.split('/')
            words = filter(None, words)
            path = os.getcwdu()
            for word in words:
                # Keep only the last component of each word so that a drive
                # or a directory part can not escape the served directory.
                drive, word = os.path.splitdrive(word)
                head, word = os.path.split(word)
                if word in (os.curdir, os.pardir):
                    continue
                path = os.path.join(path, word)
            return path


class TestingHTTPServer(BaseHTTPServer.HTTPServer):
    """HTTPServer which remembers the object it was created for."""

    def __init__(self, server_address, RequestHandlerClass, test_case):
        """Create the server and keep a reference to test_case.

        :param server_address: address given to HTTPServer.
        :param RequestHandlerClass: handler class given to HTTPServer.
        :param test_case: object stored as the ``test_case`` attribute.
        """
        base_init = BaseHTTPServer.HTTPServer.__init__
        base_init(self, server_address, RequestHandlerClass)
        self.test_case = test_case


class HttpServer(Server):
    """A test server for http transports.

    The http server runs in a daemon thread started by setUp() and stopped
    by tearDown(). It serves the directory which was current when setUp()
    was called.

    Subclasses can provide a specific request handler.
    """

    # used to form the url that connects to this server
    _url_protocol = 'http'

    # Subclasses can provide a specific request handler
    def __init__(self, request_handler=TestingHTTPRequestHandler):
        """Create the server object; nothing is started until setUp().

        :param request_handler: request handler class given to the http
            server.
        """
        Server.__init__(self)
        self.request_handler = request_handler

    def _get_httpd(self):
        """Create the http server, bound to a free port of localhost."""
        return TestingHTTPServer(('localhost', 0),
                                  self.request_handler,
                                  self)

    def _http_start(self):
        """Body of the server thread: serve requests until told to stop."""
        try:
            httpd = self._get_httpd()
            host, port = httpd.socket.getsockname()
            self._http_base_url = '%s://localhost:%s/' % (self._url_protocol,
                                                          port)
        finally:
            # Always wake up _get_remote_url(), even when the server could
            # not be created; it would wait forever otherwise.
            self._http_starting.release()
        # Wake up regularly to check whether tearDown() asked us to stop
        httpd.socket.settimeout(0.1)

        try:
            while self._http_running:
                try:
                    httpd.handle_request()
                except socket.timeout:
                    pass
        finally:
            # Do not leave the listening socket open behind us
            httpd.server_close()

    def _get_remote_url(self, path):
        """Return the url under which the server exposes a local path.

        :param path: local path; when absolute it must be below the
            directory which was current when setUp() was called.
        :raises BadWebserverPath: if an absolute path is not below that
            directory.
        :raises WebserverNotAvailable: if the http server failed to start.
        """
        path_parts = path.split(os.path.sep)
        if os.path.isabs(path):
            if path_parts[:len(self._local_path_parts)] != \
                   self._local_path_parts:
                # _home_dir is the directory _local_path_parts comes from
                raise BadWebserverPath(path, self._home_dir)
            remote_path = '/'.join(path_parts[len(self._local_path_parts):])
        else:
            remote_path = '/'.join(path_parts)

        # Wait until the server thread has published its base url
        self._http_starting.acquire()
        self._http_starting.release()
        if self._http_base_url is None:
            raise WebserverNotAvailable('the http server failed to start')
        return self._http_base_url + remote_path

    def log(self, format, *args):
        """Capture Server log output."""
        self.logs.append(format % args)

    def setUp(self):
        """See bzrlib.transport.Server.setUp."""
        self._home_dir = os.getcwdu()
        self._local_path_parts = self._home_dir.split(os.path.sep)
        # Everything the server thread may use must exist before it starts
        self.logs = []
        self._http_proxy = os.environ.get("http_proxy")
        if self._http_proxy is not None:
            del os.environ["http_proxy"]
        self._http_starting = threading.Lock()
        # Held until the server thread has set _http_base_url
        self._http_starting.acquire()
        self._http_running = True
        self._http_base_url = None
        self._http_thread = threading.Thread(target=self._http_start)
        self._http_thread.setDaemon(True)
        self._http_thread.start()

    def tearDown(self):
        """See bzrlib.transport.Server.tearDown."""
        self._http_running = False
        self._http_thread.join()
        # Put back the proxy setting removed by setUp()
        if self._http_proxy is not None:
            os.environ["http_proxy"] = self._http_proxy

    def get_url(self):
        """See bzrlib.transport.Server.get_url."""
        return self._get_remote_url(self._home_dir)

    def get_bogus_url(self):
        """See bzrlib.transport.Server.get_bogus_url."""
        # this is chosen to try to prevent trouble with proxies, weird dns,
        # etc
        return 'http://127.0.0.1:1/'


class HttpServer_urllib(HttpServer):
    """HttpServer variant whose urls use the http+urllib scheme.

    Meant for tests: such urls ask for the urllib based client
    implementation, so connections to this server go through urllib where
    possible.
    """

    # scheme of the urls built for this server
    _url_protocol = 'http+urllib'


class HttpServer_PyCurl(HttpServer):
    """HttpServer variant whose urls use the http+pycurl scheme.

    Meant for tests: such urls ask for the pycurl based client
    implementation, so connections to this server go through pycurl where
    possible.
    """

    # The availability of pycurl is not checked here: this server is only
    # expected to be used when pycurl is present.

    # scheme of the urls built for this server
    _url_protocol = 'http+pycurl'

2004.1.40 by v.ladeuil+lp at free Fix the race condition again and correct some small typos to be in	1	# Copyright (C) 2006 Canonical Ltd
2004.1.25 by v.ladeuil+lp at free Shuffle http related test code. Hopefully it ends up at the right place :)	2	#
	3	# This program is free software; you can redistribute it and/or modify
	4	# it under the terms of the GNU General Public License as published by
	5	# the Free Software Foundation; either version 2 of the License, or
	6	# (at your option) any later version.
	7	#
	8	# This program is distributed in the hope that it will be useful,
	9	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	10	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	11	# GNU General Public License for more details.
	12	#
	13	# You should have received a copy of the GNU General Public License
	14	# along with this program; if not, write to the Free Software
	15	# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
	16
	17	import BaseHTTPServer
	18	import errno
	19	import os
	20	from SimpleHTTPServer import SimpleHTTPRequestHandler
	21	import socket
2146.1.1 by Alexander Belchenko fixes for test suite: forgotten imports in HttpServer.py	22	import posixpath
2004.1.25 by v.ladeuil+lp at free Shuffle http related test code. Hopefully it ends up at the right place :)	23	import random
	24	import re
	25	import sys
	26	import threading
	27	import time
2146.1.1 by Alexander Belchenko fixes for test suite: forgotten imports in HttpServer.py	28	import urllib
	29	import urlparse
2004.1.25 by v.ladeuil+lp at free Shuffle http related test code. Hopefully it ends up at the right place :)	30
	31	from bzrlib.transport import Server
	32
	33
	34	class WebserverNotAvailable(Exception):
	35	pass
	36
	37
	38	class BadWebserverPath(ValueError):
	39	def __str__(self):
	40	return 'path %s is not in %s' % self.args
	41
	42
	43	class TestingHTTPRequestHandler(SimpleHTTPRequestHandler):
	44
	45	def log_message(self, format, *args):
	46	self.server.test_case.log('webserver - %s - - [%s] %s "%s" "%s"',
	47	self.address_string(),
	48	self.log_date_time_string(),
	49	format % args,
	50	self.headers.get('referer', '-'),
	51	self.headers.get('user-agent', '-'))
	52
	53	def handle_one_request(self):
	54	"""Handle a single HTTP request.
	55
	56	You normally don't need to override this method; see the class
	57	__doc__ string for information on how to handle specific HTTP
	58	commands such as GET and POST.
	59
	60	"""
	61	for i in xrange(1,11): # Don't try more than 10 times
	62	try:
	63	self.raw_requestline = self.rfile.readline()
	64	except socket.error, e:
	65	if e.args[0] in (errno.EAGAIN, errno.EWOULDBLOCK):
	66	# omitted for now because some tests look at the log of
	67	# the server and expect to see no errors. see recent
	68	# email thread. -- mbp 20051021.
	69	## self.log_message('EAGAIN (%d) while reading from raw_requestline' % i)
	70	time.sleep(0.01)
	71	continue
	72	raise
	73	else:
	74	break
	75	if not self.raw_requestline:
	76	self.close_connection = 1
	77	return
	78	if not self.parse_request(): # An error code has been sent, just exit
	79	return
	80	mname = 'do_' + self.command
	81	if getattr(self, mname, None) is None:
	82	self.send_error(501, "Unsupported method (%r)" % self.command)
	83	return
	84	method = getattr(self, mname)
	85	method()
	86
	87	_range_regexp = re.compile(r'^(?P<start>\d+)-(?P<end>\d+)$')
	88	_tail_regexp = re.compile(r'^-(?P<tail>\d+)$')
	89
	90	def parse_ranges(self, ranges_header):
	91	"""Parse the range header value and returns ranges and tail"""
	92	tail = 0
	93	ranges = []
94	assert ranges_header.startswith('bytes=')
95	ranges_header = ranges_header[len('bytes='):]
96	for range_str in ranges_header.split(','):
97	range_match = self._range_regexp.match(range_str)
98	if range_match is not None:
99	ranges.append((int(range_match.group('start')),
100	int(range_match.group('end'))))
101	else:
102	tail_match = self._tail_regexp.match(range_str)
103	if tail_match is not None:
104	tail = int(tail_match.group('tail'))
105	return tail, ranges
106
107	def send_range_content(self, file, start, length):
108	file.seek(start)
109	self.wfile.write(file.read(length))
110
111	def get_single_range(self, file, file_size, start, end):
112	self.send_response(206)
113	length = end - start + 1
114	self.send_header('Accept-Ranges', 'bytes')
115	self.send_header("Content-Length", "%d" % length)
116
117	self.send_header("Content-Type", 'application/octet-stream')
118	self.send_header("Content-Range", "bytes %d-%d/%d" % (start,
119	end,
120	file_size))
121	self.end_headers()
122	self.send_range_content(file, start, length)
123
124	def get_multiple_ranges(self, file, file_size, ranges):
125	self.send_response(206)
126	self.send_header('Accept-Ranges', 'bytes')
127	boundary = "%d" % random.randint(0,0x7FFFFFFF)
128	self.send_header("Content-Type",
129	"multipart/byteranges; boundary=%s" % boundary)
130	self.end_headers()
131	for (start, end) in ranges:
132	self.wfile.write("--%s\r\n" % boundary)
133	self.send_header("Content-type", 'application/octet-stream')
134	self.send_header("Content-Range", "bytes %d-%d/%d" % (start,
135	end,
136	file_size))
137	self.end_headers()
138	self.send_range_content(file, start, end - start + 1)
139	self.wfile.write("--%s\r\n" % boundary)
140	pass
141
142	def do_GET(self):
143	"""Serve a GET request.
144
145	Handles the Range header.
146	"""
147
148	path = self.translate_path(self.path)
149	ranges_header_value = self.headers.get('Range')
150	if ranges_header_value is None or os.path.isdir(path):
151	# Let the mother class handle most cases
152	return SimpleHTTPRequestHandler.do_GET(self)
153
154	try:
155	# Always read in binary mode. Opening files in text
156	# mode may cause newline translations, making the
157	# actual size of the content transmitted less than
158	# the content-length!
159	file = open(path, 'rb')
160	except IOError:
161	self.send_error(404, "File not found")
162	return None
163
164	file_size = os.fstat(file.fileno())[6]
165	tail, ranges = self.parse_ranges(ranges_header_value)
166	# Normalize tail into ranges
167	if tail != 0:
168	ranges.append((file_size - tail, file_size))
169
170	ranges_valid = True
171	if len(ranges) == 0:
172	ranges_valid = False
173	else:
174	for (start, end) in ranges:
175	if start >= file_size or end >= file_size:
176	ranges_valid = False
177	break
178	if not ranges_valid:
179	# RFC2616 14-16 says that invalid Range headers
180	# should be ignored and in that case, the whole file
181	# should be returned as if no Range header was
182	# present
183	file.close() # Will be reopened by the following call
184	return SimpleHTTPRequestHandler.do_GET(self)
185
186	if len(ranges) == 1:
187	(start, end) = ranges[0]
188	self.get_single_range(file, file_size, start, end)
189	else:
190	self.get_multiple_ranges(file, file_size, ranges)
191	file.close()
192
193	if sys.platform == 'win32':
194	# On win32 you cannot access non-ascii filenames without
195	# decoding them into unicode first.
196	# However, under Linux, you can access bytestream paths
197	# without any problems. If this function was always active
198	# it would probably break tests when LANG=C was set
199	def translate_path(self, path):
200	"""Translate a /-separated PATH to the local filename syntax.
201
202	For bzr, all url paths are considered to be utf8 paths.
203	On Linux, you can access these paths directly over the bytestream
204	request, but on win32, you must decode them, and access them
205	as Unicode files.
206	"""
207	# abandon query parameters
208	path = urlparse.urlparse(path)[2]
209	path = posixpath.normpath(urllib.unquote(path))
210	path = path.decode('utf-8')
211	words = path.split('/')
212	words = filter(None, words)
213	path = os.getcwdu()
214	for word in words:
215	drive, word = os.path.splitdrive(word)
216	head, word = os.path.split(word)
217	if word in (os.curdir, os.pardir): continue
218	path = os.path.join(path, word)
219	return path
220
221
222	class TestingHTTPServer(BaseHTTPServer.HTTPServer):
223	def __init__(self, server_address, RequestHandlerClass, test_case):
224	BaseHTTPServer.HTTPServer.__init__(self, server_address,
225	RequestHandlerClass)
226	self.test_case = test_case
227
228
229	class HttpServer(Server):
230	"""A test server for http transports.
231
232	Subclasses can provide a specific request handler.
233	"""
234
235	# used to form the url that connects to this server
236	_url_protocol = 'http'
237
238	# Subclasses can provide a specific request handler
239	def __init__(self, request_handler=TestingHTTPRequestHandler):
240	Server.__init__(self)
241	self.request_handler = request_handler
242
2004.1.28 by v.ladeuil+lp at free Merge bzr.dev. Including http modifications by "smart" related code	243	def _get_httpd(self):
	244	return TestingHTTPServer(('localhost', 0),
	245	self.request_handler,
	246	self)
	247
2004.1.25 by v.ladeuil+lp at free Shuffle http related test code. Hopefully it ends up at the right place :)	248	def _http_start(self):
	249	httpd = None
2004.1.28 by v.ladeuil+lp at free Merge bzr.dev. Including http modifications by "smart" related code	250	httpd = self._get_httpd()
2004.1.25 by v.ladeuil+lp at free Shuffle http related test code. Hopefully it ends up at the right place :)	251	host, port = httpd.socket.getsockname()
	252	self._http_base_url = '%s://localhost:%s/' % (self._url_protocol, port)
	253	self._http_starting.release()
	254	httpd.socket.settimeout(0.1)
	255
	256	while self._http_running:
	257	try:
	258	httpd.handle_request()
	259	except socket.timeout:
	260	pass
	261
	262	def _get_remote_url(self, path):
	263	path_parts = path.split(os.path.sep)
	264	if os.path.isabs(path):
	265	if path_parts[:len(self._local_path_parts)] != \
	266	self._local_path_parts:
	267	raise BadWebserverPath(path, self.test_dir)
	268	remote_path = '/'.join(path_parts[len(self._local_path_parts):])
	269	else:
	270	remote_path = '/'.join(path_parts)
	271
	272	self._http_starting.acquire()
	273	self._http_starting.release()
	274	return self._http_base_url + remote_path
	275
	276	def log(self, format, *args):
	277	"""Capture Server log output."""
	278	self.logs.append(format % args)
	279
	280	def setUp(self):
	281	"""See bzrlib.transport.Server.setUp."""
	282	self._home_dir = os.getcwdu()
	283	self._local_path_parts = self._home_dir.split(os.path.sep)
	284	self._http_starting = threading.Lock()
	285	self._http_starting.acquire()
	286	self._http_running = True
	287	self._http_base_url = None
	288	self._http_thread = threading.Thread(target=self._http_start)
	289	self._http_thread.setDaemon(True)
	290	self._http_thread.start()
	291	self._http_proxy = os.environ.get("http_proxy")
	292	if self._http_proxy is not None:
	293	del os.environ["http_proxy"]
	294	self.logs = []
	295
	296	def tearDown(self):
	297	"""See bzrlib.transport.Server.tearDown."""
	298	self._http_running = False
	299	self._http_thread.join()
	300	if self._http_proxy is not None:
	301	import os
	302	os.environ["http_proxy"] = self._http_proxy
	303
	304	def get_url(self):
	305	"""See bzrlib.transport.Server.get_url."""
	306	return self._get_remote_url(self._home_dir)
	307
	308	def get_bogus_url(self):
	309	"""See bzrlib.transport.Server.get_bogus_url."""
	310	# this is chosen to try to prevent trouble with proxies, weird dns,
	311	# etc
	312	return 'http://127.0.0.1:1/'
	313
	314
315	class HttpServer_urllib(HttpServer):
316	"""Subclass of HttpServer that gives http+urllib urls.
317
318	This is for use in testing: connections to this server will always go
319	through urllib where possible.
320	"""
321
322	# urls returned by this server should require the urllib client impl
323	_url_protocol = 'http+urllib'
324
325
326	class HttpServer_PyCurl(HttpServer):
327	"""Subclass of HttpServer that gives http+pycurl urls.
328
329	This is for use in testing: connections to this server will always go
330	through pycurl where possible.
331	"""
332
333	# We don't care about checking the pycurl availability as
334	# this server will be required only when pycurl is present
335
336	# urls returned by this server should require the pycurl client impl
337	_url_protocol = 'http+pycurl'