~bzr-pqm/bzr/bzr.dev

2004.1.40 by v.ladeuil+lp at free
Fix the race condition again and correct some small typos to be in
1
# Copyright (C) 2006 Canonical Ltd
2004.1.25 by v.ladeuil+lp at free
Shuffle http related test code. Hopefully it ends up at the right place :)
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
import BaseHTTPServer
18
import errno
19
import os
20
from SimpleHTTPServer import SimpleHTTPRequestHandler
21
import socket
2146.1.1 by Alexander Belchenko
fixes for test suite: forgotten imports in HttpServer.py
22
import posixpath
2004.1.25 by v.ladeuil+lp at free
Shuffle http related test code. Hopefully it ends up at the right place :)
23
import random
24
import re
25
import sys
26
import threading
27
import time
2146.1.1 by Alexander Belchenko
fixes for test suite: forgotten imports in HttpServer.py
28
import urllib
29
import urlparse
2004.1.25 by v.ladeuil+lp at free
Shuffle http related test code. Hopefully it ends up at the right place :)
30
31
from bzrlib.transport import Server
2381.1.2 by Robert Collins
Fixup the test changes made for hpss to be clean and self contained.
32
from bzrlib.transport.local import LocalURLServer
2004.1.25 by v.ladeuil+lp at free
Shuffle http related test code. Hopefully it ends up at the right place :)
33
34
35
class WebserverNotAvailable(Exception):
    """Error signalling that no web server can be used by the tests."""
37
38
39
class BadWebserverPath(ValueError):
    """A local path cannot be mapped to a url of the test web server.

    Instances are built with two arguments: the offending path and the
    directory it was expected to live in.
    """

    def __str__(self):
        # self.args holds the (path, directory) pair given at construction.
        template = 'path %s is not in %s'
        return template % self.args
42
43
44
class TestingHTTPRequestHandler(SimpleHTTPRequestHandler):
    """Handles one request.

    A TestingHTTPRequestHandler is instantiated for every request
    received by the associated server.

    Compared to SimpleHTTPRequestHandler, this handler:

    - sends its log lines to ``self.server.test_case_server.log``,
    - retries reading the request line when the read fails with
      EAGAIN/EWOULDBLOCK,
    - serves GET requests carrying a Range header (one range, several
      ranges, or a tail range),
    - can act as a proxy (see translate_path).
    """

    def log_message(self, format, *args):
        """Record a log line in the test case server.

        :param format: A %-style format string.
        :param args: The values interpolated into ``format``.
        """
        # The base class may report an error (and therefore log) before the
        # request headers have been parsed, e.g. for a malformed request
        # line. There is no self.headers to look at in that case.
        headers = getattr(self, 'headers', None)
        if headers is None:
            referer = user_agent = '-'
        else:
            referer = headers.get('referer', '-')
            user_agent = headers.get('user-agent', '-')
        tcs = self.server.test_case_server
        tcs.log('webserver - %s - - [%s] %s "%s" "%s"',
                self.address_string(),
                self.log_date_time_string(),
                format % args,
                referer,
                user_agent)

    def handle_one_request(self):
        """Read one request from the connection and dispatch it.

        The request line is read up to 10 times when the socket reports
        EAGAIN/EWOULDBLOCK. The request is then dispatched to the matching
        ``do_<COMMAND>`` method; a 501 error is sent when there is none.
        The connection is flagged for closing when no request line could be
        read at all.
        """
        for attempt in range(10):  # Don't try more than 10 times
            try:
                self.raw_requestline = self.rfile.readline()
            except socket.error:
                # sys.exc_info() gives the exception without relying on a
                # version-specific 'except' syntax.
                e = sys.exc_info()[1]
                if e.args[0] in (errno.EAGAIN, errno.EWOULDBLOCK):
                    # The retry is deliberately not logged: some tests look
                    # at the log of the server and expect to see no errors.
                    time.sleep(0.01)
                    continue
                raise
            else:
                break
        else:
            # Every attempt failed: there is no fresh request line, so do
            # not go on with a missing (or stale) self.raw_requestline.
            self.close_connection = 1
            return
        if not self.raw_requestline:
            self.close_connection = 1
            return
        if not self.parse_request():  # An error code has been sent, just exit
            return
        method = getattr(self, 'do_' + self.command, None)
        if method is None:
            self.send_error(501, "Unsupported method (%r)" % self.command)
            return
        method()

    _range_regexp = re.compile(r'^(?P<start>\d+)-(?P<end>\d+)$')
    _tail_regexp = re.compile(r'^-(?P<tail>\d+)$')

    def parse_ranges(self, ranges_header):
        """Parse the range header value and returns ranges and tail.

        RFC2616 14.35 says that syntactically invalid range
        specifiers MUST be ignored. In that case, we return 0 for
        tail and [] for ranges.

        :param ranges_header: The value of the Range header, e.g.
            'bytes=0-9,20-29' or 'bytes=-10'.
        :return: A (tail, ranges) tuple. ``tail`` is the number of bytes
            requested from the end of the file (0 if none); when several
            tail specifiers are given the last one wins. ``ranges`` is a
            list of (start, end) offsets, both inclusive.
        """
        prefix = 'bytes='
        if not ranges_header.startswith(prefix):
            # Syntactically invalid header
            return 0, []

        tail = 0
        ranges = []
        for range_str in ranges_header[len(prefix):].split(','):
            # FIXME: RFC2616 says end is optional and default to file_size
            range_match = self._range_regexp.match(range_str)
            if range_match is not None:
                start = int(range_match.group('start'))
                end = int(range_match.group('end'))
                if start > end:
                    # Syntactically invalid range
                    return 0, []
                ranges.append((start, end))
                continue
            tail_match = self._tail_regexp.match(range_str)
            if tail_match is None:
                # Syntactically invalid range
                return 0, []
            tail = int(tail_match.group('tail'))
        return tail, ranges

    def send_range_content(self, file, start, length):
        """Write ``length`` bytes of ``file``, read from offset ``start``."""
        file.seek(start)
        self.wfile.write(file.read(length))

    def get_single_range(self, file, file_size, start, end):
        """Send a 206 response made of a single range.

        :param file: The file object to read the content from.
        :param file_size: The total size of the file.
        :param start: Offset of the first byte to send.
        :param end: Offset of the last byte to send (inclusive).
        """
        self.send_response(206)
        length = end - start + 1
        self.send_header('Accept-Ranges', 'bytes')
        self.send_header("Content-Length", "%d" % length)

        self.send_header("Content-Type", 'application/octet-stream')
        self.send_header("Content-Range", "bytes %d-%d/%d" % (start,
                                                              end,
                                                              file_size))
        self.end_headers()
        self.send_range_content(file, start, length)

    def get_multiple_ranges(self, file, file_size, ranges):
        """Send a 206 multipart/byteranges response.

        :param file: The file object to read the content from.
        :param file_size: The total size of the file.
        :param ranges: A list of (start, end) offsets, both inclusive.
        """
        self.send_response(206)
        self.send_header('Accept-Ranges', 'bytes')
        boundary = "%d" % random.randint(0, 0x7FFFFFFF)
        self.send_header("Content-Type",
                         "multipart/byteranges; boundary=%s" % boundary)
        self.end_headers()
        boundary_line = "--%s\r\n" % boundary
        # NOTE(review): every part is followed by a second boundary line and
        # no closing '--boundary--' delimiter is ever sent, which is not the
        # usual MIME multipart framing. The wire format is kept as is since
        # the clients under test may rely on it -- confirm before changing.
        for (start, end) in ranges:
            self.wfile.write(boundary_line)
            self.send_header("Content-type", 'application/octet-stream')
            self.send_header("Content-Range", "bytes %d-%d/%d" % (start,
                                                                  end,
                                                                  file_size))
            self.end_headers()
            self.send_range_content(file, start, end - start + 1)
            self.wfile.write(boundary_line)

    def do_GET(self):
        """Serve a GET request.

        Handles the Range header. Requests without a Range header, or
        targeting a directory, are left to SimpleHTTPRequestHandler.
        A 404 is sent if the file cannot be opened and a 416 if the
        requested ranges cannot be satisfied.
        """
        path = self.translate_path(self.path)
        ranges_header_value = self.headers.get('Range')
        if ranges_header_value is None or os.path.isdir(path):
            # Let the mother class handle most cases
            return SimpleHTTPRequestHandler.do_GET(self)

        try:
            # Always read in binary mode. Opening files in text
            # mode may cause newline translations, making the
            # actual size of the content transmitted *less* than
            # the content-length!
            content_file = open(path, 'rb')
        except IOError:
            self.send_error(404, "File not found")
            return

        # Whatever happens while answering, the file must be closed.
        try:
            file_size = os.fstat(content_file.fileno()).st_size
            tail, ranges = self.parse_ranges(ranges_header_value)
            # Normalize tail into ranges
            if tail != 0:
                # A tail longer than the file designates the whole file; the
                # start must never become negative or seek() would fail.
                ranges.append((max(0, file_size - tail), file_size - 1))

            satisfiable = bool(ranges)
            checked_ranges = []
            for (start, end) in ranges:
                # RFC2616 14.35, ranges are invalid if start >= file_size
                if start >= file_size:
                    satisfiable = False
                    break
                # RFC2616 14.35, end values should be truncated
                # to file_size -1 if they exceed it
                checked_ranges.append((start, min(end, file_size - 1)))
            # Kept as an attribute since the previous implementation
            # exposed it.
            self._satisfiable_ranges = satisfiable

            if not satisfiable:
                # RFC2616 14.16 and 14.35 says that when a server
                # encounters unsatisfiable range specifiers, it
                # SHOULD return a 416.
                # FIXME: We SHOULD send a Content-Range header too,
                # but the implementation of send_error does not
                # allows that. So far.
                self.send_error(416, "Requested range not satisfiable")
                return

            if len(checked_ranges) == 1:
                (start, end) = checked_ranges[0]
                self.get_single_range(content_file, file_size, start, end)
            else:
                self.get_multiple_ranges(content_file, file_size,
                                         checked_ranges)
        finally:
            content_file.close()

    def translate_path(self, path):
        """Translate a /-separated PATH to the local filename syntax.

        If the server requires it, proxy the path before the usual translation
        """
        if self.server.test_case_server.proxy_requests:
            # We need to act as a proxy and accept absolute urls,
            # which SimpleHTTPRequestHandler (parent) is not
            # ready for. So we just drop the protocol://host:port
            # part in front of the request-url (because we know
            # we would not forward the request to *another*
            # proxy).

            # So we do what SimpleHTTPRequestHandler.translate_path
            # do beginning with python 2.4.3: abandon query
            # parameters, scheme, host port, etc (which ensure we
            # provide the right behaviour on all python versions).
            path = urlparse.urlparse(path)[2]
            # And now, we can apply *our* trick to proxy files
            path += '-proxied'

        return self._translate_path(path)

    def _translate_path(self, path):
        """Default translation: delegate to SimpleHTTPRequestHandler."""
        return SimpleHTTPRequestHandler.translate_path(self, path)

    if sys.platform == 'win32':
        # On win32 you cannot access non-ascii filenames without
        # decoding them into unicode first.
        # However, under Linux, you can access bytestream paths
        # without any problems. If this function was always active
        # it would probably break tests when LANG=C was set
        def _translate_path(self, path):
            """Translate a /-separated PATH to the local filename syntax.

            For bzr, all url paths are considered to be utf8 paths.
            On Linux, you can access these paths directly over the bytestream
            request, but on win32, you must decode them, and access them
            as Unicode files.
            """
            # abandon query parameters
            path = urlparse.urlparse(path)[2]
            path = posixpath.normpath(urllib.unquote(path))
            path = path.decode('utf-8')
            words = [word for word in path.split('/') if word]
            path = os.getcwdu()
            for word in words:
                # Only the last component of each word is kept, so that the
                # result stays below the current directory.
                drive, word = os.path.splitdrive(word)
                head, word = os.path.split(word)
                if word in (os.curdir, os.pardir):
                    continue
                path = os.path.join(path, word)
            return path
280
281
282
class TestingHTTPServer(BaseHTTPServer.HTTPServer):
    """An HTTPServer that knows the test server it is working for."""

    def __init__(self, server_address, RequestHandlerClass,
                 test_case_server):
        """Create the server.

        :param server_address: Passed as is to HTTPServer.
        :param RequestHandlerClass: Passed as is to HTTPServer.
        :param test_case_server: The object shared by the tests, this
            server and its request handlers.
        """
        # test_case_server can be used to communicate between the
        # tests and the server (or the request handler and the
        # server), allowing dynamic behaviors to be defined from
        # the tests cases.
        self.test_case_server = test_case_server
        base = BaseHTTPServer.HTTPServer
        base.__init__(self, server_address, RequestHandlerClass)
2004.1.25 by v.ladeuil+lp at free
Shuffle http related test code. Hopefully it ends up at the right place :)
293
294
295
class HttpServer(Server):
    """A test server for http transports.

    Subclasses can provide a specific request handler.

    The files are served from the directory that is current when setUp()
    is called, by a TestingHTTPServer running in its own daemon thread.
    """

    # Whether or not we proxy the requests (see
    # TestingHTTPRequestHandler.translate_path).
    proxy_requests = False

    # used to form the url that connects to this server
    _url_protocol = 'http'

    # Subclasses can provide a specific request handler
    def __init__(self, request_handler=TestingHTTPRequestHandler):
        """Create the server, without starting it.

        :param request_handler: The class instantiated to handle each
            request.
        """
        Server.__init__(self)
        self.request_handler = request_handler
        self.host = 'localhost'
        # 0 lets the system choose a free port when the socket is bound.
        self.port = 0
        self._httpd = None

    def _get_httpd(self):
        """Return the underlying TestingHTTPServer, creating it if needed."""
        if self._httpd is None:
            self._httpd = TestingHTTPServer((self.host, self.port),
                                            self.request_handler,
                                            self)
            # Remember the port really used. Only the second item is read so
            # that longer socket addresses are accepted too.
            self.port = self._httpd.socket.getsockname()[1]
        return self._httpd

    def _http_start(self):
        """Body of the server thread: serve requests until told to stop."""
        try:
            httpd = self._get_httpd()
            self._http_base_url = '%s://%s:%s/' % (self._url_protocol,
                                                   self.host,
                                                   self.port)
        finally:
            # Always unblock setUp(), even when the server could not be
            # created; setUp() notices the failure because _http_base_url
            # is still None.
            self._http_starting.release()
        # The timeout lets the loop notice that _http_running was reset.
        httpd.socket.settimeout(0.1)

        while self._http_running:
            try:
                httpd.handle_request()
            except socket.timeout:
                pass

    def _get_remote_url(self, path):
        """Return the url under which a local path is served.

        :param path: A local path, either relative or absolute. An absolute
            path must be located under the directory served.
        :raises BadWebserverPath: If an absolute path is outside of the
            directory served.
        """
        path_parts = path.split(os.path.sep)
        if os.path.isabs(path):
            nb_local_parts = len(self._local_path_parts)
            if path_parts[:nb_local_parts] != self._local_path_parts:
                # _home_dir is the directory the path was compared with.
                raise BadWebserverPath(path, self._home_dir)
            remote_path = '/'.join(path_parts[nb_local_parts:])
        else:
            remote_path = '/'.join(path_parts)

        return self._http_base_url + remote_path

    def log(self, format, *args):
        """Capture Server log output."""
        self.logs.append(format % args)

    def setUp(self, backing_transport_server=None):
        """See bzrlib.transport.Server.setUp.

        :param backing_transport_server: The transport that requests over this
            protocol should be forwarded to. Note that this is currently not
            supported for HTTP.
        :raises WebserverNotAvailable: If the server thread could not start
            the HTTP server.
        """
        # XXX: TODO: make the server back onto vfs_server rather than local
        # disk.
        assert backing_transport_server is None or \
            isinstance(backing_transport_server, LocalURLServer), \
            "HTTPServer currently assumes local transport, got %s" % \
            backing_transport_server
        self._home_dir = os.getcwdu()
        self._local_path_parts = self._home_dir.split(os.path.sep)
        # The log must exist before the server thread is able to use it.
        self.logs = []
        self._http_starting = threading.Lock()
        self._http_starting.acquire()
        self._http_running = True
        self._http_base_url = None
        self._http_thread = threading.Thread(target=self._http_start)
        self._http_thread.setDaemon(True)
        self._http_thread.start()
        # Wait for the server thread to start (i.e release the lock)
        self._http_starting.acquire()
        self._http_starting.release()
        if self._http_base_url is None:
            # The thread released the lock without defining the url: it
            # failed before being able to serve anything.
            self._http_running = False
            self._http_thread.join()
            raise WebserverNotAvailable(
                'the HTTP server thread failed to start')

    def tearDown(self):
        """See bzrlib.transport.Server.tearDown."""
        self._http_running = False
        self._http_thread.join()
        if self._httpd is not None:
            # Release the listening socket; a later setUp() creates a new
            # server on the same port.
            self._httpd.server_close()
            self._httpd = None

    def get_url(self):
        """See bzrlib.transport.Server.get_url."""
        return self._get_remote_url(self._home_dir)

    def get_bogus_url(self):
        """See bzrlib.transport.Server.get_bogus_url."""
        # this is chosen to try to prevent trouble with proxies, weird dns,
        # etc
        return 'http://127.0.0.1:1/'
395
396
397
class HttpServer_urllib(HttpServer):
    """HttpServer flavour handing out http+urllib urls.

    Meant for tests: whenever possible, connections to this server are
    made through urllib.
    """

    # The urls of this server must select the urllib client implementation.
    _url_protocol = 'http+urllib'
406
407
408
class HttpServer_PyCurl(HttpServer):
    """HttpServer flavour handing out http+pycurl urls.

    Meant for tests: whenever possible, connections to this server are
    made through pycurl.
    """

    # The availability of pycurl is not checked here: this server is only
    # required when pycurl is present.

    # The urls of this server must select the pycurl client implementation.
    _url_protocol = 'http+pycurl'