~bzr-pqm/bzr/bzr.dev

2004.1.40 by v.ladeuil+lp at free
Fix the race condition again and correct some small typos to be in
1
# Copyright (C) 2006 Canonical Ltd
2004.1.25 by v.ladeuil+lp at free
Shuffle http related test code. Hopefully it ends up at the right place :)
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
import BaseHTTPServer
18
import errno
19
import os
20
from SimpleHTTPServer import SimpleHTTPRequestHandler
21
import socket
2146.1.1 by Alexander Belchenko
fixes for test suite: forgotten imports in HttpServer.py
22
import posixpath
2004.1.25 by v.ladeuil+lp at free
Shuffle http related test code. Hopefully it ends up at the right place :)
23
import random
24
import re
25
import sys
26
import threading
27
import time
2146.1.1 by Alexander Belchenko
fixes for test suite: forgotten imports in HttpServer.py
28
import urllib
29
import urlparse
2004.1.25 by v.ladeuil+lp at free
Shuffle http related test code. Hopefully it ends up at the right place :)
30
31
from bzrlib.transport import Server
2381.1.2 by Robert Collins
Fixup the test changes made for hpss to be clean and self contained.
32
from bzrlib.transport.local import LocalURLServer
2004.1.25 by v.ladeuil+lp at free
Shuffle http related test code. Hopefully it ends up at the right place :)
33
34
35
class WebserverNotAvailable(Exception):
    """Error type for reporting that a web server is not available."""
37
38
39
class BadWebserverPath(ValueError):
    """A path that is not located below the directory being served.

    Instances are built with two arguments: the offending path and the
    directory it was expected to be in.
    """

    def __str__(self):
        # self.args is the (path, directory) tuple given to the constructor.
        template = 'path %s is not in %s'
        return template % self.args
42
43
44
class TestingHTTPRequestHandler(SimpleHTTPRequestHandler):
    """Handles one request.

    A TestingHTTPRequestHandler is instantiated for every request
    received by the associated server.

    Compared to SimpleHTTPRequestHandler it:

    - logs through the test server (``self.server.test_case_server``),
    - silences the socket errors caused by clients closing early,
    - retries request reads that fail with EAGAIN/EWOULDBLOCK,
    - serves ``Range`` requests (single and multiple ranges),
    - can rewrite paths when the test server asks for proxy behaviour.
    """

    # How many times the request line is read before giving up when the
    # socket keeps answering EAGAIN/EWOULDBLOCK.
    _max_read_attempts = 10

    def log_message(self, format, *args):
        """Send an access-log style line to the test server's log.

        :param format: %-style format string supplied by the base class.
        :param args: values interpolated into ``format``.
        """
        tcs = self.server.test_case_server
        tcs.log('webserver - %s - - [%s] %s "%s" "%s"',
                self.address_string(),
                self.log_date_time_string(),
                format % args,
                self.headers.get('referer', '-'),
                self.headers.get('user-agent', '-'))

    def handle_one_request(self):
        """Handle a single HTTP request.

        We catch all socket errors occurring when the client closes the
        connection early to avoid polluting the test results. Any other
        socket error is re-raised.
        """
        try:
            self._handle_one_request()
        except socket.error:
            # sys.exc_info() is used instead of a bound exception name so the
            # same source is valid on every python version.
            err = sys.exc_info()[1]
            client_gone = (errno.EPIPE, errno.ECONNRESET, errno.ECONNABORTED)
            if len(err.args) > 0 and err.args[0] in client_gone:
                self.close_connection = 1
            else:
                raise

    def _handle_one_request(self):
        """Request handling as defined in the base class.

        You normally don't need to override this method; see the class
        __doc__ string for information on how to handle specific HTTP
        commands such as GET and POST.

        On some platforms, notably OS X, a lot of EAGAIN (resource temporary
        unavailable) occur. We retry silently at most ``_max_read_attempts``
        times. If every attempt fails the connection is closed instead of
        processing a missing (or stale) request line.
        """
        got_line = False
        for attempt in range(self._max_read_attempts):
            try:
                self.raw_requestline = self.rfile.readline()
            except socket.error:
                err = sys.exc_info()[1]
                if (len(err.args) > 0
                    and err.args[0] in (errno.EAGAIN, errno.EWOULDBLOCK)):
                    # Deliberately not logged: some tests look at the log of
                    # the server and expect to see no errors.
                    time.sleep(0.01)
                    continue
                raise
            else:
                got_line = True
                break
        if not got_line or not self.raw_requestline:
            # Nothing (new) was read: either the peer closed the connection
            # or the socket never became readable.
            self.close_connection = 1
            return
        if not self.parse_request():  # An error code has been sent, just exit
            return
        mname = 'do_' + self.command
        method = getattr(self, mname, None)
        if method is None:
            self.send_error(501, "Unsupported method (%r)" % self.command)
            return
        method()

    _range_regexp = re.compile(r'^(?P<start>\d+)-(?P<end>\d+)$')
    _tail_regexp = re.compile(r'^-(?P<tail>\d+)$')

    def parse_ranges(self, ranges_header):
        """Parse the range header value and returns ranges and tail.

        RFC2616 14.35 says that syntactically invalid range
        specifiers MUST be ignored. In that case, we return 0 for
        tail and [] for ranges.

        :param ranges_header: the raw value of the ``Range`` header.
        :return: a ``(tail, ranges)`` tuple where ``tail`` is the length of
            the requested suffix (0 if none) and ``ranges`` is a list of
            ``(start, end)`` tuples, both offsets inclusive. When several
            suffix specifiers are given, only the last one is kept.
        """
        tail = 0
        ranges = []
        if not ranges_header.startswith('bytes='):
            # Syntactically invalid header
            return 0, []

        ranges_header = ranges_header[len('bytes='):]
        for range_str in ranges_header.split(','):
            # FIXME: RFC2616 says end is optional and default to file_size
            range_match = self._range_regexp.match(range_str)
            if range_match is not None:
                start = int(range_match.group('start'))
                end = int(range_match.group('end'))
                if start > end:
                    # Syntactically invalid range
                    return 0, []
                ranges.append((start, end))
                continue
            tail_match = self._tail_regexp.match(range_str)
            if tail_match is None:
                # Syntactically invalid range
                return 0, []
            tail = int(tail_match.group('tail'))
        return tail, ranges

    def _resolve_ranges(self, file_size, tail, ranges):
        """Turn parsed range specifiers into ranges valid for a given file.

        :param file_size: size in bytes of the file being served.
        :param tail: suffix length as returned by parse_ranges (0 if none).
        :param ranges: list of ``(start, end)`` as returned by parse_ranges.
        :return: a new list of ``(start, end)`` tuples (inclusive offsets)
            clipped to the file, or None when the request is unsatisfiable
            (no range at all, or one range starting at or after the end of
            the file).
        """
        wanted = list(ranges)
        if tail != 0:
            # RFC2616 14.35.1: a suffix longer than the entity selects the
            # whole entity, hence the clamping to 0.
            wanted.append((max(0, file_size - tail), file_size - 1))
        if not wanted:
            return None
        resolved = []
        for start, end in wanted:
            # RFC2616 14.35, ranges are invalid if start >= file_size
            if start >= file_size:
                return None
            # RFC2616 14.35, end values should be truncated
            # to file_size -1 if they exceed it
            resolved.append((start, min(end, file_size - 1)))
        return resolved

    def send_range_content(self, file, start, length):
        """Write ``length`` bytes of ``file``, from offset ``start``."""
        file.seek(start)
        self.wfile.write(file.read(length))

    def get_single_range(self, file, file_size, start, end):
        """Send a 206 response containing a single byte range.

        :param file: the opened file to read from.
        :param file_size: total size of the file, used in Content-Range.
        :param start: first byte offset (inclusive).
        :param end: last byte offset (inclusive).
        """
        self.send_response(206)
        length = end - start + 1
        self.send_header('Accept-Ranges', 'bytes')
        self.send_header("Content-Length", "%d" % length)

        self.send_header("Content-Type", 'application/octet-stream')
        self.send_header("Content-Range", "bytes %d-%d/%d" % (start,
                                                              end,
                                                              file_size))
        self.end_headers()
        self.send_range_content(file, start, length)

    def get_multiple_ranges(self, file, file_size, ranges):
        """Send a 206 multipart/byteranges response.

        :param file: the opened file to read from.
        :param file_size: total size of the file, used in Content-Range.
        :param ranges: list of ``(start, end)`` inclusive byte offsets.
        """
        self.send_response(206)
        self.send_header('Accept-Ranges', 'bytes')
        boundary = "%d" % random.randint(0, 0x7FFFFFFF)
        self.send_header("Content-Type",
                         "multipart/byteranges; boundary=%s" % boundary)
        self.end_headers()
        for (start, end) in ranges:
            self.wfile.write("--%s\r\n" % boundary)
            self.send_header("Content-type", 'application/octet-stream')
            self.send_header("Content-Range", "bytes %d-%d/%d" % (start,
                                                                  end,
                                                                  file_size))
            self.end_headers()
            self.send_range_content(file, start, end - start + 1)
            # NOTE(review): a boundary is written both before and after
            # every part and no closing "--boundary--" delimiter is sent.
            # Kept unchanged since clients under test may rely on it; confirm
            # before making this stricter.
            self.wfile.write("--%s\r\n" % boundary)

    def do_GET(self):
        """Serve a GET request.

        Handles the Range header. Requests without a Range header, or
        addressing a directory, are left to SimpleHTTPRequestHandler.
        Answers 404 when the file cannot be opened and 416 when the
        requested ranges are not satisfiable.
        """
        path = self.translate_path(self.path)
        ranges_header_value = self.headers.get('Range')
        if ranges_header_value is None or os.path.isdir(path):
            # Let the mother class handle most cases
            return SimpleHTTPRequestHandler.do_GET(self)

        try:
            # Always read in binary mode. Opening files in text
            # mode may cause newline translations, making the
            # actual size of the content transmitted *less* than
            # the content-length!
            f = open(path, 'rb')
        except IOError:
            self.send_error(404, "File not found")
            return

        try:
            file_size = os.fstat(f.fileno())[6]
            tail, ranges = self.parse_ranges(ranges_header_value)
            ranges = self._resolve_ranges(file_size, tail, ranges)
            # Attribute kept for backward compatibility with code that may
            # inspect it after the request.
            self._satisfiable_ranges = ranges is not None
            if not self._satisfiable_ranges:
                # RFC2616 14.16 and 14.35 says that when a server
                # encounters unsatisfiable range specifiers, it
                # SHOULD return a 416.
                # FIXME: We SHOULD send a Content-Range header too,
                # but the implementation of send_error does not
                # allows that. So far.
                self.send_error(416, "Requested range not satisfiable")
                return

            if len(ranges) == 1:
                (start, end) = ranges[0]
                self.get_single_range(f, file_size, start, end)
            else:
                self.get_multiple_ranges(f, file_size, ranges)
        finally:
            # Close the file even if the client goes away while we send.
            f.close()

    def translate_path(self, path):
        """Translate a /-separated PATH to the local filename syntax.

        If the server requires it, proxy the path before the usual translation
        """
        if self.server.test_case_server.proxy_requests:
            # We need to act as a proxy and accept absolute urls,
            # which SimpleHTTPRequestHandler (parent) is not
            # ready for. So we just drop the protocol://host:port
            # part in front of the request-url (because we know
            # we would not forward the request to *another*
            # proxy).

            # So we do what SimpleHTTPRequestHandler.translate_path
            # do beginning with python 2.4.3: abandon query
            # parameters, scheme, host port, etc (which ensure we
            # provide the right behaviour on all python versions).
            path = urlparse.urlparse(path)[2]
            # And now, we can apply *our* trick to proxy files
            path += '-proxied'

        return self._translate_path(path)

    def _translate_path(self, path):
        """Default translation: the one provided by the base class."""
        return SimpleHTTPRequestHandler.translate_path(self, path)

    if sys.platform == 'win32':
        # On win32 you cannot access non-ascii filenames without
        # decoding them into unicode first.
        # However, under Linux, you can access bytestream paths
        # without any problems. If this function was always active
        # it would probably break tests when LANG=C was set
        def _translate_path(self, path):
            """Translate a /-separated PATH to the local filename syntax.

            For bzr, all url paths are considered to be utf8 paths.
            On Linux, you can access these paths directly over the bytestream
            request, but on win32, you must decode them, and access them
            as Unicode files.
            """
            # abandon query parameters
            path = urlparse.urlparse(path)[2]
            path = posixpath.normpath(urllib.unquote(path))
            path = path.decode('utf-8')
            words = path.split('/')
            words = filter(None, words)
            path = os.getcwdu()
            for word in words:
                drive, word = os.path.splitdrive(word)
                head, word = os.path.split(word)
                if word in (os.curdir, os.pardir):
                    continue
                path = os.path.join(path, word)
            return path
300
301
302
class TestingHTTPServer(BaseHTTPServer.HTTPServer):
    """An HTTPServer that keeps a reference to the test server driving it."""

    def __init__(self, server_address, RequestHandlerClass,
                 test_case_server):
        """Create the server and remember ``test_case_server``.

        :param server_address: passed unchanged to HTTPServer.
        :param RequestHandlerClass: passed unchanged to HTTPServer.
        :param test_case_server: object stored on the instance as
            ``self.test_case_server``.
        """
        base = BaseHTTPServer.HTTPServer
        base.__init__(self, server_address, RequestHandlerClass)
        # Communication channel between the tests and the server (or
        # between the request handler and the server): it lets the test
        # cases define dynamic behaviors.
        self.test_case_server = test_case_server
2004.1.25 by v.ladeuil+lp at free
Shuffle http related test code. Hopefully it ends up at the right place :)
313
314
315
class HttpServer(Server):
    """A test server for http transports.

    The server runs in a daemon thread started by setUp and stopped by
    tearDown, and serves the directory that was current when setUp was
    called.

    Subclasses can provide a specific request handler.
    """

    # Whether or not we proxy the requests (see
    # TestingHTTPRequestHandler.translate_path).
    proxy_requests = False

    # used to form the url that connects to this server
    _url_protocol = 'http'

    # Subclasses can provide a specific request handler
    def __init__(self, request_handler=TestingHTTPRequestHandler):
        """Prepare a server; nothing is started before setUp.

        :param request_handler: the class instantiated to handle each
            request.
        """
        Server.__init__(self)
        self.request_handler = request_handler
        self.host = 'localhost'
        # 0 asks the system for a free port; the real port is recorded when
        # the underlying server is created.
        self.port = 0
        self._httpd = None

    def _get_httpd(self):
        """Return the underlying TestingHTTPServer, creating it if needed."""
        if self._httpd is None:
            self._httpd = TestingHTTPServer((self.host, self.port),
                                            self.request_handler,
                                            self)
            host, self.port = self._httpd.socket.getsockname()
        return self._httpd

    def _http_start(self):
        """Body of the server thread: create the server then serve requests.

        ``self._http_starting`` is always released, even when the server
        cannot be created, so that setUp never blocks forever.
        """
        try:
            httpd = self._get_httpd()
            self._http_base_url = '%s://%s:%s/' % (self._url_protocol,
                                                   self.host,
                                                   self.port)
            # A short timeout lets the loop below notice that
            # _http_running was cleared.
            httpd.socket.settimeout(0.1)
        finally:
            self._http_starting.release()

        while self._http_running:
            try:
                httpd.handle_request()
            except socket.timeout:
                pass

    def _get_remote_url(self, path):
        """Return the url giving access to a local path.

        :param path: a local path; when absolute, it must be located below
            the directory being served.
        :raises BadWebserverPath: if an absolute ``path`` is not below the
            directory being served.
        """
        path_parts = path.split(os.path.sep)
        if os.path.isabs(path):
            served_parts = self._local_path_parts
            if path_parts[:len(served_parts)] != served_parts:
                # The directory being served is self._home_dir.
                raise BadWebserverPath(path, self._home_dir)
            remote_path = '/'.join(path_parts[len(served_parts):])
        else:
            remote_path = '/'.join(path_parts)

        return self._http_base_url + remote_path

    def log(self, format, *args):
        """Capture Server log output."""
        self.logs.append(format % args)

    def setUp(self, backing_transport_server=None):
        """See bzrlib.transport.Server.setUp.

        :param backing_transport_server: The transport that requests over this
            protocol should be forwarded to. Note that this is currently not
            supported for HTTP.
        :raises WebserverNotAvailable: if the server thread could not create
            the http server.
        """
        # XXX: TODO: make the server back onto vfs_server rather than local
        # disk.
        assert backing_transport_server is None or \
            isinstance(backing_transport_server, LocalURLServer), \
            "HTTPServer currently assumes local transport, got %s" % \
            backing_transport_server
        self._home_dir = os.getcwdu()
        self._local_path_parts = self._home_dir.split(os.path.sep)
        # Must exist before the server thread runs since request handlers
        # log through self.log().
        self.logs = []
        self._http_starting = threading.Lock()
        self._http_starting.acquire()
        self._http_running = True
        self._http_base_url = None
        self._http_thread = threading.Thread(target=self._http_start)
        self._http_thread.setDaemon(True)
        self._http_thread.start()
        # Wait for the server thread to start (i.e release the lock)
        self._http_starting.acquire()
        self._http_starting.release()
        if self._http_base_url is None:
            # The thread released the lock without publishing an url: the
            # server creation failed there.
            self._http_running = False
            self._http_thread.join()
            raise WebserverNotAvailable(
                'unable to start the http test server on %s' % self.host)

    def tearDown(self):
        """See bzrlib.transport.Server.tearDown."""
        self._httpd.server_close()
        self._http_running = False
        self._http_thread.join()

    def get_url(self):
        """See bzrlib.transport.Server.get_url."""
        return self._get_remote_url(self._home_dir)

    def get_bogus_url(self):
        """See bzrlib.transport.Server.get_bogus_url."""
        # this is chosen to try to prevent trouble with proxies, weird dns,
        # etc
        return 'http://127.0.0.1:1/'
416
417
418
class HttpServer_urllib(HttpServer):
    """HttpServer variant whose urls use the http+urllib scheme.

    Meant for tests: clients connecting to this server will go through
    urllib where possible.
    """

    # Scheme selecting the urllib client implementation for the urls
    # returned by this server.
    _url_protocol = 'http+urllib'
427
428
429
class HttpServer_PyCurl(HttpServer):
    """HttpServer variant whose urls use the http+pycurl scheme.

    Meant for tests: clients connecting to this server will go through
    pycurl where possible.
    """

    # pycurl availability is not checked here: this server is only
    # required when pycurl is present.

    # Scheme selecting the pycurl client implementation for the urls
    # returned by this server.
    _url_protocol = 'http+pycurl'