~bzr-pqm/bzr/bzr.dev

3059.2.2 by Vincent Ladeuil
Read http responses on demand without buffering the whole body
1
# Copyright (C) 2006, 2007 Canonical Ltd
2004.1.25 by v.ladeuil+lp at free
Shuffle http related test code. Hopefully it ends up at the right place :)
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
import BaseHTTPServer
18
import errno
19
import os
20
from SimpleHTTPServer import SimpleHTTPRequestHandler
21
import socket
2146.1.1 by Alexander Belchenko
fixes for test suite: forgotten imports in HttpServer.py
22
import posixpath
2004.1.25 by v.ladeuil+lp at free
Shuffle http related test code. Hopefully it ends up at the right place :)
23
import random
24
import re
25
import sys
26
import threading
27
import time
2146.1.1 by Alexander Belchenko
fixes for test suite: forgotten imports in HttpServer.py
28
import urllib
29
import urlparse
2004.1.25 by v.ladeuil+lp at free
Shuffle http related test code. Hopefully it ends up at the right place :)
30
31
from bzrlib.transport import Server
2381.1.2 by Robert Collins
Fixup the test changes made for hpss to be clean and self contained.
32
from bzrlib.transport.local import LocalURLServer
2004.1.25 by v.ladeuil+lp at free
Shuffle http related test code. Hopefully it ends up at the right place :)
33
34
35
class WebserverNotAvailable(Exception):
    """Exception type for signalling that a web server is not available.

    It carries no behaviour of its own beyond what Exception provides.
    """
37
38
39
class BadWebserverPath(ValueError):
    """A path that does not live under the directory served by the server.

    Instances are built with two arguments, the offending path and the
    served directory, in that order.
    """

    def __str__(self):
        # self.args is interpolated as a whole: exactly two arguments are
        # expected (path, served directory).
        template = 'path %s is not in %s'
        return template % self.args
42
43
44
class TestingHTTPRequestHandler(SimpleHTTPRequestHandler):
    """Handles one request.

    A TestingHTTPRequestHandler is instantiated for every request
    received by the associated server.

    Compared to SimpleHTTPRequestHandler, it logs through the test case
    server (self.server.test_case_server), counts the GET requests,
    serves the Range header (single and multiple ranges) and can map
    proxied urls to local '-proxied' files.
    """

    def log_message(self, format, *args):
        """Record a log line in the test case server instead of stderr.

        :param format: A %-style format string.
        :param args: The values interpolated into format.
        """
        tcs = self.server.test_case_server
        tcs.log('webserver - %s - - [%s] %s "%s" "%s"',
                self.address_string(),
                self.log_date_time_string(),
                format % args,
                self.headers.get('referer', '-'),
                self.headers.get('user-agent', '-'))

    def handle_one_request(self):
        """Handle a single HTTP request.

        We catch all socket errors occurring when the client close the
        connection early to avoid polluting the test results. Any other
        socket error is propagated.
        """
        try:
            SimpleHTTPRequestHandler.handle_one_request(self)
        except socket.error:
            # sys.exc_info() is used instead of binding the exception in
            # the except clause so that the syntax is accepted by every
            # python version.
            e = sys.exc_info()[1]
            if (len(e.args) > 0
                and e.args[0] in (errno.EPIPE, errno.ECONNRESET,
                                  errno.ECONNABORTED,)):
                # The client went away, there is nobody left to talk to
                self.close_connection = 1
            else:
                raise

    _range_regexp = re.compile(r'^(?P<start>\d+)-(?P<end>\d+)$')
    _tail_regexp = re.compile(r'^-(?P<tail>\d+)$')

    def parse_ranges(self, ranges_header):
        """Parse the range header value and returns ranges and tail.

        RFC2616 14.35 says that syntactically invalid range
        specifiers MUST be ignored. In that case, we return 0 for
        tail and [] for ranges.

        :param ranges_header: The raw value of the Range header, e.g.
            'bytes=0-9,20-29' or 'bytes=-5'.
        :return: A (tail, ranges) tuple. tail is the length of the
            requested suffix (0 if none was requested, the last one wins
            if several were), ranges is a list of (start, end) tuples
            with both offsets inclusive.
        """
        tail = 0
        ranges = []
        if not ranges_header.startswith('bytes='):
            # Syntactically invalid header
            return 0, []

        ranges_header = ranges_header[len('bytes='):]
        for range_str in ranges_header.split(','):
            # FIXME: RFC2616 says end is optional and default to file_size
            range_match = self._range_regexp.match(range_str)
            if range_match is not None:
                start = int(range_match.group('start'))
                end = int(range_match.group('end'))
                if start > end:
                    # Syntactically invalid range
                    return 0, []
                ranges.append((start, end))
            else:
                tail_match = self._tail_regexp.match(range_str)
                if tail_match is not None:
                    tail = int(tail_match.group('tail'))
                else:
                    # Syntactically invalid range
                    return 0, []
        return tail, ranges

    def send_range_content(self, file, start, length):
        """Write length bytes of file, starting at offset start, to the client.

        The whole range is read in memory before being written.
        """
        file.seek(start)
        self.wfile.write(file.read(length))

    def get_single_range(self, file, file_size, start, end):
        """Send a 206 response containing a single range.

        :param file: The opened file to read the content from.
        :param file_size: The total size of the file, reported in the
            Content-Range header.
        :param start: Offset of the first byte to send.
        :param end: Offset of the last byte to send (inclusive).
        """
        self.send_response(206)
        length = end - start + 1
        self.send_header('Accept-Ranges', 'bytes')
        self.send_header("Content-Length", "%d" % length)

        self.send_header("Content-Type", 'application/octet-stream')
        self.send_header("Content-Range", "bytes %d-%d/%d" % (start,
                                                              end,
                                                              file_size))
        self.end_headers()
        self.send_range_content(file, start, length)

    def get_multiple_ranges(self, file, file_size, ranges):
        """Send a 206 multipart/byteranges response.

        Each range is sent as a part introduced by a boundary line and
        its own Content-type and Content-Range headers. No Content-Length
        header is sent for the whole response.

        :param file: The opened file to read the content from.
        :param file_size: The total size of the file, reported in the
            Content-Range headers.
        :param ranges: A list of (start, end) tuples, end being inclusive.
        """
        self.send_response(206)
        self.send_header('Accept-Ranges', 'bytes')
        boundary = "%d" % random.randint(0,0x7FFFFFFF)
        self.send_header("Content-Type",
                         "multipart/byteranges; boundary=%s" % boundary)
        self.end_headers()
        for (start, end) in ranges:
            self.wfile.write("--%s\r\n" % boundary)
            self.send_header("Content-type", 'application/octet-stream')
            self.send_header("Content-Range", "bytes %d-%d/%d" % (start,
                                                                  end,
                                                                  file_size))
            self.end_headers()
            self.send_range_content(file, start, end - start + 1)
        # Final boundary
        self.wfile.write("--%s\r\n" % boundary)

    def do_GET(self):
        """Serve a GET request.

        Handles the Range header. Requests without a Range header and
        requests for directories are delegated to
        SimpleHTTPRequestHandler. Otherwise the response is:

        - 404 if the file cannot be opened,
        - 416 if the ranges are syntactically invalid or if one of them
          starts at or after the end of the file,
        - 206 with the requested content in all other cases.
        """
        # Update statistics
        self.server.test_case_server.GET_request_nb += 1

        path = self.translate_path(self.path)
        ranges_header_value = self.headers.get('Range')
        if ranges_header_value is None or os.path.isdir(path):
            # Let the mother class handle most cases
            return SimpleHTTPRequestHandler.do_GET(self)

        try:
            # Always read in binary mode. Opening files in text
            # mode may cause newline translations, making the
            # actual size of the content transmitted *less* than
            # the content-length!
            file = open(path, 'rb')
        except IOError:
            self.send_error(404, "File not found")
            return

        # From now on the file is closed whatever happens, including
        # errors raised while sending the response.
        try:
            file_size = os.fstat(file.fileno())[6]
            tail, ranges = self.parse_ranges(ranges_header_value)
            # Normalize tail into ranges
            if tail != 0:
                # RFC2616 14.35.1: if the entity is shorter than the
                # requested suffix length, the entire entity is used, so
                # the start is never allowed to become negative (which
                # would make seek() fail).
                ranges.append((max(0, file_size - tail), file_size))

            # No range at all means the header was syntactically invalid
            self._satisfiable_ranges = len(ranges) > 0
            checked_ranges = []
            for (start, end) in ranges:
                # RFC2616 14.35, ranges are invalid if start >= file_size
                if start >= file_size:
                    self._satisfiable_ranges = False
                    break
                # RFC2616 14.35, end values should be truncated
                # to file_size -1 if they exceed it
                checked_ranges.append((start, min(end, file_size - 1)))

            if not self._satisfiable_ranges:
                # RFC2616 14.16 and 14.35 says that when a server
                # encounters unsatisfiable range specifiers, it
                # SHOULD return a 416.
                # FIXME: We SHOULD send a Content-Range header too,
                # but the implementation of send_error does not
                # allows that. So far.
                self.send_error(416, "Requested range not satisfiable")
                return

            if len(checked_ranges) == 1:
                (start, end) = checked_ranges[0]
                self.get_single_range(file, file_size, start, end)
            else:
                self.get_multiple_ranges(file, file_size, checked_ranges)
        finally:
            file.close()

    def translate_path(self, path):
        """Translate a /-separated PATH to the local filename syntax.

        If the server requires it, proxy the path before the usual translation
        """
        if self.server.test_case_server.proxy_requests:
            # We need to act as a proxy and accept absolute urls,
            # which SimpleHTTPRequestHandler (parent) is not
            # ready for. So we just drop the protocol://host:port
            # part in front of the request-url (because we know
            # we would not forward the request to *another*
            # proxy).

            # So we do what SimpleHTTPRequestHandler.translate_path
            # do beginning with python 2.4.3: abandon query
            # parameters, scheme, host port, etc (which ensure we
            # provide the right behaviour on all python versions).
            path = urlparse.urlparse(path)[2]
            # And now, we can apply *our* trick to proxy files
            path += '-proxied'

        return self._translate_path(path)

    def _translate_path(self, path):
        """Translate path the way SimpleHTTPRequestHandler does."""
        return SimpleHTTPRequestHandler.translate_path(self, path)

    if sys.platform == 'win32':
        # On win32 you cannot access non-ascii filenames without
        # decoding them into unicode first.
        # However, under Linux, you can access bytestream paths
        # without any problems. If this function was always active
        # it would probably break tests when LANG=C was set
        def _translate_path(self, path):
            """Translate a /-separated PATH to the local filename syntax.

            For bzr, all url paths are considered to be utf8 paths.
            On Linux, you can access these paths directly over the bytestream
            request, but on win32, you must decode them, and access them
            as Unicode files.
            """
            # abandon query parameters
            path = urlparse.urlparse(path)[2]
            path = posixpath.normpath(urllib.unquote(path))
            path = path.decode('utf-8')
            words = path.split('/')
            words = filter(None, words)
            path = os.getcwdu()
            for word in words:
                # Keep only the last component of each word so that the
                # result stays below the current directory
                drive, word = os.path.splitdrive(word)
                head, word = os.path.split(word)
                if word in (os.curdir, os.pardir):
                    continue
                path = os.path.join(path, word)
            return path
266
267
268
class TestingHTTPServer(BaseHTTPServer.HTTPServer):
    """An HTTPServer keeping a reference to the test case server using it."""

    def __init__(self, server_address, RequestHandlerClass,
                 test_case_server):
        """Create the server.

        :param server_address: The (host, port) tuple to bind to.
        :param RequestHandlerClass: The class instantiated for each request.
        :param test_case_server: The object made available to the request
            handlers as self.server.test_case_server.
        """
        BaseHTTPServer.HTTPServer.__init__(self, server_address,
                                           RequestHandlerClass)
        # test_case_server can be used to communicate between the
        # tests and the server (or the request handler and the
        # server), allowing dynamic behaviors to be defined from
        # the tests cases.
        self.test_case_server = test_case_server

    def server_close(self):
        """Called to clean-up the server.

        Since the server may be in a blocking read, we shutdown the socket
        before closing it.

        A 'socket is not connected' error raised by the shutdown is
        ignored so that the socket is still closed; any other socket
        error is propagated.
        """
        try:
            self.socket.shutdown(socket.SHUT_RDWR)
        except socket.error:
            # sys.exc_info() is used instead of binding the exception in
            # the except clause so that the syntax is accepted by every
            # python version.
            e = sys.exc_info()[1]
            # Some platforms refuse to shutdown a listening socket which
            # never accepted a connection (ENOTCONN, known as
            # WSAENOTCONN, 10057, on windows). This is harmless here
            # since all we want is to close the socket.
            harmless = (errno.ENOTCONN,
                        getattr(errno, 'WSAENOTCONN', 10057))
            if len(e.args) == 0 or e.args[0] not in harmless:
                raise
        BaseHTTPServer.HTTPServer.server_close(self)
288
2004.1.25 by v.ladeuil+lp at free
Shuffle http related test code. Hopefully it ends up at the right place :)
289
290
class HttpServer(Server):
    """A test server for http transports.

    Subclasses can provide a specific request handler.

    The http server itself runs in a daemon thread started by setUp()
    and stopped by tearDown().
    """

    # Whether or not we proxy the requests (see
    # TestingHTTPRequestHandler.translate_path).
    proxy_requests = False

    # used to form the url that connects to this server
    _url_protocol = 'http'

    # Subclasses can provide a specific request handler
    def __init__(self, request_handler=TestingHTTPRequestHandler):
        """Create the server, nothing is started until setUp() is called.

        :param request_handler: The class instantiated to handle each
            request.
        """
        Server.__init__(self)
        self.request_handler = request_handler
        self.host = 'localhost'
        # 0 lets the system choose the port, the actual value is
        # recorded by _get_httpd()
        self.port = 0
        self._httpd = None
        # Allows tests to verify number of GET requests issued
        self.GET_request_nb = 0

    def _get_httpd(self):
        """Return the TestingHTTPServer, creating it on first use.

        self.port is updated with the port the server is bound to.
        """
        if self._httpd is None:
            self._httpd = TestingHTTPServer((self.host, self.port),
                                            self.request_handler,
                                            self)
            host, self.port = self._httpd.socket.getsockname()
        return self._httpd

    def _http_start(self):
        """Server thread entry point.

        Creates the http server, releases self._http_starting to tell
        setUp() that the start-up is over, then handles requests until
        self._http_running becomes false.

        If the server cannot be created, the exception is saved in
        self._http_exception for setUp() and no request is handled.
        """
        self._http_exception = None
        try:
            try:
                httpd = self._get_httpd()
                self._http_base_url = '%s://%s:%s/' % (self._url_protocol,
                                                       self.host,
                                                       self.port)
            except Exception:
                # Nobody can catch an exception raised in this thread,
                # save it so that setUp() can re-raise it in the thread
                # which called it.
                self._http_exception = sys.exc_info()[1]
                self._http_running = False
        finally:
            # Always release the lock or setUp() will block forever
            self._http_starting.release()

        while self._http_running:
            try:
                httpd.handle_request()
            except socket.timeout:
                pass

    def _get_remote_url(self, path):
        """Return the url giving access to a local path.

        :param path: A local path, either relative or absolute. An
            absolute path must be below the directory that was current
            when setUp() was called.
        :raises BadWebserverPath: If path is absolute and not below
            that directory.
        """
        path_parts = path.split(os.path.sep)
        if os.path.isabs(path):
            if path_parts[:len(self._local_path_parts)] != \
                   self._local_path_parts:
                # NOTE(review): nothing visible in this class sets
                # test_dir, fall back to the served directory so that a
                # missing attribute does not hide the real error --
                # confirm whether test_dir is provided elsewhere.
                raise BadWebserverPath(path, getattr(self, 'test_dir',
                                                     self._home_dir))
            remote_path = '/'.join(path_parts[len(self._local_path_parts):])
        else:
            remote_path = '/'.join(path_parts)

        return self._http_base_url + remote_path

    def log(self, format, *args):
        """Capture Server log output."""
        self.logs.append(format % args)

    def setUp(self, backing_transport_server=None):
        """See bzrlib.transport.Server.setUp.

        :param backing_transport_server: The transport that requests over this
            protocol should be forwarded to. Note that this is currently not
            supported for HTTP.

        Any exception raised in the server thread while creating the
        http server is re-raised here.
        """
        # XXX: TODO: make the server back onto vfs_server rather than local
        # disk.
        assert backing_transport_server is None or \
            isinstance(backing_transport_server, LocalURLServer), \
            "HTTPServer currently assumes local transport, got %s" % \
            backing_transport_server
        self._home_dir = os.getcwdu()
        self._local_path_parts = self._home_dir.split(os.path.sep)
        # The logs must exist before the server thread is able to handle
        # (and so log) a request.
        self.logs = []
        self._http_starting = threading.Lock()
        self._http_starting.acquire()
        self._http_running = True
        self._http_base_url = None
        self._http_exception = None
        self._http_thread = threading.Thread(target=self._http_start)
        self._http_thread.setDaemon(True)
        self._http_thread.start()
        # Wait for the server thread to start (i.e release the lock)
        self._http_starting.acquire()
        self._http_starting.release()
        if self._http_exception is not None:
            # Something went wrong during the server start
            raise self._http_exception

    def tearDown(self):
        """See bzrlib.transport.Server.tearDown."""
        if self._httpd is not None:
            # None means the server could not be created by setUp()
            self._httpd.server_close()
        self._http_running = False
        self._http_thread.join()

    def get_url(self):
        """See bzrlib.transport.Server.get_url."""
        return self._get_remote_url(self._home_dir)

    def get_bogus_url(self):
        """See bzrlib.transport.Server.get_bogus_url."""
        # this is chosen to try to prevent trouble with proxies, weird dns,
        # etc
        return 'http://127.0.0.1:1/'
392
393
394
class HttpServer_urllib(HttpServer):
    """An HttpServer whose urls use the http+urllib scheme.

    Meant for tests: clients connecting to the urls given by this server
    will go through urllib where possible.
    """

    # the scheme of the urls handed out by this server, it selects the
    # urllib client implementation
    _url_protocol = 'http+urllib'
403
404
405
class HttpServer_PyCurl(HttpServer):
    """An HttpServer whose urls use the http+pycurl scheme.

    Meant for tests: clients connecting to the urls given by this server
    will go through pycurl where possible.
    """

    # The availability of pycurl is not checked here: this server is
    # only required when pycurl is present.

    # the scheme of the urls handed out by this server, it selects the
    # pycurl client implementation
    _url_protocol = 'http+pycurl'