~bzr-pqm/bzr/bzr.dev

3059.2.2 by Vincent Ladeuil
Read http responses on demand without buffering the whole body
1
# Copyright (C) 2005, 2006, 2007 Canonical Ltd
1786.1.8 by John Arbash Meinel
[merge] Johan Rydberg test updates
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
3059.2.12 by Vincent Ladeuil
Spiv review feedback.
17
"""Tests from HTTP response parsing.
18
3059.2.18 by Vincent Ladeuil
Take spiv review comments into account.
19
The handle_response method reads the response body of a GET request and returns
20
the corresponding RangeFile.
3059.2.12 by Vincent Ladeuil
Spiv review feedback.
21
22
There are four different kinds of RangeFile:
23
- a whole file whose size is unknown, seen as a simple byte stream,
24
- a whole file whose size is known, we can't read past its end,
25
- a single range file, a part of a file with a start and a size,
26
- a multiple range file, several consecutive parts with known start offset
27
  and size.
28
29
Some properties are common to all kinds:
30
- seek can only be forward (it's really a socket underneath),
31
- read can't cross ranges,
32
- successive ranges are taken into account transparently,
3059.2.18 by Vincent Ladeuil
Take spiv review comments into account.
33
3059.2.12 by Vincent Ladeuil
Spiv review feedback.
34
- the expected pattern of use is either seek(offset)+read(size) or a single
3059.2.18 by Vincent Ladeuil
Take spiv review comments into account.
35
  read with no size specified. For multiple range files, multiple read() will
36
  return the corresponding ranges, trying to read further will raise
37
  InvalidHttpResponse.
3059.2.12 by Vincent Ladeuil
Spiv review feedback.
38
"""
1786.1.8 by John Arbash Meinel
[merge] Johan Rydberg test updates
39
40
from cStringIO import StringIO
3059.2.2 by Vincent Ladeuil
Read http responses on demand without buffering the whole body
41
import httplib
1786.1.8 by John Arbash Meinel
[merge] Johan Rydberg test updates
42
3059.2.12 by Vincent Ladeuil
Spiv review feedback.
43
from bzrlib import (
44
    errors,
45
    tests,
46
    )
3104.3.4 by Vincent Ladeuil
Add test.
47
from bzrlib.transport.http import (
48
    response,
49
    _urllib2_wrappers,
50
    )
3408.6.1 by Eric Holmberg
Fix for Bug #215426 in which bzr can cause a MemoryError in socket.recv while
51
from bzrlib.tests.file_utils import (
52
    FakeReadFile,
53
    )
3104.3.4 by Vincent Ladeuil
Add test.
54
55
56
class ReadSocket(object):
    """A socket-like object that can be given a predefined content."""

    def __init__(self, data):
        """Wrap ``data`` in an in-memory file standing in for the socket.

        :param data: the content that will be served to whoever reads from
            the file returned by makefile().
        """
        self.readfile = StringIO(data)

    def makefile(self, mode='r', bufsize=None):
        """Return the file-like object holding the predefined content.

        ``mode`` and ``bufsize`` are accepted so the signature looks like
        ``socket.makefile`` but both are ignored: the very same file object
        is returned on every call.
        """
        return self.readfile
64
3408.6.1 by Eric Holmberg
Fix for Bug #215426 in which bzr can cause a MemoryError in socket.recv while
65
3104.3.4 by Vincent Ladeuil
Add test.
66
class FakeHTTPConnection(_urllib2_wrappers.HTTPConnection):
    """An HTTPConnection whose socket is supplied by the test."""

    def __init__(self, sock):
        """Build a connection to 'localhost' that uses ``sock`` directly.

        :param sock: the socket-like object the response will be read from.
        """
        _urllib2_wrappers.HTTPConnection.__init__(self, 'localhost')
        # Set the socket to bypass the connection
        self.sock = sock

    def send(self, str):
        """Ignores the writes on the socket."""
        pass
76
77
78
class TestHTTPConnection(tests.TestCase):
    """Tests for the HTTPConnection wrapper using a canned response."""

    def test_cleanup_pipe(self):
        """cleanup_pipe() logs a warning when too many bytes are pending."""
        # The body is 18 bytes long: '0123456789\n' (11) + 'garbage' (7).
        sock = ReadSocket("""HTTP/1.1 200 OK\r
Content-Type: text/plain; charset=UTF-8\r
Content-Length: 18
\r
0123456789
garbage""")
        conn = FakeHTTPConnection(sock)
        # Simulate the request sending so that the connection will be able to
        # read the response.
        conn.putrequest('GET', 'http://localhost/fictious')
        conn.endheaders()
        # Now, get the response
        resp = conn.getresponse()
        # Read part of the response
        self.assertEquals('0123456789\n', resp.read(11))
        # Override the threshold to force the warning emission. The attribute
        # name is kept exactly as spelled here (presumably it matches the
        # connection class -- verify before renaming).
        conn._range_warning_thresold = 6 # There are 7 bytes pending
        conn.cleanup_pipe()
        self.assertContainsRe(self._get_log(keep_log_file=True),
                              'Got a 200 response when asking')
3059.2.12 by Vincent Ladeuil
Spiv review feedback.
101
102
103
class TestRangeFileMixin(object):
    """Tests for accessing the first range in a RangeFile.

    Classes using this mixin are expected to define, in their setUp(),
    ``self._file`` (the RangeFile under test, whose first range contains
    ``alpha``) and ``self.first_range_start`` (the offset of that range).
    """

    # A simple string used to represent a file part (also called a range), in
    # which offsets are easy to calculate for test writers. It's used as a
    # building block with slight variations but basically 'a' is the first char
    # of the range and 'z' is the last.
    alpha = 'abcdefghijklmnopqrstuvwxyz'

    def test_can_read_at_first_access(self):
        """Test that the just created file can be read."""
        self.assertEquals(self.alpha, self._file.read())

    def test_seek_read(self):
        """Test seek/read inside the range."""
        f = self._file
        start = self.first_range_start
        # Before any use, tell() should be at the range start
        self.assertEquals(start, f.tell())
        cur = start # For an overall offset assertion
        f.seek(start + 3)
        cur += 3
        self.assertEquals('def', f.read(3))
        cur += len('def')
        f.seek(4, 1)
        cur += 4
        self.assertEquals('klmn', f.read(4))
        cur += len('klmn')
        # read(0) in the middle of a range
        self.assertEquals('', f.read(0))
        # seek in place
        here = f.tell()
        f.seek(0, 1)
        self.assertEquals(here, f.tell())
        self.assertEquals(cur, f.tell())

    def test_read_zero(self):
        """Test that read(0) returns '' at the range start and inside it."""
        f = self._file
        self.assertEquals('', f.read(0))
        f.seek(10, 1)
        self.assertEquals('', f.read(0))

    def test_seek_at_range_end(self):
        """Test that seeking to the very end of the range is accepted."""
        f = self._file
        f.seek(26, 1)

    def test_read_at_range_end(self):
        """Test read behaviour at range end."""
        f = self._file
        self.assertEquals(self.alpha, f.read())
        self.assertEquals('', f.read(0))
        self.assertRaises(errors.InvalidRange, f.read, 1)

    def test_unbounded_read_after_seek(self):
        """Test that read() without size stops at the end of the range."""
        f = self._file
        f.seek(24, 1)
        # Should not cross ranges
        self.assertEquals('yz', f.read())

    def test_seek_backwards(self):
        """Test that seeking before the current position is refused."""
        f = self._file
        start = self.first_range_start
        f.seek(start)
        f.read(12)
        self.assertRaises(errors.InvalidRange, f.seek, start + 5)

    def test_seek_outside_single_range(self):
        """Test that seeking past the end of a fully defined range fails."""
        f = self._file
        if f._size == -1 or f._boundary is not None:
            raise tests.TestNotApplicable('Needs a fully defined range')
        # Will seek past the range and then errors out
        self.assertRaises(errors.InvalidRange,
                          f.seek, self.first_range_start + 27)

    def test_read_past_end_of_range(self):
        """Test that reading more than the range contains is refused."""
        f = self._file
        if f._size == -1:
            raise tests.TestNotApplicable("Can't check an unknown size")
        start = self.first_range_start
        f.seek(start + 20)
        self.assertRaises(errors.InvalidRange, f.read, 10)

    def test_seek_from_end(self):
        """Test seeking from the end of the file.

        The semantic is unclear in case of multiple ranges. Seeking from end
        exists only for the http transports, cannot be used if the file size is
        unknown and is not used in bzrlib itself. This test must be (and is)
        overridden by daughter classes.

        Reading from end makes sense only when a range has been requested from
        the end of the file (see HttpTransportBase._get() when using the
        'tail_amount' parameter). The HTTP response can only be a whole file or
        a single range.
        """
        f = self._file
        f.seek(-2, 2)
        self.assertEquals('yz', f.read())
202
3059.2.12 by Vincent Ladeuil
Spiv review feedback.
203
204
class TestRangeFileSizeUnknown(tests.TestCase, TestRangeFileMixin):
    """Test a RangeFile for a whole file whose size is not known."""

    def setUp(self):
        """Create a RangeFile over ``alpha`` without declaring any range."""
        super(TestRangeFileSizeUnknown, self).setUp()
        # The first argument is only a label for the file; it now says
        # 'unknown' to match what this class actually exercises.
        self._file = response.RangeFile('Whole_file_size_unknown',
                                        StringIO(self.alpha))
        # We define no range, relying on RangeFile to provide default values
        self.first_range_start = 0 # It's the whole file

    def test_seek_from_end(self):
        """See TestRangeFileMixin.test_seek_from_end.

        The end of the file can't be determined since the size is unknown.
        """
        self.assertRaises(errors.InvalidRange, self._file.seek, -1, 2)

    def test_read_at_range_end(self):
        """Test read behaviour at range end."""
        f = self._file
        self.assertEquals(self.alpha, f.read())
        self.assertEquals('', f.read(0))
        # With an unknown size, reading at the end yields '' instead of
        # raising InvalidRange as the mixin version expects.
        self.assertEquals('', f.read(1))
3059.2.12 by Vincent Ladeuil
Spiv review feedback.
227
3537.1.1 by Vincent Ladeuil
Fix some more PEP8isms and delete useless import
228
3059.2.12 by Vincent Ladeuil
Spiv review feedback.
229
class TestRangeFileSizeKnown(tests.TestCase, TestRangeFileMixin):
    """Test a RangeFile for a whole file whose size is known."""

    def setUp(self):
        """Create a RangeFile over ``alpha`` declared as one full range."""
        super(TestRangeFileSizeKnown, self).setUp()
        self._file = response.RangeFile('Whole_file_size_known',
                                        StringIO(self.alpha))
        self._file.set_range(0, len(self.alpha))
        self.first_range_start = 0 # It's the whole file
238
239
240
class TestRangeFileSingleRange(tests.TestCase, TestRangeFileMixin):
    """Test a RangeFile for a single range."""

    def setUp(self):
        """Create a RangeFile whose only range holds ``alpha`` at offset 15."""
        super(TestRangeFileSingleRange, self).setUp()
        self._file = response.RangeFile('Single_range_file',
                                        StringIO(self.alpha))
        self.first_range_start = 15
        self._file.set_range(self.first_range_start, len(self.alpha))

    def test_read_before_range(self):
        """Test that reading from a position before the range is refused."""
        # This can't occur under normal circumstances, we have to force it
        f = self._file
        f._pos = 0 # Force an invalid pos
        self.assertRaises(errors.InvalidRange, f.read, 2)
256
3537.1.1 by Vincent Ladeuil
Fix some more PEP8isms and delete useless import
257
3146.3.2 by Vincent Ladeuil
Fix #179368 by keeping the current range hint on ShortReadvErrors.
258
class TestRangeFileMultipleRanges(tests.TestCase, TestRangeFileMixin):
    """Test a RangeFile for multiple ranges.

    The RangeFile used for the tests contains three ranges:

    - at offset 25: alpha
    - at offset 100: alpha
    - at offset 126: alpha.upper()

    The two last ranges are contiguous. This only rarely occurs (should not in
    fact) in real uses but may lead to hard to track bugs.
    """

    # The following is used to represent the boundary parameter defined
    # in HTTP response headers and the boundary lines that separate
    # multipart content.

    boundary = "separation"

    def setUp(self):
        """Build the three-range multipart body and wrap it in a RangeFile."""
        super(TestRangeFileMultipleRanges, self).setUp()

        boundary = self.boundary

        self.first_range_start = 25
        file_size = 200 # big enough to encompass all ranges
        ranges = [(self.first_range_start, self.alpha),
                  # Two contiguous ranges
                  (100, self.alpha),
                  (126, self.alpha.upper())]
        content = ''.join(
            [self._multipart_byterange(part, start, boundary, file_size)
             for (start, part) in ranges])
        # Final boundary
        content += self._boundary_line()

        self._file = response.RangeFile('Multiple_ranges_file',
                                        StringIO(content))
        self.set_file_boundary()

    def _boundary_line(self):
        """Helper to build the formatted boundary line."""
        return '--' + self.boundary + '\r\n'

    def set_file_boundary(self):
        """Tell the RangeFile which boundary separates its parts."""
        # Ranges are set by decoding the range headers, the RangeFile user is
        # supposed to call the following before using seek or read since it
        # requires knowing the *response* headers (in that case the boundary
        # which is part of the Content-Type header).
        self._file.set_boundary(self.boundary)

    def _multipart_byterange(self, data, offset, boundary, file_size='*'):
        """Encode a part of a file as a multipart/byterange MIME type.

        When a range request is issued, the HTTP response body can be
        decomposed in parts, each one representing a range (start, size) in a
        file.

        :param data: The payload.
        :param offset: where data starts in the file
        :param boundary: used to separate the parts. Kept for interface
            compatibility: the boundary line is actually built from
            ``self.boundary`` by _boundary_line().
        :param file_size: the size of the file containing the range (default to
            '*' meaning unknown)

        :return: a string containing the data encoded as it will appear in the
            HTTP response body.
        """
        # Each range begins with a boundary line
        part = self._boundary_line()
        # A range is described by a set of headers, but only 'Content-Range' is
        # required for our implementation (TestHandleResponse below will
        # exercise ranges with multiple or missing headers').
        # file_size is formatted with %s so that the documented '*' default
        # works; an integer renders exactly as it did with %d.
        part += 'Content-Range: bytes %d-%d/%s\r\n' % (offset,
                                                       offset+len(data)-1,
                                                       file_size)
        part += '\r\n'
        # Finally the raw bytes
        part += data
        return part

    def test_read_all_ranges(self):
        """Test reading the three ranges one after the other."""
        f = self._file
        self.assertEquals(self.alpha, f.read()) # Read first range
        f.seek(100) # Trigger the second range recognition
        self.assertEquals(self.alpha, f.read()) # Read second range
        self.assertEquals(126, f.tell())
        f.seek(126) # Start of third range which is also the current pos !
        self.assertEquals('A', f.read(1))
        f.seek(10, 1)
        self.assertEquals('LMN', f.read(3))

    def test_seek_from_end(self):
        """See TestRangeFileMixin.test_seek_from_end."""
        # The actual implementation will seek from end for the first range only
        # and then fail. Since seeking from end is intended to be used for a
        # single range only anyway, this test just documents the actual
        # behaviour.
        f = self._file
        f.seek(-2, 2)
        self.assertEquals('yz', f.read())
        self.assertRaises(errors.InvalidRange, f.seek, -2, 2)

    def test_seek_into_void(self):
        """Test seeking to an offset that lies between two ranges."""
        f = self._file
        start = self.first_range_start
        f.seek(start)
        # Seeking to a point between two ranges is possible (only once) but
        # reading there is forbidden
        f.seek(start + 40)
        # We crossed a range boundary, so now the file is positioned at the
        # start of the new range (i.e. trying to seek below 100 will error out)
        f.seek(100)
        f.seek(125)

    def test_seek_across_ranges(self):
        """Test seeking directly into the third range."""
        f = self._file
        f.seek(126) # skip the two first ranges
        self.assertEquals('AB', f.read(2))

    def test_checked_read_dont_overflow_buffers(self):
        """Test skipping ranges with a tiny discard buffer size."""
        f = self._file
        # We force a very low value to exercise all code paths in _checked_read
        f._discarded_buf_size = 8
        f.seek(126) # skip the two first ranges
        self.assertEquals('AB', f.read(2))

    def test_seek_twice_between_ranges(self):
        """Test that a second seek between the same two ranges is refused."""
        f = self._file
        start = self.first_range_start
        f.seek(start + 40) # Past the first range but before the second
        # Now the file is positioned at the second range start (100)
        self.assertRaises(errors.InvalidRange, f.seek, start + 41)

    def test_seek_at_range_end(self):
        """Test seek behavior at range end."""
        f = self._file
        f.seek(25 + 25)
        f.seek(100 + 25)
        f.seek(126 + 25)

    def test_read_at_range_end(self):
        """Test that reading past the last range raises InvalidHttpResponse."""
        f = self._file
        self.assertEquals(self.alpha, f.read())
        self.assertEquals(self.alpha, f.read())
        self.assertEquals(self.alpha.upper(), f.read())
        self.assertRaises(errors.InvalidHttpResponse, f.read, 1)
407
3537.1.1 by Vincent Ladeuil
Fix some more PEP8isms and delete useless import
408
3535.1.1 by Adrian Wilkins
Made the behaviour of the existing multi-range test more like the real thing by
409
class TestRangeFileMultipleRangesQuotedBoundaries(TestRangeFileMultipleRanges):
    """Perform the same tests as TestRangeFileMultipleRanges, but uses
    an angle-bracket quoted boundary string like IIS 6.0 and 7.0
    (but not IIS 5, which breaks the RFC in a different way
    by using square brackets, not angle brackets).

    This reveals a bug caused by the combination of:

    - The bad implementation of RFC 822 unquoting in Python (angles are not
      quotes), coupled with

    - The bad implementation of RFC 2046 in IIS (angles are not permitted chars
      in boundary lines).
    """

    # The boundary as it appears in boundary lines
    # IIS 6 and 7 use this value
    _boundary_trimmed = "q1w2e3r4t5y6u7i8o9p0zaxscdvfbgnhmjklkl"
    boundary = '<' + _boundary_trimmed + '>'

    def set_file_boundary(self):
        """Give the RangeFile the boundary without its angle brackets."""
        # Emulate broken rfc822.unquote() here by removing angles
        self._file.set_boundary(self._boundary_trimmed)
3537.1.1 by Vincent Ladeuil
Fix some more PEP8isms and delete useless import
432
3059.2.12 by Vincent Ladeuil
Spiv review feedback.
433
3059.2.14 by Vincent Ladeuil
Complete coverage by adding tests for more invalid inputs. Fix a
434
class TestRangeFileVarious(tests.TestCase):
    """Tests RangeFile aspects not covered elsewhere."""

    def test_seek_whence(self):
        """Test the seek whence parameter values."""
        f = response.RangeFile('foo', StringIO('abc'))
        f.set_range(0, 3)
        f.seek(0)
        f.seek(1, 1)
        f.seek(-1, 2)
        # Any whence other than 0, 1 or 2 is rejected
        self.assertRaises(ValueError, f.seek, 0, 14)

    def test_range_syntax(self):
        """Test the Content-Range scanning."""

        f = response.RangeFile('foo', StringIO())

        def ok(expected, header_value):
            """Check header_value is parsed into the (start, size) pair."""
            f.set_range_from_header(header_value)
            # Slightly peek under the covers to get the size
            self.assertEquals(expected, (f.tell(), f._size))

        ok((1, 10), 'bytes 1-10/11')
        ok((1, 10), 'bytes 1-10/*')
        ok((12, 2), '\tbytes 12-13/*')
        ok((28, 1), '  bytes 28-28/*')
        ok((2123, 2120), 'bytes  2123-4242/12310')
        ok((1, 10), 'bytes 1-10/ttt') # We don't check total (ttt)

        def nok(header_value):
            """Check header_value is rejected with InvalidHttpRange."""
            self.assertRaises(errors.InvalidHttpRange,
                              f.set_range_from_header, header_value)

        nok('bytes 10-2/3')
        nok('chars 1-2/3')
        nok('bytes xx-yyy/zzz')
        nok('bytes xx-12/zzz')
        nok('bytes 11-yy/zzz')
        nok('bytes10-2/3')
1786.1.21 by John Arbash Meinel
(broken) Work on factoring out handle_response so we can test with fake headers.
473
3059.2.12 by Vincent Ladeuil
Spiv review feedback.
474
1786.1.21 by John Arbash Meinel
(broken) Work on factoring out handle_response so we can test with fake headers.
475
# Taken from real request responses
1786.1.26 by John Arbash Meinel
Update and test handle_response.
476
# (status code, raw status line + headers, body) for a plain 200 response;
# the body is 35 bytes long, as announced by Content-Length.
_full_text_response = (200, """HTTP/1.1 200 OK\r
Date: Tue, 11 Jul 2006 04:32:56 GMT\r
Server: Apache/2.0.54 (Fedora)\r
Last-Modified: Sun, 23 Apr 2006 19:35:20 GMT\r
ETag: "56691-23-38e9ae00"\r
Accept-Ranges: bytes\r
Content-Length: 35\r
Connection: close\r
Content-Type: text/plain; charset=UTF-8\r
\r
""", """Bazaar-NG meta directory, format 1
""")
488
489
1786.1.26 by John Arbash Meinel
Update and test handle_response.
490
# (status code, raw status line + headers, body) for a 206 response carrying
# the single range 100-199; the body is 100 bytes long.
_single_range_response = (206, """HTTP/1.1 206 Partial Content\r
Date: Tue, 11 Jul 2006 04:45:22 GMT\r
Server: Apache/2.0.54 (Fedora)\r
Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r
ETag: "238a3c-16ec2-805c5540"\r
Accept-Ranges: bytes\r
Content-Length: 100\r
Content-Range: bytes 100-199/93890\r
Connection: close\r
Content-Type: text/plain; charset=UTF-8\r
\r
""", """mbp@sourcefrog.net-20050309040815-13242001617e4a06
mbp@sourcefrog.net-20050309040929-eee0eb3e6d1e762""")
503
504
2070.1.1 by John Arbash Meinel
Fix bug #62473 by not requiring content-type in range responses
505
# Same single-range 206 response as above but without any Content-Type header.
_single_range_no_content_type = (206, """HTTP/1.1 206 Partial Content\r
Date: Tue, 11 Jul 2006 04:45:22 GMT\r
Server: Apache/2.0.54 (Fedora)\r
Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r
ETag: "238a3c-16ec2-805c5540"\r
Accept-Ranges: bytes\r
Content-Length: 100\r
Content-Range: bytes 100-199/93890\r
Connection: close\r
\r
""", """mbp@sourcefrog.net-20050309040815-13242001617e4a06
mbp@sourcefrog.net-20050309040929-eee0eb3e6d1e762""")
517
518
1786.1.26 by John Arbash Meinel
Update and test handle_response.
519
# (status code, raw status line + headers, body) for a multipart/byteranges
# 206 response carrying the ranges 0-254 and 1000-2049.
_multipart_range_response = (206, """HTTP/1.1 206 Partial Content\r
Date: Tue, 11 Jul 2006 04:49:48 GMT\r
Server: Apache/2.0.54 (Fedora)\r
Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r
ETag: "238a3c-16ec2-805c5540"\r
Accept-Ranges: bytes\r
Content-Length: 1534\r
Connection: close\r
Content-Type: multipart/byteranges; boundary=418470f848b63279b\r
\r
\r""", """--418470f848b63279b\r
Content-type: text/plain; charset=UTF-8\r
Content-range: bytes 0-254/93890\r
\r
mbp@sourcefrog.net-20050309040815-13242001617e4a06
mbp@sourcefrog.net-20050309040929-eee0eb3e6d1e7627
mbp@sourcefrog.net-20050309040957-6cad07f466bb0bb8
mbp@sourcefrog.net-20050309041501-c840e09071de3b67
mbp@sourcefrog.net-20050309044615-c24a3250be83220a
\r
--418470f848b63279b\r
Content-type: text/plain; charset=UTF-8\r
Content-range: bytes 1000-2049/93890\r
\r
40-fd4ec249b6b139ab
mbp@sourcefrog.net-20050311063625-07858525021f270b
mbp@sourcefrog.net-20050311231934-aa3776aff5200bb9
mbp@sourcefrog.net-20050311231953-73aeb3a131c3699a
mbp@sourcefrog.net-20050311232353-f5e33da490872c6a
mbp@sourcefrog.net-20050312071639-0a8f59a34a024ff0
mbp@sourcefrog.net-20050312073432-b2c16a55e0d6e9fb
mbp@sourcefrog.net-20050312073831-a47c3335ece1920f
mbp@sourcefrog.net-20050312085412-13373aa129ccbad3
mbp@sourcefrog.net-20050313052251-2bf004cb96b39933
mbp@sourcefrog.net-20050313052856-3edd84094687cb11
mbp@sourcefrog.net-20050313053233-e30a4f28aef48f9d
mbp@sourcefrog.net-20050313053853-7c64085594ff3072
mbp@sourcefrog.net-20050313054757-a86c3f5871069e22
mbp@sourcefrog.net-20050313061422-418f1f73b94879b9
mbp@sourcefrog.net-20050313120651-497bd231b19df600
mbp@sourcefrog.net-20050314024931-eae0170ef25a5d1a
mbp@sourcefrog.net-20050314025438-d52099f915fe65fc
mbp@sourcefrog.net-20050314025539-637a636692c055cf
mbp@sourcefrog.net-20050314025737-55eb441f430ab4ba
mbp@sourcefrog.net-20050314025901-d74aa93bb7ee8f62
mbp@source\r
--418470f848b63279b--\r
""")
567
3059.2.14 by Vincent Ladeuil
Complete coverage by adding tests for more invalid inputs. Fix a
568
1979.1.1 by John Arbash Meinel
Fix bug #57723, parse boundary="" correctly, since Squid uses it
569
# (status code, raw status line + headers, body) for a multipart/byteranges
# 206 response whose boundary parameter is enclosed in double quotes; it
# carries the ranges 0-99 and 300-499.
_multipart_squid_range_response = (206, """HTTP/1.0 206 Partial Content\r
Date: Thu, 31 Aug 2006 21:16:22 GMT\r
Server: Apache/2.2.2 (Unix) DAV/2\r
Last-Modified: Thu, 31 Aug 2006 17:57:06 GMT\r
Accept-Ranges: bytes\r
Content-Type: multipart/byteranges; boundary="squid/2.5.STABLE12:C99323425AD4FE26F726261FA6C24196"\r
Content-Length: 598\r
X-Cache: MISS from localhost.localdomain\r
X-Cache-Lookup: HIT from localhost.localdomain:3128\r
Proxy-Connection: keep-alive\r
\r
""",
"""\r
--squid/2.5.STABLE12:C99323425AD4FE26F726261FA6C24196\r
Content-Type: text/plain\r
Content-Range: bytes 0-99/18672\r
\r
# bzr knit index 8

scott@netsplit.com-20050708230047-47c7868f276b939f fulltext 0 863  :
scott@netsp\r
--squid/2.5.STABLE12:C99323425AD4FE26F726261FA6C24196\r
Content-Type: text/plain\r
Content-Range: bytes 300-499/18672\r
\r
com-20050708231537-2b124b835395399a :
scott@netsplit.com-20050820234126-551311dbb7435b51 line-delta 1803 479 .scott@netsplit.com-20050820232911-dc4322a084eadf7e :
scott@netsplit.com-20050821213706-c86\r
--squid/2.5.STABLE12:C99323425AD4FE26F726261FA6C24196--\r
""")
599
600
1786.1.26 by John Arbash Meinel
Update and test handle_response.
601
# This is made up
3059.2.2 by Vincent Ladeuil
Read http responses on demand without buffering the whole body
602
# Canned (code, raw_headers, body) 200 response whose headers carry no
# Content-Type line. test_full_text_no_content_type checks that the body can
# still be read back in full.
_full_text_response_no_content_type = (200, """HTTP/1.1 200 OK\r
603
Date: Tue, 11 Jul 2006 04:32:56 GMT\r
604
Server: Apache/2.0.54 (Fedora)\r
605
Last-Modified: Sun, 23 Apr 2006 19:35:20 GMT\r
606
ETag: "56691-23-38e9ae00"\r
607
Accept-Ranges: bytes\r
608
Content-Length: 35\r
609
Connection: close\r
610
\r
611
""", """Bazaar-NG meta directory, format 1
612
""")
613
614
3059.2.14 by Vincent Ladeuil
Complete coverage by adding tests for more invalid inputs. Fix a
615
# Canned (code, raw_headers, body) 200 response whose headers carry no
# Content-Length line. test_full_text_no_content_length checks that the body
# can still be read back in full.
_full_text_response_no_content_length = (200, """HTTP/1.1 200 OK\r
616
Date: Tue, 11 Jul 2006 04:32:56 GMT\r
617
Server: Apache/2.0.54 (Fedora)\r
618
Last-Modified: Sun, 23 Apr 2006 19:35:20 GMT\r
619
ETag: "56691-23-38e9ae00"\r
620
Accept-Ranges: bytes\r
621
Connection: close\r
622
Content-Type: text/plain; charset=UTF-8\r
623
\r
624
""", """Bazaar-NG meta directory, format 1
625
""")
626
627
3059.2.2 by Vincent Ladeuil
Read http responses on demand without buffering the whole body
628
# Canned (code, raw_headers, body) 206 response whose headers carry no
# Content-Range line. test_missing_content_range expects handle_response to
# reject it with InvalidHttpResponse.
_single_range_no_content_range = (206, """HTTP/1.1 206 Partial Content\r
629
Date: Tue, 11 Jul 2006 04:45:22 GMT\r
630
Server: Apache/2.0.54 (Fedora)\r
631
Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r
632
ETag: "238a3c-16ec2-805c5540"\r
633
Accept-Ranges: bytes\r
634
Content-Length: 100\r
635
Connection: close\r
636
\r
637
""", """mbp@sourcefrog.net-20050309040815-13242001617e4a06
638
mbp@sourcefrog.net-20050309040929-eee0eb3e6d1e762""")
639
640
3059.2.14 by Vincent Ladeuil
Complete coverage by adding tests for more invalid inputs. Fix a
641
# Canned (code, raw_headers, body) 206 response declaring a 100 byte range
# (Content-Length: 100, Content-Range: bytes 100-199) while the body supplied
# is a single shorter line. test_single_range_truncated expects a
# ShortReadvError when seeking past the data actually present.
_single_range_response_truncated = (206, """HTTP/1.1 206 Partial Content\r
642
Date: Tue, 11 Jul 2006 04:45:22 GMT\r
643
Server: Apache/2.0.54 (Fedora)\r
644
Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r
645
ETag: "238a3c-16ec2-805c5540"\r
646
Accept-Ranges: bytes\r
647
Content-Length: 100\r
648
Content-Range: bytes 100-199/93890\r
649
Connection: close\r
650
Content-Type: text/plain; charset=UTF-8\r
651
\r
652
""", """mbp@sourcefrog.net-20050309040815-13242001617e4a06""")
653
654
1786.1.26 by John Arbash Meinel
Update and test handle_response.
655
# Canned (code, raw_headers, body) response using the made-up status code
# 444. test_invalid_response expects handle_response to reject it with
# InvalidHttpResponse.
_invalid_response = (444, """HTTP/1.1 444 Bad Response\r
656
Date: Tue, 11 Jul 2006 04:32:56 GMT\r
657
Connection: close\r
658
Content-Type: text/html; charset=iso-8859-1\r
659
\r
660
""", """<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">
661
<html><head>
662
<title>404 Not Found</title>
663
</head><body>
664
<h1>Not Found</h1>
665
<p>I don't know what I'm doing</p>
666
<hr>
667
</body></html>
668
""")
669
670
3059.2.14 by Vincent Ladeuil
Complete coverage by adding tests for more invalid inputs. Fix a
671
# Canned (code, raw_headers, body) multipart 206 response whose only part has
# a Content-Type header but no Content-Range header.
# test_multipart_no_content_range expects InvalidHttpResponse.
_multipart_no_content_range = (206, """HTTP/1.0 206 Partial Content\r
672
Content-Type: multipart/byteranges; boundary=THIS_SEPARATES\r
673
Content-Length: 598\r
674
\r
675
""",
676
"""\r
677
--THIS_SEPARATES\r
678
Content-Type: text/plain\r
679
\r
680
# bzr knit index 8
681
--THIS_SEPARATES\r
682
""")
683
684
685
# Canned (code, raw_headers, body) multipart 206 response. The headers do
# declare a boundary; what is missing is the boundary line that should follow
# the first (bytes 0-18) range in the body, where arbitrary text appears
# instead. test_multipart_no_boundary expects InvalidHttpResponse when seeking
# beyond that range.
_multipart_no_boundary = (206, """HTTP/1.0 206 Partial Content\r
686
Content-Type: multipart/byteranges; boundary=THIS_SEPARATES\r
687
Content-Length: 598\r
688
\r
689
""",
690
"""\r
691
--THIS_SEPARATES\r
692
Content-Type: text/plain\r
693
Content-Range: bytes 0-18/18672\r
694
\r
695
# bzr knit index 8
696
697
The range ended at the line above, this text is garbage instead of a boundary
698
line
699
""")
700
701
3059.2.12 by Vincent Ladeuil
Spiv review feedback.
702
class TestHandleResponse(tests.TestCase):
    """Run response.handle_response against canned HTTP responses.

    Every canned response is a (code, raw_headers, body) tuple where
    raw_headers starts with the HTTP status line.
    """

    def _build_HTTPMessage(self, raw_headers):
        """Build an httplib.HTTPMessage from a raw status line and headers.

        :param raw_headers: The status line followed by the header lines.
        :return: The HTTPMessage built from what follows the status line.
        """
        status_and_headers = StringIO(raw_headers)
        # Get rid of the status line
        status_and_headers.readline()
        msg = httplib.HTTPMessage(status_and_headers)
        return msg

    def get_response(self, a_response):
        """Process a supplied response, and return the result.

        :param a_response: A (code, raw_headers, body) tuple.
        :return: What response.handle_response returns for that response.
        """
        code, raw_headers, body = a_response
        msg = self._build_HTTPMessage(raw_headers)
        return response.handle_response('http://foo', code, msg,
                                        StringIO(body))

    def test_full_text(self):
        out = self.get_response(_full_text_response)
        # The whole body must be readable back unchanged
        self.assertEqual(_full_text_response[2], out.read())

    def test_single_range(self):
        out = self.get_response(_single_range_response)

        out.seek(100)
        self.assertEqual(_single_range_response[2], out.read(100))

    def test_single_range_no_content(self):
        out = self.get_response(_single_range_no_content_type)

        out.seek(100)
        self.assertEqual(_single_range_no_content_type[2], out.read(100))

    def test_single_range_truncated(self):
        out = self.get_response(_single_range_response_truncated)
        # Content-Range declares 100 bytes but the body is shorter, so
        # seeking 51 bytes forward goes past the data actually present
        self.assertRaises(errors.ShortReadvError, out.seek, out.tell() + 51)

    def test_multi_range(self):
        out = self.get_response(_multipart_range_response)

        # Just make sure we can seek to and read from each range
        out.seek(0)
        out.read(255)

        out.seek(1000)
        out.read(1050)

    def test_multi_squid_range(self):
        out = self.get_response(_multipart_squid_range_response)

        # Just make sure we can seek to and read from each range
        out.seek(0)
        out.read(100)

        out.seek(300)
        out.read(200)

    def test_invalid_response(self):
        self.assertRaises(errors.InvalidHttpResponse,
                          self.get_response, _invalid_response)

    def test_full_text_no_content_type(self):
        # We should not require Content-Type for a full response
        out = self.get_response(_full_text_response_no_content_type)
        self.assertEqual(_full_text_response_no_content_type[2], out.read())

    def test_full_text_no_content_length(self):
        # We should not require Content-Length for a full response
        out = self.get_response(_full_text_response_no_content_length)
        self.assertEqual(_full_text_response_no_content_length[2], out.read())

    def test_missing_content_range(self):
        # A single range response without Content-Range must be rejected
        code, raw_headers, body = _single_range_no_content_range
        msg = self._build_HTTPMessage(raw_headers)
        self.assertRaises(errors.InvalidHttpResponse,
                          response.handle_response,
                          'http://bogus', code, msg, StringIO(body))

    def test_multipart_no_content_range(self):
        # A multipart part without Content-Range must be rejected
        code, raw_headers, body = _multipart_no_content_range
        msg = self._build_HTTPMessage(raw_headers)
        self.assertRaises(errors.InvalidHttpResponse,
                          response.handle_response,
                          'http://bogus', code, msg, StringIO(body))

    def test_multipart_no_boundary(self):
        out = self.get_response(_multipart_no_boundary)
        out.read()  # Read the whole range
        # Fail to find the boundary line
        self.assertRaises(errors.InvalidHttpResponse, out.seek, 1, 1)
3408.6.1 by Eric Holmberg
Fix for Bug #215426 in which bzr can cause a MemoryError in socket.recv while
796
797
798
class TestRangeFileSizeReadLimited(tests.TestCase):
    """Test RangeFile _max_read_size functionality which limits the size of
    read blocks to prevent MemoryError messages in socket.recv.
    """

    def setUp(self):
        # Let the base class prepare its own fixtures before adding ours.
        tests.TestCase.setUp(self)
        # create a test datablock larger than _max_read_size.
        chunk_size = response.RangeFile._max_read_size
        test_pattern = '0123456789ABCDEF'
        # Floor division: the repeat count has to be an integer.
        self.test_data = test_pattern * (3 * chunk_size // len(test_pattern))
        self.test_data_len = len(self.test_data)

    def test_max_read_size(self):
        """Read data in blocks and verify that the reads are not larger than
           the maximum read size.
        """
        # retrieve data in large blocks from response.RangeFile object
        mock_read_file = FakeReadFile(self.test_data)
        range_file = response.RangeFile('test_max_read_size', mock_read_file)
        response_data = range_file.read(self.test_data_len)

        # verify read size was equal to the maximum read size
        self.assertTrue(mock_read_file.get_max_read_size() > 0)
        self.assertEqual(mock_read_file.get_max_read_size(),
                         response.RangeFile._max_read_size)
        self.assertEqual(mock_read_file.get_read_count(), 3)

        # report error if the data wasn't equal (we only report the size due
        # to the length of the data)
        if response_data != self.test_data:
            message = "Data not equal.  Expected %d bytes, received %d."
            # Expected size first, received size second, as the message says.
            self.fail(message % (self.test_data_len, len(response_data)))
830