~bzr-pqm/bzr/bzr.dev

4763.2.4 by John Arbash Meinel
merge bzr.2.1 in preparation for NEWS entry.
1
# Copyright (C) 2006-2010 Canonical Ltd
1786.1.8 by John Arbash Meinel
[merge] Johan Rydberg test updates
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
4183.7.1 by Sabin Iacob
update FSF mailing address
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
1786.1.8 by John Arbash Meinel
[merge] Johan Rydberg test updates
16
3059.2.12 by Vincent Ladeuil
Spiv review feedback.
17
"""Tests from HTTP response parsing.
18
3059.2.18 by Vincent Ladeuil
Take spiv review comments into account.
19
The handle_response method reads the response body of a GET request and returns
20
the corresponding RangeFile.
3059.2.12 by Vincent Ladeuil
Spiv review feedback.
21
22
There are four different kinds of RangeFile:
23
- a whole file whose size is unknown, seen as a simple byte stream,
24
- a whole file whose size is known, we can't read past its end,
25
- a single range file, a part of a file with a start and a size,
26
- a multiple range file, several consecutive parts with known start offset
27
  and size.
28
29
Some properties are common to all kinds:
30
- seek can only be forward (it's really a socket underneath),
31
- read can't cross ranges,
32
- successive ranges are taken into account transparently,
3059.2.18 by Vincent Ladeuil
Take spiv review comments into account.
33
3059.2.12 by Vincent Ladeuil
Spiv review feedback.
34
- the expected pattern of use is either seek(offset)+read(size) or a single
3059.2.18 by Vincent Ladeuil
Take spiv review comments into account.
35
  read with no size specified. For multiple range files, multiple read() will
36
  return the corresponding ranges, trying to read further will raise
37
  InvalidHttpResponse.
3059.2.12 by Vincent Ladeuil
Spiv review feedback.
38
"""
1786.1.8 by John Arbash Meinel
[merge] Johan Rydberg test updates
39
40
from cStringIO import StringIO
3059.2.2 by Vincent Ladeuil
Read http responses on demand without buffering the whole body
41
import httplib
1786.1.8 by John Arbash Meinel
[merge] Johan Rydberg test updates
42
3059.2.12 by Vincent Ladeuil
Spiv review feedback.
43
from bzrlib import (
44
    errors,
45
    tests,
46
    )
3104.3.4 by Vincent Ladeuil
Add test.
47
from bzrlib.transport.http import (
48
    response,
49
    _urllib2_wrappers,
50
    )
3408.6.1 by Eric Holmberg
Fix for Bug #215426 in which bzr can cause a MemoryError in socket.recv while
51
from bzrlib.tests.file_utils import (
52
    FakeReadFile,
53
    )
3104.3.4 by Vincent Ladeuil
Add test.
54
55
56
class ReadSocket(object):
    """Minimal socket stand-in that serves canned data through makefile()."""

    def __init__(self, data):
        # Wrap the canned content once; every makefile() call hands out this
        # very same file-like object.
        canned = StringIO(data)
        self.readfile = canned

    def makefile(self, mode='r', bufsize=None):
        """Return the file-like object wrapping the predefined content.

        ``mode`` and ``bufsize`` are accepted but not used.
        """
        return self.readfile
64
3408.6.1 by Eric Holmberg
Fix for Bug #215426 in which bzr can cause a MemoryError in socket.recv while
65
3104.3.4 by Vincent Ladeuil
Add test.
66
class FakeHTTPConnection(_urllib2_wrappers.HTTPConnection):
    """An HTTPConnection that reads from a given socket and drops writes."""

    def __init__(self, sock):
        base = _urllib2_wrappers.HTTPConnection
        base.__init__(self, 'localhost')
        # Install the provided socket directly to bypass the connection
        self.sock = sock

    def send(self, str):
        """Ignores the writes on the socket."""
        return None
76
77
6575.1.2 by Vincent Ladeuil
TDD backwards, works here ;)
78
class TestResponseFileIter(tests.TestCase):
    """Check that a ResponseFile can be iterated line by line."""

    def test_iter_empty(self):
        """Iterating over an empty file yields nothing."""
        empty = response.ResponseFile('empty', StringIO())
        lines = list(empty)
        self.assertEqual([], lines)

    def test_iter_many(self):
        """Iterating yields each line, line terminator included."""
        content = '0\n1\nboo!\n'
        many = response.ResponseFile('many', StringIO(content))
        lines = list(many)
        self.assertEqual(['0\n', '1\n', 'boo!\n'], lines)
6575.1.2 by Vincent Ladeuil
TDD backwards, works here ;)
87
88
3104.3.4 by Vincent Ladeuil
Add test.
89
class TestHTTPConnection(tests.TestCase):
    """Tests for the HTTPConnection wrapper fed by a fake socket."""

    def test_cleanup_pipe(self):
        """cleanup_pipe() logs a message when too many bytes are left unread.

        The canned response carries an 18 bytes body; 11 are read, leaving 7
        pending bytes, which is more than the threshold forced below.
        """
        sock = ReadSocket("""HTTP/1.1 200 OK\r
Content-Type: text/plain; charset=UTF-8\r
Content-Length: 18
\r
0123456789
garbage""")
        conn = FakeHTTPConnection(sock)
        # Simulate the request sending so that the connection will be able to
        # read the response.
        conn.putrequest('GET', 'http://localhost/fictious')
        conn.endheaders()
        # Now, get the response
        resp = conn.getresponse()
        # Read part of the response
        self.assertEqual('0123456789\n', resp.read(11))
        # Override the threshold to force the warning emission (the attribute
        # name below is spelled as the connection class defines it)
        conn._range_warning_thresold = 6 # There are 7 bytes pending
        conn.cleanup_pipe()
        self.assertContainsRe(self.get_log(), 'Got a 200 response when asking')
3059.2.12 by Vincent Ladeuil
Spiv review feedback.
111
112
113
class TestRangeFileMixin(object):
    """Tests for accessing the first range in a RangeFile.

    Classes using this mixin must set, in their setUp():

    - self._file: the RangeFile under test, whose first range contains
      ``alpha``,
    - self.first_range_start: the offset at which that first range starts.
    """

    # A simple string used to represent a file part (also called a range), in
    # which offsets are easy to calculate for test writers. It's used as a
    # building block with slight variations but basically 'a' is the first char
    # of the range and 'z' is the last.
    alpha = 'abcdefghijklmnopqrstuvwxyz'

    def test_can_read_at_first_access(self):
        """Test that the just created file can be read."""
        self.assertEqual(self.alpha, self._file.read())

    def test_seek_read(self):
        """Test seek/read inside the range."""
        f = self._file
        start = self.first_range_start
        # Before any use, tell() should be at the range start
        self.assertEqual(start, f.tell())
        cur = start # For an overall offset assertion
        f.seek(start + 3)
        cur += 3
        self.assertEqual('def', f.read(3))
        cur += len('def')
        f.seek(4, 1)
        cur += 4
        self.assertEqual('klmn', f.read(4))
        cur += len('klmn')
        # read(0) in the middle of a range
        self.assertEqual('', f.read(0))
        # seek in place
        here = f.tell()
        f.seek(0, 1)
        self.assertEqual(here, f.tell())
        self.assertEqual(cur, f.tell())

    def test_read_zero(self):
        """Test that read(0) returns an empty string, before and after a seek."""
        f = self._file
        self.assertEqual('', f.read(0))
        f.seek(10, 1)
        self.assertEqual('', f.read(0))

    def test_seek_at_range_end(self):
        """Test that seeking right at the end of the range does not raise."""
        f = self._file
        f.seek(26, 1)

    def test_read_at_range_end(self):
        """Test read behaviour at range end."""
        f = self._file
        self.assertEqual(self.alpha, f.read())
        self.assertEqual('', f.read(0))
        self.assertRaises(errors.InvalidRange, f.read, 1)

    def test_unbounded_read_after_seek(self):
        """Test that a read without size stops at the end of the range."""
        f = self._file
        f.seek(24, 1)
        # Should not cross ranges
        self.assertEqual('yz', f.read())

    def test_seek_backwards(self):
        """Test that seeking before the current position is refused."""
        f = self._file
        start = self.first_range_start
        f.seek(start)
        f.read(12)
        self.assertRaises(errors.InvalidRange, f.seek, start + 5)

    def test_seek_outside_single_range(self):
        """Test that seeking past the end of a fully defined range fails."""
        f = self._file
        if f._size == -1 or f._boundary is not None:
            raise tests.TestNotApplicable('Needs a fully defined range')
        # Will seek past the range and then errors out
        self.assertRaises(errors.InvalidRange,
                          f.seek, self.first_range_start + 27)

    def test_read_past_end_of_range(self):
        """Test that reading more bytes than the range holds is refused."""
        f = self._file
        if f._size == -1:
            raise tests.TestNotApplicable("Can't check an unknown size")
        start = self.first_range_start
        f.seek(start + 20)
        self.assertRaises(errors.InvalidRange, f.read, 10)

    def test_seek_from_end(self):
        """Test seeking from the end of the file.

        The semantic is unclear in case of multiple ranges. Seeking from end
        exists only for the http transports, cannot be used if the file size is
        unknown and is not used in bzrlib itself. This test must be (and is)
        overridden by daughter classes.

        Reading from end makes sense only when a range has been requested from
        the end of the file (see HttpTransportBase._get() when using the
        'tail_amount' parameter). The HTTP response can only be a whole file or
        a single range.
        """
        f = self._file
        f.seek(-2, 2)
        self.assertEqual('yz', f.read())
211
3059.2.12 by Vincent Ladeuil
Spiv review feedback.
212
213
class TestRangeFileSizeUnknown(tests.TestCase, TestRangeFileMixin):
    """Test a RangeFile for a whole file whose size is not known."""

    def setUp(self):
        super(TestRangeFileSizeUnknown, self).setUp()
        # The label used to read 'Whole_file_size_known', a copy/paste slip
        # from the size-known fixture; it only names the file.
        self._file = response.RangeFile('Whole_file_size_unknown',
                                        StringIO(self.alpha))
        # We define no range, relying on RangeFile to provide default values
        self.first_range_start = 0 # It's the whole file

    def test_seek_from_end(self):
        """See TestRangeFileMixin.test_seek_from_end.

        The end of the file can't be determined since the size is unknown.
        """
        self.assertRaises(errors.InvalidRange, self._file.seek, -1, 2)

    def test_read_at_range_end(self):
        """Test read behaviour at range end.

        With an unknown size, reading past the end returns an empty string
        instead of raising.
        """
        f = self._file
        self.assertEqual(self.alpha, f.read())
        self.assertEqual('', f.read(0))
        self.assertEqual('', f.read(1))
3059.2.12 by Vincent Ladeuil
Spiv review feedback.
236
3537.1.1 by Vincent Ladeuil
Fix some more PEP8isms and delete useless import
237
3059.2.12 by Vincent Ladeuil
Spiv review feedback.
238
class TestRangeFileSizeKnown(tests.TestCase, TestRangeFileMixin):
    """Run the mixin tests on a RangeFile holding a whole file of known size."""

    def setUp(self):
        super(TestRangeFileSizeKnown, self).setUp()
        whole_file = response.RangeFile('Whole_file_size_known',
                                        StringIO(self.alpha))
        self._file = whole_file
        # The single range starts at offset 0 and spans the whole content
        whole_file.set_range(0, len(self.alpha))
        self.first_range_start = 0
247
248
249
class TestRangeFileSingleRange(tests.TestCase, TestRangeFileMixin):
    """Run the mixin tests on a RangeFile made of one range at offset 15."""

    def setUp(self):
        super(TestRangeFileSingleRange, self).setUp()
        single = response.RangeFile('Single_range_file',
                                    StringIO(self.alpha))
        self._file = single
        self.first_range_start = 15
        single.set_range(self.first_range_start, len(self.alpha))

    def test_read_before_range(self):
        """Reading from a position located before the range start must fail."""
        range_file = self._file
        # Such a position can't be reached under normal circumstances, so it
        # is forced by setting the private attribute directly
        range_file._pos = 0
        self.assertRaises(errors.InvalidRange, range_file.read, 2)
265
3537.1.1 by Vincent Ladeuil
Fix some more PEP8isms and delete useless import
266
3146.3.2 by Vincent Ladeuil
Fix #179368 by keeping the current range hint on ShortReadvErrors.
267
class TestRangeFileMultipleRanges(tests.TestCase, TestRangeFileMixin):
    """Test a RangeFile for multiple ranges.

    The RangeFile used for the tests contains three ranges:

    - at offset 25: alpha
    - at offset 100: alpha
    - at offset 126: alpha.upper()

    The two last ranges are contiguous. This only rarely occurs (should not in
    fact) in real uses but may lead to hard to track bugs.
    """

    # The following is used to represent the boundary parameter defined
    # in HTTP response headers and the boundary lines that separate
    # multipart content.

    boundary = "separation"

    def setUp(self):
        super(TestRangeFileMultipleRanges, self).setUp()

        boundary = self.boundary

        self.first_range_start = 25
        file_size = 200 # big enough to encompass all ranges
        parts = []
        for (start, part) in [(self.first_range_start, self.alpha),
                              # Two contiguous ranges
                              (100, self.alpha),
                              (126, self.alpha.upper())]:
            parts.append(self._multipart_byterange(part, start, boundary,
                                                   file_size))
        # Final boundary
        parts.append(self._boundary_line())
        content = ''.join(parts)

        self._file = response.RangeFile('Multiple_ranges_file',
                                        StringIO(content))
        self.set_file_boundary()

    def _boundary_line(self):
        """Helper to build the formatted boundary line."""
        return '--' + self.boundary + '\r\n'

    def set_file_boundary(self):
        """Tell the RangeFile which boundary separates the ranges."""
        # Ranges are set by decoding the range headers, the RangeFile user is
        # supposed to call the following before using seek or read since it
        # requires knowing the *response* headers (in that case the boundary
        # which is part of the Content-Type header).
        self._file.set_boundary(self.boundary)

    def _multipart_byterange(self, data, offset, boundary, file_size='*'):
        """Encode a part of a file as a multipart/byterange MIME type.

        When a range request is issued, the HTTP response body can be
        decomposed in parts, each one representing a range (start, size) in a
        file.

        :param data: The payload.
        :param offset: where data starts in the file
        :param boundary: used to separate the parts. Note that this value is
            currently not used: the boundary line is built from self.boundary
            by _boundary_line().
        :param file_size: the size of the file containing the range (default to
            '*' meaning unknown)

        :return: a string containing the data encoded as it will appear in the
            HTTP response body.
        """
        # Each range begins with a boundary line
        pieces = [self._boundary_line()]
        # A range is described by a set of headers, but only 'Content-Range' is
        # required for our implementation (TestHandleResponse below will
        # exercise ranges with multiple or missing headers).
        # file_size is formatted with %s (not %d) so that the default '*' is
        # accepted as well as an integer.
        pieces.append('Content-Range: bytes %d-%d/%s\r\n'
                      % (offset, offset + len(data) - 1, file_size))
        pieces.append('\r\n')
        # Finally the raw bytes
        pieces.append(data)
        return ''.join(pieces)

    def test_read_all_ranges(self):
        """Test reading the three ranges one after the other."""
        f = self._file
        self.assertEqual(self.alpha, f.read()) # Read first range
        f.seek(100) # Trigger the second range recognition
        self.assertEqual(self.alpha, f.read()) # Read second range
        self.assertEqual(126, f.tell())
        f.seek(126) # Start of third range which is also the current pos !
        self.assertEqual('A', f.read(1))
        f.seek(10, 1)
        self.assertEqual('LMN', f.read(3))

    def test_seek_from_end(self):
        """See TestRangeFileMixin.test_seek_from_end."""
        # The actual implementation will seek from end for the first range only
        # and then fail. Since seeking from end is intended to be used for a
        # single range only anyway, this test just documents the actual
        # behaviour.
        f = self._file
        f.seek(-2, 2)
        self.assertEqual('yz', f.read())
        self.assertRaises(errors.InvalidRange, f.seek, -2, 2)

    def test_seek_into_void(self):
        """Test seeking to an offset located between two ranges."""
        f = self._file
        start = self.first_range_start
        f.seek(start)
        # Seeking to a point between two ranges is possible (only once) but
        # reading there is forbidden
        f.seek(start + 40)
        # We crossed a range boundary, so now the file is positioned at the
        # start of the new range (i.e. trying to seek below 100 will error out)
        f.seek(100)
        f.seek(125)

    def test_seek_across_ranges(self):
        """Test seeking directly into the third range."""
        f = self._file
        f.seek(126) # skip the two first ranges
        self.assertEqual('AB', f.read(2))

    def test_checked_read_dont_overflow_buffers(self):
        """Test skipping ranges with a very small discard buffer."""
        f = self._file
        # We force a very low value to exercise all code paths in _checked_read
        f._discarded_buf_size = 8
        f.seek(126) # skip the two first ranges
        self.assertEqual('AB', f.read(2))

    def test_seek_twice_between_ranges(self):
        """Test that a second seek between the same two ranges is refused."""
        f = self._file
        start = self.first_range_start
        f.seek(start + 40) # Past the first range but before the second
        # Now the file is positioned at the second range start (100)
        self.assertRaises(errors.InvalidRange, f.seek, start + 41)

    def test_seek_at_range_end(self):
        """Test seek behavior at range end."""
        f = self._file
        f.seek(25 + 25)
        f.seek(100 + 25)
        f.seek(126 + 25)

    def test_read_at_range_end(self):
        """Test that reading past the last range is refused."""
        f = self._file
        self.assertEqual(self.alpha, f.read())
        self.assertEqual(self.alpha, f.read())
        self.assertEqual(self.alpha.upper(), f.read())
        self.assertRaises(errors.InvalidHttpResponse, f.read, 1)
414
3537.1.1 by Vincent Ladeuil
Fix some more PEP8isms and delete useless import
415
3535.1.1 by Adrian Wilkins
Made the behaviour of the existing multi-range test more like the real thing by
416
class TestRangeFileMultipleRangesQuotedBoundaries(TestRangeFileMultipleRanges):
    """Run the TestRangeFileMultipleRanges tests with a quoted boundary.

    The boundary string is enclosed in angle brackets, as IIS 6.0 and 7.0 do
    (IIS 5 breaks the RFC in a different way, by using square brackets
    instead).

    This reveals a bug caused by the combination of:

    - the bad implementation of RFC 822 unquoting in Python (angles are not
      quotes),

    - the bad implementation of RFC 2046 in IIS (angles are not permitted chars
      in boundary lines).
    """

    # IIS 6 and 7 use this value; this is the boundary without the angles
    _boundary_trimmed = "q1w2e3r4t5y6u7i8o9p0zaxscdvfbgnhmjklkl"
    # The boundary as it appears in boundary lines
    boundary = '<%s>' % _boundary_trimmed

    def set_file_boundary(self):
        """Give the RangeFile the boundary stripped of its angle brackets."""
        # This emulates what the broken rfc822.unquote() produces
        trimmed = self._boundary_trimmed
        self._file.set_boundary(trimmed)
3537.1.1 by Vincent Ladeuil
Fix some more PEP8isms and delete useless import
439
3059.2.12 by Vincent Ladeuil
Spiv review feedback.
440
3059.2.14 by Vincent Ladeuil
Complete coverage by adding tests for more invalid inputs. Fix a
441
class TestRangeFileVarious(tests.TestCase):
    """Tests RangeFile aspects not covered elsewhere."""

    def test_seek_whence(self):
        """Test the seek whence parameter values."""
        f = response.RangeFile('foo', StringIO('abc'))
        f.set_range(0, 3)
        # whence 0 (default), 1 and 2 are accepted
        f.seek(0)
        f.seek(1, 1)
        f.seek(-1, 2)
        # Any other whence value is refused
        self.assertRaises(ValueError, f.seek, 0, 14)

    def test_range_syntax(self):
        """Test the Content-Range scanning."""

        f = response.RangeFile('foo', StringIO())

        def ok(expected, header_value):
            # expected is the (start, size) pair the header should produce
            f.set_range_from_header(header_value)
            # Slightly peek under the covers to get the size
            self.assertEqual(expected, (f.tell(), f._size))

        ok((1, 10), 'bytes 1-10/11')
        ok((1, 10), 'bytes 1-10/*')
        ok((12, 2), '\tbytes 12-13/*')
        ok((28, 1), '  bytes 28-28/*')
        ok((2123, 2120), 'bytes  2123-4242/12310')
        ok((1, 10), 'bytes 1-10/ttt') # We don't check total (ttt)

        def nok(header_value):
            # Invalid headers must be rejected
            self.assertRaises(errors.InvalidHttpRange,
                              f.set_range_from_header, header_value)

        nok('bytes 10-2/3')
        nok('chars 1-2/3')
        nok('bytes xx-yyy/zzz')
        nok('bytes xx-12/zzz')
        nok('bytes 11-yy/zzz')
        nok('bytes10-2/3')
1786.1.21 by John Arbash Meinel
(broken) Work on factoring out handle_response so we can test with fake headers.
480
3059.2.12 by Vincent Ladeuil
Spiv review feedback.
481
1786.1.21 by John Arbash Meinel
(broken) Work on factoring out handle_response so we can test with fake headers.
482
# Taken from real request responses
1786.1.26 by John Arbash Meinel
Update and test handle_response.
483
# A (status code, status line and headers, body) tuple for a 200 response.
_full_text_response = (
    200,
    # Status line and headers, each one ending with CRLF, then an empty line
    'HTTP/1.1 200 OK\r\n'
    'Date: Tue, 11 Jul 2006 04:32:56 GMT\r\n'
    'Server: Apache/2.0.54 (Fedora)\r\n'
    'Last-Modified: Sun, 23 Apr 2006 19:35:20 GMT\r\n'
    'ETag: "56691-23-38e9ae00"\r\n'
    'Accept-Ranges: bytes\r\n'
    'Content-Length: 35\r\n'
    'Connection: close\r\n'
    'Content-Type: text/plain; charset=UTF-8\r\n'
    '\r\n',
    # Body
    'Bazaar-NG meta directory, format 1\n',
    )
495
496
1786.1.26 by John Arbash Meinel
Update and test handle_response.
497
# A (status code, status line and headers, body) tuple for a 206 response
# carrying a single range described by a Content-Range header.
_single_range_response = (
    206,
    # Status line and headers, each one ending with CRLF, then an empty line
    'HTTP/1.1 206 Partial Content\r\n'
    'Date: Tue, 11 Jul 2006 04:45:22 GMT\r\n'
    'Server: Apache/2.0.54 (Fedora)\r\n'
    'Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r\n'
    'ETag: "238a3c-16ec2-805c5540"\r\n'
    'Accept-Ranges: bytes\r\n'
    'Content-Length: 100\r\n'
    'Content-Range: bytes 100-199/93890\r\n'
    'Connection: close\r\n'
    'Content-Type: text/plain; charset=UTF-8\r\n'
    '\r\n',
    # Body (no trailing newline)
    'mbp@sourcefrog.net-20050309040815-13242001617e4a06\n'
    'mbp@sourcefrog.net-20050309040929-eee0eb3e6d1e762',
    )
510
511
2070.1.1 by John Arbash Meinel
Fix bug #62473 by not requiring content-type in range responses
512
# A (status code, status line and headers, body) tuple for a 206 single range
# response whose headers contain no Content-Type.
_single_range_no_content_type = (
    206,
    # Status line and headers, each one ending with CRLF, then an empty line
    'HTTP/1.1 206 Partial Content\r\n'
    'Date: Tue, 11 Jul 2006 04:45:22 GMT\r\n'
    'Server: Apache/2.0.54 (Fedora)\r\n'
    'Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r\n'
    'ETag: "238a3c-16ec2-805c5540"\r\n'
    'Accept-Ranges: bytes\r\n'
    'Content-Length: 100\r\n'
    'Content-Range: bytes 100-199/93890\r\n'
    'Connection: close\r\n'
    '\r\n',
    # Body (no trailing newline)
    'mbp@sourcefrog.net-20050309040815-13242001617e4a06\n'
    'mbp@sourcefrog.net-20050309040929-eee0eb3e6d1e762',
    )
524
525
1786.1.26 by John Arbash Meinel
Update and test handle_response.
526
# A (status code, status line and headers, body) tuple for a 206
# multipart/byteranges response containing two ranges.
_multipart_range_response = (
    206,
    # Status line and headers. Note that this string ends with a lone '\r'
    # after the empty line.
    'HTTP/1.1 206 Partial Content\r\n'
    'Date: Tue, 11 Jul 2006 04:49:48 GMT\r\n'
    'Server: Apache/2.0.54 (Fedora)\r\n'
    'Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r\n'
    'ETag: "238a3c-16ec2-805c5540"\r\n'
    'Accept-Ranges: bytes\r\n'
    'Content-Length: 1534\r\n'
    'Connection: close\r\n'
    'Content-Type: multipart/byteranges; boundary=418470f848b63279b\r\n'
    '\r\n'
    '\r',
    # First part: bytes 0-254
    '--418470f848b63279b\r\n'
    'Content-type: text/plain; charset=UTF-8\r\n'
    'Content-range: bytes 0-254/93890\r\n'
    '\r\n'
    'mbp@sourcefrog.net-20050309040815-13242001617e4a06\n'
    'mbp@sourcefrog.net-20050309040929-eee0eb3e6d1e7627\n'
    'mbp@sourcefrog.net-20050309040957-6cad07f466bb0bb8\n'
    'mbp@sourcefrog.net-20050309041501-c840e09071de3b67\n'
    'mbp@sourcefrog.net-20050309044615-c24a3250be83220a\n'
    '\r\n'
    # Second part: bytes 1000-2049
    '--418470f848b63279b\r\n'
    'Content-type: text/plain; charset=UTF-8\r\n'
    'Content-range: bytes 1000-2049/93890\r\n'
    '\r\n'
    '40-fd4ec249b6b139ab\n'
    'mbp@sourcefrog.net-20050311063625-07858525021f270b\n'
    'mbp@sourcefrog.net-20050311231934-aa3776aff5200bb9\n'
    'mbp@sourcefrog.net-20050311231953-73aeb3a131c3699a\n'
    'mbp@sourcefrog.net-20050311232353-f5e33da490872c6a\n'
    'mbp@sourcefrog.net-20050312071639-0a8f59a34a024ff0\n'
    'mbp@sourcefrog.net-20050312073432-b2c16a55e0d6e9fb\n'
    'mbp@sourcefrog.net-20050312073831-a47c3335ece1920f\n'
    'mbp@sourcefrog.net-20050312085412-13373aa129ccbad3\n'
    'mbp@sourcefrog.net-20050313052251-2bf004cb96b39933\n'
    'mbp@sourcefrog.net-20050313052856-3edd84094687cb11\n'
    'mbp@sourcefrog.net-20050313053233-e30a4f28aef48f9d\n'
    'mbp@sourcefrog.net-20050313053853-7c64085594ff3072\n'
    'mbp@sourcefrog.net-20050313054757-a86c3f5871069e22\n'
    'mbp@sourcefrog.net-20050313061422-418f1f73b94879b9\n'
    'mbp@sourcefrog.net-20050313120651-497bd231b19df600\n'
    'mbp@sourcefrog.net-20050314024931-eae0170ef25a5d1a\n'
    'mbp@sourcefrog.net-20050314025438-d52099f915fe65fc\n'
    'mbp@sourcefrog.net-20050314025539-637a636692c055cf\n'
    'mbp@sourcefrog.net-20050314025737-55eb441f430ab4ba\n'
    'mbp@sourcefrog.net-20050314025901-d74aa93bb7ee8f62\n'
    'mbp@source\r\n'
    # Final boundary
    '--418470f848b63279b--\r\n',
    )
574
3059.2.14 by Vincent Ladeuil
Complete coverage by adding tests for more invalid inputs. Fix a
575
1979.1.1 by John Arbash Meinel
Fix bug #57723, parse boundary="" correctly, since Squid uses it
576
# A 206 multipart/byteranges response that went through a Squid proxy (see the
# X-Cache headers). The boundary parameter is quoted in the Content-Type
# header while the delimiter lines in the body use it unquoted. The two parts
# announce bytes 0-99 and 300-499.
_multipart_squid_range_response = (206, """HTTP/1.0 206 Partial Content\r
577
Date: Thu, 31 Aug 2006 21:16:22 GMT\r
578
Server: Apache/2.2.2 (Unix) DAV/2\r
579
Last-Modified: Thu, 31 Aug 2006 17:57:06 GMT\r
580
Accept-Ranges: bytes\r
581
Content-Type: multipart/byteranges; boundary="squid/2.5.STABLE12:C99323425AD4FE26F726261FA6C24196"\r
582
Content-Length: 598\r
583
X-Cache: MISS from localhost.localdomain\r
584
X-Cache-Lookup: HIT from localhost.localdomain:3128\r
585
Proxy-Connection: keep-alive\r
586
\r
587
""",
588
"""\r
589
--squid/2.5.STABLE12:C99323425AD4FE26F726261FA6C24196\r
590
Content-Type: text/plain\r
591
Content-Range: bytes 0-99/18672\r
592
\r
593
# bzr knit index 8
594
595
scott@netsplit.com-20050708230047-47c7868f276b939f fulltext 0 863  :
596
scott@netsp\r
597
--squid/2.5.STABLE12:C99323425AD4FE26F726261FA6C24196\r
598
Content-Type: text/plain\r
599
Content-Range: bytes 300-499/18672\r
600
\r
601
com-20050708231537-2b124b835395399a :
602
scott@netsplit.com-20050820234126-551311dbb7435b51 line-delta 1803 479 .scott@netsplit.com-20050820232911-dc4322a084eadf7e :
603
scott@netsplit.com-20050821213706-c86\r
604
--squid/2.5.STABLE12:C99323425AD4FE26F726261FA6C24196--\r
1786.1.25 by John Arbash Meinel
Test that we can extract headers properly.
605
""")
606
607
1786.1.26 by John Arbash Meinel
Update and test handle_response.
608
# This is made up: a complete (200) response whose headers carry no
# Content-Type. The Content-Length header announces 35 bytes.
3059.2.2 by Vincent Ladeuil
Read http responses on demand without buffering the whole body
609
_full_text_response_no_content_type = (200, """HTTP/1.1 200 OK\r
610
Date: Tue, 11 Jul 2006 04:32:56 GMT\r
611
Server: Apache/2.0.54 (Fedora)\r
612
Last-Modified: Sun, 23 Apr 2006 19:35:20 GMT\r
613
ETag: "56691-23-38e9ae00"\r
614
Accept-Ranges: bytes\r
615
Content-Length: 35\r
616
Connection: close\r
617
\r
618
""", """Bazaar-NG meta directory, format 1
619
""")
620
621
3059.2.14 by Vincent Ladeuil
Complete coverage by adding tests for more invalid inputs. Fix a
622
# A complete (200) response whose headers omit Content-Length, so nothing in
# the headers says how long the body is.
_full_text_response_no_content_length = (200, """HTTP/1.1 200 OK\r
623
Date: Tue, 11 Jul 2006 04:32:56 GMT\r
624
Server: Apache/2.0.54 (Fedora)\r
625
Last-Modified: Sun, 23 Apr 2006 19:35:20 GMT\r
626
ETag: "56691-23-38e9ae00"\r
627
Accept-Ranges: bytes\r
628
Connection: close\r
629
Content-Type: text/plain; charset=UTF-8\r
630
\r
631
""", """Bazaar-NG meta directory, format 1
632
""")
633
634
3059.2.2 by Vincent Ladeuil
Read http responses on demand without buffering the whole body
635
# A 206 (partial content) response that has a Content-Length but no
# Content-Range header, so nothing says which bytes of the file were returned.
_single_range_no_content_range = (206, """HTTP/1.1 206 Partial Content\r
636
Date: Tue, 11 Jul 2006 04:45:22 GMT\r
637
Server: Apache/2.0.54 (Fedora)\r
638
Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r
639
ETag: "238a3c-16ec2-805c5540"\r
640
Accept-Ranges: bytes\r
641
Content-Length: 100\r
642
Connection: close\r
643
\r
644
""", """mbp@sourcefrog.net-20050309040815-13242001617e4a06
645
mbp@sourcefrog.net-20050309040929-eee0eb3e6d1e762""")
646
647
3059.2.14 by Vincent Ladeuil
Complete coverage by adding tests for more invalid inputs. Fix a
648
# A 206 response whose headers announce bytes 100-199 (100 bytes) while the
# body that follows is a single revision id, i.e. shorter than announced.
_single_range_response_truncated = (206, """HTTP/1.1 206 Partial Content\r
649
Date: Tue, 11 Jul 2006 04:45:22 GMT\r
650
Server: Apache/2.0.54 (Fedora)\r
651
Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r
652
ETag: "238a3c-16ec2-805c5540"\r
653
Accept-Ranges: bytes\r
654
Content-Length: 100\r
655
Content-Range: bytes 100-199/93890\r
656
Connection: close\r
657
Content-Type: text/plain; charset=UTF-8\r
658
\r
659
""", """mbp@sourcefrog.net-20050309040815-13242001617e4a06""")
660
661
1786.1.26 by John Arbash Meinel
Update and test handle_response.
662
# A response carrying the non-standard status code 444. The HTML body (which
# talks about a 404) is presumably just filler -- its content does not match
# the status line.
_invalid_response = (444, """HTTP/1.1 444 Bad Response\r
663
Date: Tue, 11 Jul 2006 04:32:56 GMT\r
664
Connection: close\r
665
Content-Type: text/html; charset=iso-8859-1\r
666
\r
667
""", """<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">
668
<html><head>
669
<title>404 Not Found</title>
670
</head><body>
671
<h1>Not Found</h1>
672
<p>I don't know what I'm doing</p>
673
<hr>
674
</body></html>
675
""")
676
677
3059.2.14 by Vincent Ladeuil
Complete coverage by adding tests for more invalid inputs. Fix a
678
# A multipart 206 response whose only part has a Content-Type header but no
# Content-Range header.
_multipart_no_content_range = (206, """HTTP/1.0 206 Partial Content\r
679
Content-Type: multipart/byteranges; boundary=THIS_SEPARATES\r
680
Content-Length: 598\r
681
\r
682
""",
683
"""\r
684
--THIS_SEPARATES\r
685
Content-Type: text/plain\r
686
\r
687
# bzr knit index 8
688
--THIS_SEPARATES\r
689
""")
690
691
692
# Despite the name, the headers here do declare a boundary. What is missing is
# the boundary line in the body: after the first part (announced as bytes
# 0-18) arbitrary text follows where a delimiter line should be.
_multipart_no_boundary = (206, """HTTP/1.0 206 Partial Content\r
693
Content-Type: multipart/byteranges; boundary=THIS_SEPARATES\r
694
Content-Length: 598\r
695
\r
696
""",
697
"""\r
698
--THIS_SEPARATES\r
699
Content-Type: text/plain\r
700
Content-Range: bytes 0-18/18672\r
701
\r
702
# bzr knit index 8
703
704
The range ended at the line above, this text is garbage instead of a boundary
705
line
706
""")
707
708
3059.2.12 by Vincent Ladeuil
Spiv review feedback.
709
class TestHandleResponse(tests.TestCase):
    """Feed canned HTTP responses to response.handle_response.

    Every fixture used here is a tuple of (status code, raw status line and
    headers, body).
    """

    def _build_HTTPMessage(self, raw_headers):
        """Build an httplib.HTTPMessage from a raw status line and headers.

        :param raw_headers: The status line, followed by the header lines and
            the empty line ending them.
        :return: The HTTPMessage parsed from the header lines.
        """
        status_and_headers = StringIO(raw_headers)
        # Get rid of the status line
        status_and_headers.readline()
        msg = httplib.HTTPMessage(status_and_headers)
        return msg

    def get_response(self, a_response):
        """Process a supplied response, and return the result.

        :param a_response: A (code, raw_headers, body) tuple.
        :return: Whatever response.handle_response returns for it.
        """
        code, raw_headers, body = a_response
        msg = self._build_HTTPMessage(raw_headers)
        return response.handle_response('http://foo', code, msg,
                                        StringIO(body))

    def test_full_text(self):
        out = self.get_response(_full_text_response)
        # The whole body must be readable back unchanged
        self.assertEqual(_full_text_response[2], out.read())

    def test_single_range(self):
        out = self.get_response(_single_range_response)

        out.seek(100)
        self.assertEqual(_single_range_response[2], out.read(100))

    def test_single_range_no_content(self):
        # Going by the fixture name, this is a range response without a
        # Content-Type header; it must be handled like any other range.
        out = self.get_response(_single_range_no_content_type)

        out.seek(100)
        self.assertEqual(_single_range_no_content_type[2], out.read(100))

    def test_single_range_truncated(self):
        out = self.get_response(_single_range_response_truncated)
        # The headers declare a 100 byte range but the fixture body holds
        # only 50 bytes, so skipping 51 bytes forward must run out of data.
        self.assertRaises(errors.ShortReadvError, out.seek, out.tell() + 51)

    def test_multi_range(self):
        out = self.get_response(_multipart_range_response)

        # Just make sure we can read the right contents
        out.seek(0)
        out.read(255)

        out.seek(1000)
        out.read(1050)

    def test_multi_squid_range(self):
        out = self.get_response(_multipart_squid_range_response)

        # Just make sure we can read the right contents
        out.seek(0)
        out.read(100)

        out.seek(300)
        out.read(200)

    def test_invalid_response(self):
        self.assertRaises(errors.InvalidHttpResponse,
                          self.get_response, _invalid_response)

    def test_full_text_no_content_type(self):
        # We should not require Content-Type for a full response
        out = self.get_response(_full_text_response_no_content_type)
        self.assertEqual(_full_text_response_no_content_type[2], out.read())

    def test_full_text_no_content_length(self):
        # A full response without Content-Length must still be readable
        out = self.get_response(_full_text_response_no_content_length)
        self.assertEqual(_full_text_response_no_content_length[2], out.read())

    def test_missing_content_range(self):
        # A 206 response without Content-Range is rejected up front
        code, raw_headers, body = _single_range_no_content_range
        msg = self._build_HTTPMessage(raw_headers)
        self.assertRaises(errors.InvalidHttpResponse,
                          response.handle_response,
                          'http://bogus', code, msg, StringIO(body))

    def test_multipart_no_content_range(self):
        # A multipart part without Content-Range is rejected up front
        code, raw_headers, body = _multipart_no_content_range
        msg = self._build_HTTPMessage(raw_headers)
        self.assertRaises(errors.InvalidHttpResponse,
                          response.handle_response,
                          'http://bogus', code, msg, StringIO(body))

    def test_multipart_no_boundary(self):
        out = self.get_response(_multipart_no_boundary)
        out.read()  # Read the whole range
        # Fail to find the boundary line
        self.assertRaises(errors.InvalidHttpResponse, out.seek, 1, 1)
3408.6.1 by Eric Holmberg
Fix for Bug #215426 in which bzr can cause a MemoryError in socket.recv while
803
804
805
class TestRangeFileSizeReadLimited(tests.TestCase):
    """Test RangeFile _max_read_size functionality which limits the size of
    read blocks to prevent MemoryError messages in socket.recv.
    """

    def setUp(self):
        """Prepare a data block about three times _max_read_size long."""
        super(TestRangeFileSizeReadLimited, self).setUp()
        # create a test datablock larger than _max_read_size.
        chunk_size = response.RangeFile._max_read_size
        test_pattern = '0123456789ABCDEF'
        # Floor division: the repeat count has to be an integer whatever
        # division semantics are in effect.
        self.test_data = test_pattern * (3 * chunk_size // len(test_pattern))
        self.test_data_len = len(self.test_data)

    def test_max_read_size(self):
        """Read data in blocks and verify that the reads are not larger than
           the maximum read size.
        """
        # retrieve data in large blocks from response.RangeFile object
        mock_read_file = FakeReadFile(self.test_data)
        range_file = response.RangeFile('test_max_read_size', mock_read_file)
        response_data = range_file.read(self.test_data_len)

        # verify read size was equal to the maximum read size
        self.assertTrue(mock_read_file.get_max_read_size() > 0)
        self.assertEqual(mock_read_file.get_max_read_size(),
                         response.RangeFile._max_read_size)
        self.assertEqual(mock_read_file.get_read_count(), 3)

        # report error if the data wasn't equal (we only report the size due
        # to the length of the data)
        if response_data != self.test_data:
            message = "Data not equal.  Expected %d bytes, received %d."
            # Expected size first, received size second, as the message says
            self.fail(message % (self.test_data_len, len(response_data)))
838