~bzr-pqm/bzr/bzr.dev

4763.2.4 by John Arbash Meinel
merge bzr.2.1 in preparation for NEWS entry.
1
# Copyright (C) 2006-2010 Canonical Ltd
1786.1.8 by John Arbash Meinel
[merge] Johan Rydberg test updates
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
4183.7.1 by Sabin Iacob
update FSF mailing address
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
1786.1.8 by John Arbash Meinel
[merge] Johan Rydberg test updates
16
3059.2.12 by Vincent Ladeuil
Spiv review feedback.
17
"""Tests for HTTP response parsing.
18
3059.2.18 by Vincent Ladeuil
Take spiv review comments into account.
19
The handle_response method reads the response body of a GET request and returns
20
the corresponding RangeFile.
3059.2.12 by Vincent Ladeuil
Spiv review feedback.
21
22
There are four different kinds of RangeFile:
23
- a whole file whose size is unknown, seen as a simple byte stream,
24
- a whole file whose size is known, we can't read past its end,
25
- a single range file, a part of a file with a start and a size,
26
- a multiple range file, several consecutive parts with known start offset
27
  and size.
28
29
Some properties are common to all kinds:
30
- seek can only be forward (it's really a socket underneath),
31
- read can't cross ranges,
32
- successive ranges are taken into account transparently,
3059.2.18 by Vincent Ladeuil
Take spiv review comments into account.
33
3059.2.12 by Vincent Ladeuil
Spiv review feedback.
34
- the expected pattern of use is either seek(offset)+read(size) or a single
3059.2.18 by Vincent Ladeuil
Take spiv review comments into account.
35
  read with no size specified. For multiple range files, multiple read() will
36
  return the corresponding ranges, trying to read further will raise
37
  InvalidHttpResponse.
3059.2.12 by Vincent Ladeuil
Spiv review feedback.
38
"""
1786.1.8 by John Arbash Meinel
[merge] Johan Rydberg test updates
39
40
from cStringIO import StringIO
3059.2.2 by Vincent Ladeuil
Read http responses on demand without buffering the whole body
41
import httplib
1786.1.8 by John Arbash Meinel
[merge] Johan Rydberg test updates
42
3059.2.12 by Vincent Ladeuil
Spiv review feedback.
43
from bzrlib import (
44
    errors,
45
    tests,
46
    )
3104.3.4 by Vincent Ladeuil
Add test.
47
from bzrlib.transport.http import (
48
    response,
49
    _urllib2_wrappers,
50
    )
3408.6.1 by Eric Holmberg
Fix for Bug #215426 in which bzr can cause a MemoryError in socket.recv while
51
from bzrlib.tests.file_utils import (
52
    FakeReadFile,
53
    )
3104.3.4 by Vincent Ladeuil
Add test.
54
55
56
class ReadSocket(object):
    """Stand-in for a socket whose incoming content is fixed up front.

    Only ``makefile()`` is implemented.
    """

    def __init__(self, data):
        """Wrap ``data`` in the file-like object handed out by makefile().

        :param data: the content that will be readable from the socket.
        """
        canned = StringIO(data)
        self.readfile = canned

    def makefile(self, mode='r', bufsize=None):
        """Return the file-like object wrapping the predefined content.

        ``mode`` and ``bufsize`` are accepted but ignored; every call returns
        the very same object.
        """
        return self.readfile
64
3408.6.1 by Eric Holmberg
Fix for Bug #215426 in which bzr can cause a MemoryError in socket.recv while
65
3104.3.4 by Vincent Ladeuil
Add test.
66
class FakeHTTPConnection(_urllib2_wrappers.HTTPConnection):
    """An HTTP connection reading from an already provided socket-like object.

    Whatever is sent through it is dropped, only the content carried by the
    socket-like object matters.
    """

    def __init__(self, sock):
        """Build a connection to 'localhost' that uses ``sock``.

        :param sock: the socket-like object (a ReadSocket for example) to
            install on the connection.
        """
        base = _urllib2_wrappers.HTTPConnection
        base.__init__(self, 'localhost')
        # Installing the socket ourselves bypasses the connection
        self.sock = sock

    def send(self, str):
        """Ignores the writes on the socket."""
76
77
78
class TestHTTPConnection(tests.TestCase):
    """Tests for the HTTP connection wrapper, fed with a canned response."""

    def test_cleanup_pipe(self):
        """Test that cleanup_pipe() logs a warning about pending bytes."""
        # The body is 18 bytes long: '0123456789\n' (11) + 'garbage' (7)
        sock = ReadSocket("""HTTP/1.1 200 OK\r
Content-Type: text/plain; charset=UTF-8\r
Content-Length: 18
\r
0123456789
garbage""")
        conn = FakeHTTPConnection(sock)
        # Simulate the request sending so that the connection will be able to
        # read the response.
        conn.putrequest('GET', 'http://localhost/fictious')
        conn.endheaders()
        # Now, get the response
        resp = conn.getresponse()
        # Read part of the response
        self.assertEqual('0123456789\n', resp.read(11))
        # Override the threshold to force the warning emission (the attribute
        # name is defined by the connection class, keep its spelling as is)
        conn._range_warning_thresold = 6 # There are 7 bytes pending
        conn.cleanup_pipe()
        self.assertContainsRe(self.get_log(), 'Got a 200 response when asking')
3059.2.12 by Vincent Ladeuil
Spiv review feedback.
100
101
102
class TestRangeFileMixin(object):
103
    """Tests for accessing the first range in a RangeFile."""
104
105
    # A simple string used to represent a file part (also called a range), in
106
    # which offsets are easy to calculate for test writers. It's used as a
3059.2.18 by Vincent Ladeuil
Take spiv review comments into account.
107
    # building block with slight variations but basically 'a' is the first char
3059.2.12 by Vincent Ladeuil
Spiv review feedback.
108
    # of the range and 'z' is the last.
109
    alpha = 'abcdefghijklmnopqrstuvwxyz'
110
111
    def test_can_read_at_first_access(self):
112
        """Test that the just created file can be read."""
113
        self.assertEquals(self.alpha, self._file.read())
114
115
    def test_seek_read(self):
116
        """Test seek/read inside the range."""
117
        f = self._file
118
        start = self.first_range_start
119
        # Before any use, tell() should be at the range start
3059.2.2 by Vincent Ladeuil
Read http responses on demand without buffering the whole body
120
        self.assertEquals(start, f.tell())
3059.2.12 by Vincent Ladeuil
Spiv review feedback.
121
        cur = start # For an overall offset assertion
122
        f.seek(start + 3)
123
        cur += 3
3059.2.2 by Vincent Ladeuil
Read http responses on demand without buffering the whole body
124
        self.assertEquals('def', f.read(3))
3059.2.12 by Vincent Ladeuil
Spiv review feedback.
125
        cur += len('def')
126
        f.seek(4, 1)
127
        cur += 4
128
        self.assertEquals('klmn', f.read(4))
129
        cur += len('klmn')
3059.2.18 by Vincent Ladeuil
Take spiv review comments into account.
130
        # read(0) in the middle of a range
131
        self.assertEquals('', f.read(0))
132
        # seek in place
133
        here = f.tell()
134
        f.seek(0, 1)
135
        self.assertEquals(here, f.tell())
3059.2.12 by Vincent Ladeuil
Spiv review feedback.
136
        self.assertEquals(cur, f.tell())
137
3059.2.18 by Vincent Ladeuil
Take spiv review comments into account.
138
    def test_read_zero(self):
139
        f = self._file
140
        start = self.first_range_start
141
        self.assertEquals('', f.read(0))
142
        f.seek(10, 1)
143
        self.assertEquals('', f.read(0))
144
145
    def test_seek_at_range_end(self):
146
        f = self._file
147
        f.seek(26, 1)
148
149
    def test_read_at_range_end(self):
150
        """Test read behaviour at range end."""
151
        f = self._file
152
        self.assertEquals(self.alpha, f.read())
153
        self.assertEquals('', f.read(0))
154
        self.assertRaises(errors.InvalidRange, f.read, 1)
155
3059.2.12 by Vincent Ladeuil
Spiv review feedback.
156
    def test_unbounded_read_after_seek(self):
157
        f = self._file
158
        f.seek(24, 1)
159
        # Should not cross ranges
160
        self.assertEquals('yz', f.read())
161
162
    def test_seek_backwards(self):
163
        f = self._file
164
        start = self.first_range_start
165
        f.seek(start)
166
        f.read(12)
167
        self.assertRaises(errors.InvalidRange, f.seek, start + 5)
168
169
    def test_seek_outside_single_range(self):
170
        f = self._file
171
        if f._size == -1 or f._boundary is not None:
172
            raise tests.TestNotApplicable('Needs a fully defined range')
173
        # Will seek past the range and then errors out
174
        self.assertRaises(errors.InvalidRange,
175
                          f.seek, self.first_range_start + 27)
176
177
    def test_read_past_end_of_range(self):
178
        f = self._file
179
        if f._size == -1:
180
            raise tests.TestNotApplicable("Can't check an unknown size")
181
        start = self.first_range_start
182
        f.seek(start + 20)
183
        self.assertRaises(errors.InvalidRange, f.read, 10)
184
3059.2.14 by Vincent Ladeuil
Complete coverage by adding tests for more invalid inputs. Fix a
185
    def test_seek_from_end(self):
186
       """Test seeking from the end of the file.
187
188
       The semantic is unclear in case of multiple ranges. Seeking from end
189
       exists only for the http transports, cannot be used if the file size is
190
       unknown and is not used in bzrlib itself. This test must be (and is)
191
       overridden by daughter classes.
192
193
       Reading from end makes sense only when a range has been requested from
194
       the end of the file (see HttpTransportBase._get() when using the
195
       'tail_amount' parameter). The HTTP response can only be a whole file or
196
       a single range.
197
       """
198
       f = self._file
199
       f.seek(-2, 2)
200
       self.assertEquals('yz', f.read())
201
3059.2.12 by Vincent Ladeuil
Spiv review feedback.
202
203
class TestRangeFileSizeUnknown(tests.TestCase, TestRangeFileMixin):
    """Test a RangeFile for a whole file whose size is not known."""

    def setUp(self):
        """Create a RangeFile over ``alpha`` without defining any range."""
        super(TestRangeFileSizeUnknown, self).setUp()
        # The first argument only labels the file, make it match this case
        self._file = response.RangeFile('Whole_file_size_unknown',
                                        StringIO(self.alpha))
        # We define no range, relying on RangeFile to provide default values
        self.first_range_start = 0 # It's the whole file

    def test_seek_from_end(self):
        """See TestRangeFileMixin.test_seek_from_end.

        The end of the file can't be determined since the size is unknown.
        """
        self.assertRaises(errors.InvalidRange, self._file.seek, -1, 2)

    def test_read_at_range_end(self):
        """Test read behaviour at range end."""
        f = self._file
        self.assertEqual(self.alpha, f.read())
        self.assertEqual('', f.read(0))
        # Unlike the other kinds of RangeFile, reading more is not an error
        self.assertEqual('', f.read(1))
3059.2.12 by Vincent Ladeuil
Spiv review feedback.
226
3537.1.1 by Vincent Ladeuil
Fix some more PEP8isms and delete useless import
227
3059.2.12 by Vincent Ladeuil
Spiv review feedback.
228
class TestRangeFileSizeKnown(tests.TestCase, TestRangeFileMixin):
    """Exercise a RangeFile covering a whole file of known size."""

    def setUp(self):
        """Create a RangeFile over ``alpha`` with a range spanning all of it."""
        super(TestRangeFileSizeKnown, self).setUp()
        whole = self.alpha
        range_file = response.RangeFile('Whole_file_size_known',
                                        StringIO(whole))
        range_file.set_range(0, len(whole))
        self._file = range_file
        # The only range is the whole file, so it starts at offset 0
        self.first_range_start = 0
237
238
239
class TestRangeFileSingleRange(tests.TestCase, TestRangeFileMixin):
    """Exercise a RangeFile made of one single range."""

    def setUp(self):
        """Create a RangeFile whose range is ``alpha`` starting at offset 15."""
        super(TestRangeFileSingleRange, self).setUp()
        range_start = 15
        range_file = response.RangeFile('Single_range_file',
                                        StringIO(self.alpha))
        self._file = range_file
        self.first_range_start = range_start
        range_file.set_range(range_start, len(self.alpha))

    def test_read_before_range(self):
        """Test that reading from a position before the range is refused."""
        range_file = self._file
        # Normal use never gets there, the invalid position has to be forced
        range_file._pos = 0
        self.assertRaises(errors.InvalidRange, range_file.read, 2)
255
3537.1.1 by Vincent Ladeuil
Fix some more PEP8isms and delete useless import
256
3146.3.2 by Vincent Ladeuil
Fix #179368 by keeping the current range hint on ShortReadvErrors.
257
class TestRangeFileMultipleRanges(tests.TestCase, TestRangeFileMixin):
    """Test a RangeFile for multiple ranges.

    The RangeFile used for the tests contains three ranges:

    - at offset 25: alpha
    - at offset 100: alpha
    - at offset 126: alpha.upper()

    The two last ranges are contiguous. This only rarely occurs (should not in
    fact) in real uses but may lead to hard to track bugs.
    """

    # The following is used to represent the boundary parameter defined
    # in HTTP response headers and the boundary lines that separate
    # multipart content.

    boundary = "separation"

    def setUp(self):
        """Build the three ranges body and the RangeFile reading it."""
        super(TestRangeFileMultipleRanges, self).setUp()

        boundary = self.boundary

        self.first_range_start = 25
        file_size = 200 # big enough to encompass all ranges
        ranges = [(self.first_range_start, self.alpha),
                  # Two contiguous ranges
                  (100, self.alpha),
                  (126, self.alpha.upper())]
        chunks = [self._multipart_byterange(part, start, boundary, file_size)
                  for (start, part) in ranges]
        # Final boundary
        chunks.append(self._boundary_line())
        content = ''.join(chunks)

        self._file = response.RangeFile('Multiple_ranges_file',
                                        StringIO(content))
        self.set_file_boundary()

    def _boundary_line(self):
        """Helper to build the formatted boundary line."""
        return '--' + self.boundary + '\r\n'

    def set_file_boundary(self):
        """Give the RangeFile the boundary separating the ranges."""
        # Ranges are set by decoding the range headers, the RangeFile user is
        # supposed to call the following before using seek or read since it
        # requires knowing the *response* headers (in that case the boundary
        # which is part of the Content-Type header).
        self._file.set_boundary(self.boundary)

    def _multipart_byterange(self, data, offset, boundary, file_size='*'):
        """Encode a part of a file as a multipart/byterange MIME type.

        When a range request is issued, the HTTP response body can be
        decomposed in parts, each one representing a range (start, size) in a
        file.

        :param data: The payload.
        :param offset: where data starts in the file
        :param boundary: used to separate the parts. It is currently ignored:
            the boundary line is always built from self.boundary (see
            _boundary_line), the parameter is kept for the callers.
        :param file_size: the size of the file containing the range (default to
            '*' meaning unknown)

        :return: a string containing the data encoded as it will appear in the
            HTTP response body.
        """
        # A range is described by a set of headers, but only 'Content-Range' is
        # required for our implementation (TestHandleResponse below will
        # exercise ranges with multiple or missing headers).
        # The total size is formatted with %s (not %d) so that the '*' default
        # is accepted as well as an integer.
        content_range = 'Content-Range: bytes %d-%d/%s\r\n' % (
            offset, offset + len(data) - 1, file_size)
        return ''.join([
            # Each range begins with a boundary line
            self._boundary_line(),
            content_range,
            # An empty line ends the headers
            '\r\n',
            # Finally the raw bytes
            data,
            ])

    def test_read_all_ranges(self):
        """Test reading the three ranges one after the other."""
        f = self._file
        self.assertEqual(self.alpha, f.read()) # Read first range
        f.seek(100) # Trigger the second range recognition
        self.assertEqual(self.alpha, f.read()) # Read second range
        self.assertEqual(126, f.tell())
        f.seek(126) # Start of third range which is also the current pos !
        self.assertEqual('A', f.read(1))
        f.seek(10, 1)
        self.assertEqual('LMN', f.read(3))

    def test_seek_from_end(self):
        """See TestRangeFileMixin.test_seek_from_end."""
        # The actual implementation will seek from end for the first range only
        # and then fail. Since seeking from end is intended to be used for a
        # single range only anyway, this test just documents the actual
        # behaviour.
        f = self._file
        f.seek(-2, 2)
        self.assertEqual('yz', f.read())
        self.assertRaises(errors.InvalidRange, f.seek, -2, 2)

    def test_seek_into_void(self):
        """Test seeking to an offset lying between two ranges."""
        f = self._file
        start = self.first_range_start
        f.seek(start)
        # Seeking to a point between two ranges is possible (only once) but
        # reading there is forbidden
        f.seek(start + 40)
        # We crossed a range boundary, so now the file is positioned at the
        # start of the new range (i.e. trying to seek below 100 will error out)
        f.seek(100)
        f.seek(125)

    def test_seek_across_ranges(self):
        """Test seeking directly into the third range."""
        f = self._file
        f.seek(126) # skip the two first ranges
        self.assertEqual('AB', f.read(2))

    def test_checked_read_dont_overflow_buffers(self):
        """Test skipping ranges with a very small discard buffer."""
        f = self._file
        # We force a very low value to exercise all code paths in _checked_read
        f._discarded_buf_size = 8
        f.seek(126) # skip the two first ranges
        self.assertEqual('AB', f.read(2))

    def test_seek_twice_between_ranges(self):
        """Test that a second seek between the same two ranges is refused."""
        f = self._file
        start = self.first_range_start
        f.seek(start + 40) # Past the first range but before the second
        # Now the file is positioned at the second range start (100)
        self.assertRaises(errors.InvalidRange, f.seek, start + 41)

    def test_seek_at_range_end(self):
        """Test seek behavior at range end."""
        f = self._file
        f.seek(25 + 25)
        f.seek(100 + 25)
        f.seek(126 + 25)

    def test_read_at_range_end(self):
        """Test reading each range fully, then past the last one."""
        f = self._file
        self.assertEqual(self.alpha, f.read())
        self.assertEqual(self.alpha, f.read())
        self.assertEqual(self.alpha.upper(), f.read())
        self.assertRaises(errors.InvalidHttpResponse, f.read, 1)
406
3537.1.1 by Vincent Ladeuil
Fix some more PEP8isms and delete useless import
407
3535.1.1 by Adrian Wilkins
Made the behaviour of the existing multi-range test more like the real thing by
408
class TestRangeFileMultipleRangesQuotedBoundaries(TestRangeFileMultipleRanges):
    """Run the TestRangeFileMultipleRanges tests with a quoted boundary.

    The boundary string is quoted with angle brackets, like IIS 6.0 and 7.0 do
    (IIS 5 breaks the RFC in a different way, using square brackets instead of
    angle brackets).

    This reveals a bug caused by the combination of:

    - the bad implementation of RFC 822 unquoting in Python (angles are not
      quotes),

    - the bad implementation of RFC 2046 in IIS (angles are not permitted
      chars in boundary lines).
    """

    # What is left of the boundary once the angle brackets are removed
    _boundary_trimmed = "q1w2e3r4t5y6u7i8o9p0zaxscdvfbgnhmjklkl"
    # The boundary as it appears in boundary lines
    # IIS 6 and 7 use this value
    boundary = '<%s>' % _boundary_trimmed

    def set_file_boundary(self):
        """Give the RangeFile the boundary without its angle brackets."""
        # This emulates the broken rfc822.unquote() which removes the angles
        unquoted = self._boundary_trimmed
        self._file.set_boundary(unquoted)
3537.1.1 by Vincent Ladeuil
Fix some more PEP8isms and delete useless import
431
3059.2.12 by Vincent Ladeuil
Spiv review feedback.
432
3059.2.14 by Vincent Ladeuil
Complete coverage by adding tests for more invalid inputs. Fix a
433
class TestRangeFileVarious(tests.TestCase):
    """Tests RangeFile aspects not covered elsewhere."""

    def test_seek_whence(self):
        """Test the seek whence parameter values."""
        f = response.RangeFile('foo', StringIO('abc'))
        f.set_range(0, 3)
        # 0, 1 and 2 are accepted
        f.seek(0)
        f.seek(1, 1)
        f.seek(-1, 2)
        # Any other value is refused
        self.assertRaises(ValueError, f.seek, 0, 14)

    def test_range_syntax(self):
        """Test the Content-Range scanning."""

        f = response.RangeFile('foo', StringIO())

        def ok(expected, header_value):
            """Check that header_value sets the (start, size) in expected."""
            f.set_range_from_header(header_value)
            # Slightly peek under the covers to get the size
            self.assertEqual(expected, (f.tell(), f._size))

        ok((1, 10), 'bytes 1-10/11')
        ok((1, 10), 'bytes 1-10/*')
        ok((12, 2), '\tbytes 12-13/*')
        ok((28, 1), '  bytes 28-28/*')
        ok((2123, 2120), 'bytes  2123-4242/12310')
        ok((1, 10), 'bytes 1-10/ttt') # We don't check total (ttt)

        def nok(header_value):
            """Check that header_value is rejected as an invalid range."""
            self.assertRaises(errors.InvalidHttpRange,
                              f.set_range_from_header, header_value)

        nok('bytes 10-2/3')
        nok('chars 1-2/3')
        nok('bytes xx-yyy/zzz')
        nok('bytes xx-12/zzz')
        nok('bytes 11-yy/zzz')
        nok('bytes10-2/3')
1786.1.21 by John Arbash Meinel
(broken) Work on factoring out handle_response so we can test with fake headers.
472
3059.2.12 by Vincent Ladeuil
Spiv review feedback.
473
1786.1.21 by John Arbash Meinel
(broken) Work on factoring out handle_response so we can test with fake headers.
474
# Taken from real request responses
1786.1.26 by John Arbash Meinel
Update and test handle_response.
475
_full_text_response = (200, """HTTP/1.1 200 OK\r
1786.1.21 by John Arbash Meinel
(broken) Work on factoring out handle_response so we can test with fake headers.
476
Date: Tue, 11 Jul 2006 04:32:56 GMT\r
477
Server: Apache/2.0.54 (Fedora)\r
478
Last-Modified: Sun, 23 Apr 2006 19:35:20 GMT\r
479
ETag: "56691-23-38e9ae00"\r
480
Accept-Ranges: bytes\r
481
Content-Length: 35\r
482
Connection: close\r
483
Content-Type: text/plain; charset=UTF-8\r
484
\r
1786.1.25 by John Arbash Meinel
Test that we can extract headers properly.
485
""", """Bazaar-NG meta directory, format 1
486
""")
487
488
1786.1.26 by John Arbash Meinel
Update and test handle_response.
489
_single_range_response = (206, """HTTP/1.1 206 Partial Content\r
1786.1.21 by John Arbash Meinel
(broken) Work on factoring out handle_response so we can test with fake headers.
490
Date: Tue, 11 Jul 2006 04:45:22 GMT\r
491
Server: Apache/2.0.54 (Fedora)\r
492
Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r
493
ETag: "238a3c-16ec2-805c5540"\r
494
Accept-Ranges: bytes\r
495
Content-Length: 100\r
1786.1.26 by John Arbash Meinel
Update and test handle_response.
496
Content-Range: bytes 100-199/93890\r
1786.1.21 by John Arbash Meinel
(broken) Work on factoring out handle_response so we can test with fake headers.
497
Connection: close\r
498
Content-Type: text/plain; charset=UTF-8\r
499
\r
1786.1.25 by John Arbash Meinel
Test that we can extract headers properly.
500
""", """mbp@sourcefrog.net-20050309040815-13242001617e4a06
1786.1.26 by John Arbash Meinel
Update and test handle_response.
501
mbp@sourcefrog.net-20050309040929-eee0eb3e6d1e762""")
502
503
2070.1.1 by John Arbash Meinel
Fix bug #62473 by not requiring content-type in range responses
504
_single_range_no_content_type = (206, """HTTP/1.1 206 Partial Content\r
505
Date: Tue, 11 Jul 2006 04:45:22 GMT\r
506
Server: Apache/2.0.54 (Fedora)\r
507
Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r
508
ETag: "238a3c-16ec2-805c5540"\r
509
Accept-Ranges: bytes\r
510
Content-Length: 100\r
511
Content-Range: bytes 100-199/93890\r
512
Connection: close\r
513
\r
514
""", """mbp@sourcefrog.net-20050309040815-13242001617e4a06
515
mbp@sourcefrog.net-20050309040929-eee0eb3e6d1e762""")
516
517
1786.1.26 by John Arbash Meinel
Update and test handle_response.
518
_multipart_range_response = (206, """HTTP/1.1 206 Partial Content\r
1786.1.21 by John Arbash Meinel
(broken) Work on factoring out handle_response so we can test with fake headers.
519
Date: Tue, 11 Jul 2006 04:49:48 GMT\r
520
Server: Apache/2.0.54 (Fedora)\r
521
Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r
522
ETag: "238a3c-16ec2-805c5540"\r
523
Accept-Ranges: bytes\r
524
Content-Length: 1534\r
525
Connection: close\r
526
Content-Type: multipart/byteranges; boundary=418470f848b63279b\r
527
\r
1786.1.25 by John Arbash Meinel
Test that we can extract headers properly.
528
\r""", """--418470f848b63279b\r
1786.1.21 by John Arbash Meinel
(broken) Work on factoring out handle_response so we can test with fake headers.
529
Content-type: text/plain; charset=UTF-8\r
530
Content-range: bytes 0-254/93890\r
531
\r
532
mbp@sourcefrog.net-20050309040815-13242001617e4a06
533
mbp@sourcefrog.net-20050309040929-eee0eb3e6d1e7627
534
mbp@sourcefrog.net-20050309040957-6cad07f466bb0bb8
535
mbp@sourcefrog.net-20050309041501-c840e09071de3b67
536
mbp@sourcefrog.net-20050309044615-c24a3250be83220a
537
\r
538
--418470f848b63279b\r
539
Content-type: text/plain; charset=UTF-8\r
540
Content-range: bytes 1000-2049/93890\r
541
\r
542
40-fd4ec249b6b139ab
543
mbp@sourcefrog.net-20050311063625-07858525021f270b
544
mbp@sourcefrog.net-20050311231934-aa3776aff5200bb9
545
mbp@sourcefrog.net-20050311231953-73aeb3a131c3699a
546
mbp@sourcefrog.net-20050311232353-f5e33da490872c6a
547
mbp@sourcefrog.net-20050312071639-0a8f59a34a024ff0
548
mbp@sourcefrog.net-20050312073432-b2c16a55e0d6e9fb
549
mbp@sourcefrog.net-20050312073831-a47c3335ece1920f
550
mbp@sourcefrog.net-20050312085412-13373aa129ccbad3
551
mbp@sourcefrog.net-20050313052251-2bf004cb96b39933
552
mbp@sourcefrog.net-20050313052856-3edd84094687cb11
553
mbp@sourcefrog.net-20050313053233-e30a4f28aef48f9d
554
mbp@sourcefrog.net-20050313053853-7c64085594ff3072
555
mbp@sourcefrog.net-20050313054757-a86c3f5871069e22
556
mbp@sourcefrog.net-20050313061422-418f1f73b94879b9
557
mbp@sourcefrog.net-20050313120651-497bd231b19df600
558
mbp@sourcefrog.net-20050314024931-eae0170ef25a5d1a
559
mbp@sourcefrog.net-20050314025438-d52099f915fe65fc
560
mbp@sourcefrog.net-20050314025539-637a636692c055cf
561
mbp@sourcefrog.net-20050314025737-55eb441f430ab4ba
562
mbp@sourcefrog.net-20050314025901-d74aa93bb7ee8f62
563
mbp@source\r
1979.1.1 by John Arbash Meinel
Fix bug #57723, parse boundary="" correctly, since Squid uses it
564
--418470f848b63279b--\r
565
""")
566
3059.2.14 by Vincent Ladeuil
Complete coverage by adding tests for more invalid inputs. Fix a
567
1979.1.1 by John Arbash Meinel
Fix bug #57723, parse boundary="" correctly, since Squid uses it
568
# Fixture tuple (code, raw status line + headers, body): a 206
# multipart/byteranges response whose boundary is a double-quoted string
# starting with "squid/2.5.STABLE12:".  The two parts advertise bytes 0-99
# and bytes 300-499 of an 18672 byte file.
_multipart_squid_range_response = (206, """HTTP/1.0 206 Partial Content\r
569
Date: Thu, 31 Aug 2006 21:16:22 GMT\r
570
Server: Apache/2.2.2 (Unix) DAV/2\r
571
Last-Modified: Thu, 31 Aug 2006 17:57:06 GMT\r
572
Accept-Ranges: bytes\r
573
Content-Type: multipart/byteranges; boundary="squid/2.5.STABLE12:C99323425AD4FE26F726261FA6C24196"\r
574
Content-Length: 598\r
575
X-Cache: MISS from localhost.localdomain\r
576
X-Cache-Lookup: HIT from localhost.localdomain:3128\r
577
Proxy-Connection: keep-alive\r
578
\r
579
""",
580
"""\r
581
--squid/2.5.STABLE12:C99323425AD4FE26F726261FA6C24196\r
582
Content-Type: text/plain\r
583
Content-Range: bytes 0-99/18672\r
584
\r
585
# bzr knit index 8
586
587
scott@netsplit.com-20050708230047-47c7868f276b939f fulltext 0 863  :
588
scott@netsp\r
589
--squid/2.5.STABLE12:C99323425AD4FE26F726261FA6C24196\r
590
Content-Type: text/plain\r
591
Content-Range: bytes 300-499/18672\r
592
\r
593
com-20050708231537-2b124b835395399a :
594
scott@netsplit.com-20050820234126-551311dbb7435b51 line-delta 1803 479 .scott@netsplit.com-20050820232911-dc4322a084eadf7e :
595
scott@netsplit.com-20050821213706-c86\r
596
--squid/2.5.STABLE12:C99323425AD4FE26F726261FA6C24196--\r
1786.1.25 by John Arbash Meinel
Test that we can extract headers properly.
597
""")
598
599
1786.1.26 by John Arbash Meinel
Update and test handle_response.
600
# This is made up
3059.2.2 by Vincent Ladeuil
Read http responses on demand without buffering the whole body
601
# Fixture tuple (code, raw status line + headers, body): a full (200)
# response whose headers carry a Content-Length but no Content-Type.
_full_text_response_no_content_type = (200, """HTTP/1.1 200 OK\r
602
Date: Tue, 11 Jul 2006 04:32:56 GMT\r
603
Server: Apache/2.0.54 (Fedora)\r
604
Last-Modified: Sun, 23 Apr 2006 19:35:20 GMT\r
605
ETag: "56691-23-38e9ae00"\r
606
Accept-Ranges: bytes\r
607
Content-Length: 35\r
608
Connection: close\r
609
\r
610
""", """Bazaar-NG meta directory, format 1
611
""")
612
613
3059.2.14 by Vincent Ladeuil
Complete coverage by adding tests for more invalid inputs. Fix a
614
# Fixture tuple (code, raw status line + headers, body): a full (200)
# response whose headers carry a Content-Type but no Content-Length.
_full_text_response_no_content_length = (200, """HTTP/1.1 200 OK\r
615
Date: Tue, 11 Jul 2006 04:32:56 GMT\r
616
Server: Apache/2.0.54 (Fedora)\r
617
Last-Modified: Sun, 23 Apr 2006 19:35:20 GMT\r
618
ETag: "56691-23-38e9ae00"\r
619
Accept-Ranges: bytes\r
620
Connection: close\r
621
Content-Type: text/plain; charset=UTF-8\r
622
\r
623
""", """Bazaar-NG meta directory, format 1
624
""")
625
626
3059.2.2 by Vincent Ladeuil
Read http responses on demand without buffering the whole body
627
# Fixture tuple (code, raw status line + headers, body): a 206 response that
# declares Content-Length: 100 but has neither a Content-Range nor a
# Content-Type header.
_single_range_no_content_range = (206, """HTTP/1.1 206 Partial Content\r
628
Date: Tue, 11 Jul 2006 04:45:22 GMT\r
629
Server: Apache/2.0.54 (Fedora)\r
630
Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r
631
ETag: "238a3c-16ec2-805c5540"\r
632
Accept-Ranges: bytes\r
633
Content-Length: 100\r
634
Connection: close\r
635
\r
636
""", """mbp@sourcefrog.net-20050309040815-13242001617e4a06
637
mbp@sourcefrog.net-20050309040929-eee0eb3e6d1e762""")
638
639
3059.2.14 by Vincent Ladeuil
Complete coverage by adding tests for more invalid inputs. Fix a
640
# Fixture tuple (code, raw status line + headers, body): a 206 response that
# declares bytes 100-199 (Content-Length: 100) but whose body is a single
# revision id, i.e. shorter than the declared 100 bytes.
_single_range_response_truncated = (206, """HTTP/1.1 206 Partial Content\r
641
Date: Tue, 11 Jul 2006 04:45:22 GMT\r
642
Server: Apache/2.0.54 (Fedora)\r
643
Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r
644
ETag: "238a3c-16ec2-805c5540"\r
645
Accept-Ranges: bytes\r
646
Content-Length: 100\r
647
Content-Range: bytes 100-199/93890\r
648
Connection: close\r
649
Content-Type: text/plain; charset=UTF-8\r
650
\r
651
""", """mbp@sourcefrog.net-20050309040815-13242001617e4a06""")
652
653
1786.1.26 by John Arbash Meinel
Update and test handle_response.
654
# Fixture tuple (code, raw status line + headers, body): a response using the
# made-up status code 444.  Note the HTML body still reads "404 Not Found".
_invalid_response = (444, """HTTP/1.1 444 Bad Response\r
655
Date: Tue, 11 Jul 2006 04:32:56 GMT\r
656
Connection: close\r
657
Content-Type: text/html; charset=iso-8859-1\r
658
\r
659
""", """<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">
660
<html><head>
661
<title>404 Not Found</title>
662
</head><body>
663
<h1>Not Found</h1>
664
<p>I don't know what I'm doing</p>
665
<hr>
666
</body></html>
667
""")
668
669
3059.2.14 by Vincent Ladeuil
Complete coverage by adding tests for more invalid inputs. Fix a
670
# Fixture tuple (code, raw status line + headers, body): a 206
# multipart/byteranges response whose only part has a Content-Type header
# but no Content-Range header.
_multipart_no_content_range = (206, """HTTP/1.0 206 Partial Content\r
671
Content-Type: multipart/byteranges; boundary=THIS_SEPARATES\r
672
Content-Length: 598\r
673
\r
674
""",
675
"""\r
676
--THIS_SEPARATES\r
677
Content-Type: text/plain\r
678
\r
679
# bzr knit index 8
680
--THIS_SEPARATES\r
681
""")
682
683
684
# Fixture tuple (code, raw status line + headers, body): a 206
# multipart/byteranges response whose part declares bytes 0-18, but the part
# data is followed by plain text instead of a boundary line.
_multipart_no_boundary = (206, """HTTP/1.0 206 Partial Content\r
685
Content-Type: multipart/byteranges; boundary=THIS_SEPARATES\r
686
Content-Length: 598\r
687
\r
688
""",
689
"""\r
690
--THIS_SEPARATES\r
691
Content-Type: text/plain\r
692
Content-Range: bytes 0-18/18672\r
693
\r
694
# bzr knit index 8
695
696
The range ended at the line above, this text is garbage instead of a boundary
697
line
698
""")
699
700
3059.2.12 by Vincent Ladeuil
Spiv review feedback.
701
class TestHandleResponse(tests.TestCase):
    """Feed canned HTTP responses to response.handle_response().

    Every fixture is a (code, raw_headers, body) tuple.  The raw headers
    (status line included) are parsed into an httplib.HTTPMessage and the
    body is served from a StringIO.
    """

    def _build_HTTPMessage(self, raw_headers):
        """Return an httplib.HTTPMessage parsed from raw_headers.

        :param raw_headers: The status line followed by the header lines.
        """
        status_and_headers = StringIO(raw_headers)
        # Get rid of the status line
        status_and_headers.readline()
        msg = httplib.HTTPMessage(status_and_headers)
        return msg

    def get_response(self, a_response):
        """Process a supplied response, and return the result.

        :param a_response: A (code, raw_headers, body) tuple.
        :return: Whatever response.handle_response() returns for it.
        """
        code, raw_headers, body = a_response
        msg = self._build_HTTPMessage(raw_headers)
        return response.handle_response('http://foo', code, msg,
                                        StringIO(body))

    def test_full_text(self):
        out = self.get_response(_full_text_response)
        # It is a StringIO from the original data
        self.assertEqual(_full_text_response[2], out.read())

    def test_single_range(self):
        out = self.get_response(_single_range_response)

        out.seek(100)
        self.assertEqual(_single_range_response[2], out.read(100))

    def test_single_range_no_content(self):
        out = self.get_response(_single_range_no_content_type)

        out.seek(100)
        self.assertEqual(_single_range_no_content_type[2], out.read(100))

    def test_single_range_truncated(self):
        out = self.get_response(_single_range_response_truncated)
        # Content-Range declares 100 bytes but the body holds fewer than 51,
        # so skipping 51 bytes forward must run out of data.
        self.assertRaises(errors.ShortReadvError, out.seek, out.tell() + 51)

    def test_multi_range(self):
        out = self.get_response(_multipart_range_response)

        # Just make sure we can read the right contents
        out.seek(0)
        out.read(255)

        out.seek(1000)
        out.read(1050)

    def test_multi_squid_range(self):
        out = self.get_response(_multipart_squid_range_response)

        # Just make sure we can read the right contents
        out.seek(0)
        out.read(100)

        out.seek(300)
        out.read(200)

    def test_invalid_response(self):
        self.assertRaises(errors.InvalidHttpResponse,
                          self.get_response, _invalid_response)

    def test_full_text_no_content_type(self):
        # We should not require Content-Type for a full response
        out = self.get_response(_full_text_response_no_content_type)
        self.assertEqual(_full_text_response_no_content_type[2], out.read())

    def test_full_text_no_content_length(self):
        # Content-Length is not required for a full response either
        out = self.get_response(_full_text_response_no_content_length)
        self.assertEqual(_full_text_response_no_content_length[2], out.read())

    def test_missing_content_range(self):
        # A 206 response without Content-Range is rejected up front
        code, raw_headers, body = _single_range_no_content_range
        msg = self._build_HTTPMessage(raw_headers)
        self.assertRaises(errors.InvalidHttpResponse,
                          response.handle_response,
                          'http://bogus', code, msg, StringIO(body))

    def test_multipart_no_content_range(self):
        # A multipart part without Content-Range is rejected up front
        code, raw_headers, body = _multipart_no_content_range
        msg = self._build_HTTPMessage(raw_headers)
        self.assertRaises(errors.InvalidHttpResponse,
                          response.handle_response,
                          'http://bogus', code, msg, StringIO(body))

    def test_multipart_no_boundary(self):
        out = self.get_response(_multipart_no_boundary)
        out.read()  # Read the whole range
        # Fail to find the boundary line: seek(1, 1) moves one byte forward
        # relative to the current position, i.e. past the end of the range.
        self.assertRaises(errors.InvalidHttpResponse, out.seek, 1, 1)
3408.6.1 by Eric Holmberg
Fix for Bug #215426 in which bzr can cause a MemoryError in socket.recv while
795
796
797
class TestRangeFileSizeReadLimited(tests.TestCase):
798
    """Test RangeFile _max_read_size functionality which limits the size of
799
    read blocks to prevent MemoryError messages in socket.recv.
800
    """
801
802
    def setUp(self):
        """Prepare test data roughly three times RangeFile._max_read_size."""
        tests.TestCase.setUp(self)
        # create a test datablock larger than _max_read_size.
        chunk_size = response.RangeFile._max_read_size
        test_pattern = '0123456789ABCDEF'
        # Floor division keeps the repeat count an integer even when '/' is
        # true division; a float count would make the string repeat fail.
        self.test_data = test_pattern * (3 * chunk_size // len(test_pattern))
        self.test_data_len = len(self.test_data)
809
810
    def test_max_read_size(self):
        """Read data in blocks and verify that the reads are not larger than
           the maximum read size.
        """
        # retrieve data in large blocks from response.RangeFile object
        mock_read_file = FakeReadFile(self.test_data)
        range_file = response.RangeFile('test_max_read_size', mock_read_file)
        response_data = range_file.read(self.test_data_len)

        # verify read size was equal to the maximum read size
        self.assertTrue(mock_read_file.get_max_read_size() > 0)
        self.assertEqual(mock_read_file.get_max_read_size(),
                         response.RangeFile._max_read_size)
        self.assertEqual(mock_read_file.get_read_count(), 3)

        # report error if the data wasn't equal (we only report the size due
        # to the length of the data)
        if response_data != self.test_data:
            message = "Data not equal.  Expected %d bytes, received %d."
            # Expected length first, received length second, matching the
            # order of the placeholders in the message.
            self.fail(message % (self.test_data_len, len(response_data)))
830