1
# Copyright (C) 2005, 2006 by Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
"""Tests from HTTP response parsing."""
19
from cStringIO import StringIO
22
from bzrlib import errors
23
from bzrlib.transport import http
24
from bzrlib.transport.http import response
25
from bzrlib.tests import TestCase
28
class TestResponseRange(TestCase):
29
"""Test the ResponseRange class."""
32
RR = response.ResponseRange
35
self.assertTrue(r1 < r2)
36
self.assertFalse(r1 > r2)
37
self.assertTrue(r1 < 5)
38
self.assertFalse(r2 < 5)
40
self.assertEqual(RR(0, 10, 5), RR(0, 10, 5))
41
self.assertNotEqual(RR(0, 10, 5), RR(0, 8, 5))
42
self.assertNotEqual(RR(0, 10, 5), RR(0, 10, 6))
44
def test_sort_list(self):
45
"""Ensure longer ranges are sorted after shorter ones"""
46
RR = response.ResponseRange
47
lst = [RR(3, 8, 0), 5, RR(3, 7, 0), 6]
49
self.assertEqual([RR(3,7,0), RR(3,8,0), 5, 6], lst)
52
class TestRangeFile(TestCase):
56
content = "abcdefghijklmnopqrstuvwxyz"
57
self.fp = response.RangeFile('foo', StringIO(content))
58
self.fp._add_range(0, 9, 0)
59
self.fp._add_range(20, 29, 10)
60
self.fp._add_range(30, 39, 15)
62
def test_valid_accesses(self):
63
"""Test so that valid accesses work to the file."""
65
self.assertEquals(self.fp.read(3), 'abc')
66
self.assertEquals(self.fp.read(3), 'def')
67
self.assertEquals(self.fp.tell(), 6)
69
self.assertEquals(self.fp.read(3), 'klm')
70
self.assertEquals(self.fp.read(2), 'no')
71
self.assertEquals(self.fp.tell(), 25)
72
# should wrap over to 30-39 entity
73
self.assertEquals(self.fp.read(3), 'pqr')
75
self.assertEquals(self.fp.read(3), 'def')
76
self.assertEquals(self.fp.tell(), 6)
78
def test_invalid_accesses(self):
79
"""Test so that invalid accesses trigger errors."""
81
self.assertRaises(errors.InvalidRange, self.fp.read, 2)
83
self.assertRaises(errors.InvalidRange, self.fp.read, 2)
85
self.assertRaises(errors.InvalidRange, self.fp.read, 2)
87
def test__finish_ranges(self):
    """Check that RangeFile._finish_ranges leaves self.fp._ranges sorted.

    Three more ranges are added to self.fp out of order first, so the
    list is known to be unsorted before _finish_ranges is called.
    """
    for range_args in ((1, 2, 3), (8, 9, 10), (3, 4, 5)):
        self.fp._add_range(*range_args)

    # TODO: jam 20060706 If we switch to inserting
    #       in sorted order, remove this test
    before = self.fp._ranges
    self.assertNotEqual(before, sorted(before))

    self.fp._finish_ranges()
    after = self.fp._ranges
    self.assertEqual(after, sorted(after))
100
def test_seek_and_tell(self):
101
# Check for seeking before start
103
self.assertEqual(0, self.fp.tell())
106
self.assertEqual(5, self.fp.tell())
109
self.assertEqual(3, self.fp.tell())
111
# TODO: jam 20060706 following tests will fail if this
112
# is not true, and would be difficult to debug
113
# but it is a layering violation
114
self.assertEqual(39, self.fp._len)
117
self.assertEqual(39, self.fp.tell())
120
self.assertEqual(29, self.fp.tell())
122
self.assertRaises(ValueError, self.fp.seek, 0, 4)
123
self.assertRaises(ValueError, self.fp.seek, 0, -1)
126
class TestRegexes(TestCase):
128
def assertRegexMatches(self, groups, text):
    """Assert that self.regex matches text and yields the given groups.

    :param groups: Tuple compared against the match object's groups().
    :param text: String handed to self.regex.match().
    """
    match = self.regex.match(text)
    failure_msg = "text %s did not match regex" % (text,)
    self.assertNotEqual(None, match, failure_msg)

    found_groups = match.groups()
    self.assertEqual(groups, found_groups)
135
def test_range_re(self):
136
"""Test that we match valid ranges."""
137
self.regex = response.HttpRangeResponse._CONTENT_RANGE_RE
138
self.assertRegexMatches(('bytes', '1', '10', '11'),
140
self.assertRegexMatches(('bytes', '1', '10', '11'),
142
self.assertRegexMatches(('bytes', '2123', '4242', '1231'),
143
'\tbytes 2123-4242/1231 ')
144
self.assertRegexMatches(('chars', '1', '2', '3'),
147
def test_content_type_re(self):
    """Check which Content-Type values _CONTENT_TYPE_RE accepts."""
    self.regex = response.HttpMultipartRangeResponse._CONTENT_TYPE_RE
    # (expected groups, header text) pairs that must match.
    accepted = [
        (('xxyyzz',), 'multipart/byteranges; boundary = xxyyzz'),
        (('xxyyzz',), 'multipart/byteranges;boundary=xxyyzz'),
        (('xx yy zz',), ' multipart/byteranges ; boundary= xx yy zz '),
    ]
    for expected_groups, header_text in accepted:
        self.assertRegexMatches(expected_groups, header_text)
    # With a space in place of the '/' the regex must not match at all.
    rejected = self.regex.match('multipart byteranges;boundary=xx')
    self.assertEqual(None, rejected)
162
Content-range: bytes 1-10/20\r
166
Content-Range: bytes 21-30/20\r
171
content-range: bytes 41-50/20\r
175
content-range: bytes 51-60/20\r
181
class TestHelpers(TestCase):
182
"""Test the helper functions"""
184
def test__parse_range(self):
185
"""Test that _parse_range acts reasonably."""
186
content = StringIO('')
187
parse_range = response.HttpRangeResponse._parse_range
188
self.assertEqual((1,2), parse_range('bytes 1-2/3'))
189
self.assertEqual((10,20), parse_range('bytes 10-20/2'))
191
self.assertRaises(errors.InvalidHttpRange, parse_range, 'char 1-3/2')
192
self.assertRaises(errors.InvalidHttpRange, parse_range, 'bytes a-3/2')
195
parse_range('bytes x-10/3', path='http://foo/bar')
196
except errors.InvalidHttpRange, e:
197
self.assertContainsRe(str(e), 'http://foo/bar')
198
self.assertContainsRe(str(e), 'bytes x-10/3')
200
self.fail('Did not raise InvalidHttpRange')
202
def test__parse_boundary_simple(self):
    """Check the regex returned by _parse_boundary against simple_data."""
    parse_boundary = response.HttpMultipartRangeResponse._parse_boundary
    boundary_re = parse_boundary(' multipart/byteranges; boundary=xxyyzz')
    self.assertNotEqual(None, boundary_re)
    # Check that the returned regex is capable of splitting simple_data
    found = list(boundary_re.finditer(simple_data))
    self.assertEqual(4, len(found))

    # match.group(1) should be the content-range entry
    # and match.end() should be the start of the content
    expected = [
        (' bytes 1-10/20', '1234567890'),
        (' bytes 21-30/20', 'abcdefghij'),
        (' bytes 41-50/20', 'zyxwvutsrq'),
        (' bytes 51-60/20', 'xxyyzz fbd'),
    ]
    for match, (content_range, payload) in zip(found, expected):
        self.assertEqual(content_range, match.group(1))
        self.assertEqual(simple_data.find(payload), match.end())
222
def test__parse_boundary_invalid(self):
223
parse_boundary = response.HttpMultipartRangeResponse._parse_boundary
225
parse_boundary(' multipart/bytes;boundary=xxyyzz',
226
path='http://foo/bar')
227
except errors.InvalidHttpContentType, e:
228
self.assertContainsRe(str(e), 'http://foo/bar')
229
self.assertContainsRe(str(e), 'multipart/bytes;boundary=xxyyzz')
231
self.fail('Did not raise InvalidHttpContentType')
234
class TestHttpRangeResponse(TestCase):
236
def test_smoketest(self):
237
"""A basic test that HttpRangeResponse is reasonable."""
238
content = StringIO('0123456789')
239
f = response.HttpRangeResponse('http://foo', 'bytes 1-10/9', content)
240
self.assertEqual([response.ResponseRange(1,10,0)], f._ranges)
243
self.assertRaises(errors.InvalidRange, f.read, 2)
245
self.assertEqual('012345', f.read(6))
247
def test_invalid(self):
249
f = response.HttpRangeResponse('http://foo', 'bytes x-10/9',
250
StringIO('0123456789'))
251
except errors.InvalidHttpRange, e:
252
self.assertContainsRe(str(e), 'http://foo')
253
self.assertContainsRe(str(e), 'bytes x-10/9')
255
self.fail('Failed to raise InvalidHttpRange')
258
class TestHttpMultipartRangeResponse(TestCase):
259
"""Test the handling of multipart range responses"""
261
def test_simple(self):
262
content = StringIO(simple_data)
263
multi = response.HttpMultipartRangeResponse('http://foo',
264
'multipart/byteranges; boundary = xxyyzz', content)
266
self.assertEqual(4, len(multi._ranges))
269
self.assertEqual('1234567890', multi.read(10))
271
self.assertEqual('abcdefghij', multi.read(10))
273
self.assertEqual('zyxwvutsrq', multi.read(10))
275
self.assertEqual('xxyyzz fbd', multi.read(10))
276
# TODO: jam 20060706 Currently RangeFile does not support
277
# reading across ranges. Consider adding it.
279
# self.assertEqual('zyxwvutsrqxxyyzz fbd', multi.read(20))
280
self.assertRaises(errors.InvalidRange, multi.read, 20)
283
self.assertRaises(errors.InvalidRange, multi.read, 11)
285
self.assertRaises(errors.InvalidRange, multi.read, 10)
287
def test_invalid(self):
288
content = StringIO('')
290
response.HttpMultipartRangeResponse('http://foo',
291
'multipart/byte;boundary=invalid', content)
292
except errors.InvalidHttpContentType, e:
293
self.assertContainsRe(str(e), 'http://foo')
294
self.assertContainsRe(str(e), 'multipart/byte;')
297
# Taken from real request responses
298
_full_text_response = (200, """HTTP/1.1 200 OK\r
299
Date: Tue, 11 Jul 2006 04:32:56 GMT\r
300
Server: Apache/2.0.54 (Fedora)\r
301
Last-Modified: Sun, 23 Apr 2006 19:35:20 GMT\r
302
ETag: "56691-23-38e9ae00"\r
303
Accept-Ranges: bytes\r
306
Content-Type: text/plain; charset=UTF-8\r
308
""", """Bazaar-NG meta directory, format 1
312
_missing_response = (404, """HTTP/1.1 404 Not Found\r
313
Date: Tue, 11 Jul 2006 04:32:56 GMT\r
314
Server: Apache/2.0.54 (Fedora)\r
315
Content-Length: 336\r
317
Content-Type: text/html; charset=iso-8859-1\r
319
""", """<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">
321
<title>404 Not Found</title>
324
<p>The requested URL /branches/bzr/jam-integration/.bzr/repository/format was not found on this server.</p>
326
<address>Apache/2.0.54 (Fedora) Server at bzr.arbash-meinel.com Port 80</address>
331
_single_range_response = (206, """HTTP/1.1 206 Partial Content\r
332
Date: Tue, 11 Jul 2006 04:45:22 GMT\r
333
Server: Apache/2.0.54 (Fedora)\r
334
Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r
335
ETag: "238a3c-16ec2-805c5540"\r
336
Accept-Ranges: bytes\r
337
Content-Length: 100\r
338
Content-Range: bytes 100-199/93890\r
340
Content-Type: text/plain; charset=UTF-8\r
342
""", """mbp@sourcefrog.net-20050309040815-13242001617e4a06
343
mbp@sourcefrog.net-20050309040929-eee0eb3e6d1e762""")
346
_multipart_range_response = (206, """HTTP/1.1 206 Partial Content\r
347
Date: Tue, 11 Jul 2006 04:49:48 GMT\r
348
Server: Apache/2.0.54 (Fedora)\r
349
Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r
350
ETag: "238a3c-16ec2-805c5540"\r
351
Accept-Ranges: bytes\r
352
Content-Length: 1534\r
354
Content-Type: multipart/byteranges; boundary=418470f848b63279b\r
356
\r""", """--418470f848b63279b\r
357
Content-type: text/plain; charset=UTF-8\r
358
Content-range: bytes 0-254/93890\r
360
mbp@sourcefrog.net-20050309040815-13242001617e4a06
361
mbp@sourcefrog.net-20050309040929-eee0eb3e6d1e7627
362
mbp@sourcefrog.net-20050309040957-6cad07f466bb0bb8
363
mbp@sourcefrog.net-20050309041501-c840e09071de3b67
364
mbp@sourcefrog.net-20050309044615-c24a3250be83220a
366
--418470f848b63279b\r
367
Content-type: text/plain; charset=UTF-8\r
368
Content-range: bytes 1000-2049/93890\r
371
mbp@sourcefrog.net-20050311063625-07858525021f270b
372
mbp@sourcefrog.net-20050311231934-aa3776aff5200bb9
373
mbp@sourcefrog.net-20050311231953-73aeb3a131c3699a
374
mbp@sourcefrog.net-20050311232353-f5e33da490872c6a
375
mbp@sourcefrog.net-20050312071639-0a8f59a34a024ff0
376
mbp@sourcefrog.net-20050312073432-b2c16a55e0d6e9fb
377
mbp@sourcefrog.net-20050312073831-a47c3335ece1920f
378
mbp@sourcefrog.net-20050312085412-13373aa129ccbad3
379
mbp@sourcefrog.net-20050313052251-2bf004cb96b39933
380
mbp@sourcefrog.net-20050313052856-3edd84094687cb11
381
mbp@sourcefrog.net-20050313053233-e30a4f28aef48f9d
382
mbp@sourcefrog.net-20050313053853-7c64085594ff3072
383
mbp@sourcefrog.net-20050313054757-a86c3f5871069e22
384
mbp@sourcefrog.net-20050313061422-418f1f73b94879b9
385
mbp@sourcefrog.net-20050313120651-497bd231b19df600
386
mbp@sourcefrog.net-20050314024931-eae0170ef25a5d1a
387
mbp@sourcefrog.net-20050314025438-d52099f915fe65fc
388
mbp@sourcefrog.net-20050314025539-637a636692c055cf
389
mbp@sourcefrog.net-20050314025737-55eb441f430ab4ba
390
mbp@sourcefrog.net-20050314025901-d74aa93bb7ee8f62
392
--418470f848b63279b--\r\n'
396
_redirect_response = (206, """HTTP/1.1 301 Moved Permanently\r
397
Date: Tue, 18 Jul 2006 20:29:22 GMT\r
398
Server: Apache/2.0.54 (Ubuntu) PHP/4.4.0-3ubuntu1 mod_ssl/2.0.54 OpenSSL/0.9.7g\r
399
Location: http://bazaar-vcs.org/bzr/bzr.dev/.bzr/repository/inventory.knit\r
400
Content-Length: 272\r
401
Keep-Alive: timeout=15, max=100\r
402
Connection: Keep-Alive\r
403
Content-Type: text/html; charset=iso-8859-1\r
405
HTTP/1.1 206 Partial Content\r
406
Date: Tue, 18 Jul 2006 20:29:23 GMT\r
407
Server: Apache/2.0.54 (Ubuntu) PHP/4.4.0-3ubuntu1 mod_ssl/2.0.54 OpenSSL/0.9.7g\r
408
Last-Modified: Tue, 18 Jul 2006 20:24:59 GMT\r
409
ETag: "be8213-83958c-f0d3dcc0"\r
410
Accept-Ranges: bytes\r
411
Content-Length: 425\r
412
Content-Range: bytes 8623075-8623499/8623500\r
413
Keep-Alive: timeout=15, max=100\r
414
Connection: Keep-Alive\r
415
Content-Type: text/plain; charset=UTF-8\r
417
""", """this data intentionally removed,
418
this is not meant to be tested by
419
handle_response, just _extract_headers
424
_invalid_response = (444, """HTTP/1.1 444 Bad Response\r
425
Date: Tue, 11 Jul 2006 04:32:56 GMT\r
427
Content-Type: text/html; charset=iso-8859-1\r
429
""", """<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">
431
<title>404 Not Found</title>
434
<p>I don't know what I'm doing</p>
440
# This should be in test_http.py, but the headers we
441
# want to parse are here
442
class TestExtractHeader(TestCase):
    """Exercise http._extract_headers on the canned responses."""

    def use_response(self, response):
        """Parse a canned response's header text into self.headers.

        :param response: Indexable whose item 1 is the raw header text.
        """
        header_text = response[1]
        self.headers = http._extract_headers(header_text, 'http://foo')

    def check_header(self, header, value):
        """Assert that self.headers[header] equals value."""
        actual = self.headers[header]
        self.assertEqual(value, actual)

    def test_full_text(self):
        """Headers of the 200 response are found under either casing."""
        self.use_response(_full_text_response)

        expected = (
            ('Date', 'Tue, 11 Jul 2006 04:32:56 GMT'),
            ('date', 'Tue, 11 Jul 2006 04:32:56 GMT'),
            ('Content-Length', '35'),
            ('Content-Type', 'text/plain; charset=UTF-8'),
            ('content-type', 'text/plain; charset=UTF-8'),
        )
        for name, value in expected:
            self.check_header(name, value)

    def test_missing_response(self):
        """Headers of the 404 response are parsed."""
        self.use_response(_missing_response)

        expected = (
            ('Content-Length', '336'),
            ('Content-Type', 'text/html; charset=iso-8859-1'),
        )
        for name, value in expected:
            self.check_header(name, value)

    def test_single_range(self):
        """Headers of the single-range 206 response are parsed."""
        self.use_response(_single_range_response)

        expected = (
            ('Content-Length', '100'),
            ('Content-Range', 'bytes 100-199/93890'),
            ('Content-Type', 'text/plain; charset=UTF-8'),
        )
        for name, value in expected:
            self.check_header(name, value)

    def test_multi_range(self):
        """Headers of the multipart 206 response are parsed."""
        self.use_response(_multipart_range_response)

        expected = (
            ('Content-Length', '1534'),
            ('Content-Type',
             'multipart/byteranges; boundary=418470f848b63279b'),
        )
        for name, value in expected:
            self.check_header(name, value)

    def test_redirect(self):
        """The last group of headers in the text is the one returned."""
        self.use_response(_redirect_response)
        expected = (
            ('Content-Range', 'bytes 8623075-8623499/8623500'),
            ('Content-Type', 'text/plain; charset=UTF-8'),
        )
        for name, value in expected:
            self.check_header(name, value)

    def test_empty(self):
        """Empty header text raises InvalidHttpResponse."""
        self.assertRaises(errors.InvalidHttpResponse,
                          http._extract_headers, '', 'bad url')

    def test_no_opening_http(self):
        """Header text without its first line raises InvalidHttpResponse."""
        # Remove the HTTP line from the header
        status_line, remainder = _full_text_response[1].split('\r\n', 1)
        self.assertRaises(errors.InvalidHttpResponse,
                          http._extract_headers, remainder, 'missing HTTTP')

    def test_trailing_whitespace(self):
        """Extra newlines appended to the header text are ignored."""
        # Test that we ignore bogus whitespace on the end
        code, header_text, body = _full_text_response
        padded = header_text + '\r\n\n\n\n\n'
        self.use_response((code, padded, body))

        expected = (
            ('Date', 'Tue, 11 Jul 2006 04:32:56 GMT'),
            ('Content-Length', '35'),
            ('Content-Type', 'text/plain; charset=UTF-8'),
        )
        for name, value in expected:
            self.check_header(name, value)

    def test_trailing_non_http(self):
        """A trailing header group with no HTTP line is not returned."""
        # Test that we ignore bogus stuff on the end
        code, header_text, body = _full_text_response
        extended = header_text + 'Foo: Bar\r\nBaz: Bling\r\n\r\n'
        self.use_response((code, extended, body))

        expected = (
            ('Date', 'Tue, 11 Jul 2006 04:32:56 GMT'),
            ('Content-Length', '35'),
            ('Content-Type', 'text/plain; charset=UTF-8'),
        )
        for name, value in expected:
            self.check_header(name, value)
        self.assertRaises(KeyError, self.headers.__getitem__, 'Foo')

    def test_extra_whitespace(self):
        """Extra whitespace before the second HTTP line is tolerated."""
        # Test that we read an HTTP response, even with extra whitespace
        code, header_text, body = _redirect_response
        # Find the second HTTP location
        second_http = header_text.find('HTTP', 5)
        spaced = (header_text[:second_http] + '\r\n\n'
                  + header_text[second_http:])
        self.use_response((code, spaced, body))
        expected = (
            ('Content-Range', 'bytes 8623075-8623499/8623500'),
            ('Content-Type', 'text/plain; charset=UTF-8'),
        )
        for name, value in expected:
            self.check_header(name, value)
527
class TestHandleResponse(TestCase):
529
def get_response(self, a_response):
    """Run a canned response through handle_response and return the result.

    :param a_response: Indexable whose item 1 is parsed with
        http._extract_headers, and whose items 0 and 2 are passed to
        response.handle_response (item 2 wrapped in a StringIO).
    """
    url = 'http://foo'
    parsed_headers = http._extract_headers(a_response[1], url)
    body_file = StringIO(a_response[2])
    return response.handle_response(url, a_response[0], parsed_headers,
                                    body_file)
535
def test_full_text(self):
    """Reading the result of the 200 response gives back its body."""
    result = self.get_response(_full_text_response)
    # It is a StringIO from the original data
    expected_body = _full_text_response[2]
    self.assertEqual(expected_body, result.read())
540
def test_missing_response(self):
    """Processing the canned 404 response raises errors.NoSuchFile."""
    failing_call = self.get_response
    self.assertRaises(errors.NoSuchFile, failing_call, _missing_response)
544
def test_single_range(self):
545
out = self.get_response(_single_range_response)
546
self.assertIsInstance(out, response.HttpRangeResponse)
548
self.assertRaises(errors.InvalidRange, out.read, 20)
551
self.assertEqual(_single_range_response[2], out.read(100))
553
def test_multi_range(self):
554
out = self.get_response(_multipart_range_response)
555
self.assertIsInstance(out, response.HttpMultipartRangeResponse)
557
# Just make sure we can read the right contents
564
def test_invalid_response(self):
    """Processing the canned 444 response raises InvalidHttpResponse."""
    failing_call = self.get_response
    self.assertRaises(errors.InvalidHttpResponse,
                      failing_call, _invalid_response)
568
def test_full_text_no_content_type(self):
    """The 200 response is still readable once Content-Type is deleted."""
    # We should not require Content-Type for a full response
    canned = _full_text_response
    url = 'http://foo'
    parsed_headers = http._extract_headers(canned[1], url)
    del parsed_headers['Content-Type']
    body_file = StringIO(canned[2])
    result = response.handle_response(url, canned[0], parsed_headers,
                                      body_file)
    self.assertEqual(_full_text_response[2], result.read())
577
def test_missing_no_content_type(self):
    """The 404 response raises NoSuchFile with Content-Type deleted."""
    # Without Content-Type we should still raise NoSuchFile on a 404
    canned = _missing_response
    url = 'http://missing'
    parsed_headers = http._extract_headers(canned[1], url)
    del parsed_headers['Content-Type']
    self.assertRaises(errors.NoSuchFile, response.handle_response,
                      url, canned[0], parsed_headers, StringIO(canned[2]))
586
def test_missing_content_type(self):
    """A range response with Content-Type deleted is rejected.

    handle_response is expected to raise errors.InvalidHttpContentType.
    """
    canned = _single_range_response
    url = 'http://nocontent'
    parsed_headers = http._extract_headers(canned[1], url)
    del parsed_headers['Content-Type']
    self.assertRaises(errors.InvalidHttpContentType,
                      response.handle_response,
                      url, canned[0], parsed_headers, StringIO(canned[2]))
594
def test_missing_content_range(self):
    """A range response with Content-Range deleted is rejected.

    handle_response is expected to raise errors.InvalidHttpResponse.
    """
    canned = _single_range_response
    url = 'http://nocontent'
    parsed_headers = http._extract_headers(canned[1], url)
    del parsed_headers['Content-Range']
    self.assertRaises(errors.InvalidHttpResponse,
                      response.handle_response,
                      url, canned[0], parsed_headers, StringIO(canned[2]))