~bzr-pqm/bzr/bzr.dev

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
# Copyright (C) 2005, 2006, 2007 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Tests from HTTP response parsing.

The handle_response method read the response body of a GET request an returns
the corresponding RangeFile.

There are four different kinds of RangeFile:
- a whole file whose size is unknown, seen as a simple byte stream,
- a whole file whose size is known, we can't read past its end,
- a single range file, a part of a file with a start and a size,
- a multiple range file, several consecutive parts with known start offset
  and size.

Some properties are common to all kinds:
- seek can only be forward (its really a socket underneath),
- read can't cross ranges,
- successive ranges are taken into account transparently,

- the expected pattern of use is either seek(offset)+read(size) or a single
  read with no size specified. For multiple range files, multiple read() will
  return the corresponding ranges, trying to read further will raise
  InvalidHttpResponse.
"""

from cStringIO import StringIO
import httplib

from bzrlib import (
    errors,
    tests,
    )
from bzrlib.transport.http import (
    response,
    _urllib2_wrappers,
    )
from bzrlib.tests.file_utils import (
    FakeReadFile,
    )


class ReadSocket(object):
    """A socket-like object that can be given a predefined content."""

    def __init__(self, data):
        self.readfile = StringIO(data)

    def makefile(self, mode='r', bufsize=None):
        return self.readfile


class FakeHTTPConnection(_urllib2_wrappers.HTTPConnection):

    def __init__(self, sock):
        _urllib2_wrappers.HTTPConnection.__init__(self, 'localhost')
        # Set the socket to bypass the connection
        self.sock = sock

    def send(self, str):
        """Ignores the writes on the socket."""
        pass


class TestHTTPConnection(tests.TestCase):

    def test_cleanup_pipe(self):
        sock = ReadSocket("""HTTP/1.1 200 OK\r
Content-Type: text/plain; charset=UTF-8\r
Content-Length: 18
\r
0123456789
garbage""")
        conn = FakeHTTPConnection(sock)
        # Simulate the request sending so that the connection will be able to
        # read the response.
        conn.putrequest('GET', 'http://localhost/fictious')
        conn.endheaders()
        # Now, get the response
        resp = conn.getresponse()
        # Read part of the response
        self.assertEquals('0123456789\n', resp.read(11))
        # Override the thresold to force the warning emission
        conn._range_warning_thresold = 6 # There are 7 bytes pending
        conn.cleanup_pipe()
        self.assertContainsRe(self._get_log(keep_log_file=True),
                              'Got a 200 response when asking')


class TestRangeFileMixin(object):
    """Tests for accessing the first range in a RangeFile."""

    # A simple string used to represent a file part (also called a range), in
    # which offsets are easy to calculate for test writers. It's used as a
    # building block with slight variations but basically 'a' is the first char
    # of the range and 'z' is the last.
    alpha = 'abcdefghijklmnopqrstuvwxyz'

    def test_can_read_at_first_access(self):
        """Test that the just created file can be read."""
        self.assertEquals(self.alpha, self._file.read())

    def test_seek_read(self):
        """Test seek/read inside the range."""
        f = self._file
        start = self.first_range_start
        # Before any use, tell() should be at the range start
        self.assertEquals(start, f.tell())
        cur = start # For an overall offset assertion
        f.seek(start + 3)
        cur += 3
        self.assertEquals('def', f.read(3))
        cur += len('def')
        f.seek(4, 1)
        cur += 4
        self.assertEquals('klmn', f.read(4))
        cur += len('klmn')
        # read(0) in the middle of a range
        self.assertEquals('', f.read(0))
        # seek in place
        here = f.tell()
        f.seek(0, 1)
        self.assertEquals(here, f.tell())
        self.assertEquals(cur, f.tell())

    def test_read_zero(self):
        f = self._file
        start = self.first_range_start
        self.assertEquals('', f.read(0))
        f.seek(10, 1)
        self.assertEquals('', f.read(0))

    def test_seek_at_range_end(self):
        f = self._file
        f.seek(26, 1)

    def test_read_at_range_end(self):
        """Test read behaviour at range end."""
        f = self._file
        self.assertEquals(self.alpha, f.read())
        self.assertEquals('', f.read(0))
        self.assertRaises(errors.InvalidRange, f.read, 1)

    def test_unbounded_read_after_seek(self):
        f = self._file
        f.seek(24, 1)
        # Should not cross ranges
        self.assertEquals('yz', f.read())

    def test_seek_backwards(self):
        f = self._file
        start = self.first_range_start
        f.seek(start)
        f.read(12)
        self.assertRaises(errors.InvalidRange, f.seek, start + 5)

    def test_seek_outside_single_range(self):
        f = self._file
        if f._size == -1 or f._boundary is not None:
            raise tests.TestNotApplicable('Needs a fully defined range')
        # Will seek past the range and then errors out
        self.assertRaises(errors.InvalidRange,
                          f.seek, self.first_range_start + 27)

    def test_read_past_end_of_range(self):
        f = self._file
        if f._size == -1:
            raise tests.TestNotApplicable("Can't check an unknown size")
        start = self.first_range_start
        f.seek(start + 20)
        self.assertRaises(errors.InvalidRange, f.read, 10)

    def test_seek_from_end(self):
       """Test seeking from the end of the file.

       The semantic is unclear in case of multiple ranges. Seeking from end
       exists only for the http transports, cannot be used if the file size is
       unknown and is not used in bzrlib itself. This test must be (and is)
       overridden by daughter classes.

       Reading from end makes sense only when a range has been requested from
       the end of the file (see HttpTransportBase._get() when using the
       'tail_amount' parameter). The HTTP response can only be a whole file or
       a single range.
       """
       f = self._file
       f.seek(-2, 2)
       self.assertEquals('yz', f.read())


class TestRangeFileSizeUnknown(tests.TestCase, TestRangeFileMixin):
    """Test a RangeFile for a whole file whose size is not known."""

    def setUp(self):
        super(TestRangeFileSizeUnknown, self).setUp()
        self._file = response.RangeFile('Whole_file_size_known',
                                        StringIO(self.alpha))
        # We define no range, relying on RangeFile to provide default values
        self.first_range_start = 0 # It's the whole file

    def test_seek_from_end(self):
        """See TestRangeFileMixin.test_seek_from_end.

        The end of the file can't be determined since the size is unknown.
        """
        self.assertRaises(errors.InvalidRange, self._file.seek, -1, 2)

    def test_read_at_range_end(self):
        """Test read behaviour at range end."""
        f = self._file
        self.assertEquals(self.alpha, f.read())
        self.assertEquals('', f.read(0))
        self.assertEquals('', f.read(1))


class TestRangeFileSizeKnown(tests.TestCase, TestRangeFileMixin):
    """Test a RangeFile for a whole file whose size is known."""

    def setUp(self):
        super(TestRangeFileSizeKnown, self).setUp()
        self._file = response.RangeFile('Whole_file_size_known',
                                        StringIO(self.alpha))
        self._file.set_range(0, len(self.alpha))
        self.first_range_start = 0 # It's the whole file


class TestRangeFileSingleRange(tests.TestCase, TestRangeFileMixin):
    """Test a RangeFile for a single range."""

    def setUp(self):
        super(TestRangeFileSingleRange, self).setUp()
        self._file = response.RangeFile('Single_range_file',
                                        StringIO(self.alpha))
        self.first_range_start = 15
        self._file.set_range(self.first_range_start, len(self.alpha))


    def test_read_before_range(self):
        # This can't occur under normal circumstances, we have to force it
        f = self._file
        f._pos = 0 # Force an invalid pos
        self.assertRaises(errors.InvalidRange, f.read, 2)


class TestRangeFileMultipleRanges(tests.TestCase, TestRangeFileMixin):
    """Test a RangeFile for multiple ranges.

    The RangeFile used for the tests contains three ranges:

    - at offset 25: alpha
    - at offset 100: alpha
    - at offset 126: alpha.upper()

    The two last ranges are contiguous. This only rarely occurs (should not in
    fact) in real uses but may lead to hard to track bugs.
    """

    # The following is used to represent the boundary paramter defined
    # in HTTP response headers and the boundary lines that separate
    # multipart content.

    boundary = "separation"

    def setUp(self):
        super(TestRangeFileMultipleRanges, self).setUp()

        boundary = self.boundary

        content = ''
        self.first_range_start = 25
        file_size = 200 # big enough to encompass all ranges
        for (start, part) in [(self.first_range_start, self.alpha),
                              # Two contiguous ranges
                              (100, self.alpha),
                              (126, self.alpha.upper())]:
            content += self._multipart_byterange(part, start, boundary,
                                                 file_size)
        # Final boundary
        content += self._boundary_line()

        self._file = response.RangeFile('Multiple_ranges_file',
                                        StringIO(content))
        self.set_file_boundary()

    def _boundary_line(self):
        """Helper to build the formatted boundary line."""
        return '--' + self.boundary + '\r\n'

    def set_file_boundary(self):
        # Ranges are set by decoding the range headers, the RangeFile user is
        # supposed to call the following before using seek or read since it
        # requires knowing the *response* headers (in that case the boundary
        # which is part of the Content-Type header).
        self._file.set_boundary(self.boundary)

    def _multipart_byterange(self, data, offset, boundary, file_size='*'):
        """Encode a part of a file as a multipart/byterange MIME type.

        When a range request is issued, the HTTP response body can be
        decomposed in parts, each one representing a range (start, size) in a
        file.

        :param data: The payload.
        :param offset: where data starts in the file
        :param boundary: used to separate the parts
        :param file_size: the size of the file containing the range (default to
            '*' meaning unknown)

        :return: a string containing the data encoded as it will appear in the
            HTTP response body.
        """
        bline = self._boundary_line()
        # Each range begins with a boundary line
        range = bline
        # A range is described by a set of headers, but only 'Content-Range' is
        # required for our implementation (TestHandleResponse below will
        # exercise ranges with multiple or missing headers')
        range += 'Content-Range: bytes %d-%d/%d\r\n' % (offset,
                                                        offset+len(data)-1,
                                                        file_size)
        range += '\r\n'
        # Finally the raw bytes
        range += data
        return range

    def test_read_all_ranges(self):
        f = self._file
        self.assertEquals(self.alpha, f.read()) # Read first range
        f.seek(100) # Trigger the second range recognition
        self.assertEquals(self.alpha, f.read()) # Read second range
        self.assertEquals(126, f.tell())
        f.seek(126) # Start of third range which is also the current pos !
        self.assertEquals('A', f.read(1))
        f.seek(10, 1)
        self.assertEquals('LMN', f.read(3))

    def test_seek_from_end(self):
        """See TestRangeFileMixin.test_seek_from_end."""
        # The actual implementation will seek from end for the first range only
        # and then fail. Since seeking from end is intended to be used for a
        # single range only anyway, this test just document the actual
        # behaviour.
        f = self._file
        f.seek(-2, 2)
        self.assertEquals('yz', f.read())
        self.assertRaises(errors.InvalidRange, f.seek, -2, 2)

    def test_seek_into_void(self):
        f = self._file
        start = self.first_range_start
        f.seek(start)
        # Seeking to a point between two ranges is possible (only once) but
        # reading there is forbidden
        f.seek(start + 40)
        # We crossed a range boundary, so now the file is positioned at the
        # start of the new range (i.e. trying to seek below 100 will error out)
        f.seek(100)
        f.seek(125)

    def test_seek_across_ranges(self):
        f = self._file
        start = self.first_range_start
        f.seek(126) # skip the two first ranges
        self.assertEquals('AB', f.read(2))

    def test_checked_read_dont_overflow_buffers(self):
        f = self._file
        start = self.first_range_start
        # We force a very low value to exercise all code paths in _checked_read
        f._discarded_buf_size = 8
        f.seek(126) # skip the two first ranges
        self.assertEquals('AB', f.read(2))

    def test_seek_twice_between_ranges(self):
        f = self._file
        start = self.first_range_start
        f.seek(start + 40) # Past the first range but before the second
        # Now the file is positioned at the second range start (100)
        self.assertRaises(errors.InvalidRange, f.seek, start + 41)

    def test_seek_at_range_end(self):
        """Test seek behavior at range end."""
        f = self._file
        f.seek(25 + 25)
        f.seek(100 + 25)
        f.seek(126 + 25)

    def test_read_at_range_end(self):
        f = self._file
        self.assertEquals(self.alpha, f.read())
        self.assertEquals(self.alpha, f.read())
        self.assertEquals(self.alpha.upper(), f.read())
        self.assertRaises(errors.InvalidHttpResponse, f.read, 1)


class TestRangeFileMultipleRangesQuotedBoundaries(TestRangeFileMultipleRanges):
    """Perform the same tests as TestRangeFileMultipleRanges, but uses
    an angle-bracket quoted boundary string like IIS 6.0 and 7.0
    (but not IIS 5, which breaks the RFC in a different way
    by using square brackets, not angle brackets)

    This reveals a bug caused by

    - The bad implementation of RFC 822 unquoting in Python (angles are not
      quotes), coupled with

    - The bad implementation of RFC 2046 in IIS (angles are not permitted chars
      in boundary lines).

    """
    # The boundary as it appears in boundary lines
    # IIS 6 and 7 use this value
    _boundary_trimmed = "q1w2e3r4t5y6u7i8o9p0zaxscdvfbgnhmjklkl"
    boundary = '<' + _boundary_trimmed + '>'

    def set_file_boundary(self):
        # Emulate broken rfc822.unquote() here by removing angles
        self._file.set_boundary(self._boundary_trimmed)


class TestRangeFileVarious(tests.TestCase):
    """Tests RangeFile aspects not covered elsewhere."""

    def test_seek_whence(self):
        """Test the seek whence parameter values."""
        f = response.RangeFile('foo', StringIO('abc'))
        f.set_range(0, 3)
        f.seek(0)
        f.seek(1, 1)
        f.seek(-1, 2)
        self.assertRaises(ValueError, f.seek, 0, 14)

    def test_range_syntax(self):
        """Test the Content-Range scanning."""

        f = response.RangeFile('foo', StringIO())

        def ok(expected, header_value):
            f.set_range_from_header(header_value)
            # Slightly peek under the covers to get the size
            self.assertEquals(expected, (f.tell(), f._size))

        ok((1, 10), 'bytes 1-10/11')
        ok((1, 10), 'bytes 1-10/*')
        ok((12, 2), '\tbytes 12-13/*')
        ok((28, 1), '  bytes 28-28/*')
        ok((2123, 2120), 'bytes  2123-4242/12310')
        ok((1, 10), 'bytes 1-10/ttt') # We don't check total (ttt)

        def nok(header_value):
            self.assertRaises(errors.InvalidHttpRange,
                              f.set_range_from_header, header_value)

        nok('bytes 10-2/3')
        nok('chars 1-2/3')
        nok('bytes xx-yyy/zzz')
        nok('bytes xx-12/zzz')
        nok('bytes 11-yy/zzz')
        nok('bytes10-2/3')


# Taken from real request responses
_full_text_response = (200, """HTTP/1.1 200 OK\r
Date: Tue, 11 Jul 2006 04:32:56 GMT\r
Server: Apache/2.0.54 (Fedora)\r
Last-Modified: Sun, 23 Apr 2006 19:35:20 GMT\r
ETag: "56691-23-38e9ae00"\r
Accept-Ranges: bytes\r
Content-Length: 35\r
Connection: close\r
Content-Type: text/plain; charset=UTF-8\r
\r
""", """Bazaar-NG meta directory, format 1
""")


_single_range_response = (206, """HTTP/1.1 206 Partial Content\r
Date: Tue, 11 Jul 2006 04:45:22 GMT\r
Server: Apache/2.0.54 (Fedora)\r
Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r
ETag: "238a3c-16ec2-805c5540"\r
Accept-Ranges: bytes\r
Content-Length: 100\r
Content-Range: bytes 100-199/93890\r
Connection: close\r
Content-Type: text/plain; charset=UTF-8\r
\r
""", """mbp@sourcefrog.net-20050309040815-13242001617e4a06
mbp@sourcefrog.net-20050309040929-eee0eb3e6d1e762""")


_single_range_no_content_type = (206, """HTTP/1.1 206 Partial Content\r
Date: Tue, 11 Jul 2006 04:45:22 GMT\r
Server: Apache/2.0.54 (Fedora)\r
Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r
ETag: "238a3c-16ec2-805c5540"\r
Accept-Ranges: bytes\r
Content-Length: 100\r
Content-Range: bytes 100-199/93890\r
Connection: close\r
\r
""", """mbp@sourcefrog.net-20050309040815-13242001617e4a06
mbp@sourcefrog.net-20050309040929-eee0eb3e6d1e762""")


_multipart_range_response = (206, """HTTP/1.1 206 Partial Content\r
Date: Tue, 11 Jul 2006 04:49:48 GMT\r
Server: Apache/2.0.54 (Fedora)\r
Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r
ETag: "238a3c-16ec2-805c5540"\r
Accept-Ranges: bytes\r
Content-Length: 1534\r
Connection: close\r
Content-Type: multipart/byteranges; boundary=418470f848b63279b\r
\r
\r""", """--418470f848b63279b\r
Content-type: text/plain; charset=UTF-8\r
Content-range: bytes 0-254/93890\r
\r
mbp@sourcefrog.net-20050309040815-13242001617e4a06
mbp@sourcefrog.net-20050309040929-eee0eb3e6d1e7627
mbp@sourcefrog.net-20050309040957-6cad07f466bb0bb8
mbp@sourcefrog.net-20050309041501-c840e09071de3b67
mbp@sourcefrog.net-20050309044615-c24a3250be83220a
\r
--418470f848b63279b\r
Content-type: text/plain; charset=UTF-8\r
Content-range: bytes 1000-2049/93890\r
\r
40-fd4ec249b6b139ab
mbp@sourcefrog.net-20050311063625-07858525021f270b
mbp@sourcefrog.net-20050311231934-aa3776aff5200bb9
mbp@sourcefrog.net-20050311231953-73aeb3a131c3699a
mbp@sourcefrog.net-20050311232353-f5e33da490872c6a
mbp@sourcefrog.net-20050312071639-0a8f59a34a024ff0
mbp@sourcefrog.net-20050312073432-b2c16a55e0d6e9fb
mbp@sourcefrog.net-20050312073831-a47c3335ece1920f
mbp@sourcefrog.net-20050312085412-13373aa129ccbad3
mbp@sourcefrog.net-20050313052251-2bf004cb96b39933
mbp@sourcefrog.net-20050313052856-3edd84094687cb11
mbp@sourcefrog.net-20050313053233-e30a4f28aef48f9d
mbp@sourcefrog.net-20050313053853-7c64085594ff3072
mbp@sourcefrog.net-20050313054757-a86c3f5871069e22
mbp@sourcefrog.net-20050313061422-418f1f73b94879b9
mbp@sourcefrog.net-20050313120651-497bd231b19df600
mbp@sourcefrog.net-20050314024931-eae0170ef25a5d1a
mbp@sourcefrog.net-20050314025438-d52099f915fe65fc
mbp@sourcefrog.net-20050314025539-637a636692c055cf
mbp@sourcefrog.net-20050314025737-55eb441f430ab4ba
mbp@sourcefrog.net-20050314025901-d74aa93bb7ee8f62
mbp@source\r
--418470f848b63279b--\r
""")


_multipart_squid_range_response = (206, """HTTP/1.0 206 Partial Content\r
Date: Thu, 31 Aug 2006 21:16:22 GMT\r
Server: Apache/2.2.2 (Unix) DAV/2\r
Last-Modified: Thu, 31 Aug 2006 17:57:06 GMT\r
Accept-Ranges: bytes\r
Content-Type: multipart/byteranges; boundary="squid/2.5.STABLE12:C99323425AD4FE26F726261FA6C24196"\r
Content-Length: 598\r
X-Cache: MISS from localhost.localdomain\r
X-Cache-Lookup: HIT from localhost.localdomain:3128\r
Proxy-Connection: keep-alive\r
\r
""",
"""\r
--squid/2.5.STABLE12:C99323425AD4FE26F726261FA6C24196\r
Content-Type: text/plain\r
Content-Range: bytes 0-99/18672\r
\r
# bzr knit index 8

scott@netsplit.com-20050708230047-47c7868f276b939f fulltext 0 863  :
scott@netsp\r
--squid/2.5.STABLE12:C99323425AD4FE26F726261FA6C24196\r
Content-Type: text/plain\r
Content-Range: bytes 300-499/18672\r
\r
com-20050708231537-2b124b835395399a :
scott@netsplit.com-20050820234126-551311dbb7435b51 line-delta 1803 479 .scott@netsplit.com-20050820232911-dc4322a084eadf7e :
scott@netsplit.com-20050821213706-c86\r
--squid/2.5.STABLE12:C99323425AD4FE26F726261FA6C24196--\r
""")


# This is made up
_full_text_response_no_content_type = (200, """HTTP/1.1 200 OK\r
Date: Tue, 11 Jul 2006 04:32:56 GMT\r
Server: Apache/2.0.54 (Fedora)\r
Last-Modified: Sun, 23 Apr 2006 19:35:20 GMT\r
ETag: "56691-23-38e9ae00"\r
Accept-Ranges: bytes\r
Content-Length: 35\r
Connection: close\r
\r
""", """Bazaar-NG meta directory, format 1
""")


_full_text_response_no_content_length = (200, """HTTP/1.1 200 OK\r
Date: Tue, 11 Jul 2006 04:32:56 GMT\r
Server: Apache/2.0.54 (Fedora)\r
Last-Modified: Sun, 23 Apr 2006 19:35:20 GMT\r
ETag: "56691-23-38e9ae00"\r
Accept-Ranges: bytes\r
Connection: close\r
Content-Type: text/plain; charset=UTF-8\r
\r
""", """Bazaar-NG meta directory, format 1
""")


_single_range_no_content_range = (206, """HTTP/1.1 206 Partial Content\r
Date: Tue, 11 Jul 2006 04:45:22 GMT\r
Server: Apache/2.0.54 (Fedora)\r
Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r
ETag: "238a3c-16ec2-805c5540"\r
Accept-Ranges: bytes\r
Content-Length: 100\r
Connection: close\r
\r
""", """mbp@sourcefrog.net-20050309040815-13242001617e4a06
mbp@sourcefrog.net-20050309040929-eee0eb3e6d1e762""")


_single_range_response_truncated = (206, """HTTP/1.1 206 Partial Content\r
Date: Tue, 11 Jul 2006 04:45:22 GMT\r
Server: Apache/2.0.54 (Fedora)\r
Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r
ETag: "238a3c-16ec2-805c5540"\r
Accept-Ranges: bytes\r
Content-Length: 100\r
Content-Range: bytes 100-199/93890\r
Connection: close\r
Content-Type: text/plain; charset=UTF-8\r
\r
""", """mbp@sourcefrog.net-20050309040815-13242001617e4a06""")


_invalid_response = (444, """HTTP/1.1 444 Bad Response\r
Date: Tue, 11 Jul 2006 04:32:56 GMT\r
Connection: close\r
Content-Type: text/html; charset=iso-8859-1\r
\r
""", """<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">
<html><head>
<title>404 Not Found</title>
</head><body>
<h1>Not Found</h1>
<p>I don't know what I'm doing</p>
<hr>
</body></html>
""")


_multipart_no_content_range = (206, """HTTP/1.0 206 Partial Content\r
Content-Type: multipart/byteranges; boundary=THIS_SEPARATES\r
Content-Length: 598\r
\r
""",
"""\r
--THIS_SEPARATES\r
Content-Type: text/plain\r
\r
# bzr knit index 8
--THIS_SEPARATES\r
""")


_multipart_no_boundary = (206, """HTTP/1.0 206 Partial Content\r
Content-Type: multipart/byteranges; boundary=THIS_SEPARATES\r
Content-Length: 598\r
\r
""",
"""\r
--THIS_SEPARATES\r
Content-Type: text/plain\r
Content-Range: bytes 0-18/18672\r
\r
# bzr knit index 8

The range ended at the line above, this text is garbage instead of a boundary
line
""")


class TestHandleResponse(tests.TestCase):

    def _build_HTTPMessage(self, raw_headers):
        status_and_headers = StringIO(raw_headers)
        # Get rid of the status line
        status_and_headers.readline()
        msg = httplib.HTTPMessage(status_and_headers)
        return msg

    def get_response(self, a_response):
        """Process a supplied response, and return the result."""
        code, raw_headers, body = a_response
        msg = self._build_HTTPMessage(raw_headers)
        return response.handle_response('http://foo', code, msg,
                                        StringIO(a_response[2]))

    def test_full_text(self):
        out = self.get_response(_full_text_response)
        # It is a StringIO from the original data
        self.assertEqual(_full_text_response[2], out.read())

    def test_single_range(self):
        out = self.get_response(_single_range_response)

        out.seek(100)
        self.assertEqual(_single_range_response[2], out.read(100))

    def test_single_range_no_content(self):
        out = self.get_response(_single_range_no_content_type)

        out.seek(100)
        self.assertEqual(_single_range_no_content_type[2], out.read(100))

    def test_single_range_truncated(self):
        out = self.get_response(_single_range_response_truncated)
        # Content-Range declares 100 but only 51 present
        self.assertRaises(errors.ShortReadvError, out.seek, out.tell() + 51)

    def test_multi_range(self):
        out = self.get_response(_multipart_range_response)

        # Just make sure we can read the right contents
        out.seek(0)
        out.read(255)

        out.seek(1000)
        out.read(1050)

    def test_multi_squid_range(self):
        out = self.get_response(_multipart_squid_range_response)

        # Just make sure we can read the right contents
        out.seek(0)
        out.read(100)

        out.seek(300)
        out.read(200)

    def test_invalid_response(self):
        self.assertRaises(errors.InvalidHttpResponse,
                          self.get_response, _invalid_response)

    def test_full_text_no_content_type(self):
        # We should not require Content-Type for a full response
        code, raw_headers, body = _full_text_response_no_content_type
        msg = self._build_HTTPMessage(raw_headers)
        out = response.handle_response('http://foo', code, msg, StringIO(body))
        self.assertEqual(body, out.read())

    def test_full_text_no_content_length(self):
        code, raw_headers, body = _full_text_response_no_content_length
        msg = self._build_HTTPMessage(raw_headers)
        out = response.handle_response('http://foo', code, msg, StringIO(body))
        self.assertEqual(body, out.read())

    def test_missing_content_range(self):
        code, raw_headers, body = _single_range_no_content_range
        msg = self._build_HTTPMessage(raw_headers)
        self.assertRaises(errors.InvalidHttpResponse,
                          response.handle_response,
                          'http://bogus', code, msg, StringIO(body))

    def test_multipart_no_content_range(self):
        code, raw_headers, body = _multipart_no_content_range
        msg = self._build_HTTPMessage(raw_headers)
        self.assertRaises(errors.InvalidHttpResponse,
                          response.handle_response,
                          'http://bogus', code, msg, StringIO(body))

    def test_multipart_no_boundary(self):
        out = self.get_response(_multipart_no_boundary)
        out.read()  # Read the whole range
        # Fail to find the boundary line
        self.assertRaises(errors.InvalidHttpResponse, out.seek, 1, 1)


class TestRangeFileSizeReadLimited(tests.TestCase):
    """Test RangeFile _max_read_size functionality which limits the size of
    read blocks to prevent MemoryError messages in socket.recv.
    """

    def setUp(self):
        tests.TestCase.setUp(self)
        # create a test datablock larger than _max_read_size.
        chunk_size = response.RangeFile._max_read_size
        test_pattern = '0123456789ABCDEF'
        self.test_data =  test_pattern * (3 * chunk_size / len(test_pattern))
        self.test_data_len = len(self.test_data)

    def test_max_read_size(self):
        """Read data in blocks and verify that the reads are not larger than
           the maximum read size.
        """
        # retrieve data in large blocks from response.RangeFile object
        mock_read_file = FakeReadFile(self.test_data)
        range_file = response.RangeFile('test_max_read_size', mock_read_file)
        response_data = range_file.read(self.test_data_len)

        # verify read size was equal to the maximum read size
        self.assertTrue(mock_read_file.get_max_read_size() > 0)
        self.assertEqual(mock_read_file.get_max_read_size(),
                         response.RangeFile._max_read_size)
        self.assertEqual(mock_read_file.get_read_count(), 3)

        # report error if the data wasn't equal (we only report the size due
        # to the length of the data)
        if response_data != self.test_data:
            message = "Data not equal.  Expected %d bytes, received %d."
            self.fail(message % (len(response_data), self.test_data_len))