1
# Copyright (C) 2006-2011 Canonical Ltd
1
# Copyright (C) 2006, 2007 Canonical Ltd
3
3
# This program is free software; you can redistribute it and/or modify
4
4
# it under the terms of the GNU General Public License as published by
13
13
# You should have received a copy of the GNU General Public License
14
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
17
"""Handlers for HTTP Responses.
24
from __future__ import absolute_import
28
from cStringIO import StringIO
31
27
from bzrlib import (
37
class ResponseFile(object):
38
"""A wrapper around the http socket containing the result of a GET request.
40
Only read() and seek() (forward) are supported.
43
def __init__(self, path, infile):
46
:param path: File url, for error reports.
48
:param infile: File-like socket set at body start.
57
Dummy implementation for consistency with the 'file' API.
60
def read(self, size=-1):
61
"""Read size bytes from the current position in the file.
63
:param size: The number of bytes to read. Leave unspecified or pass
66
data = self._file.read(size)
67
self._pos += len(data)
71
data = self._file.readline()
72
self._pos += len(data)
77
line = self.readline()
85
def seek(self, offset, whence=os.SEEK_SET):
86
if whence == os.SEEK_SET:
87
if offset < self._pos:
89
"Can't seek backwards, pos: %s, offset: %s"
90
% (self._pos, offset))
91
to_discard = offset - self._pos
92
elif whence == os.SEEK_CUR:
95
raise AssertionError("Can't seek backwards")
97
# Just discard the unwanted bytes
100
33
# A RangeFile expects the following grammar (simplified to outline the
101
34
# assumptions we rely upon).
104
38
# | multiple_range
40
# whole_file: [content_length_header] data
106
42
# single_range: content_range_header data
108
44
# multiple_range: boundary_header boundary (content_range_header data boundary)+
110
class RangeFile(ResponseFile):
46
class RangeFile(object):
111
47
"""File-like object that allows access to partially available data.
113
49
All accesses should happen sequentially since the acquisition occurs during
125
61
# 8k chunks should be fine.
126
62
_discarded_buf_size = 8192
128
# maximum size of read requests -- used to avoid MemoryError issues in recv
129
_max_read_size = 512 * 1024
131
64
def __init__(self, path, infile):
134
67
:param path: File url, for error reports.
136
68
:param infile: File-like socket set at body start.
138
super(RangeFile, self).__init__(path, infile)
139
72
self._boundary = None
140
73
# When using a multipart response, this will be set to the headers
141
74
# associated with the range currently read.
153
86
def set_boundary(self, boundary):
154
87
"""Define the boundary used in a multi parts message.
156
89
The file should be at the beginning of the body; the first range
157
90
definition is read and taken into account.
170
103
# To be on the safe side we allow it before any boundary line
171
104
boundary_line = self._file.readline()
173
if boundary_line == '':
174
# A timeout in the proxy server caused the response to end early.
175
# See launchpad bug 198646.
176
raise errors.HttpBoundaryMissing(
180
105
if boundary_line != '--' + self._boundary + '\r\n':
181
# rfc822.unquote() incorrectly unquotes strings enclosed in <>
182
# IIS 6 and 7 incorrectly wrap boundary strings in <>
183
# together they make a beautiful bug, which we will be gracious
185
if (self._unquote_boundary(boundary_line) !=
186
'--' + self._boundary + '\r\n'):
187
raise errors.InvalidHttpResponse(
189
"Expected a boundary (%s) line, got '%s'"
190
% (self._boundary, boundary_line))
192
def _unquote_boundary(self, b):
193
return b[:2] + rfc822.unquote(b[2:-2]) + b[-2:]
106
raise errors.InvalidHttpResponse(
108
"Expected a boundary (%s) line, got '%s'" % (self._boundary,
195
111
def read_range_definition(self):
196
112
"""Read a new range definition in a multi parts message.
266
182
client to clean the socket if we leave bytes unread. This may occur for
267
183
the final boundary line of a multipart response or for any range
268
184
request not entirely consumed by the client (due to offset coalescing)
270
:param size: The number of bytes to read. Leave unspecified or pass
273
186
if (self._size > 0
274
187
and self._pos == self._start + self._size):
288
201
"Can't read %s bytes across range (%s, %s)"
289
202
% (size, self._start, self._size))
291
# read data from file
294
204
if self._size > 0:
295
205
# Don't read past the range definition
296
206
limited = self._start + self._size - self._pos
298
208
limited = min(limited, size)
299
osutils.pumpfile(self._file, buffer, limited, self._max_read_size)
300
data = buffer.getvalue()
209
data = self._file.read(limited)
211
# Size of file unknown, the user may have specified a size or not,
212
# we delegate that to the filesocket object (-1 means read until
214
data = self._file.read(size)
302
215
# Update _pos respecting the data effectively read
303
216
self._pos += len(data)
356
269
:param msg: An HTTPMessage containing the headers for the response
357
270
:param data: A file-like object that can be read() to get the
359
:return: A file-like object that can seek()+read() the
272
:return: A file-like object that can seek()+read() the
360
273
ranges indicated by the headers.
275
rfile = RangeFile(url, data)
364
rfile = ResponseFile(url, data)
278
size = msg.getheader('content-length', None)
283
rfile.set_range(0, size)
365
284
elif code == 206:
366
rfile = RangeFile(url, data)
367
285
content_type = msg.getheader('content-type', None)
368
286
if content_type is None:
369
287
# When there is no content-type header we treat the response as