5609.52.1
by Martin Pool
Cope with buggy squids interrupting the response before a mime multipart boundary |
1 |
# Copyright (C) 2006-2011 Canonical Ltd
|
1750.1.2
by Michael Ellerman
Add support for HTTP multipart ranges and hook it into http+urllib. |
2 |
#
|
3 |
# This program is free software; you can redistribute it and/or modify
|
|
4 |
# it under the terms of the GNU General Public License as published by
|
|
5 |
# the Free Software Foundation; either version 2 of the License, or
|
|
6 |
# (at your option) any later version.
|
|
7 |
#
|
|
8 |
# This program is distributed in the hope that it will be useful,
|
|
9 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
10 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
11 |
# GNU General Public License for more details.
|
|
12 |
#
|
|
13 |
# You should have received a copy of the GNU General Public License
|
|
14 |
# along with this program; if not, write to the Free Software
|
|
4183.7.1
by Sabin Iacob
update FSF mailing address |
15 |
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
1750.1.2
by Michael Ellerman
Add support for HTTP multipart ranges and hook it into http+urllib. |
16 |
|
17 |
"""Handlers for HTTP Responses.
|
|
18 |
||
19 |
The purpose of these classes is to provide a uniform interface for clients
|
|
20 |
to standard HTTP responses, single range responses and multipart range
|
|
21 |
responses.
|
|
22 |
"""
|
|
23 |
||
6379.6.7
by Jelmer Vernooij
Move importing from future until after doc string, otherwise the doc string will disappear. |
24 |
from __future__ import absolute_import |
1750.1.2
by Michael Ellerman
Add support for HTTP multipart ranges and hook it into http+urllib. |
25 |
|
6450.2.1
by Vincent Ladeuil
Avoid invalid range access errors on whole files when using http transport |
26 |
import os |
3059.2.2
by Vincent Ladeuil
Read http responses on demand without buffering the whole body |
27 |
import httplib |
3408.6.1
by Eric Holmberg
Fix for Bug #215426 in which bzr can cause a MemoryError in socket.recv while |
28 |
from cStringIO import StringIO |
3535.1.2
by Adrian Wilkins
Fix ability to use IIS as a dumb HTTP server. |
29 |
import rfc822 |
3059.2.2
by Vincent Ladeuil
Read http responses on demand without buffering the whole body |
30 |
|
31 |
from bzrlib import ( |
|
32 |
errors, |
|
3408.6.1
by Eric Holmberg
Fix for Bug #215426 in which bzr can cause a MemoryError in socket.recv while |
33 |
osutils, |
3059.2.2
by Vincent Ladeuil
Read http responses on demand without buffering the whole body |
34 |
)
|
35 |
||
36 |
||
6450.2.1
by Vincent Ladeuil
Avoid invalid range access errors on whole files when using http transport |
37 |
class ResponseFile(object):
    """A wrapper around the http socket containing the result of a GET request.

    Only read(), readline(), iteration over lines, tell() and forward seek()
    are supported. The current position is tracked by counting the bytes
    handed out, since the underlying file is not asked for its position.
    """

    def __init__(self, path, infile):
        """Constructor.

        :param path: File url, for error reports.

        :param infile: File-like socket set at body start.
        """
        self._path = path
        self._file = infile
        # Number of bytes consumed so far from infile.
        self._pos = 0

    def close(self):
        """Close this file.

        Dummy implementation for consistency with the 'file' API.
        """

    def read(self, size=-1):
        """Read size bytes from the current position in the file.

        :param size: The number of bytes to read. Leave unspecified or pass
            -1 to read to EOF.

        :return: The bytes read, possibly fewer than requested.
        """
        data = self._file.read(size)
        self._pos += len(data)
        return data

    def readline(self):
        """Read a single line and advance the position accordingly.

        :return: The line read, empty once the underlying file is exhausted.
        """
        data = self._file.readline()
        self._pos += len(data)
        return data

    def __iter__(self):
        """Iterate over the remaining lines until an empty read."""
        while True:
            line = self.readline()
            if not line:
                return
            yield line

    def tell(self):
        """Return the number of bytes consumed so far."""
        return self._pos

    def seek(self, offset, whence=os.SEEK_SET):
        """Move forward in the file by reading and discarding bytes.

        :param offset: Absolute position (os.SEEK_SET) or number of bytes to
            skip (os.SEEK_CUR).

        :param whence: os.SEEK_SET or os.SEEK_CUR. Any other value is
            rejected.

        :raises AssertionError: If the seek would go backwards or if whence
            is not supported.
        """
        if whence == os.SEEK_SET:
            if offset < self._pos:
                raise AssertionError(
                    "Can't seek backwards, pos: %s, offset: %s"
                    % (self._pos, offset))
            to_discard = offset - self._pos
        elif whence == os.SEEK_CUR:
            if offset < 0:
                # A negative size handed to read() means "read to EOF", which
                # would silently drain the stream instead of going backwards.
                raise AssertionError(
                    "Can't seek backwards, pos: %s, relative offset: %s"
                    % (self._pos, offset))
            to_discard = offset
        else:
            raise AssertionError(
                "Can't seek with whence %s, only forward SEEK_SET and"
                " SEEK_CUR are supported" % (whence,))
        if to_discard:
            # Just discard the unwanted bytes
            self.read(to_discard)
|
99 |
||
3059.2.18
by Vincent Ladeuil
Take spiv review comments into account. |
100 |
# A RangeFile expects the following grammar (simplified to outline the
|
3059.2.2
by Vincent Ladeuil
Read http responses on demand without buffering the whole body |
101 |
# assumptions we rely upon).
|
102 |
||
6450.2.1
by Vincent Ladeuil
Avoid invalid range access errors on whole files when using http transport |
103 |
# file: single_range
|
3059.2.2
by Vincent Ladeuil
Read http responses on demand without buffering the whole body |
104 |
# | multiple_range
|
105 |
||
106 |
# single_range: content_range_header data
|
|
107 |
||
108 |
# multiple_range: boundary_header boundary (content_range_header data boundary)+
|
|
1750.1.2
by Michael Ellerman
Add support for HTTP multipart ranges and hook it into http+urllib. |
109 |
|
6450.2.1
by Vincent Ladeuil
Avoid invalid range access errors on whole files when using http transport |
110 |
class RangeFile(ResponseFile):
    """File-like object that allows access to partially available data.

    All accesses should happen sequentially since the acquisition occurs during
    an http response reception (as sockets can't be seeked, we simulate the
    seek by just reading and discarding the data).

    The access pattern is defined by a set of ranges discovered as reading
    progresses. Only one range is available at a given time, so all accesses
    should happen with monotonically increasing offsets.
    """

    # in _checked_read() below, we may have to discard several MB in the worst
    # case. To avoid buffering that much, we read and discard by chunks
    # instead. The underlying file is either a socket or a StringIO, so reading
    # 8k chunks should be fine.
    _discarded_buf_size = 8192

    # maximum size of read requests -- used to avoid MemoryError issues in recv
    _max_read_size = 512 * 1024

    def __init__(self, path, infile):
        """Constructor.

        :param path: File url, for error reports.

        :param infile: File-like socket set at body start.
        """
        super(RangeFile, self).__init__(path, infile)
        self._boundary = None
        # When using multi parts response, this will be set with the headers
        # associated with the range currently read.
        self._headers = None
        # Default to the whole file of unspecified size
        self.set_range(0, -1)

    def set_range(self, start, size):
        """Change the range mapping.

        :param start: Offset of the first byte of the range.

        :param size: Number of bytes in the range; the constructor passes -1
            for a range of unspecified size.
        """
        self._start = start
        self._size = size
        # The exposed position restarts at the beginning of the new range
        self._pos = self._start

    def set_boundary(self, boundary):
        """Define the boundary used in a multi parts message.

        The file should be at the beginning of the body, the first range
        definition is read and taken into account.

        :param boundary: The boundary string, without the leading '--'.
        """
        self._boundary = boundary
        # Decode the headers and setup the first range
        self.read_boundary()
        self.read_range_definition()

    def read_boundary(self):
        """Read the boundary line announcing a new range.

        :raises errors.HttpBoundaryMissing: If the file ends before any
            boundary line is found.
        :raises errors.InvalidHttpResponse: If the line read is not the
            expected boundary.
        """
        # RFC2616 19.2 Additional CRLFs may precede the first boundary
        # string entity.
        # To be on the safe side we allow it before any boundary line
        line = self._file.readline()
        while line == '\r\n':
            line = self._file.readline()

        if line == '':
            # A timeout in the proxy server caused the response to end early.
            # See launchpad bug 198646.
            raise errors.HttpBoundaryMissing(self._path, self._boundary)

        expected = '--' + self._boundary + '\r\n'
        # rfc822.unquote() incorrectly unquotes strings enclosed in <>
        # IIS 6 and 7 incorrectly wrap boundary strings in <>
        # together they make a beautiful bug, which we will be gracious
        # about here
        if line != expected and self._unquote_boundary(line) != expected:
            raise errors.InvalidHttpResponse(
                self._path,
                "Expected a boundary (%s) line, got '%s'"
                % (self._boundary, line))

    def _unquote_boundary(self, b):
        """Unquote the middle of a boundary line.

        The first two and last two characters of b are kept as they are.
        """
        prefix, quoted, suffix = b[:2], b[2:-2], b[-2:]
        return prefix + rfc822.unquote(quoted) + suffix

    def read_range_definition(self):
        """Read a new range definition in a multi parts message.

        Parse the headers including the empty line following them so that we
        are ready to read the data itself.

        :raises errors.InvalidHttpResponse: If the part carries no
            Content-Range header.
        """
        self._headers = httplib.HTTPMessage(self._file, seekable=0)
        # Extract the range definition
        range_header = self._headers.getheader('content-range', None)
        if range_header is None:
            raise errors.InvalidHttpResponse(
                self._path,
                'Content-Range header missing in a multi-part response')
        self.set_range_from_header(range_header)

    def set_range_from_header(self, content_range):
        """Helper to set the new range from its description in the headers.

        :param content_range: The value of a Content-Range header.

        :raises errors.InvalidHttpRange: If the value cannot be parsed, is not
            expressed in bytes or describes an empty or negative range.
        """
        try:
            rtype, values = content_range.split()
        except ValueError:
            raise errors.InvalidHttpRange(self._path, content_range,
                                          'Malformed header')
        if rtype != 'bytes':
            raise errors.InvalidHttpRange(self._path, content_range,
                                          "Unsupported range type '%s'" % rtype)
        try:
            # We don't need total, but note that it may be either the file size
            # or '*' if the server can't or doesn't want to return the file
            # size.
            start_end, total = values.split('/')
            first, last = start_end.split('-')
            first = int(first)
            last = int(last)
        except ValueError:
            raise errors.InvalidHttpRange(self._path, content_range,
                                          'Invalid range values')
        length = last - first + 1
        if length <= 0:
            raise errors.InvalidHttpRange(self._path, content_range,
                                          'Invalid range, size <= 0')
        self.set_range(first, length)

    def _checked_read(self, size):
        """Read the file checking for short reads.

        The data read is discarded along the way.

        :param size: The number of bytes to skip.

        :raises errors.ShortReadvError: If the file ends before size bytes
            have been read.
        """
        origin = self._pos
        left = size
        while left > 0:
            chunk = self._file.read(min(left, self._discarded_buf_size))
            if not chunk:
                raise errors.ShortReadvError(self._path, origin, size,
                                             size - left)
            left -= len(chunk)
        self._pos += size

    def _seek_to_next_range(self):
        """Position the file at the start of the data of the next range.

        :raises errors.InvalidRange: If no boundary is defined, as there is
            then no other range to go to.
        """
        # We will cross range boundaries
        if self._boundary is None:
            # If we don't have a boundary, we can't find another range
            raise errors.InvalidRange(self._path, self._pos,
                                      "Range (%s, %s) exhausted"
                                      % (self._start, self._size))
        self.read_boundary()
        self.read_range_definition()

    def read(self, size=-1):
        """Read size bytes from the current position in the file.

        Reading across ranges is not supported. We rely on the underlying http
        client to clean the socket if we leave bytes unread. This may occur for
        the final boundary line of a multipart response or for any range
        request not entirely consumed by the client (due to offset coalescing)

        :param size: The number of bytes to read. Leave unspecified or pass
            -1 to read to EOF.

        :raises errors.InvalidRange: If the read starts before the current
            range or would go past its end.
        """
        if self._size > 0 and self._pos == self._start + self._size:
            # The current range is exhausted
            if size == 0:
                return ''
            self._seek_to_next_range()
        elif self._pos < self._start:
            raise errors.InvalidRange(
                self._path, self._pos,
                "Can't read %s bytes before range (%s, %s)"
                % (size, self._start, self._size))

        if self._size > 0:
            range_end = self._start + self._size
            if size > 0 and self._pos + size > range_end:
                raise errors.InvalidRange(
                    self._path, self._pos,
                    "Can't read %s bytes across range (%s, %s)"
                    % (size, self._start, self._size))
            # Don't read past the range definition
            limited = range_end - self._pos
            if size >= 0:
                limited = min(limited, size)
        else:
            limited = size

        # read data from file
        buf = StringIO()
        osutils.pumpfile(self._file, buf, limited, self._max_read_size)
        data = buf.getvalue()

        # Update _pos respecting the data effectively read
        self._pos += len(data)
        return data

    def seek(self, offset, whence=0):
        """Move forward to a new position, crossing ranges when needed.

        :param offset: The position, interpreted according to whence.

        :param whence: 0 for an absolute position, 1 for a position relative
            to the current one, 2 for a position relative to the end of the
            current range.

        :raises errors.InvalidRange: When seeking backwards, or from the end
            of a range whose size is unknown.
        :raises ValueError: If whence is not 0, 1 or 2.
        """
        if whence == os.SEEK_SET:
            target = offset
        elif whence == os.SEEK_CUR:
            target = self._pos + offset
        elif whence == os.SEEK_END:
            if self._size <= 0:
                raise errors.InvalidRange(
                    self._path, self._pos,
                    "RangeFile: can't seek from end while size is unknown")
            target = self._start + self._size + offset  # offset < 0
        else:
            raise ValueError("Invalid value %s for whence." % whence)

        if target < self._pos:
            # Can't seek backwards
            raise errors.InvalidRange(
                self._path, self._pos,
                'RangeFile: trying to seek backwards to %s' % target)

        if self._size > 0:
            range_end = self._start + self._size
            while target > range_end:
                # We will cross range boundaries
                leftover = range_end - self._pos
                if leftover > 0:
                    # Finish reading the current range
                    self._checked_read(leftover)
                self._seek_to_next_range()
                range_end = self._start + self._size

        to_skip = target - self._pos
        if to_skip > 0:  # to_skip can be < 0 if we crossed a range boundary
            # We don't need the data, just read it and throw it away
            self._checked_read(to_skip)

    def tell(self):
        """Return the current position."""
        return self._pos
|
346 |
||
1786.1.5
by John Arbash Meinel
Move the common Multipart stuff into plain http, and wrap pycurl response so that it matches the urllib response object. |
347 |
|
3945.1.8
by Vincent Ladeuil
Add more tests, fix pycurl double handling, revert previous tracking. |
348 |
def handle_response(url, code, msg, data):
    """Interpret the code & headers and wrap the provided data in a RangeFile.

    This is a factory method which returns an appropriate RangeFile based on
    the code & headers it's given.

    :param url: The url being processed. Mostly for error reporting
    :param code: The integer HTTP response code
    :param msg: An HTTPMessage containing the headers for the response
    :param data: A file-like object that can be read() to get the
                 requested data
    :return: A file-like object that can seek()+read() the
             ranges indicated by the headers.
    :raises errors.InvalidHttpResponse: If the code is neither 200 nor 206,
        if a multipart response does not define its boundary or if a single
        range response has no Content-Range header.
    """
    if code == 200:
        # A whole file
        rfile = ResponseFile(url, data)
    elif code == 206:
        rfile = RangeFile(url, data)
        if msg.getheader('content-type', None) is None:
            # When there is no content-type header we treat the response as
            # being of type 'application/octet-stream' as per RFC2616 section
            # 7.2.1.
            # Therefore it is obviously not multipart
            is_multipart = False
        else:
            is_multipart = (msg.getmaintype() == 'multipart'
                            and msg.getsubtype() == 'byteranges')

        if is_multipart:
            # Full fledged multipart response
            boundary = msg.getparam('boundary')
            if boundary is None:
                # Without this check RangeFile.read_boundary() fails with a
                # TypeError while building the '--' + boundary line; report
                # the malformed response like the other invalid cases.
                raise errors.InvalidHttpResponse(url,
                    'Missing the boundary parameter in a multipart/byteranges'
                    ' response')
            rfile.set_boundary(boundary)
        else:
            # A response to a range request, but not multipart
            content_range = msg.getheader('content-range', None)
            if content_range is None:
                raise errors.InvalidHttpResponse(url,
                    'Missing the Content-Range header in a 206 range response')
            rfile.set_range_from_header(content_range)
    else:
        raise errors.InvalidHttpResponse(url,
                                         'Unknown response code %s' % code)

    return rfile
|
1786.1.21
by John Arbash Meinel
(broken) Work on factoring out handle_response so we can test with fake headers. |
394 |