2052.3.1
by John Arbash Meinel
Add tests to cleanup the copyright of all source files |
1 |
# Copyright (C) 2006 Canonical Ltd
|
1750.1.2
by Michael Ellerman
Add support for HTTP multipart ranges and hook it into http+urllib. |
2 |
#
|
3 |
# This program is free software; you can redistribute it and/or modify
|
|
4 |
# it under the terms of the GNU General Public License as published by
|
|
5 |
# the Free Software Foundation; either version 2 of the License, or
|
|
6 |
# (at your option) any later version.
|
|
7 |
#
|
|
8 |
# This program is distributed in the hope that it will be useful,
|
|
9 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
10 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
11 |
# GNU General Public License for more details.
|
|
12 |
#
|
|
13 |
# You should have received a copy of the GNU General Public License
|
|
14 |
# along with this program; if not, write to the Free Software
|
|
15 |
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
16 |
||
17 |
"""Handlers for HTTP Responses.
|
|
18 |
||
19 |
The purpose of these classes is to provide a uniform interface for clients
|
|
20 |
to standard HTTP responses, single range responses and multipart range
|
|
21 |
responses.
|
|
22 |
"""
|
|
23 |
||
24 |
||
25 |
from bisect import bisect |
|
1786.1.27
by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration. |
26 |
from cStringIO import StringIO |
1750.1.2
by Michael Ellerman
Add support for HTTP multipart ranges and hook it into http+urllib. |
27 |
import re |
28 |
||
1786.1.13
by John Arbash Meinel
Found a few bugs in error handling code, updated tests |
29 |
from bzrlib import errors |
1750.1.2
by Michael Ellerman
Add support for HTTP multipart ranges and hook it into http+urllib. |
30 |
from bzrlib.trace import mutter |
31 |
||
32 |
||
33 |
class ResponseRange(object):
    """A range in a RangeFile-object.

    Associates the entity byte range ``_ent_start``..``_ent_end`` with
    ``_data_start``, the offset of its first byte in the data stream.

    Ranges order by the tuple (_ent_start, _ent_end, _data_start).  A range
    can also be compared with a bare integer offset, in which case only
    ``_ent_start`` takes part; this is what allows a sorted list of ranges
    to be searched with ``bisect(ranges, offset)``.
    """

    __slots__ = ['_ent_start', '_ent_end', '_data_start']

    def __init__(self, ent_start, ent_end, data_start):
        self._ent_start = ent_start
        self._ent_end = ent_end
        self._data_start = data_start

    def _compare(self, other):
        """Three-way compare this range with another range or an offset.

        :param other: A ResponseRange, or an integer-like offset.
        :return: -1, 0 or 1; NotImplemented for any other kind of object.
        """
        if isinstance(other, ResponseRange):
            mine = (self._ent_start, self._ent_end, self._data_start)
            theirs = (other._ent_start, other._ent_end, other._data_start)
        elif hasattr(other, '__index__'):
            # Later on we bisect for a starting point, so we allow comparing
            # against a single integer.  Testing for __index__ rather than
            # for ``int`` also accepts Python 2 ``long`` offsets.
            mine = self._ent_start
            theirs = other
        else:
            return NotImplemented
        return (mine > theirs) - (mine < theirs)

    def __cmp__(self, other):
        """Compare this to other.

        We need this both for sorting, and so that we can
        bisect the list of ranges.  Only Python 2 calls it implicitly; the
        rich comparison methods below give the same ordering everywhere.
        """
        return self._compare(other)

    def __eq__(self, other):
        order = self._compare(other)
        return order if order is NotImplemented else order == 0

    def __ne__(self, other):
        order = self._compare(other)
        return order if order is NotImplemented else order != 0

    def __lt__(self, other):
        order = self._compare(other)
        return order if order is NotImplemented else order < 0

    def __le__(self, other):
        order = self._compare(other)
        return order if order is NotImplemented else order <= 0

    def __gt__(self, other):
        order = self._compare(other)
        return order if order is NotImplemented else order > 0

    def __ge__(self, other):
        order = self._compare(other)
        return order if order is NotImplemented else order >= 0

    def __str__(self):
        return "%s(%s-%s,%s)" % (self.__class__.__name__,
                                 self._ent_start, self._ent_end,
                                 self._data_start)

    __repr__ = __str__
63 |
||
1750.1.2
by Michael Ellerman
Add support for HTTP multipart ranges and hook it into http+urllib. |
64 |
|
65 |
class RangeFile(object):
    """File-like object that allows access to partially available data.

    Which parts are available is specified by a set of ranges.
    """

    def __init__(self, path, input_file):
        """Create a RangeFile with no ranges yet.

        :param path: Name reported in the errors raised by read().
        :param input_file: Object whose read() returns all the data.
        """
        self._path = path
        self._ranges = []
        self._len = 0
        self._pos = 0
        self._data = input_file.read()

    def _add_range(self, ent_start, ent_end, data_start):
        """Add an entity range.

        :param ent_start: Start offset of entity
        :param ent_end: End offset of entity (inclusive)
        :param data_start: Start offset of data in data stream.
        """
        entry = ResponseRange(ent_start, ent_end, data_start)
        self._ranges.append(entry)
        # NOTE(review): _len records the largest inclusive end offset seen,
        # not end + 1; seek(..., 2) is relative to this value.
        if ent_end > self._len:
            self._len = ent_end

    def _finish_ranges(self):
        """Sort the collected ranges so that read() can bisect them."""
        self._ranges.sort()

    def read(self, size):
        """Read size bytes from the current position in the file.

        Reading across ranges is not supported.

        :raises errors.InvalidRange: when the current position is not inside
            a known range, or the read would run past the end of its range.
        """
        pos = self._pos
        # find the last range which has a start <= pos
        index = bisect(self._ranges, pos) - 1

        if index < 0 or self._ranges[index]._ent_end < pos:
            mutter('Bisect for pos: %s failed. Found offset: %d, ranges:%s',
                   pos, index, self._ranges)
            raise errors.InvalidRange(self._path, pos)

        current = self._ranges[index]
        last_wanted = pos + size - 1
        if last_wanted > current._ent_end:
            raise errors.InvalidRange(self._path, pos)

        # Translate the entity offset into an offset in the data stream.
        data_from = current._data_start + (pos - current._ent_start)
        data_to = data_from + size
        self._pos = pos + size
        return self._data[data_from:data_to]

    def seek(self, offset, whence=0):
        """Move the current position.

        :param offset: Offset relative to the reference given by whence.
        :param whence: 0 for the start, 1 for the current position, 2 for
            the recorded end of the known ranges.
        :raises ValueError: for any other value of whence.
        """
        if whence == 0:
            target = offset
        elif whence == 1:
            target = self._pos + offset
        elif whence == 2:
            target = self._len + offset
        else:
            raise ValueError("Invalid value %s for whence." % whence)

        # Positions before the start are clamped to 0.
        self._pos = 0 if target < 0 else target

    def tell(self):
        """Return the current position."""
        return self._pos
|
133 |
||
1786.1.5
by John Arbash Meinel
Move the common Multipart stuff into plain http, and wrap pycurl response so that it matches the urllib response object. |
134 |
|
1750.1.2
by Michael Ellerman
Add support for HTTP multipart ranges and hook it into http+urllib. |
135 |
class HttpRangeResponse(RangeFile):
    """A single-range HTTP response."""

    # TODO: jam 20060706 Consider compiling these regexes on demand
    # Matches "<unit> <first>-<last>/<total>".  The total may be '*' when
    # the server does not know the complete length (RFC 2616, 14.16).
    _CONTENT_RANGE_RE = re.compile(
        r'\s*([^\s]+)\s+([0-9]+)-([0-9]+)/([0-9]+|\*)\s*$')

    def __init__(self, path, content_range, input_file):
        """Wrap the body of a non-multipart 206 response.

        :param path: Name used in error messages.
        :param content_range: The value of the Content-Range header.
        :param input_file: Object whose read() returns the response body.
        """
        RangeFile.__init__(self, path, input_file)
        start, end = self._parse_range(content_range, path)
        # The single range is registered with its data at offset 0.
        self._add_range(start, end, 0)
        self._finish_ranges()

    @staticmethod
    def _parse_range(range, path='<unknown>'):
        """Parse an http Content-range header and return start + end

        :param range: The value for Content-range
        :param path: Provide to give better error messages.
        :return: (start, end) A tuple of integers
        :raises errors.InvalidHttpRange: when the value cannot be parsed or
            its unit is not 'bytes'.
        """
        match = HttpRangeResponse._CONTENT_RANGE_RE.match(range)
        if not match:
            raise errors.InvalidHttpRange(path, range,
                                          "Invalid Content-range")

        # The total length (group 4) is matched but not needed here.
        rtype, start, end = match.group(1, 2, 3)

        if rtype != 'bytes':
            raise errors.InvalidHttpRange(path, range,
                    "Unsupported range type '%s'" % (rtype,))

        try:
            start = int(start)
            end = int(end)
        except ValueError as e:
            raise errors.InvalidHttpRange(path, range, str(e))

        return start, end
|
1786.1.16
by John Arbash Meinel
Refactor tests |
175 |
|
176 |
||
177 |
class HttpMultipartRangeResponse(RangeFile):
    """A multi-range HTTP response."""

    # Group 2 is the boundary, with or without surrounding double quotes.
    _CONTENT_TYPE_RE = re.compile(
        r'^\s*multipart/byteranges\s*;\s*boundary\s*=\s*("?)([^"]*?)\1\s*$')

    # Start with --<boundary>\r\n
    # and ignore all headers ending in \r\n
    # except for content-range:
    # and find the two trailing \r\n separators
    # indicating the start of the text
    # TODO: jam 20060706 This requires exact conformance
    #       to the spec, we probably could relax the requirement
    #       of \r\n, and use something more like (\r?\n)
    _BOUNDARY_PATT = (
        "^--%s(?:\r\n(?:(?:content-range:([^\r]+))|[^\r]+))+\r\n\r\n")

    def __init__(self, path, content_type, input_file):
        """Split a multipart/byteranges body into its ranges.

        :param path: Name used in error messages.
        :param content_type: The value of the Content-Type header, which
            carries the boundary string.
        :param input_file: Object whose read() returns the response body.
        :raises errors.InvalidHttpResponse: when a part carries no
            Content-Range header.
        """
        RangeFile.__init__(self, path, input_file)

        self.boundary_regex = self._parse_boundary(content_type, path)

        for match in self.boundary_regex.finditer(self._data):
            content_range = match.group(1)
            if content_range is None:
                # The part had headers, but none of them was content-range.
                # Report it instead of failing inside the range parser.
                raise errors.InvalidHttpResponse(path,
                    'Missing the Content-Range header in a multipart section')
            ent_start, ent_end = HttpRangeResponse._parse_range(content_range,
                                                                path)
            # The data of a part begins right after its header block.
            self._add_range(ent_start, ent_end, match.end())

        self._finish_ranges()

    @staticmethod
    def _parse_boundary(ctype, path='<unknown>'):
        """Parse the Content-type field.

        This expects a multipart Content-type, and returns a
        regex which is capable of finding the boundaries
        in the multipart data.

        :param ctype: The value of the Content-type header.
        :param path: Provide to give better error messages.
        :raises errors.InvalidHttpContentType: when ctype is not a
            multipart/byteranges value with a boundary.
        """
        match = HttpMultipartRangeResponse._CONTENT_TYPE_RE.match(ctype)
        if not match:
            raise errors.InvalidHttpContentType(path, ctype,
                    "Expected multipart/byteranges with boundary")

        boundary = match.group(2)
        pattern = HttpMultipartRangeResponse._BOUNDARY_PATT
        # The boundary is arbitrary text, so it is escaped before being
        # placed into the pattern.
        return re.compile(pattern % re.escape(boundary),
                          re.IGNORECASE | re.MULTILINE)
|
1786.1.21
by John Arbash Meinel
(broken) Work on factoring out handle_response so we can test with fake headers. |
228 |
|
229 |
||
1786.1.25
by John Arbash Meinel
Test that we can extract headers properly. |
230 |
def _is_multipart(content_type): |
231 |
return content_type.startswith('multipart/byteranges;') |
|
232 |
||
233 |
||
1786.1.27
by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration. |
234 |
def handle_response(url, code, headers, data):
    """Interpret the code & headers and return a HTTP response.

    This is a factory method which returns an appropriate HTTP response
    based on the code & headers it's given.

    :param url: The url being processed. Mostly for error reporting
    :param code: The integer HTTP response code
    :param headers: A dict-like object that contains the HTTP response headers
    :param data: A file-like object that can be read() to get the
                 requested data
    :return: A file-like object that can seek()+read() the
             ranges indicated by the headers.
    :raises errors.NoSuchFile: for a 404 response.
    :raises errors.InvalidRange: for a 416 response.
    :raises errors.InvalidHttpResponse: for any other unhandled code, or a
        non-multipart 206 response without a Content-Range header.
    """
    if code == 200:
        # A regular non-range response, unfortunately the result from
        # urllib doesn't support seek, so we wrap it in a StringIO
        if getattr(data, 'tell', None) is None:
            return StringIO(data.read())
        return data

    if code == 404:
        raise errors.NoSuchFile(url)

    if code == 416:
        # We don't know which, but one of the ranges we specified
        # was wrong. So we raise with 0 for a lack of a better
        # magic value.
        raise errors.InvalidRange(url, 0)

    if code != 206:
        # TODO: jam 20060713 Properly handle redirects (302 Found, etc)
        #       The '_get' code says to follow redirects, we probably
        #       should actually handle the return values
        raise errors.InvalidHttpResponse(url, "Unknown response code %s"
                                              % (code,))

    # From here on this is a 206 (partial content) response.
    try:
        content_type = headers['Content-Type']
    except KeyError:
        # When there is no content-type header we treat
        # the response as being of type 'application/octet-stream' as per
        # RFC2616 section 7.2.1.
        # Therefore it is obviously not multipart
        content_type = 'application/octet-stream'
        multipart = False
    else:
        multipart = _is_multipart(content_type)

    if multipart:
        # Full fledged multipart response
        return HttpMultipartRangeResponse(url, content_type, data)

    # A response to a range request, but not multipart
    try:
        content_range = headers['Content-Range']
    except KeyError:
        raise errors.InvalidHttpResponse(url,
            'Missing the Content-Range header in a 206 range response')
    return HttpRangeResponse(url, content_range, data)
|
1786.1.21
by John Arbash Meinel
(broken) Work on factoring out handle_response so we can test with fake headers. |
294 |