130
# TODO: jam 20060706 Consider compiling these regexes on demand
131
# Matches a Content-range value such as "bytes 0-9/100".  The four groups
# are, in order: the range type token, the start offset, the end offset and
# the total size (all captured as strings).
# Written as a raw string so that the regex escape \s is handed to the regex
# engine verbatim instead of relying on Python leaving an unknown string
# escape untouched.
_CONTENT_RANGE_RE = re.compile(
    r'\s*([^\s]+)\s+([0-9]+)-([0-9]+)/([0-9]+)\s*$')
135
def _parse_range(range, path='<unknown>'):
136
"""Parse an http Content-range header and return start + end
138
:param range: The value for Content-range
139
:param path: Provide to give better error messages.
140
:return: (start, end) A tuple of integers
142
match = _CONTENT_RANGE_RE.match(range)
144
raise errors.InvalidHttpRange(path, range,
145
"Invalid Content-range")
147
rtype, start, end, total = match.groups()
150
raise errors.InvalidHttpRange(path, range,
151
"Unsupported range type '%s'" % (rtype,))
156
except ValueError, e:
157
raise errors.InvalidHttpRange(path, range, str(e))
162
130
class HttpRangeResponse(RangeFile):
163
131
"""A single-range HTTP response."""
133
# TODO: jam 20060706 Consider compiling these regexes on demand
134
# Matches a Content-range value such as "bytes 0-9/100".  The four groups
# are, in order: the range type token, the start offset, the end offset and
# the total size (all captured as strings).
# Written as a raw string so that the regex escape \s is handed to the regex
# engine verbatim instead of relying on Python leaving an unknown string
# escape untouched.
_CONTENT_RANGE_RE = re.compile(
    r'\s*([^\s]+)\s+([0-9]+)-([0-9]+)/([0-9]+)\s*$')
165
137
# Set up the object for a single-range (non-multipart) 206 response.
#
# :param path: Used in the trace message and passed on to RangeFile.__init__
#     and to the Content-range parser (for its error messages).
# :param content_range: The Content-range value handed to _parse_range.
# :param input_file: Forwarded unchanged to RangeFile.__init__.
#
# NOTE(review): the bare numbers between statements, and the two adjacent
# "start, end = ..." assignments (module-level _parse_range vs.
# self._parse_range), look like residue of a side-by-side diff: line-number
# columns plus the old and the new form of one line.  Confirm which form
# belongs in the real file before relying on this listing.
def __init__(self, path, content_range, input_file):
166
138
# Trace which path this response is being parsed for.
mutter("parsing 206 non-multipart response for %s", path)
167
139
# Base-class initialisation is invoked explicitly on RangeFile.
RangeFile.__init__(self, path, input_file)
168
# Parse the header into a (start, end) pair.
start, end = _parse_range(content_range, path)
140
start, end = self._parse_range(content_range, path)
169
141
# Register the one range; the third argument is 0 here -- presumably the
# offset of the range's bytes within the response body (TODO confirm
# against RangeFile._add_range).
self._add_range(start, end, 0)
170
142
# No further ranges will be added for this response.
self._finish_ranges()
173
# Matches a "multipart/byteranges; boundary=<value>" Content-type value,
# tolerating whitespace at either end and around ';' and '='.  Group 1 is the
# boundary value with surrounding whitespace excluded.
# Written as a raw string so that the regex escape \s is handed to the regex
# engine verbatim instead of relying on Python leaving an unknown string
# escape untouched.
_CONTENT_TYPE_RE = re.compile(
    r'^\s*multipart/byteranges\s*;\s*boundary\s*=\s*(.*?)\s*$')
177
# Start with --<boundary>\r\n
178
# and ignore all headers ending in \r\n
179
# except for content-range:
180
# and find the two trailing \r\n separators
181
# indicating the start of the text
182
# TODO: jam 20060706 This requires exact conformance
183
# to the spec, we probably could relax the requirement
184
# of \r\n, and use something more like (\r?\n)
186
"^--%s(?:\r\n(?:(?:content-range:([^\r]+))|[^\r]+))+\r\n\r\n")
189
def _parse_boundary(ctype, path='<unknown>'):
190
"""Parse the Content-type field.
192
This expects a multipart Content-type, and returns a
193
regex which is capable of finding the boundaries
194
in the multipart data.
196
match = _CONTENT_TYPE_RE.match(ctype)
198
raise errors.InvalidHttpContentType(path, ctype,
199
"Expected multipart/byteranges with boundary")
201
boundary = match.group(1)
202
mutter('multipart boundary is %s', boundary)
203
return re.compile(_BOUNDARY_PATT % re.escape(boundary),
204
re.IGNORECASE | re.MULTILINE)
145
def _parse_range(range, path='<unknown>'):
146
"""Parse an http Content-range header and return start + end
148
:param range: The value for Content-range
149
:param path: Provide to give better error messages.
150
:return: (start, end) A tuple of integers
152
match = HttpRangeResponse._CONTENT_RANGE_RE.match(range)
154
raise errors.InvalidHttpRange(path, range,
155
"Invalid Content-range")
157
rtype, start, end, total = match.groups()
160
raise errors.InvalidHttpRange(path, range,
161
"Unsupported range type '%s'" % (rtype,))
166
except ValueError, e:
167
raise errors.InvalidHttpRange(path, range, str(e))
207
172
class HttpMultipartRangeResponse(RangeFile):
208
173
"""A multi-range HTTP response."""
175
# Matches a "multipart/byteranges; boundary=<value>" Content-type value,
# tolerating whitespace at either end and around ';' and '='.  Group 1 is the
# boundary value with surrounding whitespace excluded.
# Written as a raw string so that the regex escape \s is handed to the regex
# engine verbatim instead of relying on Python leaving an unknown string
# escape untouched.
_CONTENT_TYPE_RE = re.compile(
    r'^\s*multipart/byteranges\s*;\s*boundary\s*=\s*(.*?)\s*$')
178
# Start with --<boundary>\r\n
179
# and ignore all headers ending in \r\n
180
# except for content-range:
181
# and find the two trailing \r\n separators
182
# indicating the start of the text
183
# TODO: jam 20060706 This requires exact conformance
184
# to the spec, we probably could relax the requirement
185
# of \r\n, and use something more like (\r?\n)
187
"^--%s(?:\r\n(?:(?:content-range:([^\r]+))|[^\r]+))+\r\n\r\n")
210
189
# Set up the object for a multi-range (multipart) 206 response.
#
# :param path: Used in the trace message and passed on to RangeFile.__init__
#     and to the Content-type / Content-range parsers (for error messages).
# :param content_type: The Content-type value from which the boundary
#     regex is built.
# :param input_file: Forwarded unchanged to RangeFile.__init__.
#
# NOTE(review): the bare numbers between statements, and the paired
# assignments to self.boundary_regex and to "ent_start, ent_end", look like
# residue of a side-by-side diff: line-number columns plus the old and the
# new form of the same line.  Confirm which form belongs in the real file.
def __init__(self, path, content_type, input_file):
211
190
# Trace which path this response is being parsed for.
mutter("parsing 206 multipart response for %s", path)
213
192
# grandparent without initializing parent?
214
193
RangeFile.__init__(self, path, input_file)
216
# Compile a regex that locates each part's boundary-plus-headers block.
self.boundary_regex = _parse_boundary(content_type, path)
195
self.boundary_regex = self._parse_boundary(content_type, path)
218
197
# Walk every boundary match in self._data (not assigned in this method;
# presumably populated by RangeFile.__init__ -- TODO confirm).
for match in self.boundary_regex.finditer(self._data):
219
# Group 1 is what the boundary regex captured; it is parsed here as that
# part's Content-range value.
ent_start, ent_end = _parse_range(match.group(1), path)
198
ent_start, ent_end = HttpRangeResponse._parse_range(match.group(1), path)
220
199
# match.end() is the position just past the matched boundary/header block,
# recorded alongside the byte range it carries.
self._add_range(ent_start, ent_end, match.end())
222
201
# All parts have been registered.
self._finish_ranges()
204
def _parse_boundary(ctype, path='<unknown>'):
205
"""Parse the Content-type field.
207
This expects a multipart Content-type, and returns a
208
regex which is capable of finding the boundaries
209
in the multipart data.
211
match = HttpMultipartRangeResponse._CONTENT_TYPE_RE.match(ctype)
213
raise errors.InvalidHttpContentType(path, ctype,
214
"Expected multipart/byteranges with boundary")
225
def _is_multipart(content_type):
    """Report whether a Content-Type value announces multipart byte ranges.

    :param content_type: The value of the Content-Type field.
    :return: True when the value begins with exactly
        'multipart/byteranges;' (case-sensitive, no leading whitespace),
        False otherwise.
    """
    multipart_prefix = 'multipart/byteranges;'
    return content_type.startswith(multipart_prefix)
216
boundary = match.group(1)
217
mutter('multipart boundary is %s', boundary)
218
pattern = HttpMultipartRangeResponse._BOUNDARY_PATT
219
return re.compile(pattern % re.escape(boundary),
220
re.IGNORECASE | re.MULTILINE)
230
223
def handle_response(url, code, headers, response):