# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the
# Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330,
# Boston, MA 02111-1307 USA
# This file is part of urlgrabber, a high-level cross-protocol url-grabber
# Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
# $Id: byterange.py,v 1.9 2005/02/14 21:55:07 mstenner Exp $
29
from cStringIO import StringIO
30
except ImportError, msg:
31
from StringIO import StringIO
33
class RangeError(IOError):
    """Signals that a requested byte range cannot be satisfied.

    Because this is an IOError subclass, handlers written for IOError
    catch it as well.
    """
37
class HTTPRangeHandler(urllib2.BaseHandler):
38
"""Handler that enables HTTP Range headers.
40
This was extremely simple. The Range header is an HTTP feature to
begin with so all this class does is tell urllib2 that the
"206 Partial Content" response from the HTTP server is what we
49
range_handler = byterange.HTTPRangeHandler()
50
opener = urllib2.build_opener(range_handler)
53
urllib2.install_opener(opener)
55
# create Request and set Range header
56
req = urllib2.Request('http://www.python.org/')
57
req.headers['Range'] = 'bytes=30-50'
58
f = urllib2.urlopen(req)
61
def http_error_206(self, req, fp, code, msg, hdrs):
62
# 206 Partial Content Response
63
r = urllib.addinfourl(fp, hdrs, req.get_full_url())
68
def http_error_416(self, req, fp, code, msg, hdrs):
    """Turn an HTTP "Range Not Satisfiable" (416) error into a RangeError.

    None of the arguments are used; this handler never returns a
    response object and always raises.
    """
    unsatisfiable = RangeError('Requested Range Not Satisfiable')
    raise unsatisfiable
72
class RangeableFileObject:
73
"""File object wrapper to enable raw range handling.
74
This was implemented primarily for handling range
75
specifications for file:// urls. This object effectively makes
76
a file object look like it consists only of a range of bytes in
80
# expose 10 bytes, starting at byte position 20, from
82
>>> fo = RangeableFileObject(file('/etc/passwd', 'r'), (20,30))
83
# seek seeks within the range (to position 23 in this case)
85
# tell tells where you're at _within the range_ (position 3 in
88
# read EOFs if an attempt is made to read past the last
89
# byte in the range. the following will return only 7 bytes.
93
def __init__(self, fo, rangetup):
94
"""Create a RangeableFileObject.
95
fo -- a file like object. only the read() method need be
96
supported but supporting an optimized seek() is
98
rangetup -- a (firstbyte,lastbyte) tuple specifying the range
100
The file object provided is assumed to be at byte offset 0.
103
(self.firstbyte, self.lastbyte) = range_tuple_normalize(rangetup)
105
self._do_seek(self.firstbyte)
107
def __getattr__(self, name):
108
"""This effectively allows us to wrap at the instance level.
109
Any attribute not found in _this_ object will be searched for
110
in self.fo. This includes methods."""
111
if hasattr(self.fo, name):
112
return getattr(self.fo, name)
113
raise AttributeError, name
116
"""Return the position within the range.
117
This is different from fo.seek in that position 0 is the
118
first byte position of the range tuple. For example, if
119
this object was created with a range tuple of (500,899),
120
tell() will return 0 when at byte position 500 of the file.
122
return (self.realpos - self.firstbyte)
124
def seek(self,offset,whence=0):
125
"""Seek within the byte range.
126
Positioning is identical to that described under tell().
128
assert whence in (0, 1, 2)
129
if whence == 0: # absolute seek
130
realoffset = self.firstbyte + offset
131
elif whence == 1: # relative seek
132
realoffset = self.realpos + offset
133
elif whence == 2: # absolute from end of file
134
# XXX: are we raising the right Error here?
135
raise IOError('seek from end of file not supported.')
137
# do not allow seek past lastbyte in range
138
if self.lastbyte and (realoffset >= self.lastbyte):
139
realoffset = self.lastbyte
141
self._do_seek(realoffset - self.realpos)
143
def read(self, size=-1):
144
"""Read within the range.
145
This method will limit the size read based on the range.
147
size = self._calc_read_size(size)
148
rslt = self.fo.read(size)
149
self.realpos += len(rslt)
152
def readline(self, size=-1):
153
"""Read lines within the range.
154
This method will limit the size read based on the range.
156
size = self._calc_read_size(size)
157
rslt = self.fo.readline(size)
158
self.realpos += len(rslt)
161
def _calc_read_size(self, size):
162
"""Handles calculating the amount of data to read based on
167
if ((self.realpos + size) >= self.lastbyte):
168
size = (self.lastbyte - self.realpos)
170
size = (self.lastbyte - self.realpos)
173
def _do_seek(self,offset):
174
"""Seek based on whether wrapped object supports seek().
175
offset is relative to the current position (self.realpos).
178
if not hasattr(self.fo, 'seek'):
179
self._poor_mans_seek(offset)
181
self.fo.seek(self.realpos + offset)
182
self.realpos+= offset
184
def _poor_mans_seek(self,offset):
185
"""Seek by calling the wrapped file objects read() method.
186
This is used for file like objects that do not have native
187
seek support. The wrapped object's read() method is called
188
to manually seek to the desired position.
189
offset -- read this number of bytes from the wrapped
191
raise RangeError if we encounter EOF before reaching the
197
if (pos + bufsize) > offset:
198
bufsize = offset - pos
199
buf = self.fo.read(bufsize)
200
if len(buf) != bufsize:
201
raise RangeError('Requested Range Not Satisfiable')
204
class FileRangeHandler(urllib2.FileHandler):
205
"""FileHandler subclass that adds Range support.
206
This class handles Range headers exactly like an HTTP
209
def open_local_file(self, req):
212
host = req.get_host()
213
file = req.get_selector()
214
localfile = urllib.url2pathname(file)
215
stats = os.stat(localfile)
216
size = stats[stat.ST_SIZE]
217
modified = rfc822.formatdate(stats[stat.ST_MTIME])
218
mtype = mimetypes.guess_type(file)[0]
220
host, port = urllib.splitport(host)
221
if port or socket.gethostbyname(host) not in self.get_names():
222
raise URLError('file not on local host')
223
fo = open(localfile,'rb')
224
brange = req.headers.get('Range',None)
225
brange = range_header_to_tuple(brange)
229
if lb == '': lb = size
230
if fb < 0 or fb > size or lb > size:
231
raise RangeError('Requested Range Not Satisfiable')
233
fo = RangeableFileObject(fo, (fb,lb))
234
headers = mimetools.Message(StringIO(
235
'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
236
(mtype or 'text/plain', size, modified)))
237
return urllib.addinfourl(fo, headers, 'file:'+file)
241
# Unfortunately, a large amount of base FTP code had to be copied
242
# from urllib and urllib2 in order to insert the FTP REST command.
243
# Code modifications for range support have been commented as
245
# -- range support modifications start/end here
247
from urllib import splitport, splituser, splitpasswd, splitattr, \
248
unquote, addclosehook, addinfourl
256
class FTPRangeHandler(urllib2.FTPHandler):
257
def ftp_open(self, req):
258
host = req.get_host()
260
raise IOError, ('ftp error', 'no host given')
261
host, port = splitport(host)
263
port = ftplib.FTP_PORT
265
# username/password handling
266
user, host = splituser(host)
268
user, passwd = splitpasswd(user)
272
user = unquote(user or '')
273
passwd = unquote(passwd or '')
276
host = socket.gethostbyname(host)
277
except socket.error, msg:
279
path, attrs = splitattr(req.get_selector())
280
dirs = path.split('/')
281
dirs = map(unquote, dirs)
282
dirs, file = dirs[:-1], dirs[-1]
283
if dirs and not dirs[0]:
286
fw = self.connect_ftp(user, passwd, host, port, dirs)
287
type = file and 'I' or 'D'
289
attr, value = splitattr(attr)
290
if attr.lower() == 'type' and \
291
value in ('a', 'A', 'i', 'I', 'd', 'D'):
294
# -- range support modifications start here
296
range_tup = range_header_to_tuple(req.headers.get('Range',None))
297
assert range_tup != ()
301
# -- range support modifications end here
303
fp, retrlen = fw.retrfile(file, type, rest)
305
# -- range support modifications start here
309
if retrlen is None or retrlen == 0:
310
raise RangeError('Requested Range Not Satisfiable due to unobtainable file length.')
314
# beginning of range is larger than file
315
raise RangeError('Requested Range Not Satisfiable')
318
fp = RangeableFileObject(fp, (0,retrlen))
319
# -- range support modifications end here
322
mtype = mimetypes.guess_type(req.get_full_url())[0]
324
headers += "Content-Type: %s\n" % mtype
325
if retrlen is not None and retrlen >= 0:
326
headers += "Content-Length: %d\n" % retrlen
327
sf = StringIO(headers)
328
headers = mimetools.Message(sf)
329
return addinfourl(fp, headers, req.get_full_url())
330
except ftplib.all_errors, msg:
331
raise IOError, ('ftp error', msg), sys.exc_info()[2]
333
def connect_ftp(self, user, passwd, host, port, dirs):
334
fw = ftpwrapper(user, passwd, host, port, dirs)
337
class ftpwrapper(urllib.ftpwrapper):
338
# range support note:
339
# this ftpwrapper code is copied directly from
340
# urllib. The only enhancement is to add the rest
341
# argument and pass it on to ftp.ntransfercmd
342
def retrfile(self, file, type, rest=None):
344
if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
345
else: cmd = 'TYPE ' + type; isdir = 0
347
self.ftp.voidcmd(cmd)
348
except ftplib.all_errors:
350
self.ftp.voidcmd(cmd)
352
if file and not isdir:
353
# Use nlst to see if the file exists at all
356
except ftplib.error_perm, reason:
357
raise IOError, ('ftp error', reason), sys.exc_info()[2]
358
# Restore the transfer mode!
359
self.ftp.voidcmd(cmd)
360
# Try to retrieve as a file
363
conn = self.ftp.ntransfercmd(cmd, rest)
364
except ftplib.error_perm, reason:
365
if str(reason)[:3] == '501':
366
# workaround for REST not supported error
367
fp, retrlen = self.retrfile(file, type)
368
fp = RangeableFileObject(fp, (rest,''))
370
elif str(reason)[:3] != '550':
371
raise IOError, ('ftp error', reason), sys.exc_info()[2]
373
# Set transfer mode to ASCII!
374
self.ftp.voidcmd('TYPE A')
375
# Try a directory listing
376
if file: cmd = 'LIST ' + file
378
conn = self.ftp.ntransfercmd(cmd)
380
# Pass back both a suitably decorated object and a retrieval length
381
return (addclosehook(conn[0].makefile('rb'),
382
self.endtransfer), conn[1])
385
####################################################################
386
# Range Tuple Functions
387
# XXX: These range tuple functions might go better in a class.
390
def range_header_to_tuple(range_header):
391
"""Get a (firstbyte,lastbyte) tuple from a Range header value.
393
Range headers have the form "bytes=<firstbyte>-<lastbyte>". This
394
function pulls the firstbyte and lastbyte values and returns
395
a (firstbyte,lastbyte) tuple. If lastbyte is not specified in
396
the header value, it is returned as an empty string in the
399
Return None if range_header is None
400
Return () if range_header does not conform to the range spec
405
if range_header is None: return None
408
_rangere = re.compile(r'^bytes=(\d{1,})-(\d*)')
409
match = _rangere.match(range_header)
411
tup = range_tuple_normalize(match.group(1,2))
413
tup = (tup[0],tup[1]+1)
417
def range_tuple_to_header(range_tup):
418
"""Convert a range tuple to a Range header value.
419
Return a string of the form "bytes=<firstbyte>-<lastbyte>" or None
420
if no range is needed.
422
if range_tup is None: return None
423
range_tup = range_tuple_normalize(range_tup)
426
range_tup = (range_tup[0],range_tup[1] - 1)
427
return 'bytes=%s-%s' % range_tup
429
def range_tuple_normalize(range_tup):
430
"""Normalize a (first_byte,last_byte) range tuple.
431
Return a tuple whose first element is guaranteed to be an int
432
and whose second element will be '' (meaning: the last byte) or
433
an int. Finally, return None if the normalized tuple == (0,'')
434
as that is equivalent to retrieving the entire file.
436
if range_tup is None: return None
439
if fb in (None,''): fb = 0
442
try: lb = range_tup[1]
443
except IndexError: lb = ''
445
if lb is None: lb = ''
446
elif lb != '': lb = int(lb)
447
# check if range is over the entire file
448
if (fb,lb) == (0,''): return None
449
# check that the range is valid
450
if lb < fb: raise RangeError('Invalid byte range: %s-%s' % (fb,lb))