189
189
_bytes_to_read_before_seek = 128
190
190
# No limit on the number of offsets combined into one, we are trying
191
# to avoid downloading the whole file. But see _pycurl.py for a different
191
# to avoid downloading the whole file.
193
192
_max_readv_combine = 0
194
193
# By default Apache has a limit of ~400 ranges before replying with a 400
195
194
# Bad Request. So we go underneath that amount to be safe.
196
195
_max_get_ranges = 200
196
# We impose no limit on the range size. But see _pycurl.py for a different
198
200
def _readv(self, relpath, offsets):
199
201
"""Get parts of the file at the given relative path.
214
216
sorted_offsets = sorted(offsets)
215
217
coalesced = self._coalesce_offsets(
216
218
sorted_offsets, limit=self._max_readv_combine,
217
fudge_factor=self._bytes_to_read_before_seek)
219
fudge_factor=self._bytes_to_read_before_seek,
220
max_size=self._get_max_size)
219
222
# Turn it into a list, we will iterate it several times
220
223
coalesced = list(coalesced)
269
272
def _coalesce_readv(self, relpath, coalesced):
270
273
"""Issue several GET requests to satisfy the coalesced offsets"""
271
total = len(coalesced)
272
if self._range_hint == 'multi':
273
max_ranges = self._max_get_ranges
274
elif self._range_hint == 'single':
275
def get_and_yield(relpath, coalesced):
277
# Note that the _get below may raise
278
# errors.InvalidHttpRange. It's the caller's responsibility to
279
# decide how to retry since it may provide different coalesced
281
code, rfile = self._get(relpath, coalesced)
282
for coal in coalesced:
285
if self._range_hint is None:
286
# Download whole file
287
for c, rfile in get_and_yield(relpath, coalesced):
277
# The whole file will be downloaded anyway
279
# TODO: Some web servers may ignore the range requests and return the
280
# whole file, we may want to detect that and avoid further requests.
281
# Hint: test_readv_multiple_get_requests will fail once we do that
282
for group in xrange(0, len(coalesced), max_ranges):
283
ranges = coalesced[group:group+max_ranges]
284
# Note that the following may raise errors.InvalidHttpRange. It's
285
# the caller's responsibility to decide how to retry since it may
286
# provide different coalesced offsets.
287
code, rfile = self._get(relpath, ranges)
290
total = len(coalesced)
291
if self._range_hint == 'multi':
292
max_ranges = self._max_get_ranges
293
else: # self._range_hint == 'single'
295
# TODO: Some web servers may ignore the range requests and return
296
# the whole file, we may want to detect that and avoid further
298
# Hint: test_readv_multiple_get_requests will fail once we do that
301
for coal in coalesced:
302
if ((self._get_max_size > 0
303
and cumul + coal.length > self._get_max_size)
304
or len(ranges) >= max_ranges):
305
# Get that much and yield
306
for c, rfile in get_and_yield(relpath, ranges):
308
# Restart with the current offset
314
# Get the rest and yield
315
for c, rfile in get_and_yield(relpath, ranges):
291
318
def recommended_page_size(self):
292
319
"""See Transport.recommended_page_size().
410
437
return self.__class__(self.abspath(offset), self)
412
439
def _attempted_range_header(self, offsets, tail_amount):
413
"""Prepare a HTTP Range header at a level the server should accept"""
440
"""Prepare a HTTP Range header at a level the server should accept.
442
:return: the range header representing offsets/tail_amount or None if
443
no header can be built.
415
446
if self._range_hint == 'multi':
416
447
# Generate the header describing all offsets