258
261
:param offsets: A list of (offset, size) tuples.
259
262
:param return: A list or generator of (offset, data) tuples
261
sorted_offsets = sorted(list(offsets))
262
fudge = self._bytes_to_read_before_seek
263
coalesced = self._coalesce_offsets(sorted_offsets,
264
limit=self._max_readv_combine,
266
coalesced = list(coalesced)
267
mutter('http readv of %s offsets => %s collapsed %s',
268
relpath, len(offsets), len(coalesced))
270
f = self._get_ranges_hinted(relpath, coalesced)
271
for start, size in offsets:
275
f.seek(start, ((start < 0) and 2) or 0)
279
if len(data) != size:
280
raise errors.ShortReadvError(relpath, start, size,
282
except errors.ShortReadvError, e:
283
self._degrade_range_hint(relpath, coalesced, sys.exc_info())
285
# Since the offsets and the ranges may not be in the same
286
# order, we don't try to calculate a restricted single
287
# range encompassing unprocessed offsets.
289
# Note: we replace 'f' here, it may need cleaning one day
290
# before being thrown that way.
291
f = self._get_ranges_hinted(relpath, coalesced)
294
# After one or more tries, we get the data.
265
# offsets may be a genarator, we will iterate it several times, so
267
offsets = list(offsets)
273
# Coalesce the offsets to minimize the GET requests issued
274
sorted_offsets = sorted(offsets)
275
coalesced = self._coalesce_offsets(
276
sorted_offsets, limit=self._max_readv_combine,
277
fudge_factor=self._bytes_to_read_before_seek)
279
# Turn it into a list, we will iterate it several times
280
coalesced = list(coalesced)
281
mutter('http readv of %s offsets => %s collapsed %s',
282
relpath, len(offsets), len(coalesced))
284
# Cache the data read, but only until it's been used
286
# We will iterate on the data received from the GET requests and
287
# serve the corresponding offsets repecting the initial order. We
288
# need an offset iterator for that.
289
iter_offsets = iter(offsets)
290
cur_offset_and_size = iter_offsets.next()
293
for cur_coal, file in self._coalesce_readv(relpath, coalesced):
294
# Split the received chunk
295
for offset, size in cur_coal.ranges:
296
start = cur_coal.start + offset
298
data = file.read(size)
301
raise errors.ShortReadvError(relpath, start, size,
303
data_map[(start, size)] = data
305
# Yield everything we can
306
while cur_offset_and_size in data_map:
307
# Clean the cached data since we use it
308
# XXX: will break if offsets contains duplicates --
310
this_data = data_map.pop(cur_offset_and_size)
311
yield cur_offset_and_size[0], this_data
312
cur_offset_and_size = iter_offsets.next()
314
except (errors.ShortReadvError,errors.InvalidRange), e:
315
self._degrade_range_hint(relpath, coalesced, sys.exc_info())
316
# Some offsets may have been already processed, so we retry
317
# only the unsuccessful ones.
318
offsets = [cur_offset_and_size] + [o for o in iter_offsets]
321
# NOTE(review): this region is a garbled extraction -- bare numeric lines
# (the original file's line numbers) are interleaved with the code, and a
# few source lines are missing (see notes below).  Code lines are left
# byte-identical; only comments/docstring were added.
def _coalesce_readv(self, relpath, coalesced):
322
"""Issue several GET requests to satisfy the coalesced offsets

:param relpath: path of the file to read, relative to the transport
    root (forwarded to self._get).
:param coalesced: sequence of coalesced offset objects (len() is taken
    and it is sliced below, so it must be a concrete sequence).
:return: presumably a generator of (coalesced_range, file) pairs -- the
    terminal yield lines (original lines 340+) are missing from this
    view; TODO confirm against the caller, which unpacks
    ``for cur_coal, file in self._coalesce_readv(...)``.
"""
323
total = len(coalesced)
324
# Choose how many coalesced ranges may be packed into a single GET
# request, based on what the server has been observed to support.
if self._range_hint == 'multi':
325
max_ranges = self._max_get_ranges
326
elif self._range_hint == 'single':
# NOTE(review): the body of the 'single' branch (original lines
# 327-328) and the default-branch assignment (original line 330) are
# missing from this extraction; ``total`` above is probably consumed
# there -- confirm against the original file before relying on this.
329
# The whole file will be downloaded anyway
331
# TODO: Some web servers may ignore the range requests and return the
332
# whole file, we may want to detect that and avoid further requests.
333
# Hint: test_readv_multiple_get_requests will fail in that case .
334
# Issue one GET per group of at most max_ranges coalesced ranges.
for group in xrange(0, len(coalesced), max_ranges):
335
ranges = coalesced[group:group+max_ranges]
336
# Note that the following may raise errors.InvalidRange. It's the
337
# caller responsability to decide how to retry since it may provide
338
# different coalesced offsets.
339
code, file = self._get(relpath, ranges)
297
343
def recommended_page_size(self):
298
344
"""See Transport.recommended_page_size().