258
261
:param offsets: A list of (offset, size) tuples.
259
262
:param return: A list or generator of (offset, data) tuples
261
sorted_offsets = sorted(list(offsets))
262
fudge = self._bytes_to_read_before_seek
263
coalesced = self._coalesce_offsets(sorted_offsets,
264
limit=self._max_readv_combine,
266
coalesced = list(coalesced)
267
mutter('http readv of %s offsets => %s collapsed %s',
268
relpath, len(offsets), len(coalesced))
270
f = self._get_ranges_hinted(relpath, coalesced)
271
for start, size in offsets:
275
f.seek(start, ((start < 0) and 2) or 0)
279
if len(data) != size:
280
raise errors.ShortReadvError(relpath, start, size,
282
except errors.ShortReadvError, e:
283
self._degrade_range_hint(relpath, coalesced, sys.exc_info())
285
# Since the offsets and the ranges may not be in the same
286
# order, we don't try to calculate a restricted single
287
# range encompassing unprocessed offsets.
289
# Note: we replace 'f' here, it may need cleaning one day
290
# before being thrown that way.
291
f = self._get_ranges_hinted(relpath, coalesced)
294
# After one or more tries, we get the data.
265
# offsets may be a genarator, we will iterate it several times, so
267
offsets = list(offsets)
273
# Coalesce the offsets to minimize the GET requests issued
274
sorted_offsets = sorted(offsets)
275
coalesced = self._coalesce_offsets(
276
sorted_offsets, limit=self._max_readv_combine,
277
fudge_factor=self._bytes_to_read_before_seek)
279
# Turn it into a list, we will iterate it several times
280
coalesced = list(coalesced)
281
mutter('http readv of %s offsets => %s collapsed %s',
282
relpath, len(offsets), len(coalesced))
284
# Cache the data read, but only until it's been used
286
# We will iterate on the data received from the GET requests and
287
# serve the corresponding offsets repecting the initial order. We
288
# need an offset iterator for that.
289
iter_offsets = iter(offsets)
290
cur_offset_and_size = iter_offsets.next()
293
for cur_coal, file in self._coalesce_readv(relpath, coalesced):
294
# Split the received chunk
295
for offset, size in cur_coal.ranges:
296
start = cur_coal.start + offset
298
data = file.read(size)
301
raise errors.ShortReadvError(relpath, start, size,
303
data_map[(start, size)] = data
305
# Yield everything we can
306
while cur_offset_and_size in data_map:
307
# Clean the cached data since we use it
308
# XXX: will break if offsets contains duplicates --
310
this_data = data_map.pop(cur_offset_and_size)
311
yield cur_offset_and_size[0], this_data
312
cur_offset_and_size = iter_offsets.next()
314
except (errors.ShortReadvError,errors.InvalidRange), e:
315
self._degrade_range_hint(relpath, coalesced, sys.exc_info())
316
# Some offsets may have been already processed, so we retry
317
# only the unsuccessful ones.
318
offsets = [cur_offset_and_size] + [o for o in iter_offsets]
321
# NOTE(review): this region is a garbled extraction -- bare numeric lines
# (the original file's line numbers) are interleaved with the code, and a
# few source lines are missing (see notes below).  Code lines are left
# byte-identical; only comments/docstring were added.
def _coalesce_readv(self, relpath, coalesced):
322
"""Issue several GET requests to satisfy the coalesced offsets

:param relpath: path of the file to read, relative to the transport
    root (forwarded to self._get).
:param coalesced: sequence of coalesced offset objects (len() is taken
    and it is sliced below, so it must be a concrete sequence).
:return: presumably a generator of (coalesced_range, file) pairs -- the
    terminal yield lines (original lines 340+) are missing from this
    view; TODO confirm against the caller, which unpacks
    ``for cur_coal, file in self._coalesce_readv(...)``.
"""
323
total = len(coalesced)
324
# Choose how many coalesced ranges may be packed into a single GET
# request, based on what the server has been observed to support.
if self._range_hint == 'multi':
325
max_ranges = self._max_get_ranges
326
elif self._range_hint == 'single':
# NOTE(review): the body of the 'single' branch (original lines
# 327-328) and the default-branch assignment (original line 330) are
# missing from this extraction; ``total`` above is probably consumed
# there -- confirm against the original file before relying on this.
329
# The whole file will be downloaded anyway
331
# TODO: Some web servers may ignore the range requests and return the
332
# whole file, we may want to detect that and avoid further requests.
333
# Hint: test_readv_multiple_get_requests will fail in that case .
334
# Issue one GET per group of at most max_ranges coalesced ranges.
for group in xrange(0, len(coalesced), max_ranges):
335
ranges = coalesced[group:group+max_ranges]
336
# Note that the following may raise errors.InvalidRange. It's the
337
# caller responsability to decide how to retry since it may provide
338
# different coalesced offsets.
339
code, file = self._get(relpath, ranges)
297
343
def recommended_page_size(self):
298
344
"""See Transport.recommended_page_size().