~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/transport/http/__init__.py

Committer: Ian Clatworthy
Date: 2007-07-03 07:03:32 UTC
mfrom: (2520.2.3 115209)
mto: This revision was merged to the branch mainline in revision 2575.
Revision ID: ian.clatworthy@internode.on.net-20070703070332-45j7qw8z03fnulav

(Vincent Ladeuil) Fix #115209 - Unable to handle http code 400: Bad Request When issuing too many ranges

files modified:
bzrlib/tests/HTTPTestUtil.py

bzrlib/tests/test_http.py

bzrlib/transport/__init__.py

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/http/response.py

Show diffs side-by-side

added added

removed removed

bzrlib/transport/http/__init__.py

from bzrlib import errors, ui

from bzrlib.smart import medium

from bzrlib.symbol_versioning import (

deprecated_method,

zero_seventeen,

)

from bzrlib.trace import mutter

from bzrlib.transport import (

_CoalescedOffset,

Transport,

)

231

236

232

237

:param relpath: Path relative to transport base URL

233

238

:param ranges: None to get the whole file;

234

or [(start,end)+], a list of tuples to fetch parts of a file.

239

or a list of _CoalescedOffset to fetch parts of a file.

235

240

:param tail_amount: The amount to get from the end of the file.

236

241

237

242

:returns: (http_code, result_file)

249

254

"""

250

255

return self

251

256

252

def _retry_get(self, relpath, ranges, exc_info):

253

"""A GET request have failed, let's retry with a simpler request."""

254

255

try_again = False

256

# The server does not gives us enough data or

257

# a bogus-looking result, let's try again with

258

# a simpler request if possible.

257

def _degrade_range_hint(self, relpath, ranges, exc_info):

259

258

if self._range_hint == 'multi':

260

259

self._range_hint = 'single'

261

mutter('Retry %s with single range request' % relpath)

262

try_again = True

260

mutter('Retry "%s" with single range request' % relpath)

263

261

elif self._range_hint == 'single':

264

262

self._range_hint = None

265

mutter('Retry %s without ranges' % relpath)

266

try_again = True

267

if try_again:

268

# Note that since the offsets and the ranges may not

269

# be in the same order, we don't try to calculate a

270

# restricted single range encompassing unprocessed

271

# offsets.

272

code, f = self._get(relpath, ranges)

273

return try_again, code, f

263

mutter('Retry "%s" without ranges' % relpath)

274

264

else:

275

# We tried all the tricks, but nothing worked. We

276

# re-raise original exception; the 'mutter' calls

277

# above will indicate that further tries were

278

# unsuccessful

265

# We tried all the tricks, but nothing worked. We re-raise original

266

# exception; the 'mutter' calls above will indicate that further

267

# tries were unsuccessful

279

268

raise exc_info[0], exc_info[1], exc_info[2]

280

269

281

def readv(self, relpath, offsets):

282

"""Get parts of the file at the given relative path.

283

284

:param offsets: A list of (offset, size) tuples.

285

:param return: A list or generator of (offset, data) tuples

270

def _get_ranges_hinted(self, relpath, ranges):

271

"""Issue a ranged GET request taking server capabilities into account.

272

273

Depending on the errors returned by the server, we try several GET

274

requests, trying to minimize the data transferred.

275

276

:param relpath: Path relative to transport base URL

277

:param ranges: None to get the whole file;

278

or a list of _CoalescedOffset to fetch parts of a file.

279

:returns: A file handle containing at least the requested ranges.

286

280

"""

287

ranges = self.offsets_to_ranges(offsets)

288

mutter('http readv of %s collapsed %s offsets => %s',

289

relpath, len(offsets), ranges)

290

281

exc_info = None

291

282

try_again = True

292

283

while try_again:

293

284

try_again = False

294

285

try:

295

286

code, f = self._get(relpath, ranges)

296

except (errors.InvalidRange, errors.ShortReadvError), e:

297

try_again, code, f = self._retry_get(relpath, ranges,

298

sys.exc_info())

299

287

except errors.InvalidRange, e:

288

if exc_info is None:

289

exc_info = sys.exc_info()

290

self._degrade_range_hint(relpath, ranges, exc_info)

291

try_again = True

292

return f

293

294

# _coalesce_offsets is a helper for readv; it tries to combine ranges without

295

# degrading readv performance. _bytes_to_read_before_seek is the value

296

# used for the limit parameter and has been tuned for other transports. For

297

# HTTP, the name is inappropriate but the parameter is still useful and

298

# helps reduce the number of chunks in the response. The overhead for a

299

# chunk (headers, length, footer around the data itself) is variable but

300

# around 50 bytes. We use 128 to reduce the range specifiers that appear in

301

# the header, some servers (notably Apache) enforce a maximum length for a

302

# header and issue a '400: Bad request' error when too many ranges are

303

# specified.

304

_bytes_to_read_before_seek = 128

305

# No limit on the number of offsets that get combined into one; we are trying

306

# to avoid downloading the whole file.

307

_max_readv_combined = 0

308

309

def readv(self, relpath, offsets):

310

"""Get parts of the file at the given relative path.

311

312

:param offsets: A list of (offset, size) tuples.

313

:return: A list or generator of (offset, data) tuples

314

"""

315

sorted_offsets = sorted(list(offsets))

316

fudge = self._bytes_to_read_before_seek

317

coalesced = self._coalesce_offsets(sorted_offsets,

318

limit=self._max_readv_combine,

319

fudge_factor=fudge)

320

coalesced = list(coalesced)

321

mutter('http readv of %s offsets => %s collapsed %s',

322

relpath, len(offsets), len(coalesced))

323

324

f = self._get_ranges_hinted(relpath, coalesced)

300

325

for start, size in offsets:

301

326

try_again = True

302

327

while try_again:

303

328

try_again = False

304

f.seek(start, (start < 0) and 2 or 0)

329

f.seek(start, ((start < 0) and 2) or 0)

305

330

start = f.tell()

306

331

try:

307

332

data = f.read(size)

308

333

if len(data) != size:

309

334

raise errors.ShortReadvError(relpath, start, size,

310

335

actual=len(data))

311

except (errors.InvalidRange, errors.ShortReadvError), e:

312

# Note that we replace 'f' here and that it

313

# may need cleaning one day before being

314

# thrown that way.

315

try_again, code, f = self._retry_get(relpath, ranges,

316

sys.exc_info())

336

except errors.ShortReadvError, e:

337

self._degrade_range_hint(relpath, coalesced, sys.exc_info())

338

339

# Since the offsets and the ranges may not be in the same

340

# order, we don't try to calculate a restricted single

341

# range encompassing unprocessed offsets.

342

343

# Note: we replace 'f' here, it may need cleaning one day

344

# before being thrown that way.

345

f = self._get_ranges_hinted(relpath, coalesced)

346

try_again = True

347

317

348

# After one or more tries, we get the data.

318

349

yield start, data

319

350

320

351

@staticmethod

352

@deprecated_method(zero_seventeen)

321

353

def offsets_to_ranges(offsets):

322

354

"""Turn a list of offsets and sizes into a list of byte ranges.

323

355

452

484

else:

453

485

return self.__class__(self.abspath(offset), self)

454

486

455

def attempted_range_header(self, ranges, tail_amount):

487

def _attempted_range_header(self, offsets, tail_amount):

456

488

"""Prepare a HTTP Range header at a level the server should accept"""

457

489

458

490

if self._range_hint == 'multi':

459

491

# Nothing to do here

460

return self.range_header(ranges, tail_amount)

492

return self._range_header(offsets, tail_amount)

461

493

elif self._range_hint == 'single':

462

494

# Combine all the requested ranges into a single

463

495

# encompassing one

464

if len(ranges) > 0:

465

start, ignored = ranges[0]

466

ignored, end = ranges[-1]

496

if len(offsets) > 0:

467

497

if tail_amount not in (0, None):

468

# Nothing we can do here to combine ranges

469

# with tail_amount, just returns None. The

470

# whole file should be downloaded.

498

# Nothing we can do here to combine ranges with tail_amount

499

# in a single range, just return None. The whole file

500

# should be downloaded.

471

501

return None

472

502

else:

473

return self.range_header([(start, end)], 0)

503

start = offsets[0].start

504

last = offsets[-1]

505

end = last.start + last.length - 1

506

whole = self._coalesce_offsets([(start, end - start + 1)],

507

limit=0, fudge_factor=0)

508

return self._range_header(list(whole), 0)

474

509

else:

475

510

# Only tail_amount requested, leave range_header

476

511

# do its work

477

return self.range_header(ranges, tail_amount)

512

return self._range_header(offsets, tail_amount)

478

513

else:

479

514

return None

480

515

481

516

@staticmethod

482

def range_header(ranges, tail_amount):

517

def _range_header(ranges, tail_amount):

483

518

"""Turn a list of bytes ranges into a HTTP Range header value.

484

519

485

:param ranges: A list of byte ranges, (start, end).

520

:param ranges: A list of _CoalescedOffset

486

521

:param tail_amount: The amount to get from the end of the file.

487

522

488

523

:return: HTTP range header string.

491

526

provided.

492

527

"""

493

528

strings = []

494

for start, end in ranges:

495

strings.append('%d-%d' % (start, end))

529

for offset in ranges:

530

strings.append('%d-%d' % (offset.start,

531

offset.start + offset.length - 1))

496

532

497

533

if tail_amount:

498

534

strings.append('-%d' % tail_amount)

Older »