~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/transport/http/__init__.py

Committer: Aaron Bentley
Date: 2006-11-28 04:05:35 UTC
mfrom: (2151 +trunk)
mto: This revision was merged to the branch mainline in revision 2162.
Revision ID: aaron.bentley@utoronto.ca-20061128040535-akirmje66cdnsclg

Merge bzr.dev

files added:
bzrlib/tests/HttpServer.py

bzrlib/tests/blackbox/test_nick.py

bzrlib/tests/test_wsgi.py

bzrlib/transport/chroot.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/http/wsgi.py

doc/http_smart_server.txt

files modified:
BRANCH.TODO

NEWS

bzrlib/__init__.py

bzrlib/builtins.py

bzrlib/config.py

bzrlib/ignores.py

bzrlib/knit.py

bzrlib/symbol_versioning.py

bzrlib/tests/HTTPTestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_http.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_config.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_http.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_versionedfile.py

bzrlib/trace.py

bzrlib/transport/__init__.py

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/smart.py

doc/index.txt

Show diffs side-by-side

added added

removed removed

bzrlib/transport/http/__init__.py

"""

from cStringIO import StringIO

import errno

import mimetools

import os

import posixpath

import re

import sys

import urlparse

import urllib

from warnings import warn

# TODO: load these only when running http tests

import BaseHTTPServer, SimpleHTTPServer, socket, time

import threading

from bzrlib import errors

from bzrlib.errors import (TransportNotPossible, NoSuchFile,

TransportError, ConnectionError, InvalidURL)

from bzrlib.branch import Branch

from bzrlib.trace import mutter

from bzrlib.transport import (

get_transport,

register_transport,

Server,

smart,

Transport,

)

from bzrlib.transport.http.response import (HttpMultipartRangeResponse,

HttpRangeResponse)

from bzrlib.ui import ui_factory

# TODO: This is not used anymore by HttpTransport_urllib

# (extracting the auth info and prompting the user for a password

# have been split), only the tests still use it. It should be

# deleted and the tests rewritten ASAP to stay in sync.

def extract_auth(url, password_manager):

"""Extract auth parameters from am HTTP/HTTPS url and add them to the given

password manager. Return the url, minus those auth parameters (which

assert re.match(r'^(https?)(\+\w+)?://', url), \

'invalid absolute url %r' % url

scheme, netloc, path, query, fragment = urlparse.urlsplit(url)

if '@' in netloc:

auth, netloc = netloc.split('@', 1)

if ':' in auth:

if password is not None:

password = urllib.unquote(password)

else:

password = ui_factory.get_password(prompt='HTTP %(user)@%(host) password',

user=username, host=host)

password = ui_factory.get_password(

prompt='HTTP %(user)s@%(host)s password',

user=username, host=host)

password_manager.add_password(None, host, username, password)

url = urlparse.urlunsplit((scheme, netloc, path, query, fragment))

return url

108

if not first_line.startswith('HTTP'):

109

if first_header: # The first header *must* start with HTTP

110

raise errors.InvalidHttpResponse(url,

111

'Opening header line did not start with HTTP: %s'

112

100

% (first_line,))

113

101

assert False, 'Opening header line was not HTTP'

114

102

else:

137

125

# _proto: "http" or "https"

138

126

# _qualified_proto: may have "+pycurl", etc

139

127

140

def __init__(self, base):

128

def __init__(self, base, from_transport=None):

141

129

"""Set the base path where files will be stored."""

142

130

proto_match = re.match(r'^(https?)(\+\w+)?://', base)

143

131

if not proto_match:

150

138

if base[-1] != '/':

151

139

base = base + '/'

152

140

super(HttpTransportBase, self).__init__(base)

153

# In the future we might actually connect to the remote host

154

# rather than using get_url

155

# self._connection = None

156

141

(apparent_proto, self._host,

157

142

self._path, self._parameters,

158

143

self._query, self._fragment) = urlparse.urlparse(self.base)

159

144

self._qualified_proto = apparent_proto

145

# range hint is handled dynamically throughout the life

146

# of the object. We start by trying multi-range requests

147

# and if the server returns bogus results, we retry with

148

# single range requests and, finally, we forget about

149

# range if the server really can't understand. Once

150

# acquired, this piece of info is propagated to clones.

151

if from_transport is not None:

152

self._range_hint = from_transport._range_hint

153

else:

154

self._range_hint = 'multi'

160

155

161

156

def abspath(self, relpath):

162

157

"""Return the full url to the given relative path.

169

164

"""

170

165

assert isinstance(relpath, basestring)

171

166

if isinstance(relpath, unicode):

172

raise InvalidURL(relpath, 'paths must not be unicode.')

167

raise errors.InvalidURL(relpath, 'paths must not be unicode.')

173

168

if isinstance(relpath, basestring):

174

169

relpath_parts = relpath.split('/')

175

170

else:

180

175

else:

181

176

# Except for the root, no trailing slashes are allowed

182

177

if len(relpath_parts) > 1 and relpath_parts[-1] == '':

183

raise ValueError("path %r within branch %r seems to be a directory"

184

% (relpath, self._path))

178

raise ValueError(

179

"path %r within branch %r seems to be a directory"

180

% (relpath, self._path))

185

181

basepath = self._path.split('/')

186

182

if len(basepath) > 0 and basepath[-1] == '':

187

183

basepath = basepath[:-1]

266

262

relpath, len(offsets), ranges)

267

263

code, f = self._get(relpath, ranges)

268

264

for start, size in offsets:

269

f.seek(start, (start < 0) and 2 or 0)

270

start = f.tell()

271

data = f.read(size)

272

if len(data) != size:

273

raise errors.ShortReadvError(relpath, start, size,

274

actual=len(data))

265

try_again = True

266

while try_again:

267

try_again = False

268

f.seek(start, (start < 0) and 2 or 0)

269

start = f.tell()

270

try:

271

data = f.read(size)

272

if len(data) != size:

273

raise errors.ShortReadvError(relpath, start, size,

274

actual=len(data))

275

except (errors.InvalidRange, errors.ShortReadvError):

276

# The server does not give us enough data or

277

# bogus-looking result, let's try again with

278

# a simpler request if possible.

279

if self._range_hint == 'multi':

280

self._range_hint = 'single'

281

mutter('Retry %s with single range request' % relpath)

282

try_again = True

283

elif self._range_hint == 'single':

284

self._range_hint = None

285

mutter('Retry %s without ranges' % relpath)

286

try_again = True

287

if try_again:

288

# Note that since the offsets and the

289

# ranges may not be in the same order we

290

# don't try to calculate a restricted

291

# single range encompassing unprocessed

292

# offsets. Note that we replace 'f' here

293

# and that it may need cleaning one day

294

# before being thrown that way.

295

code, f = self._get(relpath, ranges)

296

else:

297

# We tried all the tricks, nothing worked

298

raise

299

275

300

yield start, data

276

301

277

302

@staticmethod

318

343

:param relpath: Location to put the contents, relative to base.

319

344

:param f: File-like object.

320

345

"""

321

raise TransportNotPossible('http PUT not supported')

346

raise errors.TransportNotPossible('http PUT not supported')

322

347

323

348

def mkdir(self, relpath, mode=None):

324

349

"""Create a directory at the given path."""

325

raise TransportNotPossible('http does not support mkdir()')

350

raise errors.TransportNotPossible('http does not support mkdir()')

326

351

327

352

def rmdir(self, relpath):

328

353

"""See Transport.rmdir."""

329

raise TransportNotPossible('http does not support rmdir()')

354

raise errors.TransportNotPossible('http does not support rmdir()')

330

355

331

356

def append_file(self, relpath, f, mode=None):

332

357

"""Append the text in the file-like object into the final

333

358

location.

334

359

"""

335

raise TransportNotPossible('http does not support append()')

360

raise errors.TransportNotPossible('http does not support append()')

336

361

337

362

def copy(self, rel_from, rel_to):

338

363

"""Copy the item at rel_from to the location at rel_to"""

339

raise TransportNotPossible('http does not support copy()')

364

raise errors.TransportNotPossible('http does not support copy()')

340

365

341

366

def copy_to(self, relpaths, other, mode=None, pb=None):

342

367

"""Copy a set of entries from self into another Transport.

350

375

# the remote location is the same, and rather than download, and

351

376

# then upload, it could just issue a remote copy_this command.

352

377

if isinstance(other, HttpTransportBase):

353

raise TransportNotPossible('http cannot be the target of copy_to()')

378

raise errors.TransportNotPossible(

379

'http cannot be the target of copy_to()')

354

380

else:

355

381

return super(HttpTransportBase, self).\

356

382

copy_to(relpaths, other, mode=mode, pb=pb)

357

383

358

384

def move(self, rel_from, rel_to):

359

385

"""Move the item at rel_from to the location at rel_to"""

360

raise TransportNotPossible('http does not support move()')

386

raise errors.TransportNotPossible('http does not support move()')

361

387

362

388

def delete(self, relpath):

363

389

"""Delete the item at relpath"""

364

raise TransportNotPossible('http does not support delete()')

390

raise errors.TransportNotPossible('http does not support delete()')

365

391

366

392

def is_readonly(self):

367

393

"""See Transport.is_readonly."""

374

400

def stat(self, relpath):

375

401

"""Return the stat information for a file.

376

402

"""

377

raise TransportNotPossible('http does not support stat()')

403

raise errors.TransportNotPossible('http does not support stat()')

378

404

379

405

def lock_read(self, relpath):

380

406

"""Lock the given file for shared (read) access.

395

421

396

422

:return: A lock object, which should be passed to Transport.unlock()

397

423

"""

398

raise TransportNotPossible('http does not support lock_write()')

424

raise errors.TransportNotPossible('http does not support lock_write()')

399

425

400

426

def clone(self, offset=None):

401

427

"""Return a new HttpTransportBase with root at self.base + offset

408

434

else:

409

435

return self.__class__(self.abspath(offset), self)

410

436

437

def attempted_range_header(self, ranges, tail_amount):

438

"""Prepare a HTTP Range header at a level the server should accept"""

439

440

if self._range_hint == 'multi':

441

# Nothing to do here

442

return self.range_header(ranges, tail_amount)

443

elif self._range_hint == 'single':

444

# Combine all the requested ranges into a single

445

# encompassing one

446

if len(ranges) > 0:

447

start, ignored = ranges[0]

448

ignored, end = ranges[-1]

449

if tail_amount not in (0, None):

450

# Nothing we can do here to combine ranges

451

# with tail_amount, just returns None. The

452

# whole file should be downloaded.

453

return None

454

else:

455

return self.range_header([(start, end)], 0)

456

else:

457

# Only tail_amount requested, leave range_header

458

# do its work

459

return self.range_header(ranges, tail_amount)

460

else:

461

return None

462

411

463

@staticmethod

412

464

def range_header(ranges, tail_amount):

413

465

"""Turn a list of bytes ranges into a HTTP Range header value.

414

466

415

:param offsets: A list of byte ranges, (start, end). An empty list

416

is not accepted.

467

:param ranges: A list of byte ranges, (start, end).

468

:param tail_amount: The amount to get from the end of the file.

417

469

418

470

:return: HTTP range header string.

471

472

At least a non-empty ranges *or* a tail_amount must be

473

provided.

419

474

"""

420

475

strings = []

421

476

for start, end in ranges:

448

503

449

504

def _read_bytes(self, count):

450

505

return self._response_body.read(count)

451

506

452

507

def _finished_reading(self):

453

508

"""See SmartClientMediumRequest._finished_reading."""

454

509

pass

455

456

457

#---------------- test server facilities ----------------

458

# TODO: load these only when running tests

459

460

461

class WebserverNotAvailable(Exception):

462

pass

463

464

465

class BadWebserverPath(ValueError):

466

def __str__(self):

467

return 'path %s is not in %s' % self.args

468

469

470

class TestingHTTPRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):

471

472

def log_message(self, format, *args):

473

self.server.test_case.log('webserver - %s - - [%s] %s "%s" "%s"',

474

self.address_string(),

475

self.log_date_time_string(),

476

format % args,

477

self.headers.get('referer', '-'),

478

self.headers.get('user-agent', '-'))

479

480

def handle_one_request(self):

481

"""Handle a single HTTP request.

482

483

You normally don't need to override this method; see the class

484

__doc__ string for information on how to handle specific HTTP

485

commands such as GET and POST.

486

487

"""

488

for i in xrange(1,11): # Don't try more than 10 times

489

try:

490

self.raw_requestline = self.rfile.readline()

491

except socket.error, e:

492

if e.args[0] in (errno.EAGAIN, errno.EWOULDBLOCK):

493

# omitted for now because some tests look at the log of

494

# the server and expect to see no errors. see recent

495

# email thread. -- mbp 20051021.

496

## self.log_message('EAGAIN (%d) while reading from raw_requestline' % i)

497

time.sleep(0.01)

498

continue

499

raise

500

else:

501

break

502

if not self.raw_requestline:

503

self.close_connection = 1

504

return

505

if not self.parse_request(): # An error code has been sent, just exit

506

return

507

mname = 'do_' + self.command

508

if getattr(self, mname, None) is None:

509

self.send_error(501, "Unsupported method (%r)" % self.command)

510

return

511

method = getattr(self, mname)

512

method()

513

514

if sys.platform == 'win32':

515

# On win32 you cannot access non-ascii filenames without

516

# decoding them into unicode first.

517

# However, under Linux, you can access bytestream paths

518

# without any problems. If this function was always active

519

# it would probably break tests when LANG=C was set

520

def translate_path(self, path):

521

"""Translate a /-separated PATH to the local filename syntax.

522

523

For bzr, all url paths are considered to be utf8 paths.

524

On Linux, you can access these paths directly over the bytestream

525

request, but on win32, you must decode them, and access them

526

as Unicode files.

527

"""

528

# abandon query parameters

529

path = urlparse.urlparse(path)[2]

530

path = posixpath.normpath(urllib.unquote(path))

531

path = path.decode('utf-8')

532

words = path.split('/')

533

words = filter(None, words)

534

path = os.getcwdu()

535

for word in words:

536

drive, word = os.path.splitdrive(word)

537

head, word = os.path.split(word)

538

if word in (os.curdir, os.pardir): continue

539

path = os.path.join(path, word)

540

return path

541

542

543

class TestingHTTPServer(BaseHTTPServer.HTTPServer):

544

def __init__(self, server_address, RequestHandlerClass, test_case):

545

BaseHTTPServer.HTTPServer.__init__(self, server_address,

546

RequestHandlerClass)

547

self.test_case = test_case

548

549

550

class HttpServer(Server):

551

"""A test server for http transports."""

552

553

# used to form the url that connects to this server

554

_url_protocol = 'http'

555

556

# Subclasses can provide a specific request handler

557

def __init__(self, request_handler=TestingHTTPRequestHandler):

558

Server.__init__(self)

559

self.request_handler = request_handler

560

561

def _get_httpd(self):

562

return TestingHTTPServer(('localhost', 0),

563

self.request_handler,

564

self)

565

566

def _http_start(self):

567

httpd = self._get_httpd()

568

host, port = httpd.socket.getsockname()

569

self._http_base_url = '%s://localhost:%s/' % (self._url_protocol, port)

570

self._http_starting.release()

571

httpd.socket.settimeout(0.1)

572

573

while self._http_running:

574

try:

575

httpd.handle_request()

576

except socket.timeout:

577

pass

578

579

def _get_remote_url(self, path):

580

path_parts = path.split(os.path.sep)

581

if os.path.isabs(path):

582

if path_parts[:len(self._local_path_parts)] != \

583

self._local_path_parts:

584

raise BadWebserverPath(path, self.test_dir)

585

remote_path = '/'.join(path_parts[len(self._local_path_parts):])

586

else:

587

remote_path = '/'.join(path_parts)

588

589

self._http_starting.acquire()

590

self._http_starting.release()

591

return self._http_base_url + remote_path

592

593

def log(self, format, *args):

594

"""Capture Server log output."""

595

self.logs.append(format % args)

596

597

def setUp(self):

598

"""See bzrlib.transport.Server.setUp."""

599

self._home_dir = os.getcwdu()

600

self._local_path_parts = self._home_dir.split(os.path.sep)

601

self._http_starting = threading.Lock()

602

self._http_starting.acquire()

603

self._http_running = True

604

self._http_base_url = None

605

self._http_thread = threading.Thread(target=self._http_start)

606

self._http_thread.setDaemon(True)

607

self._http_thread.start()

608

self._http_proxy = os.environ.get("http_proxy")

609

if self._http_proxy is not None:

610

del os.environ["http_proxy"]

611

self.logs = []

612

613

def tearDown(self):

614

"""See bzrlib.transport.Server.tearDown."""

615

self._http_running = False

616

self._http_thread.join()

617

if self._http_proxy is not None:

618

import os

619

os.environ["http_proxy"] = self._http_proxy

620

621

def get_url(self):

622

"""See bzrlib.transport.Server.get_url."""

623

return self._get_remote_url(self._home_dir)

624

625

def get_bogus_url(self):

626

"""See bzrlib.transport.Server.get_bogus_url."""

627

# this is chosen to try to prevent trouble with proxies, weird dns,

628

# etc

629

return 'http://127.0.0.1:1/'

630

631

632

class HTTPServerWithSmarts(HttpServer):

633

"""HTTPServerWithSmarts extends the HttpServer with POST methods that will

634

trigger a smart server to execute with a transport rooted at the rootdir of

635

the HTTP server.

636

"""

637

638

def __init__(self):

639

HttpServer.__init__(self, SmartRequestHandler)

640

641

642

class SmartRequestHandler(TestingHTTPRequestHandler):

643

"""Extend TestingHTTPRequestHandler to support smart client POSTs."""

644

645

def do_POST(self):

646

"""Hand the request off to a smart server instance."""

647

self.send_response(200)

648

self.send_header("Content-type", "application/octet-stream")

649

transport = get_transport(self.server.test_case._home_dir)

650

# TODO: We might like to support streaming responses. 1.0 allows no

651

# Content-length in this case, so for integrity we should perform our

652

# own chunking within the stream.

653

# 1.1 allows chunked responses, and in this case we could chunk using

654

# the HTTP chunking as this will allow HTTP persistence safely, even if

655

# we have to stop early due to error, but we would also have to use the

656

# HTTP trailer facility which may not be widely available.

657

out_buffer = StringIO()

658

smart_protocol_request = smart.SmartServerRequestProtocolOne(

659

transport, out_buffer.write)

660

# if this fails, we should return 400 bad request, but failure is

661

# failure for now - RBC 20060919

662

data_length = int(self.headers['Content-Length'])

663

# Perhaps there should be a SmartServerHTTPMedium that takes care of

664

# feeding the bytes in the http request to the smart_protocol_request,

665

# but for now it's simpler to just feed the bytes directly.

666

smart_protocol_request.accept_bytes(self.rfile.read(data_length))

667

assert smart_protocol_request.next_read_size() == 0, (

668

"not finished reading, but all data sent to protocol.")

669

self.send_header("Content-Length", str(len(out_buffer.getvalue())))

670

self.end_headers()

671

self.wfile.write(out_buffer.getvalue())

672

Older »