~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/transport/http/__init__.py

Committer: John Arbash Meinel
Date: 2006-12-11 19:25:13 UTC
mfrom: (2172 +trunk)
mto: This revision was merged to the branch mainline in revision 2173.
Revision ID: john@arbash-meinel.com-20061211192513-u9ivkqwkp8j3v2i5

[merge] bzr.dev 2172 and move NEWS to the correct location

files added:
bzrlib/tests/HttpServer.py

bzrlib/tests/blackbox/test_nick.py

bzrlib/tests/test_wsgi.py

bzrlib/transport/chroot.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/http/wsgi.py

doc/http_smart_server.txt

files modified:
BRANCH.TODO

NEWS

bzrlib/__init__.py

bzrlib/add.py

bzrlib/annotate.py

bzrlib/benchmarks/bench_cache_utf8.py

bzrlib/branch.py

bzrlib/builtins.py

bzrlib/bundle/__init__.py

bzrlib/bzrdir.py

bzrlib/cache_utf8.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/delta.py

bzrlib/diff.py

bzrlib/errors.py

bzrlib/fetch.py

bzrlib/hashcache.py

bzrlib/ignores.py

bzrlib/inventory.py

bzrlib/knit.py

bzrlib/lockdir.py

bzrlib/merge.py

bzrlib/msgeditor.py

bzrlib/osutils.py

bzrlib/repository.py

bzrlib/revisiontree.py

bzrlib/status.py

bzrlib/store/revision/__init__.py

bzrlib/store/revision/knit.py

bzrlib/store/revision/text.py

bzrlib/symbol_versioning.py

bzrlib/tests/HTTPTestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_http.py

bzrlib/tests/intertree_implementations/test_compare.py

bzrlib/tests/repository_implementations/test_commit_builder.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_config.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_http.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revert.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_versionedfile.py

bzrlib/trace.py

bzrlib/transform.py

bzrlib/transport/__init__.py

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/smart.py

bzrlib/transport/ssh.py

bzrlib/tree.py

bzrlib/urlutils.py

bzrlib/workingtree.py

doc/configuration.txt

doc/index.txt

doc/tutorial.txt

Show diffs side-by-side

added added

removed removed

bzrlib/transport/http/__init__.py

"""

from cStringIO import StringIO

import errno

import mimetools

import os

import posixpath

import re

import sys

import urlparse

import urllib

from warnings import warn

# TODO: load these only when running http tests

import BaseHTTPServer, SimpleHTTPServer, socket, time

import threading

from bzrlib import errors

from bzrlib.errors import (TransportNotPossible, NoSuchFile,

TransportError, ConnectionError, InvalidURL)

from bzrlib.branch import Branch

from bzrlib import errors, ui

from bzrlib.trace import mutter

from bzrlib.transport import (

get_transport,

register_transport,

Server,

smart,

Transport,

)

from bzrlib.transport.http.response import (HttpMultipartRangeResponse,

HttpRangeResponse)

from bzrlib.ui import ui_factory

# TODO: This is not used anymore by HttpTransport_urllib

# (extracting the auth info and prompting the user for a password

# have been split), only the tests still use it. It should be

# deleted and the tests rewritten ASAP to stay in sync.

def extract_auth(url, password_manager):

"""Extract auth parameters from an HTTP/HTTPS url and add them to the given

password manager. Return the url, minus those auth parameters (which

assert re.match(r'^(https?)(\+\w+)?://', url), \

'invalid absolute url %r' % url

scheme, netloc, path, query, fragment = urlparse.urlsplit(url)

if '@' in netloc:

auth, netloc = netloc.split('@', 1)

if ':' in auth:

if password is not None:

password = urllib.unquote(password)

else:

password = ui_factory.get_password(prompt='HTTP %(user)@%(host) password',

user=username, host=host)

password = ui.ui_factory.get_password(

prompt='HTTP %(user)s@%(host)s password',

user=username, host=host)

password_manager.add_password(None, host, username, password)

url = urlparse.urlunsplit((scheme, netloc, path, query, fragment))

return url

108

if not first_line.startswith('HTTP'):

109

if first_header: # The first header *must* start with HTTP

110

raise errors.InvalidHttpResponse(url,

111

'Opening header line did not start with HTTP: %s'

112

% (first_line,))

113

100

assert False, 'Opening header line was not HTTP'

114

101

else:

137

124

# _proto: "http" or "https"

138

125

# _qualified_proto: may have "+pycurl", etc

139

126

140

def __init__(self, base):

127

def __init__(self, base, from_transport=None):

141

128

"""Set the base path where files will be stored."""

142

129

proto_match = re.match(r'^(https?)(\+\w+)?://', base)

143

130

if not proto_match:

150

137

if base[-1] != '/':

151

138

base = base + '/'

152

139

super(HttpTransportBase, self).__init__(base)

153

# In the future we might actually connect to the remote host

154

# rather than using get_url

155

# self._connection = None

156

140

(apparent_proto, self._host,

157

141

self._path, self._parameters,

158

142

self._query, self._fragment) = urlparse.urlparse(self.base)

159

143

self._qualified_proto = apparent_proto

144

# range hint is handled dynamically throughout the life

145

# of the object. We start by trying multi-range requests

146

# and if the server returns bogus results, we retry with

147

# single range requests and, finally, we forget about

148

# range if the server really can't understand. Once

149

# acquired, this piece of info is propagated to clones.

150

if from_transport is not None:

151

self._range_hint = from_transport._range_hint

152

else:

153

self._range_hint = 'multi'

160

154

161

155

def abspath(self, relpath):

162

156

"""Return the full url to the given relative path.

169

163

"""

170

164

assert isinstance(relpath, basestring)

171

165

if isinstance(relpath, unicode):

172

raise InvalidURL(relpath, 'paths must not be unicode.')

166

raise errors.InvalidURL(relpath, 'paths must not be unicode.')

173

167

if isinstance(relpath, basestring):

174

168

relpath_parts = relpath.split('/')

175

169

else:

180

174

else:

181

175

# Except for the root, no trailing slashes are allowed

182

176

if len(relpath_parts) > 1 and relpath_parts[-1] == '':

183

raise ValueError("path %r within branch %r seems to be a directory"

184

% (relpath, self._path))

177

raise ValueError(

178

"path %r within branch %r seems to be a directory"

179

% (relpath, self._path))

185

180

basepath = self._path.split('/')

186

181

if len(basepath) > 0 and basepath[-1] == '':

187

182

basepath = basepath[:-1]

266

261

relpath, len(offsets), ranges)

267

262

code, f = self._get(relpath, ranges)

268

263

for start, size in offsets:

269

f.seek(start, (start < 0) and 2 or 0)

270

start = f.tell()

271

data = f.read(size)

272

if len(data) != size:

273

raise errors.ShortReadvError(relpath, start, size,

274

actual=len(data))

264

try_again = True

265

while try_again:

266

try_again = False

267

f.seek(start, (start < 0) and 2 or 0)

268

start = f.tell()

269

try:

270

data = f.read(size)

271

if len(data) != size:

272

raise errors.ShortReadvError(relpath, start, size,

273

actual=len(data))

274

except (errors.InvalidRange, errors.ShortReadvError):

275

# The server does not give us enough data or returns a

276

# bogus-looking result, let's try again with

277

# a simpler request if possible.

278

if self._range_hint == 'multi':

279

self._range_hint = 'single'

280

mutter('Retry %s with single range request' % relpath)

281

try_again = True

282

elif self._range_hint == 'single':

283

self._range_hint = None

284

mutter('Retry %s without ranges' % relpath)

285

try_again = True

286

if try_again:

287

# Note that since the offsets and the

288

# ranges may not be in the same order we

289

# don't try to calculate a restricted

290

# single range encompassing unprocessed

291

# offsets. Note that we replace 'f' here

292

# and that it may need cleaning one day

293

# before being thrown that way.

294

code, f = self._get(relpath, ranges)

295

else:

296

# We tried all the tricks, nothing worked

297

raise

298

275

299

yield start, data

276

300

277

301

@staticmethod

318

342

:param relpath: Location to put the contents, relative to base.

319

343

:param f: File-like object.

320

344

"""

321

raise TransportNotPossible('http PUT not supported')

345

raise errors.TransportNotPossible('http PUT not supported')

322

346

323

347

def mkdir(self, relpath, mode=None):

324

348

"""Create a directory at the given path."""

325

raise TransportNotPossible('http does not support mkdir()')

349

raise errors.TransportNotPossible('http does not support mkdir()')

326

350

327

351

def rmdir(self, relpath):

328

352

"""See Transport.rmdir."""

329

raise TransportNotPossible('http does not support rmdir()')

353

raise errors.TransportNotPossible('http does not support rmdir()')

330

354

331

355

def append_file(self, relpath, f, mode=None):

332

356

"""Append the text in the file-like object into the final

333

357

location.

334

358

"""

335

raise TransportNotPossible('http does not support append()')

359

raise errors.TransportNotPossible('http does not support append()')

336

360

337

361

def copy(self, rel_from, rel_to):

338

362

"""Copy the item at rel_from to the location at rel_to"""

339

raise TransportNotPossible('http does not support copy()')

363

raise errors.TransportNotPossible('http does not support copy()')

340

364

341

365

def copy_to(self, relpaths, other, mode=None, pb=None):

342

366

"""Copy a set of entries from self into another Transport.

350

374

# the remote location is the same, and rather than download, and

351

375

# then upload, it could just issue a remote copy_this command.

352

376

if isinstance(other, HttpTransportBase):

353

raise TransportNotPossible('http cannot be the target of copy_to()')

377

raise errors.TransportNotPossible(

378

'http cannot be the target of copy_to()')

354

379

else:

355

380

return super(HttpTransportBase, self).\

356

381

copy_to(relpaths, other, mode=mode, pb=pb)

357

382

358

383

def move(self, rel_from, rel_to):

359

384

"""Move the item at rel_from to the location at rel_to"""

360

raise TransportNotPossible('http does not support move()')

385

raise errors.TransportNotPossible('http does not support move()')

361

386

362

387

def delete(self, relpath):

363

388

"""Delete the item at relpath"""

364

raise TransportNotPossible('http does not support delete()')

389

raise errors.TransportNotPossible('http does not support delete()')

365

390

366

391

def is_readonly(self):

367

392

"""See Transport.is_readonly."""

374

399

def stat(self, relpath):

375

400

"""Return the stat information for a file.

376

401

"""

377

raise TransportNotPossible('http does not support stat()')

402

raise errors.TransportNotPossible('http does not support stat()')

378

403

379

404

def lock_read(self, relpath):

380

405

"""Lock the given file for shared (read) access.

395

420

396

421

:return: A lock object, which should be passed to Transport.unlock()

397

422

"""

398

raise TransportNotPossible('http does not support lock_write()')

423

raise errors.TransportNotPossible('http does not support lock_write()')

399

424

400

425

def clone(self, offset=None):

401

426

"""Return a new HttpTransportBase with root at self.base + offset

408

433

else:

409

434

return self.__class__(self.abspath(offset), self)

410

435

436

def attempted_range_header(self, ranges, tail_amount):

437

"""Prepare a HTTP Range header at a level the server should accept"""

438

439

if self._range_hint == 'multi':

440

# Nothing to do here

441

return self.range_header(ranges, tail_amount)

442

elif self._range_hint == 'single':

443

# Combine all the requested ranges into a single

444

# encompassing one

445

if len(ranges) > 0:

446

start, ignored = ranges[0]

447

ignored, end = ranges[-1]

448

if tail_amount not in (0, None):

449

# Nothing we can do here to combine ranges

450

# with tail_amount, just return None. The

451

# whole file should be downloaded.

452

return None

453

else:

454

return self.range_header([(start, end)], 0)

455

else:

456

# Only tail_amount requested, let range_header

457

# do its work

458

return self.range_header(ranges, tail_amount)

459

else:

460

return None

461

411

462

@staticmethod

412

463

def range_header(ranges, tail_amount):

413

464

"""Turn a list of bytes ranges into a HTTP Range header value.

414

465

415

:param offsets: A list of byte ranges, (start, end). An empty list

416

is not accepted.

466

:param ranges: A list of byte ranges, (start, end).

467

:param tail_amount: The amount to get from the end of the file.

417

468

418

469

:return: HTTP range header string.

470

471

At least a non-empty ranges *or* a tail_amount must be

472

provided.

419

473

"""

420

474

strings = []

421

475

for start, end in ranges:

448

502

449

503

def _read_bytes(self, count):

450

504

return self._response_body.read(count)

451

505

452

506

def _finished_reading(self):

453

507

"""See SmartClientMediumRequest._finished_reading."""

454

508

pass

455

456

457

#---------------- test server facilities ----------------

458

# TODO: load these only when running tests

459

460

461

class WebserverNotAvailable(Exception):

462

pass

463

464

465

class BadWebserverPath(ValueError):

466

def __str__(self):

467

return 'path %s is not in %s' % self.args

468

469

470

class TestingHTTPRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):

471

472

def log_message(self, format, *args):

473

self.server.test_case.log('webserver - %s - - [%s] %s "%s" "%s"',

474

self.address_string(),

475

self.log_date_time_string(),

476

format % args,

477

self.headers.get('referer', '-'),

478

self.headers.get('user-agent', '-'))

479

480

def handle_one_request(self):

481

"""Handle a single HTTP request.

482

483

You normally don't need to override this method; see the class

484

__doc__ string for information on how to handle specific HTTP

485

commands such as GET and POST.

486

487

"""

488

for i in xrange(1,11): # Don't try more than 10 times

489

try:

490

self.raw_requestline = self.rfile.readline()

491

except socket.error, e:

492

if e.args[0] in (errno.EAGAIN, errno.EWOULDBLOCK):

493

# omitted for now because some tests look at the log of

494

# the server and expect to see no errors. see recent

495

# email thread. -- mbp 20051021.

496

## self.log_message('EAGAIN (%d) while reading from raw_requestline' % i)

497

time.sleep(0.01)

498

continue

499

raise

500

else:

501

break

502

if not self.raw_requestline:

503

self.close_connection = 1

504

return

505

if not self.parse_request(): # An error code has been sent, just exit

506

return

507

mname = 'do_' + self.command

508

if getattr(self, mname, None) is None:

509

self.send_error(501, "Unsupported method (%r)" % self.command)

510

return

511

method = getattr(self, mname)

512

method()

513

514

if sys.platform == 'win32':

515

# On win32 you cannot access non-ascii filenames without

516

# decoding them into unicode first.

517

# However, under Linux, you can access bytestream paths

518

# without any problems. If this function was always active

519

# it would probably break tests when LANG=C was set

520

def translate_path(self, path):

521

"""Translate a /-separated PATH to the local filename syntax.

522

523

For bzr, all url paths are considered to be utf8 paths.

524

On Linux, you can access these paths directly over the bytestream

525

request, but on win32, you must decode them, and access them

526

as Unicode files.

527

"""

528

# abandon query parameters

529

path = urlparse.urlparse(path)[2]

530

path = posixpath.normpath(urllib.unquote(path))

531

path = path.decode('utf-8')

532

words = path.split('/')

533

words = filter(None, words)

534

path = os.getcwdu()

535

for word in words:

536

drive, word = os.path.splitdrive(word)

537

head, word = os.path.split(word)

538

if word in (os.curdir, os.pardir): continue

539

path = os.path.join(path, word)

540

return path

541

542

543

class TestingHTTPServer(BaseHTTPServer.HTTPServer):

544

def __init__(self, server_address, RequestHandlerClass, test_case):

545

BaseHTTPServer.HTTPServer.__init__(self, server_address,

546

RequestHandlerClass)

547

self.test_case = test_case

548

549

550

class HttpServer(Server):

551

"""A test server for http transports."""

552

553

# used to form the url that connects to this server

554

_url_protocol = 'http'

555

556

# Subclasses can provide a specific request handler

557

def __init__(self, request_handler=TestingHTTPRequestHandler):

558

Server.__init__(self)

559

self.request_handler = request_handler

560

561

def _get_httpd(self):

562

return TestingHTTPServer(('localhost', 0),

563

self.request_handler,

564

self)

565

566

def _http_start(self):

567

httpd = self._get_httpd()

568

host, port = httpd.socket.getsockname()

569

self._http_base_url = '%s://localhost:%s/' % (self._url_protocol, port)

570

self._http_starting.release()

571

httpd.socket.settimeout(0.1)

572

573

while self._http_running:

574

try:

575

httpd.handle_request()

576

except socket.timeout:

577

pass

578

579

def _get_remote_url(self, path):

580

path_parts = path.split(os.path.sep)

581

if os.path.isabs(path):

582

if path_parts[:len(self._local_path_parts)] != \

583

self._local_path_parts:

584

raise BadWebserverPath(path, self.test_dir)

585

remote_path = '/'.join(path_parts[len(self._local_path_parts):])

586

else:

587

remote_path = '/'.join(path_parts)

588

589

self._http_starting.acquire()

590

self._http_starting.release()

591

return self._http_base_url + remote_path

592

593

def log(self, format, *args):

594

"""Capture Server log output."""

595

self.logs.append(format % args)

596

597

def setUp(self):

598

"""See bzrlib.transport.Server.setUp."""

599

self._home_dir = os.getcwdu()

600

self._local_path_parts = self._home_dir.split(os.path.sep)

601

self._http_starting = threading.Lock()

602

self._http_starting.acquire()

603

self._http_running = True

604

self._http_base_url = None

605

self._http_thread = threading.Thread(target=self._http_start)

606

self._http_thread.setDaemon(True)

607

self._http_thread.start()

608

self._http_proxy = os.environ.get("http_proxy")

609

if self._http_proxy is not None:

610

del os.environ["http_proxy"]

611

self.logs = []

612

613

def tearDown(self):

614

"""See bzrlib.transport.Server.tearDown."""

615

self._http_running = False

616

self._http_thread.join()

617

if self._http_proxy is not None:

618

import os

619

os.environ["http_proxy"] = self._http_proxy

620

621

def get_url(self):

622

"""See bzrlib.transport.Server.get_url."""

623

return self._get_remote_url(self._home_dir)

624

625

def get_bogus_url(self):

626

"""See bzrlib.transport.Server.get_bogus_url."""

627

# this is chosen to try to prevent trouble with proxies, weird dns,

628

# etc

629

return 'http://127.0.0.1:1/'

630

631

632

class HTTPServerWithSmarts(HttpServer):

633

"""HTTPServerWithSmarts extends the HttpServer with POST methods that will

634

trigger a smart server to execute with a transport rooted at the rootdir of

635

the HTTP server.

636

"""

637

638

def __init__(self):

639

HttpServer.__init__(self, SmartRequestHandler)

640

641

642

class SmartRequestHandler(TestingHTTPRequestHandler):

643

"""Extend TestingHTTPRequestHandler to support smart client POSTs."""

644

645

def do_POST(self):

646

"""Hand the request off to a smart server instance."""

647

self.send_response(200)

648

self.send_header("Content-type", "application/octet-stream")

649

transport = get_transport(self.server.test_case._home_dir)

650

# TODO: We might like to support streaming responses. 1.0 allows no

651

# Content-length in this case, so for integrity we should perform our

652

# own chunking within the stream.

653

# 1.1 allows chunked responses, and in this case we could chunk using

654

# the HTTP chunking as this will allow HTTP persistence safely, even if

655

# we have to stop early due to error, but we would also have to use the

656

# HTTP trailer facility which may not be widely available.

657

out_buffer = StringIO()

658

smart_protocol_request = smart.SmartServerRequestProtocolOne(

659

transport, out_buffer.write)

660

# if this fails, we should return 400 bad request, but failure is

661

# failure for now - RBC 20060919

662

data_length = int(self.headers['Content-Length'])

663

# Perhaps there should be a SmartServerHTTPMedium that takes care of

664

# feeding the bytes in the http request to the smart_protocol_request,

665

# but for now it's simpler to just feed the bytes directly.

666

smart_protocol_request.accept_bytes(self.rfile.read(data_length))

667

assert smart_protocol_request.next_read_size() == 0, (

668

"not finished reading, but all data sent to protocol.")

669

self.send_header("Content-Length", str(len(out_buffer.getvalue())))

670

self.end_headers()

671

self.wfile.write(out_buffer.getvalue())

672

Older »