~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/transport/http.py

Committer: Martin Pool
Date: 2006-01-30 06:23:50 UTC
mfrom: (1534.1.17 integration)
Revision ID: mbp@sourcefrog.net-20060130062350-d6f25277ddcdfd79

[merge] robert's integration of much recent work

files added:
BRANCH.TODO

INSTALL

bzrlib/decorators.py

bzrlib/doc

bzrlib/doc/__init__.py

bzrlib/doc/api

bzrlib/doc/api/__init__.py

bzrlib/doc/api/transport.txt

bzrlib/export

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/gpg.py

bzrlib/iterablefile.py

bzrlib/lockable_files.py

bzrlib/lsprof.py

bzrlib/option.py

bzrlib/repository.py

bzrlib/rio.py

bzrlib/symbol_versioning.py

bzrlib/tests/blackbox

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test_api.py

bzrlib/tests/test_basis_inventory.py

bzrlib/tests/test_command.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_fileid_involved.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_http.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_permissions.py

bzrlib/tests/test_remove.py

bzrlib/tests/test_reweave.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_source.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_uncommit.py

bzrlib/transport/ftp.py

bzrlib/transport/memory.py

bzrlib/transport/sftp.py

bzrlib/tsort.py

bzrlib/ui

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/util/configobj

bzrlib/util/configobj/__init__.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/configobj/validate.py

bzrlib/win32console.py

tools/biobench.py

tools/riodemo.py

tools/trace-revisions

files removed:
bzrlib/clone.py

bzrlib/mdiff.py

bzrlib/revfile.py

bzrlib/store/compressed_text.py

patches/cache-remote-revisions.diff

patches/cache_weave_inclusions.diff

patches/find-touching-from-seq.diff

patches/meta-data-in-inventory.patch

patches/ndiff.patch

testbzr

files renamed:
bzr-man.py => bzr_man.py

bzrlib/changeset.py => bzrlib/_changeset.py

bzrlib/merge_core.py => bzrlib/_merge_core.py

bzrlib/selftest/ => bzrlib/tests/

bzrlib/selftest/test_revision_info.py => bzrlib/tests/blackbox/test_revision_info.py

bzrlib/selftest/blackbox.py => bzrlib/tests/blackbox/test_too_much.py

bzrlib/selftest/versioning.py => bzrlib/tests/blackbox/test_versioning.py

bzrlib/selftest/testannotate.py => bzrlib/tests/test_annotate.py

bzrlib/selftest/testbranch.py => bzrlib/tests/test_branch.py

bzrlib/selftest/testconfig.py => bzrlib/tests/test_config.py

bzrlib/selftest/testdiff.py => bzrlib/tests/test_diff.py

bzrlib/selftest/testfetch.py => bzrlib/tests/test_fetch.py

bzrlib/selftest/testgraph.py => bzrlib/tests/test_graph.py

bzrlib/selftest/testhashcache.py => bzrlib/tests/test_hashcache.py

bzrlib/selftest/testidentitymap.py => bzrlib/tests/test_identitymap.py

bzrlib/selftest/testinv.py => bzrlib/tests/test_inv.py

bzrlib/selftest/testlog.py => bzrlib/tests/test_log.py

bzrlib/selftest/testmerge.py => bzrlib/tests/test_merge.py

bzrlib/selftest/testmerge3.py => bzrlib/tests/test_merge3.py

bzrlib/selftest/plugins.py => bzrlib/tests/test_plugins.py

bzrlib/selftest/testrevision.py => bzrlib/tests/test_revision.py

bzrlib/selftest/testrevisionnamespaces.py => bzrlib/tests/test_revisionnamespaces.py

bzrlib/selftest/testrevprops.py => bzrlib/tests/test_revprops.py

bzrlib/selftest/testsampler.py => bzrlib/tests/test_sampler.py

bzrlib/selftest/teststatus.py => bzrlib/tests/test_status.py

bzrlib/selftest/teststore.py => bzrlib/tests/test_store.py

bzrlib/selftest/testtestament.py => bzrlib/tests/test_testament.py

bzrlib/selftest/testtransactions.py => bzrlib/tests/test_transactions.py

bzrlib/selftest/testtransport.py => bzrlib/tests/test_transport.py

bzrlib/selftest/whitebox.py => bzrlib/tests/test_whitebox.py

bzrlib/selftest/testworkingtree.py => bzrlib/tests/test_workingtree.py

bzrlib/ui.py => bzrlib/ui/__init__.py

bzrlib/xml.py => bzrlib/xml_serializer.py

files modified:
.bzrignore

.rsyncexclude

HACKING

Makefile

NEWS

README

TODO

build-api

bzrlib/__init__.py

bzrlib/add.py

bzrlib/annotate.py

bzrlib/atomicfile.py

bzrlib/branch.py

bzrlib/builtins.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/delta.py

bzrlib/diff.py

bzrlib/errors.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/info.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/lock.py

bzrlib/log.py

bzrlib/merge.py *

bzrlib/merge3.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/osutils.py

bzrlib/plugin.py

bzrlib/progress.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/shellcomplete.py

bzrlib/status.py

bzrlib/store/__init__.py

bzrlib/store/text.py

bzrlib/store/weave.py

bzrlib/testament.py

bzrlib/tests/HTTPTestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_parent.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_xml.py

bzrlib/tests/treeshape.py

bzrlib/trace.py

bzrlib/transport/__init__.py

bzrlib/transport/http.py

bzrlib/transport/local.py

bzrlib/tree.py

bzrlib/upgrade.py

bzrlib/weave.py *

bzrlib/weavefile.py

bzrlib/workingtree.py

bzrlib/xml4.py

bzrlib/xml5.py

contrib/zsh/_bzr

doc/random.txt

setup.py *

tutorial.txt

Show diffs side-by-side

added added

removed removed

bzrlib/transport/http.py

"""Implementation of Transport over http.

"""

from bzrlib.transport import Transport, register_transport

from bzrlib.errors import (TransportNotPossible, NoSuchFile,

NonRelativePath, TransportError)

import os, errno

from cStringIO import StringIO

import urllib2

import urllib, urllib2

import urlparse

from warnings import warn

from bzrlib.transport import Transport, Server

from bzrlib.errors import (TransportNotPossible, NoSuchFile,

TransportError, ConnectionError)

from bzrlib.errors import BzrError, BzrCheckError

from bzrlib.branch import Branch

from bzrlib.trace import mutter

# velocitynet.com.au transparently proxies connections and thereby

# breaks keep-alive -- sucks!

def extract_auth(url, password_manager):
    """Strip embedded credentials from an HTTP/HTTPS url and register them.

    Extract auth parameters from an HTTP/HTTPS url and add them to the given
    password manager.  Return the url, minus those auth parameters (which
    confuse urllib2).

    :param url: A url starting with 'http://' or 'https://', optionally of
        the form scheme://user[:password]@host[:port]/path.
    :param password_manager: Object with an
        add_password(realm, uri, user, passwd) method.  It is only called
        when the url contains both a username and a password.
    :return: The url with any 'user[:password]@' portion removed.
    """
    assert url.startswith('http://') or url.startswith('https://')
    scheme, host = url.split('//', 1)
    if '/' in host:
        host, path = host.split('/', 1)
        path = '/' + path
    else:
        path = ''
    port = ''
    if '@' in host:
        auth, host = host.split('@', 1)
        if ':' in auth:
            username, password = auth.split(':', 1)
        else:
            username, password = auth, None
        if ':' in host:
            host, port = host.split(':', 1)
            port = ':' + port
        # FIXME: if password isn't given, should we ask for it?
        if password is not None:
            username = urllib.unquote(username)
            password = urllib.unquote(password)
            # Register the credentials under host *and* port: the password
            # manager matches on the full authority, so registering the bare
            # host would never match a request to an explicit port.
            password_manager.add_password(None, host + port,
                                          username, password)
    url = scheme + '//' + host + port + path
    return url

def get_url(url):
    """Open url over HTTP(S) and return the file-like response object.

    Any 'user:password@' credentials embedded in the url are removed with
    extract_auth() and supplied to urllib2 through a basic-auth handler
    instead.

    :param url: An http:// or https:// url, optionally with credentials.
    :return: The object returned by the urllib2 opener's open().
    """
    import urllib2
    mutter("get_url %s" % url)
    # Open the url exactly once, and only after the credentials have been
    # stripped: the raw url must not be handed to urllib2.urlopen(), and an
    # extra request would leave an unread, unclosed response behind.
    manager = urllib2.HTTPPasswordMgrWithDefaultRealm()
    url = extract_auth(url, manager)
    auth_handler = urllib2.HTTPBasicAuthHandler(manager)
    opener = urllib2.build_opener(auth_handler)
    url_f = opener.open(url)
    return url_f

class HttpTransportError(TransportError):
    """TransportError subclass raised by the http transport."""

class HttpTransport(Transport):

"""This is the transport agent for http:// access.

def __init__(self, base):

"""Set the base path where files will be stored."""

assert base.startswith('http://') or base.startswith('https://')

if base[-1] != '/':

base = base + '/'

super(HttpTransport, self).__init__(base)

# In the future we might actually connect to the remote host

# rather than using get_url

109

"""Return the full url to the given relative path.

110

This can be supplied with a string or a list

111

"""

112

assert isinstance(relpath, basestring)

113

if isinstance(relpath, basestring):

relpath = [relpath]

114

relpath_parts = relpath.split('/')

115

else:

116

# TODO: Don't call this with an array - no magic interfaces

117

relpath_parts = relpath[:]

118

if len(relpath_parts) > 1:

119

if relpath_parts[0] == '':

120

raise ValueError("path %r within branch %r seems to be absolute"

121

% (relpath, self._path))

122

if relpath_parts[-1] == '':

123

raise ValueError("path %r within branch %r seems to be a directory"

124

% (relpath, self._path))

125

basepath = self._path.split('/')

126

if len(basepath) > 0 and basepath[-1] == '':

127

basepath = basepath[:-1]

for p in relpath:

128

for p in relpath_parts:

129

if p == '..':

if len(basepath) < 0:

130

if len(basepath) == 0:

131

# In most filesystems, a request for the parent

132

# of root, just returns root.

133

continue

if len(basepath) > 0:

basepath.pop()

elif p == '.':

134

basepath.pop()

135

elif p == '.' or p == '':

136

continue # No-op

137

else:

138

basepath.append(p)

139

# Possibly, we could use urlparse.urljoin() here, but

100

140

# I'm concerned about when it chooses to strip the last

101

141

# portion of the path, and when it doesn't.

103

143

return urlparse.urlunparse((self._proto,

104

144

self._host, path, '', '', ''))

105

145

106

def relpath(self, abspath):

107

if not abspath.startswith(self.base):

108

raise NonRelativePath('path %r is not under base URL %r'

109

% (abspath, self.base))

110

pl = len(self.base)

111

return abspath[pl:].lstrip('/')

112

113

146

def has(self, relpath):

114

147

"""Does the target location exist?

115

148

121

154

cleaner if we just do an http HEAD request, and parse

122

155

the return code.

123

156

"""

157

path = relpath

124

158

try:

125

f = get_url(self.abspath(relpath))

159

path = self.abspath(relpath)

160

f = get_url(path)

126

161

# Without the read and then close()

127

162

# we tend to have busy sockets.

128

163

f.read()

129

164

f.close()

130

165

return True

131

except BzrError:

132

return False

133

except urllib2.URLError:

134

return False

166

except urllib2.URLError, e:

167

mutter('url error code: %s for has url: %r', e.code, path)

168

if e.code == 404:

169

return False

170

raise

135

171

except IOError, e:

172

mutter('io error: %s %s for has url: %r',

173

e.errno, errno.errorcode.get(e.errno), path)

136

174

if e.errno == errno.ENOENT:

137

175

return False

138

raise HttpTransportError(orig_error=e)

176

raise TransportError(orig_error=e)

139

177

140

178

def get(self, relpath, decode=False):

141

179

"""Get the file at the given relative path.

142

180

143

181

:param relpath: The relative path to the file

144

182

"""

183

path = relpath

145

184

try:

146

return get_url(self.abspath(relpath))

147

except (BzrError, urllib2.URLError, IOError), e:

148

raise NoSuchFile(msg = "Error retrieving %s: %s"

185

path = self.abspath(relpath)

186

return get_url(path)

187

except urllib2.HTTPError, e:

188

mutter('url error code: %s for has url: %r', e.code, path)

189

if e.code == 404:

190

raise NoSuchFile(path, extra=e)

191

raise

192

except (BzrError, IOError), e:

193

if hasattr(e, 'errno'):

194

mutter('io error: %s %s for has url: %r',

195

e.errno, errno.errorcode.get(e.errno), path)

196

if e.errno == errno.ENOENT:

197

raise NoSuchFile(path, extra=e)

198

raise ConnectionError(msg = "Error retrieving %s: %s"

149

199

% (self.abspath(relpath), str(e)),

150

200

orig_error=e)

151

201

152

def get_partial(self, relpath, start, length=None):

153

"""Get just part of a file.

154

155

:param relpath: Path to the file, relative to base

156

:param start: The starting position to read from

157

:param length: The length to read. A length of None indicates

158

read to the end of the file.

159

:return: A file-like object containing at least the specified bytes.

160

Some implementations may return objects which can be read

161

past this length, but this is not guaranteed.

162

"""

163

# TODO: You can make specialized http requests for just

164

# a portion of the file. Figure out how to do that.

165

# For now, urllib2 returns files that cannot seek() so

166

# we just read bytes off the beginning, until we

167

# get to the point that we care about.

168

f = self.get(relpath)

169

# TODO: read in smaller chunks, in case things are

170

# buffered internally.

171

f.read(start)

172

return f

173

174

def put(self, relpath, f):

202

def put(self, relpath, f, mode=None):

175

203

"""Copy the file-like or string object into the location.

176

204

177

205

:param relpath: Location to put the contents, relative to base.

179

207

"""

180

208

raise TransportNotPossible('http PUT not supported')

181

209

182

def mkdir(self, relpath):

210

def mkdir(self, relpath, mode=None):

183

211

"""Create a directory at the given path."""

184

212

raise TransportNotPossible('http does not support mkdir()')

185

213

193

221

"""Copy the item at rel_from to the location at rel_to"""

194

222

raise TransportNotPossible('http does not support copy()')

195

223

196

def copy_to(self, relpaths, other, pb=None):

224

def copy_to(self, relpaths, other, mode=None, pb=None):

197

225

"""Copy a set of entries from self into another Transport.

198

226

199

227

:param relpaths: A list/generator of entries to be copied.

207

235

if isinstance(other, HttpTransport):

208

236

raise TransportNotPossible('http cannot be the target of copy_to()')

209

237

else:

210

return super(HttpTransport, self).copy_to(relpaths, other, pb=pb)

238

return super(HttpTransport, self).copy_to(relpaths, other, mode=mode, pb=pb)

211

239

212

240

def move(self, rel_from, rel_to):

213

241

"""Move the item at rel_from to the location at rel_to"""

217

245

"""Delete the item at relpath"""

218

246

raise TransportNotPossible('http does not support delete()')

219

247

248

def is_readonly(self):

249

"""See Transport.is_readonly."""

250

return True

251

220

252

def listable(self):

221

253

"""See Transport.listable."""

222

254

return False

247

279

"""

248

280

raise TransportNotPossible('http does not support lock_write()')

249

281

250

register_transport('http://', HttpTransport)

251

register_transport('https://', HttpTransport)

282

283

#---------------- test server facilities ----------------

284

import BaseHTTPServer, SimpleHTTPServer, socket, time

285

import threading

286

287

288

class WebserverNotAvailable(Exception):
    """Raised when the test web server cannot be started."""

290

291

292

class BadWebserverPath(ValueError):
    """A path was requested that is not below the web server's root.

    Constructed with two arguments: the offending path and the directory
    it was expected to be in.
    """

    def __str__(self):
        template = 'path %s is not in %s'
        return template % self.args

295

296

297

class TestingHTTPRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):

298

299

def log_message(self, format, *args):

300

self.server.test_case.log("webserver - %s - - [%s] %s",

301

self.address_string(),

302

self.log_date_time_string(),

303

format%args)

304

305

def handle_one_request(self):

306

"""Handle a single HTTP request.

307

308

You normally don't need to override this method; see the class

309

__doc__ string for information on how to handle specific HTTP

310

commands such as GET and POST.

311

312

"""

313

for i in xrange(1,11): # Don't try more than 10 times

314

try:

315

self.raw_requestline = self.rfile.readline()

316

except socket.error, e:

317

if e.args[0] in (errno.EAGAIN, errno.EWOULDBLOCK):

318

# omitted for now because some tests look at the log of

319

# the server and expect to see no errors. see recent

320

# email thread. -- mbp 20051021.

321

## self.log_message('EAGAIN (%d) while reading from raw_requestline' % i)

322

time.sleep(0.01)

323

continue

324

raise

325

else:

326

break

327

if not self.raw_requestline:

328

self.close_connection = 1

329

return

330

if not self.parse_request(): # An error code has been sent, just exit

331

return

332

mname = 'do_' + self.command

333

if not hasattr(self, mname):

334

self.send_error(501, "Unsupported method (%r)" % self.command)

335

return

336

method = getattr(self, mname)

337

method()

338

339

class TestingHTTPServer(BaseHTTPServer.HTTPServer):
    """HTTPServer that remembers the test case it is serving for.

    The test_case object is stored as an attribute so request handlers can
    reach it through self.server.test_case.
    """

    def __init__(self, server_address, RequestHandlerClass, test_case):
        """Bind to server_address and record test_case on the instance."""
        BaseHTTPServer.HTTPServer.__init__(
            self, server_address, RequestHandlerClass)
        self.test_case = test_case

344

345

346

class HttpServer(Server):

347

"""A test server for http transports."""

348

349

_HTTP_PORTS = range(13000, 0x8000)

350

351

def _http_start(self):

352

httpd = None

353

for port in self._HTTP_PORTS:

354

try:

355

httpd = TestingHTTPServer(('localhost', port),

356

TestingHTTPRequestHandler,

357

self)

358

except socket.error, e:

359

if e.args[0] == errno.EADDRINUSE:

360

continue

361

print >>sys.stderr, "Cannot run webserver :-("

362

raise

363

else:

364

break

365

366

if httpd is None:

367

raise WebserverNotAvailable("Cannot run webserver :-( "

368

"no free ports in range %s..%s" %

369

(_HTTP_PORTS[0], _HTTP_PORTS[-1]))

370

371

self._http_base_url = 'http://localhost:%s/' % port

372

self._http_starting.release()

373

httpd.socket.settimeout(0.1)

374

375

while self._http_running:

376

try:

377

httpd.handle_request()

378

except socket.timeout:

379

pass

380

381

def _get_remote_url(self, path):

382

path_parts = path.split(os.path.sep)

383

if os.path.isabs(path):

384

if path_parts[:len(self._local_path_parts)] != \

385

self._local_path_parts:

386

raise BadWebserverPath(path, self.test_dir)

387

remote_path = '/'.join(path_parts[len(self._local_path_parts):])

388

else:

389

remote_path = '/'.join(path_parts)

390

391

self._http_starting.acquire()

392

self._http_starting.release()

393

return self._http_base_url + remote_path

394

395

def log(self, *args, **kwargs):

396

"""Capture Server log output."""

397

self.logs.append(args[3])

398

399

def setUp(self):

400

"""See bzrlib.transport.Server.setUp."""

401

self._home_dir = os.getcwdu()

402

self._local_path_parts = self._home_dir.split(os.path.sep)

403

self._http_starting = threading.Lock()

404

self._http_starting.acquire()

405

self._http_running = True

406

self._http_base_url = None

407

self._http_thread = threading.Thread(target=self._http_start)

408

self._http_thread.setDaemon(True)

409

self._http_thread.start()

410

self._http_proxy = os.environ.get("http_proxy")

411

if self._http_proxy is not None:

412

del os.environ["http_proxy"]

413

self.logs = []

414

415

def tearDown(self):

416

"""See bzrlib.transport.Server.tearDown."""

417

self._http_running = False

418

self._http_thread.join()

419

if self._http_proxy is not None:

420

import os

421

os.environ["http_proxy"] = self._http_proxy

422

423

def get_url(self):

424

"""See bzrlib.transport.Server.get_url."""

425

return self._get_remote_url(self._home_dir)

426

427

def get_bogus_url(self):

428

"""See bzrlib.transport.Server.get_bogus_url."""

429

return 'http://jasldkjsalkdjalksjdkljasd'

430

431

432

def get_test_permutations():
    """Return the (transport class, server class) pairs to test with.

    A warning is issued on every call because HTTPS is not covered.
    """
    warn("There are no HTTPS transport provider tests yet.")
    permutations = [(HttpTransport, HttpServer)]
    return permutations

Older »