~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/transport/http.py

Committer: John Arbash Meinel
Date: 2006-02-06 14:02:09 UTC
mfrom: (1185.76.8 Erik Bågfors' pull --revision branch)
mto: (1534.1.20 integration) (1553.4.1 revision-ranges) (1185.79.1 bzr-jam-pending)
mto: This revision was merged to the branch mainline in revision 1554.
Revision ID: john@arbash-meinel.com-20060206140209-793eeeb7f3f3d069

[merge] Erik Bågfors: add --revision to bzr pull

files added:
BRANCH.TODO

HACKING

INSTALL

Makefile

NEWS.developers

bzrlib/annotate.py

bzrlib/builtins.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/doc

bzrlib/doc/__init__.py

bzrlib/doc/api

bzrlib/doc/api/__init__.py

bzrlib/doc/api/transport.txt

bzrlib/export

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/gpg.py

bzrlib/graph.py

bzrlib/hashcache.py

bzrlib/identitymap.py

bzrlib/intset.py

bzrlib/iterablefile.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lsprof.py

bzrlib/merge3.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/option.py

bzrlib/plugin.py

bzrlib/plugins

bzrlib/plugins/__init__.py

bzrlib/progress.py

bzrlib/repository.py

bzrlib/revisionspec.py

bzrlib/rio.py

bzrlib/shellcomplete.py

bzrlib/store

bzrlib/store/text.py

bzrlib/store/weave.py

bzrlib/symbol_versioning.py

bzrlib/testament.py

bzrlib/tests

bzrlib/tests/HTTPTestUtil.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/branch_implementations

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_api.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_basis_inventory.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_command.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_doc_generate.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_fileid_involved.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_http.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_log.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge3.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_parent.py

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisionnamespaces.py

bzrlib/tests/test_revprops.py

bzrlib/tests/test_reweave.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_source.py

bzrlib/tests/test_store.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_uncommit.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_whitebox.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_xml.py

bzrlib/tests/treeshape.py

bzrlib/transactions.py

bzrlib/transport

bzrlib/transport/__init__.py

bzrlib/transport/ftp.py

bzrlib/transport/http.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/readonly.py

bzrlib/transport/sftp.py

bzrlib/tsort.py

bzrlib/ui

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/util

bzrlib/util/__init__.py

bzrlib/util/configobj

bzrlib/util/configobj/__init__.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/configobj/validate.py

bzrlib/util/effbot

bzrlib/util/effbot/__init__.py

bzrlib/util/effbot/org

bzrlib/util/effbot/org/__init__.py

bzrlib/util/effbot/org/gzip_consumer.py

bzrlib/util/effbot/org/http_client.py

bzrlib/util/effbot/org/http_manager.py

bzrlib/weave.py

bzrlib/weavefile.py

bzrlib/win32console.py

bzrlib/xml4.py

bzrlib/xml5.py

contrib/bash/bzr

contrib/create_bzr_rollup.py

contrib/emacs

contrib/emacs/bzr-mode.el

contrib/pwclient.full

contrib/pwk

contrib/upload-bzr.dev

doc/split-join-files.txt

generate_docs.py

notes/inventory-v2-sample.xml

notes/inventory-v2.rnc

notes/revfile.txt

notes/schemas.xml

tools

tools/__init__.py

tools/biobench.py

tools/capture_tree.py

tools/convertfile.py

tools/convertinv.py

tools/doc_generate

tools/doc_generate/__init__.py

tools/doc_generate/autodoc_bash_completion.py

tools/doc_generate/autodoc_man.py

tools/history2revfiles.py

tools/http_client.py

tools/riodemo.py

tools/trace-revisions

tools/weavebench.py

tools/weavemerge.sh

tutorial.txt

files removed:
bzrlib/mdiff.py

bzrlib/remotebranch.py

bzrlib/revfile.py

bzrlib/statcache.py

testbzr

files renamed:
bzrlib/changeset.py => bzrlib/_changeset.py

bzrlib/merge_core.py => bzrlib/_merge_core.py

bzrlib/store.py => bzrlib/store/__init__.py

elementtree/ => bzrlib/util/elementtree/

urlgrabber/ => bzrlib/util/urlgrabber/

bzrlib/xml.py => bzrlib/xml_serializer.py

contrib/bash/bzr => contrib/bash/bzr.simple

bzrlib/newinventory.py => contrib/newinventory.py

files modified:
.bzrignore

.rsyncexclude

NEWS

README

TODO

build-api

bzr *

bzrlib/__init__.py

bzrlib/add.py

bzrlib/atomicfile.py

bzrlib/branch.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/diff.py

bzrlib/errors.py

bzrlib/help.py

bzrlib/info.py

bzrlib/inventory.py

bzrlib/log.py

bzrlib/merge.py

bzrlib/osutils.py

bzrlib/patch.py

bzrlib/revision.py

bzrlib/status.py

bzrlib/textinv.py

bzrlib/trace.py

bzrlib/tree.py

bzrlib/util/urlgrabber/keepalive.py

bzrlib/workingtree.py

contrib/add-bzr-to-baz

contrib/zsh/_bzr

doc/formats.txt

doc/index.txt

doc/random.txt

doc/todo-from-arch.txt

setup.py *

Show diffs side-by-side

added added

removed removed

bzrlib/transport/http.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""Implementation of Transport over http.

"""

import os, errno

from cStringIO import StringIO

import urllib, urllib2

import urlparse

from warnings import warn

import bzrlib

from bzrlib.transport import Transport, Server

from bzrlib.errors import (TransportNotPossible, NoSuchFile,

TransportError, ConnectionError)

from bzrlib.errors import BzrError, BzrCheckError

from bzrlib.branch import Branch

from bzrlib.trace import mutter

from bzrlib.ui import ui_factory

def extract_auth(url, password_manager):
    """Strip embedded credentials from an HTTP/HTTPS url.

    Any ``user[:password]@`` prefix found in the network location is removed
    from the url and registered with ``password_manager`` (credentials left
    in the url confuse urllib2).  When the url names a user but carries no
    password, the password is requested through ``ui_factory.get_password``.

    :param url: An http:// or https:// url, optionally carrying credentials.
    :param password_manager: Object whose ``add_password(realm, uri, user,
        passwd)`` method receives the extracted credentials.  They are
        registered for the bare host name (port removed) with realm None.
    :return: The url with the credentials removed.  A url without
        credentials is returned re-assembled from its split components.
    """
    scheme, netloc, path, query, fragment = urlparse.urlsplit(url)
    assert (scheme == 'http') or (scheme == 'https')

    if '@' in netloc:
        auth, netloc = netloc.split('@', 1)
        if ':' in auth:
            username, password = auth.split(':', 1)
        else:
            username, password = auth, None
        # The credentials are keyed on the host only, without the port.
        if ':' in netloc:
            host = netloc.split(':', 1)[0]
        else:
            host = netloc
        # Credentials arrive percent-encoded inside the url.
        username = urllib.unquote(username)
        if password is not None:
            password = urllib.unquote(password)
        else:
            # The prompt is a %-style template for the keyword arguments
            # (NOTE(review): confirm get_password expands it that way).
            # Each mapping key needs a conversion type: '%(user)@' is not a
            # valid format and would raise ValueError when expanded.
            password = ui_factory.get_password(
                prompt='HTTP %(user)s@%(host)s password',
                user=username, host=host)
        password_manager.add_password(None, host, username, password)
    url = urlparse.urlunsplit((scheme, netloc, path, query, fragment))
    return url

def get_url(url):
    """Open ``url`` through urllib2 and return the response object.

    Credentials embedded in the url are moved into a password manager by
    extract_auth() and offered to the server via HTTP basic authentication.
    The request carries a ``bzr/<version>`` User-Agent header.

    :param url: The http:// or https:// url to fetch.
    :return: Whatever the urllib2 opener's ``open()`` returns.
    """
    import urllib2
    # NOTE(review): the url is traced before its credentials are stripped,
    # so an embedded password may reach the trace output - confirm this is
    # acceptable.
    mutter("get_url %s", url)
    password_mgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
    bare_url = extract_auth(url, password_mgr)
    opener = urllib2.build_opener(urllib2.HTTPBasicAuthHandler(password_mgr))
    req = urllib2.Request(bare_url)
    req.add_header('User-Agent', 'bzr/%s' % bzrlib.__version__)
    return opener.open(req)

class HttpTransport(Transport):

"""This is the transport agent for http:// access.

TODO: Implement pipelined versions of all of the *_multi() functions.

"""

def __init__(self, base):

"""Set the base path where files will be stored."""

assert base.startswith('http://') or base.startswith('https://')

if base[-1] != '/':

base = base + '/'

super(HttpTransport, self).__init__(base)

# In the future we might actually connect to the remote host

# rather than using get_url

# self._connection = None

(self._proto, self._host,

self._path, self._parameters,

self._query, self._fragment) = urlparse.urlparse(self.base)

def should_cache(self):

"""Return True if the data pulled across should be cached locally.

"""

100

return True

101

102

def clone(self, offset=None):

103

"""Return a new HttpTransport with root at self.base + offset

104

For now HttpTransport does not actually connect, so just return

105

a new HttpTransport object.

106

"""

107

if offset is None:

108

return HttpTransport(self.base)

109

else:

110

return HttpTransport(self.abspath(offset))

111

112

def abspath(self, relpath):

113

"""Return the full url to the given relative path.

114

This can be supplied with a string or a list

115

"""

116

assert isinstance(relpath, basestring)

117

if isinstance(relpath, basestring):

118

relpath_parts = relpath.split('/')

119

else:

120

# TODO: Don't call this with an array - no magic interfaces

121

relpath_parts = relpath[:]

122

if len(relpath_parts) > 1:

123

if relpath_parts[0] == '':

124

raise ValueError("path %r within branch %r seems to be absolute"

125

% (relpath, self._path))

126

if relpath_parts[-1] == '':

127

raise ValueError("path %r within branch %r seems to be a directory"

128

% (relpath, self._path))

129

basepath = self._path.split('/')

130

if len(basepath) > 0 and basepath[-1] == '':

131

basepath = basepath[:-1]

132

for p in relpath_parts:

133

if p == '..':

134

if len(basepath) == 0:

135

# In most filesystems, a request for the parent

136

# of root, just returns root.

137

continue

138

basepath.pop()

139

elif p == '.' or p == '':

140

continue # No-op

141

else:

142

basepath.append(p)

143

# Possibly, we could use urlparse.urljoin() here, but

144

# I'm concerned about when it chooses to strip the last

145

# portion of the path, and when it doesn't.

146

path = '/'.join(basepath)

147

return urlparse.urlunparse((self._proto,

148

self._host, path, '', '', ''))

149

150

def has(self, relpath):

151

"""Does the target location exist?

152

153

TODO: HttpTransport.has() should use a HEAD request,

154

not a full GET request.

155

156

TODO: This should be changed so that we don't use

157

urllib2 and get an exception, the code path would be

158

cleaner if we just do an http HEAD request, and parse

159

the return code.

160

"""

161

path = relpath

162

try:

163

path = self.abspath(relpath)

164

f = get_url(path)

165

# Without the read and then close()

166

# we tend to have busy sockets.

167

f.read()

168

f.close()

169

return True

170

except urllib2.URLError, e:

171

mutter('url error code: %s for has url: %r', e.code, path)

172

if e.code == 404:

173

return False

174

raise

175

except IOError, e:

176

mutter('io error: %s %s for has url: %r',

177

e.errno, errno.errorcode.get(e.errno), path)

178

if e.errno == errno.ENOENT:

179

return False

180

raise TransportError(orig_error=e)

181

182

def get(self, relpath, decode=False):

183

"""Get the file at the given relative path.

184

185

:param relpath: The relative path to the file

186

"""

187

path = relpath

188

try:

189

path = self.abspath(relpath)

190

return get_url(path)

191

except urllib2.HTTPError, e:

192

mutter('url error code: %s for has url: %r', e.code, path)

193

if e.code == 404:

194

raise NoSuchFile(path, extra=e)

195

raise

196

except (BzrError, IOError), e:

197

if hasattr(e, 'errno'):

198

mutter('io error: %s %s for has url: %r',

199

e.errno, errno.errorcode.get(e.errno), path)

200

if e.errno == errno.ENOENT:

201

raise NoSuchFile(path, extra=e)

202

raise ConnectionError(msg = "Error retrieving %s: %s"

203

% (self.abspath(relpath), str(e)),

204

orig_error=e)

205

206

def put(self, relpath, f, mode=None):

207

"""Copy the file-like or string object into the location.

208

209

:param relpath: Location to put the contents, relative to base.

210

:param f: File-like or string object.

211

"""

212

raise TransportNotPossible('http PUT not supported')

213

214

def mkdir(self, relpath, mode=None):

215

"""Create a directory at the given path."""

216

raise TransportNotPossible('http does not support mkdir()')

217

218

def rmdir(self, relpath):

219

"""See Transport.rmdir."""

220

raise TransportNotPossible('http does not support rmdir()')

221

222

def append(self, relpath, f):

223

"""Append the text in the file-like object into the final

224

location.

225

"""

226

raise TransportNotPossible('http does not support append()')

227

228

def copy(self, rel_from, rel_to):

229

"""Copy the item at rel_from to the location at rel_to"""

230

raise TransportNotPossible('http does not support copy()')

231

232

def copy_to(self, relpaths, other, mode=None, pb=None):

233

"""Copy a set of entries from self into another Transport.

234

235

:param relpaths: A list/generator of entries to be copied.

236

237

TODO: if other is LocalTransport, is it possible to

238

do better than put(get())?

239

"""

240

# At this point HttpTransport might be able to check and see if

241

# the remote location is the same, and rather than download, and

242

# then upload, it could just issue a remote copy_this command.

243

if isinstance(other, HttpTransport):

244

raise TransportNotPossible('http cannot be the target of copy_to()')

245

else:

246

return super(HttpTransport, self).copy_to(relpaths, other, mode=mode, pb=pb)

247

248

def move(self, rel_from, rel_to):

249

"""Move the item at rel_from to the location at rel_to"""

250

raise TransportNotPossible('http does not support move()')

251

252

def delete(self, relpath):

253

"""Delete the item at relpath"""

254

raise TransportNotPossible('http does not support delete()')

255

256

def is_readonly(self):

257

"""See Transport.is_readonly."""

258

return True

259

260

def listable(self):

261

"""See Transport.listable."""

262

return False

263

264

def stat(self, relpath):

265

"""Return the stat information for a file.

266

"""

267

raise TransportNotPossible('http does not support stat()')

268

269

def lock_read(self, relpath):

270

"""Lock the given file for shared (read) access.

271

:return: A lock object, which should be passed to Transport.unlock()

272

"""

273

# The old RemoteBranch ignore lock for reading, so we will

274

# continue that tradition and return a bogus lock object.

275

class BogusLock(object):

276

def __init__(self, path):

277

self.path = path

278

def unlock(self):

279

pass

280

return BogusLock(relpath)

281

282

def lock_write(self, relpath):

283

"""Lock the given file for exclusive (write) access.

284

WARNING: many transports do not support this, so trying avoid using it

285

286

:return: A lock object, which should be passed to Transport.unlock()

287

"""

288

raise TransportNotPossible('http does not support lock_write()')

289

290

291

#---------------- test server facilities ----------------

292

import BaseHTTPServer, SimpleHTTPServer, socket, time

293

import threading

294

295

296

class WebserverNotAvailable(Exception):
    """Raised when the test web server cannot be started."""

298

299

300

class BadWebserverPath(ValueError):
    """A path that does not lie inside the expected directory.

    Instances are expected to be built with two arguments, the offending
    path and the directory it should have been in; both are interpolated
    into the message.
    """

    def __str__(self):
        template = 'path %s is not in %s'
        return template % self.args

303

304

305

class TestingHTTPRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):

306

307

def log_message(self, format, *args):

308

self.server.test_case.log("webserver - %s - - [%s] %s",

309

self.address_string(),

310

self.log_date_time_string(),

311

format%args)

312

313

def handle_one_request(self):

314

"""Handle a single HTTP request.

315

316

You normally don't need to override this method; see the class

317

__doc__ string for information on how to handle specific HTTP

318

commands such as GET and POST.

319

320

"""

321

for i in xrange(1,11): # Don't try more than 10 times

322

try:

323

self.raw_requestline = self.rfile.readline()

324

except socket.error, e:

325

if e.args[0] in (errno.EAGAIN, errno.EWOULDBLOCK):

326

# omitted for now because some tests look at the log of

327

# the server and expect to see no errors. see recent

328

# email thread. -- mbp 20051021.

329

## self.log_message('EAGAIN (%d) while reading from raw_requestline' % i)

330

time.sleep(0.01)

331

continue

332

raise

333

else:

334

break

335

if not self.raw_requestline:

336

self.close_connection = 1

337

return

338

if not self.parse_request(): # An error code has been sent, just exit

339

return

340

mname = 'do_' + self.command

341

if not hasattr(self, mname):

342

self.send_error(501, "Unsupported method (%r)" % self.command)

343

return

344

method = getattr(self, mname)

345

method()

346

347

class TestingHTTPServer(BaseHTTPServer.HTTPServer):
    """HTTPServer that remembers the test case it is serving for."""

    def __init__(self, server_address, RequestHandlerClass, test_case):
        """Initialise the base server, then record ``test_case``.

        :param server_address: Passed through to HTTPServer.
        :param RequestHandlerClass: Passed through to HTTPServer.
        :param test_case: Stored as ``self.test_case``.
        """
        base_init = BaseHTTPServer.HTTPServer.__init__
        base_init(self, server_address, RequestHandlerClass)
        self.test_case = test_case

352

353

354

class HttpServer(Server):

355

"""A test server for http transports."""

356

357

_HTTP_PORTS = range(13000, 0x8000)

358

359

def _http_start(self):

360

httpd = None

361

for port in self._HTTP_PORTS:

362

try:

363

httpd = TestingHTTPServer(('localhost', port),

364

TestingHTTPRequestHandler,

365

self)

366

except socket.error, e:

367

if e.args[0] == errno.EADDRINUSE:

368

continue

369

print >>sys.stderr, "Cannot run webserver :-("

370

raise

371

else:

372

break

373

374

if httpd is None:

375

raise WebserverNotAvailable("Cannot run webserver :-( "

376

"no free ports in range %s..%s" %

377

(_HTTP_PORTS[0], _HTTP_PORTS[-1]))

378

379

self._http_base_url = 'http://localhost:%s/' % port

380

self._http_starting.release()

381

httpd.socket.settimeout(0.1)

382

383

while self._http_running:

384

try:

385

httpd.handle_request()

386

except socket.timeout:

387

pass

388

389

def _get_remote_url(self, path):

390

path_parts = path.split(os.path.sep)

391

if os.path.isabs(path):

392

if path_parts[:len(self._local_path_parts)] != \

393

self._local_path_parts:

394

raise BadWebserverPath(path, self.test_dir)

395

remote_path = '/'.join(path_parts[len(self._local_path_parts):])

396

else:

397

remote_path = '/'.join(path_parts)

398

399

self._http_starting.acquire()

400

self._http_starting.release()

401

return self._http_base_url + remote_path

402

403

def log(self, *args, **kwargs):

404

"""Capture Server log output."""

405

self.logs.append(args[3])

406

407

def setUp(self):

408

"""See bzrlib.transport.Server.setUp."""

409

self._home_dir = os.getcwdu()

410

self._local_path_parts = self._home_dir.split(os.path.sep)

411

self._http_starting = threading.Lock()

412

self._http_starting.acquire()

413

self._http_running = True

414

self._http_base_url = None

415

self._http_thread = threading.Thread(target=self._http_start)

416

self._http_thread.setDaemon(True)

417

self._http_thread.start()

418

self._http_proxy = os.environ.get("http_proxy")

419

if self._http_proxy is not None:

420

del os.environ["http_proxy"]

421

self.logs = []

422

423

def tearDown(self):

424

"""See bzrlib.transport.Server.tearDown."""

425

self._http_running = False

426

self._http_thread.join()

427

if self._http_proxy is not None:

428

import os

429

os.environ["http_proxy"] = self._http_proxy

430

431

def get_url(self):

432

"""See bzrlib.transport.Server.get_url."""

433

return self._get_remote_url(self._home_dir)

434

435

def get_bogus_url(self):

436

"""See bzrlib.transport.Server.get_bogus_url."""

437

return 'http://jasldkjsalkdjalksjdkljasd'

438

439

440

def get_test_permutations():
    """Return the (transport class, server class) pairs to test with.

    A warning is issued on every call because HTTPS is not covered yet.
    """
    warn("There are no HTTPS transport provider tests yet.")
    permutations = [
        (HttpTransport, HttpServer),
    ]
    return permutations

Older »