~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/transport/http.py

Committer: John Arbash Meinel
Date: 2006-01-19 23:14:42 UTC
mfrom: (1540.1.10 bzr-0.7-bugfix)
mto: (1534.1.10 integration) (1185.79.1 bzr-jam-pending) (1553.5.1 bzr.dev (Main development branch)) (1608.2.1 bzr.mbp.escape-stores)
mto: This revision was merged to the branch mainline in revision 1549.
Revision ID: john@arbash-meinel.com-20060119231442-b842ad9c4c2e47fc

[merge] 0.7-bugfix: Fix fileid_involved to unescape xml characters, fix StubServer to handle paramiko > 1.5.2

files added:
BRANCH.TODO

INSTALL

NEWS.developers

bzrlib/annotate.py

bzrlib/clone.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/export

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/gpg.py

bzrlib/graph.py

bzrlib/identitymap.py

bzrlib/lsprof.py

bzrlib/option.py

bzrlib/revisionspec.py

bzrlib/rio.py

bzrlib/store

bzrlib/store/text.py

bzrlib/symbol_versioning.py

bzrlib/testament.py

bzrlib/tests/HTTPTestUtil.py

bzrlib/tests/blackbox

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_api.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_basis_inventory.py

bzrlib/tests/test_command.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_fileid_involved.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_http.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_permissions.py

bzrlib/tests/test_remove.py

bzrlib/tests/test_revprops.py

bzrlib/tests/test_reweave.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_source.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_uncommit.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/treeshape.py

bzrlib/transactions.py

bzrlib/transport

bzrlib/transport/__init__.py

bzrlib/transport/ftp.py

bzrlib/transport/http.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/sftp.py

bzrlib/tsort.py

bzrlib/ui

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/util/configobj

bzrlib/util/configobj/__init__.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/configobj/validate.py

bzrlib/win32console.py

bzrlib/xml4.py

tools/biobench.py

tools/capture_tree.py

tools/riodemo.py

tools/trace-revisions

files removed:
bzrlib/mdiff.py

bzrlib/meta_store.py

bzrlib/remotebranch.py

bzrlib/revfile.py

bzrlib/upgrade.py

patches/annotate3.patch

patches/annotate4.patch

patches/cache-remote-revisions.diff

patches/find-touching-from-seq.diff

patches/meta-data-in-inventory.patch

patches/ndiff.patch

patches/pending-merge.patch

patches/plugins-no-plugins.patch

patches/progress.diff

patches/symlink-support.patch

testbzr

testsweet.py

files renamed:
bzr-man.py => bzr_man.py

bzrlib/store.py => bzrlib/store/__init__.py

bzrlib/weavestore.py => bzrlib/store/weave.py

bzrlib/selftest/ => bzrlib/tests/

bzrlib/selftest/blackbox.py => bzrlib/tests/blackbox/test_too_much.py

bzrlib/selftest/versioning.py => bzrlib/tests/blackbox/test_versioning.py

bzrlib/selftest/testbranch.py => bzrlib/tests/test_branch.py

bzrlib/selftest/testdiff.py => bzrlib/tests/test_diff.py

bzrlib/selftest/testfetch.py => bzrlib/tests/test_fetch.py

bzrlib/selftest/testhashcache.py => bzrlib/tests/test_hashcache.py

bzrlib/selftest/testinv.py => bzrlib/tests/test_inv.py

bzrlib/selftest/testlog.py => bzrlib/tests/test_log.py

bzrlib/selftest/testmerge3.py => bzrlib/tests/test_merge3.py

bzrlib/selftest/plugins.py => bzrlib/tests/test_plugins.py

bzrlib/selftest/testrevision.py => bzrlib/tests/test_revision.py

bzrlib/selftest/testrevisionnamespaces.py => bzrlib/tests/test_revisionnamespaces.py

bzrlib/selftest/teststatus.py => bzrlib/tests/test_status.py

bzrlib/selftest/teststore.py => bzrlib/tests/test_store.py

bzrlib/selftest/whitebox.py => bzrlib/tests/test_whitebox.py

bzrlib/ui.py => bzrlib/ui/__init__.py

tools/history2weaves.py => bzrlib/upgrade.py

bzrlib/xml.py => bzrlib/xml_serializer.py

files modified:
.bzrignore

.rsyncexclude

HACKING

Makefile

NEWS

README

TODO

build-api

bzr *

bzrlib/__init__.py

bzrlib/add.py

bzrlib/atomicfile.py

bzrlib/branch.py

bzrlib/builtins.py

bzrlib/changeset.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/delta.py

bzrlib/diff.py

bzrlib/errors.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/info.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/lock.py

bzrlib/log.py

bzrlib/merge.py *

bzrlib/merge3.py

bzrlib/merge_core.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/osutils.py

bzrlib/patch.py

bzrlib/plugin.py

bzrlib/progress.py

bzrlib/revision.py

bzrlib/shellcomplete.py

bzrlib/status.py

bzrlib/tests/__init__.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_parent.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_xml.py

bzrlib/textinv.py

bzrlib/trace.py

bzrlib/tree.py

bzrlib/weave.py *

bzrlib/weavefile.py

bzrlib/workingtree.py

bzrlib/xml5.py

contrib/newinventory.py

contrib/zsh/_bzr

doc/random.txt

setup.py *

tutorial.txt

Show diffs side-by-side

added added

removed removed

bzrlib/transport/http.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""Implementation of Transport over http.

"""

import os, errno

from cStringIO import StringIO

import urllib, urllib2

import urlparse

from warnings import warn

from bzrlib.transport import Transport, Server

from bzrlib.errors import (TransportNotPossible, NoSuchFile,

TransportError, ConnectionError)

from bzrlib.errors import BzrError, BzrCheckError

from bzrlib.branch import Branch

from bzrlib.trace import mutter

def extract_auth(url, password_manager):
    """Strip credentials from an HTTP/HTTPS url and register them.

    urllib2 is confused by urls that embed ``user:password@``, so the auth
    parameters are removed from the url.  When a password is present, the
    (unquoted) username and password are added to password_manager for the
    bare host name.

    :param url: A url starting with 'http://' or 'https://'.
    :param password_manager: Object providing
        add_password(realm, uri, user, passwd), e.g. a
        urllib2.HTTPPasswordMgrWithDefaultRealm.  It is only used when the
        url carries a password.
    :return: The url, minus the auth parameters.
    """
    assert url.startswith('http://') or url.startswith('https://')
    scheme, host = url.split('//', 1)
    if '/' in host:
        host, path = host.split('/', 1)
        path = '/' + path
    else:
        path = ''
    port = ''
    if '@' in host:
        # Split on the LAST '@'.  A host name cannot contain '@', so any
        # earlier '@' belongs to the (unescaped) username or password and
        # must not end up in the host part of the returned url.
        auth, host = host.rsplit('@', 1)
        if ':' in auth:
            username, password = auth.split(':', 1)
        else:
            username, password = auth, None
        if ':' in host:
            host, port = host.split(':', 1)
            port = ':' + port
        # FIXME: if password isn't given, should we ask for it?
        if password is not None:
            username = urllib.unquote(username)
            password = urllib.unquote(password)
            password_manager.add_password(None, host, username, password)
    url = scheme + '//' + host + port + path
    return url

def get_url(url):
    """Open url through urllib2 and return the resulting file-like object.

    Any credentials embedded in the url are moved into a password manager
    by extract_auth() and supplied through an HTTP basic-auth handler.

    :param url: An http:// or https:// url, optionally with embedded
        ``user:password@`` credentials.
    :return: The object returned by the urllib2 opener's open().
    """
    manager = urllib2.HTTPPasswordMgrWithDefaultRealm()
    url = extract_auth(url, manager)
    # Log only the stripped url, so that a password embedded in the
    # original url is never handed to mutter().
    mutter("get_url %s" % url)
    auth_handler = urllib2.HTTPBasicAuthHandler(manager)
    opener = urllib2.build_opener(auth_handler)
    return opener.open(url)

class HttpTransport(Transport):

"""This is the transport agent for http:// access.

TODO: Implement pipelined versions of all of the *_multi() functions.

"""

def __init__(self, base):

"""Set the base path where files will be stored."""

assert base.startswith('http://') or base.startswith('https://')

if base[-1] != '/':

base = base + '/'

super(HttpTransport, self).__init__(base)

# In the future we might actually connect to the remote host

# rather than using get_url

# self._connection = None

(self._proto, self._host,

self._path, self._parameters,

self._query, self._fragment) = urlparse.urlparse(self.base)

def should_cache(self):

"""Return True if the data pulled across should be cached locally.

"""

return True

def clone(self, offset=None):

"""Return a new HttpTransport with root at self.base + offset

100

For now HttpTransport does not actually connect, so just return

101

a new HttpTransport object.

102

"""

103

if offset is None:

104

return HttpTransport(self.base)

105

else:

106

return HttpTransport(self.abspath(offset))

107

108

def abspath(self, relpath):

109

"""Return the full url to the given relative path.

110

This can be supplied with a string or a list

111

"""

112

assert isinstance(relpath, basestring)

113

if isinstance(relpath, basestring):

114

relpath_parts = relpath.split('/')

115

else:

116

# TODO: Don't call this with an array - no magic interfaces

117

relpath_parts = relpath[:]

118

if len(relpath_parts) > 1:

119

if relpath_parts[0] == '':

120

raise ValueError("path %r within branch %r seems to be absolute"

121

% (relpath, self._path))

122

if relpath_parts[-1] == '':

123

raise ValueError("path %r within branch %r seems to be a directory"

124

% (relpath, self._path))

125

basepath = self._path.split('/')

126

if len(basepath) > 0 and basepath[-1] == '':

127

basepath = basepath[:-1]

128

for p in relpath_parts:

129

if p == '..':

130

if len(basepath) == 0:

131

# In most filesystems, a request for the parent

132

# of root, just returns root.

133

continue

134

basepath.pop()

135

elif p == '.' or p == '':

136

continue # No-op

137

else:

138

basepath.append(p)

139

# Possibly, we could use urlparse.urljoin() here, but

140

# I'm concerned about when it chooses to strip the last

141

# portion of the path, and when it doesn't.

142

path = '/'.join(basepath)

143

return urlparse.urlunparse((self._proto,

144

self._host, path, '', '', ''))

145

146

def has(self, relpath):

147

"""Does the target location exist?

148

149

TODO: HttpTransport.has() should use a HEAD request,

150

not a full GET request.

151

152

TODO: This should be changed so that we don't use

153

urllib2 and get an exception, the code path would be

154

cleaner if we just do an http HEAD request, and parse

155

the return code.

156

"""

157

path = relpath

158

try:

159

path = self.abspath(relpath)

160

f = get_url(path)

161

# Without the read and then close()

162

# we tend to have busy sockets.

163

f.read()

164

f.close()

165

return True

166

except urllib2.URLError, e:

167

mutter('url error code: %s for has url: %r', e.code, path)

168

if e.code == 404:

169

return False

170

raise

171

except IOError, e:

172

mutter('io error: %s %s for has url: %r',

173

e.errno, errno.errorcode.get(e.errno), path)

174

if e.errno == errno.ENOENT:

175

return False

176

raise TransportError(orig_error=e)

177

178

def get(self, relpath, decode=False):

179

"""Get the file at the given relative path.

180

181

:param relpath: The relative path to the file

182

"""

183

path = relpath

184

try:

185

path = self.abspath(relpath)

186

return get_url(path)

187

except urllib2.HTTPError, e:

188

mutter('url error code: %s for has url: %r', e.code, path)

189

if e.code == 404:

190

raise NoSuchFile(path, extra=e)

191

raise

192

except (BzrError, IOError), e:

193

if hasattr(e, 'errno'):

194

mutter('io error: %s %s for has url: %r',

195

e.errno, errno.errorcode.get(e.errno), path)

196

if e.errno == errno.ENOENT:

197

raise NoSuchFile(path, extra=e)

198

raise ConnectionError(msg = "Error retrieving %s: %s"

199

% (self.abspath(relpath), str(e)),

200

orig_error=e)

201

202

def put(self, relpath, f, mode=None):

203

"""Copy the file-like or string object into the location.

204

205

:param relpath: Location to put the contents, relative to base.

206

:param f: File-like or string object.

207

"""

208

raise TransportNotPossible('http PUT not supported')

209

210

def mkdir(self, relpath, mode=None):

211

"""Create a directory at the given path."""

212

raise TransportNotPossible('http does not support mkdir()')

213

214

def append(self, relpath, f):

215

"""Append the text in the file-like object into the final

216

location.

217

"""

218

raise TransportNotPossible('http does not support append()')

219

220

def copy(self, rel_from, rel_to):

221

"""Copy the item at rel_from to the location at rel_to"""

222

raise TransportNotPossible('http does not support copy()')

223

224

def copy_to(self, relpaths, other, mode=None, pb=None):

225

"""Copy a set of entries from self into another Transport.

226

227

:param relpaths: A list/generator of entries to be copied.

228

229

TODO: if other is LocalTransport, is it possible to

230

do better than put(get())?

231

"""

232

# At this point HttpTransport might be able to check and see if

233

# the remote location is the same, and rather than download, and

234

# then upload, it could just issue a remote copy_this command.

235

if isinstance(other, HttpTransport):

236

raise TransportNotPossible('http cannot be the target of copy_to()')

237

else:

238

return super(HttpTransport, self).copy_to(relpaths, other, mode=mode, pb=pb)

239

240

def move(self, rel_from, rel_to):

241

"""Move the item at rel_from to the location at rel_to"""

242

raise TransportNotPossible('http does not support move()')

243

244

def delete(self, relpath):

245

"""Delete the item at relpath"""

246

raise TransportNotPossible('http does not support delete()')

247

248

def is_readonly(self):

249

"""See Transport.is_readonly."""

250

return True

251

252

def listable(self):

253

"""See Transport.listable."""

254

return False

255

256

def stat(self, relpath):

257

"""Return the stat information for a file.

258

"""

259

raise TransportNotPossible('http does not support stat()')

260

261

def lock_read(self, relpath):

262

"""Lock the given file for shared (read) access.

263

:return: A lock object, which should be passed to Transport.unlock()

264

"""

265

# The old RemoteBranch ignore lock for reading, so we will

266

# continue that tradition and return a bogus lock object.

267

class BogusLock(object):

268

def __init__(self, path):

269

self.path = path

270

def unlock(self):

271

pass

272

return BogusLock(relpath)

273

274

def lock_write(self, relpath):

275

"""Lock the given file for exclusive (write) access.

276

WARNING: many transports do not support this, so trying avoid using it

277

278

:return: A lock object, which should be passed to Transport.unlock()

279

"""

280

raise TransportNotPossible('http does not support lock_write()')

281

282

283

#---------------- test server facilities ----------------

284

import BaseHTTPServer, SimpleHTTPServer, socket, time

285

import threading

286

287

288

class WebserverNotAvailable(Exception):
    """Raised when the test web server cannot be run."""

290

291

292

class BadWebserverPath(ValueError):
    """A path that does not lie inside the expected directory.

    Constructed with two arguments, the path and the directory, which are
    interpolated into the message by __str__.
    """

    def __str__(self):
        template = 'path %s is not in %s'
        return template % self.args

295

296

297

class TestingHTTPRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):

298

299

def log_message(self, format, *args):

300

self.server.test_case.log("webserver - %s - - [%s] %s",

301

self.address_string(),

302

self.log_date_time_string(),

303

format%args)

304

305

def handle_one_request(self):

306

"""Handle a single HTTP request.

307

308

You normally don't need to override this method; see the class

309

__doc__ string for information on how to handle specific HTTP

310

commands such as GET and POST.

311

312

"""

313

for i in xrange(1,11): # Don't try more than 10 times

314

try:

315

self.raw_requestline = self.rfile.readline()

316

except socket.error, e:

317

if e.args[0] in (errno.EAGAIN, errno.EWOULDBLOCK):

318

# omitted for now because some tests look at the log of

319

# the server and expect to see no errors. see recent

320

# email thread. -- mbp 20051021.

321

## self.log_message('EAGAIN (%d) while reading from raw_requestline' % i)

322

time.sleep(0.01)

323

continue

324

raise

325

else:

326

break

327

if not self.raw_requestline:

328

self.close_connection = 1

329

return

330

if not self.parse_request(): # An error code has been sent, just exit

331

return

332

mname = 'do_' + self.command

333

if not hasattr(self, mname):

334

self.send_error(501, "Unsupported method (%r)" % self.command)

335

return

336

method = getattr(self, mname)

337

method()

338

339

class TestingHTTPServer(BaseHTTPServer.HTTPServer):
    """HTTPServer that keeps a reference to the object it reports to."""

    def __init__(self, server_address, RequestHandlerClass, test_case):
        """Create the server, then remember test_case on the instance.

        :param server_address: Passed through to BaseHTTPServer.HTTPServer.
        :param RequestHandlerClass: Passed through to
            BaseHTTPServer.HTTPServer.
        :param test_case: Stored as self.test_case.
        """
        parent_init = BaseHTTPServer.HTTPServer.__init__
        parent_init(self, server_address, RequestHandlerClass)
        self.test_case = test_case

344

345

346

class HttpServer(Server):

347

"""A test server for http transports."""

348

349

_HTTP_PORTS = range(13000, 0x8000)

350

351

def _http_start(self):

352

httpd = None

353

for port in self._HTTP_PORTS:

354

try:

355

httpd = TestingHTTPServer(('localhost', port),

356

TestingHTTPRequestHandler,

357

self)

358

except socket.error, e:

359

if e.args[0] == errno.EADDRINUSE:

360

continue

361

print >>sys.stderr, "Cannot run webserver :-("

362

raise

363

else:

364

break

365

366

if httpd is None:

367

raise WebserverNotAvailable("Cannot run webserver :-( "

368

"no free ports in range %s..%s" %

369

(_HTTP_PORTS[0], _HTTP_PORTS[-1]))

370

371

self._http_base_url = 'http://localhost:%s/' % port

372

self._http_starting.release()

373

httpd.socket.settimeout(0.1)

374

375

while self._http_running:

376

try:

377

httpd.handle_request()

378

except socket.timeout:

379

pass

380

381

def _get_remote_url(self, path):

382

path_parts = path.split(os.path.sep)

383

if os.path.isabs(path):

384

if path_parts[:len(self._local_path_parts)] != \

385

self._local_path_parts:

386

raise BadWebserverPath(path, self.test_dir)

387

remote_path = '/'.join(path_parts[len(self._local_path_parts):])

388

else:

389

remote_path = '/'.join(path_parts)

390

391

self._http_starting.acquire()

392

self._http_starting.release()

393

return self._http_base_url + remote_path

394

395

def log(self, *args, **kwargs):

396

"""Capture Server log output."""

397

self.logs.append(args[3])

398

399

def setUp(self):

400

"""See bzrlib.transport.Server.setUp."""

401

self._home_dir = os.getcwdu()

402

self._local_path_parts = self._home_dir.split(os.path.sep)

403

self._http_starting = threading.Lock()

404

self._http_starting.acquire()

405

self._http_running = True

406

self._http_base_url = None

407

self._http_thread = threading.Thread(target=self._http_start)

408

self._http_thread.setDaemon(True)

409

self._http_thread.start()

410

self._http_proxy = os.environ.get("http_proxy")

411

if self._http_proxy is not None:

412

del os.environ["http_proxy"]

413

self.logs = []

414

415

def tearDown(self):

416

"""See bzrlib.transport.Server.tearDown."""

417

self._http_running = False

418

self._http_thread.join()

419

if self._http_proxy is not None:

420

import os

421

os.environ["http_proxy"] = self._http_proxy

422

423

def get_url(self):

424

"""See bzrlib.transport.Server.get_url."""

425

return self._get_remote_url(self._home_dir)

426

427

def get_bogus_url(self):

428

"""See bzrlib.transport.Server.get_bogus_url."""

429

return 'http://jasldkjsalkdjalksjdkljasd'

430

431

432

def get_test_permutations():
    """Return the (transport class, server class) pairs used in testing.

    A warning is issued on every call because HTTPS is not covered.
    """
    warn("There are no HTTPS transport provider tests yet.")
    permutations = [(HttpTransport, HttpServer)]
    return permutations

Older »