~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/transport/http/__init__.py

Committer: Aaron Bentley
Date: 2006-03-31 02:39:09 UTC
mfrom: (1558.7.10 bzr.ab.integration)
mto: This revision was merged to the branch mainline in revision 1634.
Revision ID: aaron.bentley@utoronto.ca-20060331023909-957277c1233c6aff

Merge from bzr.ab.integration

files added:
bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/transport/http

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib.py

bzrlib/weave_commands.py

files renamed:
bzrlib/transport/http.py => bzrlib/transport/http/__init__.py

files modified:
NEWS

README

bzrlib/branch.py

bzrlib/builtins.py

bzrlib/bzrdir.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/doc/api/transport.txt

bzrlib/errors.py

bzrlib/fetch.py

bzrlib/hashcache.py

bzrlib/info.py

bzrlib/inventory.py

bzrlib/knit.py

bzrlib/log.py

bzrlib/merge.py

bzrlib/msgeditor.py

bzrlib/progress.py

bzrlib/reconcile.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_config.py

bzrlib/tests/test_doc_generate.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_http.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_options.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_uncommit.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_weave.py

bzrlib/transport/__init__.py

bzrlib/transport/ftp.py

bzrlib/transport/local.py

bzrlib/transport/sftp.py

bzrlib/tsort.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/versionedfile.py

bzrlib/weave.py

bzrlib/weavefile.py

bzrlib/workingtree.py

setup.py

Show diffs side-by-side

added added

removed removed

bzrlib/transport/http/__init__.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""Implementation of Transport over http.

"""Base implementation of Transport over http.

There are separate implementation modules for each http client implementation.

"""

import os, errno

import errno

import os

from collections import deque

from cStringIO import StringIO

import urllib, urllib2

import re

import urlparse

import urllib

from warnings import warn

import bzrlib

from bzrlib.transport import Transport, Server

from bzrlib.errors import (TransportNotPossible, NoSuchFile,

from bzrlib.transport import Transport, register_transport, Server

from bzrlib.errors import (TransportNotPossible, NoSuchFile,

TransportError, ConnectionError)

from bzrlib.errors import BzrError, BzrCheckError

from bzrlib.branch import Branch

from bzrlib.trace import mutter

# TODO: load these only when running http tests

import BaseHTTPServer, SimpleHTTPServer, socket, time

import threading

from bzrlib.ui import ui_factory

def extract_auth(url, password_manager):

"""

Extract auth parameters from an HTTP/HTTPS url and add them to the given

"""Extract auth parameters from an HTTP/HTTPS url and add them to the given

password manager. Return the url, minus those auth parameters (which

confuse urllib2).

"""

assert re.match(r'^(https?)(\+\w+)?://', url), \

'invalid absolute url %r' % url

scheme, netloc, path, query, fragment = urlparse.urlsplit(url)

assert (scheme == 'http') or (scheme == 'https')

if '@' in netloc:

auth, netloc = netloc.split('@', 1)

return url

class Request(urllib2.Request):

"""Request object for urllib2 that allows the method to be overridden."""

method = None

def get_method(self):

if self.method is not None:

return self.method

else:

return urllib2.Request.get_method(self)

def get_url(url, method=None, ranges=None):

import urllib2

if ranges:

rangestring = ranges

else:

rangestring = 'all'

mutter("get_url %s [%s]", url, rangestring)

manager = urllib2.HTTPPasswordMgrWithDefaultRealm()

url = extract_auth(url, manager)

auth_handler = urllib2.HTTPBasicAuthHandler(manager)

opener = urllib2.build_opener(auth_handler)

request = Request(url)

request.method = method

request.add_header('User-Agent', 'bzr/%s' % bzrlib.__version__)

if ranges:

request.add_header('Range', ranges)

response = opener.open(request)

return response

class HttpTransport(Transport):

100

"""This is the transport agent for http:// access.

101

102

TODO: Implement pipelined versions of all of the *_multi() functions.

class HttpTransportBase(Transport):

"""Base class for http implementations.

Does URL parsing, etc, but not any network IO.

The protocol can be given as e.g. http+urllib://host/ to use a particular

implementation.

103

"""

104

# _proto: "http" or "https"

# _qualified_proto: may have "+pycurl", etc

105

def __init__(self, base):

106

"""Set the base path where files will be stored."""

107

assert base.startswith('http://') or base.startswith('https://')

proto_match = re.match(r'^(https?)(\+\w+)?://', base)

if not proto_match:

raise AssertionError("not a http url: %r" % base)

self._proto = proto_match.group(1)

impl_name = proto_match.group(2)

if impl_name:

impl_name = impl_name[1:]

self._impl_name = impl_name

108

if base[-1] != '/':

109

base = base + '/'

110

super(HttpTransport, self).__init__(base)

super(HttpTransportBase, self).__init__(base)

111

# In the future we might actually connect to the remote host

112

# rather than using get_url

113

100

# self._connection = None

114

(self._proto, self._host,

101

(apparent_proto, self._host,

115

102

self._path, self._parameters,

116

103

self._query, self._fragment) = urlparse.urlparse(self.base)

117

118

def should_cache(self):

119

"""Return True if the data pulled across should be cached locally.

120

"""

121

return True

122

123

def clone(self, offset=None):

124

"""Return a new HttpTransport with root at self.base + offset

125

For now HttpTransport does not actually connect, so just return

126

a new HttpTransport object.

127

"""

128

if offset is None:

129

return HttpTransport(self.base)

130

else:

131

return HttpTransport(self.abspath(offset))

104

self._qualified_proto = apparent_proto

132

105

133

106

def abspath(self, relpath):

134

107

"""Return the full url to the given relative path.

135

This can be supplied with a string or a list

108

109

This can be supplied with a string or a list.

110

111

The URL returned always has the protocol scheme originally used to

112

construct the transport, even if that includes an explicit

113

implementation qualifier.

136

114

"""

137

115

assert isinstance(relpath, basestring)

138

116

if isinstance(relpath, basestring):

165

143

# I'm concerned about when it chooses to strip the last

166

144

# portion of the path, and when it doesn't.

167

145

path = '/'.join(basepath)

168

return urlparse.urlunparse((self._proto,

169

self._host, path, '', '', ''))

146

return urlparse.urlunparse((self._qualified_proto,

147

self._host, path, '', '', ''))

148

149

def _real_abspath(self, relpath):

150

"""Produce absolute path, adjusting protocol if needed"""

151

abspath = self.abspath(relpath)

152

qp = self._qualified_proto

153

rp = self._proto

154

if self._qualified_proto != self._proto:

155

abspath = rp + abspath[len(qp):]

156

if not isinstance(abspath, str):

157

# escaping must be done at a higher level

158

abspath = abspath.encode('ascii')

159

return abspath

170

160

171

161

def has(self, relpath):

172

"""Does the target location exist?

173

174

TODO: This should be changed so that we don't use

175

urllib2 and get an exception, the code path would be

176

cleaner if we just do an http HEAD request, and parse

177

the return code.

178

"""

179

path = relpath

180

try:

181

path = self.abspath(relpath)

182

f = get_url(path, method='HEAD')

183

# Without the read and then close()

184

# we tend to have busy sockets.

185

f.read()

186

f.close()

187

return True

188

except urllib2.HTTPError, e:

189

mutter('url error code: %s for has url: %r', e.code, path)

190

if e.code == 404:

191

return False

192

raise

193

except IOError, e:

194

mutter('io error: %s %s for has url: %r',

195

e.errno, errno.errorcode.get(e.errno), path)

196

if e.errno == errno.ENOENT:

197

return False

198

raise TransportError(orig_error=e)

199

200

def _get(self, relpath, decode=False, ranges=None):

201

path = relpath

202

try:

203

path = self.abspath(relpath)

204

return get_url(path, ranges=ranges)

205

except urllib2.HTTPError, e:

206

mutter('url error code: %s for has url: %r', e.code, path)

207

if e.code == 404:

208

raise NoSuchFile(path, extra=e)

209

raise

210

except (BzrError, IOError), e:

211

if hasattr(e, 'errno'):

212

mutter('io error: %s %s for has url: %r',

213

e.errno, errno.errorcode.get(e.errno), path)

214

if e.errno == errno.ENOENT:

215

raise NoSuchFile(path, extra=e)

216

raise ConnectionError(msg = "Error retrieving %s: %s"

217

% (self.abspath(relpath), str(e)),

218

orig_error=e)

219

220

def get(self, relpath, decode=False):

162

raise NotImplementedError("has() is abstract on %r" % self)

163

164

def get(self, relpath):

221

165

"""Get the file at the given relative path.

222

166

223

167

:param relpath: The relative path to the file

224

168

"""

225

return self._get(relpath, decode=decode)

169

code, response_file = self._get(relpath, None)

170

return response_file

171

172

def _get(self, relpath, ranges):

173

"""Get a file, or part of a file.

174

175

:param relpath: Path relative to transport base URL

176

:param ranges: None to get the whole file;

177

or [(start,end)] to fetch parts of a file.

178

179

:returns: (http_code, result_file)

180

181

Note that the current http implementations can only fetch one range at

182

a time through this call.

183

"""

184

raise NotImplementedError(self._get)

226

185

227

186

def readv(self, relpath, offsets):

228

187

"""Get parts of the file at the given relative path.

229

188

230

:offsets: A list of (offset, size) tuples.

231

:return: A list or generator of (offset, data) tuples

189

:param offsets: A list of (offset, size) tuples.

190

:return: A list or generator of (offset, data) tuples

232

191

"""

233

# this is not quite regular enough to have a single driver routine and

192

# Ideally we would pass one big request asking for all the ranges in

193

# one go; however then the server will give a multipart mime response

194

# back, and we can't parse them yet. So instead we just get one range

195

# per region, and try to coalesce the regions as much as possible.

196

197

# The read-coalescing code is not quite regular enough to have a

198

# single driver routine and

234

199

# helper method in Transport.

235

200

def do_combined_read(combined_offsets):

236

201

# read one coalesced block

239

204

total_size += size

240

205

mutter('readv coalesced %d reads.', len(combined_offsets))

241

206

offset = combined_offsets[0][0]

242

ranges = 'bytes=%d-%d' % (offset, offset + total_size - 1)

243

response = self._get(relpath, ranges=ranges)

244

if response.code == 206:

207

byte_range = (offset, offset + total_size - 1)

208

code, result_file = self._get(relpath, [byte_range])

209

if code == 206:

245

210

for off, size in combined_offsets:

246

yield off, response.read(size)

247

elif response.code == 200:

248

data = response.read(offset + total_size)[offset:offset + total_size]

211

result_bytes = result_file.read(size)

212

assert len(result_bytes) == size

213

yield off, result_bytes

214

elif code == 200:

215

data = result_file.read(offset + total_size)[offset:offset + total_size]

249

216

pos = 0

250

217

for offset, size in combined_offsets:

251

218

yield offset, data[pos:pos + size]

252

219

pos += size

253

220

del data

254

255

221

if not len(offsets):

256

222

return

257

223

pending_offsets = deque(offsets)

313

279

# At this point HttpTransport might be able to check and see if

314

280

# the remote location is the same, and rather than download, and

315

281

# then upload, it could just issue a remote copy_this command.

316

if isinstance(other, HttpTransport):

282

if isinstance(other, HttpTransportBase):

317

283

raise TransportNotPossible('http cannot be the target of copy_to()')

318

284

else:

319

return super(HttpTransport, self).copy_to(relpaths, other, mode=mode, pb=pb)

285

return super(HttpTransportBase, self).\

286

copy_to(relpaths, other, mode=mode, pb=pb)

320

287

321

288

def move(self, rel_from, rel_to):

322

289

"""Move the item at rel_from to the location at rel_to"""

360

327

"""

361

328

raise TransportNotPossible('http does not support lock_write()')

362

329

330

def clone(self, offset=None):

331

"""Return a new HttpTransportBase with root at self.base + offset

332

For now HttpTransportBase does not actually connect, so just return

333

a new HttpTransportBase object.

334

"""

335

if offset is None:

336

return self.__class__(self.base)

337

else:

338

return self.__class__(self.abspath(offset))

363

339

364

340

#---------------- test server facilities ----------------

365

import BaseHTTPServer, SimpleHTTPServer, socket, time

366

import threading

367

341

# TODO: load these only when running tests

368

342

369

343

class WebserverNotAvailable(Exception):

370

344

pass

426

400

RequestHandlerClass)

427

401

self.test_case = test_case

428

402

429

430

403

class HttpServer(Server):

431

404

"""A test server for http transports."""

432

405

406

# used to form the url that connects to this server

407

_url_protocol = 'http'

408

433

409

def _http_start(self):

434

410

httpd = None

435

411

httpd = TestingHTTPServer(('localhost', 0),

436

412

TestingHTTPRequestHandler,

437

413

self)

438

414

host, port = httpd.socket.getsockname()

439

self._http_base_url = 'http://localhost:%s/' % port

415

self._http_base_url = '%s://localhost:%s/' % (self._url_protocol, port)

440

416

self._http_starting.release()

441

417

httpd.socket.settimeout(0.1)

442

418

494

470

495

471

def get_bogus_url(self):

496

472

"""See bzrlib.transport.Server.get_bogus_url."""

497

return 'http://jasldkjsalkdjalksjdkljasd'

498

499

500

def get_test_permutations():

501

"""Return the permutations to be used in testing."""

502

warn("There are no HTTPS transport provider tests yet.")

503

return [(HttpTransport, HttpServer),

504

]

473

# this is chosen to try to prevent trouble with proxies, weird dns,

474

# etc

475

return 'http://127.0.0.1:1/'

476

Older »