~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/transport/http/_pycurl.py

Committer: John Arbash Meinel
Date: 2006-12-01 19:41:16 UTC
mfrom: (2158 +trunk)
mto: This revision was merged to the branch mainline in revision 2159.
Revision ID: john@arbash-meinel.com-20061201194116-nvn5qhfxux5284jc

[merge] bzr.dev 2158

files added:
bzrlib/debug.py

bzrlib/generate_ids.py

bzrlib/help_topics.py

bzrlib/tests/HttpServer.py

bzrlib/tests/blackbox/test_debug.py

bzrlib/tests/blackbox/test_nick.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/test_generate_ids.py

bzrlib/tests/test_wsgi.py

bzrlib/transport/chroot.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/http/wsgi.py

doc/bazaar-vcs.org.kid

doc/http_smart_server.txt

tools/rst2prettyhtml.py

files modified:
.bzrignore

BRANCH.TODO

HACKING

Makefile

NEWS

README

bzrlib/__init__.py

bzrlib/annotate.py

bzrlib/benchmarks/bench_cache_utf8.py

bzrlib/builtins.py

bzrlib/bundle/serializer/v08.py

bzrlib/bzrdir.py

bzrlib/cache_utf8.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/diff.py

bzrlib/errors.py

bzrlib/help.py

bzrlib/ignores.py

bzrlib/inventory.py

bzrlib/knit.py

bzrlib/msgeditor.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/patiencediff.py

bzrlib/progress.py

bzrlib/repository.py

bzrlib/revisionspec.py

bzrlib/shellcomplete.py

bzrlib/status.py

bzrlib/store/versioned/__init__.py

bzrlib/symbol_versioning.py

bzrlib/tests/HTTPTestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_http.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_config.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_http.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_log.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_source.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_workingtree.py

bzrlib/trace.py

bzrlib/transform.py

bzrlib/transport/__init__.py

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/memory.py

bzrlib/transport/smart.py

bzrlib/transport/ssh.py

bzrlib/ui/__init__.py

bzrlib/workingtree.py

doc/centralized_workflow.txt

doc/configuration.txt

doc/index.txt

doc/specifying_revisions.txt

doc/tutorial.txt

Show diffs side-by-side

added added

removed removed

bzrlib/transport/http/_pycurl.py

import os

from cStringIO import StringIO

from bzrlib import errors

from bzrlib import (

errors,

__version__ as bzrlib_version,

)

import bzrlib

from bzrlib.errors import (TransportNotPossible, NoSuchFile,

TransportError, ConnectionError,

from bzrlib.errors import (NoSuchFile,

ConnectionError,

DependencyNotPresent)

from bzrlib.trace import mutter

from bzrlib.transport import register_urlparse_netloc_protocol

from bzrlib.transport.http import (HttpTransportBase, HttpServer,

_extract_headers,

response, _pycurl_errors)

from bzrlib.transport.http import (

_extract_headers,

HttpTransportBase,

_pycurl_errors,

response,

)

try:

import pycurl

PyCurl is a Python binding to the C "curl" multiprotocol client.

This transport can be significantly faster than the builtin Python client.

Advantages include: DNS caching, connection keepalive, and ability to

set headers to allow caching.

This transport can be significantly faster than the builtin

Python client. Advantages include: DNS caching.

"""

def __init__(self, base, from_transport=None):

super(PyCurlTransport, self).__init__(base)

if from_transport is not None:

self._base_curl = from_transport._base_curl

self._range_curl = from_transport._range_curl

self._curl = from_transport._curl

else:

mutter('using pycurl %s' % pycurl.version)

self._base_curl = pycurl.Curl()

self._range_curl = pycurl.Curl()

self._curl = pycurl.Curl()

def should_cache(self):

"""Return True if the data pulled across should be cached locally.

"""See Transport.has()"""

# We set NO BODY=0 in _get_full, so it should be safe

# to re-use the non-range curl object

curl = self._base_curl

curl = self._curl

abspath = self._real_abspath(relpath)

curl.setopt(pycurl.URL, abspath)

100

self._set_curl_options(curl)

102

# don't want the body - ie just do a HEAD request

100

103

# This means "NO BODY" not 'nobody'

101

104

curl.setopt(pycurl.NOBODY, 1)

105

# In some erroneous cases, pycurl will emit text on

106

# stdout if we don't catch it (see InvalidStatus tests

107

# for one such occurrence).

108

blackhole = StringIO()

109

curl.setopt(pycurl.WRITEFUNCTION, blackhole.write)

102

110

self._curl_perform(curl)

103

111

code = curl.getinfo(pycurl.HTTP_CODE)

104

112

if code == 404: # not found

107

115

return True

108

116

else:

109

117

self._raise_curl_http_error(curl)

110

118

111

119

def _get(self, relpath, ranges, tail_amount=0):

112

120

# This just switches based on the type of request

113

121

if ranges is not None or tail_amount not in (0, None):

114

122

return self._get_ranged(relpath, ranges, tail_amount=tail_amount)

115

123

else:

116

124

return self._get_full(relpath)

117

125

118

126

def _setup_get_request(self, curl, relpath):

119

127

# Make sure we do a GET request. versions > 7.14.1 also set the

120

128

# NO BODY flag, but we'll do it ourselves in case it is an older

146

154

147

155

def _get_full(self, relpath):

148

156

"""Make a request for the entire file"""

149

curl = self._base_curl

157

curl = self._curl

150

158

abspath, data, header = self._setup_get_request(curl, relpath)

151

159

self._curl_perform(curl)

152

160

156

164

if code == 404:

157

165

raise NoSuchFile(abspath)

158

166

if code != 200:

159

self._raise_curl_http_error(curl, 'expected 200 or 404 for full response.')

167

self._raise_curl_http_error(

168

curl, 'expected 200 or 404 for full response.')

160

169

161

170

return code, data

162

171

163

172

def _get_ranged(self, relpath, ranges, tail_amount):

164

173

"""Make a request for just part of the file."""

165

# We would like to re-use the same curl object for

166

# full requests and partial requests

167

# Documentation says 'Pass in NULL to disable the use of ranges'

168

# None is the closest we have, but at least with pycurl 7.13.1

169

# It raises an 'invalid arguments' response

170

# curl.setopt(pycurl.RANGE, None)

171

# curl.unsetopt(pycurl.RANGE) doesn't support the RANGE parameter

172

# So instead we hack around this by using a separate objects

173

curl = self._range_curl

174

curl = self._curl

174

175

abspath, data, header = self._setup_get_request(curl, relpath)

175

176

curl.setopt(pycurl.RANGE, self.range_header(ranges, tail_amount))

177

self._curl_perform(curl)

177

range_header = self.attempted_range_header(ranges, tail_amount)

178

if range_header is None:

179

# Forget ranges, the server can't handle them

180

return self._get_full(relpath)

181

182

self._curl_perform(curl, ['Range: bytes=%s'

183

% self.range_header(ranges, tail_amount)])

178

184

data.seek(0)

179

185

180

186

code = curl.getinfo(pycurl.HTTP_CODE)

185

191

186

192

def _post(self, body_bytes):

187

193

fake_file = StringIO(body_bytes)

188

curl = self._base_curl

194

curl = self._curl

189

195

# Other places that use _base_curl for GET requests explicitly set

190

196

# HTTPGET, so it should be safe to re-use the same object for both GETs

191

197

# and POSTs.

193

199

curl.setopt(pycurl.POSTFIELDSIZE, len(body_bytes))

194

200

curl.setopt(pycurl.READFUNCTION, fake_file.read)

195

201

abspath, data, header = self._setup_request(curl, '.bzr/smart')

196

self._curl_perform(curl)

202

# We override the Expect: header so that pycurl will send the POST

203

# body immediately.

204

self._curl_perform(curl,['Expect: '])

197

205

data.seek(0)

198

206

code = curl.getinfo(pycurl.HTTP_CODE)

199

207

headers = _extract_headers(header.getvalue(), abspath)

202

210

def _raise_curl_http_error(self, curl, info=None):

203

211

code = curl.getinfo(pycurl.HTTP_CODE)

204

212

url = curl.getinfo(pycurl.EFFECTIVE_URL)

205

if info is None:

206

msg = ''

213

# Some error codes can be handled the same way for all

214

# requests

215

if code == 403:

216

raise errors.TransportError(

217

'Server refuses to fullfil the request for: %s' % url)

207

218

else:

208

msg = ': ' + info

209

raise errors.InvalidHttpResponse(url, 'Unable to handle http code %d%s'

210

% (code,msg))

219

if info is None:

220

msg = ''

221

else:

222

msg = ': ' + info

223

raise errors.InvalidHttpResponse(

224

url, 'Unable to handle http code %d%s' % (code,msg))

211

225

212

226

def _set_curl_options(self, curl):

213

227

"""Set options for all requests"""

214

# There's no way in http/1.0 to say "must revalidate"; we don't want

215

# to force it to always retrieve. so just turn off the default Pragma

216

# provided by Curl.

217

# Also, we override the Expect: header so that pycurl will send the POST

218

# body immediately.

219

headers = ['Cache-control: max-age=0',

220

'Pragma: no-cache',

221

'Connection: Keep-Alive',

222

'Expect: ',]

223

228

## curl.setopt(pycurl.VERBOSE, 1)

224

229

# TODO: maybe include a summary of the pycurl version

225

230

ua_str = 'bzr/%s (pycurl)' % (bzrlib.__version__,)

226

231

curl.setopt(pycurl.USERAGENT, ua_str)

227

curl.setopt(pycurl.HTTPHEADER, headers)

228

232

curl.setopt(pycurl.FOLLOWLOCATION, 1) # follow redirect responses

229

233

230

def _curl_perform(self, curl):

234

def _curl_perform(self, curl, more_headers=[]):

231

235

"""Perform curl operation and translate exceptions."""

232

236

try:

237

# There's no way in http/1.0 to say "must

238

# revalidate"; we don't want to force it to always

239

# retrieve. so just turn off the default Pragma

240

# provided by Curl.

241

headers = ['Cache-control: max-age=0',

242

'Pragma: no-cache',

243

'Connection: Keep-Alive']

244

curl.setopt(pycurl.HTTPHEADER, headers + more_headers)

233

245

curl.perform()

234

246

except pycurl.error, e:

235

# XXX: There seem to be no symbolic constants for these values.

236

247

url = curl.getinfo(pycurl.EFFECTIVE_URL)

237

248

mutter('got pycurl error: %s, %s, %s, url: %s ',

238

249

e[0], _pycurl_errors.errorcode[e[0]], e, url)

239

250

if e[0] in (_pycurl_errors.CURLE_COULDNT_RESOLVE_HOST,

240

251

_pycurl_errors.CURLE_COULDNT_CONNECT,

252

_pycurl_errors.CURLE_GOT_NOTHING,

241

253

_pycurl_errors.CURLE_COULDNT_RESOLVE_PROXY):

242

254

raise ConnectionError('curl connection error (%s)\non %s'

243

255

% (e[1], url))

246

258

raise

247

259

248

260

249

class HttpServer_PyCurl(HttpServer):

250

"""Subclass of HttpServer that gives http+pycurl urls.

251

252

This is for use in testing: connections to this server will always go

253

through pycurl where possible.

254

"""

255

256

# urls returned by this server should require the pycurl client impl

257

_url_protocol = 'http+pycurl'

258

259

260

261

def get_test_permutations():

261

262

"""Return the permutations to be used in testing."""

263

from bzrlib.tests.HttpServer import HttpServer_PyCurl

262

264

return [(PyCurlTransport, HttpServer_PyCurl),

263

265

]

Older »