~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/transport/http/_pycurl.py

Committer: Robert Collins
Date: 2006-07-26 07:48:17 UTC
mfrom: (1852.11.1 deprecate compare_trees.)
mto: (1852.17.1 WorkingTree Format 4)
mto: This revision was merged to the branch mainline in revision 1896.
Revision ID: robertc@robertcollins.net-20060726074817-78581226f4efe2ed

Merge Tree.changes_from work.

files added:
bzrlib/ignores.py

bzrlib/revisiontree.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/intertree_implementations

bzrlib/tests/intertree_implementations/__init__.py

bzrlib/tests/intertree_implementations/test_compare.py

bzrlib/tests/test_http_response.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_tree.py

bzrlib/tests/tree_implementations

bzrlib/tests/tree_implementations/__init__.py

bzrlib/tests/tree_implementations/test_test_trees.py

bzrlib/tests/workingtree_implementations/test_changes_from.py

bzrlib/transport/http/_pycurl_errors.py

bzrlib/transport/http/response.py

files removed:
bzrlib/tests/test_emptytree.py

files modified:
NEWS

bzrlib/__init__.py

bzrlib/branch.py

bzrlib/builtins.py

bzrlib/bundle/serializer/v08.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/delta.py

bzrlib/diff.py

bzrlib/errors.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/info.py

bzrlib/inter.py

bzrlib/inventory.py

bzrlib/merge.py

bzrlib/missing.py

bzrlib/repository.py

bzrlib/revisionspec.py

bzrlib/status.py

bzrlib/symbol_versioning.py

bzrlib/testament.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/repository_implementations/test_break_lock.py

bzrlib/tests/repository_implementations/test_fileid_involved.py

bzrlib/tests/repository_implementations/test_reconcile.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/revisionstore_implementations/test_all.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_config.py

bzrlib/tests/test_http.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_executable.py

bzrlib/tests/workingtree_implementations/test_is_ignored.py

bzrlib/tests/workingtree_implementations/test_locking.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/transport/__init__.py

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib.py

bzrlib/tree.py

bzrlib/ui/text.py

bzrlib/workingtree.py

bzrlib/xml4.py

bzrlib/xml5.py

doc/tutorial.txt

Show diffs side-by-side

added added

removed removed

bzrlib/transport/http/_pycurl.py

# It's probably safer to just always revalidate. mbp 20060321

import os

from StringIO import StringIO

from cStringIO import StringIO

from bzrlib import errors

import bzrlib

from bzrlib.errors import (TransportNotPossible, NoSuchFile,

TransportError, ConnectionError,

DependencyNotPresent)

from bzrlib.trace import mutter

from bzrlib.transport import register_urlparse_netloc_protocol

from bzrlib.transport.http import HttpTransportBase, extract_auth, HttpServer

from bzrlib.transport.http import (HttpTransportBase, HttpServer,

_extract_headers,

response, _pycurl_errors)

try:

import pycurl

set headers to allow caching.

"""

def __init__(self, base):

def __init__(self, base, from_transport=None):

super(PyCurlTransport, self).__init__(base)

mutter('using pycurl %s' % pycurl.version)

if from_transport is not None:

self._base_curl = from_transport._base_curl

self._range_curl = from_transport._range_curl

else:

mutter('using pycurl %s' % pycurl.version)

self._base_curl = pycurl.Curl()

self._range_curl = pycurl.Curl()

def should_cache(self):

"""Return True if the data pulled across should be cached locally.

return True

def has(self, relpath):

curl = pycurl.Curl()

"""See Transport.has()"""

# We set NO BODY=0 in _get_full, so it should be safe

# to re-use the non-range curl object

curl = self._base_curl

abspath = self._real_abspath(relpath)

curl.setopt(pycurl.URL, abspath)

curl.setopt(pycurl.FOLLOWLOCATION, 1) # follow redirect responses

self._set_curl_options(curl)

# don't want the body - ie just do a HEAD request

# This means "NO BODY" not 'nobody'

100

curl.setopt(pycurl.NOBODY, 1)

101

self._curl_perform(curl)

102

code = curl.getinfo(pycurl.HTTP_CODE)

104

return False

105

elif code in (200, 302): # "ok", "found"

106

return True

elif code == 0:

self._raise_curl_connection_error(curl)

107

else:

108

self._raise_curl_http_error(curl)

109

100

def _get(self, relpath, ranges):

101

curl = pycurl.Curl()

110

def _get(self, relpath, ranges, tail_amount=0):

111

# This just switches based on the type of request

112

if ranges is not None or tail_amount not in (0, None):

113

return self._get_ranged(relpath, ranges, tail_amount=tail_amount)

114

else:

115

return self._get_full(relpath)

116

117

def _setup_get_request(self, curl, relpath):

118

"""Do the common setup stuff for making a request

119

120

:param curl: The curl object to place the request on

121

:param relpath: The relative path that we want to get

122

:return: (abspath, data, header)

123

abspath: full url

124

data: file that will be filled with the body

125

header: file that will be filled with the headers

126

"""

102

127

abspath = self._real_abspath(relpath)

103

sio = StringIO()

104

128

curl.setopt(pycurl.URL, abspath)

105

129

self._set_curl_options(curl)

106

curl.setopt(pycurl.WRITEFUNCTION, sio.write)

130

# Make sure we do a GET request. versions > 7.14.1 also set the

131

# NO BODY flag, but we'll do it ourselves in case it is an older

132

# pycurl version

107

133

curl.setopt(pycurl.NOBODY, 0)

108

if ranges is not None:

109

assert len(ranges) == 1

110

# multiple ranges not supported yet because we can't decode the

111

# response

112

curl.setopt(pycurl.RANGE, '%d-%d' % ranges[0])

134

curl.setopt(pycurl.HTTPGET, 1)

135

136

data = StringIO()

137

header = StringIO()

138

curl.setopt(pycurl.WRITEFUNCTION, data.write)

139

curl.setopt(pycurl.HEADERFUNCTION, header.write)

140

141

return abspath, data, header

142

143

def _get_full(self, relpath):

144

"""Make a request for the entire file"""

145

curl = self._base_curl

146

abspath, data, header = self._setup_get_request(curl, relpath)

113

147

self._curl_perform(curl)

148

114

149

code = curl.getinfo(pycurl.HTTP_CODE)

150

data.seek(0)

151

115

152

if code == 404:

116

153

raise NoSuchFile(abspath)

117

elif code == 200:

118

sio.seek(0)

119

return code, sio

120

elif code == 206 and (ranges is not None):

121

sio.seek(0)

122

return code, sio

123

elif code == 0:

124

self._raise_curl_connection_error(curl)

125

else:

126

self._raise_curl_http_error(curl)

154

if code != 200:

155

self._raise_curl_http_error(curl, 'expected 200 or 404 for full response.')

156

157

return code, data

158

159

def _get_ranged(self, relpath, ranges, tail_amount):

160

"""Make a request for just part of the file."""

161

# We would like to re-use the same curl object for

162

# full requests and partial requests

163

# Documentation says 'Pass in NULL to disable the use of ranges'

164

# None is the closest we have, but at least with pycurl 7.13.1

165

# It raises an 'invalid arguments' response

166

# curl.setopt(pycurl.RANGE, None)

167

# curl.unsetopt(pycurl.RANGE) doesn't support the RANGE parameter

168

# So instead we hack around this by using a separate object

169

curl = self._range_curl

170

abspath, data, header = self._setup_get_request(curl, relpath)

171

172

curl.setopt(pycurl.RANGE, self.range_header(ranges, tail_amount))

173

self._curl_perform(curl)

174

data.seek(0)

175

176

code = curl.getinfo(pycurl.HTTP_CODE)

177

# mutter('url: %s header:\n%s', abspath, header.getvalue())

178

headers = _extract_headers(header.getvalue(), abspath)

179

# handle_response will raise NoSuchFile, etc based on the response code

180

return code, response.handle_response(abspath, code, headers, data)

127

181

128

182

def _raise_curl_connection_error(self, curl):

129

183

curl_errno = curl.getinfo(pycurl.OS_ERRNO)

131

185

raise ConnectionError('curl connection error (%s) on %s'

132

186

% (os.strerror(curl_errno), url))

133

187

134

def _raise_curl_http_error(self, curl):

188

def _raise_curl_http_error(self, curl, info=None):

135

189

code = curl.getinfo(pycurl.HTTP_CODE)

136

190

url = curl.getinfo(pycurl.EFFECTIVE_URL)

137

raise TransportError('http error %d probing for %s' %

138

(code, url))

191

if info is None:

192

msg = ''

193

else:

194

msg = ': ' + info

195

raise errors.InvalidHttpResponse(url, 'Unable to handle http code %d%s'

196

% (code,msg))

139

197

140

198

def _set_curl_options(self, curl):

141

199

"""Set options for all requests"""

143

201

# to force it to always retrieve. so just turn off the default Pragma

144

202

# provided by Curl.

145

203

headers = ['Cache-control: max-age=0',

146

'Pragma: no-cache']

204

'Pragma: no-cache',

205

'Connection: Keep-Alive']

147

206

## curl.setopt(pycurl.VERBOSE, 1)

148

207

# TODO: maybe include a summary of the pycurl version

149

ua_str = 'bzr/%s (pycurl)' % (bzrlib.__version__)

208

ua_str = 'bzr/%s (pycurl)' % (bzrlib.__version__,)

150

209

curl.setopt(pycurl.USERAGENT, ua_str)

151

210

curl.setopt(pycurl.HTTPHEADER, headers)

152

211

curl.setopt(pycurl.FOLLOWLOCATION, 1) # follow redirect responses

157

216

curl.perform()

158

217

except pycurl.error, e:

159

218

# XXX: There seem to be no symbolic constants for these values.

160

if e[0] == 6:

161

# couldn't resolve host

162

raise NoSuchFile(curl.getinfo(pycurl.EFFECTIVE_URL), e)

219

url = curl.getinfo(pycurl.EFFECTIVE_URL)

220

mutter('got pycurl error: %s, %s, %s, url: %s ',

221

e[0], _pycurl_errors.errorcode[e[0]], e, url)

222

if e[0] in (_pycurl_errors.CURLE_COULDNT_RESOLVE_HOST,

223

_pycurl_errors.CURLE_COULDNT_CONNECT):

224

self._raise_curl_connection_error(curl)

225

# jam 20060713 The code didn't use to re-raise the exception here

226

# but that seemed bogus

227

raise

163

228

164

229

165

230

class HttpServer_PyCurl(HttpServer):

Older »