# Copyright (C) 2006 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""Implementation of urllib2 tailored to bzr needs

This file re-implements the urllib2 class hierarchy with custom classes.

For instance, we create a new HTTPConnection and HTTPSConnection that inherit
from the original urllib2.HTTP(s)Connection objects, but also have a new base
which implements a custom getresponse and fake_close handlers.

And then we implement custom HTTPHandler and HTTPSHandler classes, that use
the custom HTTPConnection classes.

We have a custom Response class, which lets us maintain a keep-alive
connection even for requests that urllib2 doesn't expect to contain body data.

And a custom Request class that lets us track redirections, and send
authentication data without requiring an extra round trip to get rejected by
the server. We also create a Request hierarchy, to make it clear what type
of request is being made.
"""
# TODO: It may be possible to share the password_manager across
# all transports by prefixing the realm by the protocol used
# (especially if other protocols do not use realms). See
# PasswordManager below.

# FIXME: Oversimplifying, two kinds of exceptions should be
# raised, once a request is issued: URLError before we have been
# able to process the response, HTTPError after that. Process the
# response means we are able to leave the socket clean, so if we
# are not able to do that, we should close the connection. The
# actual code more or less does that, tests should be written to
# ensure that.
import httplib
import re
import socket
import sys
import urllib
import urllib2
import urlparse

from bzrlib import __version__ as bzrlib_version
from bzrlib import errors

# Verbosity level for this module's debug output (0 = silent).
DEBUG = 0
64
# We define our own Response class to keep our httplib pipe clean
65
class Response(httplib.HTTPResponse):
66
"""Custom HTTPResponse, to avoid the need to decorate.
68
httplib prefers to decorate the returned objects, rather
69
than using a custom object.
72
# Some responses have bodies in which we have no interest
73
_body_ignored_responses = [301,302, 303, 307, 401, 403, 404]
75
def __init__(self, *args, **kwargs):
76
httplib.HTTPResponse.__init__(self, *args, **kwargs)
79
"""Begin to read the response from the server.
81
httplib assumes that some responses get no content and do
82
not even attempt to read the body in that case, leaving
83
the body in the socket, blocking the next request. Let's
84
try to workaround that.
86
httplib.HTTPResponse.begin(self)
87
if self.status in self._body_ignored_responses:
88
if self.debuglevel > 0:
89
print "For status: [%s]," % self.status,
90
print "will ready body, length: ",
91
if self.length is not None:
92
print "[%d]" % self.length
95
if not (self.length is None or self.will_close):
96
# In some cases, we just can't read the body not
97
# even try or we may encounter a 104, 'Connection
98
# reset by peer' error if there is indeed no body
99
# and the server closed the connection just after
100
# having issued the response headers (even if the
101
# headers indicate a Content-Type...)
102
body = self.fp.read(self.length)
103
if self.debuglevel > 0:
104
print "Consumed body: [%s]" % body
108
# Not inheriting from 'object' because httplib.HTTPConnection doesn't.
class AbstractHTTPConnection:
    """A custom HTTP(S) Connection, which can reset itself on a bad response"""

    response_class = Response
    strict = 1 # We don't support HTTP/0.9

    def fake_close(self):
        """Make the connection believe the response has been fully handled.

        That makes the httplib.HTTPConnection happy
        """
        # Preserve our preciousss: close() would discard the socket,
        # breaking the keep-alive connection, so we hide it from
        # close() and restore it afterwards.
        sock = self.sock
        self.sock = None
        self.close()
        self.sock = sock
class HTTPConnection(AbstractHTTPConnection, httplib.HTTPConnection):
    """HTTP connection combining our custom base with httplib's."""
    pass
class HTTPSConnection(AbstractHTTPConnection, httplib.HTTPSConnection):
    """HTTPS connection combining our custom base with httplib's."""
    pass
class Request(urllib2.Request):
    """A custom Request object.

    urllib2 determines the request method heuristically (based on
    the presence or absence of data). We set the method
    statically from the 'method' constructor parameter instead.

    Also, the Request object tracks the connection the request will
    be made on and, via 'parent'/'redirected_to', the chain of
    requests produced by redirections.
    """

    # NOTE: 'headers={}' is kept for interface compatibility; it is
    # never mutated here (urllib2.Request copies the entries).
    def __init__(self, method, url, data=None, headers={},
                 origin_req_host=None, unverifiable=False,
                 connection=None, parent=None,):
        # urllib2.Request will be confused if we don't extract
        # authentification info before building the request
        url, self.user, self.password = self.extract_auth(url)
        urllib2.Request.__init__(self, url, data, headers,
                                 origin_req_host, unverifiable)
        self.method = method
        self.connection = connection
        # To handle redirections
        self.parent = parent
        self.redirected_to = None

    def extract_auth(self, url):
        """Extracts authentification information from url.

        Get user and password from url of the form: http://user:pass@host/path

        :return: (url stripped of auth info, user or None, password or None)
        """
        scheme, netloc, path, query, fragment = urlparse.urlsplit(url)

        if '@' in netloc:
            auth, netloc = netloc.split('@', 1)
            if ':' in auth:
                user, password = auth.split(':', 1)
            else:
                user, password = auth, None
            user = urllib.unquote(user)
            if password is not None:
                password = urllib.unquote(password)
        else:
            user = None
            password = None

        url = urlparse.urlunsplit((scheme, netloc, path, query, fragment))

        return url, user, password

    def get_method(self):
        """See urllib2.Request.get_method: we return the static method."""
        return self.method
# The urlib2.xxxAuthHandler handle the authentification of the
# requests, to do that, they need an urllib2 PasswordManager *at
# build time*. We also need one to reuse the passwords already
# typed by the user.
class PasswordManager(urllib2.HTTPPasswordMgrWithDefaultRealm):
    """Password manager shared between handlers (see TODO at module top)."""

    def __init__(self):
        urllib2.HTTPPasswordMgrWithDefaultRealm.__init__(self)
class ConnectionHandler(urllib2.BaseHandler):
199
"""Provides connection-sharing by pre-processing requests.
201
urllib2 provides no way to access the HTTPConnection object
202
internally used. But we need it in order to achieve
203
connection sharing. So, we add it to the request just before
204
it is processed, and then we override the do_open method for
208
handler_order = 1000 # after all pre-processings
210
def get_key(self, connection):
211
"""Returns the key for the connection in the cache"""
212
return '%s:%d' % (connection.host, connection.port)
214
def create_connection(self, request, http_connection_class):
215
host = request.get_host()
217
# Just a bit of paranoia here, this should have been
218
# handled in the higher levels
219
raise errors.InvalidURL(request.get_full_url(), 'no host given.')
221
# We create a connection (but it will not connect yet)
223
connection = http_connection_class(host)
224
except httplib.InvalidURL, exception:
225
# There is only one occurrence of InvalidURL in httplib
226
raise errors.InvalidURL(request.get_full_url(),
227
extra='nonnumeric port')
231
def capture_connection(self, request, http_connection_class):
232
"""Capture or inject the request connection.
235
- the request have no connection: create a new one,
237
- the request have a connection: this one have been used
238
already, let's capture it, so that we can give it to
239
another transport to be reused. We don't do that
240
ourselves: the Transport object get the connection from
241
a first request and then propagate it, from request to
242
request or to cloned transports.
244
connection = request.connection
245
if connection is None:
247
connection = self.create_connection(request, http_connection_class)
248
request.connection = connection
250
# All connections will pass here, propagate debug level
251
connection.set_debuglevel(DEBUG)
254
def http_request(self, request):
255
return self.capture_connection(request, HTTPConnection)
257
def https_request(self, request):
258
return self.capture_connection(request, HTTPSConnection)
261
class AbstractHTTPHandler(urllib2.AbstractHTTPHandler):
262
"""A custom handler for HTTP(S) requests.
264
We overrive urllib2.AbstractHTTPHandler to get a better
265
control of the connection, the ability to implement new
266
request types and return a response able to cope with
267
persistent connections.
270
# We change our order to be before urllib2 HTTP[S]Handlers
271
# and be chosen instead of them (the first http_open called
275
_default_headers = {'Pragma': 'no-cache',
276
'Cache-control': 'max-age=0',
277
'Connection': 'Keep-Alive',
278
# FIXME: Spell it User-*A*gent once we
279
# know how to properly avoid bogus
280
# urllib2 using capitalize() for headers
281
# instead of title(sp?).
282
'User-agent': 'bzr/%s (urllib)' % bzrlib_version,
283
# FIXME: pycurl also set the following, understand why
288
urllib2.AbstractHTTPHandler.__init__(self, debuglevel=DEBUG)
290
def http_request(self, request):
291
"""Common headers setting"""
293
request.headers.update(self._default_headers.copy())
294
# FIXME: We may have to add the Content-Length header if
295
# we have data to send.
298
def retry_or_raise(self, http_class, request, first_try):
299
"""Retry the request (once) or raise the exception.
301
urllib2 raises exception of application level kind, we
302
just have to translate them.
304
httplib can raise exceptions of transport level (badly
305
formatted dialog, loss of connexion or socket level
306
problems). In that case we should issue the request again
307
(httplib will close and reopen a new connection if
310
# When an exception occurs, we give back the original
311
# Traceback or the bugs are hard to diagnose.
312
exc_type, exc_val, exc_tb = sys.exc_info()
313
if exc_type == socket.gaierror:
314
# No need to retry, that will not help
315
raise errors.ConnectionError("Couldn't resolve host '%s'"
316
% request.get_origin_req_host(),
320
if self._debuglevel > 0:
321
print 'Received exception: [%r]' % exc_val
322
print ' On connection: [%r]' % request.connection
323
method = request.get_method()
324
url = request.get_full_url()
325
print ' Will retry, %s %r' % (method, url)
326
request.connection.close()
327
response = self.do_open(http_class, request, False)
328
convert_to_addinfourl = False
330
if self._debuglevel > 0:
331
print 'Received second exception: [%r]' % exc_val
332
print ' On connection: [%r]' % request.connection
333
if exc_type in (httplib.BadStatusLine, httplib.UnknownProtocol):
334
# httplib.BadStatusLine and
335
# httplib.UnknownProtocol indicates that a
336
# bogus server was encountered or a bad
337
# connection (i.e. transient errors) is
338
# experimented, we have already retried once
339
# for that request so we raise the exception.
340
my_exception = errors.InvalidHttpResponse(
341
request.get_full_url(),
342
'Bad status line received',
345
# All other exception are considered connection related.
347
# httplib.HTTPException should indicate a bug
348
# in the urllib implementation, somewhow the
349
# httplib pipeline is in an incorrect state,
350
# we retry in hope that this will correct the
351
# problem but that may need investigation
352
# (note that no such bug is known as of
355
# socket errors generally occurs for reasons
356
# far outside our scope, so closing the
357
# connection and retrying is the best we can
360
# FIXME: and then there is HTTPError raised by:
361
# - HTTPDefaultErrorHandler (we define our own)
362
# - HTTPRedirectHandler.redirect_request
363
# - AbstractDigestAuthHandler.http_error_auth_reqed
365
my_exception = errors.ConnectionError(
366
msg= 'while sending %s %s:' % (request.get_method(),
367
request.get_selector()),
370
if self._debuglevel > 0:
371
print 'On connection: [%r]' % request.connection
372
method = request.get_method()
373
url = request.get_full_url()
374
print ' Failed again, %s %r' % (method, url)
375
print ' Will raise: [%r]' % my_exception
376
raise my_exception, None, exc_tb
377
return response, convert_to_addinfourl
379
def do_open(self, http_class, request, first_try=True):
380
"""See urllib2.AbstractHTTPHandler.do_open for the general idea.
382
The request will be retried once if it fails.
384
connection = request.connection
385
assert connection is not None, \
386
'Cannot process a request without a connection'
388
# Get all the headers
390
headers.update(request.header_items())
391
headers.update(request.unredirected_hdrs)
394
connection._send_request(request.get_method(),
395
request.get_selector(),
396
# FIXME: implements 100-continue
397
#None, # We don't send the body yet
400
if self._debuglevel > 0:
401
print 'Request sent: [%r]' % request
402
response = connection.getresponse()
403
convert_to_addinfourl = True
404
except (socket.gaierror, httplib.BadStatusLine, httplib.UnknownProtocol,
405
socket.error, httplib.HTTPException):
406
response, convert_to_addinfourl = self.retry_or_raise(http_class,
410
# FIXME: HTTPConnection does not fully support 100-continue (the
411
# server responses are just ignored)
414
# mutter('Will send the body')
415
# # We can send the body now
416
# body = request.get_data()
418
# raise URLError("No data given")
419
# connection.send(body)
420
# response = connection.getresponse()
422
if self._debuglevel > 0:
423
print 'Receives response: %r' % response
424
print ' For: %r(%r)' % (request.get_method(),
425
request.get_full_url())
427
if convert_to_addinfourl:
428
# Shamelessly copied from urllib2
432
fp = socket._fileobject(r)
433
resp = urllib2.addinfourl(fp, r.msg, req.get_full_url())
436
if self._debuglevel > 0:
437
print 'Create addinfourl: %r' % resp
438
print ' For: %r(%r)' % (request.get_method(),
439
request.get_full_url())
444
# # we need titled headers in a dict but
445
# # response.getheaders returns a list of (lower(header).
446
# # Let's title that because most of bzr handle titled
447
# # headers, but maybe we should switch to lowercased
449
# # jam 20060908: I think we actually expect the headers to
450
# # be similar to mimetools.Message object, which uses
451
# # case insensitive keys. It lowers() all requests.
452
# # My concern is that the code may not do perfect title case.
453
# # For example, it may use Content-type rather than Content-Type
455
# # When we get rid of addinfourl, we must ensure that bzr
456
# # always use titled headers and that any header received
457
# # from server is also titled.
460
# for header, value in (response.getheaders()):
461
# headers[header.title()] = value
462
# # FIXME: Implements a secured .read method
463
# response.code = response.status
464
# response.headers = headers
468
class HTTPHandler(AbstractHTTPHandler):
    """A custom handler that just thunks into HTTPConnection"""

    def http_open(self, request):
        return self.do_open(HTTPConnection, request)
class HTTPSHandler(AbstractHTTPHandler):
    """A custom handler that just thunks into HTTPSConnection"""

    def https_open(self, request):
        return self.do_open(HTTPSConnection, request)
class HTTPRedirectHandler(urllib2.HTTPRedirectHandler):
483
"""Handles redirect requests.
485
We have to implement our own scheme because we use a specific
486
Request object and because we want to implement a specific
490
# RFC2616 says that only read requests should be redirected
491
# without interacting with the user. But bzr use some
492
# shortcuts to optimize against roundtrips which can leads to
493
# write requests being issued before read requests of
494
# containing dirs can be redirected. So we redirect write
495
# requests in the same way which seems to respect the spirit
496
# of the RFC if not its letter.
498
def redirect_request(self, req, fp, code, msg, headers, newurl):
499
"""See urllib2.HTTPRedirectHandler.redirect_request"""
500
# We would have preferred to update the request instead
501
# of creating a new one, but the urllib2.Request object
502
# has a too complicated creation process to provide a
503
# simple enough equivalent update process. Instead, when
504
# redirecting, we only update the original request with a
505
# reference to the following request in the redirect
508
# Some codes make no sense on out context and are treated
511
# 300: Multiple choices for different representations of
512
# the URI. Using that mechanisn with bzr will violate the
513
# protocol neutrality of Transport.
515
# 304: Not modified (SHOULD only occurs with conditional
516
# GETs which are not used by our implementation)
518
# 305: Use proxy. I can't imagine this one occurring in
519
# our context-- vila/20060909
521
# 306: Unused (if the RFC says so...)
523
# FIXME: If the code is 302 and the request is HEAD, we
524
# MAY avoid following the redirections if the intent is
525
# to check the existence, we have a hint that the file
526
# exist, now if we want to be sure, we must follow the
527
# redirection. Let's do that for now.
529
if code in (301, 302, 303, 307):
530
return Request(req.get_method(),newurl,
531
headers = req.headers,
532
origin_req_host = req.get_origin_req_host(),
534
# TODO: It will be nice to be able to
535
# detect virtual hosts sharing the same
536
# IP address, that will allow us to
537
# share the same connection...
542
raise urllib2.HTTPError(req.get_full_url(), code, msg, headers, fp)
544
def http_error_30x(self, req, fp, code, msg, headers):
545
"""Requests the redirected to URI.
547
Copied from urllib2 to be able to fake_close the
548
associated connection, *before* issuing the redirected
549
request but *after* having eventually raised an error.
551
# Some servers (incorrectly) return multiple Location headers
552
# (so probably same goes for URI). Use first header.
554
# TODO: Once we get rid of addinfourl objects, the
555
# following will need to be updated to use correct case
557
if 'location' in headers:
558
newurl = headers.getheaders('location')[0]
559
elif 'uri' in headers:
560
newurl = headers.getheaders('uri')[0]
563
if self._debuglevel > 0:
564
print 'Redirected to: %s' % newurl
565
newurl = urlparse.urljoin(req.get_full_url(), newurl)
567
# This call succeeds or raise an error. urllib2 returns
568
# if redirect_request returns None, but our
569
# redirect_request never returns None.
570
redirected_req = self.redirect_request(req, fp, code, msg, headers,
574
# .redirect_dict has a key url if url was previously visited.
575
if hasattr(req, 'redirect_dict'):
576
visited = redirected_req.redirect_dict = req.redirect_dict
577
if (visited.get(newurl, 0) >= self.max_repeats or
578
len(visited) >= self.max_redirections):
579
raise urllib2.HTTPError(req.get_full_url(), code,
580
self.inf_msg + msg, headers, fp)
582
visited = redirected_req.redirect_dict = req.redirect_dict = {}
583
visited[newurl] = visited.get(newurl, 0) + 1
585
# We can close the fp now that we are sure that we won't
586
# use it with HTTPError.
588
# We have all we need already in the response
589
req.connection.fake_close()
591
return self.parent.open(redirected_req)
593
http_error_302 = http_error_303 = http_error_307 = http_error_30x
595
def http_error_301(self, req, fp, code, msg, headers):
596
response = self.http_error_30x(req, fp, code, msg, headers)
597
# If one or several 301 response occur during the
598
# redirection chain, we MUST update the original request
599
# to indicate where the URI where finally found.
602
while original_req.parent is not None:
603
original_req = original_req.parent
604
if original_req.redirected_to is None:
605
# Only the last occurring 301 should be taken
606
# into account i.e. the first occurring here when
607
# redirected_to has not yet been set.
608
original_req.redirected_to = redirected_url
612
class ProxyHandler(urllib2.ProxyHandler):
613
"""Handles proxy setting.
615
Copied and modified from urllib2 to be able to modify the
616
request during the request pre-processing instead of
617
modifying it at _open time. As we capture (or create) the
618
connection object during request processing, _open time was
621
Note that the proxy handling *may* modify the protocol used;
622
the request may be against an https server proxied through an
623
http proxy. So, https_request will be called, but later it's
624
really http_open that will be called. This explain why we
625
don't have to call self.parent.open as the urllib2 did.
628
# Proxies must be in front
632
def __init__(self, proxies=None):
633
urllib2.ProxyHandler.__init__(self, proxies)
634
# First, let's get rid of urllib2 implementation
635
for type, proxy in self.proxies.items():
636
if self._debuglevel > 0:
637
print 'Will unbind %s_open for %r' % (type, proxy)
638
delattr(self, '%s_open' % type)
640
# We are interested only by the http[s] proxies
641
http_proxy = self.get_proxy_env_var('http')
642
https_proxy = self.get_proxy_env_var('https')
644
if http_proxy is not None:
645
if self._debuglevel > 0:
646
print 'Will bind http_request for %r' % http_proxy
647
setattr(self, 'http_request',
648
lambda request: self.set_proxy(request, 'http'))
650
if https_proxy is not None:
651
if self._debuglevel > 0:
652
print 'Will bind http_request for %r' % https_proxy
653
setattr(self, 'https_request',
654
lambda request: self.set_proxy(request, 'https'))
656
def get_proxy_env_var(self, name, default_to='all'):
657
"""Get a proxy env var.
659
Note that we indirectly rely on
660
urllib.getproxies_environment taking into account the
661
uppercased values for proxy variables.
664
return self.proxies[name.lower()]
666
if default_to is not None:
667
# Try to get the alternate environment variable
669
return self.proxies[default_to]
674
def proxy_bypass(self, host):
675
"""Check if host should be proxied or not"""
676
no_proxy = self.get_proxy_env_var('no', None)
679
hhost, hport = urllib.splitport(host)
680
# Does host match any of the domains mentioned in
681
# no_proxy ? The rules about what is authorized in no_proxy
682
# are fuzzy (to say the least). We try to allow most
683
# commonly seen values.
684
for domain in no_proxy.split(','):
685
dhost, dport = urllib.splitport(domain)
686
if hport == dport or dport is None:
688
dhost = dhost.replace(".", r"\.")
689
dhost = dhost.replace("*", r".*")
690
dhost = dhost.replace("?", r".")
691
if re.match(dhost, hhost, re.IGNORECASE):
693
# Nevertheless, there are platform-specific ways to
695
return urllib.proxy_bypass(host)
697
def set_proxy(self, request, type):
698
if self.proxy_bypass(request.get_host()):
701
proxy = self.get_proxy_env_var(type)
702
if self._debuglevel > 0:
703
print 'set_proxy %s_request for %r' % (type, proxy)
704
orig_type = request.get_type()
705
type, r_type = urllib.splittype(proxy)
706
host, XXX = urllib.splithost(r_type)
708
user_pass, host = host.split('@', 1)
710
user, password = user_pass.split(':', 1)
711
user_pass = '%s:%s' % (urllib.unquote(user),
712
urllib.unquote(password))
713
user_pass.encode('base64').strip()
714
req.add_header('Proxy-authorization', 'Basic ' + user_pass)
715
host = urllib.unquote(host)
716
request.set_proxy(host, type)
717
if self._debuglevel > 0:
718
print 'set_proxy: proxy set to %r://%r' % (type, host)
722
class HTTPBasicAuthHandler(urllib2.HTTPBasicAuthHandler):
    """Custom basic authentication handler.

    Send the authentication preventively to avoid the
    roundtrip associated with the 401 error.
    """

    # TODO: Not implemented yet; sketch of the intended scheme:
    # def http_request(self, request):
    #     """Insert an authentification header if information is available"""
    #     if request.auth == 'basic' and request.password is not None:
    #         ...
class HTTPErrorProcessor(urllib2.HTTPErrorProcessor):
    """Process HTTP error responses.

    We don't really process the errors, quite the contrary
    instead, we leave our Transport handle them.
    """

    handler_order = 1000 # after all other processing

    def http_response(self, request, response):
        """Let success responses through, route the rest to the error chain."""
        code, msg, hdrs = response.code, response.msg, response.info()

        if code not in (200, # Ok
                        206, # Partial content
                        ):
            response = self.parent.error('http', request, response,
                                         code, msg, hdrs)
        return response

    https_response = http_response
class HTTPDefaultErrorHandler(urllib2.HTTPDefaultErrorHandler):
    """Translate common errors into bzr Exceptions"""

    def http_error_default(self, req, fp, code, msg, hdrs):
        if code == 404:
            # FIX: qualify HTTPError with its module, the bare name
            # is not bound in this file.
            raise errors.NoSuchFile(req.get_selector(),
                                    extra=urllib2.HTTPError(req.get_full_url(),
                                                            code, msg,
                                                            hdrs, fp))
        elif code == 403:
            raise errors.TransportError('Server refuses to fullfil the request')
        elif code == 416:
            # We don't know which, but one of the ranges we
            # specified was wrong. So we raise with 0 for a lack
            # of a better magic value.
            raise errors.InvalidRange(req.get_full_url(), 0)
        else:
            # TODO: A test is needed to exercise that code path
            raise errors.InvalidHttpResponse(req.get_full_url(),
                                             'Unable to handle http code %d: %s'
                                             % (code, msg))
class Opener(object):
    """A wrapper around urllib2.build_opener

    Daughter classes can override to build their own specific opener
    """
    # TODO: Provides hooks for daughter classes.

    def __init__(self,
                 connection=ConnectionHandler,
                 redirect=HTTPRedirectHandler,
                 error=HTTPErrorProcessor,):
        """Build the opener from the given handlers.

        :param connection: handler providing connection sharing.
        :param redirect: handler following redirections.
        :param error: handler routing error responses.
        """
        self.password_manager = PasswordManager()
        # TODO: Implements the necessary wrappers for the handlers
        # commented out below
        self._opener = urllib2.build_opener(
            connection, redirect, error,
            ProxyHandler,
            urllib2.HTTPBasicAuthHandler(self.password_manager),
            #urllib2.HTTPDigestAuthHandler(self.password_manager),
            #urllib2.ProxyBasicAuthHandler,
            #urllib2.ProxyDigestAuthHandler,
            HTTPHandler,
            HTTPSHandler,
            HTTPDefaultErrorHandler,
            )
        self.open = self._opener.open
        if DEBUG >= 2:
            # When dealing with handler order, it's easy to mess
            # things up, the following will help understand which
            # handler is used, when and for what.
            import pprint
            pprint.pprint(self._opener.__dict__)