~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/transport/http/__init__.py

Committer: Robert Collins
Date: 2007-03-08 04:06:06 UTC
mfrom: (2323.1.1 integration)
mto: This revision was merged to the branch mainline in revision 2442.
Revision ID: robertc@robertcollins.net-20070308040606-84gsniv56huiyjt4

Merge bzr.dev.

files added:
COPYING.txt

bzr.ico

bzrlib/benchmarks/bench_bench.py

bzrlib/benchmarks/bench_bundle.py

bzrlib/benchmarks/bench_cache_utf8.py

bzrlib/benchmarks/bench_info.py

bzrlib/benchmarks/bench_inventory.py

bzrlib/benchmarks/bench_log.py

bzrlib/benchmarks/bench_osutils.py

bzrlib/benchmarks/bench_rocks.py

bzrlib/benchmarks/bench_sftp.py

bzrlib/benchmarks/bench_startup.py

bzrlib/benchmarks/bench_xml.py

bzrlib/benchmarks/tree_creator

bzrlib/benchmarks/tree_creator/__init__.py

bzrlib/benchmarks/tree_creator/heavily_merged.py

bzrlib/benchmarks/tree_creator/kernel_like.py

bzrlib/benchmarks/tree_creator/many_commit.py

bzrlib/benchmarks/tree_creator/simple_many_commit.py

bzrlib/bundle/serializer/v09.py

bzrlib/cache_utf8.py

bzrlib/cmd_version_info.py

bzrlib/debug.py

bzrlib/dirstate.py

bzrlib/generate_ids.py

bzrlib/globbing.py

bzrlib/help_topics.py

bzrlib/ignores.py

bzrlib/inspect_for_copy.py

bzrlib/lazy_import.py

bzrlib/lazy_regex.py

bzrlib/memorytree.py

bzrlib/mutabletree.py

bzrlib/plugins/launchpad/lp_indirect.py

bzrlib/plugins/launchpad/test_lp_indirect.py

bzrlib/registry.py

bzrlib/repofmt

bzrlib/repofmt/__init__.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/weaverepo.py

bzrlib/revisiontree.py

bzrlib/tag.py

bzrlib/tests/EncodingAdapter.py

bzrlib/tests/HttpServer.py

bzrlib/tests/blackbox/test_bundle.py

bzrlib/tests/blackbox/test_command_encoding.py

bzrlib/tests/blackbox/test_debug.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_inventory.py

bzrlib/tests/blackbox/test_join.py

bzrlib/tests/blackbox/test_locale.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_nick.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/blackbox/test_revision_history.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_split.py

bzrlib/tests/blackbox/test_tags.py

bzrlib/tests/blackbox/test_testament.py

bzrlib/tests/blackbox/test_version_info.py

bzrlib/tests/blackbox/test_whoami.py

bzrlib/tests/branch_implementations/test_commit.py

bzrlib/tests/branch_implementations/test_hooks.py

bzrlib/tests/branch_implementations/test_http.py

bzrlib/tests/branch_implementations/test_last_revision_info.py

bzrlib/tests/branch_implementations/test_locking.py

bzrlib/tests/branch_implementations/test_push.py

bzrlib/tests/branch_implementations/test_tags.py

bzrlib/tests/branch_implementations/test_uncommit.py

bzrlib/tests/intertree_implementations

bzrlib/tests/intertree_implementations/__init__.py

bzrlib/tests/intertree_implementations/test_compare.py

bzrlib/tests/lock_helpers.py

bzrlib/tests/repository_implementations/test_commit_builder.py

bzrlib/tests/repository_implementations/test_iter_reverse_revision_history.py

bzrlib/tests/repository_implementations/test_statistics.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_atomicfile.py

bzrlib/tests/test_cache_utf8.py

bzrlib/tests/test_delta.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_extract.py

bzrlib/tests/test_ftp_transport.py

bzrlib/tests/test_generate_docs.py

bzrlib/tests/test_generate_ids.py

bzrlib/tests/test_globbing.py

bzrlib/tests/test_http_response.py

bzrlib/tests/test_https_ca_bundle.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_lazy_import.py

bzrlib/tests/test_lazy_regex.py

bzrlib/tests/test_memorytree.py

bzrlib/tests/test_osutils_encodings.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_registry.py

bzrlib/tests/test_revert.py

bzrlib/tests/test_revisiontree.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_subsume.py

bzrlib/tests/test_tag.py

bzrlib/tests/test_tree.py

bzrlib/tests/test_treebuilder.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_version.py

bzrlib/tests/test_version_info.py

bzrlib/tests/test_workingtree_4.py

bzrlib/tests/test_wsgi.py

bzrlib/tests/tree_implementations

bzrlib/tests/tree_implementations/__init__.py

bzrlib/tests/tree_implementations/test_get_file_mtime.py

bzrlib/tests/tree_implementations/test_get_symlink_target.py

bzrlib/tests/tree_implementations/test_list_files.py

bzrlib/tests/tree_implementations/test_revision_tree.py

bzrlib/tests/tree_implementations/test_test_trees.py

bzrlib/tests/tree_implementations/test_tree.py

bzrlib/tests/tree_implementations/test_walkdirs.py

bzrlib/tests/workingtree_implementations/test_add.py

bzrlib/tests/workingtree_implementations/test_add_reference.py

bzrlib/tests/workingtree_implementations/test_basis_tree.py

bzrlib/tests/workingtree_implementations/test_changes_from.py

bzrlib/tests/workingtree_implementations/test_executable.py

bzrlib/tests/workingtree_implementations/test_flush.py

bzrlib/tests/workingtree_implementations/test_get_parent_ids.py

bzrlib/tests/workingtree_implementations/test_locking.py

bzrlib/tests/workingtree_implementations/test_merge_from_branch.py

bzrlib/tests/workingtree_implementations/test_mkdir.py

bzrlib/tests/workingtree_implementations/test_move.py

bzrlib/tests/workingtree_implementations/test_nested_specifics.py

bzrlib/tests/workingtree_implementations/test_parents.py

bzrlib/tests/workingtree_implementations/test_paths2ids.py

bzrlib/tests/workingtree_implementations/test_put_file.py

bzrlib/tests/workingtree_implementations/test_read_working_inventory.py

bzrlib/tests/workingtree_implementations/test_readonly.py

bzrlib/tests/workingtree_implementations/test_rename_one.py

bzrlib/tests/workingtree_implementations/test_revision_tree.py

bzrlib/tests/workingtree_implementations/test_set_root_id.py

bzrlib/tests/workingtree_implementations/test_smart_add.py

bzrlib/tests/workingtree_implementations/test_unversion.py

bzrlib/tests/workingtree_implementations/test_walkdirs.py

bzrlib/transport/chroot.py

bzrlib/transport/http/_pycurl_errors.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/http/ca_bundle.py

bzrlib/transport/http/response.py

bzrlib/transport/http/wsgi.py

bzrlib/transport/smart.py

bzrlib/transport/ssh.py

bzrlib/treebuilder.py

bzrlib/urlutils.py

bzrlib/util/bencode.py

bzrlib/version.py

bzrlib/version_info_formats

bzrlib/version_info_formats/__init__.py

bzrlib/version_info_formats/format_python.py

bzrlib/version_info_formats/format_rio.py

bzrlib/workingtree_4.py

bzrlib/xml6.py

bzrlib/xml7.py

doc/bazaar-vcs.org.kid

doc/centralized_workflow.txt

doc/default.css

doc/http_smart_server.txt

doc/index.txt

doc/server.txt

doc/version_info.txt

profile_imports.py

tools/rst2html.py

tools/rst2prettyhtml.py

tools/win32

tools/win32/__init__.py

tools/win32/bazaar.url

tools/win32/bzr-win32-bdist-postinstall.py

tools/win32/bzr.iss.cog

tools/win32/bzr_postinstall.py

tools/win32/file_version.py

tools/win32/info.txt

tools/win32/ostools.py

tools/win32/start_bzr.bat

files removed:
bzrlib/util/configobj/validate.py

bzrlib/util/urlgrabber

bzrlib/util/urlgrabber/__init__.py

bzrlib/util/urlgrabber/byterange.py

bzrlib/util/urlgrabber/grabber.py

bzrlib/util/urlgrabber/keepalive.py

bzrlib/util/urlgrabber/mirror.py

bzrlib/util/urlgrabber/progress.py

patience-test.py

files renamed:
bzrlib/bundle/read_bundle.py => bzrlib/bundle/bundle_data.py

bzrlib/bundle/serializer/v07.py => bzrlib/bundle/serializer/v08.py

bzrlib/tests/test_revprops.py => bzrlib/tests/repository_implementations/test_revision.py

bzrlib/tests/test_command.py => bzrlib/tests/test_commands.py

bzrlib/win32console.py => bzrlib/win32utils.py

files modified:
.bzrignore

BRANCH.TODO

HACKING

Makefile

NEWS

README

TODO

bzrlib/__init__.py

bzrlib/add.py

bzrlib/annotate.py

bzrlib/atomicfile.py

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_add.py

bzrlib/benchmarks/bench_checkout.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_status.py

bzrlib/benchmarks/bench_transform.py

bzrlib/benchmarks/bench_workingtree.py

bzrlib/branch.py

bzrlib/builtins.py

bzrlib/bundle/__init__.py

bzrlib/bundle/apply_bundle.py

bzrlib/bundle/commands.py

bzrlib/bundle/common.py

bzrlib/bundle/old/send_changeset.py

bzrlib/bundle/serializer/__init__.py

bzrlib/bzrdir.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/diff.py

bzrlib/doc/__init__.py

bzrlib/doc/api/__init__.py

bzrlib/doc/api/branch.txt

bzrlib/doc/api/transport.txt

bzrlib/errors.py

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/gpg.py

bzrlib/graph.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/identitymap.py

bzrlib/info.py

bzrlib/inter.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/lsprof.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/patch.py

bzrlib/patches.py

bzrlib/patiencediff.py

bzrlib/plugin.py

bzrlib/plugins/__init__.py

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/progress.py

bzrlib/reconcile.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/rio.py

bzrlib/shellcomplete.py

bzrlib/sign_my_commits.py

bzrlib/status.py

bzrlib/store/__init__.py

bzrlib/store/revision/__init__.py

bzrlib/store/revision/knit.py

bzrlib/store/revision/text.py

bzrlib/store/text.py

bzrlib/store/versioned/__init__.py

bzrlib/symbol_versioning.py

bzrlib/testament.py

bzrlib/tests/HTTPTestUtil.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_ignored.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_break_lock.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_pull.py

bzrlib/tests/branch_implementations/test_update.py

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/interversionedfile_implementations/__init__.py

bzrlib/tests/interversionedfile_implementations/test_join.py

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/repository_implementations/test_break_lock.py

bzrlib/tests/repository_implementations/test_fileid_involved.py

bzrlib/tests/repository_implementations/test_reconcile.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/revisionstore_implementations/__init__.py

bzrlib/tests/revisionstore_implementations/test_all.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_api.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_doc_generate.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_escaped_store.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_http.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge3.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_patch.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisionnamespaces.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_source.py

bzrlib/tests/test_status.py

bzrlib/tests/test_store.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_textfile.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_whitebox.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_xml.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_basis_inventory.py

bzrlib/tests/workingtree_implementations/test_break_lock.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_is_ignored.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/textfile.py

bzrlib/textinv.py

bzrlib/textui.py

bzrlib/trace.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport/__init__.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/ftp.py

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/readonly.py

bzrlib/transport/sftp.py

bzrlib/tree.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/util/configobj/configobj.py

bzrlib/util/elementtree/ElementTree.py

bzrlib/versionedfile.py

bzrlib/weave.py

bzrlib/weave_commands.py

bzrlib/weavefile.py

bzrlib/workingtree.py

bzrlib/xml4.py

bzrlib/xml5.py

bzrlib/xml_serializer.py

contrib/newinventory.py

doc/README.1st

doc/configuration.txt

doc/plugins.txt

doc/setting_up_email.txt

doc/specifying_revisions.txt

doc/tutorial.txt

doc/using_aliases.txt

generate_docs.py

setup.py

tools/convertfile.py

tools/convertinv.py

tools/doc_generate/__init__.py

tools/doc_generate/autodoc_bash_completion.py

tools/doc_generate/autodoc_man.py

tools/doc_generate/autodoc_rstx.py

tools/history2revfiles.py

tools/http_client.py

tools/weavebench.py

Show diffs side-by-side

added added

removed removed

bzrlib/transport/http/__init__.py

There are separate implementation modules for each http client implementation.

"""

import errno

import os

from collections import deque

from cStringIO import StringIO

import mimetools

import re

import urlparse

import urllib

from warnings import warn

import sys

from bzrlib.transport import Transport, register_transport, Server

from bzrlib.errors import (TransportNotPossible, NoSuchFile,

TransportError, ConnectionError)

from bzrlib.errors import BzrError, BzrCheckError

from bzrlib.branch import Branch

from bzrlib import errors, ui

from bzrlib.trace import mutter

# TODO: load these only when running http tests

import BaseHTTPServer, SimpleHTTPServer, socket, time

import threading

from bzrlib.ui import ui_factory

from bzrlib.transport import (

smart,

Transport,

)

# TODO: This is not used anymore by HttpTransport_urllib

# (extracting the auth info and prompting the user for a password

# have been split), only the tests still use it. It should be

# deleted and the tests rewritten ASAP to stay in sync.

def extract_auth(url, password_manager):

"""Extract auth parameters from an HTTP/HTTPS url and add them to the given

password manager. Return the url, minus those auth parameters (which

assert re.match(r'^(https?)(\+\w+)?://', url), \

'invalid absolute url %r' % url

scheme, netloc, path, query, fragment = urlparse.urlsplit(url)

if '@' in netloc:

auth, netloc = netloc.split('@', 1)

if ':' in auth:

if password is not None:

password = urllib.unquote(password)

else:

password = ui_factory.get_password(prompt='HTTP %(user)@%(host) password',

user=username, host=host)

password = ui.ui_factory.get_password(

prompt='HTTP %(user)s@%(host)s password',

user=username, host=host)

password_manager.add_password(None, host, username, password)

url = urlparse.urlunsplit((scheme, netloc, path, query, fragment))

return url

class HttpTransportBase(Transport):

def _extract_headers(header_text, url):

"""Extract the mapping for an rfc2822 header

This is a helper function for the test suite and for _pycurl.

(urllib already parses the headers for us)

In the case that there are multiple headers inside the file,

the last one is returned.

:param header_text: A string of header information.

This expects that the first line of a header will always be HTTP ...

:param url: The url we are parsing, so we can raise nice errors

:return: mimetools.Message object, which basically acts like a case

insensitive dictionary.

"""

first_header = True

remaining = header_text

if not remaining:

raise errors.InvalidHttpResponse(url, 'Empty headers')

while remaining:

header_file = StringIO(remaining)

first_line = header_file.readline()

if not first_line.startswith('HTTP'):

if first_header: # The first header *must* start with HTTP

raise errors.InvalidHttpResponse(url,

'Opening header line did not start with HTTP: %s'

100

% (first_line,))

101

assert False, 'Opening header line was not HTTP'

102

else:

103

break # We are done parsing

104

first_header = False

105

m = mimetools.Message(header_file)

106

107

# mimetools.Message parses the first header up to a blank line

108

# So while there is remaining data, it probably means there is

109

# another header to be parsed.

110

# Get rid of any preceding whitespace, which if it is all whitespace

111

# will get rid of everything.

112

remaining = header_file.read().lstrip()

113

return m

114

115

116

class HttpTransportBase(Transport, smart.SmartClientMedium):

117

"""Base class for http implementations.

118

119

Does URL parsing, etc, but not any network IO.

125

# _proto: "http" or "https"

126

# _qualified_proto: may have "+pycurl", etc

127

def __init__(self, base):

128

def __init__(self, base, from_transport=None):

129

"""Set the base path where files will be stored."""

130

proto_match = re.match(r'^(https?)(\+\w+)?://', base)

131

if not proto_match:

138

if base[-1] != '/':

139

base = base + '/'

140

super(HttpTransportBase, self).__init__(base)

# In the future we might actually connect to the remote host

# rather than using get_url

100

# self._connection = None

101

141

(apparent_proto, self._host,

102

142

self._path, self._parameters,

103

143

self._query, self._fragment) = urlparse.urlparse(self.base)

104

144

self._qualified_proto = apparent_proto

145

# range hint is handled dynamically throughout the life

146

# of the object. We start by trying multi-range requests

147

# and if the server returns bogus results, we retry with

148

# single range requests and, finally, we forget about

149

# range if the server really can't understand. Once

150

# acquired, this piece of info is propagated to clones.

151

if from_transport is not None:

152

self._range_hint = from_transport._range_hint

153

else:

154

self._range_hint = 'multi'

105

155

106

156

def abspath(self, relpath):

107

157

"""Return the full url to the given relative path.

113

163

implementation qualifier.

114

164

"""

115

165

assert isinstance(relpath, basestring)

166

if isinstance(relpath, unicode):

167

raise errors.InvalidURL(relpath, 'paths must not be unicode.')

116

168

if isinstance(relpath, basestring):

117

169

relpath_parts = relpath.split('/')

118

170

else:

119

171

# TODO: Don't call this with an array - no magic interfaces

120

172

relpath_parts = relpath[:]

121

if len(relpath_parts) > 1:

122

if relpath_parts[0] == '':

123

raise ValueError("path %r within branch %r seems to be absolute"

124

% (relpath, self._path))

125

if relpath_parts[-1] == '':

126

raise ValueError("path %r within branch %r seems to be a directory"

127

% (relpath, self._path))

128

basepath = self._path.split('/')

129

if len(basepath) > 0 and basepath[-1] == '':

130

basepath = basepath[:-1]

173

if relpath.startswith('/'):

174

basepath = []

175

else:

176

# Except for the root, no trailing slashes are allowed

177

if len(relpath_parts) > 1 and relpath_parts[-1] == '':

178

raise ValueError(

179

"path %r within branch %r seems to be a directory"

180

% (relpath, self._path))

181

basepath = self._path.split('/')

182

if len(basepath) > 0 and basepath[-1] == '':

183

basepath = basepath[:-1]

184

131

185

for p in relpath_parts:

132

186

if p == '..':

133

187

if len(basepath) == 0:

186

240

"""

187

241

raise NotImplementedError(self._get)

188

242

243

def get_request(self):

244

return SmartClientHTTPMediumRequest(self)

245

246

def get_smart_medium(self):

247

"""See Transport.get_smart_medium.

248

249

HttpTransportBase directly implements the minimal interface of

250

SmartMediumClient, so this returns self.

251

"""

252

return self

253

254

def _retry_get(self, relpath, ranges, exc_info):

255

"""A GET request has failed, let's retry with a simpler request."""

256

257

try_again = False

258

# The server does not give us enough data or

259

# bogus-looking result, let's try again with

260

# a simpler request if possible.

261

if self._range_hint == 'multi':

262

self._range_hint = 'single'

263

mutter('Retry %s with single range request' % relpath)

264

try_again = True

265

elif self._range_hint == 'single':

266

self._range_hint = None

267

mutter('Retry %s without ranges' % relpath)

268

try_again = True

269

if try_again:

270

# Note that since the offsets and the ranges may not

271

# be in the same order, we don't try to calculate a

272

# restricted single range encompassing unprocessed

273

# offsets.

274

code, f = self._get(relpath, ranges)

275

return try_again, code, f

276

else:

277

# We tried all the tricks, but nothing worked. We

278

# re-raise original exception; the 'mutter' calls

279

# above will indicate that further tries were

280

# unsuccessful

281

raise exc_info[0], exc_info[1], exc_info[2]

282

189

283

def readv(self, relpath, offsets):

190

284

"""Get parts of the file at the given relative path.

191

285

192

286

:param offsets: A list of (offset, size) tuples.

193

287

:param return: A list or generator of (offset, data) tuples

194

288

"""

195

# Ideally we would pass one big request asking for all the ranges in

196

# one go; however then the server will give a multipart mime response

197

# back, and we can't parse them yet. So instead we just get one range

198

# per region, and try to coallesce the regions as much as possible.

199

200

# The read-coallescing code is not quite regular enough to have a

201

# single driver routine and

202

# helper method in Transport.

203

def do_combined_read(combined_offsets):

204

# read one coalesced block

205

total_size = 0

206

for offset, size in combined_offsets:

207

total_size += size

208

mutter('readv coalesced %d reads.', len(combined_offsets))

209

offset = combined_offsets[0][0]

210

byte_range = (offset, offset + total_size - 1)

211

code, result_file = self._get(relpath, [byte_range])

212

if code == 206:

213

for off, size in combined_offsets:

214

result_bytes = result_file.read(size)

215

assert len(result_bytes) == size

216

yield off, result_bytes

217

elif code == 200:

218

data = result_file.read(offset + total_size)[offset:offset + total_size]

219

pos = 0

220

for offset, size in combined_offsets:

221

yield offset, data[pos:pos + size]

222

pos += size

223

del data

224

if not len(offsets):

225

return

226

pending_offsets = deque(offsets)

227

combined_offsets = []

228

while len(pending_offsets):

229

offset, size = pending_offsets.popleft()

230

if not combined_offsets:

231

combined_offsets = [[offset, size]]

289

ranges = self.offsets_to_ranges(offsets)

290

mutter('http readv of %s collapsed %s offsets => %s',

291

relpath, len(offsets), ranges)

292

293

try_again = True

294

while try_again:

295

try_again = False

296

try:

297

code, f = self._get(relpath, ranges)

298

except (errors.InvalidRange, errors.ShortReadvError), e:

299

try_again, code, f = self._retry_get(relpath, ranges,

300

sys.exc_info())

301

302

for start, size in offsets:

303

try_again = True

304

while try_again:

305

try_again = False

306

f.seek(start, (start < 0) and 2 or 0)

307

start = f.tell()

308

try:

309

data = f.read(size)

310

if len(data) != size:

311

raise errors.ShortReadvError(relpath, start, size,

312

actual=len(data))

313

except (errors.InvalidRange, errors.ShortReadvError), e:

314

# Note that we replace 'f' here and that it

315

# may need cleaning one day before being

316

# thrown that way.

317

try_again, code, f = self._retry_get(relpath, ranges,

318

sys.exc_info())

319

# After one or more tries, we get the data.

320

yield start, data

321

322

@staticmethod

323

def offsets_to_ranges(offsets):

324

"""Turn a list of offsets and sizes into a list of byte ranges.

325

326

:param offsets: A list of tuples of (start, size). An empty list

327

is not accepted.

328

:return: a list of inclusive byte ranges (start, end)

329

Adjacent ranges will be combined.

330

"""

331

# Make sure we process sorted offsets

332

offsets = sorted(offsets)

333

334

prev_end = None

335

combined = []

336

337

for start, size in offsets:

338

end = start + size - 1

339

if prev_end is None:

340

combined.append([start, end])

341

elif start <= prev_end + 1:

342

combined[-1][1] = end

232

343

else:

233

if (len (combined_offsets) < 500 and

234

combined_offsets[-1][0] + combined_offsets[-1][1] == offset):

235

# combatible offset:

236

combined_offsets.append([offset, size])

237

else:

238

# incompatible, or over the threshold issue a read and yield

239

pending_offsets.appendleft((offset, size))

240

for result in do_combined_read(combined_offsets):

241

yield result

242

combined_offsets = []

243

# whatever is left is a single coalesced request

244

if len(combined_offsets):

245

for result in do_combined_read(combined_offsets):

246

yield result

247

248

def put(self, relpath, f, mode=None):

249

"""Copy the file-like or string object into the location.

344

combined.append([start, end])

345

prev_end = end

346

347

return combined

348

349

def _post(self, body_bytes):

350

"""POST body_bytes to .bzr/smart on this transport.

351

352

:returns: (response code, response body file-like object).

353

"""

354

# TODO: Requiring all the body_bytes to be available at the beginning of

355

# the POST may require large client buffers. It would be nice to have

356

# an interface that allows streaming via POST when possible (and

357

# degrades to a local buffer when not).

358

raise NotImplementedError(self._post)

359

360

def put_file(self, relpath, f, mode=None):

361

"""Copy the file-like object into the location.

250

362

251

363

:param relpath: Location to put the contents, relative to base.

252

:param f: File-like or string object.

364

:param f: File-like object.

253

365

"""

254

raise TransportNotPossible('http PUT not supported')

366

raise errors.TransportNotPossible('http PUT not supported')

255

367

256

368

def mkdir(self, relpath, mode=None):

257

369

"""Create a directory at the given path."""

258

raise TransportNotPossible('http does not support mkdir()')

370

raise errors.TransportNotPossible('http does not support mkdir()')

259

371

260

372

def rmdir(self, relpath):

261

373

"""See Transport.rmdir."""

262

raise TransportNotPossible('http does not support rmdir()')

374

raise errors.TransportNotPossible('http does not support rmdir()')

263

375

264

def append(self, relpath, f):

376

def append_file(self, relpath, f, mode=None):

265

377

"""Append the text in the file-like object into the final

266

378

location.

267

379

"""

268

raise TransportNotPossible('http does not support append()')

380

raise errors.TransportNotPossible('http does not support append()')

269

381

270

382

def copy(self, rel_from, rel_to):

271

383

"""Copy the item at rel_from to the location at rel_to"""

272

raise TransportNotPossible('http does not support copy()')

384

raise errors.TransportNotPossible('http does not support copy()')

273

385

274

386

def copy_to(self, relpaths, other, mode=None, pb=None):

275

387

"""Copy a set of entries from self into another Transport.

283

395

# the remote location is the same, and rather than download, and

284

396

# then upload, it could just issue a remote copy_this command.

285

397

if isinstance(other, HttpTransportBase):

286

raise TransportNotPossible('http cannot be the target of copy_to()')

398

raise errors.TransportNotPossible(

399

'http cannot be the target of copy_to()')

287

400

else:

288

401

return super(HttpTransportBase, self).\

289

402

copy_to(relpaths, other, mode=mode, pb=pb)

290

403

291

404

def move(self, rel_from, rel_to):

292

405

"""Move the item at rel_from to the location at rel_to"""

293

raise TransportNotPossible('http does not support move()')

406

raise errors.TransportNotPossible('http does not support move()')

294

407

295

408

def delete(self, relpath):

296

409

"""Delete the item at relpath"""

297

raise TransportNotPossible('http does not support delete()')

410

raise errors.TransportNotPossible('http does not support delete()')

298

411

299

412

def is_readonly(self):

300

413

"""See Transport.is_readonly."""

307

420

def stat(self, relpath):

308

421

"""Return the stat information for a file.

309

422

"""

310

raise TransportNotPossible('http does not support stat()')

423

raise errors.TransportNotPossible('http does not support stat()')

311

424

312

425

def lock_read(self, relpath):

313

426

"""Lock the given file for shared (read) access.

328

441

329

442

:return: A lock object, which should be passed to Transport.unlock()

330

443

"""

331

raise TransportNotPossible('http does not support lock_write()')

444

raise errors.TransportNotPossible('http does not support lock_write()')

332

445

333

446

def clone(self, offset=None):

334

447

"""Return a new HttpTransportBase with root at self.base + offset

335

For now HttpTransportBase does not actually connect, so just return

336

a new HttpTransportBase object.

448

449

We let the daughter classes take advantage of the hint

450

that it's a cloning not a raw creation.

337

451

"""

338

452

if offset is None:

339

return self.__class__(self.base)

340

else:

341

return self.__class__(self.abspath(offset))

342

343

#---------------- test server facilities ----------------

344

# TODO: load these only when running tests

345

346

347

class WebserverNotAvailable(Exception):

348

pass

349

350

351

class BadWebserverPath(ValueError):

352

def __str__(self):

353

return 'path %s is not in %s' % self.args

354

355

356

class TestingHTTPRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):

357

358

def log_message(self, format, *args):

359

self.server.test_case.log('webserver - %s - - [%s] %s "%s" "%s"',

360

self.address_string(),

361

self.log_date_time_string(),

362

format % args,

363

self.headers.get('referer', '-'),

364

self.headers.get('user-agent', '-'))

365

366

def handle_one_request(self):

367

"""Handle a single HTTP request.

368

369

You normally don't need to override this method; see the class

370

__doc__ string for information on how to handle specific HTTP

371

commands such as GET and POST.

372

453

return self.__class__(self.base, self)

454

else:

455

return self.__class__(self.abspath(offset), self)

456

457

def attempted_range_header(self, ranges, tail_amount):

458

"""Prepare an HTTP Range header at a level the server should accept"""

459

460

if self._range_hint == 'multi':

461

# Nothing to do here

462

return self.range_header(ranges, tail_amount)

463

elif self._range_hint == 'single':

464

# Combine all the requested ranges into a single

465

# encompassing one

466

if len(ranges) > 0:

467

start, ignored = ranges[0]

468

ignored, end = ranges[-1]

469

if tail_amount not in (0, None):

470

# Nothing we can do here to combine ranges

471

# with tail_amount, so just return None. The

472

# whole file should be downloaded.

473

return None

474

else:

475

return self.range_header([(start, end)], 0)

476

else:

477

# Only tail_amount requested, let range_header

478

# do its work

479

return self.range_header(ranges, tail_amount)

480

else:

481

return None

482

483

@staticmethod

484

def range_header(ranges, tail_amount):

485

"""Turn a list of byte ranges into an HTTP Range header value.

486

487

:param ranges: A list of byte ranges, (start, end).

488

:param tail_amount: The amount to get from the end of the file.

489

490

:return: HTTP range header string.

491

492

At least a non-empty ranges *or* a tail_amount must be

493

provided.

373

494

"""

374

for i in xrange(1,11): # Don't try more than 10 times

375

try:

376

self.raw_requestline = self.rfile.readline()

377

except socket.error, e:

378

if e.args[0] in (errno.EAGAIN, errno.EWOULDBLOCK):

379

# omitted for now because some tests look at the log of

380

# the server and expect to see no errors. see recent

381

# email thread. -- mbp 20051021.

382

## self.log_message('EAGAIN (%d) while reading from raw_requestline' % i)

383

time.sleep(0.01)

384

continue

385

raise

386

else:

387

break

388

if not self.raw_requestline:

389

self.close_connection = 1

390

return

391

if not self.parse_request(): # An error code has been sent, just exit

392

return

393

mname = 'do_' + self.command

394

if not hasattr(self, mname):

395

self.send_error(501, "Unsupported method (%r)" % self.command)

396

return

397

method = getattr(self, mname)

398

method()

399

400

401

class TestingHTTPServer(BaseHTTPServer.HTTPServer):

402

def __init__(self, server_address, RequestHandlerClass, test_case):

403

BaseHTTPServer.HTTPServer.__init__(self, server_address,

404

RequestHandlerClass)

405

self.test_case = test_case

406

407

class HttpServer(Server):

408

"""A test server for http transports."""

409

410

# used to form the url that connects to this server

411

_url_protocol = 'http'

412

413

def _http_start(self):

414

httpd = None

415

httpd = TestingHTTPServer(('localhost', 0),

416

TestingHTTPRequestHandler,

417

self)

418

host, port = httpd.socket.getsockname()

419

self._http_base_url = '%s://localhost:%s/' % (self._url_protocol, port)

420

self._http_starting.release()

421

httpd.socket.settimeout(0.1)

422

423

while self._http_running:

424

try:

425

httpd.handle_request()

426

except socket.timeout:

427

pass

428

429

def _get_remote_url(self, path):

430

path_parts = path.split(os.path.sep)

431

if os.path.isabs(path):

432

if path_parts[:len(self._local_path_parts)] != \

433

self._local_path_parts:

434

raise BadWebserverPath(path, self.test_dir)

435

remote_path = '/'.join(path_parts[len(self._local_path_parts):])

436

else:

437

remote_path = '/'.join(path_parts)

438

439

self._http_starting.acquire()

440

self._http_starting.release()

441

return self._http_base_url + remote_path

442

443

def log(self, format, *args):

444

"""Capture Server log output."""

445

self.logs.append(format % args)

446

447

def setUp(self):

448

"""See bzrlib.transport.Server.setUp."""

449

self._home_dir = os.getcwdu()

450

self._local_path_parts = self._home_dir.split(os.path.sep)

451

self._http_starting = threading.Lock()

452

self._http_starting.acquire()

453

self._http_running = True

454

self._http_base_url = None

455

self._http_thread = threading.Thread(target=self._http_start)

456

self._http_thread.setDaemon(True)

457

self._http_thread.start()

458

self._http_proxy = os.environ.get("http_proxy")

459

if self._http_proxy is not None:

460

del os.environ["http_proxy"]

461

self.logs = []

462

463

def tearDown(self):

464

"""See bzrlib.transport.Server.tearDown."""

465

self._http_running = False

466

self._http_thread.join()

467

if self._http_proxy is not None:

468

import os

469

os.environ["http_proxy"] = self._http_proxy

470

471

def get_url(self):

472

"""See bzrlib.transport.Server.get_url."""

473

return self._get_remote_url(self._home_dir)

474

475

def get_bogus_url(self):

476

"""See bzrlib.transport.Server.get_bogus_url."""

477

# this is chosen to try to prevent trouble with proxies, weird dns,

478

# etc

479

return 'http://127.0.0.1:1/'

480

495

strings = []

496

for start, end in ranges:

497

strings.append('%d-%d' % (start, end))

498

499

if tail_amount:

500

strings.append('-%d' % tail_amount)

501

502

return ','.join(strings)

503

504

def send_http_smart_request(self, bytes):

505

code, body_filelike = self._post(bytes)

506

assert code == 200, 'unexpected HTTP response code %r' % (code,)

507

return body_filelike

508

509

510

class SmartClientHTTPMediumRequest(smart.SmartClientMediumRequest):

511

"""A SmartClientMediumRequest that works with an HTTP medium."""

512

513

def __init__(self, medium):

514

smart.SmartClientMediumRequest.__init__(self, medium)

515

self._buffer = ''

516

517

def _accept_bytes(self, bytes):

518

self._buffer += bytes

519

520

def _finished_writing(self):

521

data = self._medium.send_http_smart_request(self._buffer)

522

self._response_body = data

523

524

def _read_bytes(self, count):

525

return self._response_body.read(count)

526

527

def _finished_reading(self):

528

"""See SmartClientMediumRequest._finished_reading."""

529

pass

Older »