~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/tests/HttpServer.py

Committer: John Arbash Meinel
Date: 2006-12-01 19:41:16 UTC
mfrom: (2158 +trunk)
mto: This revision was merged to the branch mainline in revision 2159.
Revision ID: john@arbash-meinel.com-20061201194116-nvn5qhfxux5284jc

[merge] bzr.dev 2158

files added:
bzrlib/debug.py

bzrlib/generate_ids.py

bzrlib/help_topics.py

bzrlib/tests/HttpServer.py

bzrlib/tests/blackbox/test_debug.py

bzrlib/tests/blackbox/test_nick.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/test_generate_ids.py

bzrlib/tests/test_wsgi.py

bzrlib/transport/chroot.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/http/wsgi.py

doc/bazaar-vcs.org.kid

doc/http_smart_server.txt

tools/rst2prettyhtml.py

files modified:
.bzrignore

BRANCH.TODO

HACKING

Makefile

NEWS

README

bzrlib/__init__.py

bzrlib/annotate.py

bzrlib/benchmarks/bench_cache_utf8.py

bzrlib/builtins.py

bzrlib/bundle/serializer/v08.py

bzrlib/bzrdir.py

bzrlib/cache_utf8.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/diff.py

bzrlib/errors.py

bzrlib/help.py

bzrlib/ignores.py

bzrlib/inventory.py

bzrlib/knit.py

bzrlib/msgeditor.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/patiencediff.py

bzrlib/progress.py

bzrlib/repository.py

bzrlib/revisionspec.py

bzrlib/shellcomplete.py

bzrlib/status.py

bzrlib/store/versioned/__init__.py

bzrlib/symbol_versioning.py

bzrlib/tests/HTTPTestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_http.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_config.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_http.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_log.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_source.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_workingtree.py

bzrlib/trace.py

bzrlib/transform.py

bzrlib/transport/__init__.py

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/memory.py

bzrlib/transport/smart.py

bzrlib/transport/ssh.py

bzrlib/ui/__init__.py

bzrlib/workingtree.py

doc/centralized_workflow.txt

doc/configuration.txt

doc/index.txt

doc/specifying_revisions.txt

doc/tutorial.txt

Show diffs side-by-side

added added

removed removed

bzrlib/tests/HttpServer.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

import BaseHTTPServer

import errno

import os

from SimpleHTTPServer import SimpleHTTPRequestHandler

import socket

import posixpath

import random

import re

import sys

import threading

import time

import urllib

import urlparse

from bzrlib.transport import Server

class WebserverNotAvailable(Exception):
    """Raised to signal that no web server can be used."""

class BadWebserverPath(ValueError):
    """A path that does not live below the directory served over HTTP.

    Instances are built with two arguments: the offending path and the
    directory it was expected to be in.
    """

    def __str__(self):
        """Render the two constructor arguments as a readable message."""
        template = 'path %s is not in %s'
        return template % self.args

class TestingHTTPRequestHandler(SimpleHTTPRequestHandler):
    """Request handler used by the test HTTP server.

    On top of SimpleHTTPRequestHandler it:

    * sends log lines to ``self.server.test_case.log``,
    * retries reading the request line when the socket reports
      EAGAIN/EWOULDBLOCK,
    * honours the ``Range`` request header for single and multiple byte
      ranges, including suffix ranges (``bytes=-N``).
    """

    def log_message(self, format, *args):
        """Forward one access-log style line to the owning test case."""
        self.server.test_case.log('webserver - %s - - [%s] %s "%s" "%s"',
                                  self.address_string(),
                                  self.log_date_time_string(),
                                  format % args,
                                  self.headers.get('referer', '-'),
                                  self.headers.get('user-agent', '-'))

    def handle_one_request(self):
        """Handle a single HTTP request.

        You normally don't need to override this method; see the class
        __doc__ string for information on how to handle specific HTTP
        commands such as GET and POST.

        Reading the request line is attempted up to 10 times when the
        socket raises EAGAIN/EWOULDBLOCK; any other socket error is
        propagated.  If every attempt fails the connection is closed.
        """
        for i in range(1, 11): # Don't try more than 10 times
            try:
                self.raw_requestline = self.rfile.readline()
            except socket.error:
                # sys.exc_info() is used rather than "except E, e" so the
                # syntax is accepted by every Python version.
                e = sys.exc_info()[1]
                if e.args[0] in (errno.EAGAIN, errno.EWOULDBLOCK):
                    # Not logged for now because some tests look at the log
                    # of the server and expect to see no errors. see recent
                    # email thread. -- mbp 20051021.
                    time.sleep(0.01)
                    continue
                raise
            else:
                break
        else:
            # Every attempt hit EAGAIN, so no request line was read.  Do not
            # fall through with a stale (or missing) raw_requestline.
            self.close_connection = 1
            return
        if not self.raw_requestline:
            self.close_connection = 1
            return
        if not self.parse_request(): # An error code has been sent, just exit
            return
        mname = 'do_' + self.command
        method = getattr(self, mname, None)
        if method is None:
            self.send_error(501, "Unsupported method (%r)" % self.command)
            return
        method()

    _range_regexp = re.compile(r'^(?P<start>\d+)-(?P<end>\d+)$')
    _tail_regexp = re.compile(r'^-(?P<tail>\d+)$')

    def parse_ranges(self, ranges_header):
        """Parse the range header value and returns ranges and tail.

        :param ranges_header: The raw value of the Range header, for
            example ``'bytes=0-9,20-29'`` or ``'bytes=-5'``.
        :return: A ``(tail, ranges)`` tuple.  ``ranges`` is the list of
            ``(start, end)`` integer pairs, in header order, for every
            ``start-end`` spec.  ``tail`` is the length of the last
            ``-N`` suffix spec seen, or 0 when there is none.  Specs
            matching neither form are skipped.  A header that does not
            start with ``bytes=`` yields ``(0, [])``.
        """
        tail = 0
        ranges = []
        prefix = 'bytes='
        if not ranges_header.startswith(prefix):
            # The header comes from the client, so it must not be checked
            # with an assert.  Reporting "no ranges" makes do_GET serve the
            # whole file, i.e. the header is ignored.
            return tail, ranges
        for range_str in ranges_header[len(prefix):].split(','):
            range_match = self._range_regexp.match(range_str)
            if range_match is not None:
                ranges.append((int(range_match.group('start')),
                               int(range_match.group('end'))))
            else:
                tail_match = self._tail_regexp.match(range_str)
                if tail_match is not None:
                    tail = int(tail_match.group('tail'))
        return tail, ranges

    def _check_ranges(self, tail, ranges, file_size):
        """Normalize and validate the result of parse_ranges().

        :param tail: Suffix length as returned by parse_ranges (0 if none).
        :param ranges: List of inclusive ``(start, end)`` pairs.
        :param file_size: Size in bytes of the file being served.
        :return: A new list of inclusive ``(start, end)`` pairs with the
            suffix range appended, or None when there is no range at all
            or when any range reaches beyond the end of the file.
        """
        checked = list(ranges)
        if tail != 0:
            # Range ends are inclusive, so the last byte of the file is at
            # file_size - 1.  A suffix longer than the file selects the
            # whole file, hence the clamp to 0.
            checked.append((max(0, file_size - tail), file_size - 1))
        if not checked:
            return None
        for (start, end) in checked:
            if start >= file_size or end >= file_size:
                return None
        return checked

    def send_range_content(self, file, start, length):
        """Write ``length`` bytes of ``file``, starting at ``start``."""
        file.seek(start)
        self.wfile.write(file.read(length))

    def get_single_range(self, file, file_size, start, end):
        """Send a 206 response holding the bytes ``start`` to ``end``.

        ``end`` is inclusive, so ``end - start + 1`` bytes are sent.
        """
        self.send_response(206)
        length = end - start + 1
        self.send_header('Accept-Ranges', 'bytes')
        self.send_header("Content-Length", "%d" % length)

        self.send_header("Content-Type", 'application/octet-stream')
        self.send_header("Content-Range", "bytes %d-%d/%d" % (start,
                                                              end,
                                                              file_size))
        self.end_headers()
        self.send_range_content(file, start, length)

    def get_multiple_ranges(self, file, file_size, ranges):
        """Send a 206 multipart/byteranges response, one part per range.

        Each part is introduced by a boundary line followed by its own
        Content-type and Content-Range headers.
        """
        self.send_response(206)
        self.send_header('Accept-Ranges', 'bytes')
        boundary = "%d" % random.randint(0,0x7FFFFFFF)
        self.send_header("Content-Type",
                         "multipart/byteranges; boundary=%s" % boundary)
        self.end_headers()
        for (start, end) in ranges:
            self.wfile.write("--%s\r\n" % boundary)
            self.send_header("Content-type", 'application/octet-stream')
            self.send_header("Content-Range", "bytes %d-%d/%d" % (start,
                                                                  end,
                                                                  file_size))
            self.end_headers()
            self.send_range_content(file, start, end - start + 1)
        # NOTE(review): MIME closing delimiters are normally "--boundary--";
        # the wire format is left unchanged here -- confirm what the client
        # side parser expects before altering it.
        self.wfile.write("--%s\r\n" % boundary)

    def do_GET(self):
        """Serve a GET request.

        Handles the Range header.

        Requests without a Range header, requests for directories and
        requests whose ranges are unusable are delegated to
        SimpleHTTPRequestHandler.do_GET, which serves the whole resource.
        """
        path = self.translate_path(self.path)
        ranges_header_value = self.headers.get('Range')
        if ranges_header_value is None or os.path.isdir(path):
            # Let the mother class handle most cases
            return SimpleHTTPRequestHandler.do_GET(self)

        try:
            # Always read in binary mode. Opening files in text
            # mode may cause newline translations, making the
            # actual size of the content transmitted *less* than
            # the content-length!
            content = open(path, 'rb')
        except IOError:
            self.send_error(404, "File not found")
            return None

        # try/finally so the file is closed even if writing the response
        # fails (e.g. the client went away).
        try:
            file_size = os.fstat(content.fileno())[6]
            tail, ranges = self.parse_ranges(ranges_header_value)
            ranges = self._check_ranges(tail, ranges, file_size)
            if ranges is not None:
                if len(ranges) == 1:
                    (start, end) = ranges[0]
                    self.get_single_range(content, file_size, start, end)
                else:
                    self.get_multiple_ranges(content, file_size, ranges)
                return None
        finally:
            content.close()

        # RFC2616 14-16 says that invalid Range headers
        # should be ignored and in that case, the whole file
        # should be returned as if no Range header was
        # present.  The file is reopened by the following call.
        return SimpleHTTPRequestHandler.do_GET(self)

    if sys.platform == 'win32':
        # On win32 you cannot access non-ascii filenames without
        # decoding them into unicode first.
        # However, under Linux, you can access bytestream paths
        # without any problems. If this function was always active
        # it would probably break tests when LANG=C was set
        def translate_path(self, path):
            """Translate a /-separated PATH to the local filename syntax.

            For bzr, all url paths are considered to be utf8 paths.
            On Linux, you can access these paths directly over the bytestream
            request, but on win32, you must decode them, and access them
            as Unicode files.

            :param path: The request path, possibly with query parameters.
            :return: A unicode path below the current working directory.
            """
            # abandon query parameters
            path = urlparse.urlparse(path)[2]
            path = posixpath.normpath(urllib.unquote(path))
            path = path.decode('utf-8')
            words = [word for word in path.split('/') if word]
            path = os.getcwdu()
            for word in words:
                # Keep only the last component of each word so a request
                # cannot name a drive or climb out of the served directory.
                drive, word = os.path.splitdrive(word)
                head, word = os.path.split(word)
                if word in (os.curdir, os.pardir):
                    continue
                path = os.path.join(path, word)
            return path

220

221

222

class TestingHTTPServer(BaseHTTPServer.HTTPServer):
    """An HTTPServer that keeps a reference to the test case using it.

    The reference is stored as ``test_case`` and is reachable from request
    handlers through ``self.server.test_case``.
    """

    def __init__(self, server_address, RequestHandlerClass, test_case):
        """Create the server and remember ``test_case``.

        :param server_address: ``(host, port)`` pair handed to HTTPServer.
        :param RequestHandlerClass: Handler class handed to HTTPServer.
        :param test_case: Object stored on the server as ``test_case``.
        """
        base_init = BaseHTTPServer.HTTPServer.__init__
        base_init(self, server_address, RequestHandlerClass)
        self.test_case = test_case

227

228

229

class HttpServer(Server):
    """A test server for http transports.

    Subclasses can provide a specific request handler.

    The HTTP server runs in a daemon thread started by setUp() and stopped
    by tearDown().  It serves the directory that was current when setUp()
    was called and listens on a port chosen by the operating system.
    """

    # used to form the url that connects to this server
    _url_protocol = 'http'

    # Subclasses can provide a specific request handler
    def __init__(self, request_handler=TestingHTTPRequestHandler):
        """Create the server object; nothing is started until setUp().

        :param request_handler: Request handler class given to the
            underlying TestingHTTPServer.
        """
        Server.__init__(self)
        self.request_handler = request_handler

    def _get_httpd(self):
        """Build the HTTP server, bound to a free port on localhost."""
        return TestingHTTPServer(('localhost', 0),
                                 self.request_handler,
                                 self)

    def _http_start(self):
        """Body of the server thread: publish the url, then serve requests.

        Releases ``_http_starting`` once ``_http_base_url`` is known, then
        handles requests until ``_http_running`` becomes false.
        """
        httpd = self._get_httpd()
        host, port = httpd.socket.getsockname()
        self._http_base_url = '%s://localhost:%s/' % (self._url_protocol, port)
        self._http_starting.release()
        # A short timeout lets the loop notice _http_running being cleared.
        httpd.socket.settimeout(0.1)

        try:
            while self._http_running:
                try:
                    httpd.handle_request()
                except socket.timeout:
                    pass
        finally:
            # Release the listening socket instead of leaking it.
            httpd.server_close()

    def _get_remote_url(self, path):
        """Return the url under which a local path is served.

        :param path: A local path.  When absolute it must be located below
            the directory recorded by setUp().
        :raises BadWebserverPath: If an absolute path is outside of that
            directory.
        """
        path_parts = path.split(os.path.sep)
        if os.path.isabs(path):
            if path_parts[:len(self._local_path_parts)] != \
                   self._local_path_parts:
                # _local_path_parts is derived from _home_dir, so that is
                # the directory to report (this class has no test_dir).
                raise BadWebserverPath(path, self._home_dir)
            remote_path = '/'.join(path_parts[len(self._local_path_parts):])
        else:
            remote_path = '/'.join(path_parts)

        # Wait until the server thread has published _http_base_url.
        self._http_starting.acquire()
        self._http_starting.release()
        return self._http_base_url + remote_path

    def log(self, format, *args):
        """Capture Server log output."""
        self.logs.append(format % args)

    def setUp(self):
        """See bzrlib.transport.Server.setUp."""
        self._home_dir = os.getcwdu()
        self._local_path_parts = self._home_dir.split(os.path.sep)
        # Created before the thread starts so log() can always append.
        self.logs = []
        self._http_starting = threading.Lock()
        self._http_starting.acquire()
        self._http_running = True
        self._http_base_url = None
        self._http_thread = threading.Thread(target=self._http_start)
        self._http_thread.setDaemon(True)
        self._http_thread.start()
        # Remove any configured proxy while the server is up; tearDown()
        # puts it back.
        self._http_proxy = os.environ.get("http_proxy")
        if self._http_proxy is not None:
            del os.environ["http_proxy"]

    def tearDown(self):
        """See bzrlib.transport.Server.tearDown."""
        self._http_running = False
        self._http_thread.join()
        if self._http_proxy is not None:
            os.environ["http_proxy"] = self._http_proxy

    def get_url(self):
        """See bzrlib.transport.Server.get_url."""
        return self._get_remote_url(self._home_dir)

    def get_bogus_url(self):
        """See bzrlib.transport.Server.get_bogus_url."""
        # this is chosen to try to prevent trouble with proxies, weird dns,
        # etc
        return 'http://127.0.0.1:1/'

313

314

315

class HttpServer_urllib(HttpServer):
    """HttpServer variant whose urls use the http+urllib scheme.

    Meant for tests: connections to this server will always go through
    urllib where possible.
    """

    # The scheme makes clients pick the urllib implementation.
    _url_protocol = 'http+urllib'

324

325

326

class HttpServer_PyCurl(HttpServer):
    """HttpServer variant whose urls use the http+pycurl scheme.

    Meant for tests: connections to this server will always go through
    pycurl where possible.
    """

    # pycurl availability is not checked here: this server is only
    # required when pycurl is present.

    # The scheme makes clients pick the pycurl implementation.
    _url_protocol = 'http+pycurl'

Older »