1
# $Id: http_client.py 271 2004-10-09 10:50:59Z fredrik $
2
# a simple asynchronous http client (based on SimpleAsyncHTTP.py from
3
# "Python Standard Library" by Fredrik Lundh, O'Reilly 2001)
5
# HTTP/1.1 and GZIP support added in January 2003 by Fredrik Lundh.
8
# 2004-08-26 fl unified http callback
9
# 2004-10-09 fl factored out gzip_consumer support
11
# Copyright (c) 2001-2004 by Fredrik Lundh. All rights reserved.
15
import asyncore
import socket, string, time, sys
import StringIO
import mimetools, urlparse, urllib
20
from gzip_consumer import GzipConsumer
25
# Close connection. Request handlers can raise this exception to
26
# indicate that the connection should be closed.
28
class CloseConnection(Exception):
    """Raised by a request handler to ask that the connection be closed."""
32
# Redirect connection. Request handlers can raise this exception to
33
# indicate that a new request should be issued.
35
class Redirect(CloseConnection):
    """Raised by a request handler to ask for a new request.

    location -- the URI the new request should be issued to; stored on the
        exception as the ``location`` attribute.
    """

    def __init__(self, location):
        # initialise the base exception explicitly so that args/str()
        # report the target as well
        CloseConnection.__init__(self, location)
        self.location = location
40
# Asynchronous HTTP/1.1 client.
42
class async_http(asyncore.dispatcher_with_send):
    """Asynchronous HTTP/1.1 GET client built on asyncore.

    Progress is reported to a *consumer* object through three callbacks:

    * ``consumer.http(ok, connection[, exc_info])`` -- called with ``ok=1``
      once the response header has been parsed, or with ``ok=0`` when the
      request fails.  With ``ok=1`` the callback may raise ``Redirect`` or
      ``CloseConnection`` to stop the transfer.
    * ``consumer.feed(data)`` -- called with each piece of body data.
    * ``consumer.close()`` -- called when the transfer is finished.
    """

    user_agent = "http_client.py 1.2 (http://effbot.org/zone)"
    http_version = "1.1"

    # proxy map, looked up once when the class body is executed
    proxies = urllib.getproxies()

    def __init__(self, uri, consumer, extra_headers=None):
        """Parse *uri*, reset the transfer state and start connecting.

        uri -- absolute "http" URI to fetch.
        consumer -- object receiving the http/feed/close callbacks.
        extra_headers -- optional list of complete "Name: value" header
            lines that are added to the request.

        Raises AssertionError if the effective scheme is not "http".
        """
        asyncore.dispatcher_with_send.__init__(self)

        # turn the uri into a valid request
        scheme, host, path, params, query, fragment = urlparse.urlparse(uri)

        # the Host header names the origin server, even when a proxy is used
        self.host = host

        # get proxy settings, if any
        proxy = self.proxies.get(scheme)
        if proxy:
            scheme, host, x, x, x, x = urlparse.urlparse(proxy)

        assert scheme == "http", "only supports HTTP requests (%s)" % scheme

        if not path:
            path = "/"
        if params:
            path = path + ";" + params
        if query:
            path = path + "?" + query
        if proxy:
            # a proxy needs the absolute URI in the request line
            path = scheme + "://" + self.host + path

        self.path = path

        # get port number
        try:
            host, port = host.split(":", 1)
            port = int(port)
        except (TypeError, ValueError):
            port = 80 # default port

        self.consumer = consumer

        # status line and parsed header; both stay None until the response
        # header has been received
        self.status = None
        self.header = None

        # traffic counters
        self.bytes_in = 0
        self.bytes_out = 0

        self.content_type = None
        self.content_length = None
        self.content_encoding = None
        self.transfer_encoding = None

        # received data that has not been processed yet
        self.data = ""

        # bytes left in the current chunk (None: no chunk in progress)
        self.chunk_size = None

        self.timestamp = time.time()

        self.extra_headers = extra_headers

        self.create_socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            self.connect((host, port))
        except socket.error:
            self.consumer.http(0, self, sys.exc_info())

    def handle_connect(self):
        """Send the GET request once the connection is established."""
        # connection succeeded
        request = [
            "GET %s HTTP/%s" % (self.path, self.http_version),
            "Host: %s" % self.host,
            ]

        # only advertise gzip when the module-level decoder is available
        if globals().get("GzipConsumer") is not None:
            request.append("Accept-Encoding: gzip")

        if self.extra_headers:
            request.extend(self.extra_headers)

        # make sure to include a user agent
        for header in request:
            if header.lower().startswith("user-agent:"):
                break
        else:
            request.append("User-Agent: %s" % self.user_agent)

        request = "\r\n".join(request) + "\r\n\r\n"

        self.send(request)

        self.bytes_out = self.bytes_out + len(request)

    def handle_expt(self):
        """Report a failed connection attempt to the consumer (win32 only)."""
        # connection failed (windows); notify consumer
        if sys.platform == "win32":
            self.close()
            self.consumer.http(0, self)

    def handle_read(self):
        """Process incoming data: parse the header, then feed the body."""
        # handle incoming data
        data = self.recv(2048)

        self.data = self.data + data
        self.bytes_in = self.bytes_in + len(data)

        while self.data:

            if self.header is None:
                # check if we've seen a full header
                header = self.data.split("\r\n\r\n", 1)
                if len(header) <= 1:
                    return # header incomplete; wait for more data
                header, self.data = header

                # parse the status line and the header fields
                fp = StringIO.StringIO(header)
                self.status = fp.readline().split(" ", 2)
                self.header = mimetools.Message(fp)

                # pick out the headers that control body handling
                self.content_type = self.header.get("content-type")
                try:
                    self.content_length = int(
                        self.header.get("content-length")
                        )
                except (ValueError, TypeError):
                    self.content_length = None
                self.transfer_encoding = self.header.get("transfer-encoding")
                self.content_encoding = self.header.get("content-encoding")

                if self.content_encoding == "gzip":
                    # FIXME: report error if GzipConsumer is not available
                    self.consumer = GzipConsumer(self.consumer)

                try:
                    self.consumer.http(1, self)
                except Redirect as v:
                    # the consumer asked for a new request
                    if v.location:
                        do_request(
                            v.location, self.consumer, self.extra_headers
                            )
                    self.close()
                    return
                except CloseConnection:
                    self.close()
                    return

            if self.transfer_encoding == "chunked" and self.chunk_size is None:

                # strip off leading whitespace
                if self.data.startswith("\r\n"):
                    self.data = self.data[2:]

                chunk_size = self.data.split("\r\n", 1)
                if len(chunk_size) <= 1:
                    return # chunk size line incomplete; wait for more data
                chunk_size, self.data = chunk_size

                try:
                    self.chunk_size = int(chunk_size, 16)
                    if self.chunk_size <= 0:
                        raise ValueError
                except ValueError:
                    # a zero-sized (or unparsable) chunk ends the transfer
                    return self.handle_close()

            if not self.data:
                return

            data = self.data
            self.data = ""

            # outside a chunk, everything received so far is body data
            chunk_size = self.chunk_size or len(data)

            if chunk_size < len(data):
                # keep whatever follows the current chunk for the next round
                self.data = data[chunk_size:]
                data = data[:chunk_size]
                self.chunk_size = None
            else:
                self.chunk_size = chunk_size - len(data)
                if self.chunk_size <= 0:
                    self.chunk_size = None

            if data:
                self.consumer.feed(data)

            if self.content_length:
                self.content_length -= chunk_size
                if self.content_length <= 0:
                    return self.handle_close()

    def handle_close(self):
        """Tell the consumer the transfer is finished and close the socket."""
        self.consumer.close()
        # without this the channel would stay in the asyncore socket map
        self.close()

    def handle_error(self):
        """Report the current exception to the consumer and close the socket."""
        self.consumer.http(0, self, sys.exc_info())
        self.close()
248
def do_request(uri, consumer, extra_headers=None):
    """Start an asynchronous request for *uri* and return the connection.

    uri -- URI to fetch.
    consumer -- object that receives the http/feed/close callbacks.
    extra_headers -- optional list of extra request header lines.

    Returns the new async_http instance.
    """
    return async_http(uri, consumer, extra_headers)
252
if __name__ == "__main__":

    class dummy_consumer:
        """Demo consumer that prints every callback it receives."""

        def feed(self, data):
            # show only the first and last 20 bytes of each piece
            print("feed %s %s %d" % (
                repr(data[:20]), repr(data[-20:]), len(data)
                ))

        def close(self):
            # handle_close() calls this when the transfer is finished
            print("close")

        def http(self, ok, connection, *args, **kwargs):
            # failure reports pass sys.exc_info() as a third positional
            # argument, so positional extras must be accepted as well
            print("%s %s %s" % (ok, connection, args or kwargs))
            print("status %s" % (connection.status,))
            print("header %s" % (connection.header,))

    # fetch the URL given on the command line, or a default one
    if len(sys.argv) > 1:
        url = sys.argv[1]
    else:
        url = "http://www.cnn.com/"
    do_request(url, dummy_consumer())

    # run the event loop; without it the request never proceeds
    asyncore.loop()