1
# $Id: http_client.py 271 2004-10-09 10:50:59Z fredrik $
2
# a simple asynchronous http client (based on SimpleAsyncHTTP.py from
3
# "Python Standard Library" by Fredrik Lundh, O'Reilly 2001)
5
# HTTP/1.1 and GZIP support added in January 2003 by Fredrik Lundh.
8
# 2004-08-26 fl unified http callback
9
# 2004-10-09 fl factored out gzip_consumer support
11
# Copyright (c) 2001-2004 by Fredrik Lundh. All rights reserved.
14
from __future__ import absolute_import

import asyncore
import socket, string, time, sys
import StringIO
import mimetools, urlparse, urllib

from gzip_consumer import GzipConsumer
27
# Close connection. Request handlers can raise this exception to
28
# indicate that the connection should be closed.
30
class CloseConnection(Exception):
    """Raised by a request handler to ask that the connection be closed."""
34
# Redirect connection. Request handlers can raise this exception to
35
# indicate that a new request should be issued.
37
class Redirect(CloseConnection):
    """Raised by a request handler to ask for a new request to be issued.

    The target of the new request is stored in the ``location`` attribute.
    """

    def __init__(self, location):
        # where the follow-up request should go
        self.location = location
42
# Asynchronous HTTP/1.1 client.
44
class async_http(asyncore.dispatcher_with_send):
    """Asynchronous HTTP GET client driven by the asyncore event loop.

    The *consumer* object handed to the constructor receives the response
    through three callbacks:

    - ``consumer.http(ok, connection[, exc_info])`` -- called with ``ok=1``
      once the response header has been parsed (``connection.status`` and
      ``connection.header`` are then available), or with ``ok=0`` when the
      connection fails.  The handler may raise Redirect or CloseConnection
      to abort the transfer.
    - ``consumer.feed(data)`` -- called with each block of body data.
    - ``consumer.close()`` -- called when the response is complete.
    """

    user_agent = "http_client.py 1.2 (http://effbot.org/zone)"

    # protocol version sent on the request line
    http_version = "1.1"

    # proxy settings (scheme -> proxy url), read once when the class is created
    proxies = urllib.getproxies()

    def __init__(self, uri, consumer, extra_headers=None):
        """Parse *uri*, reset the transfer state and start connecting.

        uri -- absolute http URI to fetch.
        consumer -- object implementing the http/feed/close callbacks.
        extra_headers -- optional list of complete "Name: value" header
            lines to add to the request.

        A socket error raised by connect() is reported through
        consumer.http(0, self, exc_info) instead of being propagated.
        """
        asyncore.dispatcher_with_send.__init__(self)

        # turn the uri into a valid request
        scheme, host, path, params, query, fragment = urlparse.urlparse(uri)

        # remember the origin host; "host" may be replaced by the proxy below
        self.host = host

        # get proxy settings, if any
        proxy = self.proxies.get(scheme)
        if proxy:
            scheme, host, x, x, x, x = urlparse.urlparse(proxy)

        assert scheme == "http", "only supports HTTP requests (%s)" % scheme

        if not path:
            path = "/"
        if params:
            path = path + ";" + params
        if query:
            path = path + "?" + query
        if proxy:
            # a proxy needs the absolute URI on the request line
            path = scheme + "://" + self.host + path

        self.path = path

        # get port number
        try:
            host, port = host.split(":", 1)
            port = int(port)
        except (TypeError, ValueError):
            port = 80  # default port

        self.consumer = consumer

        # response status line (split into fields) and parsed header
        self.status = None
        self.header = None

        # traffic counters
        self.bytes_in = 0
        self.bytes_out = 0

        self.content_type = None
        self.content_length = None
        self.content_encoding = None
        self.transfer_encoding = None

        # received data that has not been processed yet
        self.data = ""

        # bytes left in the current chunk (chunked transfer encoding only)
        self.chunk_size = None

        self.timestamp = time.time()

        self.extra_headers = extra_headers

        self.create_socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            self.connect((host, port))
        except socket.error:
            self.consumer.http(0, self, sys.exc_info())

    def handle_connect(self):
        """Send the GET request once the connection has been established."""
        request = [
            "GET %s HTTP/%s" % (self.path, self.http_version),
            "Host: %s" % self.host,
        ]

        # only advertise gzip if a decoder is available in this module
        if globals().get("GzipConsumer"):
            request.append("Accept-Encoding: gzip")

        if self.extra_headers:
            request.extend(self.extra_headers)

        # make sure to include a user agent
        has_user_agent = any(
            header.lower().startswith("user-agent:") for header in request
        )
        if not has_user_agent:
            request.append("User-Agent: %s" % self.user_agent)

        request = "\r\n".join(request) + "\r\n\r\n"

        self.send(request)

        self.bytes_out = self.bytes_out + len(request)

    def handle_expt(self):
        """Handle an exceptional socket condition."""
        # connection failed (windows); notify consumer
        if sys.platform == "win32":
            self.close()
            self.consumer.http(0, self)

    def handle_read(self):
        """Process incoming data: parse the header, then feed the body.

        Data is buffered in self.data until a complete header (and, for
        chunked transfers, a complete chunk-size line) has arrived.
        """
        data = self.recv(2048)

        self.data = self.data + data
        self.bytes_in = self.bytes_in + len(data)

        while self.data:

            # "is None" rather than truthiness: a parsed header object with
            # no fields must not trigger a second parse
            if self.header is None:
                # check if we've seen a full header
                header = self.data.split("\r\n\r\n", 1)
                if len(header) <= 1:
                    return  # not yet; wait for more data
                header, self.data = header

                # parse header
                fp = StringIO.StringIO(header)
                self.status = fp.readline().split(" ", 2)
                self.header = mimetools.Message(fp)

                # get http headers
                self.content_type = self.header.get("content-type")
                try:
                    self.content_length = int(
                        self.header.get("content-length")
                    )
                except (ValueError, TypeError):
                    # missing or malformed content-length
                    self.content_length = None
                self.transfer_encoding = self.header.get("transfer-encoding")
                self.content_encoding = self.header.get("content-encoding")

                if self.content_encoding == "gzip":
                    # FIXME: report error if GzipConsumer is not available
                    self.consumer = GzipConsumer(self.consumer)

                try:
                    self.consumer.http(1, self)
                except Redirect:
                    # the handler asked for a new request
                    v = sys.exc_info()[1]
                    if v.location:
                        do_request(
                            v.location, self.consumer, self.extra_headers
                        )
                    self.close()
                    return
                except CloseConnection:
                    self.close()
                    return

            if self.transfer_encoding == "chunked" and self.chunk_size is None:

                # strip off leading whitespace
                if self.data.startswith("\r\n"):
                    self.data = self.data[2:]

                chunk_size = self.data.split("\r\n", 1)
                if len(chunk_size) <= 1:
                    return  # size line incomplete; wait for more data
                chunk_size, self.data = chunk_size

                try:
                    self.chunk_size = int(chunk_size, 16)
                    if self.chunk_size <= 0:
                        # a zero-size chunk ends the body
                        raise ValueError
                except ValueError:
                    return self.handle_close()

            if not self.data:
                return

            data = self.data
            self.data = ""

            # outside chunked mode, everything received so far is body data
            chunk_size = self.chunk_size or len(data)

            if chunk_size < len(data):
                # the buffer runs past the current chunk; keep the rest
                self.data = data[chunk_size:]
                data = data[:chunk_size]
                self.chunk_size = None
            else:
                self.chunk_size = chunk_size - len(data)
                if self.chunk_size <= 0:
                    self.chunk_size = None

            if data:
                self.consumer.feed(data)

            if self.content_length:
                self.content_length -= chunk_size
                if self.content_length <= 0:
                    return self.handle_close()

    def handle_close(self):
        """Tell the consumer the response is complete and release the socket."""
        self.consumer.close()
        self.close()

    def handle_error(self):
        """Report the current exception to the consumer and release the socket."""
        self.consumer.http(0, self, sys.exc_info())
        self.close()
250
def do_request(uri, consumer, extra_headers=None):
    """Create an async_http connection for *uri* and return it.

    uri, consumer and extra_headers are passed straight through to the
    async_http constructor.
    """
    connection = async_http(uri, consumer, extra_headers)
    return connection
254
if __name__ == "__main__":
    # Command-line demo: fetch one URL and trace every consumer callback.

    class dummy_consumer:
        """Consumer that only prints what the client hands to it."""

        def feed(self, data):
            # show just the head and tail of each block to keep output short
            print("feed %s %s %s" % (repr(data[:20]), repr(data[-20:]), len(data)))

        def close(self):
            print("close")

        def http(self, ok, connection, *extra, **args):
            # the client passes sys.exc_info() as a third positional
            # argument on errors, so positional extras must be accepted
            details = extra or args
            print("%s %s %s" % (ok, connection, details))
            print("status %s" % (connection.status,))
            print("header %s" % (connection.header,))

    # the URL may be given on the command line; otherwise use the default
    url = sys.argv[1] if len(sys.argv) > 1 else "http://www.cnn.com/"
    do_request(url, dummy_consumer())

    # run the event loop until the connection has been closed
    asyncore.loop()