~bzr-pqm/bzr/bzr.dev

791 by Martin Pool
- import effbot.org http client
1
# $Id: http_client.py 271 2004-10-09 10:50:59Z fredrik $
2
# a simple asynchronous http client (based on SimpleAsyncHTTP.py from
3
# "Python Standard Library" by Fredrik Lundh, O'Reilly 2001)
4
#
5
# HTTP/1.1 and GZIP support added in January 2003 by Fredrik Lundh.
6
#
7
# changes:
8
# 2004-08-26 fl   unified http callback
9
# 2004-10-09 fl   factored out gzip_consumer support
10
#
11
# Copyright (c) 2001-2004 by Fredrik Lundh.  All rights reserved.
12
#
13
14
import asyncore
15
import socket, string, time, sys
16
import StringIO
17
import mimetools, urlparse, urllib
18
19
try:
    from gzip_consumer import GzipConsumer
except ImportError:
    # gzip support is optional; bind the name anyway so that code
    # testing "if GzipConsumer:" sees a false value instead of
    # raising NameError
    GzipConsumer = None
23
24
##
25
# Close connection.   Request handlers can raise this exception to
26
# indicate that the connection should be closed.
27
28
class CloseConnection(Exception):
    """Raised by a request handler to have the connection closed."""
30
31
##
32
# Redirect connection.  Request handlers can raise this exception to
33
# indicate that a new request should be issued.
34
35
class Redirect(CloseConnection):
    """Raised by a request handler to have a new request issued.

    The location given to the constructor is kept on the exception as
    the "location" attribute.
    """

    def __init__(self, location):
        # remember where the follow-up request should go
        self.location = location
38
39
##
40
# Asynchronous HTTP/1.1 client.
41
42
class async_http(asyncore.dispatcher_with_send):
    """Asynchronous HTTP/1.1 GET client built on asyncore.

    The consumer object passed to the constructor receives the response
    through three callbacks:

    - http(ok, connection[, exc_info]) -- called with ok=1 once the
      response header has been parsed, and with ok=0 if the connection
      failed (exc_info, when given, is the sys.exc_info() tuple).  When
      called with ok=1 the callback may raise Redirect or
      CloseConnection to abort the transfer.
    - feed(data) -- called with each piece of body data.
    - close() -- called when the body is complete or the connection
      has been closed.
    """

    # value used for the User-Agent header unless the caller supplies one
    user_agent = "http_client.py 1.2 (http://effbot.org/zone)"

    # protocol version sent on the request line
    http_version = "1.1"

    # scheme -> proxy url mapping, read once when the class is created
    proxies = urllib.getproxies()

    def __init__(self, uri, consumer, extra_headers=None):
        """Start fetching uri, delivering the response to consumer.

        extra_headers, if given, is a sequence of complete
        "Name: value" header lines that are added to the request.
        Only plain "http" is supported, either directly or through the
        proxy configured for the uri's scheme.
        """
        asyncore.dispatcher_with_send.__init__(self)

        # turn the uri into a valid request
        scheme, host, path, params, query = urlparse.urlparse(uri)[:5]

        # use origin host (this is what goes into the Host header, even
        # when the connection itself is made to a proxy)
        self.host = host

        # get proxy settings, if any
        proxy = self.proxies.get(scheme)
        if proxy:
            scheme, host = urlparse.urlparse(proxy)[:2]

        assert scheme == "http", "only supports HTTP requests (%s)" % scheme

        if not path:
            path = "/"
        if params:
            path = path + ";" + params
        if query:
            path = path + "?" + query
        if proxy:
            # proxies expect the absolute uri on the request line
            path = scheme + "://" + self.host + path

        self.path = path

        # get port number
        try:
            host, port = host.split(":", 1)
            port = int(port)
        except (TypeError, ValueError):
            port = 80 # default port

        self.consumer = consumer

        self.status = None
        self.header = None

        self.bytes_in = 0
        self.bytes_out = 0

        self.content_type = None
        self.content_length = None
        self.content_encoding = None
        self.transfer_encoding = None

        # received data that has not been processed yet
        self.data = ""

        # bytes left in the current chunk (chunked transfer encoding)
        self.chunk_size = None

        self.timestamp = time.time()

        self.extra_headers = extra_headers

        self.create_socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            self.connect((host, port))
        except socket.error:
            self.consumer.http(0, self, sys.exc_info())
            # don't leave the dead socket registered with asyncore
            self.close()

    def handle_connect(self):
        """Send the GET request once the connection has been made."""
        # connection succeeded

        request = [
            "GET %s HTTP/%s" % (self.path, self.http_version),
            "Host: %s" % self.host,
            ]

        # gzip support is optional; look the name up defensively since
        # the module-level import is allowed to fail
        if globals().get("GzipConsumer"):
            request.append("Accept-Encoding: gzip")

        if self.extra_headers:
            request.extend(self.extra_headers)

        # make sure to include a user agent
        if not any(h.lower().startswith("user-agent:") for h in request):
            request.append("User-Agent: %s" % self.user_agent)

        request = "\r\n".join(request) + "\r\n\r\n"

        self.send(request)

        self.bytes_out = self.bytes_out + len(request)

    def handle_expt(self):
        """Report a failed connection attempt (Windows only)."""
        # connection failed (windows); notify consumer

        if sys.platform == "win32":
            self.close()
            self.consumer.http(0, self)

    def handle_read(self):
        """Process incoming data: parse the header, then feed the body."""
        # handle incoming data

        data = self.recv(2048)

        self.data = self.data + data
        self.bytes_in = self.bytes_in + len(data)

        while self.data:

            # compare against None: a header block without any header
            # fields parses into an empty (false) Message object, which
            # must not be mistaken for "no header seen yet"
            if self.header is None:
                # check if we've seen a full header

                header = self.data.split("\r\n\r\n", 1)
                if len(header) <= 1:
                    return
                header, self.data = header

                # parse header
                fp = StringIO.StringIO(header)
                self.status = fp.readline().split(" ", 2)
                self.header = mimetools.Message(fp)

                # get http headers
                self.content_type = self.header.get("content-type")
                try:
                    self.content_length = int(
                        self.header.get("content-length")
                        )
                except (ValueError, TypeError):
                    self.content_length = None
                self.transfer_encoding = self.header.get("transfer-encoding")
                self.content_encoding = self.header.get("content-encoding")

                if self.content_encoding == "gzip":
                    gzip_consumer = globals().get("GzipConsumer")
                    if not gzip_consumer:
                        # propagates to handle_error via asyncore, which
                        # notifies the consumer and closes the connection
                        raise IOError(
                            "gzip content encoding not supported "
                            "(gzip_consumer module not available)"
                            )
                    self.consumer = gzip_consumer(self.consumer)

                try:
                    self.consumer.http(1, self)
                except Redirect as v:
                    # redirect
                    if v.location:
                        do_request(
                            v.location, self.consumer, self.extra_headers
                            )
                    self.close()
                    return
                except CloseConnection:
                    self.close()
                    return

                if (self.content_length == 0
                    and self.transfer_encoding != "chunked"):
                    # empty body: no more data will arrive for this
                    # response, so finish now instead of waiting for the
                    # server to drop the connection
                    return self.handle_close()

            if self.transfer_encoding == "chunked" and self.chunk_size is None:

                # strip off leading whitespace (the CRLF that ends the
                # previous chunk)
                if self.data.startswith("\r\n"):
                    self.data = self.data[2:]

                chunk_size = self.data.split("\r\n", 1)
                if len(chunk_size) <= 1:
                    return
                chunk_size, self.data = chunk_size

                try:
                    # the size may be followed by ";extension" fields
                    self.chunk_size = int(chunk_size.split(";", 1)[0], 16)
                    if self.chunk_size <= 0:
                        # a zero-sized chunk marks the end of the body
                        raise ValueError
                except ValueError:
                    return self.handle_close()

            if not self.data:
                return

            data = self.data
            self.data = ""

            # outside chunked mode, everything received is body data
            chunk_size = self.chunk_size or len(data)

            if chunk_size < len(data):
                # got more than the current chunk; keep the rest for the
                # next round of the loop
                self.data = data[chunk_size:]
                data = data[:chunk_size]
                self.chunk_size = None
            else:
                self.chunk_size = chunk_size - len(data)
                if self.chunk_size <= 0:
                    self.chunk_size = None

            if data:
                self.consumer.feed(data)

            if self.content_length:
                self.content_length -= chunk_size
                if self.content_length <= 0:
                    return self.handle_close()

    def handle_close(self):
        """Tell the consumer that the body is complete, then close."""
        self.consumer.close()
        self.close()

    def handle_error(self):
        """Pass an unhandled exception on to the consumer, then close."""
        self.consumer.http(0, self, sys.exc_info())
        self.close()
247
248
def do_request(uri, consumer, extra_headers=None):
    """Issue an asynchronous request for uri.

    Creates an async_http connection from the three arguments, passed
    through unchanged, and returns it.
    """
    connection = async_http(uri, consumer, extra_headers)
    return connection
251
252
if __name__ == "__main__":
    # simple command-line test: fetch the url given as the first
    # argument (or a default one) and print what the consumer receives

    class dummy_consumer:
        def feed(self, data):
            # print("feed %r" % (data,))
            print("feed %r %r %d" % (data[:20], data[-20:], len(data)))
        def close(self):
            print("close")
        def http(self, ok, connection, *args, **kwargs):
            # error notifications pass sys.exc_info() as an extra
            # positional argument, so accept positional extras as well
            print("%s %s %s %s" % (ok, connection, args, kwargs))
            print("status %s" % (connection.status,))
            print("header %s" % (connection.header,))

    try:
        url = sys.argv[1]
    except IndexError:
        url = "http://www.cnn.com/"
    do_request(url, dummy_consumer())
    asyncore.loop()