Download Install Tutorial Docs FAQ Tools WikiLicense Team IRC Planet Involvement Shop Book

root/tags/cherrypy-3.0.0/cherrypy/wsgiserver.py

Revision 1555 (checked in by fumanchu, 2 years ago)

wsgiserver optimization: use socket.sendall instead of wfile (since we were flushing regularly anyway).

  • Property svn:eol-style set to native
Line 
1 """A high-speed, production ready, thread pooled, generic WSGI server.
2
3 Simplest example on how to use this module directly
4 (without using CherryPy's application machinery):
5
6     from cherrypy import wsgiserver
7     
8     def my_crazy_app(environ, start_response):
9         status = '200 OK'
10         response_headers = [('Content-type','text/plain')]
11         start_response(status, response_headers)
12         return ['Hello world!\n']
13     
14     # Here we set our application to the script_name '/'
15     wsgi_apps = [('/', my_crazy_app)]
16     
17     server = wsgiserver.CherryPyWSGIServer(('localhost', 8070), wsgi_apps,
18                                            server_name='localhost')
19     
20     # Want SSL support? Just set these attributes
21     # server.ssl_certificate = <filename>
22     # server.ssl_private_key = <filename>
23     
24     if __name__ == '__main__':
25         server.start()
26
27 This won't call the CherryPy engine (application side) at all, only the
28 WSGI server, which is independent from the rest of CherryPy. Don't
29 let the name "CherryPyWSGIServer" throw you; the name merely reflects
30 its origin, not its coupling.
31
32 The CherryPy WSGI server can serve as many WSGI applications
33 as you want in one instance:
34
35     wsgi_apps = [('/', my_crazy_app), ('/blog', my_blog_app)]
36
37 """
38
39
40 import base64
41 import Queue
42 import os
43 import re
44 quoted_slash = re.compile("(?i)%2F")
45 import rfc822
46 import socket
47 try:
48     import cStringIO as StringIO
49 except ImportError:
50     import StringIO
51 import sys
52 import threading
53 import time
54 import traceback
55 from urllib import unquote
56 from urlparse import urlparse
57
58 try:
59     from OpenSSL import SSL
60     from OpenSSL import crypto
61 except ImportError:
62     SSL = None
63
64 import errno
# Error numbers (plus the literal message "timed out") collected for
# membership tests of the form `code in socket_errors_to_ignore`.
socket_errors_to_ignore = []
# Not all of these names will be defined for every platform.
for _errname in ("EPIPE", "ETIMEDOUT", "ECONNREFUSED", "ECONNRESET",
                 "EHOSTDOWN", "EHOSTUNREACH",
                 "WSAECONNABORTED", "WSAECONNREFUSED", "WSAECONNRESET",
                 "WSAENETRESET", "WSAETIMEDOUT"):
    _errnum = getattr(errno, _errname, None)
    # Several names may map to the same number; keep each number once,
    # in first-seen order, and keep the container a real (appendable) list.
    if _errnum is not None and _errnum not in socket_errors_to_ignore:
        socket_errors_to_ignore.append(_errnum)
socket_errors_to_ignore.append("timed out")

# Upper-cased names of request headers whose repeated occurrences are
# merged into one comma-separated value by HTTPRequest.parse_headers.
comma_separated_headers = ['ACCEPT', 'ACCEPT-CHARSET', 'ACCEPT-ENCODING',
    'ACCEPT-LANGUAGE', 'ACCEPT-RANGES', 'ALLOW', 'CACHE-CONTROL',
    'CONNECTION', 'CONTENT-ENCODING', 'CONTENT-LANGUAGE', 'EXPECT',
    'IF-MATCH', 'IF-NONE-MATCH', 'PRAGMA', 'PROXY-AUTHENTICATE', 'TE',
    'TRAILER', 'TRANSFER-ENCODING', 'UPGRADE', 'VARY', 'VIA', 'WARNING',
    'WWW-AUTHENTICATE']
83
84 class HTTPRequest(object):
85     """An HTTP Request (and response).
86     
87     A single HTTP connection may consist of multiple request/response pairs.
88     
89     connection: the HTTP Connection object which spawned this request.
90     rfile: the 'read' fileobject from the connection's socket
91     ready: when True, the request has been parsed and is ready to begin
92         generating the response. When False, signals the calling Connection
93         that the response should not be generated and the connection should
94         close.
95     close_connection: signals the calling Connection that the request
96         should close. This does not imply an error! The client and/or
97         server may each request that the connection be closed.
98     chunked_write: if True, output will be encoded with the "chunked"
99         transfer-coding. This value is set automatically inside
100         send_headers.
101     """
102    
103     def __init__(self, connection):
104         self.connection = connection
105         self.rfile = self.connection.rfile
106         self.sendall = self.connection.sendall
107         self.environ = connection.environ.copy()
108        
109         self.ready = False
110         self.started_response = False
111         self.status = ""
112         self.outheaders = []
113         self.sent_headers = False
114         self.close_connection = False
115         self.chunked_write = False
116    
117     def parse_request(self):
118         """Parse the next HTTP request start-line and message-headers."""
119         # HTTP/1.1 connections are persistent by default. If a client
120         # requests a page, then idles (leaves the connection open),
121         # then rfile.readline() will raise socket.error("timed out").
122         # Note that it does this based on the value given to settimeout(),
123         # and doesn't need the client to request or acknowledge the close
124         # (although your TCP stack might suffer for it: cf Apache's history
125         # with FIN_WAIT_2).
126         request_line = self.rfile.readline()
127         if not request_line:
128             # Force self.ready = False so the connection will close.
129             self.ready = False
130             return
131        
132         if request_line == "\r\n":
133             # RFC 2616 sec 4.1: "...if the server is reading the protocol
134             # stream at the beginning of a message and receives a CRLF
135             # first, it should ignore the CRLF."
136             # But only ignore one leading line! else we enable a DoS.
137             request_line = self.rfile.readline()
138             if not request_line:
139                 self.ready = False
140                 return
141        
142         server = self.connection.server
143         self.environ["SERVER_SOFTWARE"] = "%s WSGI Server" % server.version
144        
145         method, path, req_protocol = request_line.strip().split(" ", 2)
146         self.environ["REQUEST_METHOD"] = method
147        
148         # path may be an abs_path (including "http://host.domain.tld");
149         scheme, location, path, params, qs, frag = urlparse(path)
150        
151         if frag:
152             self.simple_response("400 Bad Request",
153                                  "Illegal #fragment in Request-URI.")
154             return
155        
156         if scheme:
157             self.environ["wsgi.url_scheme"] = scheme
158         if params:
159             path = path + ";" + params
160        
161         # Unquote the path+params (e.g. "/this%20path" -> "this path").
162         # http://www.w3.org/Protocols/rfc2616/rfc2616-sec5.html#sec5.1.2
163         #
164         # But note that "...a URI must be separated into its components
165         # before the escaped characters within those components can be
166         # safely decoded." http://www.ietf.org/rfc/rfc2396.txt, sec 2.4.2
167         atoms = [unquote(x) for x in quoted_slash.split(path)]
168         path = "%2F".join(atoms)
169        
170         if path == "*":
171             # This means, of course, that the last wsgi_app (shortest path)
172             # will always handle a URI of "*".
173             self.environ["SCRIPT_NAME"] = ""
174             self.environ["PATH_INFO"] = "*"
175             self.wsgi_app = server.mount_points[-1][1]
176         else:
177             for mount_point, wsgi_app in server.mount_points:
178                 # The mount_points list should be sorted by length, descending.
179                 if path.startswith(mount_point + "/") or path == mount_point:
180                     self.environ["SCRIPT_NAME"] = mount_point
181                     self.environ["PATH_INFO"] = path[len(mount_point):]
182                     self.wsgi_app = wsgi_app
183                     break
184             else:
185                 self.simple_response("404 Not Found")
186                 return
187        
188         # Note that, like wsgiref and most other WSGI servers,
189         # we unquote the path but not the query string.
190         self.environ["QUERY_STRING"] = qs
191        
192         # Compare request and server HTTP protocol versions, in case our
193         # server does not support the requested protocol. Limit our output
194         # to min(req, server). We want the following output:
195         #     request    server     actual written   supported response
196         #     protocol   protocol  response protocol    feature set
197         # a     1.0        1.0           1.0                1.0
198         # b     1.0        1.1           1.1                1.0
199         # c     1.1        1.0           1.0                1.0
200         # d     1.1        1.1           1.1                1.1
201         # Notice that, in (b), the response will be "HTTP/1.1" even though
202         # the client only understands 1.0. RFC 2616 10.5.6 says we should
203         # only return 505 if the _major_ version is different.
204         rp = int(req_protocol[5]), int(req_protocol[7])
205         sp = int(server.protocol[5]), int(server.protocol[7])
206         if sp[0] != rp[0]:
207             self.simple_response("505 HTTP Version Not Supported")
208             return
209         # Bah. "SERVER_PROTOCOL" is actually the REQUEST protocol.
210         self.environ["SERVER_PROTOCOL"] = req_protocol
211         # set a non-standard environ entry so the WSGI app can know what
212         # the *real* server protocol is (and what features to support).
213         # See http://www.faqs.org/rfcs/rfc2145.html.
214         self.environ["ACTUAL_SERVER_PROTOCOL"] = server.protocol
215         self.response_protocol = "HTTP/%s.%s" % min(rp, sp)
216        
217         # If the Request-URI was an absoluteURI, use its location atom.
218         if location:
219             self.environ["SERVER_NAME"] = location
220        
221         # then all the http headers
222         headers = rfc822.Message(self.rfile, seekable=0)
223         self.environ.update(self.parse_headers(headers))
224        
225         creds = headers.getheader("Authorization", "").split(" ", 1)
226         self.environ["AUTH_TYPE"] = creds[0]
227         if creds[0].lower() == 'basic':
228             user, pw = base64.decodestring(creds[1]).split(":", 1)
229             self.environ["REMOTE_USER"] = user
230        
231         # Persistent connection support
232         if self.response_protocol == "HTTP/1.1":
233             if headers.getheader("Connection", "") == "close":
234                 self.close_connection = True
235                 self.outheaders.append(("Connection", "close"))
236         else:
237             # HTTP/1.0
238             if headers.getheader("Connection", "") == "Keep-Alive":
239                 if self.close_connection == False:
240                     self.outheaders.append(("Connection", "Keep-Alive"))
241             else:
242                 self.close_connection = True
243        
244         # Transfer-Encoding support
245         te = headers.getheader("Transfer-Encoding", "")
246         te = [x.strip() for x in te.split(",") if x.strip()]
247         if te:
248             while te:
249                 enc = te.pop()
250                 if enc.lower() == "chunked":
251                     if not self.decode_chunked():
252                         return
253                 else:
254                     self.simple_response("501 Unimplemented")
255                     self.close_connection = True
256                     return
257         else:
258             cl = headers.getheader("Content-length")
259             if method in ("POST", "PUT") and cl is None:
260                 # No Content-Length header supplied. This will hang
261                 # cgi.FieldStorage, since it cannot determine when to
262                 # stop reading from the socket.
263                 # See http://www.cherrypy.org/ticket/493.
264                 self.simple_response("411 Length Required")
265                 return
266        
267         # From PEP 333:
268         # "Servers and gateways that implement HTTP 1.1 must provide
269         # transparent support for HTTP 1.1's "expect/continue" mechanism.
270         # This may be done in any of several ways:
271         #   1. Respond to requests containing an Expect: 100-continue request
272         #      with an immediate "100 Continue" response, and proceed normally.
273         #   2. Proceed with the request normally, but provide the application
274         #      with a wsgi.input stream that will send the "100 Continue"
275         #      response if/when the application first attempts to read from
276         #      the input stream. The read request must then remain blocked
277         #      until the client responds.
278         #   3. Wait until the client decides that the server does not support
279         #      expect/continue, and sends the request body on its own.
280         #      (This is suboptimal, and is not recommended.)
281         #
282         # We used to do 3, but are now doing 1. Maybe we'll do 2 someday,
283         # but it seems like it would be a big slowdown for such a rare case.
284         if headers.getheader("Expect", "") == "100-continue":
285             self.simple_response(100)
286        
287         self.ready = True
288    
289     def parse_headers(self, headers):
290         """Parse the given HTTP request message-headers."""
291         environ = {}
292         ct = headers.dict.get("content-type")
293         if ct:
294             environ["CONTENT_TYPE"] = ct
295         cl = headers.dict.get("content-length")
296         if cl:
297             environ["CONTENT_LENGTH"] = cl
298        
299         for line in headers.headers:
300             if line[:1].isspace():
301                 v = line.strip()
302             else:
303                 k, v = line.split(":", 1)
304                 k, v = k.strip().upper(), v.strip()
305                 envname = "HTTP_" + k.replace("-", "_")
306            
307             if k in comma_separated_headers:
308                 existing = environ.get(envname)
309                 if existing:
310                     v = ", ".join((existing, v))
311             environ[envname] = v
312        
313         return environ
314    
315     def decode_chunked(self):
316         """Decode the 'chunked' transfer coding."""
317         cl = 0
318         data = StringIO.StringIO()
319         while True:
320             line = self.rfile.readline().strip().split(";", 1)
321             chunk_size = int(line.pop(0), 16)
322             if chunk_size <= 0:
323                 break
324 ##            if line: chunk_extension = line[0]
325             cl += chunk_size
326             data.write(self.rfile.read(chunk_size))
327             crlf = self.rfile.read(2)
328             if crlf != "\r\n":
329                 self.simple_response("400 Bad Request",
330                                      "Bad chunked transfer coding "
331                                      "(expected '\\r\\n', got %r)" % crlf)
332                 return
333        
334         # Grab any trailer headers
335         headers = rfc822.Message(self.rfile, seekable=0)
336         self.environ.update(self.parse_headers(headers))
337        
338         data.seek(0)
339         self.environ["wsgi.input"] = data
340         self.environ["CONTENT_LENGTH"] = str(cl) or ""
341         return True
342    
343     def respond(self):
344         """Call the appropriate WSGI app and write its iterable output."""
345         response = self.wsgi_app(self.environ, self.start_response)
346         try:
347             for chunk in response:
348                 self.write(chunk)
349         finally:
350             if hasattr(response, "close"):
351                 response.close()
352         if (self.ready and not self.sent_headers
353                 and not self.connection.server.interrupt):
354             self.sent_headers = True
355             self.send_headers()
356         if self.chunked_write:
357             self.sendall("0\r\n\r\n")
358    
359     def simple_response(self, status, msg=""):
360         """Write a simple response back to the client."""
361         status = str(status)
362         buf = ["%s %s\r\n" % (self.connection.server.protocol, status),
363                "Content-Length: %s\r\n" % len(msg)]
364        
365         if status[:3] == "413" and self.response_protocol == 'HTTP/1.1':
366             # Request Entity Too Large
367             self.close_connection = True
368             buf.append("Connection: close\r\n")
369        
370         buf.append("\r\n")
371         if msg:
372             buf.append(msg)
373         self.sendall("".join(buf))
374    
375     def start_response(self, status, headers, exc_info = None):
376         """WSGI callable to begin the HTTP response."""
377         if self.started_response:
378             if not exc_info:
379                 assert False, "Already started response"
380             else:
381                 try:
382                     raise exc_info[0], exc_info[1], exc_info[2]
383                 finally:
384                     exc_info = None
385         self.started_response = True
386         self.status = status
387         self.outheaders.extend(headers)
388         return self.write
389    
390     def write(self, chunk):
391         """WSGI callable to write unbuffered data to the client.
392         
393         This method is also used internally by start_response (to write
394         data from the iterable returned by the WSGI application).
395         """
396         if not self.sent_headers:
397             self.sent_headers = True
398             self.send_headers()
399         if self.chunked_write:
400             buf = [hex(len(chunk))[2:],
401                    "\r\n", chunk, "\r\n"]
402             self.sendall("".join(buf))
403         else:
404             self.sendall(chunk)
405    
406     def send_headers(self):
407         """Assert, process, and send the HTTP response message-headers."""
408         hkeys = [key.lower() for (key, value) in self.outheaders]
409         status = int(self.status[:3])
410        
411         if self.response_protocol == 'HTTP/1.1':
412             if status == 413:
413                 # Request Entity Too Large. Close conn to avoid garbage.
414                 self.close_connection = True
415             elif "content-length" not in hkeys:
416                 # "All 1xx (informational), 204 (no content),
417                 # and 304 (not modified) responses MUST NOT
418                 # include a message-body." So no point chunking.
419                 if status < 200 or status in (204, 205, 304):
420                     pass
421                 else:
422                     # Use the chunked transfer-coding
423                     self.chunked_write = True
424                     self.outheaders.append(("Transfer-Encoding", "chunked"))
425        
426         if self.close_connection and "connection" not in hkeys:
427             self.outheaders.append(("Connection", "close"))
428        
429         if "date" not in hkeys:
430             self.outheaders.append(("Date", rfc822.formatdate()))
431        
432         server = self.connection.server
433        
434         if "server" not in hkeys:
435             self.outheaders.append(("Server", server.version))
436        
437         buf = [server.protocol, " ", self.status, "\r\n"]
438         try:
439             for k, v in self.outheaders:
440                 buf.append(k + ": " + v + "\r\n")
441         except TypeError:
442             if not isinstance(k, str):
443                 raise TypeError("WSGI response header key %r is not a string.")
444             if not isinstance(v, str):
445                 raise TypeError("WSGI response header value %r is not a string.")
446             else:
447                 raise
448         buf.append("\r\n")
449         self.sendall("".join(buf))
450
451
452 def _ssl_wrap_method(method, is_reader=False):
453     """Wrap the given method with SSL error-trapping.
454     
455     is_reader: if False (the default), EOF errors will be raised.
456         If True, EOF errors will return "" (to emulate normal sockets).
457     """
458     def ssl_method_wrapper(self, *args, **kwargs):
459 ##        print (id(self), method, args, kwargs)
460         start = time.time()
461         while True:
462             try:
463                 return method(self, *args, **kwargs)
464             except (SSL.WantReadError, SSL.WantWriteError):
465                 # Sleep and try again. This is dangerous, because it means
466                 # the rest of the stack has no way of differentiating
467                 # between a "new handshake" error and "client dropped".
468                 # Note this isn't an endless loop: there's a timeout below.
469                 time.sleep(self.ssl_retry)
470             except SSL.SysCallError, e:
471                 if is_reader and e.args == (-1, 'Unexpected EOF'):
472                     return ""
473                
474                 errno = e.args[0]
475                 if is_reader and errno in socket_errors_to_ignore:
476                     return ""
477                 raise socket.error(errno)
478             except SSL.Error, e:
479                 if is_reader and e.args == (-1, 'Unexpected EOF'):
480                     return ""
481                 if is_reader and e.args[0][0][2