Download Install Tutorial Docs FAQ Tools WikiLicense Team IRC Planet Involvement Shop Book

root/branches/cherrypy-3.0.x/cherrypy/wsgiserver/__init__.py

Revision 1843 (checked in by dowski, 11 months ago)

Updated version numbers in preparation for 3.0.3 release.

  • Property svn:eol-style set to native
Line 
1 """A high-speed, production ready, thread pooled, generic WSGI server.
2
3 Simplest example on how to use this module directly
4 (without using CherryPy's application machinery):
5
6     from cherrypy import wsgiserver
7     
8     def my_crazy_app(environ, start_response):
9         status = '200 OK'
10         response_headers = [('Content-type','text/plain')]
11         start_response(status, response_headers)
12         return ['Hello world!\n']
13     
14     # Here we set our application to the script_name '/'
15     wsgi_apps = [('/', my_crazy_app)]
16     
17     server = wsgiserver.CherryPyWSGIServer(('localhost', 8070), wsgi_apps,
18                                            server_name='localhost')
19     
20     # Want SSL support? Just set these attributes
21     # server.ssl_certificate = <filename>
22     # server.ssl_private_key = <filename>
23     
24     if __name__ == '__main__':
25         try:
26             server.start()
27         except KeyboardInterrupt:
28             server.stop()
29
30 This won't call the CherryPy engine (application side) at all, only the
31 WSGI server, which is independent from the rest of CherryPy. Don't
32 let the name "CherryPyWSGIServer" throw you; the name merely reflects
33 its origin, not its coupling.
34
35 The CherryPy WSGI server can serve as many WSGI applications
36 as you want in one instance:
37
38     wsgi_apps = [('/', my_crazy_app), ('/blog', my_blog_app)]
39
40 """
41
42
43 import base64
44 import Queue
45 import os
46 import re
47 quoted_slash = re.compile("(?i)%2F")
48 import rfc822
49 import socket
50 try:
51     import cStringIO as StringIO
52 except ImportError:
53     import StringIO
54 import sys
55 import threading
56 import time
57 import traceback
58 from urllib import unquote
59 from urlparse import urlparse
60
61 try:
62     from OpenSSL import SSL
63     from OpenSSL import crypto
64 except ImportError:
65     SSL = None
66
67 import errno
# Error identifiers collected for socket failures that are to be ignored:
# errno codes, plus the literal message "timed out".
# NOTE(review): the consumers of this list are outside this section of the
# file -- presumably the connection-handling code; confirm before changing
# the element types.
socket_errors_to_ignore = []
# Not all of these names will be defined for every platform.
for _errname in ("EPIPE", "ETIMEDOUT", "ECONNREFUSED", "ECONNRESET",
                 "EHOSTDOWN", "EHOSTUNREACH",
                 "WSAECONNABORTED", "WSAECONNREFUSED", "WSAECONNRESET",
                 "WSAENETRESET", "WSAETIMEDOUT"):
    if hasattr(errno, _errname):
        socket_errors_to_ignore.append(getattr(errno, _errname))
del _errname
# De-dupe the list, in case two names map to the same code. list(...) is
# used instead of .keys() so that the result is a real, appendable list
# regardless of what dict.keys() returns.
socket_errors_to_ignore = list(dict.fromkeys(socket_errors_to_ignore))
socket_errors_to_ignore.append("timed out")

# Upper-cased names of the headers for which HTTPRequest.read_headers merges
# repeated occurrences into a single ", "-joined value.
comma_separated_headers = ['ACCEPT', 'ACCEPT-CHARSET', 'ACCEPT-ENCODING',
    'ACCEPT-LANGUAGE', 'ACCEPT-RANGES', 'ALLOW', 'CACHE-CONTROL',
    'CONNECTION', 'CONTENT-ENCODING', 'CONTENT-LANGUAGE', 'EXPECT',
    'IF-MATCH', 'IF-NONE-MATCH', 'PRAGMA', 'PROXY-AUTHENTICATE', 'TE',
    'TRAILER', 'TRANSFER-ENCODING', 'UPGRADE', 'VARY', 'VIA', 'WARNING',
    'WWW-AUTHENTICATE']
86
87 class HTTPRequest(object):
88     """An HTTP Request (and response).
89     
90     A single HTTP connection may consist of multiple request/response pairs.
91     
92     connection: the HTTP Connection object which spawned this request.
93     rfile: the 'read' fileobject from the connection's socket
94     ready: when True, the request has been parsed and is ready to begin
95         generating the response. When False, signals the calling Connection
96         that the response should not be generated and the connection should
97         close.
98     close_connection: signals the calling Connection that the request
99         should close. This does not imply an error! The client and/or
100         server may each request that the connection be closed.
101     chunked_write: if True, output will be encoded with the "chunked"
102         transfer-coding. This value is set automatically inside
103         send_headers.
104     """
105    
106     def __init__(self, connection):
107         self.connection = connection
108         self.rfile = self.connection.rfile
109         self.sendall = self.connection.sendall
110         self.environ = connection.environ.copy()
111        
112         self.ready = False
113         self.started_response = False
114         self.status = ""
115         self.outheaders = []
116         self.sent_headers = False
117         self.close_connection = False
118         self.chunked_write = False
119    
120     def parse_request(self):
121         """Parse the next HTTP request start-line and message-headers."""
122         # HTTP/1.1 connections are persistent by default. If a client
123         # requests a page, then idles (leaves the connection open),
124         # then rfile.readline() will raise socket.error("timed out").
125         # Note that it does this based on the value given to settimeout(),
126         # and doesn't need the client to request or acknowledge the close
127         # (although your TCP stack might suffer for it: cf Apache's history
128         # with FIN_WAIT_2).
129         request_line = self.rfile.readline()
130         if not request_line:
131             # Force self.ready = False so the connection will close.
132             self.ready = False
133             return
134        
135         if request_line == "\r\n":
136             # RFC 2616 sec 4.1: "...if the server is reading the protocol
137             # stream at the beginning of a message and receives a CRLF
138             # first, it should ignore the CRLF."
139             # But only ignore one leading line! else we enable a DoS.
140             request_line = self.rfile.readline()
141             if not request_line:
142                 self.ready = False
143                 return
144        
145         server = self.connection.server
146         environ = self.environ
147         environ["SERVER_SOFTWARE"] = "%s WSGI Server" % server.version
148        
149         method, path, req_protocol = request_line.strip().split(" ", 2)
150         environ["REQUEST_METHOD"] = method
151        
152         # path may be an abs_path (including "http://host.domain.tld");
153         scheme, location, path, params, qs, frag = urlparse(path)
154        
155         if frag:
156             self.simple_response("400 Bad Request",
157                                  "Illegal #fragment in Request-URI.")
158             return
159        
160         if scheme:
161             environ["wsgi.url_scheme"] = scheme
162         if params:
163             path = path + ";" + params
164        
165         # Unquote the path+params (e.g. "/this%20path" -> "this path").
166         # http://www.w3.org/Protocols/rfc2616/rfc2616-sec5.html#sec5.1.2
167         #
168         # But note that "...a URI must be separated into its components
169         # before the escaped characters within those components can be
170         # safely decoded." http://www.ietf.org/rfc/rfc2396.txt, sec 2.4.2
171         atoms = [unquote(x) for x in quoted_slash.split(path)]
172         path = "%2F".join(atoms)
173        
174         if path == "*":
175             # This means, of course, that the last wsgi_app (shortest path)
176             # will always handle a URI of "*".
177             environ["SCRIPT_NAME"] = ""
178             environ["PATH_INFO"] = "*"
179             self.wsgi_app = server.mount_points[-1][1]
180         else:
181             for mount_point, wsgi_app in server.mount_points:
182                 # The mount_points list should be sorted by length, descending.
183                 if path.startswith(mount_point + "/") or path == mount_point:
184                     environ["SCRIPT_NAME"] = mount_point
185                     environ["PATH_INFO"] = path[len(mount_point):]
186                     self.wsgi_app = wsgi_app
187                     break
188             else:
189                 self.simple_response("404 Not Found")
190                 return
191        
192         # Note that, like wsgiref and most other WSGI servers,
193         # we unquote the path but not the query string.
194         environ["QUERY_STRING"] = qs
195        
196         # Compare request and server HTTP protocol versions, in case our
197         # server does not support the requested protocol. Limit our output
198         # to min(req, server). We want the following output:
199         #     request    server     actual written   supported response
200         #     protocol   protocol  response protocol    feature set
201         # a     1.0        1.0           1.0                1.0
202         # b     1.0        1.1           1.1                1.0
203         # c     1.1        1.0           1.0                1.0
204         # d     1.1        1.1           1.1                1.1
205         # Notice that, in (b), the response will be "HTTP/1.1" even though
206         # the client only understands 1.0. RFC 2616 10.5.6 says we should
207         # only return 505 if the _major_ version is different.
208         rp = int(req_protocol[5]), int(req_protocol[7])
209         sp = int(server.protocol[5]), int(server.protocol[7])
210         if sp[0] != rp[0]:
211             self.simple_response("505 HTTP Version Not Supported")
212             return
213         # Bah. "SERVER_PROTOCOL" is actually the REQUEST protocol.
214         environ["SERVER_PROTOCOL"] = req_protocol
215         # set a non-standard environ entry so the WSGI app can know what
216         # the *real* server protocol is (and what features to support).
217         # See http://www.faqs.org/rfcs/rfc2145.html.
218         environ["ACTUAL_SERVER_PROTOCOL"] = server.protocol
219         self.response_protocol = "HTTP/%s.%s" % min(rp, sp)
220        
221         # If the Request-URI was an absoluteURI, use its location atom.
222         if location:
223             environ["SERVER_NAME"] = location
224        
225         # then all the http headers
226         try:
227             self.read_headers()
228         except ValueError, ex:
229             self.simple_response("400 Bad Request", repr(ex.args))
230             return
231        
232         creds = environ.get("HTTP_AUTHORIZATION", "").split(" ", 1)
233         environ["AUTH_TYPE"] = creds[0]
234         if creds[0].lower() == 'basic':
235             user, pw = base64.decodestring(creds[1]).split(":", 1)
236             environ["REMOTE_USER"] = user
237        
238         # Persistent connection support
239         if self.response_protocol == "HTTP/1.1":
240             if environ.get("HTTP_CONNECTION", "") == "close":
241                 self.close_connection = True
242         else:
243             # HTTP/1.0
244             if environ.get("HTTP_CONNECTION", "") != "Keep-Alive":
245                 self.close_connection = True
246        
247         # Transfer-Encoding support
248         te = None
249         if self.response_protocol == "HTTP/1.1":
250             te = environ.get("HTTP_TRANSFER_ENCODING")
251             if te:
252                 te = [x.strip().lower() for x in te.split(",") if x.strip()]
253        
254         read_chunked = False
255        
256         if te:
257             for enc in te:
258                 if enc == "chunked":
259                     read_chunked = True
260                 else:
261                     # Note that, even if we see "chunked", we must reject
262                     # if there is an extension we don't recognize.
263                     self.simple_response("501 Unimplemented")
264                     self.close_connection = True
265                     return
266        
267         if read_chunked:
268             if not self.decode_chunked():
269                 return
270        
271         # From PEP 333:
272         # "Servers and gateways that implement HTTP 1.1 must provide
273         # transparent support for HTTP 1.1's "expect/continue" mechanism.
274         # This may be done in any of several ways:
275         #   1. Respond to requests containing an Expect: 100-continue request
276         #      with an immediate "100 Continue" response, and proceed normally.
277         #   2. Proceed with the request normally, but provide the application
278         #      with a wsgi.input stream that will send the "100 Continue"
279         #      response if/when the application first attempts to read from
280         #      the input stream. The read request must then remain blocked
281         #      until the client responds.
282         #   3. Wait until the client decides that the server does not support
283         #      expect/continue, and sends the request body on its own.
284         #      (This is suboptimal, and is not recommended.)
285         #
286         # We used to do 3, but are now doing 1. Maybe we'll do 2 someday,
287         # but it seems like it would be a big slowdown for such a rare case.
288         if environ.get("HTTP_EXPECT", "") == "100-continue":
289             self.simple_response(100)
290        
291         self.ready = True
292    
293     def read_headers(self):
294         """Read header lines from the incoming stream."""
295         environ = self.environ
296        
297         while True:
298             line = self.rfile.readline()
299             if not line:
300                 # No more data--illegal end of headers
301                 raise ValueError("Illegal end of headers.")
302            
303             if line == '\r\n':
304                 # Normal end of headers
305                 break
306            
307             if line[0] in ' \t':
308                 # It's a continuation line.
309                 v = line.strip()
310             else:
311                 k, v = line.split(":", 1)
312                 k, v = k.strip().upper(), v.strip()
313                 envname = "HTTP_" + k.replace("-", "_")
314            
315             if k in comma_separated_headers:
316                 existing = environ.get(envname)
317                 if existing:
318                     v = ", ".join((existing, v))
319             environ[envname] = v
320        
321         ct = environ.pop("HTTP_CONTENT_TYPE", None)
322         if ct:
323             environ["CONTENT_TYPE"] = ct
324         cl = environ.pop("HTTP_CONTENT_LENGTH", None)
325         if cl:
326             environ["CONTENT_LENGTH"] = cl
327    
328     def decode_chunked(self):
329         """Decode the 'chunked' transfer coding."""
330         cl = 0
331         data = StringIO.StringIO()
332         while True:
333             line = self.rfile.readline().strip().split(";", 1)
334             chunk_size = int(line.pop(0), 16)
335             if chunk_size <= 0:
336                 break
337 ##            if line: chunk_extension = line[0]
338             cl += chunk_size
339             data.write(self.rfile.read(chunk_size))
340             crlf = self.rfile.read(2)
341             if crlf != "\r\n":
342                 self.simple_response("400 Bad Request",
343                                      "Bad chunked transfer coding "
344                                      "(expected '\\r\\n', got %r)" % crlf)
345                 return
346        
347         # Grab any trailer headers
348         self.read_headers()
349        
350         data.seek(0)
351         self.environ["wsgi.input"] = data
352         self.environ["CONTENT_LENGTH"] = str(cl) or ""
353         return True
354    
355     def respond(self):
356         """Call the appropriate WSGI app and write its iterable output."""
357         response = self.wsgi_app(self.environ, self.start_response)
358         try:
359             for chunk in response:
360                 # "The start_response callable must not actually transmit
361                 # the response headers. Instead, it must store them for the
362                 # server or gateway to transmit only after the first
363                 # iteration of the application return value that yields
364                 # a NON-EMPTY string, or upon the application's first
365                 # invocation of the write() callable." (PEP 333)
366                 if chunk:
367                     self.write(chunk)
368         finally:
369             if hasattr(response, "close"):
370                 response.close()
371         if (self.ready and not self.sent_headers
372                 and not self.connection.server.interrupt):
373             self.sent_headers = True
374             self.send_headers()
375         if self.chunked_write:
376             self.sendall("0\r\n\r\n")
377    
378     def simple_response(self, status, msg=""):
379         """Write a simple response back to the client."""
380         status = str(status)
381         buf = ["%s %s\r\n" % (self.connection.server.protocol, status),
382                "Content-Length: %s\r\n" % len(msg)]
383        
384         if status[:3] == "413" and self.response_protocol == 'HTTP/1.1':
385             # Request Entity Too Large
386             self.close_connection = True
387             buf.append("Connection: close\r\n")
388        
389         buf.append("\r\n")
390         if msg:
391             buf.append(msg)
392         self.sendall("".join(buf))
393    
394     def start_response(self, status, headers, exc_info = None):
395         """WSGI callable to begin the HTTP response."""
396         if self.started_response:
397             if not exc_info:
398                 raise AssertionError("WSGI start_response called a second "
399                                      "time with no exc_info.")
400             else:
401                 try:
402                     raise exc_info[0], exc_info[1], exc_info[2]
403                 finally:
404                     exc_info = None
405         self.started_response = True
406         self.status = status
407         self.outheaders.extend(headers)
408         return self.write
409    
410     def write(self, chunk):
411         """WSGI callable to write unbuffered data to the client.
412         
413         This method is also used internally by start_response (to write
414         data from the iterable returned by the WSGI application).
415         """
416         if not self.started_response:
417             raise AssertionError("WSGI write called before start_response.")
418        
419         if not self.sent_headers:
420             self.sent_headers = True
421             self.send_headers()
422        
423         if self.chunked_write and chunk:
424             buf = [hex(len(chunk))[2:], "\r\n", chunk, "\r\n"]
425             self.sendall("".join(buf))
426         else:
427             self.sendall(chunk)
428    
429     def send_headers(self):
430         """Assert, process, and send the HTTP response message-headers."""
431         hkeys = [key.lower() for key, value in self.outheaders]
432         status = int(self.status[:3])
433        
434         if status == 413:
435             # Request Entity Too Large. Close conn to avoid garbage.
436             self.close_connection = True
437         elif "content-length" not in hkeys:
438             # "All 1xx (informational), 204 (no content),
439             # and 304 (not modified) responses MUST NOT
440             # include a message-body." So no point chunking.
441             if status < 200 or status in (204, 205, 304):
442                 pass
443             else:
444                 if self.response_protocol == 'HTTP/1.1':
445                     # Use the chunked transfer-coding
446                     self.chunked_write = True
447                     self.outheaders.append(("Transfer-Encoding", "chunked"))
448                 else:
449                     # Closing the conn is the only way to determine len.
450                     self.close_connection = True
451        
452         if "connection" not in hkeys:
453             if self.response_protocol == 'HTTP/1.1':
454                 if self.close_connection:
455                     self.outheaders.append(("Connection", "close"))
456             else:
457                 if not self.close_connection:
458                     self.outheaders.append(("Connection", "Keep-Alive"))
459        
460         if "date" not in hkeys:
461             self.outheaders.append(("Date", rfc822.formatdate()))
462        
463         server = self.connection.server
464        
465         if "server" not in hkeys:
466             self.outheaders.append(("Server", server.version))
467        
468         buf = [server.protocol, " ", self.status, "\r\n"]
469         try:
470             buf += [k + ": " + v + "\r\n" for k, v in self.outheaders]
471         except TypeError:
472             if not isinstance(k, str):
473                 raise TypeError("WSGI response header key %r is not a string.")
474             if not isinstance(v, str):
475                 raise TypeError("WSGI response header value %r is not a string.")
476             else:
477                 raise
478         buf.append("\r\n")
479         self.sendall("".join(buf))
480
481
class NoSSLError(Exception):
    """Signal that a client spoke plain HTTP to an HTTPS socket.

    Carries no state beyond what Exception itself provides.
    """
485
486
487 def _ssl_wrap_method(method, is_reader=False):
488     """Wrap the given method with SSL error-trapping.
489     
490     is_reader: if False (the default), EOF errors will be raised.
491         If True, EOF errors will return "" (to emulate normal sockets).
492     """
493     def ssl_method_wrapper(self, *args, **kwargs):
494 ##        print (id(self), method, args, kwargs)
495         start = time.time()
496         while True:
497             try:
498                 return method(self, *args, **kwargs)
499             except (SSL.WantReadError, SSL.WantWriteError):
500                 # Sleep and try again. This is dangerous, because it means
501                 # the rest of the stack has no way of differentiating
502                 # between a "new handshake" error and "client dropped".
503                 # Note this isn't an endless loop: there's a timeout below.
504                 time.sleep(self.ssl_retry)
505