Download Install Tutorial Docs FAQ Tools WikiLicense Team IRC Planet Involvement Shop Book

root/trunk/cherrypy/wsgiserver/__init__.py

Revision 2043 (checked in by fumanchu, 1 week ago)

Allow overriding of SERVER_SOFTWARE (and therefore the 'Server' response header).

  • Property svn:eol-style set to native
Line 
1 """A high-speed, production ready, thread pooled, generic WSGI server.
2
3 Simplest example on how to use this module directly
4 (without using CherryPy's application machinery):
5
6     from cherrypy import wsgiserver
7     
8     def my_crazy_app(environ, start_response):
9         status = '200 OK'
10         response_headers = [('Content-type','text/plain')]
11         start_response(status, response_headers)
12         return ['Hello world!\n']
13     
14     server = wsgiserver.CherryPyWSGIServer(
15                 ('0.0.0.0', 8070), my_crazy_app,
16                 server_name='www.cherrypy.example')
17     
18 The CherryPy WSGI server can serve as many WSGI applications
19 as you want in one instance by using a WSGIPathInfoDispatcher:
20     
21     d = WSGIPathInfoDispatcher({'/': my_crazy_app, '/blog': my_blog_app})
22     server = wsgiserver.CherryPyWSGIServer(('0.0.0.0', 80), d)
23     
24 Want SSL support? Just set these attributes:
25     
26     server.ssl_certificate = <filename>
27     server.ssl_private_key = <filename>
28     
29     if __name__ == '__main__':
30         try:
31             server.start()
32         except KeyboardInterrupt:
33             server.stop()
34
35 This won't call the CherryPy engine (application side) at all, only the
36 WSGI server, which is independent of the rest of CherryPy. Don't
37 let the name "CherryPyWSGIServer" throw you; the name merely reflects
38 its origin, not its coupling.
39
40 For those of you wanting to understand internals of this module, here's the
41 basic call flow. The server's listening thread runs a very tight loop,
42 sticking incoming connections onto a Queue:
43
44     server = CherryPyWSGIServer(...)
45     server.start()
46     while True:
47         tick()
48         # This blocks until a request comes in:
49         child = socket.accept()
50         conn = HTTPConnection(child, ...)
51         server.requests.put(conn)
52
53 Worker threads are kept in a pool and poll the Queue, popping off and then
54 handling each connection in turn. Each connection can consist of an arbitrary
55 number of requests and their responses, so we run a nested loop:
56
57     while True:
58         conn = server.requests.get()
59         conn.communicate()
60         ->  while True:
61                 req = HTTPRequest(...)
62                 req.parse_request()
63                 ->  # Read the Request-Line, e.g. "GET /page HTTP/1.1"
64                     req.rfile.readline()
65                     req.read_headers()
66                 req.respond()
67                 ->  response = wsgi_app(...)
68                     try:
69                         for chunk in response:
70                             if chunk:
71                                 req.write(chunk)
72                     finally:
73                         if hasattr(response, "close"):
74                             response.close()
75                 if req.close_connection:
76                     return
77 """
78
79
80 import base64
81 import os
82 import Queue
83 import re
84 quoted_slash = re.compile("(?i)%2F")
85 import rfc822
86 import socket
87 try:
88     import cStringIO as StringIO
89 except ImportError:
90     import StringIO
91 import sys
92 import threading
93 import time
94 import traceback
95 from urllib import unquote
96 from urlparse import urlparse
97 import warnings
98
99 try:
100     from OpenSSL import SSL
101     from OpenSSL import crypto
102 except ImportError:
103     SSL = None
104
105 import errno
106
def plat_specific_errors(*errnames):
    """Look up the numeric codes this platform defines for the given names.

    The 'errno' module exposes a different set of constants on each
    platform (OS). Every name in errnames that 'errno' defines here is
    translated to its number; names it does not define are skipped.
    Numbers that occur more than once are reported only once.

    Returns a list of error numbers.
    """
    codes = {}
    for name in errnames:
        if hasattr(errno, name):
            # Using the number as a dict key collapses duplicates.
            codes[getattr(errno, name)] = None
    return list(codes.keys())
118
# Error numbers for an interrupted call, under both the POSIX and the
# Winsock name (only the names this platform defines are kept).
socket_error_eintr = plat_specific_errors("EINTR", "WSAEINTR")

# Error numbers grouped as "to ignore"; each POSIX name is listed with its
# Winsock counterpart where one is given.
socket_errors_to_ignore = plat_specific_errors(
    "EPIPE",
    "EBADF",
    "WSAEBADF",
    "ENOTSOCK",
    "WSAENOTSOCK",
    "ETIMEDOUT",
    "WSAETIMEDOUT",
    "ECONNREFUSED",
    "WSAECONNREFUSED",
    "ECONNRESET",
    "WSAECONNRESET",
    "ECONNABORTED",
    "WSAECONNABORTED",
    "ENETRESET",
    "WSAENETRESET",
    "EHOSTDOWN",
    "EHOSTUNREACH",
    )
# A read that times out raises socket.error("timed out"), so the message
# text sits next to the numeric codes.
# NOTE(review): presumably compared against the exception's first
# argument -- confirm at the use sites.
socket_errors_to_ignore.append("timed out")

# Error numbers meaning "the operation would block".
socket_errors_nonblocking = plat_specific_errors(
    'EAGAIN',
    'EWOULDBLOCK',
    'WSAEWOULDBLOCK',
    )

# Request headers whose repeated occurrences are merged into a single
# ", "-separated value when the headers are read.
comma_separated_headers = [
    'ACCEPT',
    'ACCEPT-CHARSET',
    'ACCEPT-ENCODING',
    'ACCEPT-LANGUAGE',
    'ACCEPT-RANGES',
    'ALLOW',
    'CACHE-CONTROL',
    'CONNECTION',
    'CONTENT-ENCODING',
    'CONTENT-LANGUAGE',
    'EXPECT',
    'IF-MATCH',
    'IF-NONE-MATCH',
    'PRAGMA',
    'PROXY-AUTHENTICATE',
    'TE',
    'TRAILER',
    'TRANSFER-ENCODING',
    'UPGRADE',
    'VARY',
    'VIA',
    'WARNING',
    'WWW-AUTHENTICATE',
    ]
143
144
class WSGIPathInfoDispatcher(object):
    """A WSGI dispatcher for dispatch based on the PATH_INFO.

    apps: a dict, or any iterable of (path_prefix, app) pairs. The
        argument is only read; a list passed in is left untouched.

    The instance is itself a WSGI application. For each request it picks
    the app with the longest path_prefix that matches PATH_INFO, moves
    that prefix from PATH_INFO onto SCRIPT_NAME in a copy of the environ,
    and calls the app. If no prefix matches it answers "404 Not Found".
    """

    def __init__(self, apps):
        try:
            apps = apps.items()
        except AttributeError:
            pass

        # The path_prefix strings must start, but not end, with a slash.
        # Use "" instead of "/".
        #
        # Sort by len(path_prefix), descending, via decorate-sort-undecorate.
        # The unique index in each tuple guarantees that the comparison
        # never reaches the app objects, which need not be orderable.
        decorated = []
        for index, (prefix, app) in enumerate(apps):
            prefix = prefix.rstrip("/")
            decorated.append((len(prefix), prefix, index, app))
        decorated.sort()
        decorated.reverse()
        self.apps = [(entry[1], entry[3]) for entry in decorated]

    def __call__(self, environ, start_response):
        """Dispatch one WSGI request to the app mounted at its path."""
        # PEP 333 lets a server omit PATH_INFO and SCRIPT_NAME when they
        # would be empty, so neither may be assumed to be present.
        path = environ.get("PATH_INFO") or "/"
        for p, app in self.apps:
            # self.apps is sorted by length, descending, so the first
            # match is the most specific one.
            if path == p or path.startswith(p + "/"):
                environ = environ.copy()
                environ["SCRIPT_NAME"] = environ.get("SCRIPT_NAME", "") + p
                environ["PATH_INFO"] = path[len(p):]
                return app(environ, start_response)

        start_response('404 Not Found', [('Content-Type', 'text/plain'),
                                         ('Content-Length', '0')])
        return ['']
178
179
180 class MaxSizeExceeded(Exception):
181     pass
182
183 class SizeCheckWrapper(object):
184     """Wraps a file-like object, raising MaxSizeExceeded if too large."""
185    
186     def __init__(self, rfile, maxlen):
187         self.rfile = rfile
188         self.maxlen = maxlen
189         self.bytes_read = 0
190    
191     def _check_length(self):
192         if self.maxlen and self.bytes_read > self.maxlen:
193             raise MaxSizeExceeded()
194    
195     def read(self, size=None):
196         data = self.rfile.read(size)
197         self.bytes_read += len(data)
198         self._check_length()
199         return data
200    
201     def readline(self, size=None):
202         if size is not None:
203             data = self.rfile.readline(size)
204             self.bytes_read += len(data)
205             self._check_length()
206             return data
207        
208         # User didn't specify a size ...
209         # We read the line in chunks to make sure it's not a 100MB line !
210         res = []
211         while True:
212             data = self.rfile.readline(256)
213             self.bytes_read += len(data)
214             self._check_length()
215             res.append(data)
216             # See http://www.cherrypy.org/ticket/421
217             if len(data) < 256 or data[-1:] == "\n":
218                 return ''.join(res)
219    
220     def readlines(self, sizehint=0):
221         # Shamelessly stolen from StringIO
222         total = 0
223         lines = []
224         line = self.readline()
225         while line:
226             lines.append(line)
227             total += len(line)
228             if 0 < sizehint <= total:
229                 break
230             line = self.readline()
231         return lines
232    
233     def close(self):
234         self.rfile.close()
235    
236     def __iter__(self):
237         return self
238    
239     def next(self):
240         data = self.rfile.next()
241         self.bytes_read += len(data)
242         self._check_length()
243         return data
244
245
246 class HTTPRequest(object):
247     """An HTTP Request (and response).
248     
249     A single HTTP connection may consist of multiple request/response pairs.
250     
251     send: the 'send' method from the connection's socket object.
252     wsgi_app: the WSGI application to call.
253     environ: a partial WSGI environ (server and connection entries).
254         The caller MUST set the following entries:
255         * All wsgi.* entries, including .input
256         * SERVER_NAME and SERVER_PORT
257         * Any SSL_* entries
258         * Any custom entries like REMOTE_ADDR and REMOTE_PORT
259         * SERVER_SOFTWARE: the value to write in the "Server" response header.
260         * ACTUAL_SERVER_PROTOCOL: the value to write in the Status-Line of
261             the response. From RFC 2145: "An HTTP server SHOULD send a
262             response version equal to the highest version for which the
263             server is at least conditionally compliant, and whose major
264             version is less than or equal to the one received in the
265             request.  An HTTP server MUST NOT send a version for which
266             it is not at least conditionally compliant."
267     
268     outheaders: a list of header tuples to write in the response.
269     ready: when True, the request has been parsed and is ready to begin
270         generating the response. When False, signals the calling Connection
271         that the response should not be generated and the connection should
272         close.
273     close_connection: signals the calling Connection that the request
274         should close. This does not imply an error! The client and/or
275         server may each request that the connection be closed.
276     chunked_write: if True, output will be encoded with the "chunked"
277         transfer-coding. This value is set automatically inside
278         send_headers.
279     """
280    
281     max_request_header_size = 0
282     max_request_body_size = 0
283    
284     def __init__(self, wfile, environ, wsgi_app):
285         self.rfile = environ['wsgi.input']
286         self.wfile = wfile
287         self.environ = environ.copy()
288         self.wsgi_app = wsgi_app
289        
290         self.ready = False
291         self.started_response = False
292         self.status = ""
293         self.outheaders = []
294         self.sent_headers = False
295         self.close_connection = False
296         self.chunked_write = False
297    
298     def parse_request(self):
299         """Parse the next HTTP request start-line and message-headers."""
300         self.rfile.maxlen = self.max_request_header_size
301         self.rfile.bytes_read = 0
302        
303         try:
304             self._parse_request()
305         except MaxSizeExceeded:
306             self.simple_response("413 Request Entity Too Large")
307             return
308    
309     def _parse_request(self):
310         # HTTP/1.1 connections are persistent by default. If a client
311         # requests a page, then idles (leaves the connection open),
312         # then rfile.readline() will raise socket.error("timed out").
313         # Note that it does this based on the value given to settimeout(),
314         # and doesn't need the client to request or acknowledge the close
315         # (although your TCP stack might suffer for it: cf Apache's history
316         # with FIN_WAIT_2).
317         request_line = self.rfile.readline()
318         if not request_line:
319             # Force self.ready = False so the connection will close.
320             self.ready = False
321             return
322        
323         if request_line == "\r\n":
324             # RFC 2616 sec 4.1: "...if the server is reading the protocol
325             # stream at the beginning of a message and receives a CRLF
326             # first, it should ignore the CRLF."
327             # But only ignore one leading line! else we enable a DoS.
328             request_line = self.rfile.readline()
329             if not request_line:
330                 self.ready = False
331                 return
332        
333         environ = self.environ
334        
335         try:
336             method, path, req_protocol = request_line.strip().split(" ", 2)
337         except ValueError:
338             self.simple_response(400, "Malformed Request-Line")
339             return
340        
341         environ["REQUEST_METHOD"] = method
342        
343         # path may be an abs_path (including "http://host.domain.tld");
344         scheme, location, path, params, qs, frag = urlparse(path)
345        
346         if frag:
347             self.simple_response("400 Bad Request",
348                                  "Illegal #fragment in Request-URI.")
349             return
350        
351         if scheme:
352             environ["wsgi.url_scheme"] = scheme
353         if params:
354             path = path + ";" + params
355        
356         environ["SCRIPT_NAME"] = ""
357        
358         # Unquote the path+params (e.g. "/this%20path" -> "this path").
359         # http://www.w3.org/Protocols/rfc2616/rfc2616-sec5.html#sec5.1.2
360         #
361         # But note that "...a URI must be separated into its components
362         # before the escaped characters within those components can be
363         # safely decoded." http://www.ietf.org/rfc/rfc2396.txt, sec 2.4.2
364         atoms = [unquote(x) for x in quoted_slash.split(path)]
365         path = "%2F".join(atoms)
366         environ["PATH_INFO"] = path
367        
368         # Note that, like wsgiref and most other WSGI servers,
369         # we unquote the path but not the query string.
370         environ["QUERY_STRING"] = qs
371        
372         # Compare request and server HTTP protocol versions, in case our
373         # server does not support the requested protocol. Limit our output
374         # to min(req, server). We want the following output:
375         #     request    server     actual written   supported response
376         #     protocol   protocol  response protocol    feature set
377         # a     1.0        1.0           1.0                1.0
378         # b     1.0        1.1           1.1                1.0
379         # c     1.1        1.0           1.0                1.0
380         # d     1.1        1.1           1.1                1.1
381         # Notice that, in (b), the response will be "HTTP/1.1" even though
382         # the client only understands 1.0. RFC 2616 10.5.6 says we should
383         # only return 505 if the _major_ version is different.
384         rp = int(req_protocol[5]), int(req_protocol[7])
385         server_protocol = environ["ACTUAL_SERVER_PROTOCOL"]
386         sp = int(server_protocol[5]), int(server_protocol[7])
387         if sp[0] != rp[0]:
388             self.simple_response("505 HTTP Version Not Supported")
389             return
390         # Bah. "SERVER_PROTOCOL" is actually the REQUEST protocol.
391         environ["SERVER_PROTOCOL"] = req_protocol
392         self.response_protocol = "HTTP/%s.%s" % min(rp, sp)
393        
394         # If the Request-URI was an absoluteURI, use its location atom.
395         if location:
396             environ["SERVER_NAME"] = location
397        
398         # then all the http headers
399         try:
400             self.read_headers()
401         except ValueError, ex:
402             self.simple_response("400 Bad Request", repr(ex.args))
403             return
404        
405         mrbs = self.max_request_body_size
406         if mrbs and int(environ.get("CONTENT_LENGTH", 0)) > mrbs:
407             self.simple_response("413 Request Entity Too Large")
408             return
409        
410         # Persistent connection support
411         if self.response_protocol == "HTTP/1.1":
412             # Both server and client are HTTP/1.1
413             if environ.get("HTTP_CONNECTION", "") == "close":
414                 self.close_connection = True
415         else:
416             # Either the server or client (or both) are HTTP/1.0
417             if environ.get("HTTP_CONNECTION", "") != "Keep-Alive":
418                 self.close_connection = True
419        
420         # Transfer-Encoding support
421         te = None
422         if self.response_protocol == "HTTP/1.1":
423             te = environ.get("HTTP_TRANSFER_ENCODING")
424             if te:
425                 te = [x.strip().lower() for x in te.split(",") if x.strip()]
426        
427         self.chunked_read = False
428        
429         if te:
430             for enc in te:
431                 if enc == "chunked":
432                     self.chunked_read = True
433                 else:
434                     # Note that, even if we see "chunked", we must reject
435                     # if there is an extension we don't recognize.
436                     self.simple_response("501 Unimplemented")
437                     self.close_connection = True
438                     return
439        
440         # From PEP 333:
441         # "Servers and gateways that implement HTTP 1.1 must provide
442         # transparent support for HTTP 1.1's "expect/continue" mechanism.
443         # This may be done in any of several ways:
444         #   1. Respond to requests containing an Expect: 100-continue request
445         #      with an immediate "100 Continue" response, and proceed normally.
446         #   2. Proceed with the request normally, but provide the application
447         #      with a wsgi.input stream that will send the "100 Continue"
448         #      response if/when the application first attempts to read from
449         #      the input stream. The read request must then remain blocked
450         #      until the client responds.
451         #   3. Wait until the client decides that the server does not support
452         #      expect/continue, and sends the request body on its own.
453         #      (This is suboptimal, and is not recommended.)
454         #
455         # We used to do 3, but are now doing 1. Maybe we'll do 2 someday,
456         # but it seems like it would be a big slowdown for such a rare case.
457         if environ.get("HTTP_EXPECT", "") == "100-continue":
458             self.simple_response(100)
459        
460         self.ready = True
461    
462     def read_headers(self):
463         """Read header lines from the incoming stream."""
464         environ = self.environ
465        
466         while True:
467             line = self.rfile.readline()
468             if not line:
469                 # No more data--illegal end of headers
470                 raise ValueError("Illegal end of headers.")
471            
472             if line == '\r\n':
473                 # Normal end of headers
474                 break
475            
476             if line[0] in ' \t':
477                 # It's a continuation line.
478                 v = line.strip()
479             else:
480                 k, v = line.split(":", 1)
481                 k, v = k.strip().upper(), v.strip()
482                 envname = "HTTP_" + k.replace("-", "_")
483            
484             if k in comma_separated_headers:
485                 existing = environ.get(envname)
486                 if existing:
487                     v = ", ".join((existing, v))
488             environ[envname] = v
489        
490         ct = environ.pop("HTTP_CONTENT_TYPE", None)
491         if ct is not None:
492             environ["CONTENT_TYPE"] = ct
493         cl = environ.pop("HTTP_CONTENT_LENGTH", None)
494         if cl is not None:
495             environ["CONTENT_LENGTH"] = cl
496    
497     def decode_chunked(self):
498         """Decode the 'chunked' transfer coding."""
499         cl = 0
500         data = StringIO.StringIO()
501         while True:
502             line = self.rfile.readline().strip().split(";", 1)
503             chunk_size = int(line.pop(0), 16)
504             if chunk_size <= 0:
505                 break
506 ##            if line: chunk_extension = line[0]
507             cl += chunk_size
508             data.write(self.rfile.read(chunk_size))
509             crlf = self.rfile.read(2)
510             if crlf != "\r\n":
511                 self.simple_response("400 Bad Request",
512                                      "Bad chunked transfer coding "
513                                      "(expected '\\r\\n', got %r)" % crlf)
514                 return
515        
516         # Grab any trailer headers
517         self.read_headers()
518        
519         data.seek(0)
520         self.environ["wsgi.input"] = data
521         self.environ["CONTENT_LENGTH"] = str(cl) or ""
522         return True
523    
524     def respond(self):
525         """Call the appropriate WSGI app and write its iterable output."""
526         # Set rfile.maxlen to ensure we don't read past Content-Length.
527         # This will also be used to read the entire request body if errors
528         # are raised before the app can read the body.
529         if self.chunked_read:
530             # If chunked, Content-Length will be 0.
531             self.rfile.maxlen = self.max_request_body_size
532         else:
533             cl = int(self.environ.get("CONTENT_LENGTH", 0))
534             if self.max_request_body_size:
535                 self.rfile.maxlen = min(cl, self.max_request_body_size)
536             else:
537                 self.rfile.maxlen = cl
538         self.rfile.bytes_read = 0
539        
540         try:
541             self._respond()
542         except MaxSizeExceeded:
543             if not self.sent_headers:
544                 self.simple_response("413 Request Entity Too Large")
545             return
546    
547     def _respond(self):
548         if self.chunked_read:
549             if not self.decode_chunked():
550                 self.close_connection = True
551                 return
552        
553         response = self.wsgi_app(self