Download Install Tutorial Docs FAQ Tools Wiki License Team IRC Planet Involvement Shop Book

root/tags/cherrypy-3.0.0/cherrypy/lib/encoding.py

Revision 1427 (checked in by fumanchu, 2 years ago)

Some tool buglets and docs.

  • Property svn:eol-style set to native
Line 
1 import struct
2 import time
3
4 import cherrypy
5
6
def decode(encoding=None, default_encoding='utf-8'):
    """Decode cherrypy.request.params.

    encoding: charset to decode with. When empty, the charset is taken
        from the request's Content-Type header; a "text/*" media type
        without a charset parameter is treated as ISO-8859-1.
    default_encoding: charset used when neither the caller nor the
        request headers supply one.

    If decoding raises UnicodeDecodeError, the whole set of params is
    decoded again as ISO-8859-1.
    """
    if not encoding:
        content_types = cherrypy.request.headers.elements("Content-Type")
        if content_types:
            content_type = content_types[0]
            encoding = content_type.params.get("charset", None)
            if not encoding:
                if content_type.value.lower().startswith("text/"):
                    # RFC 2616, section 3.7.1: when the sender gives no
                    # explicit charset parameter, subtypes of "text"
                    # default to "ISO-8859-1" when received via HTTP.
                    # http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.7.1
                    encoding = "ISO-8859-1"

        # Still nothing usable: fall back to the caller's default.
        encoding = encoding or default_encoding

    try:
        decode_params(encoding)
    except UnicodeDecodeError:
        # Browsers such as IE and Firefox omit the charset when posting
        # application/x-www-form-urlencoded forms, so the guess above
        # can still be wrong. Retry from scratch with ISO-8859-1, which
        # seems to be the preferred choice.
        decode_params("ISO-8859-1")
33
def decode_params(encoding):
    """Replace cherrypy.request.params with a copy decoded using 'encoding'.

    Byte-string values (and byte-string members of list values) are decoded
    to unicode. Uploaded files (objects with a 'file' attribute) and values
    that are already unicode are carried over unchanged, so no key is ever
    lost from the params dict.

    Any exception raised while decoding (e.g. UnicodeDecodeError) propagates
    before cherrypy.request.params is reassigned, which lets the caller
    retry with another encoding.
    """
    def _decode(item):
        # Uploaded files and already-decoded text pass through untouched;
        # calling .decode() on them would either fail or round-trip
        # through an implicit ASCII encode.
        if hasattr(item, 'file') or isinstance(item, unicode):
            return item
        return item.decode(encoding)

    decoded_params = {}
    for key, value in cherrypy.request.params.items():
        if isinstance(value, list):
            # value is a list: decode each element
            decoded_params[key] = [_decode(v) for v in value]
        else:
            decoded_params[key] = _decode(value)

    # Decode all or nothing, so we can try again on error.
    cherrypy.request.params = decoded_params
51
52
53 # Encoding
54
def encode(encoding=None, errors='strict'):
    """Encode the response body and set the Content-Type charset.

    Runs at most once per request (tracked by the '_encoding_attempted'
    flag on cherrypy.request). Only responses whose Content-Type starts
    with "text/" are touched: the charset returned by
    find_acceptable_charset() is written into the header's "charset"
    parameter.

    encoding: passed through to find_acceptable_charset().
    errors: error-handling scheme passed through to the encoder.
    """
    request = cherrypy.request
    if getattr(request, "_encoding_attempted", False):
        # Guard against running twice
        return
    request._encoding_attempted = True

    content_types = cherrypy.response.headers.elements("Content-Type")
    if not content_types:
        return

    content_type = content_types[0]
    if not content_type.value.lower().startswith("text/"):
        return

    # Set "charset=..." param on response Content-Type header
    charset = find_acceptable_charset(encoding, errors=errors)
    content_type.params['charset'] = charset
    cherrypy.response.headers["Content-Type"] = str(content_type)
68
def encode_stream(encoding, errors='strict'):
    """Encode a streaming response body.

    The body is wrapped in a generator that encodes unicode chunks lazily,
    so encoding errors can only surface while the stream is being written
    out. Always returns True.
    """
    def encoded_chunks(chunks):
        for piece in chunks:
            if not isinstance(piece, unicode):
                # Already a byte string (or other object): pass through.
                yield piece
            else:
                yield piece.encode(encoding, errors)

    response = cherrypy.response
    response.body = encoded_chunks(response.body)
    return True
82
def encode_string(encoding, errors='strict'):
    """Encode a buffered response body.

    Every unicode chunk of cherrypy.response.body is encoded with
    'encoding'; other chunks are kept as they are. Returns True when the
    body was replaced with the encoded chunks, or False when the charset
    is unknown (LookupError) or cannot represent the body (UnicodeError),
    in which case the body is left as it was.
    """
    response = cherrypy.response
    encoded = []
    try:
        for piece in response.body:
            if isinstance(piece, unicode):
                piece = piece.encode(encoding, errors)
            encoded.append(piece)
        response.body = encoded
    except (LookupError, UnicodeError):
        return False
    return True
96
def find_acceptable_charset(encoding=None, default_encoding='utf-8', errors='strict'):
    """Encode the response body with an acceptable charset; return its name.

    encoding: if given, only this charset is tried (and only when the
        request's Accept-Charset header permits it).
    default_encoding: charset tried when the client sent no Accept-Charset
        header, or when the header contains "*".
    errors: error-handling scheme passed to the encoder.

    Side effects: a non-streaming body is collapsed and its Content-Length
    header is removed; the response body is replaced by the encoder that
    succeeds.

    Raises cherrypy.HTTPError(500) when the client sent no Accept-Charset
    header and the body cannot be encoded with default_encoding, and
    cherrypy.HTTPError(406) when none of the candidate charsets works.
    """
    response = cherrypy.response
    # Template for the 500 raised when the default charset cannot encode
    # the body.
    failmsg = "The response could not be encoded with %r."

    if response.stream:
        encoder = encode_stream
    else:
        response.collapse_body()
        encoder = encode_string
        if response.headers.has_key("Content-Length"):
            # Delete Content-Length header so finalize() recalcs it.
            # Encoded strings may be of different lengths from their
            # unicode equivalents, and even from each other. For example:
            # >>> t = u"\u7007\u3040"
            # >>> len(t)
            # 2
            # >>> len(t.encode("UTF-8"))
            # 6
            # >>> len(t.encode("utf7"))
            # 8
            del response.headers["Content-Length"]

    # Parse the Accept-Charset request header, and try to provide one
    # of the requested charsets (in order of user preference).
    encs = cherrypy.request.headers.elements('Accept-Charset')
    charsets = [enc.value.lower() for enc in encs]
    attempted_charsets = []

    if encoding is not None:
        # If specified, force this encoding to be used, or fail.
        encoding = encoding.lower()
        if (not charsets) or "*" in charsets or encoding in charsets:
            # Record the attempt so the 406 message below reports it.
            attempted_charsets.append(encoding)
            if encoder(encoding, errors):
                return encoding
    else:
        if not encs:
            # Any character-set is acceptable.
            if encoder(default_encoding, errors):
                return default_encoding
            else:
                raise cherrypy.HTTPError(500, failmsg % default_encoding)
        else:
            if "*" not in charsets:
                # If no "*" is present in an Accept-Charset field, then all
                # character sets not explicitly mentioned get a quality
                # value of 0, except for ISO-8859-1, which gets a quality
                # value of 1 if not explicitly mentioned.
                iso = 'iso-8859-1'
                if iso not in charsets:
                    attempted_charsets.append(iso)
                    if encoder(iso, errors):
                        return iso

            for element in encs:
                if element.qvalue > 0:
                    if element.value == "*":
                        # Matches any charset. Try our default.
                        if default_encoding not in attempted_charsets:
                            attempted_charsets.append(default_encoding)
                            if encoder(default_encoding, errors):
                                return default_encoding
                    else:
                        encoding = element.value
                        if encoding not in attempted_charsets:
                            attempted_charsets.append(encoding)
                            if encoder(encoding, errors):
                                return encoding

    # No suitable encoding found.
    ac = cherrypy.request.headers.get('Accept-Charset')
    if ac is None:
        msg = "Your client did not send an Accept-Charset header."
    else:
        msg = "Your client sent this Accept-Charset header: %s." % ac
    msg += " We tried these charsets: %s." % ", ".join(attempted_charsets)
    raise cherrypy.HTTPError(406, msg)
172
173
174 # GZIP
175
def compress(body, compress_level):
    """Compress 'body' at the given compress_level.

    body: an iterable of byte strings.
    compress_level: zlib compression level passed to zlib.compressobj.

    This is a generator: it yields the pieces of a single gzip member
    (header, raw deflate data, then the CRC-32 and size trailer).
    """
    import zlib

    # Header fields, in the order laid out by RFC 1952 section 2.3.
    yield b'\037\213'      # magic header
    yield b'\010'          # compression method (deflate)
    yield b'\0'            # flags: no optional header fields
    # Modification time, masked to fit the 32-bit unsigned field.
    yield struct.pack("<L", int(time.time()) & 0xFFFFFFFF)
    yield b'\002'          # extra flags
    yield b'\377'          # operating system: unknown

    crc = zlib.crc32(b"")
    size = 0
    # Negative wbits selects a raw deflate stream; the gzip header and
    # trailer are produced by hand in this function.
    zobj = zlib.compressobj(compress_level,
                            zlib.DEFLATED, -zlib.MAX_WBITS,
                            zlib.DEF_MEM_LEVEL, 0)
    for line in body:
        size += len(line)
        crc = zlib.crc32(line, crc)
        yield zobj.compress(line)
    yield zobj.flush()
    # zlib.crc32 may return a signed or an unsigned value depending on the
    # Python version/platform; masking makes the unsigned pack always valid.
    yield struct.pack("<L", crc & 0xFFFFFFFF)
    yield struct.pack("<L", size & 0xFFFFFFFF)
199
def gzip(compress_level=9, mime_types=['text/html', 'text/plain']):
    """Gzip the response body if the client accepts it.

    compress_level: zlib compression level handed to compress().
    mime_types: media types eligible for compression. The list is only
        read, never modified.

    Nothing is done when the body is empty, when the request has no
    Accept-Encoding header, or when "identity" is listed with a non-zero
    qvalue before any gzip entry. When gzip is accepted and the response's
    media type is in mime_types, the body is replaced by a compressing
    generator, "Accept-Encoding" is added to Vary, Content-Encoding is set
    to "gzip" and Content-Length is removed. If no listed coding can be
    satisfied, the response is turned into a 406.
    """
    response = cherrypy.response
    if not response.body:
        # Response body is empty (might be a 304 for instance)
        return

    acceptable = cherrypy.request.headers.elements('Accept-Encoding')
    if not acceptable:
        # If no Accept-Encoding field is present in a request,
        # the server MAY assume that the client will accept any
        # content coding. In this case, if "identity" is one of
        # the available content-codings, then the server SHOULD use
        # the "identity" content-coding, unless it has additional
        # information that a different content-coding is meaningful
        # to the client.
        return

    # Media type without parameters. A response that has no Content-Type
    # header yields '' here, which simply matches none of the mime_types.
    ct = response.headers.get('Content-Type', '').split(';')[0].strip()
    for coding in acceptable:
        if coding.value == 'identity' and coding.qvalue != 0:
            return
        if coding.value in ('gzip', 'x-gzip'):
            if coding.qvalue == 0:
                return
            if ct in mime_types:
                # Return a generator that compresses the page
                varies = response.headers.get("Vary", "")
                varies = [x.strip() for x in varies.split(",") if x.strip()]
                if "Accept-Encoding" not in varies:
                    varies.append("Accept-Encoding")
                response.headers['Vary'] = ", ".join(varies)

                response.headers['Content-Encoding'] = 'gzip'
                response.body = compress(response.body, compress_level)
                if response.headers.has_key("Content-Length"):
                    # Delete Content-Length header so finalize() recalcs it.
                    del response.headers["Content-Length"]
            return
    cherrypy.HTTPError(406, "identity, gzip").set_response()
Note: See TracBrowser for help on using the browser.

Hosted by WebFaction

Log in as guest/cpguest to create tickets