Download Install Tutorial Docs FAQ Tools Wiki License Team IRC Planet Involvement Shop Book

root/branches/cherrypy-2.x/cherrypy/filters/tidyfilter.py

Revision 1519 (checked in by fumanchu, 2 years ago)

2.x backport of [1402]; fix for #577 (GzipFilter doesn't force an update of the Content-Length header). Also fixes #617.

  • Property svn:eol-style set to native
Line 
1 import cgi
2 import os
3 import StringIO
4 import traceback
5
6 import cherrypy
7 from basefilter import BaseFilter
8
9
class TidyFilter(BaseFilter):
    """Filter that runs the response through Tidy.

    Note that we use the standalone Tidy tool rather than the python
    mxTidy module. This is because this module doesn't seem to be
    stable and it crashes on some HTML pages (which means that the
    server would also crash)

    Configuration entries read by this filter:

    * ``tidy_filter.on`` -- the filter does nothing unless this is true.
    * ``tidy_filter.tmp_dir`` -- directory holding the scratch files
      ``page.html``, ``tidy.out`` and ``tidy.err``.
    * ``tidy_filter.tidy_path`` -- path of the Tidy executable.
    * ``tidy_filter.strict_xml`` -- if true, pass ``-xml`` to Tidy and,
      when Tidy reports nothing, also parse the page with elementtree.
    * ``tidy_filter.errors_to_ignore`` -- list of substrings; a Tidy
      message containing any of them is not reported.
    """

    def before_finalize(self):
        """Validate a ``text/html`` response and report any problems.

        If Tidy (or, in strict XML mode, the XML parser) complains, the
        response body is replaced by an HTML report: the messages
        followed by a numbered listing of the page source. Otherwise the
        response is left as it is.
        """
        if not cherrypy.config.get('tidy_filter.on', False):
            return

        # the tidy filter, by its very nature it's not generator friendly,
        # so we just collect the body and work with it.
        originalBody = cherrypy.response.collapse_body()

        fct = cherrypy.response.headers.get('Content-Type', '')
        # Media types are case-insensitive and may be padded with spaces.
        ct = fct.split(';')[0].strip().lower()
        encoding = ''
        i = fct.find('charset=')
        if i != -1:
            # Keep only the charset value itself: drop any parameter that
            # follows it, surrounding whitespace and optional quotes.
            encoding = fct[i + 8:].split(';')[0].strip().strip('"')
        if ct != 'text/html':
            return

        # NOTE(review): the scratch file names are fixed, so two responses
        # filtered at the same time would share them -- confirm whether
        # that can happen in the deployment before relying on this filter.
        tmpdir = cherrypy.config.get('tidy_filter.tmp_dir')
        pageFile = os.path.join(tmpdir, 'page.html')
        outFile = os.path.join(tmpdir, 'tidy.out')
        errFile = os.path.join(tmpdir, 'tidy.err')
        f = open(pageFile, 'wb')
        try:
            f.write(originalBody)
        finally:
            f.close()

        # Tidy's encoding switches carry no dash ("utf-8" -> "-utf8").
        tidyEncoding = encoding.replace('-', '')
        if tidyEncoding:
            tidyEncoding = '-' + tidyEncoding

        strictXml = ""
        if cherrypy.config.get('tidy_filter.strict_xml', False):
            strictXml = ' -xml'
        # NOTE(review): only the executable path is quoted; a tmp_dir
        # containing spaces would split into several shell arguments.
        os.system('"%s" %s%s -f %s -o %s %s' %
                  (cherrypy.config.get('tidy_filter.tidy_path'), tidyEncoding,
                   strictXml, errFile, outFile, pageFile))
        f = open(errFile, 'rb')
        try:
            err = f.read()
        finally:
            f.close()

        # Keep the warning/error lines that are not explicitly ignored.
        errorsToIgnore = cherrypy.config.get('tidy_filter.errors_to_ignore', [])
        newErrList = []
        for message in err.splitlines():
            if message.find('Warning') == -1 and message.find('Error') == -1:
                continue
            for errIgn in errorsToIgnore:
                if message.find(errIgn) != -1:
                    break
            else:
                newErrList.append(message)

        if newErrList:
            self._tidy_error_page("Wrong HTML:", '\n'.join(newErrList),
                                  originalBody)
        elif strictXml:
            # The HTML is OK, but is it valid XML
            # Use elementtree to parse XML
            from elementtree.ElementTree import parse
            # Replace these entity references with plain text before
            # parsing (presumably because the XML parser rejects them as
            # undefined entities).
            for tag in ('nbsp', 'quot'):
                originalBody = originalBody.replace(
                    '&' + tag + ';', tag.upper())

            if encoding:
                originalBody = """<?xml version="1.0" encoding="%s"?>""" % encoding + originalBody
            try:
                parse(StringIO.StringIO(originalBody))
            except Exception:
                # Wrong XML
                bodyFile = StringIO.StringIO()
                traceback.print_exc(file=bodyFile)
                self._tidy_error_page("Wrong XML:", bodyFile.getvalue(),
                                      originalBody)

    def _tidy_error_page(self, title, details, source):
        """Replace the response body with an HTML error report.

        title: heading text, emitted as-is.
        details: plain-text messages; HTML-escaped here, one per line.
        source: the page that was checked, shown as a numbered listing.
        """
        # Escape first, then add the line breaks, so that the <br /> tags
        # themselves are not escaped.
        parts = [title + '<br />',
                 cgi.escape(details).replace('\n', '<br />'),
                 '<br /><br />']
        for i, line in enumerate(source.splitlines()):
            shown = cgi.escape(line).replace('\t', '    ').replace(' ', '&nbsp;')
            parts.append("%03d - " % (i + 1) + shown + '<br />')

        cherrypy.response.body = ''.join(parts)
        # Delete Content-Length header so finalize() recalcs it.
        cherrypy.response.headers.pop("Content-Length", None)
111
Note: See TracBrowser for help on using the browser.

Hosted by WebFaction

Log in as guest/cpguest to create tickets