Download Install Tutorial Docs FAQ Tools Wiki License Team IRC Planet Involvement Shop Book

root/branches/cherrypy-2.x/cherrypy/filters/nsgmlsfilter.py

Revision 1519 (checked in by fumanchu, 2 years ago)

2.x backport of [1402]; fix for #577 (GzipFilter doesn't force an update of the Content-Length header). Also fixes #617.

  • Property svn:eol-style set to native
Line 
1 import os, cgi
2
3 import cherrypy
4 from basefilter import BaseFilter
5
6
class NsgmlsFilter(BaseFilter):
    """Filter that runs HTML responses through the external nsgmls validator.

    The filter is active only when the ``nsgmls_filter.on`` config entry is
    true and the response Content-Type is ``text/html``.  When nsgmls
    reports at least one error that is not listed in
    ``nsgmls_filter.errors_to_ignore``, the response body is replaced by an
    HTML report: the error lines followed by the line-numbered page source
    (with ``<script>`` elements removed, exactly as it was handed to
    nsgmls).

    Config entries read:
        nsgmls_filter.on                -- enable the filter (default False)
        nsgmls_filter.tmp_dir           -- directory for the scratch files
        nsgmls_filter.nsgmls_path       -- path of the nsgmls executable
        nsgmls_filter.catalog_path      -- SGML catalog passed via ``-c``
        nsgmls_filter.errors_to_ignore  -- substrings; an error line
                                           containing any of them is dropped
    """

    def before_finalize(self):
        """Validate the response body and swap in an error report if needed."""
        if not cherrypy.config.get('nsgmls_filter.on', False):
            return

        # The nsgmls filter, by its very nature, is not generator friendly,
        # so we just collect the body and work with it.
        original_body = cherrypy.response.collapse_body()

        # Media types are case-insensitive and may carry whitespace before
        # the parameters, so normalise before comparing.
        fct = cherrypy.response.headers.get('Content-Type', '')
        ct = fct.split(';')[0].strip().lower()
        if ct != 'text/html':
            return

        page = self._strip_scripts(original_body)
        errors = self._filter_errors(self._run_nsgmls(page))
        if not errors:
            return

        cherrypy.response.body = self._render_report(errors, page)
        # Delete Content-Length header so finalize() recalcs it.
        cherrypy.response.headers.pop("Content-Length", None)

    def _strip_scripts(self, body):
        """Return ``body`` with every ``<script ...>...</script>`` span removed.

        nsgmls doesn't seem to handle bits of Javascript correctly (for
        instance, if ``<a`` appears in the Javascript code nsgmls complains
        about it).  An opening tag without a matching ``</script>`` is left
        in place.
        """
        closing = '</script>'
        while True:
            start = body.find('<script')
            if start == -1:
                break
            end = body.find(closing, start)
            if end == -1:
                break
            body = body[:start] + body[end + len(closing):]
        return body

    def _run_nsgmls(self, page):
        """Write ``page`` to disk, run nsgmls on it, return its error lines."""
        tmpdir = cherrypy.config.get('nsgmls_filter.tmp_dir')
        # NOTE(review): the scratch file names are fixed, so requests that
        # are validated at the same time presumably share (and overwrite)
        # these two files -- confirm whether that matters for deployments.
        page_file = os.path.join(tmpdir, 'page.html')
        err_file = os.path.join(tmpdir, 'nsgmls.err')

        f = open(page_file, 'wb')
        try:
            f.write(page)
        finally:
            f.close()

        nsgmls_path = cherrypy.config.get('nsgmls_filter.nsgmls_path')
        catalog_path = cherrypy.config.get('nsgmls_filter.catalog_path')
        # -c: catalog, -f: file receiving the error messages, -s: suppress
        # normal output, -E10: give up after 10 errors.
        command = '%s -c%s -f%s -s -E10 %s' % (
            nsgmls_path, catalog_path, err_file, page_file)
        # Use forward slashes throughout so Windows-style paths survive
        # the trip through the shell.
        command = command.replace('\\', '/')
        os.system(command)

        f = open(err_file, 'rb')
        try:
            return f.read().splitlines()
        finally:
            f.close()

    def _filter_errors(self, errors):
        """Drop the error lines matching ``nsgmls_filter.errors_to_ignore``."""
        ignored = cherrypy.config.get('nsgmls_filter.errors_to_ignore', [])
        kept = []
        for err in errors:
            for err_ign in ignored:
                if err.find(err_ign) != -1:
                    break
            else:
                # No ignore pattern matched this line.
                kept.append(err)
        return kept

    def _render_report(self, errors, page):
        """Build the HTML report: error list, then the numbered page source."""
        chunks = [
            "Wrong HTML:<br />",
            cgi.escape('\n'.join(errors)).replace('\n', '<br />'),
            '<br /><br />',
        ]
        for index, line in enumerate(page.splitlines()):
            shown = cgi.escape(line).replace('\t', '    ').replace(' ', '&nbsp;')
            chunks.append("%03d - " % (index + 1) + shown + '<br />')
        return ''.join(chunks)
74
Note: See TracBrowser for help on using the browser.

Hosted by WebFaction

Log in as guest/cpguest to create tickets