Download Install Tutorial Docs FAQ Tools WikiLicense Team IRC Planet Involvement Shop Book

root/trunk/cherrypy/_cpdispatch.py

Revision 2030 (checked in by lakin, 2 months ago)

#733 - Return a 404 when query parameters passed to a handler are incorrect. Similarly, return a 404 when path atoms are incorrectly passed to a handler. Alternatively, return a 400 when body params are incorrectly passed to a handler. Includes tests.

  • Property svn:eol-style set to native
Line 
1 """CherryPy dispatchers.
2
3 A 'dispatcher' is the object which looks up the 'page handler' callable
4 and collects config for the current request based on the path_info, other
5 request attributes, and the application architecture. The core calls the
6 dispatcher as early as possible, passing it a 'path_info' argument.
7
8 The default dispatcher discovers the page handler by matching path_info
9 to a hierarchical arrangement of objects, starting at request.app.root.
10 """
11
12 import cherrypy
13
14
class PageHandler(object):
    """Callable which sets response.body.

    An instance stores a callable together with the positional and
    keyword arguments it should be invoked with; calling the instance
    performs that invocation and returns the callable's result.
    """

    def __init__(self, callable, *args, **kwargs):
        """Remember the callable and the arguments to pass to it."""
        self.callable = callable
        self.args = args
        self.kwargs = kwargs

    def __call__(self):
        """Invoke the stored callable and return whatever it returns.

        If the invocation raises TypeError, test_callable_spec() gets a
        chance to raise a more specific error about the supplied
        arguments. When it raises nothing, the original TypeError is
        re-raised unchanged.
        """
        try:
            return self.callable(*self.args, **self.kwargs)
        except TypeError:
            # The exception object itself is not needed here; the bare
            # ``raise`` below re-raises it if the spec test stays silent.
            test_callable_spec(self.callable, self.args, self.kwargs)
            raise
29
def test_callable_spec(callable, callable_args, callable_kwargs):
    """
    Inspect callable and test to see if the given args are suitable for it.

    When an error occurs during the handler's invoking stage there are 2
    erroneous cases:
    1.  Too many parameters passed to a function which doesn't define
        one of *args or **kwargs.
    2.  Too few parameters are passed to the function.

    There are 3 sources of parameters to a cherrypy handler.
    1.  query string parameters are passed as keyword parameters to the handler.
    2.  body parameters are also passed as keyword parameters.
    3.  when partial matching occurs, the final path atoms are passed as
        positional args.
    Both the query string and path atoms are part of the URI.  If they are
    incorrect, then a 404 Not Found should be raised. Conversely the body
    parameters are part of the request; if they are invalid a 400 Bad Request.

    callable: the page handler whose signature is inspected.
    callable_args: the positional arguments the handler was called with.
    callable_kwargs: the keyword arguments the handler was called with.

    Raises cherrypy.HTTPError (404 or 400) when a mismatch is found.
    Returns None when no mismatch is found, or when the signature of
    callable cannot be inspected.
    """
    try:
        (args, varargs, varkw, defaults) = inspect.getargspec(callable)
    except TypeError:
        # The signature could not be inspected. Say nothing, so that the
        # caller re-raises the handler's own TypeError instead of having
        # it masked by an inspection error.
        return

    if args and args[0] == 'self':
        args = args[1:]

    arg_usage = dict([(arg, 0) for arg in args])
    vararg_usage = 0
    varkw_usage = 0
    extra_kwargs = set()

    # Positional values fill the named args left to right; any surplus
    # would have to be absorbed by *args.
    for i, _ in enumerate(callable_args):
        try:
            arg_usage[args[i]] += 1
        except IndexError:
            vararg_usage += 1

    # Keyword values either match a named arg or would have to be
    # absorbed by **kwargs.
    for key in callable_kwargs:
        try:
            arg_usage[key] += 1
        except KeyError:
            varkw_usage += 1
            extra_kwargs.add(key)

    # Defaults belong to the LAST len(defaults) named args, not the first.
    # They take effect only when the arg hasn't been used yet.
    if defaults:
        for arg in args[-len(defaults):]:
            if arg_usage[arg] == 0:
                arg_usage[arg] += 1

    missing_args = []
    multiple_args = []
    for key, usage in arg_usage.items():
        if usage == 0:
            missing_args.append(key)
        elif usage > 1:
            multiple_args.append(key)

    if missing_args:
        # In the case where the method allows body arguments
        # there are 3 potential errors:
        # 1. not enough query string parameters -> 404
        # 2. not enough body parameters -> 400
        # 3. not enough path parts (partial matches) -> 404
        #
        # We can't actually tell which case it is,
        # so I'm raising a 404 because that covers 2/3 of the
        # possibilities
        #
        # In the case where the method does not allow body
        # arguments it's definitely a 404.
        raise cherrypy.HTTPError(404,
                message="Missing parameters: %s" % ",".join(missing_args))

    # the extra positional arguments come from the path - 404 Not Found
    if not varargs and vararg_usage > 0:
        raise cherrypy.HTTPError(404)

    # Any keyword argument that is not a body parameter is treated as
    # having come from the query string.
    body_params = cherrypy.request.body_params or {}
    body_params = set(body_params.keys())
    qs_params = set(callable_kwargs.keys()) - body_params

    if multiple_args:

        if qs_params.intersection(set(multiple_args)):
            # If any of the multiple parameters came from the query string then
            # it's a 404 Not Found
            error = 404
        else:
            # Otherwise it's a 400 Bad Request
            error = 400

        raise cherrypy.HTTPError(error,
                message="Multiple values for parameters: "\
                        "%s" % ",".join(multiple_args))

    if not varkw and varkw_usage > 0:

        # If there were extra query string parameters, it's a 404 Not Found
        extra_qs_params = qs_params.intersection(extra_kwargs)
        if extra_qs_params:
            raise cherrypy.HTTPError(404,
                message="Unexpected query string "\
                        "parameters: %s" % ", ".join(extra_qs_params))

        # If there were any extra body parameters, it's a 400 Bad Request
        extra_body_params = body_params.intersection(extra_kwargs)
        if extra_body_params:
            raise cherrypy.HTTPError(400,
                message="Unexpected body parameters: "\
                        "%s" % ", ".join(extra_body_params))
138
139
140 try:
141     import inspect
142 except ImportError:
143     test_callable_spec = lambda callable, args, kwargs: None
144
145
146
class LateParamPageHandler(PageHandler):
    """PageHandler whose keyword arguments are assembled at call time.

    cherrypy.request.params must not be captured when the handler is
    looked up, because tools (the decoding tool, for example) may still
    modify that dict between lookup and invocation. This subclass makes
    'kwargs' a property, so request.params is effectively 'bound late':
    each read starts from a fresh copy of request.params and lays any
    explicitly assigned kwargs on top of it.
    """

    def _get_kwargs(self):
        # Copy first so that request.params itself is never modified here.
        merged = cherrypy.request.params.copy()
        explicit = self._kwargs
        if explicit:
            merged.update(explicit)
        return merged

    def _set_kwargs(self, kwargs):
        # Only the explicitly assigned kwargs are stored.
        self._kwargs = kwargs

    kwargs = property(_get_kwargs, _set_kwargs,
                      doc='page handler kwargs (with '
                      'cherrypy.request.params copied in)')
168
169
class Dispatcher(object):
    """CherryPy Dispatcher which walks a tree of objects to find a handler.

    The walk starts at cherrypy.request.app.root. Each component of the
    path_info argument is looked up as an attribute of the object found
    for the previous component. A handler only matches if it has an
    'exposed' attribute which evaluates to True. The special method name
    "index" matches a URI which ends in a slash ("/"). The special method
    name "default" may match a portion of the path_info (but only when no
    longer substring of the path_info matches some other object).

    This is the default, built-in dispatcher for CherryPy.
    """

    def __call__(self, path_info):
        """Set handler and config for the current request."""
        request = cherrypy.request
        func, vpath = self.find_handler(path_info)

        if not func:
            request.handler = cherrypy.NotFound()
            return

        # Decode any leftover %2F in the virtual_path atoms.
        decoded = [atom.replace("%2F", "/") for atom in vpath]
        request.handler = LateParamPageHandler(func, *decoded)

    def find_handler(self, path):
        """Return the appropriate page handler, plus any virtual path.

        Two objects are returned. The first is a callable which can be
        used to generate page output (or None when nothing matched). Any
        parameters from the query string or request body will be sent to
        that callable as keyword arguments.

        The callable is found by traversing the application's tree,
        starting from cherrypy.request.app.root, and matching path
        components to successive objects in the tree. For example, the
        URL "/path/to/handler" might return root.path.to.handler.

        The second object is a list of names which are 'virtual path'
        components: parts of the URL which are dynamic, and were not used
        when looking up the handler. These are passed to the handler as
        positional arguments.

        As a side effect, request.config is always set, and
        request.is_index is set whenever a handler is found.
        """
        request = cherrypy.request
        app = request.app
        root = app.root

        # Each trail entry is [name, object, config, path so far].
        # Start with the root object and its config.
        rootconf = {}
        if hasattr(root, "_cp_config"):
            rootconf.update(root._cp_config)
        if "/" in app.config:
            rootconf.update(app.config["/"])
        object_trail = [['root', root, rootconf, ""]]

        names = [atom for atom in path.strip('/').split('/') if atom]
        names.append('index')

        node = root
        curpath = ""
        for name in names:
            # map to legal Python identifiers (replace '.' with '_')
            node = getattr(node, name.replace('.', '_'), None)

            # Get _cp_config attached to this node.
            nodeconf = {}
            if node is not None and hasattr(node, "_cp_config"):
                nodeconf.update(node._cp_config)

            # Mix in values from app.config for this path.
            curpath = "/".join((curpath, name))
            if curpath in app.config:
                nodeconf.update(app.config[curpath])

            object_trail.append([name, node, nodeconf, curpath])

        def set_conf():
            """Collapse all object_trail config into cherrypy.request.config."""
            merged = cherrypy.config.copy()
            # Note that we merge the config from each node
            # even if that node was None.
            for entry in object_trail:
                conf, section = entry[2], entry[3]
                merged.update(conf)
                if 'tools.staticdir.dir' in conf:
                    merged['tools.staticdir.section'] = section
            return merged

        # Try successive objects (reverse order)
        num_candidates = len(object_trail) - 1
        for i in range(num_candidates, -1, -1):

            candidate = object_trail[i][1]
            curpath = object_trail[i][3]
            if candidate is None:
                continue

            # Try a "default" method on the current leaf.
            if hasattr(candidate, "default"):
                defhandler = candidate.default
                if getattr(defhandler, 'exposed', False):
                    # Insert any extra _cp_config from the default handler.
                    extra = getattr(defhandler, "_cp_config", {})
                    object_trail.insert(
                        i + 1, ["default", defhandler, extra, curpath])
                    request.config = set_conf()
                    # See http://www.cherrypy.org/ticket/613
                    request.is_index = path.endswith("/")
                    return defhandler, names[i:-1]

            # Uncomment the next line to restrict positional params to "default".
            # if i < num_candidates - 2: continue

            # Try the current leaf.
            if getattr(candidate, 'exposed', False):
                request.config = set_conf()
                # True only when we matched the extra ".index" entry; tools
                # use this mark to decide about trailing-slash redirects.
                # Handlers which take positional parameters (virtual
                # paths) are therefore marked as non-index.
                request.is_index = (i == num_candidates)
                return candidate, names[i:-1]

        # We didn't find anything
        request.config = set_conf()
        return None, []
300
301
class MethodDispatcher(Dispatcher):
    """Additional dispatch based on cherrypy.request.method.upper().

    Methods named GET, POST, etc will be called on an exposed class.
    The method names must be all caps; the Allow response header lists
    every all-caps attribute name of the resource as an allowable HTTP
    verb.

    Note that the containing class must be exposed, not the methods.
    """

    def __call__(self, path_info):
        """Set handler and config for the current request."""
        request = cherrypy.request
        resource, vpath = self.find_handler(path_info)

        if not resource:
            request.handler = cherrypy.NotFound()
            return

        # Set Allow header
        allowed = [attr for attr in dir(resource) if attr.isupper()]
        if "GET" in allowed and "HEAD" not in allowed:
            allowed.append("HEAD")
        allowed.sort()
        cherrypy.response.headers['Allow'] = ", ".join(allowed)

        # Find the subhandler; HEAD falls back to GET when not defined.
        verb = request.method.upper()
        func = getattr(resource, verb, None)
        if func is None and verb == "HEAD":
            func = getattr(resource, "GET", None)

        if not func:
            request.handler = cherrypy.HTTPError(405)
            return

        # Decode any leftover %2F in the virtual_path atoms.
        decoded = [atom.replace("%2F", "/") for atom in vpath]
        request.handler = LateParamPageHandler(func, *decoded)
339
340
class RoutesDispatcher(object):
    """A Routes based dispatcher for CherryPy."""

    def __init__(self, full_result=False):
        """
        Routes dispatcher

        Set full_result to True if you wish the controller
        and the action to be passed on to the page handler
        parameters. By default they won't be.
        """
        import routes
        self.full_result = full_result
        self.controllers = {}
        self.mapper = routes.Mapper()
        # Let the mapper discover controller names from our registry.
        self.mapper.controller_scan = self.controllers.keys

    def connect(self, name, route, controller, **kwargs):
        """Register controller under name and connect route to it.

        Extra keyword arguments are passed through to mapper.connect().
        """
        self.controllers[name] = controller
        self.mapper.connect(name, route, controller=name, **kwargs)

    def redirect(self, url):
        """Raise cherrypy.HTTPRedirect for the given url."""
        raise cherrypy.HTTPRedirect(url)

    def __call__(self, path_info):
        """Set handler and config for the current request."""
        func = self.find_handler(path_info)
        if func:
            cherrypy.request.handler = LateParamPageHandler(func)
        else:
            cherrypy.request.handler = cherrypy.NotFound()

    def find_handler(self, path_info):
        """Find the right page handler, and set request.config.

        Returns the attribute named by the matched 'action' on the
        matched controller, or None when the mapper finds no match, the
        match has no action, or the attribute does not exist. The matched
        route values are merged into request.params (without 'controller'
        and 'action' unless full_result is set).
        """
        import routes

        request = cherrypy.request

        config = routes.request_config()
        config.mapper = self.mapper
        if hasattr(cherrypy.request, 'wsgi_environ'):
            config.environ = cherrypy.request.wsgi_environ
        config.host = request.headers.get('Host', None)
        config.protocol = request.scheme
        config.redirect = self.redirect

        result = self.mapper.match(path_info)

        config.mapper_dict = result
        params = {}
        if result:
            params = result.copy()
        if not self.full_result:
            params.pop('controller', None)
            params.pop('action', None)
        request.params.update(params)

        # Get config for the root object/path.
        request.config = base = cherrypy.config.copy()
        curpath = ""

        def merge(nodeconf):
            # Record the staticdir section on the per-request config only.
            # nodeconf is a shared dict (a _cp_config attribute or an
            # app.config section) and must not be modified per request.
            base.update(nodeconf)
            if 'tools.staticdir.dir' in nodeconf:
                base['tools.staticdir.section'] = curpath or "/"

        app = request.app
        root = app.root
        if hasattr(root, "_cp_config"):
            merge(root._cp_config)
        if "/" in app.config:
            merge(app.config["/"])

        # Mix in values from app.config.
        atoms = [x for x in path_info.split("/") if x]
        if atoms:
            last = atoms.pop()
        else:
            last = None
        for atom in atoms:
            curpath = "/".join((curpath, atom))
            if curpath in app.config:
                merge(app.config[curpath])

        handler = None
        if result:
            controller = result.get('controller', None)
            controller = self.controllers.get(controller)
            if controller:
                # Get config from the controller.
                if hasattr(controller, "_cp_config"):
                    merge(controller._cp_config)

            action = result.get('action', None)
            if action is not None:
                handler = getattr(controller, action, None)
                # Get config from the handler
                if hasattr(handler, "_cp_config"):
                    merge(handler._cp_config)

        # Do the last path atom here so it can
        # override the controller's _cp_config.
        if last:
            curpath = "/".join((curpath, last))
            if curpath in app.config:
                merge(app.config[curpath])

        return handler
449
450
def XMLRPCDispatcher(next_dispatcher=Dispatcher()):
    """Return a dispatcher which patches path_info before delegating.

    The returned callable passes path_info through
    cherrypy.lib.xmlrpc.patched_path() and hands the result to
    next_dispatcher, returning whatever next_dispatcher returns.
    """
    from cherrypy.lib import xmlrpc

    def xmlrpc_dispatch(path_info):
        return next_dispatcher(xmlrpc.patched_path(path_info))

    return xmlrpc_dispatch
457
458
def VirtualHost(next_dispatcher=Dispatcher(), use_x_forwarded_host=True, **domains):
    """Select a different handler based on the Host header.

    This is handy when a single CP server hosts several sites: multiple
    domains can be pointed at different parts of one website structure.
    For example:

        http://www.domain.example  ->  root
        http://www.domain2.example  ->  root/domain2/
        http://www.domain2.example:443  ->  root/secure

    can be accomplished via the following config:

        [/]
        request.dispatch = cherrypy.dispatch.VirtualHost(
            **{'www.domain2.example': '/domain2',
               'www.domain2.example:443': '/secure',
              })

    next_dispatcher: the next dispatcher object in the dispatch chain.
        The VirtualHost dispatcher adds a prefix to the URL and calls
        another dispatcher. Defaults to cherrypy.dispatch.Dispatcher().

    use_x_forwarded_host: if True (the default), any "X-Forwarded-Host"
        request header will be used instead of the "Host" header. This
        is commonly added by HTTP servers (such as Apache) when proxying.

    **domains: a dict of {host header value: virtual prefix} pairs.
        The incoming "Host" request header is looked up in this dict,
        and, if a match is found, the corresponding "virtual prefix"
        value will be prepended to the URL path before calling the
        next dispatcher. Note that you often need separate entries
        for "example.com" and "www.example.com". In addition, "Host"
        headers may contain the port number.
    """
    from cherrypy.lib import http

    def vhost_dispatch(path_info):
        headers = cherrypy.request.headers

        domain = headers.get('Host', '')
        if use_x_forwarded_host:
            domain = headers.get("X-Forwarded-Host", domain)

        prefix = domains.get(domain, "")
        if prefix:
            path_info = http.urljoin(prefix, path_info)

        result = next_dispatcher(path_info)

        # Touch up staticdir config. See http://www.cherrypy.org/ticket/614.
        # The section recorded by the next dispatcher includes the virtual
        # prefix; strip it off again.
        section = cherrypy.request.config.get('tools.staticdir.section')
        if section:
            cherrypy.request.config['tools.staticdir.section'] = (
                section[len(prefix):])

        return result

    return vhost_dispatch
516
Note: