Download Install Tutorial Docs FAQ Tools Wiki License Team IRC Planet Involvement Shop Book

Changeset 1977

Show
Ignore:
Timestamp:
06/07/08 14:08:20
Author:
fumanchu
Message:

Most of the fix for #815 (URL contains multiple slashes).

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • branches/815-urljoin/cherrypy/lib/http.py

    r1970 r1977  
    3131    original URL, even if either atom is blank. 
    3232    """ 
    33     url = "/".join([x for x in atoms if x]) 
    34     while "//" in url: 
    35         url = url.replace("//", "/") 
     33    if atoms: 
     34        url = "\x00".join(atoms) 
     35        url = url.replace("/\x00/", "/") 
     36        url = url.replace("\x00/", "/") 
     37        url = url.replace("/\x00", "/") 
     38    else: 
     39        url = "" 
    3640    # Special-case the final url of "", and return "/" instead. 
    3741    return url or "/" 
  • branches/815-urljoin/cherrypy/test/test_objectmapping.py

    r1767 r1977  
    239239            self.getPage("/confvalue") 
    240240            self.assertBody((url or "/").split("/")[-2]) 
     241##             
     242##            # Test extra leading slash. 
     243##            self.getPage("//a/b/c") 
     244##            self.assertStatus(200) 
     245##            self.assertBody("default:('', 'a', 'b', 'c')") 
    241246         
    242247        self.script_name = "" 
  • branches/815-urljoin/cherrypy/wsgiserver/__init__.py

    r1971 r1977  
    9494import traceback 
    9595from urllib import unquote 
    96 from urlparse import urlparse 
    9796import warnings 
    9897 
     
    242241        self._check_length() 
    243242        return data 
     243 
     244 
     245MAX_CACHE_SIZE = 0 
     246_uri_parse_cache = {} 
     247def requesturi_split(uri): 
     248    """Parse uri into (scheme, authority, path, query). 
     249     
     250    <scheme>://<authority>/<path>?<query>#<fragment> 
     251    """ 
     252    # Relevant BNF: 
     253    #   Request-URI    = "*" | absoluteURI | abs_path | authority 
     254    #   absoluteURI   = scheme ":" ( hier_part | opaque_part ) 
     255    #   hier_part     = ( net_path | abs_path ) [ "?" query ] 
     256    #   net_path      = "//" authority [ abs_path ] 
     257    #   abs_path      = "/"  path_segments 
     258    #   path_segments = segment *( "/" segment ) 
     259    #   segment       = *pchar *( ";" param ) 
     260     
     261    # What to do with "//path"? It cannot be an absoluteURI since it 
     262    # does not start with a "scheme:" component; it cannot be 'authority' 
     263    # since it includes the reserved '/' character; therefore, it must 
     264    # be an abs_path. Note especially that this has NOTHING to do with 
     265    # the "Relative URI References" section of RFC 2396: 
     266    #   relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ] 
     267    # That is, "//path" cannot be interpreted as a net_path: 
     268    #   net_path      = "//" authority [ abs_path ] 
     269    # Unfortunately, urlparse.urlsplit gets this wrong even when we 
     270    # explicitly tell it we're passing an HTTP URI. So we use our own. 
     271     
     272    # Neither the scheme, the authority, nor the path components 
     273    # may contain "?", a reserved character. 
     274    # According to RFC 2396, absoluteURI's might treat everything after the 
     275    # "scheme:" as an opaque_part (= *uric, including "?"), but we read 
     276    # RFC 2616 3.2.2 as implying HTTP URI's are always hierarchical. 
     277    # Therefore, rather than trying to cache all possible URI's, we cache 
     278    # only the part before the first "?". 
     279    query = '' 
     280    if '?' in uri: 
     281        uri, query = uri.split('?', 1) 
     282     
     283    key = uri 
     284    cached = _uri_parse_cache.get(key, None) 
     285    if cached: 
     286        return cached + (query,) 
     287     
     288    # uri may be an absoluteURI (including "http://host.domain.tld") 
     289    scheme = authority = '' 
     290    i = uri.find(':') 
     291    if i > 0: 
     292        scheme, uri = uri[:i].lower(), uri[i+1:] 
     293        # Note the 'authority' component is only allowed when the URI 
     294        # begins with an explicit "scheme:" 
     295        if uri[:2] == '//': 
     296            for c in '/?#': # the order is important! 
     297                i = uri.find(c, 2) 
     298                if i >= 0: 
     299                    authority, uri = uri[2:i], uri[i:] 
     300                    break 
     301            else: 
     302                authority, uri = uri[2:], '' 
     303     
     304    if '#' in uri: 
     305        raise ValueError("Illegal #fragment in Request-URI.") 
     306     
     307    # avoid runaway growth 
     308    if MAX_CACHE_SIZE and len(_uri_parse_cache) >= MAX_CACHE_SIZE: 
     309        try: 
     310            _uri_parse_cache.popitem() 
     311        except KeyError: 
     312            pass 
     313    _uri_parse_cache[key] = v = (scheme, authority, uri) 
     314     
     315    return v + (query,) 
    244316 
    245317 
     
    333405        environ = self.environ 
    334406         
    335         method, path, req_protocol = request_line.strip().split(" ", 2) 
     407        method, uri, req_protocol = request_line.strip().split(" ", 2) 
    336408        environ["REQUEST_METHOD"] = method 
    337409         
    338         # path may be an abs_path (including "http://host.domain.tld"); 
    339         scheme, location, path, params, qs, frag = urlparse(path) 
    340          
    341         if frag: 
    342             self.simple_response("400 Bad Request", 
    343                                  "Illegal #fragment in Request-URI.") 
     410        try: 
     411            scheme, location, path, qs = requesturi_split(uri) 
     412        except ValueError, e: 
     413            self.simple_response("400 Bad Request", e.args[0]) 
    344414            return 
    345415         
    346416        if scheme: 
    347417            environ["wsgi.url_scheme"] = scheme 
    348         if params: 
    349             path = path + ";" + params 
    350418         
    351419        environ["SCRIPT_NAME"] = "" 

Hosted by WebFaction

Log in as guest/cpguest to create tickets