diff --git a/dlls/urlmon/tests/uri.c b/dlls/urlmon/tests/uri.c index 65fd4a33a6a..5bf8ca28d49 100644 --- a/dlls/urlmon/tests/uri.c +++ b/dlls/urlmon/tests/uri.c @@ -2659,6 +2659,208 @@ static const uri_properties uri_tests[] = { {URL_SCHEME_WILDCARD,S_OK,FALSE}, {URLZONE_INVALID,E_NOTIMPL,FALSE} } + }, + /* Forbidden characters are encoded for opaque known scheme types. */ + { "mailto:\"acco<|>unt@example.com\"", 0, S_OK, FALSE, + Uri_HAS_ABSOLUTE_URI|Uri_HAS_DISPLAY_URI|Uri_HAS_EXTENSION|Uri_HAS_PATH|Uri_HAS_PATH_AND_QUERY| + Uri_HAS_RAW_URI|Uri_HAS_SCHEME_NAME|Uri_HAS_HOST_TYPE|Uri_HAS_SCHEME, + TRUE, + { + {"mailto:%22acco%3C%7C%3Eunt@example.com%22",S_OK,TRUE}, + {"",S_FALSE,FALSE}, + {"mailto:%22acco%3C%7C%3Eunt@example.com%22",S_OK,TRUE}, + {"",S_FALSE,FALSE}, + {".com%22",S_OK,TRUE}, + {"",S_FALSE,TRUE}, + {"",S_FALSE,FALSE}, + {"",S_FALSE,FALSE}, + {"%22acco%3C%7C%3Eunt@example.com%22",S_OK,TRUE}, + {"%22acco%3C%7C%3Eunt@example.com%22",S_OK,TRUE}, + {"",S_FALSE,TRUE}, + {"mailto:\"acco<|>unt@example.com\"",S_OK,FALSE}, + {"mailto",S_OK,FALSE}, + {"",S_FALSE,FALSE}, + {"",S_FALSE,FALSE} + }, + { + {Uri_HOST_UNKNOWN,S_OK,FALSE}, + {0,S_FALSE,FALSE}, + {URL_SCHEME_MAILTO,S_OK,FALSE}, + {URLZONE_INVALID,E_NOTIMPL,FALSE} + } + }, + { "news:test.tes<|>t.com", 0, S_OK, FALSE, + Uri_HAS_ABSOLUTE_URI|Uri_HAS_DISPLAY_URI|Uri_HAS_EXTENSION|Uri_HAS_PATH|Uri_HAS_PATH_AND_QUERY| + Uri_HAS_RAW_URI|Uri_HAS_SCHEME_NAME|Uri_HAS_HOST_TYPE|Uri_HAS_SCHEME, + TRUE, + { + {"news:test.tes%3C%7C%3Et.com",S_OK,TRUE}, + {"",S_FALSE,FALSE}, + {"news:test.tes%3C%7C%3Et.com",S_OK,TRUE}, + {"",S_FALSE,FALSE}, + {".com",S_OK,TRUE}, + {"",S_FALSE,TRUE}, + {"",S_FALSE,FALSE}, + {"",S_FALSE,FALSE}, + {"test.tes%3C%7C%3Et.com",S_OK,TRUE}, + {"test.tes%3C%7C%3Et.com",S_OK,TRUE}, + {"",S_FALSE,TRUE}, + {"news:test.tes<|>t.com",S_OK,FALSE}, + {"news",S_OK,FALSE}, + {"",S_FALSE,FALSE}, + {"",S_FALSE,FALSE} + }, + { + {Uri_HOST_UNKNOWN,S_OK,FALSE}, + {0,S_FALSE,FALSE}, + 
{URL_SCHEME_NEWS,S_OK,FALSE}, + {URLZONE_INVALID,E_NOTIMPL,FALSE} + } + }, + /* Forbidden characters aren't encoded when Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS is set. */ + { "news:test.tes<|>t.com", Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS, S_OK, FALSE, + Uri_HAS_ABSOLUTE_URI|Uri_HAS_DISPLAY_URI|Uri_HAS_EXTENSION|Uri_HAS_PATH|Uri_HAS_PATH_AND_QUERY| + Uri_HAS_RAW_URI|Uri_HAS_SCHEME_NAME|Uri_HAS_HOST_TYPE|Uri_HAS_SCHEME, + TRUE, + { + {"news:test.tes<|>t.com",S_OK,TRUE}, + {"",S_FALSE,FALSE}, + {"news:test.tes<|>t.com",S_OK,TRUE}, + {"",S_FALSE,FALSE}, + {".com",S_OK,TRUE}, + {"",S_FALSE,TRUE}, + {"",S_FALSE,FALSE}, + {"",S_FALSE,FALSE}, + {"test.tes<|>t.com",S_OK,TRUE}, + {"test.tes<|>t.com",S_OK,TRUE}, + {"",S_FALSE,TRUE}, + {"news:test.tes<|>t.com",S_OK,FALSE}, + {"news",S_OK,FALSE}, + {"",S_FALSE,FALSE}, + {"",S_FALSE,FALSE} + }, + { + {Uri_HOST_UNKNOWN,S_OK,FALSE}, + {0,S_FALSE,FALSE}, + {URL_SCHEME_NEWS,S_OK,FALSE}, + {URLZONE_INVALID,E_NOTIMPL,FALSE} + } + }, + /* Forbidden characters aren't encoded for unknown, opaque URIs. */ + { "urn:test.tes<|>t.com", 0, S_OK, FALSE, + Uri_HAS_ABSOLUTE_URI|Uri_HAS_DISPLAY_URI|Uri_HAS_EXTENSION|Uri_HAS_PATH|Uri_HAS_PATH_AND_QUERY| + Uri_HAS_RAW_URI|Uri_HAS_SCHEME_NAME|Uri_HAS_HOST_TYPE|Uri_HAS_SCHEME, + TRUE, + { + {"urn:test.tes<|>t.com",S_OK,TRUE}, + {"",S_FALSE,FALSE}, + {"urn:test.tes<|>t.com",S_OK,TRUE}, + {"",S_FALSE,FALSE}, + {".com",S_OK,TRUE}, + {"",S_FALSE,TRUE}, + {"",S_FALSE,FALSE}, + {"",S_FALSE,FALSE}, + {"test.tes<|>t.com",S_OK,TRUE}, + {"test.tes<|>t.com",S_OK,TRUE}, + {"",S_FALSE,TRUE}, + {"urn:test.tes<|>t.com",S_OK,FALSE}, + {"urn",S_OK,FALSE}, + {"",S_FALSE,FALSE}, + {"",S_FALSE,FALSE} + }, + { + {Uri_HOST_UNKNOWN,S_OK,FALSE}, + {0,S_FALSE,FALSE}, + {URL_SCHEME_UNKNOWN,S_OK,FALSE}, + {URLZONE_INVALID,E_NOTIMPL,FALSE} + } + }, + /* Percent encoded unreserved characters are decoded for known opaque URIs. 
*/ + { "news:test.%74%65%73%74.com", 0, S_OK, FALSE, + Uri_HAS_ABSOLUTE_URI|Uri_HAS_DISPLAY_URI|Uri_HAS_EXTENSION|Uri_HAS_PATH|Uri_HAS_PATH_AND_QUERY| + Uri_HAS_RAW_URI|Uri_HAS_SCHEME_NAME|Uri_HAS_HOST_TYPE|Uri_HAS_SCHEME, + TRUE, + { + {"news:test.test.com",S_OK,TRUE}, + {"",S_FALSE,FALSE}, + {"news:test.test.com",S_OK,TRUE}, + {"",S_FALSE,FALSE}, + {".com",S_OK,TRUE}, + {"",S_FALSE,TRUE}, + {"",S_FALSE,FALSE}, + {"",S_FALSE,FALSE}, + {"test.test.com",S_OK,TRUE}, + {"test.test.com",S_OK,TRUE}, + {"",S_FALSE,TRUE}, + {"news:test.%74%65%73%74.com",S_OK,FALSE}, + {"news",S_OK,FALSE}, + {"",S_FALSE,FALSE}, + {"",S_FALSE,FALSE} + }, + { + {Uri_HOST_UNKNOWN,S_OK,FALSE}, + {0,S_FALSE,FALSE}, + {URL_SCHEME_NEWS,S_OK,FALSE}, + {URLZONE_INVALID,E_NOTIMPL,FALSE} + } + }, + /* Percent encoded unreserved characters are still decoded for known scheme types when Uri_CREATE_NO_CANONICALIZE is set. */ + { "news:test.%74%65%73%74.com", Uri_CREATE_NO_CANONICALIZE, S_OK, FALSE, + Uri_HAS_ABSOLUTE_URI|Uri_HAS_DISPLAY_URI|Uri_HAS_EXTENSION|Uri_HAS_PATH|Uri_HAS_PATH_AND_QUERY| + Uri_HAS_RAW_URI|Uri_HAS_SCHEME_NAME|Uri_HAS_HOST_TYPE|Uri_HAS_SCHEME, + TRUE, + { + {"news:test.test.com",S_OK,TRUE}, + {"",S_FALSE,FALSE}, + {"news:test.test.com",S_OK,TRUE}, + {"",S_FALSE,FALSE}, + {".com",S_OK,TRUE}, + {"",S_FALSE,TRUE}, + {"",S_FALSE,FALSE}, + {"",S_FALSE,FALSE}, + {"test.test.com",S_OK,TRUE}, + {"test.test.com",S_OK,TRUE}, + {"",S_FALSE,TRUE}, + {"news:test.%74%65%73%74.com",S_OK,FALSE}, + {"news",S_OK,FALSE}, + {"",S_FALSE,FALSE}, + {"",S_FALSE,FALSE} + }, + { + {Uri_HOST_UNKNOWN,S_OK,FALSE}, + {0,S_FALSE,FALSE}, + {URL_SCHEME_NEWS,S_OK,FALSE}, + {URLZONE_INVALID,E_NOTIMPL,FALSE} + } + }, + /* Percent encoded characters aren't decoded for unknown scheme types. 
*/ + { "urn:test.%74%65%73%74.com", 0, S_OK, FALSE, + Uri_HAS_ABSOLUTE_URI|Uri_HAS_DISPLAY_URI|Uri_HAS_EXTENSION|Uri_HAS_PATH|Uri_HAS_PATH_AND_QUERY| + Uri_HAS_RAW_URI|Uri_HAS_SCHEME_NAME|Uri_HAS_HOST_TYPE|Uri_HAS_SCHEME, + TRUE, + { + {"urn:test.%74%65%73%74.com",S_OK,TRUE}, + {"",S_FALSE,FALSE}, + {"urn:test.%74%65%73%74.com",S_OK,TRUE}, + {"",S_FALSE,FALSE}, + {".com",S_OK,TRUE}, + {"",S_FALSE,TRUE}, + {"",S_FALSE,FALSE}, + {"",S_FALSE,FALSE}, + {"test.%74%65%73%74.com",S_OK,TRUE}, + {"test.%74%65%73%74.com",S_OK,TRUE}, + {"",S_FALSE,TRUE}, + {"urn:test.%74%65%73%74.com",S_OK,FALSE}, + {"urn",S_OK,FALSE}, + {"",S_FALSE,FALSE}, + {"",S_FALSE,FALSE} + }, + { + {Uri_HOST_UNKNOWN,S_OK,FALSE}, + {0,S_FALSE,FALSE}, + {URL_SCHEME_UNKNOWN,S_OK,FALSE}, + {URLZONE_INVALID,E_NOTIMPL,FALSE} + } } }; diff --git a/dlls/urlmon/uri.c b/dlls/urlmon/uri.c index 3e4e6fead20..f5ca037c450 100644 --- a/dlls/urlmon/uri.c +++ b/dlls/urlmon/uri.c @@ -1675,6 +1675,8 @@ static BOOL parse_path_opaque(const WCHAR **ptr, parse_data *data, DWORD flags) * (per MSDN documentation). */ static BOOL parse_hierpart(const WCHAR **ptr, parse_data *data, DWORD flags) { + const WCHAR *start = *ptr; + /* Checks if the authority information needs to be parsed. * * Relative URI's aren't hierarchical URI's, but, they could trick @@ -1703,7 +1705,11 @@ static BOOL parse_hierpart(const WCHAR **ptr, parse_data *data, DWORD flags) { return FALSE; return parse_path_hierarchical(ptr, data, flags); - } + } else + /* Reset ptr to its starting position so opaque path parsing + * begins at the correct location. + */ + *ptr = start; } /* If it reaches here, then the URI will be treated as an opaque @@ -2456,6 +2462,78 @@ static BOOL canonicalize_path_hierarchical(const parse_data *data, Uri *uri, return TRUE; } +/* Canonicalizes the path of an opaque URI, appending it to uri->canon_uri (only lengths are computed when computeOnly is set). 
+ * + * For known scheme types: + * 1) Forbidden characters (neither reserved nor unreserved) are percent + * encoded unless Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS is set. 
+ * + * 2) Percent encoded, unreserved characters are decoded + * to their actual values. + * + * NOTE(review): no branch below converts '\\' to '/' (known scheme types + * other than mailto); confirm whether that step is still pending. + */ +static BOOL canonicalize_path_opaque(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) { + const WCHAR *ptr; + const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN; + + if(!data->path) { + uri->path_start = -1; + uri->path_len = 0; + return TRUE; + } + + uri->path_start = uri->canon_len; + + /* Windows doesn't allow a "//" to appear after the scheme + * of a URI, if it's an opaque URI. + */ + if(data->scheme && *(data->path) == '/' && *(data->path+1) == '/') { + /* So it inserts a "/." before the "//" if it exists. */ + if(!computeOnly) { + uri->canon_uri[uri->canon_len] = '/'; + uri->canon_uri[uri->canon_len+1] = '.'; + } + + uri->canon_len += 2; + } + + for(ptr = data->path; ptr < data->path+data->path_len; ++ptr) { + if(*ptr == '%' && known_scheme) { + WCHAR val = decode_pct_val(ptr); + + if(is_unreserved(val)) { + if(!computeOnly) + uri->canon_uri[uri->canon_len] = val; + ++uri->canon_len; + + ptr += 2; + continue; + } else { + if(!computeOnly) + uri->canon_uri[uri->canon_len] = *ptr; + ++uri->canon_len; + } + } else if(known_scheme && !is_unreserved(*ptr) && !is_reserved(*ptr) && + !(flags & Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS)) { + if(!computeOnly) + pct_encode_val(*ptr, uri->canon_uri+uri->canon_len); + uri->canon_len += 3; + } else { + if(!computeOnly) + uri->canon_uri[uri->canon_len] = *ptr; + ++uri->canon_len; + } + } + + uri->path_len = uri->canon_len - uri->path_start; + + TRACE("(%p %p %x %d): Canonicalized opaque URI path %s len=%d\n", data, uri, flags, computeOnly, + debugstr_wn(uri->canon_uri+uri->path_start, uri->path_len), uri->path_len); + return TRUE; +} + /* Determines how the URI represented by the parse_data should be canonicalized. 
* * Essentially, if the parse_data represents an hierarchical URI then it calls @@ -2493,6 +2571,9 @@ static BOOL canonicalize_hierpart(const parse_data *data, Uri *uri, DWORD flags, uri->authority_start = -1; uri->authority_len = 0; uri->domain_offset = -1; + + if(!canonicalize_path_opaque(data, uri, flags, computeOnly)) + return FALSE; } return TRUE;