urlmon: Implemented canonicalization function for hierarchical URI paths.
This commit is contained in:
parent
93d79ee18e
commit
300d0e1ad4
|
@ -2273,6 +2273,187 @@ static const uri_properties uri_tests[] = {
|
|||
{URL_SCHEME_UNKNOWN,S_OK,FALSE},
|
||||
{URLZONE_INVALID,E_NOTIMPL,FALSE}
|
||||
}
|
||||
},
|
||||
/* Since the original URI doesn't contain an extra '/' before the path no % encoded values
|
||||
* are decoded and all '%' are encoded.
|
||||
*/
|
||||
{ "file://C:/te%3Es%2Et/tes%t.mp3", 0, S_OK, FALSE,
|
||||
Uri_HAS_ABSOLUTE_URI|Uri_HAS_DISPLAY_URI|Uri_HAS_EXTENSION|Uri_HAS_PATH|
|
||||
Uri_HAS_PATH_AND_QUERY|Uri_HAS_RAW_URI|Uri_HAS_SCHEME_NAME|Uri_HAS_HOST_TYPE|Uri_HAS_SCHEME,
|
||||
TRUE,
|
||||
{
|
||||
{"file:///C:/te%253Es%252Et/tes%25t.mp3",S_OK,TRUE},
|
||||
{"",S_FALSE,FALSE},
|
||||
{"file:///C:/te%253Es%252Et/tes%25t.mp3",S_OK,TRUE},
|
||||
{"",S_FALSE,FALSE},
|
||||
{".mp3",S_OK,TRUE},
|
||||
{"",S_FALSE,TRUE},
|
||||
{"",S_FALSE,FALSE},
|
||||
{"",S_FALSE,FALSE},
|
||||
{"/C:/te%253Es%252Et/tes%25t.mp3",S_OK,TRUE},
|
||||
{"/C:/te%253Es%252Et/tes%25t.mp3",S_OK,TRUE},
|
||||
{"",S_FALSE,TRUE},
|
||||
{"file://C:/te%3Es%2Et/tes%t.mp3",S_OK,FALSE},
|
||||
{"file",S_OK,FALSE},
|
||||
{"",S_FALSE,FALSE},
|
||||
{"",S_FALSE,FALSE}
|
||||
},
|
||||
{
|
||||
{Uri_HOST_UNKNOWN,S_OK,FALSE},
|
||||
{0,S_FALSE,FALSE},
|
||||
{URL_SCHEME_FILE,S_OK,FALSE},
|
||||
{URLZONE_INVALID,E_NOTIMPL,FALSE}
|
||||
}
|
||||
},
|
||||
/* Since there's a '/' in front of the drive letter, any percent encoded, non-forbidden character
|
||||
* is decoded and only %'s in front of invalid hex digits are encoded.
|
||||
*/
|
||||
{ "file:///C:/te%3Es%2Et/t%23es%t.mp3", 0, S_OK, FALSE,
|
||||
Uri_HAS_ABSOLUTE_URI|Uri_HAS_DISPLAY_URI|Uri_HAS_EXTENSION|Uri_HAS_PATH|
|
||||
Uri_HAS_PATH_AND_QUERY|Uri_HAS_RAW_URI|Uri_HAS_SCHEME_NAME|Uri_HAS_HOST_TYPE|Uri_HAS_SCHEME,
|
||||
TRUE,
|
||||
{
|
||||
{"file:///C:/te%3Es.t/t#es%25t.mp3",S_OK,TRUE},
|
||||
{"",S_FALSE,FALSE},
|
||||
{"file:///C:/te%3Es.t/t#es%25t.mp3",S_OK,TRUE},
|
||||
{"",S_FALSE,FALSE},
|
||||
{".mp3",S_OK,TRUE},
|
||||
{"",S_FALSE,TRUE},
|
||||
{"",S_FALSE,FALSE},
|
||||
{"",S_FALSE,FALSE},
|
||||
{"/C:/te%3Es.t/t#es%25t.mp3",S_OK,TRUE},
|
||||
{"/C:/te%3Es.t/t#es%25t.mp3",S_OK,TRUE},
|
||||
{"",S_FALSE,TRUE},
|
||||
{"file:///C:/te%3Es%2Et/t%23es%t.mp3",S_OK,FALSE},
|
||||
{"file",S_OK,FALSE},
|
||||
{"",S_FALSE,FALSE},
|
||||
{"",S_FALSE,FALSE}
|
||||
},
|
||||
{
|
||||
{Uri_HOST_UNKNOWN,S_OK,FALSE},
|
||||
{0,S_FALSE,FALSE},
|
||||
{URL_SCHEME_FILE,S_OK,FALSE},
|
||||
{URLZONE_INVALID,E_NOTIMPL,FALSE}
|
||||
}
|
||||
},
|
||||
/* Only unreserved percent encoded characters are decoded for known schemes that aren't file. */
|
||||
{ "http://[::001.002.003.000]/%3F%23%2E%54/test", 0, S_OK, FALSE,
|
||||
Uri_HAS_ABSOLUTE_URI|Uri_HAS_AUTHORITY|Uri_HAS_DISPLAY_URI|Uri_HAS_HOST|
|
||||
Uri_HAS_PATH|Uri_HAS_PATH_AND_QUERY|Uri_HAS_RAW_URI|Uri_HAS_SCHEME_NAME|
|
||||
Uri_HAS_HOST_TYPE|Uri_HAS_PORT|Uri_HAS_SCHEME,
|
||||
TRUE,
|
||||
{
|
||||
{"http://[::1.2.3.0]/%3F%23.T/test",S_OK,TRUE},
|
||||
{"[::1.2.3.0]",S_OK,FALSE},
|
||||
{"http://[::1.2.3.0]/%3F%23.T/test",S_OK,TRUE},
|
||||
{"",S_FALSE,FALSE},
|
||||
{"",S_FALSE,TRUE},
|
||||
{"",S_FALSE,TRUE},
|
||||
{"::1.2.3.0",S_OK,FALSE},
|
||||
{"",S_FALSE,FALSE},
|
||||
{"/%3F%23.T/test",S_OK,TRUE},
|
||||
{"/%3F%23.T/test",S_OK,TRUE},
|
||||
{"",S_FALSE,TRUE},
|
||||
{"http://[::001.002.003.000]/%3F%23%2E%54/test",S_OK,FALSE},
|
||||
{"http",S_OK,FALSE},
|
||||
{"",S_FALSE,FALSE},
|
||||
{"",S_FALSE,FALSE},
|
||||
},
|
||||
{
|
||||
{Uri_HOST_IPV6,S_OK,FALSE},
|
||||
{80,S_OK,FALSE},
|
||||
{URL_SCHEME_HTTP,S_OK,FALSE},
|
||||
{URLZONE_INVALID,E_NOTIMPL,FALSE}
|
||||
}
|
||||
},
|
||||
/* Forbidden characters are always encoded for file URIs. */
|
||||
{ "file:///C:/\"test\"/test.mp3", Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS, S_OK, FALSE,
|
||||
Uri_HAS_ABSOLUTE_URI|Uri_HAS_DISPLAY_URI|Uri_HAS_EXTENSION|Uri_HAS_PATH|
|
||||
Uri_HAS_PATH_AND_QUERY|Uri_HAS_RAW_URI|Uri_HAS_SCHEME_NAME|Uri_HAS_HOST_TYPE|Uri_HAS_SCHEME,
|
||||
TRUE,
|
||||
{
|
||||
{"file:///C:/%22test%22/test.mp3",S_OK,TRUE},
|
||||
{"",S_FALSE,FALSE},
|
||||
{"file:///C:/%22test%22/test.mp3",S_OK,TRUE},
|
||||
{"",S_FALSE,FALSE},
|
||||
{".mp3",S_OK,TRUE},
|
||||
{"",S_FALSE,TRUE},
|
||||
{"",S_FALSE,FALSE},
|
||||
{"",S_FALSE,FALSE},
|
||||
{"/C:/%22test%22/test.mp3",S_OK,TRUE},
|
||||
{"/C:/%22test%22/test.mp3",S_OK,TRUE},
|
||||
{"",S_FALSE,TRUE},
|
||||
{"file:///C:/\"test\"/test.mp3",S_OK,FALSE},
|
||||
{"file",S_OK,FALSE},
|
||||
{"",S_FALSE,FALSE},
|
||||
{"",S_FALSE,FALSE}
|
||||
},
|
||||
{
|
||||
{Uri_HOST_UNKNOWN,S_OK,FALSE},
|
||||
{0,S_FALSE,FALSE},
|
||||
{URL_SCHEME_FILE,S_OK,FALSE},
|
||||
{URLZONE_INVALID,E_NOTIMPL,FALSE}
|
||||
}
|
||||
},
|
||||
/* Forbidden characters are never encoded for unknown scheme types. */
|
||||
{ "1234://4294967295/<|>\" test<|>", 0, S_OK, FALSE,
|
||||
Uri_HAS_ABSOLUTE_URI|Uri_HAS_AUTHORITY|Uri_HAS_DISPLAY_URI|Uri_HAS_HOST|
|
||||
Uri_HAS_PATH|Uri_HAS_PATH_AND_QUERY|Uri_HAS_RAW_URI|Uri_HAS_SCHEME_NAME|
|
||||
Uri_HAS_HOST_TYPE|Uri_HAS_SCHEME,
|
||||
TRUE,
|
||||
{
|
||||
{"1234://4294967295/<|>\" test<|>",S_OK,TRUE},
|
||||
{"4294967295",S_OK,FALSE},
|
||||
{"1234://4294967295/<|>\" test<|>",S_OK,TRUE},
|
||||
{"",S_FALSE,FALSE},
|
||||
{"",S_FALSE,TRUE},
|
||||
{"",S_FALSE,TRUE},
|
||||
{"4294967295",S_OK,FALSE},
|
||||
{"",S_FALSE,FALSE},
|
||||
{"/<|>\" test<|>",S_OK,TRUE},
|
||||
{"/<|>\" test<|>",S_OK,TRUE},
|
||||
{"",S_FALSE,TRUE},
|
||||
{"1234://4294967295/<|>\" test<|>",S_OK,FALSE},
|
||||
{"1234",S_OK,FALSE},
|
||||
{"",S_FALSE,FALSE},
|
||||
{"",S_FALSE,FALSE}
|
||||
},
|
||||
{
|
||||
{Uri_HOST_IPV4,S_OK,FALSE},
|
||||
{0,S_FALSE,FALSE},
|
||||
{URL_SCHEME_UNKNOWN,S_OK,FALSE},
|
||||
{URLZONE_INVALID,E_NOTIMPL,FALSE}
|
||||
}
|
||||
},
|
||||
/* Make sure forbidden characters are percent encoded. */
|
||||
{ "http://gov.uk/<|> test<|>", 0, S_OK, FALSE,
|
||||
Uri_HAS_ABSOLUTE_URI|Uri_HAS_AUTHORITY|Uri_HAS_DISPLAY_URI|Uri_HAS_HOST|
|
||||
Uri_HAS_PATH|Uri_HAS_PATH_AND_QUERY|Uri_HAS_RAW_URI|Uri_HAS_SCHEME_NAME|
|
||||
Uri_HAS_HOST_TYPE|Uri_HAS_PORT|Uri_HAS_SCHEME,
|
||||
TRUE,
|
||||
{
|
||||
{"http://gov.uk/%3C%7C%3E%20test%3C%7C%3E",S_OK,TRUE},
|
||||
{"gov.uk",S_OK,FALSE},
|
||||
{"http://gov.uk/%3C%7C%3E%20test%3C%7C%3E",S_OK,TRUE},
|
||||
{"",S_FALSE,FALSE},
|
||||
{"",S_FALSE,TRUE},
|
||||
{"",S_FALSE,TRUE},
|
||||
{"gov.uk",S_OK,FALSE},
|
||||
{"",S_FALSE,FALSE},
|
||||
{"/%3C%7C%3E%20test%3C%7C%3E",S_OK,TRUE},
|
||||
{"/%3C%7C%3E%20test%3C%7C%3E",S_OK,TRUE},
|
||||
{"",S_FALSE,TRUE},
|
||||
{"http://gov.uk/<|> test<|>",S_OK,FALSE},
|
||||
{"http",S_OK,FALSE},
|
||||
{"",S_FALSE,FALSE},
|
||||
{"",S_FALSE,FALSE}
|
||||
},
|
||||
{
|
||||
{Uri_HOST_DNS,S_OK,FALSE},
|
||||
{80,S_OK,FALSE},
|
||||
{URL_SCHEME_HTTP,S_OK,FALSE},
|
||||
{URLZONE_INVALID,E_NOTIMPL,FALSE}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
@ -58,6 +58,9 @@ typedef struct {
|
|||
DWORD authority_len;
|
||||
|
||||
INT domain_offset;
|
||||
|
||||
INT path_start;
|
||||
DWORD path_len;
|
||||
} Uri;
|
||||
|
||||
typedef struct {
|
||||
|
@ -2266,6 +2269,115 @@ static BOOL canonicalize_authority(const parse_data *data, Uri *uri, DWORD flags
|
|||
return TRUE;
|
||||
}
|
||||
|
||||
/* Attempts to canonicalize the path of a hierarchical URI.
|
||||
*
|
||||
* Things that happen:
|
||||
* 1). Forbidden characters are percent encoded, unless the NO_ENCODE_FORBIDDEN
|
||||
* flag is set or it's a file URI. Forbidden characters are always encoded
|
||||
* for file schemes regardless and forbidden characters are never encoded
|
||||
* for unknown scheme types.
|
||||
*
|
||||
* 2). For known scheme types '\\' are changed to '/'.
|
||||
*
|
||||
* 3). Percent encoded, unreserved characters are decoded to their actual values.
|
||||
* Unless the scheme type is unknown. For file schemes any percent encoded
|
||||
* character in the unreserved or reserved set is decoded.
|
||||
*
|
||||
* 4). For File schemes if the path starts with a drive letter and doesn't
|
||||
* start with a '/' then one is prepended.
|
||||
* Ex: file://c:/test.mp3 -> file:///c:/test.mp3
|
||||
*
|
||||
* 5). Dot segments are removed from the path for all scheme types
|
||||
* unless NO_CANONICALIZE flag is set. Dot segments aren't removed
|
||||
* for wildcard scheme types.
|
||||
*
|
||||
* NOTES:
|
||||
* file://c:/test%20test -> file:///c:/test%2520test
|
||||
* file://c:/test%3Etest -> file:///c:/test%253Etest
|
||||
* file:///c:/test%20test -> file:///c:/test%20test
|
||||
* file:///c:/test%test -> file:///c:/test%25test
|
||||
*/
|
||||
static BOOL canonicalize_path_hierarchical(const parse_data *data, Uri *uri,
|
||||
DWORD flags, BOOL computeOnly) {
|
||||
const WCHAR *ptr;
|
||||
const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN;
|
||||
const BOOL is_file = data->scheme_type == URL_SCHEME_FILE;
|
||||
|
||||
BOOL escape_pct = FALSE;
|
||||
|
||||
if(!data->path) {
|
||||
uri->path_start = -1;
|
||||
uri->path_len = 0;
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
uri->path_start = uri->canon_len;
|
||||
|
||||
/* Check if a '/' needs to be appended for the file scheme. */
|
||||
if(is_file) {
|
||||
if(data->path_len > 1 && is_alpha(*(data->path)) &&
|
||||
*(data->path+1) == ':') {
|
||||
if(!computeOnly)
|
||||
uri->canon_uri[uri->canon_len] = '/';
|
||||
uri->canon_len++;
|
||||
escape_pct = TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
for(ptr = data->path; ptr < data->path+data->path_len; ++ptr) {
|
||||
if(*ptr == '%') {
|
||||
const WCHAR *tmp = ptr;
|
||||
WCHAR val;
|
||||
|
||||
/* Check if the % represents a valid encoded char, or if it needs encoded. */
|
||||
BOOL force_encode = !check_pct_encoded(&tmp) && is_file;
|
||||
val = decode_pct_val(ptr);
|
||||
|
||||
if(force_encode || escape_pct) {
|
||||
/* Escape the percent sign in the file URI. */
|
||||
if(!computeOnly)
|
||||
pct_encode_val(*ptr, uri->canon_uri+uri->canon_len);
|
||||
uri->canon_len += 3;
|
||||
} else if((is_unreserved(val) && known_scheme) ||
|
||||
(is_file && (is_unreserved(val) || is_reserved(val)))) {
|
||||
if(!computeOnly)
|
||||
uri->canon_uri[uri->canon_len] = val;
|
||||
++uri->canon_len;
|
||||
|
||||
ptr += 2;
|
||||
continue;
|
||||
} else {
|
||||
if(!computeOnly)
|
||||
uri->canon_uri[uri->canon_len] = *ptr;
|
||||
++uri->canon_len;
|
||||
}
|
||||
} else if(*ptr == '\\' && known_scheme) {
|
||||
if(!computeOnly)
|
||||
uri->canon_uri[uri->canon_len] = '/';
|
||||
++uri->canon_len;
|
||||
} else if(known_scheme && !is_unreserved(*ptr) && !is_reserved(*ptr) &&
|
||||
(!(flags & Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS) || is_file)) {
|
||||
/* Escape the forbidden character. */
|
||||
if(!computeOnly)
|
||||
pct_encode_val(*ptr, uri->canon_uri+uri->canon_len);
|
||||
uri->canon_len += 3;
|
||||
} else {
|
||||
if(!computeOnly)
|
||||
uri->canon_uri[uri->canon_len] = *ptr;
|
||||
++uri->canon_len;
|
||||
}
|
||||
}
|
||||
|
||||
uri->path_len = uri->canon_len - uri->path_start;
|
||||
|
||||
if(!computeOnly)
|
||||
TRACE("Canonicalized path %s len=%d\n",
|
||||
debugstr_wn(uri->canon_uri+uri->path_start, uri->path_len),
|
||||
uri->path_len);
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
/* Determines how the URI represented by the parse_data should be canonicalized.
|
||||
*
|
||||
* Essentially, if the parse_data represents a hierarchical URI then it calls
|
||||
|
@ -2288,7 +2400,9 @@ static BOOL canonicalize_hierpart(const parse_data *data, Uri *uri, DWORD flags,
|
|||
if(!canonicalize_authority(data, uri, flags, computeOnly))
|
||||
return FALSE;
|
||||
|
||||
/* TODO: Canonicalize the path of the URI. */
|
||||
/* TODO: Canonicalize the path of the URI. */
|
||||
if(!canonicalize_path_hierarchical(data, uri, flags, computeOnly))
|
||||
return FALSE;
|
||||
|
||||
} else {
|
||||
/* Opaque URI's don't have an authority. */
|
||||
|
|
Loading…
Reference in New Issue