urlmon: Implemented canonicalization function for hierarchical URI paths.
This commit is contained in:
parent
93d79ee18e
commit
300d0e1ad4
|
@ -2273,6 +2273,187 @@ static const uri_properties uri_tests[] = {
|
||||||
{URL_SCHEME_UNKNOWN,S_OK,FALSE},
|
{URL_SCHEME_UNKNOWN,S_OK,FALSE},
|
||||||
{URLZONE_INVALID,E_NOTIMPL,FALSE}
|
{URLZONE_INVALID,E_NOTIMPL,FALSE}
|
||||||
}
|
}
|
||||||
|
},
|
||||||
|
/* Since the original URI doesn't contain an extra '/' before the path no % encoded values
|
||||||
|
* are decoded and all '%' are encoded.
|
||||||
|
*/
|
||||||
|
{ "file://C:/te%3Es%2Et/tes%t.mp3", 0, S_OK, FALSE,
|
||||||
|
Uri_HAS_ABSOLUTE_URI|Uri_HAS_DISPLAY_URI|Uri_HAS_EXTENSION|Uri_HAS_PATH|
|
||||||
|
Uri_HAS_PATH_AND_QUERY|Uri_HAS_RAW_URI|Uri_HAS_SCHEME_NAME|Uri_HAS_HOST_TYPE|Uri_HAS_SCHEME,
|
||||||
|
TRUE,
|
||||||
|
{
|
||||||
|
{"file:///C:/te%253Es%252Et/tes%25t.mp3",S_OK,TRUE},
|
||||||
|
{"",S_FALSE,FALSE},
|
||||||
|
{"file:///C:/te%253Es%252Et/tes%25t.mp3",S_OK,TRUE},
|
||||||
|
{"",S_FALSE,FALSE},
|
||||||
|
{".mp3",S_OK,TRUE},
|
||||||
|
{"",S_FALSE,TRUE},
|
||||||
|
{"",S_FALSE,FALSE},
|
||||||
|
{"",S_FALSE,FALSE},
|
||||||
|
{"/C:/te%253Es%252Et/tes%25t.mp3",S_OK,TRUE},
|
||||||
|
{"/C:/te%253Es%252Et/tes%25t.mp3",S_OK,TRUE},
|
||||||
|
{"",S_FALSE,TRUE},
|
||||||
|
{"file://C:/te%3Es%2Et/tes%t.mp3",S_OK,FALSE},
|
||||||
|
{"file",S_OK,FALSE},
|
||||||
|
{"",S_FALSE,FALSE},
|
||||||
|
{"",S_FALSE,FALSE}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
{Uri_HOST_UNKNOWN,S_OK,FALSE},
|
||||||
|
{0,S_FALSE,FALSE},
|
||||||
|
{URL_SCHEME_FILE,S_OK,FALSE},
|
||||||
|
{URLZONE_INVALID,E_NOTIMPL,FALSE}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
/* Since there's a '/' in front of the drive letter, any percent encoded, non-forbidden character
|
||||||
|
* is decoded and only %'s in front of invalid hex digits are encoded.
|
||||||
|
*/
|
||||||
|
{ "file:///C:/te%3Es%2Et/t%23es%t.mp3", 0, S_OK, FALSE,
|
||||||
|
Uri_HAS_ABSOLUTE_URI|Uri_HAS_DISPLAY_URI|Uri_HAS_EXTENSION|Uri_HAS_PATH|
|
||||||
|
Uri_HAS_PATH_AND_QUERY|Uri_HAS_RAW_URI|Uri_HAS_SCHEME_NAME|Uri_HAS_HOST_TYPE|Uri_HAS_SCHEME,
|
||||||
|
TRUE,
|
||||||
|
{
|
||||||
|
{"file:///C:/te%3Es.t/t#es%25t.mp3",S_OK,TRUE},
|
||||||
|
{"",S_FALSE,FALSE},
|
||||||
|
{"file:///C:/te%3Es.t/t#es%25t.mp3",S_OK,TRUE},
|
||||||
|
{"",S_FALSE,FALSE},
|
||||||
|
{".mp3",S_OK,TRUE},
|
||||||
|
{"",S_FALSE,TRUE},
|
||||||
|
{"",S_FALSE,FALSE},
|
||||||
|
{"",S_FALSE,FALSE},
|
||||||
|
{"/C:/te%3Es.t/t#es%25t.mp3",S_OK,TRUE},
|
||||||
|
{"/C:/te%3Es.t/t#es%25t.mp3",S_OK,TRUE},
|
||||||
|
{"",S_FALSE,TRUE},
|
||||||
|
{"file:///C:/te%3Es%2Et/t%23es%t.mp3",S_OK,FALSE},
|
||||||
|
{"file",S_OK,FALSE},
|
||||||
|
{"",S_FALSE,FALSE},
|
||||||
|
{"",S_FALSE,FALSE}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
{Uri_HOST_UNKNOWN,S_OK,FALSE},
|
||||||
|
{0,S_FALSE,FALSE},
|
||||||
|
{URL_SCHEME_FILE,S_OK,FALSE},
|
||||||
|
{URLZONE_INVALID,E_NOTIMPL,FALSE}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
/* Only unreserved percent encoded characters are decoded for known schemes that aren't file. */
|
||||||
|
{ "http://[::001.002.003.000]/%3F%23%2E%54/test", 0, S_OK, FALSE,
|
||||||
|
Uri_HAS_ABSOLUTE_URI|Uri_HAS_AUTHORITY|Uri_HAS_DISPLAY_URI|Uri_HAS_HOST|
|
||||||
|
Uri_HAS_PATH|Uri_HAS_PATH_AND_QUERY|Uri_HAS_RAW_URI|Uri_HAS_SCHEME_NAME|
|
||||||
|
Uri_HAS_HOST_TYPE|Uri_HAS_PORT|Uri_HAS_SCHEME,
|
||||||
|
TRUE,
|
||||||
|
{
|
||||||
|
{"http://[::1.2.3.0]/%3F%23.T/test",S_OK,TRUE},
|
||||||
|
{"[::1.2.3.0]",S_OK,FALSE},
|
||||||
|
{"http://[::1.2.3.0]/%3F%23.T/test",S_OK,TRUE},
|
||||||
|
{"",S_FALSE,FALSE},
|
||||||
|
{"",S_FALSE,TRUE},
|
||||||
|
{"",S_FALSE,TRUE},
|
||||||
|
{"::1.2.3.0",S_OK,FALSE},
|
||||||
|
{"",S_FALSE,FALSE},
|
||||||
|
{"/%3F%23.T/test",S_OK,TRUE},
|
||||||
|
{"/%3F%23.T/test",S_OK,TRUE},
|
||||||
|
{"",S_FALSE,TRUE},
|
||||||
|
{"http://[::001.002.003.000]/%3F%23%2E%54/test",S_OK,FALSE},
|
||||||
|
{"http",S_OK,FALSE},
|
||||||
|
{"",S_FALSE,FALSE},
|
||||||
|
{"",S_FALSE,FALSE},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
{Uri_HOST_IPV6,S_OK,FALSE},
|
||||||
|
{80,S_OK,FALSE},
|
||||||
|
{URL_SCHEME_HTTP,S_OK,FALSE},
|
||||||
|
{URLZONE_INVALID,E_NOTIMPL,FALSE}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
/* Forbidden characters are always encoded for file URIs. */
|
||||||
|
{ "file:///C:/\"test\"/test.mp3", Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS, S_OK, FALSE,
|
||||||
|
Uri_HAS_ABSOLUTE_URI|Uri_HAS_DISPLAY_URI|Uri_HAS_EXTENSION|Uri_HAS_PATH|
|
||||||
|
Uri_HAS_PATH_AND_QUERY|Uri_HAS_RAW_URI|Uri_HAS_SCHEME_NAME|Uri_HAS_HOST_TYPE|Uri_HAS_SCHEME,
|
||||||
|
TRUE,
|
||||||
|
{
|
||||||
|
{"file:///C:/%22test%22/test.mp3",S_OK,TRUE},
|
||||||
|
{"",S_FALSE,FALSE},
|
||||||
|
{"file:///C:/%22test%22/test.mp3",S_OK,TRUE},
|
||||||
|
{"",S_FALSE,FALSE},
|
||||||
|
{".mp3",S_OK,TRUE},
|
||||||
|
{"",S_FALSE,TRUE},
|
||||||
|
{"",S_FALSE,FALSE},
|
||||||
|
{"",S_FALSE,FALSE},
|
||||||
|
{"/C:/%22test%22/test.mp3",S_OK,TRUE},
|
||||||
|
{"/C:/%22test%22/test.mp3",S_OK,TRUE},
|
||||||
|
{"",S_FALSE,TRUE},
|
||||||
|
{"file:///C:/\"test\"/test.mp3",S_OK,FALSE},
|
||||||
|
{"file",S_OK,FALSE},
|
||||||
|
{"",S_FALSE,FALSE},
|
||||||
|
{"",S_FALSE,FALSE}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
{Uri_HOST_UNKNOWN,S_OK,FALSE},
|
||||||
|
{0,S_FALSE,FALSE},
|
||||||
|
{URL_SCHEME_FILE,S_OK,FALSE},
|
||||||
|
{URLZONE_INVALID,E_NOTIMPL,FALSE}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
/* Forbidden characters are never encoded for unknown scheme types. */
|
||||||
|
{ "1234://4294967295/<|>\" test<|>", 0, S_OK, FALSE,
|
||||||
|
Uri_HAS_ABSOLUTE_URI|Uri_HAS_AUTHORITY|Uri_HAS_DISPLAY_URI|Uri_HAS_HOST|
|
||||||
|
Uri_HAS_PATH|Uri_HAS_PATH_AND_QUERY|Uri_HAS_RAW_URI|Uri_HAS_SCHEME_NAME|
|
||||||
|
Uri_HAS_HOST_TYPE|Uri_HAS_SCHEME,
|
||||||
|
TRUE,
|
||||||
|
{
|
||||||
|
{"1234://4294967295/<|>\" test<|>",S_OK,TRUE},
|
||||||
|
{"4294967295",S_OK,FALSE},
|
||||||
|
{"1234://4294967295/<|>\" test<|>",S_OK,TRUE},
|
||||||
|
{"",S_FALSE,FALSE},
|
||||||
|
{"",S_FALSE,TRUE},
|
||||||
|
{"",S_FALSE,TRUE},
|
||||||
|
{"4294967295",S_OK,FALSE},
|
||||||
|
{"",S_FALSE,FALSE},
|
||||||
|
{"/<|>\" test<|>",S_OK,TRUE},
|
||||||
|
{"/<|>\" test<|>",S_OK,TRUE},
|
||||||
|
{"",S_FALSE,TRUE},
|
||||||
|
{"1234://4294967295/<|>\" test<|>",S_OK,FALSE},
|
||||||
|
{"1234",S_OK,FALSE},
|
||||||
|
{"",S_FALSE,FALSE},
|
||||||
|
{"",S_FALSE,FALSE}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
{Uri_HOST_IPV4,S_OK,FALSE},
|
||||||
|
{0,S_FALSE,FALSE},
|
||||||
|
{URL_SCHEME_UNKNOWN,S_OK,FALSE},
|
||||||
|
{URLZONE_INVALID,E_NOTIMPL,FALSE}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
/* Make sure forbidden characters are percent encoded. */
|
||||||
|
{ "http://gov.uk/<|> test<|>", 0, S_OK, FALSE,
|
||||||
|
Uri_HAS_ABSOLUTE_URI|Uri_HAS_AUTHORITY|Uri_HAS_DISPLAY_URI|Uri_HAS_HOST|
|
||||||
|
Uri_HAS_PATH|Uri_HAS_PATH_AND_QUERY|Uri_HAS_RAW_URI|Uri_HAS_SCHEME_NAME|
|
||||||
|
Uri_HAS_HOST_TYPE|Uri_HAS_PORT|Uri_HAS_SCHEME,
|
||||||
|
TRUE,
|
||||||
|
{
|
||||||
|
{"http://gov.uk/%3C%7C%3E%20test%3C%7C%3E",S_OK,TRUE},
|
||||||
|
{"gov.uk",S_OK,FALSE},
|
||||||
|
{"http://gov.uk/%3C%7C%3E%20test%3C%7C%3E",S_OK,TRUE},
|
||||||
|
{"",S_FALSE,FALSE},
|
||||||
|
{"",S_FALSE,TRUE},
|
||||||
|
{"",S_FALSE,TRUE},
|
||||||
|
{"gov.uk",S_OK,FALSE},
|
||||||
|
{"",S_FALSE,FALSE},
|
||||||
|
{"/%3C%7C%3E%20test%3C%7C%3E",S_OK,TRUE},
|
||||||
|
{"/%3C%7C%3E%20test%3C%7C%3E",S_OK,TRUE},
|
||||||
|
{"",S_FALSE,TRUE},
|
||||||
|
{"http://gov.uk/<|> test<|>",S_OK,FALSE},
|
||||||
|
{"http",S_OK,FALSE},
|
||||||
|
{"",S_FALSE,FALSE},
|
||||||
|
{"",S_FALSE,FALSE}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
{Uri_HOST_DNS,S_OK,FALSE},
|
||||||
|
{80,S_OK,FALSE},
|
||||||
|
{URL_SCHEME_HTTP,S_OK,FALSE},
|
||||||
|
{URLZONE_INVALID,E_NOTIMPL,FALSE}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -58,6 +58,9 @@ typedef struct {
|
||||||
DWORD authority_len;
|
DWORD authority_len;
|
||||||
|
|
||||||
INT domain_offset;
|
INT domain_offset;
|
||||||
|
|
||||||
|
INT path_start;
|
||||||
|
DWORD path_len;
|
||||||
} Uri;
|
} Uri;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
|
@ -2266,6 +2269,115 @@ static BOOL canonicalize_authority(const parse_data *data, Uri *uri, DWORD flags
|
||||||
return TRUE;
|
return TRUE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Attempts to canonicalize the path of a hierarchical URI.
|
||||||
|
*
|
||||||
|
* Things that happen:
|
||||||
|
* 1). Forbidden characters are percent encoded, unless the NO_ENCODE_FORBIDDEN
|
||||||
|
* flag is set or it's a file URI. Forbidden characters are always encoded
|
||||||
|
* for file schemes reguardless and forbidden characters are never encoded
|
||||||
|
* for unknown scheme types.
|
||||||
|
*
|
||||||
|
* 2). For known scheme types '\\' are changed to '/'.
|
||||||
|
*
|
||||||
|
* 3). Percent encoded, unreserved characters are decoded to their actual values.
|
||||||
|
* Unless the scheme type is unknown. For file schemes any percent encoded
|
||||||
|
* character in the unreserved or reserved set is decoded.
|
||||||
|
*
|
||||||
|
* 4). For File schemes if the path is starts with a drive letter and doesn't
|
||||||
|
* start with a '/' then one is appended.
|
||||||
|
* Ex: file://c:/test.mp3 -> file:///c:/test.mp3
|
||||||
|
*
|
||||||
|
* 5). Dot segments are removed from the path for all scheme types
|
||||||
|
* unless NO_CANONICALIZE flag is set. Dot segments aren't removed
|
||||||
|
* for wildcard scheme types.
|
||||||
|
*
|
||||||
|
* NOTES:
|
||||||
|
* file://c:/test%20test -> file:///c:/test%2520test
|
||||||
|
* file://c:/test%3Etest -> file:///c:/test%253Etest
|
||||||
|
* file:///c:/test%20test -> file:///c:/test%20test
|
||||||
|
* file:///c:/test%test -> file:///c:/test%25test
|
||||||
|
*/
|
||||||
|
static BOOL canonicalize_path_hierarchical(const parse_data *data, Uri *uri,
|
||||||
|
DWORD flags, BOOL computeOnly) {
|
||||||
|
const WCHAR *ptr;
|
||||||
|
const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN;
|
||||||
|
const BOOL is_file = data->scheme_type == URL_SCHEME_FILE;
|
||||||
|
|
||||||
|
BOOL escape_pct = FALSE;
|
||||||
|
|
||||||
|
if(!data->path) {
|
||||||
|
uri->path_start = -1;
|
||||||
|
uri->path_len = 0;
|
||||||
|
return TRUE;
|
||||||
|
}
|
||||||
|
|
||||||
|
uri->path_start = uri->canon_len;
|
||||||
|
|
||||||
|
/* Check if a '/' needs to be appended for the file scheme. */
|
||||||
|
if(is_file) {
|
||||||
|
if(data->path_len > 1 && is_alpha(*(data->path)) &&
|
||||||
|
*(data->path+1) == ':') {
|
||||||
|
if(!computeOnly)
|
||||||
|
uri->canon_uri[uri->canon_len] = '/';
|
||||||
|
uri->canon_len++;
|
||||||
|
escape_pct = TRUE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for(ptr = data->path; ptr < data->path+data->path_len; ++ptr) {
|
||||||
|
if(*ptr == '%') {
|
||||||
|
const WCHAR *tmp = ptr;
|
||||||
|
WCHAR val;
|
||||||
|
|
||||||
|
/* Check if the % represents a valid encoded char, or if it needs encoded. */
|
||||||
|
BOOL force_encode = !check_pct_encoded(&tmp) && is_file;
|
||||||
|
val = decode_pct_val(ptr);
|
||||||
|
|
||||||
|
if(force_encode || escape_pct) {
|
||||||
|
/* Escape the percent sign in the file URI. */
|
||||||
|
if(!computeOnly)
|
||||||
|
pct_encode_val(*ptr, uri->canon_uri+uri->canon_len);
|
||||||
|
uri->canon_len += 3;
|
||||||
|
} else if((is_unreserved(val) && known_scheme) ||
|
||||||
|
(is_file && (is_unreserved(val) || is_reserved(val)))) {
|
||||||
|
if(!computeOnly)
|
||||||
|
uri->canon_uri[uri->canon_len] = val;
|
||||||
|
++uri->canon_len;
|
||||||
|
|
||||||
|
ptr += 2;
|
||||||
|
continue;
|
||||||
|
} else {
|
||||||
|
if(!computeOnly)
|
||||||
|
uri->canon_uri[uri->canon_len] = *ptr;
|
||||||
|
++uri->canon_len;
|
||||||
|
}
|
||||||
|
} else if(*ptr == '\\' && known_scheme) {
|
||||||
|
if(!computeOnly)
|
||||||
|
uri->canon_uri[uri->canon_len] = '/';
|
||||||
|
++uri->canon_len;
|
||||||
|
} else if(known_scheme && !is_unreserved(*ptr) && !is_reserved(*ptr) &&
|
||||||
|
(!(flags & Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS) || is_file)) {
|
||||||
|
/* Escape the forbidden character. */
|
||||||
|
if(!computeOnly)
|
||||||
|
pct_encode_val(*ptr, uri->canon_uri+uri->canon_len);
|
||||||
|
uri->canon_len += 3;
|
||||||
|
} else {
|
||||||
|
if(!computeOnly)
|
||||||
|
uri->canon_uri[uri->canon_len] = *ptr;
|
||||||
|
++uri->canon_len;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
uri->path_len = uri->canon_len - uri->path_start;
|
||||||
|
|
||||||
|
if(!computeOnly)
|
||||||
|
TRACE("Canonicalized path %s len=%d\n",
|
||||||
|
debugstr_wn(uri->canon_uri+uri->path_start, uri->path_len),
|
||||||
|
uri->path_len);
|
||||||
|
|
||||||
|
return TRUE;
|
||||||
|
}
|
||||||
|
|
||||||
/* Determines how the URI represented by the parse_data should be canonicalized.
|
/* Determines how the URI represented by the parse_data should be canonicalized.
|
||||||
*
|
*
|
||||||
* Essentially, if the parse_data represents an hierarchical URI then it calls
|
* Essentially, if the parse_data represents an hierarchical URI then it calls
|
||||||
|
@ -2288,7 +2400,9 @@ static BOOL canonicalize_hierpart(const parse_data *data, Uri *uri, DWORD flags,
|
||||||
if(!canonicalize_authority(data, uri, flags, computeOnly))
|
if(!canonicalize_authority(data, uri, flags, computeOnly))
|
||||||
return FALSE;
|
return FALSE;
|
||||||
|
|
||||||
/* TODO: Canonicalize the path of the URI. */
|
/* TODO: Canonicalize the path of the URI. */
|
||||||
|
if(!canonicalize_path_hierarchical(data, uri, flags, computeOnly))
|
||||||
|
return FALSE;
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
/* Opaque URI's don't have an authority. */
|
/* Opaque URI's don't have an authority. */
|
||||||
|
|
Loading…
Reference in New Issue