urlmon: Implemented canonicalization function for paths in opaque URIs.
This commit is contained in:
parent
5758b068ae
commit
42e8af7a4a
|
@ -2659,6 +2659,208 @@ static const uri_properties uri_tests[] = {
|
|||
{URL_SCHEME_WILDCARD,S_OK,FALSE},
|
||||
{URLZONE_INVALID,E_NOTIMPL,FALSE}
|
||||
}
|
||||
},
|
||||
/* Forbidden characters are encoded for opaque known scheme types. */
|
||||
{ "mailto:\"acco<|>unt@example.com\"", 0, S_OK, FALSE,
|
||||
Uri_HAS_ABSOLUTE_URI|Uri_HAS_DISPLAY_URI|Uri_HAS_EXTENSION|Uri_HAS_PATH|Uri_HAS_PATH_AND_QUERY|
|
||||
Uri_HAS_RAW_URI|Uri_HAS_SCHEME_NAME|Uri_HAS_HOST_TYPE|Uri_HAS_SCHEME,
|
||||
TRUE,
|
||||
{
|
||||
{"mailto:%22acco%3C%7C%3Eunt@example.com%22",S_OK,TRUE},
|
||||
{"",S_FALSE,FALSE},
|
||||
{"mailto:%22acco%3C%7C%3Eunt@example.com%22",S_OK,TRUE},
|
||||
{"",S_FALSE,FALSE},
|
||||
{".com%22",S_OK,TRUE},
|
||||
{"",S_FALSE,TRUE},
|
||||
{"",S_FALSE,FALSE},
|
||||
{"",S_FALSE,FALSE},
|
||||
{"%22acco%3C%7C%3Eunt@example.com%22",S_OK,TRUE},
|
||||
{"%22acco%3C%7C%3Eunt@example.com%22",S_OK,TRUE},
|
||||
{"",S_FALSE,TRUE},
|
||||
{"mailto:\"acco<|>unt@example.com\"",S_OK,FALSE},
|
||||
{"mailto",S_OK,FALSE},
|
||||
{"",S_FALSE,FALSE},
|
||||
{"",S_FALSE,FALSE}
|
||||
},
|
||||
{
|
||||
{Uri_HOST_UNKNOWN,S_OK,FALSE},
|
||||
{0,S_FALSE,FALSE},
|
||||
{URL_SCHEME_MAILTO,S_OK,FALSE},
|
||||
{URLZONE_INVALID,E_NOTIMPL,FALSE}
|
||||
}
|
||||
},
|
||||
{ "news:test.tes<|>t.com", 0, S_OK, FALSE,
|
||||
Uri_HAS_ABSOLUTE_URI|Uri_HAS_DISPLAY_URI|Uri_HAS_EXTENSION|Uri_HAS_PATH|Uri_HAS_PATH_AND_QUERY|
|
||||
Uri_HAS_RAW_URI|Uri_HAS_SCHEME_NAME|Uri_HAS_HOST_TYPE|Uri_HAS_SCHEME,
|
||||
TRUE,
|
||||
{
|
||||
{"news:test.tes%3C%7C%3Et.com",S_OK,TRUE},
|
||||
{"",S_FALSE,FALSE},
|
||||
{"news:test.tes%3C%7C%3Et.com",S_OK,TRUE},
|
||||
{"",S_FALSE,FALSE},
|
||||
{".com",S_OK,TRUE},
|
||||
{"",S_FALSE,TRUE},
|
||||
{"",S_FALSE,FALSE},
|
||||
{"",S_FALSE,FALSE},
|
||||
{"test.tes%3C%7C%3Et.com",S_OK,TRUE},
|
||||
{"test.tes%3C%7C%3Et.com",S_OK,TRUE},
|
||||
{"",S_FALSE,TRUE},
|
||||
{"news:test.tes<|>t.com",S_OK,FALSE},
|
||||
{"news",S_OK,FALSE},
|
||||
{"",S_FALSE,FALSE},
|
||||
{"",S_FALSE,FALSE}
|
||||
},
|
||||
{
|
||||
{Uri_HOST_UNKNOWN,S_OK,FALSE},
|
||||
{0,S_FALSE,FALSE},
|
||||
{URL_SCHEME_NEWS,S_OK,FALSE},
|
||||
{URLZONE_INVALID,E_NOTIMPL,FALSE}
|
||||
}
|
||||
},
|
||||
/* Don't encode forbidden characters. */
|
||||
{ "news:test.tes<|>t.com", Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS, S_OK, FALSE,
|
||||
Uri_HAS_ABSOLUTE_URI|Uri_HAS_DISPLAY_URI|Uri_HAS_EXTENSION|Uri_HAS_PATH|Uri_HAS_PATH_AND_QUERY|
|
||||
Uri_HAS_RAW_URI|Uri_HAS_SCHEME_NAME|Uri_HAS_HOST_TYPE|Uri_HAS_SCHEME,
|
||||
TRUE,
|
||||
{
|
||||
{"news:test.tes<|>t.com",S_OK,TRUE},
|
||||
{"",S_FALSE,FALSE},
|
||||
{"news:test.tes<|>t.com",S_OK,TRUE},
|
||||
{"",S_FALSE,FALSE},
|
||||
{".com",S_OK,TRUE},
|
||||
{"",S_FALSE,TRUE},
|
||||
{"",S_FALSE,FALSE},
|
||||
{"",S_FALSE,FALSE},
|
||||
{"test.tes<|>t.com",S_OK,TRUE},
|
||||
{"test.tes<|>t.com",S_OK,TRUE},
|
||||
{"",S_FALSE,TRUE},
|
||||
{"news:test.tes<|>t.com",S_OK,FALSE},
|
||||
{"news",S_OK,FALSE},
|
||||
{"",S_FALSE,FALSE},
|
||||
{"",S_FALSE,FALSE}
|
||||
},
|
||||
{
|
||||
{Uri_HOST_UNKNOWN,S_OK,FALSE},
|
||||
{0,S_FALSE,FALSE},
|
||||
{URL_SCHEME_NEWS,S_OK,FALSE},
|
||||
{URLZONE_INVALID,E_NOTIMPL,FALSE}
|
||||
}
|
||||
},
|
||||
/* Forbidden characters aren't encoded for unknown, opaque URIs. */
|
||||
{ "urn:test.tes<|>t.com", 0, S_OK, FALSE,
|
||||
Uri_HAS_ABSOLUTE_URI|Uri_HAS_DISPLAY_URI|Uri_HAS_EXTENSION|Uri_HAS_PATH|Uri_HAS_PATH_AND_QUERY|
|
||||
Uri_HAS_RAW_URI|Uri_HAS_SCHEME_NAME|Uri_HAS_HOST_TYPE|Uri_HAS_SCHEME,
|
||||
TRUE,
|
||||
{
|
||||
{"urn:test.tes<|>t.com",S_OK,TRUE},
|
||||
{"",S_FALSE,FALSE},
|
||||
{"urn:test.tes<|>t.com",S_OK,TRUE},
|
||||
{"",S_FALSE,FALSE},
|
||||
{".com",S_OK,TRUE},
|
||||
{"",S_FALSE,TRUE},
|
||||
{"",S_FALSE,FALSE},
|
||||
{"",S_FALSE,FALSE},
|
||||
{"test.tes<|>t.com",S_OK,TRUE},
|
||||
{"test.tes<|>t.com",S_OK,TRUE},
|
||||
{"",S_FALSE,TRUE},
|
||||
{"urn:test.tes<|>t.com",S_OK,FALSE},
|
||||
{"urn",S_OK,FALSE},
|
||||
{"",S_FALSE,FALSE},
|
||||
{"",S_FALSE,FALSE}
|
||||
},
|
||||
{
|
||||
{Uri_HOST_UNKNOWN,S_OK,FALSE},
|
||||
{0,S_FALSE,FALSE},
|
||||
{URL_SCHEME_UNKNOWN,S_OK,FALSE},
|
||||
{URLZONE_INVALID,E_NOTIMPL,FALSE}
|
||||
}
|
||||
},
|
||||
/* Percent encoded unreserved characters are decoded for known opaque URIs. */
|
||||
{ "news:test.%74%65%73%74.com", 0, S_OK, FALSE,
|
||||
Uri_HAS_ABSOLUTE_URI|Uri_HAS_DISPLAY_URI|Uri_HAS_EXTENSION|Uri_HAS_PATH|Uri_HAS_PATH_AND_QUERY|
|
||||
Uri_HAS_RAW_URI|Uri_HAS_SCHEME_NAME|Uri_HAS_HOST_TYPE|Uri_HAS_SCHEME,
|
||||
TRUE,
|
||||
{
|
||||
{"news:test.test.com",S_OK,TRUE},
|
||||
{"",S_FALSE,FALSE},
|
||||
{"news:test.test.com",S_OK,TRUE},
|
||||
{"",S_FALSE,FALSE},
|
||||
{".com",S_OK,TRUE},
|
||||
{"",S_FALSE,TRUE},
|
||||
{"",S_FALSE,FALSE},
|
||||
{"",S_FALSE,FALSE},
|
||||
{"test.test.com",S_OK,TRUE},
|
||||
{"test.test.com",S_OK,TRUE},
|
||||
{"",S_FALSE,TRUE},
|
||||
{"news:test.%74%65%73%74.com",S_OK,FALSE},
|
||||
{"news",S_OK,FALSE},
|
||||
{"",S_FALSE,FALSE},
|
||||
{"",S_FALSE,FALSE}
|
||||
},
|
||||
{
|
||||
{Uri_HOST_UNKNOWN,S_OK,FALSE},
|
||||
{0,S_FALSE,FALSE},
|
||||
{URL_SCHEME_NEWS,S_OK,FALSE},
|
||||
{URLZONE_INVALID,E_NOTIMPL,FALSE}
|
||||
}
|
||||
},
|
||||
/* Percent encoded characters are still decoded for known scheme types. */
|
||||
{ "news:test.%74%65%73%74.com", Uri_CREATE_NO_CANONICALIZE, S_OK, FALSE,
|
||||
Uri_HAS_ABSOLUTE_URI|Uri_HAS_DISPLAY_URI|Uri_HAS_EXTENSION|Uri_HAS_PATH|Uri_HAS_PATH_AND_QUERY|
|
||||
Uri_HAS_RAW_URI|Uri_HAS_SCHEME_NAME|Uri_HAS_HOST_TYPE|Uri_HAS_SCHEME,
|
||||
TRUE,
|
||||
{
|
||||
{"news:test.test.com",S_OK,TRUE},
|
||||
{"",S_FALSE,FALSE},
|
||||
{"news:test.test.com",S_OK,TRUE},
|
||||
{"",S_FALSE,FALSE},
|
||||
{".com",S_OK,TRUE},
|
||||
{"",S_FALSE,TRUE},
|
||||
{"",S_FALSE,FALSE},
|
||||
{"",S_FALSE,FALSE},
|
||||
{"test.test.com",S_OK,TRUE},
|
||||
{"test.test.com",S_OK,TRUE},
|
||||
{"",S_FALSE,TRUE},
|
||||
{"news:test.%74%65%73%74.com",S_OK,FALSE},
|
||||
{"news",S_OK,FALSE},
|
||||
{"",S_FALSE,FALSE},
|
||||
{"",S_FALSE,FALSE}
|
||||
},
|
||||
{
|
||||
{Uri_HOST_UNKNOWN,S_OK,FALSE},
|
||||
{0,S_FALSE,FALSE},
|
||||
{URL_SCHEME_NEWS,S_OK,FALSE},
|
||||
{URLZONE_INVALID,E_NOTIMPL,FALSE}
|
||||
}
|
||||
},
|
||||
/* Percent encoded characters aren't decoded for unknown scheme types. */
|
||||
{ "urn:test.%74%65%73%74.com", 0, S_OK, FALSE,
|
||||
Uri_HAS_ABSOLUTE_URI|Uri_HAS_DISPLAY_URI|Uri_HAS_EXTENSION|Uri_HAS_PATH|Uri_HAS_PATH_AND_QUERY|
|
||||
Uri_HAS_RAW_URI|Uri_HAS_SCHEME_NAME|Uri_HAS_HOST_TYPE|Uri_HAS_SCHEME,
|
||||
TRUE,
|
||||
{
|
||||
{"urn:test.%74%65%73%74.com",S_OK,TRUE},
|
||||
{"",S_FALSE,FALSE},
|
||||
{"urn:test.%74%65%73%74.com",S_OK,TRUE},
|
||||
{"",S_FALSE,FALSE},
|
||||
{".com",S_OK,TRUE},
|
||||
{"",S_FALSE,TRUE},
|
||||
{"",S_FALSE,FALSE},
|
||||
{"",S_FALSE,FALSE},
|
||||
{"test.%74%65%73%74.com",S_OK,TRUE},
|
||||
{"test.%74%65%73%74.com",S_OK,TRUE},
|
||||
{"",S_FALSE,TRUE},
|
||||
{"urn:test.%74%65%73%74.com",S_OK,FALSE},
|
||||
{"urn",S_OK,FALSE},
|
||||
{"",S_FALSE,FALSE},
|
||||
{"",S_FALSE,FALSE}
|
||||
},
|
||||
{
|
||||
{Uri_HOST_UNKNOWN,S_OK,FALSE},
|
||||
{0,S_FALSE,FALSE},
|
||||
{URL_SCHEME_UNKNOWN,S_OK,FALSE},
|
||||
{URLZONE_INVALID,E_NOTIMPL,FALSE}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
@ -1675,6 +1675,8 @@ static BOOL parse_path_opaque(const WCHAR **ptr, parse_data *data, DWORD flags)
|
|||
* (per MSDN documentation).
|
||||
*/
|
||||
static BOOL parse_hierpart(const WCHAR **ptr, parse_data *data, DWORD flags) {
|
||||
const WCHAR *start = *ptr;
|
||||
|
||||
/* Checks if the authority information needs to be parsed.
|
||||
*
|
||||
* Relative URIs aren't hierarchical URIs, but they could trick
|
||||
|
@ -1703,7 +1705,11 @@ static BOOL parse_hierpart(const WCHAR **ptr, parse_data *data, DWORD flags) {
|
|||
return FALSE;
|
||||
|
||||
return parse_path_hierarchical(ptr, data, flags);
|
||||
}
|
||||
} else
|
||||
/* Reset ptr to its starting position so opaque path parsing
|
||||
* begins at the correct location.
|
||||
*/
|
||||
*ptr = start;
|
||||
}
|
||||
|
||||
/* If it reaches here, then the URI will be treated as an opaque
|
||||
|
@ -2456,6 +2462,78 @@ static BOOL canonicalize_path_hierarchical(const parse_data *data, Uri *uri,
|
|||
return TRUE;
|
||||
}
|
||||
|
||||
/* Attempts to canonicalize the path for an opaque URI.
|
||||
*
|
||||
* For known scheme types:
|
||||
* 1) forbidden characters are percent encoded if
|
||||
* NO_ENCODE_FORBIDDEN_CHARACTERS isn't set.
|
||||
*
|
||||
* 2) Percent encoded, unreserved characters are decoded
|
||||
* to their actual values, for known scheme types.
|
||||
*
|
||||
* 3) '\\' are changed to '/' for known scheme types
|
||||
* except for mailto schemes.
|
||||
*/
|
||||
static BOOL canonicalize_path_opaque(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) {
|
||||
const WCHAR *ptr;
|
||||
const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN;
|
||||
|
||||
if(!data->path) {
|
||||
uri->path_start = -1;
|
||||
uri->path_len = 0;
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
uri->path_start = uri->canon_len;
|
||||
|
||||
/* Windows doesn't allow a "//" to appear after the scheme
|
||||
* of a URI, if it's an opaque URI.
|
||||
*/
|
||||
if(data->scheme && *(data->path) == '/' && *(data->path+1) == '/') {
|
||||
/* So it inserts a "/." before the "//" if it exists. */
|
||||
if(!computeOnly) {
|
||||
uri->canon_uri[uri->canon_len] = '/';
|
||||
uri->canon_uri[uri->canon_len+1] = '.';
|
||||
}
|
||||
|
||||
uri->canon_len += 2;
|
||||
}
|
||||
|
||||
for(ptr = data->path; ptr < data->path+data->path_len; ++ptr) {
|
||||
if(*ptr == '%' && known_scheme) {
|
||||
WCHAR val = decode_pct_val(ptr);
|
||||
|
||||
if(is_unreserved(val)) {
|
||||
if(!computeOnly)
|
||||
uri->canon_uri[uri->canon_len] = val;
|
||||
++uri->canon_len;
|
||||
|
||||
ptr += 2;
|
||||
continue;
|
||||
} else {
|
||||
if(!computeOnly)
|
||||
uri->canon_uri[uri->canon_len] = *ptr;
|
||||
++uri->canon_len;
|
||||
}
|
||||
} else if(known_scheme && !is_unreserved(*ptr) && !is_reserved(*ptr) &&
|
||||
!(flags & Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS)) {
|
||||
if(!computeOnly)
|
||||
pct_encode_val(*ptr, uri->canon_uri+uri->canon_len);
|
||||
uri->canon_len += 3;
|
||||
} else {
|
||||
if(!computeOnly)
|
||||
uri->canon_uri[uri->canon_len] = *ptr;
|
||||
++uri->canon_len;
|
||||
}
|
||||
}
|
||||
|
||||
uri->path_len = uri->canon_len - uri->path_start;
|
||||
|
||||
TRACE("(%p %p %x %d): Canonicalized opaque URI path %s len=%d\n", data, uri, flags, computeOnly,
|
||||
debugstr_wn(uri->canon_uri+uri->path_start, uri->path_len), uri->path_len);
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
/* Determines how the URI represented by the parse_data should be canonicalized.
|
||||
*
|
||||
* Essentially, if the parse_data represents an hierarchical URI then it calls
|
||||
|
@ -2493,6 +2571,9 @@ static BOOL canonicalize_hierpart(const parse_data *data, Uri *uri, DWORD flags,
|
|||
uri->authority_start = -1;
|
||||
uri->authority_len = 0;
|
||||
uri->domain_offset = -1;
|
||||
|
||||
if(!canonicalize_path_opaque(data, uri, flags, computeOnly))
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
return TRUE;
|
||||
|
|
Loading…
Reference in New Issue