urlmon: Implemented canonicalization function for paths in opaque URIs.

This commit is contained in:
Thomas Mullaly 2010-07-25 16:00:50 -04:00 committed by Alexandre Julliard
parent 5758b068ae
commit 42e8af7a4a
2 changed files with 284 additions and 1 deletions

View File

@ -2659,6 +2659,208 @@ static const uri_properties uri_tests[] = {
{URL_SCHEME_WILDCARD,S_OK,FALSE},
{URLZONE_INVALID,E_NOTIMPL,FALSE}
}
},
/* Forbidden characters are encoded for opaque known scheme types. */
{ "mailto:\"acco<|>unt@example.com\"", 0, S_OK, FALSE,
Uri_HAS_ABSOLUTE_URI|Uri_HAS_DISPLAY_URI|Uri_HAS_EXTENSION|Uri_HAS_PATH|Uri_HAS_PATH_AND_QUERY|
Uri_HAS_RAW_URI|Uri_HAS_SCHEME_NAME|Uri_HAS_HOST_TYPE|Uri_HAS_SCHEME,
TRUE,
{
{"mailto:%22acco%3C%7C%3Eunt@example.com%22",S_OK,TRUE},
{"",S_FALSE,FALSE},
{"mailto:%22acco%3C%7C%3Eunt@example.com%22",S_OK,TRUE},
{"",S_FALSE,FALSE},
{".com%22",S_OK,TRUE},
{"",S_FALSE,TRUE},
{"",S_FALSE,FALSE},
{"",S_FALSE,FALSE},
{"%22acco%3C%7C%3Eunt@example.com%22",S_OK,TRUE},
{"%22acco%3C%7C%3Eunt@example.com%22",S_OK,TRUE},
{"",S_FALSE,TRUE},
{"mailto:\"acco<|>unt@example.com\"",S_OK,FALSE},
{"mailto",S_OK,FALSE},
{"",S_FALSE,FALSE},
{"",S_FALSE,FALSE}
},
{
{Uri_HOST_UNKNOWN,S_OK,FALSE},
{0,S_FALSE,FALSE},
{URL_SCHEME_MAILTO,S_OK,FALSE},
{URLZONE_INVALID,E_NOTIMPL,FALSE}
}
},
{ "news:test.tes<|>t.com", 0, S_OK, FALSE,
Uri_HAS_ABSOLUTE_URI|Uri_HAS_DISPLAY_URI|Uri_HAS_EXTENSION|Uri_HAS_PATH|Uri_HAS_PATH_AND_QUERY|
Uri_HAS_RAW_URI|Uri_HAS_SCHEME_NAME|Uri_HAS_HOST_TYPE|Uri_HAS_SCHEME,
TRUE,
{
{"news:test.tes%3C%7C%3Et.com",S_OK,TRUE},
{"",S_FALSE,FALSE},
{"news:test.tes%3C%7C%3Et.com",S_OK,TRUE},
{"",S_FALSE,FALSE},
{".com",S_OK,TRUE},
{"",S_FALSE,TRUE},
{"",S_FALSE,FALSE},
{"",S_FALSE,FALSE},
{"test.tes%3C%7C%3Et.com",S_OK,TRUE},
{"test.tes%3C%7C%3Et.com",S_OK,TRUE},
{"",S_FALSE,TRUE},
{"news:test.tes<|>t.com",S_OK,FALSE},
{"news",S_OK,FALSE},
{"",S_FALSE,FALSE},
{"",S_FALSE,FALSE}
},
{
{Uri_HOST_UNKNOWN,S_OK,FALSE},
{0,S_FALSE,FALSE},
{URL_SCHEME_NEWS,S_OK,FALSE},
{URLZONE_INVALID,E_NOTIMPL,FALSE}
}
},
/* Don't encode forbidden characters. */
{ "news:test.tes<|>t.com", Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS, S_OK, FALSE,
Uri_HAS_ABSOLUTE_URI|Uri_HAS_DISPLAY_URI|Uri_HAS_EXTENSION|Uri_HAS_PATH|Uri_HAS_PATH_AND_QUERY|
Uri_HAS_RAW_URI|Uri_HAS_SCHEME_NAME|Uri_HAS_HOST_TYPE|Uri_HAS_SCHEME,
TRUE,
{
{"news:test.tes<|>t.com",S_OK,TRUE},
{"",S_FALSE,FALSE},
{"news:test.tes<|>t.com",S_OK,TRUE},
{"",S_FALSE,FALSE},
{".com",S_OK,TRUE},
{"",S_FALSE,TRUE},
{"",S_FALSE,FALSE},
{"",S_FALSE,FALSE},
{"test.tes<|>t.com",S_OK,TRUE},
{"test.tes<|>t.com",S_OK,TRUE},
{"",S_FALSE,TRUE},
{"news:test.tes<|>t.com",S_OK,FALSE},
{"news",S_OK,FALSE},
{"",S_FALSE,FALSE},
{"",S_FALSE,FALSE}
},
{
{Uri_HOST_UNKNOWN,S_OK,FALSE},
{0,S_FALSE,FALSE},
{URL_SCHEME_NEWS,S_OK,FALSE},
{URLZONE_INVALID,E_NOTIMPL,FALSE}
}
},
/* Forbidden characters aren't encoded for unknown, opaque URIs. */
{ "urn:test.tes<|>t.com", 0, S_OK, FALSE,
Uri_HAS_ABSOLUTE_URI|Uri_HAS_DISPLAY_URI|Uri_HAS_EXTENSION|Uri_HAS_PATH|Uri_HAS_PATH_AND_QUERY|
Uri_HAS_RAW_URI|Uri_HAS_SCHEME_NAME|Uri_HAS_HOST_TYPE|Uri_HAS_SCHEME,
TRUE,
{
{"urn:test.tes<|>t.com",S_OK,TRUE},
{"",S_FALSE,FALSE},
{"urn:test.tes<|>t.com",S_OK,TRUE},
{"",S_FALSE,FALSE},
{".com",S_OK,TRUE},
{"",S_FALSE,TRUE},
{"",S_FALSE,FALSE},
{"",S_FALSE,FALSE},
{"test.tes<|>t.com",S_OK,TRUE},
{"test.tes<|>t.com",S_OK,TRUE},
{"",S_FALSE,TRUE},
{"urn:test.tes<|>t.com",S_OK,FALSE},
{"urn",S_OK,FALSE},
{"",S_FALSE,FALSE},
{"",S_FALSE,FALSE}
},
{
{Uri_HOST_UNKNOWN,S_OK,FALSE},
{0,S_FALSE,FALSE},
{URL_SCHEME_UNKNOWN,S_OK,FALSE},
{URLZONE_INVALID,E_NOTIMPL,FALSE}
}
},
/* Percent encoded unreserved characters are decoded for known opaque URIs. */
{ "news:test.%74%65%73%74.com", 0, S_OK, FALSE,
Uri_HAS_ABSOLUTE_URI|Uri_HAS_DISPLAY_URI|Uri_HAS_EXTENSION|Uri_HAS_PATH|Uri_HAS_PATH_AND_QUERY|
Uri_HAS_RAW_URI|Uri_HAS_SCHEME_NAME|Uri_HAS_HOST_TYPE|Uri_HAS_SCHEME,
TRUE,
{
{"news:test.test.com",S_OK,TRUE},
{"",S_FALSE,FALSE},
{"news:test.test.com",S_OK,TRUE},
{"",S_FALSE,FALSE},
{".com",S_OK,TRUE},
{"",S_FALSE,TRUE},
{"",S_FALSE,FALSE},
{"",S_FALSE,FALSE},
{"test.test.com",S_OK,TRUE},
{"test.test.com",S_OK,TRUE},
{"",S_FALSE,TRUE},
{"news:test.%74%65%73%74.com",S_OK,FALSE},
{"news",S_OK,FALSE},
{"",S_FALSE,FALSE},
{"",S_FALSE,FALSE}
},
{
{Uri_HOST_UNKNOWN,S_OK,FALSE},
{0,S_FALSE,FALSE},
{URL_SCHEME_NEWS,S_OK,FALSE},
{URLZONE_INVALID,E_NOTIMPL,FALSE}
}
},
/* Percent encoded characters are still decoded for known scheme types. */
{ "news:test.%74%65%73%74.com", Uri_CREATE_NO_CANONICALIZE, S_OK, FALSE,
Uri_HAS_ABSOLUTE_URI|Uri_HAS_DISPLAY_URI|Uri_HAS_EXTENSION|Uri_HAS_PATH|Uri_HAS_PATH_AND_QUERY|
Uri_HAS_RAW_URI|Uri_HAS_SCHEME_NAME|Uri_HAS_HOST_TYPE|Uri_HAS_SCHEME,
TRUE,
{
{"news:test.test.com",S_OK,TRUE},
{"",S_FALSE,FALSE},
{"news:test.test.com",S_OK,TRUE},
{"",S_FALSE,FALSE},
{".com",S_OK,TRUE},
{"",S_FALSE,TRUE},
{"",S_FALSE,FALSE},
{"",S_FALSE,FALSE},
{"test.test.com",S_OK,TRUE},
{"test.test.com",S_OK,TRUE},
{"",S_FALSE,TRUE},
{"news:test.%74%65%73%74.com",S_OK,FALSE},
{"news",S_OK,FALSE},
{"",S_FALSE,FALSE},
{"",S_FALSE,FALSE}
},
{
{Uri_HOST_UNKNOWN,S_OK,FALSE},
{0,S_FALSE,FALSE},
{URL_SCHEME_NEWS,S_OK,FALSE},
{URLZONE_INVALID,E_NOTIMPL,FALSE}
}
},
/* Percent encoded characters aren't decoded for unknown scheme types. */
{ "urn:test.%74%65%73%74.com", 0, S_OK, FALSE,
Uri_HAS_ABSOLUTE_URI|Uri_HAS_DISPLAY_URI|Uri_HAS_EXTENSION|Uri_HAS_PATH|Uri_HAS_PATH_AND_QUERY|
Uri_HAS_RAW_URI|Uri_HAS_SCHEME_NAME|Uri_HAS_HOST_TYPE|Uri_HAS_SCHEME,
TRUE,
{
{"urn:test.%74%65%73%74.com",S_OK,TRUE},
{"",S_FALSE,FALSE},
{"urn:test.%74%65%73%74.com",S_OK,TRUE},
{"",S_FALSE,FALSE},
{".com",S_OK,TRUE},
{"",S_FALSE,TRUE},
{"",S_FALSE,FALSE},
{"",S_FALSE,FALSE},
{"test.%74%65%73%74.com",S_OK,TRUE},
{"test.%74%65%73%74.com",S_OK,TRUE},
{"",S_FALSE,TRUE},
{"urn:test.%74%65%73%74.com",S_OK,FALSE},
{"urn",S_OK,FALSE},
{"",S_FALSE,FALSE},
{"",S_FALSE,FALSE}
},
{
{Uri_HOST_UNKNOWN,S_OK,FALSE},
{0,S_FALSE,FALSE},
{URL_SCHEME_UNKNOWN,S_OK,FALSE},
{URLZONE_INVALID,E_NOTIMPL,FALSE}
}
}
};

View File

@ -1675,6 +1675,8 @@ static BOOL parse_path_opaque(const WCHAR **ptr, parse_data *data, DWORD flags)
* (per MSDN documentation).
*/
static BOOL parse_hierpart(const WCHAR **ptr, parse_data *data, DWORD flags) {
const WCHAR *start = *ptr;
/* Checks if the authority information needs to be parsed.
*
* Relative URI's aren't hierarchical URI's, but, they could trick
@ -1703,7 +1705,11 @@ static BOOL parse_hierpart(const WCHAR **ptr, parse_data *data, DWORD flags) {
return FALSE;
return parse_path_hierarchical(ptr, data, flags);
}
} else
/* Reset ptr to its starting position so opaque path parsing
* begins at the correct location.
*/
*ptr = start;
}
/* If it reaches here, then the URI will be treated as an opaque
@ -2456,6 +2462,78 @@ static BOOL canonicalize_path_hierarchical(const parse_data *data, Uri *uri,
return TRUE;
}
/* Canonicalizes the path component of an opaque URI into uri->canon_uri.
 *
 * Behavior implemented here, for known scheme types only
 * (data->scheme_type != URL_SCHEME_UNKNOWN):
 *  1) A percent encoded octet whose decoded value is an unreserved
 *     character is replaced by that character.
 *
 *  2) A character that is neither unreserved nor reserved is percent
 *     encoded, unless Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS is set.
 *
 * For unknown scheme types every path character is copied through as is.
 *
 * Independently of the scheme type, when the URI has a scheme and the path
 * begins with "//", a "/." is emitted in front of the path.
 *
 * NOTE(review): the conversion of '\\' to '/' for known, non-mailto schemes
 * is not performed anywhere in this function body.
 *
 * When computeOnly is set nothing is written to uri->canon_uri; only
 * uri->canon_len, uri->path_start and uri->path_len are updated, so the
 * caller can learn the required length.
 *
 * Always returns TRUE. If data->path is NULL, path_start is set to -1 and
 * path_len to 0.
 */
static BOOL canonicalize_path_opaque(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) {
    const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN;
    const DWORD no_encode = flags & Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS;
    const WCHAR *cur, *end;

    /* No path at all: record that and leave the canonical buffer untouched. */
    if(!data->path) {
        uri->path_start = -1;
        uri->path_len = 0;
        return TRUE;
    }

    uri->path_start = uri->canon_len;

    /* Windows doesn't allow a "//" to directly follow the scheme of an
     * opaque URI, so a "/." is placed in front of it.
     */
    if(data->scheme && data->path[0] == '/' && data->path[1] == '/') {
        if(!computeOnly) {
            uri->canon_uri[uri->canon_len] = '/';
            uri->canon_uri[uri->canon_len+1] = '.';
        }

        uri->canon_len += 2;
    }

    end = data->path + data->path_len;
    for(cur = data->path; cur < end; ++cur) {
        /* Character to emit for this position; defaults to a verbatim copy. */
        WCHAR out = *cur;

        if(*cur == '%' && known_scheme) {
            WCHAR decoded = decode_pct_val(cur);

            if(is_unreserved(decoded)) {
                /* Emit the decoded character and step over the two hex
                 * digits; the loop increment consumes the '%' itself.
                 */
                out = decoded;
                cur += 2;
            }
            /* Otherwise only the '%' is copied now; the hex digits that
             * follow are handled by the next iterations.
             */
        } else if(known_scheme && !is_unreserved(*cur) && !is_reserved(*cur) && !no_encode) {
            /* Forbidden character: it expands to a three character "%XX". */
            if(!computeOnly)
                pct_encode_val(*cur, uri->canon_uri+uri->canon_len);
            uri->canon_len += 3;
            continue;
        }

        if(!computeOnly)
            uri->canon_uri[uri->canon_len] = out;
        ++uri->canon_len;
    }

    uri->path_len = uri->canon_len - uri->path_start;

    TRACE("(%p %p %x %d): Canonicalized opaque URI path %s len=%d\n", data, uri, flags, computeOnly,
        debugstr_wn(uri->canon_uri+uri->path_start, uri->path_len), uri->path_len);
    return TRUE;
}
/* Determines how the URI represented by the parse_data should be canonicalized.
*
* Essentially, if the parse_data represents a hierarchical URI then it calls
@ -2493,6 +2571,9 @@ static BOOL canonicalize_hierpart(const parse_data *data, Uri *uri, DWORD flags,
uri->authority_start = -1;
uri->authority_len = 0;
uri->domain_offset = -1;
if(!canonicalize_path_opaque(data, uri, flags, computeOnly))
return FALSE;
}
return TRUE;