diff --git a/dlls/urlmon/tests/uri.c b/dlls/urlmon/tests/uri.c index 3b049403c41..65fd4a33a6a 100644 --- a/dlls/urlmon/tests/uri.c +++ b/dlls/urlmon/tests/uri.c @@ -2454,6 +2454,211 @@ static const uri_properties uri_tests[] = { {URL_SCHEME_HTTP,S_OK,FALSE}, {URLZONE_INVALID,E_NOTIMPL,FALSE} } + }, + { "http://gov.uk/test/../test2/././../test3/.././././", 0, S_OK, FALSE, + Uri_HAS_ABSOLUTE_URI|Uri_HAS_AUTHORITY|Uri_HAS_DISPLAY_URI|Uri_HAS_HOST| + Uri_HAS_PATH|Uri_HAS_PATH_AND_QUERY|Uri_HAS_RAW_URI|Uri_HAS_SCHEME_NAME| + Uri_HAS_HOST_TYPE|Uri_HAS_PORT|Uri_HAS_SCHEME, + TRUE, + { + {"http://gov.uk/",S_OK,TRUE}, + {"gov.uk",S_OK,FALSE}, + {"http://gov.uk/",S_OK,TRUE}, + {"",S_FALSE,FALSE}, + {"",S_FALSE,TRUE}, + {"",S_FALSE,TRUE}, + {"gov.uk",S_OK,FALSE}, + {"",S_FALSE,FALSE}, + {"/",S_OK,TRUE}, + {"/",S_OK,TRUE}, + {"",S_FALSE,TRUE}, + {"http://gov.uk/test/../test2/././../test3/.././././",S_OK,FALSE}, + {"http",S_OK,FALSE}, + {"",S_FALSE,FALSE}, + {"",S_FALSE,FALSE} + }, + { + {Uri_HOST_DNS,S_OK,FALSE}, + {80,S_OK,FALSE}, + {URL_SCHEME_HTTP,S_OK,FALSE}, + {URLZONE_INVALID,E_NOTIMPL,FALSE} + } + }, + { "http://gov.uk/test/test2/../../..", 0, S_OK, FALSE, + Uri_HAS_ABSOLUTE_URI|Uri_HAS_AUTHORITY|Uri_HAS_DISPLAY_URI|Uri_HAS_HOST| + Uri_HAS_PATH|Uri_HAS_PATH_AND_QUERY|Uri_HAS_RAW_URI|Uri_HAS_SCHEME_NAME| + Uri_HAS_HOST_TYPE|Uri_HAS_PORT|Uri_HAS_SCHEME, + TRUE, + { + {"http://gov.uk/",S_OK,TRUE}, + {"gov.uk",S_OK,FALSE}, + {"http://gov.uk/",S_OK,TRUE}, + {"",S_FALSE,FALSE}, + {"",S_FALSE,TRUE}, + {"",S_FALSE,TRUE}, + {"gov.uk",S_OK,FALSE}, + {"",S_FALSE,FALSE}, + {"/",S_OK,TRUE}, + {"/",S_OK,TRUE}, + {"",S_FALSE,TRUE}, + {"http://gov.uk/test/test2/../../..",S_OK,FALSE}, + {"http",S_OK,FALSE}, + {"",S_FALSE,FALSE}, + {"",S_FALSE,FALSE} + }, + { + {Uri_HOST_DNS,S_OK,FALSE}, + {80,S_OK,FALSE}, + {URL_SCHEME_HTTP,S_OK,FALSE}, + {URLZONE_INVALID,E_NOTIMPL,FALSE} + } + }, + { "http://gov.uk/test/test2/../../.", 0, S_OK, FALSE, + Uri_HAS_ABSOLUTE_URI|Uri_HAS_AUTHORITY|Uri_HAS_DISPLAY_URI|Uri_HAS_HOST| + Uri_HAS_PATH|Uri_HAS_PATH_AND_QUERY|Uri_HAS_RAW_URI|Uri_HAS_SCHEME_NAME| + Uri_HAS_HOST_TYPE|Uri_HAS_PORT|Uri_HAS_SCHEME, + TRUE, + { + {"http://gov.uk/",S_OK,TRUE}, + {"gov.uk",S_OK,FALSE}, + {"http://gov.uk/",S_OK,TRUE}, + {"",S_FALSE,FALSE}, + {"",S_FALSE,TRUE}, + {"",S_FALSE,TRUE}, + {"gov.uk",S_OK,FALSE}, + {"",S_FALSE,FALSE}, + {"/",S_OK,TRUE}, + {"/",S_OK,TRUE}, + {"",S_FALSE,TRUE}, + {"http://gov.uk/test/test2/../../.",S_OK,FALSE}, + {"http",S_OK,FALSE}, + {"",S_FALSE,FALSE}, + {"",S_FALSE,FALSE} + }, + { + {Uri_HOST_DNS,S_OK,FALSE}, + {80,S_OK,FALSE}, + {URL_SCHEME_HTTP,S_OK,FALSE}, + {URLZONE_INVALID,E_NOTIMPL,FALSE} + } + }, + { "file://c:\\tests\\../tests\\./.\\..\\foo%20bar.mp3", 0, S_OK, FALSE, + Uri_HAS_ABSOLUTE_URI|Uri_HAS_DISPLAY_URI|Uri_HAS_EXTENSION|Uri_HAS_PATH| + Uri_HAS_PATH_AND_QUERY|Uri_HAS_RAW_URI|Uri_HAS_SCHEME_NAME|Uri_HAS_HOST_TYPE|Uri_HAS_SCHEME, + TRUE, + { + {"file:///c:/foo%2520bar.mp3",S_OK,TRUE}, + {"",S_FALSE,FALSE}, + {"file:///c:/foo%2520bar.mp3",S_OK,TRUE}, + {"",S_FALSE,FALSE}, + {".mp3",S_OK,TRUE}, + {"",S_FALSE,TRUE}, + {"",S_FALSE,FALSE}, + {"",S_FALSE,FALSE}, + {"/c:/foo%2520bar.mp3",S_OK,TRUE}, + {"/c:/foo%2520bar.mp3",S_OK,TRUE}, + {"",S_FALSE,TRUE}, + {"file://c:\\tests\\../tests\\./.\\..\\foo%20bar.mp3",S_OK,FALSE}, + {"file",S_OK,FALSE}, + {"",S_FALSE,FALSE}, + {"",S_FALSE,FALSE} + }, + { + {Uri_HOST_UNKNOWN,S_OK,FALSE}, + {0,S_FALSE,FALSE}, + {URL_SCHEME_FILE,S_OK,FALSE}, + {URLZONE_INVALID,E_NOTIMPL,FALSE} + } + }, + /* Dot removal happens for unknown scheme types. */ + { "zip://gov.uk/test/test2/../../.", 0, S_OK, FALSE, + Uri_HAS_ABSOLUTE_URI|Uri_HAS_AUTHORITY|Uri_HAS_DISPLAY_URI|Uri_HAS_HOST| + Uri_HAS_PATH|Uri_HAS_PATH_AND_QUERY|Uri_HAS_RAW_URI|Uri_HAS_SCHEME_NAME| + Uri_HAS_HOST_TYPE|Uri_HAS_SCHEME, + TRUE, + { + {"zip://gov.uk/",S_OK,TRUE}, + {"gov.uk",S_OK,FALSE}, + {"zip://gov.uk/",S_OK,TRUE}, + {"",S_FALSE,FALSE}, + {"",S_FALSE,TRUE}, + {"",S_FALSE,TRUE}, + {"gov.uk",S_OK,FALSE}, + {"",S_FALSE,FALSE}, + {"/",S_OK,TRUE}, + {"/",S_OK,TRUE}, + {"",S_FALSE,TRUE}, + {"zip://gov.uk/test/test2/../../.",S_OK,FALSE}, + {"zip",S_OK,FALSE}, + {"",S_FALSE,FALSE}, + {"",S_FALSE,FALSE} + }, + { + {Uri_HOST_DNS,S_OK,FALSE}, + {0,S_FALSE,FALSE}, + {URL_SCHEME_UNKNOWN,S_OK,FALSE}, + {URLZONE_INVALID,E_NOTIMPL,FALSE} + } + }, + /* Dot removal doesn't happen if NO_CANONICALIZE is set. */ + { "http://gov.uk/test/test2/../../.", Uri_CREATE_NO_CANONICALIZE, S_OK, FALSE, + Uri_HAS_ABSOLUTE_URI|Uri_HAS_AUTHORITY|Uri_HAS_DISPLAY_URI|Uri_HAS_EXTENSION| + Uri_HAS_HOST|Uri_HAS_PATH|Uri_HAS_PATH_AND_QUERY|Uri_HAS_RAW_URI|Uri_HAS_SCHEME_NAME| + Uri_HAS_HOST_TYPE|Uri_HAS_PORT|Uri_HAS_SCHEME, + TRUE, + { + {"http://gov.uk/test/test2/../../.",S_OK,TRUE}, + {"gov.uk",S_OK,FALSE}, + {"http://gov.uk/test/test2/../../.",S_OK,TRUE}, + {"",S_FALSE,FALSE}, + {".",S_OK,TRUE}, + {"",S_FALSE,TRUE}, + {"gov.uk",S_OK,FALSE}, + {"",S_FALSE,FALSE}, + {"/test/test2/../../.",S_OK,TRUE}, + {"/test/test2/../../.",S_OK,TRUE}, + {"",S_FALSE,TRUE}, + {"http://gov.uk/test/test2/../../.",S_OK,FALSE}, + {"http",S_OK,FALSE}, + {"",S_FALSE,FALSE}, + {"",S_FALSE,FALSE} + }, + { + {Uri_HOST_DNS,S_OK,FALSE}, + {80,S_OK,FALSE}, + {URL_SCHEME_HTTP,S_OK,FALSE}, + {URLZONE_INVALID,E_NOTIMPL,FALSE} + } + }, + /* Dot removal doesn't happen for wildcard scheme types. */ + { "*:gov.uk/test/test2/../../.", 0, S_OK, FALSE, + Uri_HAS_ABSOLUTE_URI|Uri_HAS_AUTHORITY|Uri_HAS_DISPLAY_URI|Uri_HAS_EXTENSION| + Uri_HAS_HOST|Uri_HAS_PATH|Uri_HAS_PATH_AND_QUERY|Uri_HAS_RAW_URI|Uri_HAS_SCHEME_NAME| + Uri_HAS_HOST_TYPE|Uri_HAS_SCHEME, + TRUE, + { + {"*:gov.uk/test/test2/../../.",S_OK,TRUE}, + {"gov.uk",S_OK,FALSE}, + {"*:gov.uk/test/test2/../../.",S_OK,TRUE}, + {"",S_FALSE,FALSE}, + {".",S_OK,TRUE}, + {"",S_FALSE,TRUE}, + {"gov.uk",S_OK,FALSE}, + {"",S_FALSE,FALSE}, + {"/test/test2/../../.",S_OK,TRUE}, + {"/test/test2/../../.",S_OK,TRUE}, + {"",S_FALSE,TRUE}, + {"*:gov.uk/test/test2/../../.",S_OK,FALSE}, + {"*",S_OK,FALSE}, + {"",S_FALSE,FALSE}, + {"",S_FALSE,FALSE} + }, + { + {Uri_HOST_DNS,S_OK,FALSE}, + {0,S_FALSE,FALSE}, + {URL_SCHEME_WILDCARD,S_OK,FALSE}, + {URLZONE_INVALID,E_NOTIMPL,FALSE} + } } }; diff --git a/dlls/urlmon/uri.c b/dlls/urlmon/uri.c index cc5a54874df..3e4e6fead20 100644 --- a/dlls/urlmon/uri.c +++ b/dlls/urlmon/uri.c @@ -471,6 +471,70 @@ static void find_domain_name(const WCHAR *host, DWORD host_len, (host+host_len)-(host+*domain_start))); } +/* Removes the dot segments from a heirarchical URIs path component. This + * function performs the removal in place. + * + * This is a modified version of Qt's QUrl function "removeDotsFromPath". + * + * This function returns the new length of the path string. + */ +static DWORD remove_dot_segments(WCHAR *path, DWORD path_len) { + WCHAR *out = path; + const WCHAR *in = out; + const WCHAR *end = out + path_len; + DWORD len; + + while(in < end) { + /* A. if the input buffer begins with a prefix of "/./" or "/.", + * where "." is a complete path segment, then replace that + * prefix with "/" in the input buffer; otherwise, + */ + if(in <= end - 3 && in[0] == '/' && in[1] == '.' && in[2] == '/') { + in += 2; + continue; + } else if(in == end - 2 && in[0] == '/' && in[1] == '.') { + *out++ = '/'; + in += 2; + break; + } + + /* B. if the input buffer begins with a prefix of "/../" or "/..", + * where ".." is a complete path segment, then replace that + * prefix with "/" in the input buffer and remove the last + * segment and its preceding "/" (if any) from the output + * buffer; otherwise, + */ + if(in <= end - 4 && in[0] == '/' && in[1] == '.' && in[2] == '.' && in[3] == '/') { + while(out > path && *(--out) != '/'); + + in += 3; + continue; + } else if(in == end - 3 && in[0] == '/' && in[1] == '.' && in[2] == '.') { + while(out > path && *(--out) != '/'); + + if(*out == '/') + ++out; + + in += 3; + break; + } + + /* C. move the first path segment in the input buffer to the end of + * the output buffer, including the initial "/" character (if + * any) and any subsequent characters up to, but not including, + * the next "/" character or the end of the input buffer. + */ + *out++ = *in++; + while(in < end && *in != '/') + *out++ = *in++; + } + + len = out - path; + TRACE("(%p %d): Path after dot segments removed %s len=%d\n", path, path_len, + debugstr_wn(path, len), len); + return len; +} + /* Computes the location where the elision should occur in the IPv6 * address using the numerical values of each component stored in * 'values'. If the address shouldn't contain an elision then 'index' @@ -2370,6 +2434,20 @@ static BOOL canonicalize_path_hierarchical(const parse_data *data, Uri *uri, uri->path_len = uri->canon_len - uri->path_start; + /* Removing the dot segments only happens when it's not in + * computeOnly mode and it's not a wildcard scheme. + */ + if(!computeOnly && data->scheme_type != URL_SCHEME_WILDCARD) { + if(!(flags & Uri_CREATE_NO_CANONICALIZE)) { + /* Remove the dot segments (if any) and reset everything to the new + * correct length. + */ + DWORD new_len = remove_dot_segments(uri->canon_uri+uri->path_start, uri->path_len); + uri->canon_len -= uri->path_len-new_len; + uri->path_len = new_len; + } + } + if(!computeOnly) TRACE("Canonicalized path %s len=%d\n", debugstr_wn(uri->canon_uri+uri->path_start, uri->path_len), @@ -2515,6 +2593,7 @@ static HRESULT canonicalize_uri(const parse_data *data, Uri *uri, DWORD flags) { if(!uri->canon_uri) return E_OUTOFMEMORY; + uri->canon_size = len; if(!canonicalize_scheme(data, uri, flags, FALSE)) { ERR("(%p %p %x): Unable to canonicalize the scheme of the URI.\n", data, uri, flags); heap_free(uri->canon_uri); @@ -2528,6 +2607,21 @@ static HRESULT canonicalize_uri(const parse_data *data, Uri *uri, DWORD flags) { return E_INVALIDARG; } + /* There's a possibility we didn't use all the space we allocated + * earlier. + */ + if(uri->canon_len < uri->canon_size) { + /* This happens if the URI is hierarchical and dot + * segments were removed from it's path. + */ + WCHAR *tmp = heap_realloc(uri->canon_uri, (uri->canon_len+1)*sizeof(WCHAR)); + if(!tmp) + return E_OUTOFMEMORY; + + uri->canon_uri = tmp; + uri->canon_size = uri->canon_len; + } + uri->canon_uri[uri->canon_len] = '\0'; TRACE("(%p %p %x): finished canonicalizing the URI. uri=%s\n", data, uri, flags, debugstr_w(uri->canon_uri));