urlmon: Do not canonicalize Unicode characters.

Signed-off-by: Zhiyi Zhang <yi.gd.cn@gmail.com>
Signed-off-by: Jacek Caban <jacek@codeweavers.com>
Signed-off-by: Alexandre Julliard <julliard@winehq.org>
This commit is contained in:
Zhiyi Zhang 2018-02-09 23:22:51 +08:00 committed by Alexandre Julliard
parent b29157e101
commit ac066d3eee
2 changed files with 279 additions and 10 deletions

View File

@ -4799,6 +4799,266 @@ static const uri_properties uri_tests[] = {
{URL_SCHEME_FILE,S_OK,FALSE},
{URLZONE_INVALID,E_NOTIMPL,FALSE}
}
},
/* Path with Unicode characters. Unicode characters should not be encoded */
{/* "http://127.0.0.1/测试/test.txt" with Chinese in UTF-8 encoding */
"http://127.0.0.1/\xE6\xB5\x8B\xE8\xAF\x95/test.txt", 0, S_OK, FALSE,
{
{"http://127.0.0.1/\xE6\xB5\x8B\xE8\xAF\x95/test.txt",S_OK,FALSE},
{"127.0.0.1",S_OK,FALSE},
{"http://127.0.0.1/\xE6\xB5\x8B\xE8\xAF\x95/test.txt",S_OK,FALSE},
{"",S_FALSE,FALSE},
{".txt",S_OK,FALSE},
{"",S_FALSE,FALSE},
{"127.0.0.1",S_OK,FALSE},
{"",S_FALSE,FALSE},
{"/\xE6\xB5\x8B\xE8\xAF\x95/test.txt",S_OK,FALSE},
{"/\xE6\xB5\x8B\xE8\xAF\x95/test.txt",S_OK,FALSE},
{"",S_FALSE,FALSE},
{"http://127.0.0.1/\xE6\xB5\x8B\xE8\xAF\x95/test.txt",S_OK,FALSE},
{"http",S_OK,FALSE},
{"",S_FALSE,FALSE},
{"",S_FALSE,FALSE}
},
{
{Uri_HOST_IPV4,S_OK,FALSE},
{80,S_OK,FALSE},
{URL_SCHEME_HTTP,S_OK,FALSE},
{URLZONE_INVALID,E_NOTIMPL,FALSE}
}
},
{ "file:\xE6\xB5\x8B\xE8\xAF\x95.html", 0, S_OK, FALSE,
{
{"",S_FALSE,FALSE},
{"",S_FALSE,FALSE},
{"file:\xE6\xB5\x8B\xE8\xAF\x95.html",S_OK,FALSE},
{"",S_FALSE,FALSE},
{".html",S_OK,FALSE},
{"",S_FALSE,FALSE},
{"",S_FALSE,FALSE},
{"",S_FALSE,FALSE},
{"\xE6\xB5\x8B\xE8\xAF\x95.html",S_OK,FALSE},
{"\xE6\xB5\x8B\xE8\xAF\x95.html",S_OK,FALSE},
{"",S_FALSE,FALSE},
{"file:\xE6\xB5\x8B\xE8\xAF\x95.html",S_OK,FALSE},
{"file",S_OK,FALSE},
{"",S_FALSE,FALSE},
{"",S_FALSE,FALSE}
},
{
{Uri_HOST_UNKNOWN,S_OK,FALSE},
{0,S_FALSE,FALSE},
{URL_SCHEME_FILE,S_OK,FALSE},
{URLZONE_INVALID,E_NOTIMPL,FALSE}
}
},
/* Username with Unicode characters. Unicode characters should not be encoded */
{ "ftp://\xE6\xB5\x8B\xE8\xAF\x95:wine@ftp.winehq.org:9999/dir/foobar.txt", 0, S_OK, FALSE,
{
{"ftp://\xE6\xB5\x8B\xE8\xAF\x95:wine@ftp.winehq.org:9999/dir/foobar.txt",S_OK,FALSE},
{"\xE6\xB5\x8B\xE8\xAF\x95:wine@ftp.winehq.org:9999",S_OK,FALSE},
{"ftp://ftp.winehq.org:9999/dir/foobar.txt",S_OK,FALSE},
{"winehq.org",S_OK,FALSE},
{".txt",S_OK,FALSE},
{"",S_FALSE,FALSE},
{"ftp.winehq.org",S_OK,FALSE},
{"wine",S_OK,FALSE},
{"/dir/foobar.txt",S_OK,FALSE},
{"/dir/foobar.txt",S_OK,FALSE},
{"",S_FALSE,FALSE},
{"ftp://\xE6\xB5\x8B\xE8\xAF\x95:wine@ftp.winehq.org:9999/dir/foobar.txt",S_OK,FALSE},
{"ftp",S_OK,FALSE},
{"\xE6\xB5\x8B\xE8\xAF\x95:wine",S_OK,FALSE},
{"\xE6\xB5\x8B\xE8\xAF\x95",S_OK,FALSE}
},
{
{Uri_HOST_DNS,S_OK,FALSE},
{9999,S_OK,FALSE},
{URL_SCHEME_FTP,S_OK,FALSE},
{URLZONE_INVALID,E_NOTIMPL,FALSE}
}
},
/* Password with Unicode characters. Unicode characters should not be encoded */
{ "ftp://winepass:\xE6\xB5\x8B\xE8\xAF\x95@ftp.winehq.org:9999/dir/foobar.txt", 0, S_OK, FALSE,
{
{"ftp://winepass:\xE6\xB5\x8B\xE8\xAF\x95@ftp.winehq.org:9999/dir/foobar.txt",S_OK,FALSE},
{"winepass:\xE6\xB5\x8B\xE8\xAF\x95@ftp.winehq.org:9999",S_OK,FALSE},
{"ftp://ftp.winehq.org:9999/dir/foobar.txt",S_OK,FALSE},
{"winehq.org",S_OK,FALSE},
{".txt",S_OK,FALSE},
{"",S_FALSE,FALSE},
{"ftp.winehq.org",S_OK,FALSE},
{"\xE6\xB5\x8B\xE8\xAF\x95",S_OK,FALSE},
{"/dir/foobar.txt",S_OK,FALSE},
{"/dir/foobar.txt",S_OK,FALSE},
{"",S_FALSE,FALSE},
{"ftp://winepass:\xE6\xB5\x8B\xE8\xAF\x95@ftp.winehq.org:9999/dir/foobar.txt",S_OK,FALSE},
{"ftp",S_OK,FALSE},
{"winepass:\xE6\xB5\x8B\xE8\xAF\x95",S_OK,FALSE},
{"winepass",S_OK,FALSE}
},
{
{Uri_HOST_DNS,S_OK,FALSE},
{9999,S_OK,FALSE},
{URL_SCHEME_FTP,S_OK,FALSE},
{URLZONE_INVALID,E_NOTIMPL,FALSE}
}
},
/* Query with Unicode characters. Unicode characters should not be encoded */
{ "http://www.winehq.org/tests/..?query=\xE6\xB5\x8B\xE8\xAF\x95&return=y", 0, S_OK, FALSE,
{
{"http://www.winehq.org/?query=\xE6\xB5\x8B\xE8\xAF\x95&return=y",S_OK,FALSE},
{"www.winehq.org",S_OK,FALSE},
{"http://www.winehq.org/?query=\xE6\xB5\x8B\xE8\xAF\x95&return=y",S_OK,FALSE},
{"winehq.org",S_OK,FALSE},
{"",S_FALSE,FALSE},
{"",S_FALSE,FALSE},
{"www.winehq.org",S_OK,FALSE},
{"",S_FALSE,FALSE},
{"/",S_OK,FALSE},
{"/?query=\xE6\xB5\x8B\xE8\xAF\x95&return=y",S_OK,FALSE},
{"?query=\xE6\xB5\x8B\xE8\xAF\x95&return=y",S_OK,FALSE},
{"http://www.winehq.org/tests/..?query=\xE6\xB5\x8B\xE8\xAF\x95&return=y",S_OK,FALSE},
{"http",S_OK,FALSE},
{"",S_FALSE,FALSE},
{"",S_FALSE,FALSE}
},
{
{Uri_HOST_DNS,S_OK,FALSE},
{80,S_OK,FALSE},
{URL_SCHEME_HTTP,S_OK,FALSE},
{URLZONE_INVALID,E_NOTIMPL,FALSE},
}
},
/* Fragment with Unicode characters. Unicode characters should not be encoded */
{ "http://www.winehq.org/tests/#\xE6\xB5\x8B\xE8\xAF\x95", 0, S_OK, FALSE,
{
{"http://www.winehq.org/tests/#\xE6\xB5\x8B\xE8\xAF\x95",S_OK,FALSE},
{"www.winehq.org",S_OK,FALSE},
{"http://www.winehq.org/tests/#\xE6\xB5\x8B\xE8\xAF\x95",S_OK,FALSE},
{"winehq.org",S_OK,FALSE},
{"",S_FALSE,FALSE},
{"#\xE6\xB5\x8B\xE8\xAF\x95",S_OK,FALSE},
{"www.winehq.org",S_OK,FALSE},
{"",S_FALSE,FALSE},
{"/tests/",S_OK,FALSE},
{"/tests/",S_OK,FALSE},
{"",S_FALSE,FALSE},
{"http://www.winehq.org/tests/#\xE6\xB5\x8B\xE8\xAF\x95",S_OK,FALSE},
{"http",S_OK,FALSE},
{"",S_FALSE,FALSE},
{"",S_FALSE,FALSE}
},
{
{Uri_HOST_DNS,S_OK,FALSE},
{80,S_OK,FALSE},
{URL_SCHEME_HTTP,S_OK,FALSE},
{URLZONE_INVALID,E_NOTIMPL,FALSE},
}
},
/* ZERO WIDTH JOINER is a non-printing Unicode character; it should not be encoded when the URI is not preprocessed. */
{ "file:a\xE2\x80\x8D.html", Uri_CREATE_NO_PRE_PROCESS_HTML_URI, S_OK, FALSE,
{
{"",S_FALSE,FALSE},
{"",S_FALSE,FALSE},
{"file:a\xE2\x80\x8D.html",S_OK,FALSE},
{"",S_FALSE,FALSE},
{".html",S_OK,FALSE},
{"",S_FALSE,FALSE},
{"",S_FALSE,FALSE},
{"",S_FALSE,FALSE},
{"a\xE2\x80\x8D.html",S_OK,FALSE},
{"a\xE2\x80\x8D.html",S_OK,FALSE},
{"",S_FALSE,FALSE},
{"file:a\xE2\x80\x8D.html",S_OK,FALSE},
{"file",S_OK,FALSE},
{"",S_FALSE,FALSE},
{"",S_FALSE,FALSE}
},
{
{Uri_HOST_UNKNOWN,S_OK,FALSE},
{0,S_FALSE,FALSE},
{URL_SCHEME_FILE,S_OK,FALSE},
{URLZONE_INVALID,E_NOTIMPL,FALSE}
}
},
/* LEFT-TO-RIGHT MARK (U+200E, UTF-8 bytes E2 80 8E) is a non-printing Unicode character; it
 * should not be encoded when the URI is not preprocessed. Every expected string below must
 * therefore carry the same E2 80 8E sequence as the input URI. */
{ "file:ab\xE2\x80\x8E.html", Uri_CREATE_NO_PRE_PROCESS_HTML_URI, S_OK, FALSE,
{
{"",S_FALSE,FALSE},
{"",S_FALSE,FALSE},
{"file:ab\xE2\x80\x8E.html",S_OK,FALSE},
{"",S_FALSE,FALSE},
{".html",S_OK,FALSE},
{"",S_FALSE,FALSE},
{"",S_FALSE,FALSE},
{"",S_FALSE,FALSE},
{"ab\xE2\x80\x8E.html",S_OK,FALSE},
{"ab\xE2\x80\x8E.html",S_OK,FALSE},
{"",S_FALSE,FALSE},
{"file:ab\xE2\x80\x8E.html",S_OK,FALSE},
{"file",S_OK,FALSE},
{"",S_FALSE,FALSE},
{"",S_FALSE,FALSE}
},
{
{Uri_HOST_UNKNOWN,S_OK,FALSE},
{0,S_FALSE,FALSE},
{URL_SCHEME_FILE,S_OK,FALSE},
{URLZONE_INVALID,E_NOTIMPL,FALSE}
}
},
/* Invalid Unicode characters should not be filtered */
{ "file:ab\xc3\x28.html", 0, S_OK, FALSE,
{
{"",S_FALSE,FALSE},
{"",S_FALSE,FALSE},
{"file:ab\xc3\x28.html",S_OK,FALSE},
{"",S_FALSE,FALSE},
{".html",S_OK,FALSE},
{"",S_FALSE,FALSE},
{"",S_FALSE,FALSE},
{"",S_FALSE,FALSE},
{"ab\xc3\x28.html",S_OK,FALSE},
{"ab\xc3\x28.html",S_OK,FALSE},
{"",S_FALSE,FALSE},
{"file:ab\xc3\x28.html",S_OK,FALSE},
{"file",S_OK,FALSE},
{"",S_FALSE,FALSE},
{"",S_FALSE,FALSE}
},
{
{Uri_HOST_UNKNOWN,S_OK,FALSE},
{0,S_FALSE,FALSE},
{URL_SCHEME_FILE,S_OK,FALSE},
{URLZONE_INVALID,E_NOTIMPL,FALSE}
}
},
/* Make sure percent-encoded Unicode characters are not decoded. */
{ "ftp://%E6%B5%8B%E8%AF%95:%E6%B5%8B%E8%AF%95@ftp.google.com/", 0, S_OK, FALSE,
{
{"ftp://%E6%B5%8B%E8%AF%95:%E6%B5%8B%E8%AF%95@ftp.google.com/",S_OK,FALSE},
{"%E6%B5%8B%E8%AF%95:%E6%B5%8B%E8%AF%95@ftp.google.com",S_OK,FALSE},
{"ftp://ftp.google.com/",S_OK,FALSE},
{"google.com",S_OK,FALSE},
{"",S_FALSE,FALSE},
{"",S_FALSE,FALSE},
{"ftp.google.com",S_OK,FALSE},
{"%E6%B5%8B%E8%AF%95",S_OK,FALSE},
{"/",S_OK,FALSE},
{"/",S_OK,FALSE},
{"",S_FALSE,FALSE},
{"ftp://%E6%B5%8B%E8%AF%95:%E6%B5%8B%E8%AF%95@ftp.google.com/",S_OK,FALSE},
{"ftp",S_OK,FALSE},
{"%E6%B5%8B%E8%AF%95:%E6%B5%8B%E8%AF%95",S_OK,FALSE},
{"%E6%B5%8B%E8%AF%95",S_OK,FALSE}
},
{
{Uri_HOST_DNS,S_OK,FALSE},
{21,S_OK,FALSE},
{URL_SCHEME_FTP,S_OK,FALSE},
{URLZONE_INVALID,E_NOTIMPL,FALSE}
}
}
};
@ -7429,9 +7689,9 @@ static inline LPWSTR a2w(LPCSTR str) {
LPWSTR ret = NULL;
if(str) {
DWORD len = MultiByteToWideChar(CP_ACP, 0, str, -1, NULL, 0);
DWORD len = MultiByteToWideChar(CP_UTF8, 0, str, -1, NULL, 0);
ret = HeapAlloc(GetProcessHeap(), 0, len*sizeof(WCHAR));
MultiByteToWideChar(CP_ACP, 0, str, -1, ret, len);
MultiByteToWideChar(CP_UTF8, 0, str, -1, ret, len);
}
return ret;
@ -8100,8 +8360,12 @@ static void test_IUri_GetPropertyLength(void) {
for(j = Uri_PROPERTY_STRING_START; j <= Uri_PROPERTY_STRING_LAST; ++j) {
DWORD expectedLen, receivedLen;
uri_str_property prop = test.str_props[j];
LPWSTR expectedValueW;
expectedLen = lstrlenA(prop.value);
/* The value may contain UTF-8 encoded characters, so measure its length after conversion to wide characters. */
expectedValueW = a2w(prop.value);
expectedLen = lstrlenW(expectedValueW);
/* This won't be necessary once GetPropertyLength is implemented. */
receivedLen = -1;

View File

@ -357,6 +357,11 @@ static inline BOOL is_slash(WCHAR c)
return c == '/' || c == '\\';
}
/* Reports whether the character lies in the 7-bit ASCII range, i.e. its value is below 0x80. */
static inline BOOL is_ascii(WCHAR c)
{
    const BOOL in_ascii_range = (c <= 0x7f);

    return in_ascii_range;
}
static BOOL is_default_port(URL_SCHEME scheme, DWORD port) {
DWORD i;
@ -2164,7 +2169,7 @@ static BOOL canonicalize_username(const parse_data *data, Uri *uri, DWORD flags,
continue;
}
}
} else if(!is_reserved(*ptr) && !is_unreserved(*ptr) && *ptr != '\\') {
} else if(is_ascii(*ptr) && !is_reserved(*ptr) && !is_unreserved(*ptr) && *ptr != '\\') {
/* Only percent encode forbidden characters if the NO_ENCODE_FORBIDDEN_CHARACTERS flag
* is NOT set.
*/
@ -2222,7 +2227,7 @@ static BOOL canonicalize_password(const parse_data *data, Uri *uri, DWORD flags,
continue;
}
}
} else if(!is_reserved(*ptr) && !is_unreserved(*ptr) && *ptr != '\\') {
} else if(is_ascii(*ptr) && !is_reserved(*ptr) && !is_unreserved(*ptr) && *ptr != '\\') {
/* Only percent encode forbidden characters if the NO_ENCODE_FORBIDDEN_CHARACTERS flag
* is NOT set.
*/
@ -2357,7 +2362,7 @@ static BOOL canonicalize_reg_name(const parse_data *data, Uri *uri,
if(!computeOnly)
uri->canon_uri[uri->canon_len] = *ptr;
++uri->canon_len;
} else if(!(flags & Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS) &&
} else if(!(flags & Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS) && is_ascii(*ptr) &&
!is_unreserved(*ptr) && !is_reserved(*ptr) && known_scheme) {
if(!computeOnly) {
pct_encode_val(*ptr, uri->canon_uri+uri->canon_len);
@ -2927,7 +2932,7 @@ static DWORD canonicalize_path_hierarchical(const WCHAR *path, DWORD path_len, U
len++;
do_default_action = FALSE;
}
} else if(known_scheme && !is_res && !is_unreserved(*ptr) && !is_reserved(*ptr) &&
} else if(known_scheme && !is_res && is_ascii(*ptr) && !is_unreserved(*ptr) && !is_reserved(*ptr) &&
(!(flags & Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS) || is_file)) {
if(!is_file || !(flags & Uri_CREATE_FILE_USE_DOS_PATH)) {
/* Escape the forbidden character. */
@ -3053,7 +3058,7 @@ static BOOL canonicalize_path_opaque(const parse_data *data, Uri *uri, DWORD fla
}
} else if(is_mk && *ptr == ':' && ptr + 1 < data->path + data->path_len && *(ptr + 1) == ':') {
flags &= ~Uri_CREATE_FILE_USE_DOS_PATH;
} else if(known_scheme && !is_unreserved(*ptr) && !is_reserved(*ptr) &&
} else if(known_scheme && is_ascii(*ptr) && !is_unreserved(*ptr) && !is_reserved(*ptr) &&
!(flags & Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS)) {
if(!(is_file && (flags & Uri_CREATE_FILE_USE_DOS_PATH))) {
if(!computeOnly)
@ -3207,7 +3212,7 @@ static BOOL canonicalize_query(const parse_data *data, Uri *uri, DWORD flags, BO
continue;
}
}
} else if(known_scheme && !is_unreserved(*ptr) && !is_reserved(*ptr)) {
} else if(known_scheme && is_ascii(*ptr) && !is_unreserved(*ptr) && !is_reserved(*ptr)) {
if(!(flags & Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS) &&
!(flags & Uri_CREATE_NO_DECODE_EXTRA_INFO)) {
if(!computeOnly)
@ -3257,7 +3262,7 @@ static BOOL canonicalize_fragment(const parse_data *data, Uri *uri, DWORD flags,
continue;
}
}
} else if(known_scheme && !is_unreserved(*ptr) && !is_reserved(*ptr)) {
} else if(known_scheme && is_ascii(*ptr) && !is_unreserved(*ptr) && !is_reserved(*ptr)) {
if(!(flags & Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS) &&
!(flags & Uri_CREATE_NO_DECODE_EXTRA_INFO)) {
if(!computeOnly)
@ -6890,7 +6895,7 @@ static HRESULT parse_canonicalize(const Uri *uri, DWORD flags, LPWSTR output,
len += 3;
do_default_action = FALSE;
}
} else if(!is_reserved(*ptr) && !is_unreserved(*ptr)) {
} else if(is_ascii(*ptr) && !is_reserved(*ptr) && !is_unreserved(*ptr)) {
if(flags & URL_ESCAPE_UNSAFE) {
if(len + 3 < output_len)
pct_encode_val(*ptr, output+len);