From d33504b9bb2de873ffe15c1b4d0598fdfd414672 Mon Sep 17 00:00:00 2001
From: Alexandre Julliard
Date: Tue, 3 Dec 2019 12:34:00 +0100
Subject: [PATCH] kernel32: Use the Rtl UTF8 conversion functions.

Signed-off-by: Alexandre Julliard
---
 dlls/kernel32/locale.c    | 84 +++++++++++++++++++++++++-----------
 dlls/msvcrt/tests/file.c  |  4 +-
 dlls/ntdll/tests/rtlstr.c | 90 ++++++++++++++++++++++++++++++++++-----
 3 files changed, 141 insertions(+), 37 deletions(-)

diff --git a/dlls/kernel32/locale.c b/dlls/kernel32/locale.c
index cd9f7fac2fd..02f982966e9 100644
--- a/dlls/kernel32/locale.c
+++ b/dlls/kernel32/locale.c
@@ -1030,6 +1030,32 @@ static int utf7_mbstowcs(const char *src, int srclen, WCHAR *dst, int dstlen)
     return dest_index;
 }
 
+static int mbstowcs_utf8( DWORD flags, LPCSTR src, INT srclen, LPWSTR dst, INT dstlen )
+{
+    DWORD reslen;
+    NTSTATUS status;
+
+    if (flags & ~MB_FLAGSMASK)
+    {
+        SetLastError( ERROR_INVALID_FLAGS );
+        return 0;
+    }
+    if (!dstlen) dst = NULL;
+    status = RtlUTF8ToUnicodeN( dst, dstlen * sizeof(WCHAR), &reslen, src, srclen );
+    if (status == STATUS_SOME_NOT_MAPPED)
+    {
+        if (flags & MB_ERR_INVALID_CHARS)
+        {
+            SetLastError( ERROR_NO_UNICODE_TRANSLATION );
+            return 0;
+        }
+    }
+    else if (!set_ntstatus( status )) reslen = 0;
+
+    return reslen / sizeof(WCHAR);
+}
+
+
 /***********************************************************************
  *              MultiByteToWideChar   (KERNEL32.@)
  *
@@ -1085,24 +1111,19 @@ INT WINAPI MultiByteToWideChar( UINT page, DWORD flags, LPCSTR src, INT srclen,
         }
         ret = utf7_mbstowcs( src, srclen, dst, dstlen );
         break;
+    case CP_UTF8:
+        return mbstowcs_utf8( flags, src, srclen, dst, dstlen );
     case CP_UNIXCP:
         if (unix_cptable)
         {
             ret = wine_cp_mbstowcs( unix_cptable, flags, src, srclen, dst, dstlen );
             break;
         }
-#ifdef __APPLE__
-        flags |= MB_COMPOSITE;  /* work around broken Mac OS X filesystem that enforces decomposed Unicode */
+        ret = mbstowcs_utf8( flags, src, srclen, dst, dstlen );
+#ifdef __APPLE__  /* work around broken Mac OS X filesystem that enforces decomposed Unicode */
+        if (ret && dstlen) ret = wine_compose_string( dst, ret );
 #endif
-        /* fall through */
-    case CP_UTF8:
-        if (flags & ~MB_FLAGSMASK)
-        {
-            SetLastError( ERROR_INVALID_FLAGS );
-            return 0;
-        }
-        ret = wine_utf8_mbstowcs( flags, src, srclen, dst, dstlen );
-        break;
+        return ret;
     default:
         if (!(table = get_codepage_table( page )))
         {
@@ -1254,6 +1275,30 @@ static int utf7_wcstombs(const WCHAR *src, int srclen, char *dst, int dstlen)
     return dest_index;
 }
 
+static int wcstombs_utf8( DWORD flags, LPCWSTR src, INT srclen, LPSTR dst, INT dstlen )
+{
+    DWORD reslen;
+    NTSTATUS status;
+
+    if (flags & ~WC_FLAGSMASK)
+    {
+        SetLastError( ERROR_INVALID_FLAGS );
+        return 0;
+    }
+    if (!dstlen) dst = NULL;
+    status = RtlUnicodeToUTF8N( dst, dstlen, &reslen, src, srclen * sizeof(WCHAR) );
+    if (status == STATUS_SOME_NOT_MAPPED)
+    {
+        if (flags & WC_ERR_INVALID_CHARS)
+        {
+            SetLastError( ERROR_NO_UNICODE_TRANSLATION );
+            return 0;
+        }
+    }
+    else if (!set_ntstatus( status )) reslen = 0;
+    return reslen;
+}
+
 /***********************************************************************
  *              WideCharToMultiByte   (KERNEL32.@)
  *
@@ -1329,26 +1374,17 @@ INT WINAPI WideCharToMultiByte( UINT page, DWORD flags, LPCWSTR src, INT srclen,
             ret = wine_cp_wcstombs( unix_cptable, flags, src, srclen, dst, dstlen,
                                     defchar, used ? &used_tmp : NULL );
             if (used) *used = used_tmp;
+            break;
         }
-        else
-        {
-            ret = wine_utf8_wcstombs( flags, src, srclen, dst, dstlen );
-            if (used) *used = FALSE;
-        }
-        break;
+        if (used) *used = FALSE;
+        return wcstombs_utf8( flags, src, srclen, dst, dstlen );
     case CP_UTF8:
         if (defchar || used)
         {
             SetLastError( ERROR_INVALID_PARAMETER );
             return 0;
         }
-        if (flags & ~WC_FLAGSMASK)
-        {
-            SetLastError( ERROR_INVALID_FLAGS );
-            return 0;
-        }
-        ret = wine_utf8_wcstombs( flags, src, srclen, dst, dstlen );
-        break;
+        return wcstombs_utf8( flags, src, srclen, dst, dstlen );
     default:
         if (!(table = get_codepage_table( page )))
         {
diff --git a/dlls/msvcrt/tests/file.c b/dlls/msvcrt/tests/file.c
index 97d81abd00a..63d5100de28 100644
--- a/dlls/msvcrt/tests/file.c
+++ b/dlls/msvcrt/tests/file.c
@@ -1336,9 +1336,9 @@ static void test_file_write_read( void )
   /* test invalid utf8 sequence */
   lseek(tempfd, 5, SEEK_SET);
   ret = _read(tempfd, btext, sizeof(btext));
-  todo_wine ok(ret == 10, "_read returned %d, expected 10\n", ret);
+  ok(ret == 10, "_read returned %d, expected 10\n", ret);
   /* invalid char should be replaced by U+FFFD in MultiByteToWideChar */
-  todo_wine ok(!memcmp(btext, "\xfd\xff", 2), "invalid UTF8 character was not replaced by U+FFFD\n");
+  ok(!memcmp(btext, "\xfd\xff", 2), "invalid UTF8 character was not replaced by U+FFFD\n");
   ok(!memcmp(btext+ret-8, "\x62\x00\x7c\x01\x0d\x00\x0a\x00", 8), "btext is incorrect\n");
   _close(tempfd);
 }
diff --git a/dlls/ntdll/tests/rtlstr.c b/dlls/ntdll/tests/rtlstr.c
index 6be036f4068..6f59e956c2c 100644
--- a/dlls/ntdll/tests/rtlstr.c
+++ b/dlls/ntdll/tests/rtlstr.c
@@ -2028,8 +2028,8 @@ static const struct unicode_to_utf8_test unicode_to_utf8[] = {
     { { '-',0xfeff,'-',0xfffe,'-',0 }, "-\xEF\xBB\xBF-\xEF\xBF\xBE-", STATUS_SUCCESS },
     { { 0xfeff,'-',0 }, "\xEF\xBB\xBF-", STATUS_SUCCESS },
     { { 0xfffe,'-',0 }, "\xEF\xBF\xBE-", STATUS_SUCCESS },
-    /* invalid code point */
-    { { 0xffff,'-',0 }, "\xEF\xBF\xBF-", STATUS_SUCCESS },
+    /* invalid code points */
+    { { 0xfffd, '-', 0xfffe, '-', 0xffff,'-',0 }, "\xEF\xBF\xBD-\xEF\xBF\xBE-\xEF\xBF\xBF-", STATUS_SUCCESS },
     /* canonically equivalent representations -- no normalization should happen */
     { { '-',0x1e09,'-',0 }, "-\xE1\xB8\x89-", STATUS_SUCCESS },
     { { '-',0x0107,0x0327,'-',0 }, "-\xC4\x87\xCC\xA7-", STATUS_SUCCESS },
@@ -2086,7 +2086,7 @@ static void test_RtlUnicodeToUTF8N(void)
     const unsigned char special_expected[] = { 'X',0xc2,0x80,0xef,0xbf,0xbd,0 };
     unsigned int input_len;
     const unsigned int test_count = ARRAY_SIZE(unicode_to_utf8);
-    unsigned int i;
+    unsigned int i, ret;
 
     if (!pRtlUnicodeToUTF8N)
     {
@@ -2227,6 +2227,14 @@ static void test_RtlUnicodeToUTF8N(void)
            i, bytes_out, buffer, unicode_to_utf8[i].expected);
         ok(buffer[bytes_out] == 0x55,
            "(test %d): behind string: 0x%x\n", i, buffer[bytes_out]);
+        memset(buffer, 0x55, sizeof(buffer));
+        ret = WideCharToMultiByte( CP_UTF8, 0, unicode_to_utf8[i].unicode, lstrlenW(unicode_to_utf8[i].unicode),
+                                   buffer, sizeof(buffer), NULL, NULL );
+        ok( ret == strlen(unicode_to_utf8[i].expected), "(test %d): wrong len %u\n", i, ret );
+        ok(!memcmp(buffer, unicode_to_utf8[i].expected, ret),
+           "(test %d): got \"%.*s\", expected \"%s\"\n",
+           i, ret, buffer, unicode_to_utf8[i].expected);
+        ok(buffer[ret] == 0x55, "(test %d): behind string: 0x%x\n", i, buffer[ret]);
 
         /* same test but include the null terminator */
         bytes_out = 0x55555555;
@@ -2245,6 +2253,30 @@ static void test_RtlUnicodeToUTF8N(void)
            i, bytes_out, buffer, unicode_to_utf8[i].expected);
         ok(buffer[bytes_out] == 0x55,
            "(test %d): behind string: 0x%x\n", i, buffer[bytes_out]);
+        memset(buffer, 0x55, sizeof(buffer));
+        ret = WideCharToMultiByte( CP_UTF8, 0, unicode_to_utf8[i].unicode, -1, buffer, sizeof(buffer), NULL, NULL );
+        ok( ret == strlen(unicode_to_utf8[i].expected) + 1, "(test %d): wrong len %u\n", i, ret );
+        ok(!memcmp(buffer, unicode_to_utf8[i].expected, ret),
+           "(test %d): got \"%.*s\", expected \"%s\"\n",
+           i, ret, buffer, unicode_to_utf8[i].expected);
+        ok(buffer[ret] == 0x55, "(test %d): behind string: 0x%x\n", i, buffer[ret]);
+        SetLastError( 0xdeadbeef );
+        memset(buffer, 0x55, sizeof(buffer));
+        ret = WideCharToMultiByte( CP_UTF8, WC_ERR_INVALID_CHARS, unicode_to_utf8[i].unicode, -1,
+                                   buffer, sizeof(buffer), NULL, NULL );
+        if (unicode_to_utf8[i].status == STATUS_SOME_NOT_MAPPED)
+        {
+            ok( ret == 0, "(test %d): wrong len %u\n", i, ret );
+            ok( GetLastError() == ERROR_NO_UNICODE_TRANSLATION, "(test %d): wrong error %u\n", i, GetLastError() );
+            ret = strlen(unicode_to_utf8[i].expected) + 1;
+        }
+        else
+            ok( ret == strlen(unicode_to_utf8[i].expected) + 1, "(test %d): wrong len %u\n", i, ret );
+
+        ok(!memcmp(buffer, unicode_to_utf8[i].expected, ret),
+           "(test %d): got \"%.*s\", expected \"%s\"\n",
+           i, ret, buffer, unicode_to_utf8[i].expected);
+        ok(buffer[ret] == 0x55, "(test %d): behind string: 0x%x\n", i, buffer[ret]);
     }
 }
 
@@ -2329,9 +2361,8 @@ static const struct utf8_to_unicode_test utf8_to_unicode[] = {
     { "-\xEF\xBB\xBF-\xEF\xBF\xBE-", { '-',0xfeff,'-',0xfffe,'-',0 }, STATUS_SUCCESS },
     { "\xEF\xBB\xBF-", { 0xfeff,'-',0 }, STATUS_SUCCESS },
     { "\xEF\xBF\xBE-", { 0xfffe,'-',0 }, STATUS_SUCCESS },
-    /* invalid code point */
-    /* 0xffff */
-    { "\xEF\xBF\xBF-", { 0xffff,'-',0 }, STATUS_SUCCESS },
+    /* invalid code points */
+    { "\xEF\xBF\xBD-\xEF\xBF\xBE-\xEF\xBF\xBF-", { 0xfffd,'-',0xfffe,'-',0xffff,'-',0 }, STATUS_SUCCESS },
     /* canonically equivalent representations -- no normalization should happen */
     { "-\xE1\xB8\x89-", { '-',0x1e09,'-',0 }, STATUS_SUCCESS },
     { "-\xC4\x87\xCC\xA7-", { '-',0x0107,0x0327,'-',0 }, STATUS_SUCCESS },
@@ -2388,7 +2419,7 @@ static void test_RtlUTF8ToUnicodeN(void)
     const WCHAR special_expected[] = { 'X',0x80,0xd800,0xdc00,0 };
     unsigned int input_len;
     const unsigned int test_count = ARRAY_SIZE(utf8_to_unicode);
-    unsigned int i;
+    unsigned int i, ret;
 
     if (!pRtlUTF8ToUnicodeN)
     {
@@ -2497,8 +2528,17 @@ static void test_RtlUTF8ToUnicodeN(void)
         ok(!memcmp(buffer, utf8_to_unicode[i].expected, bytes_out),
            "(test %d): got %s, expected %s\n",
            i, wine_dbgstr_wn(buffer, bytes_out / sizeof(WCHAR)), wine_dbgstr_w(utf8_to_unicode[i].expected));
-        ok(buffer[bytes_out] == 0x5555,
-           "(test %d): behind string: 0x%x\n", i, buffer[bytes_out]);
+        ok(buffer[bytes_out / sizeof(WCHAR)] == 0x5555,
+           "(test %d): behind string: 0x%x\n", i, buffer[bytes_out / sizeof(WCHAR)]);
+        memset(buffer, 0x55, sizeof(buffer));
+        ret = MultiByteToWideChar( CP_UTF8, 0, utf8_to_unicode[i].utf8, strlen(utf8_to_unicode[i].utf8),
+                                   buffer, ARRAY_SIZE(buffer) );
+        ok( ret == lstrlenW(utf8_to_unicode[i].expected), "(test %d): wrong len %u\n", i, ret );
+        ok(!memcmp(buffer, utf8_to_unicode[i].expected, lstrlenW(utf8_to_unicode[i].expected) * sizeof(WCHAR)),
+           "(test %d): got %s, expected %s\n",
+           i, wine_dbgstr_wn(buffer, ret), wine_dbgstr_w(utf8_to_unicode[i].expected));
+        ok(buffer[ret] == 0x5555,
+           "(test %d): behind string: 0x%x\n", i, buffer[ret]);
 
         /* same test but include the null terminator */
         bytes_out = 0x55555555;
@@ -2515,8 +2555,36 @@ static void test_RtlUTF8ToUnicodeN(void)
         ok(!memcmp(buffer, utf8_to_unicode[i].expected, bytes_out),
            "(test %d): got %s, expected %s\n",
            i, wine_dbgstr_wn(buffer, bytes_out / sizeof(WCHAR)), wine_dbgstr_w(utf8_to_unicode[i].expected));
-        ok(buffer[bytes_out] == 0x5555,
-           "(test %d): behind string: 0x%x\n", i, buffer[bytes_out]);
+        ok(buffer[bytes_out / sizeof(WCHAR)] == 0x5555,
+           "(test %d): behind string: 0x%x\n", i, buffer[bytes_out / sizeof(WCHAR)]);
+
+        memset(buffer, 0x55, sizeof(buffer));
+        ret = MultiByteToWideChar( CP_UTF8, 0, utf8_to_unicode[i].utf8, -1, buffer, ARRAY_SIZE(buffer) );
+        ok( ret == lstrlenW(utf8_to_unicode[i].expected) + 1, "(test %d): wrong len %u\n", i, ret );
+        ok(!memcmp(buffer, utf8_to_unicode[i].expected, ret * sizeof(WCHAR)),
+           "(test %d): got %s, expected %s\n",
+           i, wine_dbgstr_wn(buffer, ret), wine_dbgstr_w(utf8_to_unicode[i].expected));
+        ok(buffer[ret] == 0x5555,
+           "(test %d): behind string: 0x%x\n", i, buffer[ret]);
+
+        SetLastError( 0xdeadbeef );
+        memset(buffer, 0x55, sizeof(buffer));
+        ret = MultiByteToWideChar( CP_UTF8, MB_ERR_INVALID_CHARS,
+                                   utf8_to_unicode[i].utf8, -1, buffer, ARRAY_SIZE(buffer) );
+        if (utf8_to_unicode[i].status == STATUS_SOME_NOT_MAPPED)
+        {
+            ok( ret == 0, "(test %d): wrong len %u\n", i, ret );
+            ok( GetLastError() == ERROR_NO_UNICODE_TRANSLATION, "(test %d): wrong error %u\n", i, GetLastError() );
+            ret = lstrlenW(utf8_to_unicode[i].expected) + 1;
+        }
+        else
+            ok( ret == lstrlenW(utf8_to_unicode[i].expected) + 1, "(test %d): wrong len %u\n", i, ret );
+
+        ok(!memcmp(buffer, utf8_to_unicode[i].expected, ret * sizeof(WCHAR)),
+           "(test %d): got %s, expected %s\n",
+           i, wine_dbgstr_wn(buffer, ret), wine_dbgstr_w(utf8_to_unicode[i].expected));
+        ok(buffer[ret] == 0x5555,
+           "(test %d): behind string: 0x%x\n", i, buffer[ret]);
     }
 }
 