From d33504b9bb2de873ffe15c1b4d0598fdfd414672 Mon Sep 17 00:00:00 2001
From: Alexandre Julliard <julliard@winehq.org>
Date: Tue, 3 Dec 2019 12:34:00 +0100
Subject: [PATCH] kernel32: Use the Rtl UTF8 conversion functions.

Signed-off-by: Alexandre Julliard <julliard@winehq.org>
---
 dlls/kernel32/locale.c    | 84 +++++++++++++++++++++++++-----------
 dlls/msvcrt/tests/file.c  |  4 +-
 dlls/ntdll/tests/rtlstr.c | 90 ++++++++++++++++++++++++++++++++++-----
 3 files changed, 141 insertions(+), 37 deletions(-)

diff --git a/dlls/kernel32/locale.c b/dlls/kernel32/locale.c
index cd9f7fac2fd..02f982966e9 100644
--- a/dlls/kernel32/locale.c
+++ b/dlls/kernel32/locale.c
@@ -1030,6 +1030,32 @@ static int utf7_mbstowcs(const char *src, int srclen, WCHAR *dst, int dstlen)
     return dest_index;
 }
 
+/* Convert UTF-8 to UTF-16 through RtlUTF8ToUnicodeN; returns a count of WCHARs, 0 on failure. */
+static int mbstowcs_utf8( DWORD flags, LPCSTR src, INT srclen, LPWSTR dst, INT dstlen )
+{
+    NTSTATUS nts;
+    DWORD bytes;
+
+    if (flags & ~MB_FLAGSMASK)
+    {
+        SetLastError( ERROR_INVALID_FLAGS );
+        return 0;
+    }
+    /* a zero dstlen becomes a NULL destination for the Rtl call */
+    nts = RtlUTF8ToUnicodeN( dstlen ? dst : NULL, dstlen * sizeof(WCHAR), &bytes, src, srclen );
+    if (nts != STATUS_SOME_NOT_MAPPED)
+    {
+        if (!set_ntstatus( nts )) return 0;
+    }
+    else if (flags & MB_ERR_INVALID_CHARS)
+    {
+        SetLastError( ERROR_NO_UNICODE_TRANSLATION );
+        return 0;
+    }
+    return bytes / sizeof(WCHAR);
+}
+
+
 /***********************************************************************
  *              MultiByteToWideChar   (KERNEL32.@)
  *
@@ -1085,24 +1111,19 @@ INT WINAPI MultiByteToWideChar( UINT page, DWORD flags, LPCSTR src, INT srclen,
         }
         ret = utf7_mbstowcs( src, srclen, dst, dstlen );
         break;
+    case CP_UTF8:
+        return mbstowcs_utf8( flags, src, srclen, dst, dstlen );
     case CP_UNIXCP:
         if (unix_cptable)
         {
             ret = wine_cp_mbstowcs( unix_cptable, flags, src, srclen, dst, dstlen );
             break;
         }
-#ifdef __APPLE__
-        flags |= MB_COMPOSITE;  /* work around broken Mac OS X filesystem that enforces decomposed Unicode */
+        ret = mbstowcs_utf8( flags, src, srclen, dst, dstlen );
+#ifdef __APPLE__  /* work around broken Mac OS X filesystem that enforces decomposed Unicode */
+        if (ret && dstlen) ret = wine_compose_string( dst, ret );
 #endif
-        /* fall through */
-    case CP_UTF8:
-        if (flags & ~MB_FLAGSMASK)
-        {
-            SetLastError( ERROR_INVALID_FLAGS );
-            return 0;
-        }
-        ret = wine_utf8_mbstowcs( flags, src, srclen, dst, dstlen );
-        break;
+        return ret;
     default:
         if (!(table = get_codepage_table( page )))
         {
@@ -1254,6 +1275,30 @@ static int utf7_wcstombs(const WCHAR *src, int srclen, char *dst, int dstlen)
     return dest_index;
 }
 
+/* Convert UTF-16 to UTF-8 through RtlUnicodeToUTF8N; returns the Rtl result length, 0 on failure. */
+static int wcstombs_utf8( DWORD flags, LPCWSTR src, INT srclen, LPSTR dst, INT dstlen )
+{
+    NTSTATUS nts;
+    DWORD len;
+
+    if (flags & ~WC_FLAGSMASK)
+    {
+        SetLastError( ERROR_INVALID_FLAGS );
+        return 0;
+    }
+    nts = RtlUnicodeToUTF8N( dstlen ? dst : NULL, dstlen, &len, src, srclen * sizeof(WCHAR) );
+    if (nts != STATUS_SOME_NOT_MAPPED)
+    {
+        if (!set_ntstatus( nts )) return 0;
+    }
+    else if (flags & WC_ERR_INVALID_CHARS)
+    {
+        SetLastError( ERROR_NO_UNICODE_TRANSLATION );
+        return 0;
+    }
+    return len;
+}
+
 /***********************************************************************
  *              WideCharToMultiByte   (KERNEL32.@)
  *
@@ -1329,26 +1374,17 @@ INT WINAPI WideCharToMultiByte( UINT page, DWORD flags, LPCWSTR src, INT srclen,
             ret = wine_cp_wcstombs( unix_cptable, flags, src, srclen, dst, dstlen,
                                     defchar, used ? &used_tmp : NULL );
             if (used) *used = used_tmp;
+            break;
         }
-        else
-        {
-            ret = wine_utf8_wcstombs( flags, src, srclen, dst, dstlen );
-            if (used) *used = FALSE;
-        }
-        break;
+        if (used) *used = FALSE;
+        return wcstombs_utf8( flags, src, srclen, dst, dstlen );
     case CP_UTF8:
         if (defchar || used)
         {
             SetLastError( ERROR_INVALID_PARAMETER );
             return 0;
         }
-        if (flags & ~WC_FLAGSMASK)
-        {
-            SetLastError( ERROR_INVALID_FLAGS );
-            return 0;
-        }
-        ret = wine_utf8_wcstombs( flags, src, srclen, dst, dstlen );
-        break;
+        return wcstombs_utf8( flags, src, srclen, dst, dstlen );
     default:
         if (!(table = get_codepage_table( page )))
         {
diff --git a/dlls/msvcrt/tests/file.c b/dlls/msvcrt/tests/file.c
index 97d81abd00a..63d5100de28 100644
--- a/dlls/msvcrt/tests/file.c
+++ b/dlls/msvcrt/tests/file.c
@@ -1336,9 +1336,9 @@ static void test_file_write_read( void )
       /* test invalid utf8 sequence */
       lseek(tempfd, 5, SEEK_SET);
       ret = _read(tempfd, btext, sizeof(btext));
-      todo_wine ok(ret == 10, "_read returned %d, expected 10\n", ret);
+      ok(ret == 10, "_read returned %d, expected 10\n", ret);
       /* invalid char should be replaced by U+FFFD in MultiByteToWideChar */
-      todo_wine ok(!memcmp(btext, "\xfd\xff", 2), "invalid UTF8 character was not replaced by U+FFFD\n");
+      ok(!memcmp(btext, "\xfd\xff", 2), "invalid UTF8 character was not replaced by U+FFFD\n");
       ok(!memcmp(btext+ret-8, "\x62\x00\x7c\x01\x0d\x00\x0a\x00", 8), "btext is incorrect\n");
       _close(tempfd);
   }
diff --git a/dlls/ntdll/tests/rtlstr.c b/dlls/ntdll/tests/rtlstr.c
index 6be036f4068..6f59e956c2c 100644
--- a/dlls/ntdll/tests/rtlstr.c
+++ b/dlls/ntdll/tests/rtlstr.c
@@ -2028,8 +2028,8 @@ static const struct unicode_to_utf8_test unicode_to_utf8[] = {
     { { '-',0xfeff,'-',0xfffe,'-',0 }, "-\xEF\xBB\xBF-\xEF\xBF\xBE-", STATUS_SUCCESS },
     { { 0xfeff,'-',0 }, "\xEF\xBB\xBF-", STATUS_SUCCESS },
     { { 0xfffe,'-',0 }, "\xEF\xBF\xBE-", STATUS_SUCCESS },
-    /* invalid code point */
-    { { 0xffff,'-',0 }, "\xEF\xBF\xBF-", STATUS_SUCCESS },
+    /* invalid code points */
+    { { 0xfffd, '-', 0xfffe, '-', 0xffff,'-',0 }, "\xEF\xBF\xBD-\xEF\xBF\xBE-\xEF\xBF\xBF-", STATUS_SUCCESS },
     /* canonically equivalent representations -- no normalization should happen */
     { { '-',0x1e09,'-',0 }, "-\xE1\xB8\x89-", STATUS_SUCCESS },
     { { '-',0x0107,0x0327,'-',0 }, "-\xC4\x87\xCC\xA7-", STATUS_SUCCESS },
@@ -2086,7 +2086,7 @@ static void test_RtlUnicodeToUTF8N(void)
     const unsigned char special_expected[] = { 'X',0xc2,0x80,0xef,0xbf,0xbd,0 };
     unsigned int input_len;
     const unsigned int test_count = ARRAY_SIZE(unicode_to_utf8);
-    unsigned int i;
+    unsigned int i, ret;
 
     if (!pRtlUnicodeToUTF8N)
     {
@@ -2227,6 +2227,14 @@ static void test_RtlUnicodeToUTF8N(void)
            i, bytes_out, buffer, unicode_to_utf8[i].expected);
         ok(buffer[bytes_out] == 0x55,
            "(test %d): behind string: 0x%x\n", i, buffer[bytes_out]);
+        memset(buffer, 0x55, sizeof(buffer));
+        ret = WideCharToMultiByte( CP_UTF8, 0, unicode_to_utf8[i].unicode, lstrlenW(unicode_to_utf8[i].unicode),
+                                   buffer, sizeof(buffer), NULL, NULL );
+        ok( ret == strlen(unicode_to_utf8[i].expected), "(test %d): wrong len %u\n", i, ret );
+        ok(!memcmp(buffer, unicode_to_utf8[i].expected, ret),
+           "(test %d): got \"%.*s\", expected \"%s\"\n",
+           i, ret, buffer, unicode_to_utf8[i].expected);
+        ok(buffer[ret] == 0x55, "(test %d): behind string: 0x%x\n", i, buffer[ret]);
 
         /* same test but include the null terminator */
         bytes_out = 0x55555555;
@@ -2245,6 +2253,30 @@ static void test_RtlUnicodeToUTF8N(void)
            i, bytes_out, buffer, unicode_to_utf8[i].expected);
         ok(buffer[bytes_out] == 0x55,
            "(test %d): behind string: 0x%x\n", i, buffer[bytes_out]);
+        memset(buffer, 0x55, sizeof(buffer));
+        ret = WideCharToMultiByte( CP_UTF8, 0, unicode_to_utf8[i].unicode, -1, buffer, sizeof(buffer), NULL, NULL );
+        ok( ret == strlen(unicode_to_utf8[i].expected) + 1, "(test %d): wrong len %u\n", i, ret );
+        ok(!memcmp(buffer, unicode_to_utf8[i].expected, ret),
+           "(test %d): got \"%.*s\", expected \"%s\"\n",
+           i, ret, buffer, unicode_to_utf8[i].expected);
+        ok(buffer[ret] == 0x55, "(test %d): behind string: 0x%x\n", i, buffer[ret]);
+        SetLastError( 0xdeadbeef );
+        memset(buffer, 0x55, sizeof(buffer));
+        ret = WideCharToMultiByte( CP_UTF8, WC_ERR_INVALID_CHARS, unicode_to_utf8[i].unicode, -1,
+                                   buffer, sizeof(buffer), NULL, NULL );
+        if (unicode_to_utf8[i].status == STATUS_SOME_NOT_MAPPED)
+        {
+            ok( ret == 0, "(test %d): wrong len %u\n", i, ret );
+            ok( GetLastError() == ERROR_NO_UNICODE_TRANSLATION, "(test %d): wrong error %u\n", i, GetLastError() );
+            ret = strlen(unicode_to_utf8[i].expected) + 1;
+        }
+        else
+            ok( ret == strlen(unicode_to_utf8[i].expected) + 1, "(test %d): wrong len %u\n", i, ret );
+
+        ok(!memcmp(buffer, unicode_to_utf8[i].expected, ret),
+           "(test %d): got \"%.*s\", expected \"%s\"\n",
+           i, ret, buffer, unicode_to_utf8[i].expected);
+        ok(buffer[ret] == 0x55, "(test %d): behind string: 0x%x\n", i, buffer[ret]);
     }
 }
 
@@ -2329,9 +2361,8 @@ static const struct utf8_to_unicode_test utf8_to_unicode[] = {
     { "-\xEF\xBB\xBF-\xEF\xBF\xBE-", { '-',0xfeff,'-',0xfffe,'-',0 }, STATUS_SUCCESS },
     { "\xEF\xBB\xBF-", { 0xfeff,'-',0 }, STATUS_SUCCESS },
     { "\xEF\xBF\xBE-", { 0xfffe,'-',0 }, STATUS_SUCCESS },
-    /* invalid code point */
-       /* 0xffff */
-    { "\xEF\xBF\xBF-", { 0xffff,'-',0 }, STATUS_SUCCESS },
+    /* invalid code points */
+    { "\xEF\xBF\xBD-\xEF\xBF\xBE-\xEF\xBF\xBF-", { 0xfffd,'-',0xfffe,'-',0xffff,'-',0 }, STATUS_SUCCESS },
     /* canonically equivalent representations -- no normalization should happen */
     { "-\xE1\xB8\x89-", { '-',0x1e09,'-',0 }, STATUS_SUCCESS },
     { "-\xC4\x87\xCC\xA7-", { '-',0x0107,0x0327,'-',0 }, STATUS_SUCCESS },
@@ -2388,7 +2419,7 @@ static void test_RtlUTF8ToUnicodeN(void)
     const WCHAR special_expected[] = { 'X',0x80,0xd800,0xdc00,0 };
     unsigned int input_len;
     const unsigned int test_count = ARRAY_SIZE(utf8_to_unicode);
-    unsigned int i;
+    unsigned int i, ret;
 
     if (!pRtlUTF8ToUnicodeN)
     {
@@ -2497,8 +2528,17 @@ static void test_RtlUTF8ToUnicodeN(void)
         ok(!memcmp(buffer, utf8_to_unicode[i].expected, bytes_out),
            "(test %d): got %s, expected %s\n",
            i, wine_dbgstr_wn(buffer, bytes_out / sizeof(WCHAR)), wine_dbgstr_w(utf8_to_unicode[i].expected));
-        ok(buffer[bytes_out] == 0x5555,
-           "(test %d): behind string: 0x%x\n", i, buffer[bytes_out]);
+        ok(buffer[bytes_out / sizeof(WCHAR)] == 0x5555,
+           "(test %d): behind string: 0x%x\n", i, buffer[bytes_out / sizeof(WCHAR)]);
+        memset(buffer, 0x55, sizeof(buffer));
+        ret = MultiByteToWideChar( CP_UTF8, 0, utf8_to_unicode[i].utf8, strlen(utf8_to_unicode[i].utf8),
+                                   buffer, ARRAY_SIZE(buffer) );
+        ok( ret == lstrlenW(utf8_to_unicode[i].expected), "(test %d): wrong len %u\n", i, ret );
+        ok(!memcmp(buffer, utf8_to_unicode[i].expected, lstrlenW(utf8_to_unicode[i].expected) * sizeof(WCHAR)),
+           "(test %d): got %s, expected %s\n",
+           i, wine_dbgstr_wn(buffer, ret), wine_dbgstr_w(utf8_to_unicode[i].expected));
+        ok(buffer[ret] == 0x5555,
+           "(test %d): behind string: 0x%x\n", i, buffer[ret]);
 
         /* same test but include the null terminator */
         bytes_out = 0x55555555;
@@ -2515,8 +2555,36 @@ static void test_RtlUTF8ToUnicodeN(void)
         ok(!memcmp(buffer, utf8_to_unicode[i].expected, bytes_out),
            "(test %d): got %s, expected %s\n",
            i, wine_dbgstr_wn(buffer, bytes_out / sizeof(WCHAR)), wine_dbgstr_w(utf8_to_unicode[i].expected));
-        ok(buffer[bytes_out] == 0x5555,
-           "(test %d): behind string: 0x%x\n", i, buffer[bytes_out]);
+        ok(buffer[bytes_out / sizeof(WCHAR)] == 0x5555,
+           "(test %d): behind string: 0x%x\n", i, buffer[bytes_out / sizeof(WCHAR)]);
+
+        memset(buffer, 0x55, sizeof(buffer));
+        ret = MultiByteToWideChar( CP_UTF8, 0, utf8_to_unicode[i].utf8, -1, buffer, ARRAY_SIZE(buffer) );
+        ok( ret == lstrlenW(utf8_to_unicode[i].expected) + 1, "(test %d): wrong len %u\n", i, ret );
+        ok(!memcmp(buffer, utf8_to_unicode[i].expected, ret * sizeof(WCHAR)),
+           "(test %d): got %s, expected %s\n",
+           i, wine_dbgstr_wn(buffer, ret), wine_dbgstr_w(utf8_to_unicode[i].expected));
+        ok(buffer[ret] == 0x5555,
+           "(test %d): behind string: 0x%x\n", i, buffer[ret]);
+
+        SetLastError( 0xdeadbeef );
+        memset(buffer, 0x55, sizeof(buffer));
+        ret = MultiByteToWideChar( CP_UTF8, MB_ERR_INVALID_CHARS,
+                                   utf8_to_unicode[i].utf8, -1, buffer, ARRAY_SIZE(buffer) );
+        if (utf8_to_unicode[i].status == STATUS_SOME_NOT_MAPPED)
+        {
+            ok( ret == 0, "(test %d): wrong len %u\n", i, ret );
+            ok( GetLastError() == ERROR_NO_UNICODE_TRANSLATION, "(test %d): wrong error %u\n", i, GetLastError() );
+            ret = lstrlenW(utf8_to_unicode[i].expected) + 1;
+        }
+        else
+            ok( ret == lstrlenW(utf8_to_unicode[i].expected) + 1, "(test %d): wrong len %u\n", i, ret );
+
+        ok(!memcmp(buffer, utf8_to_unicode[i].expected, ret * sizeof(WCHAR)),
+           "(test %d): got %s, expected %s\n",
+           i, wine_dbgstr_wn(buffer, ret), wine_dbgstr_w(utf8_to_unicode[i].expected));
+        ok(buffer[ret] == 0x5555,
+           "(test %d): behind string: 0x%x\n", i, buffer[ret]);
     }
 }