From 0b39344ef0b02a969e9755a891f7d7f1a2986d48 Mon Sep 17 00:00:00 2001 From: Alexandre Julliard Date: Tue, 3 Dec 2019 10:10:39 +0100 Subject: [PATCH] ntdll: Reimplement RtlUnicodeToUTF8N. This avoids relying on wine_utf8_wcstombs(). Signed-off-by: Alexandre Julliard --- dlls/ntdll/locale.c | 111 ++++++++++++++++++++++++++++++++++++++ dlls/ntdll/rtlstr.c | 33 ------------ dlls/ntdll/tests/rtlstr.c | 16 ------ 3 files changed, 111 insertions(+), 49 deletions(-) diff --git a/dlls/ntdll/locale.c b/dlls/ntdll/locale.c index 5ad32e4f1b8..09f64b8dad7 100644 --- a/dlls/ntdll/locale.c +++ b/dlls/ntdll/locale.c @@ -1307,6 +1307,117 @@ found: } +/* get the next char value taking surrogates into account */ +static inline unsigned int get_surrogate_value( const WCHAR *src, unsigned int srclen ) +{ + if (src[0] >= 0xd800 && src[0] <= 0xdfff) /* surrogate pair */ + { + if (src[0] > 0xdbff || /* invalid high surrogate */ + srclen <= 1 || /* missing low surrogate */ + src[1] < 0xdc00 || src[1] > 0xdfff) /* invalid low surrogate */ + return 0; + return 0x10000 + ((src[0] & 0x3ff) << 10) + (src[1] & 0x3ff); + } + return src[0]; +} + + +/************************************************************************** + * RtlUnicodeToUTF8N (NTDLL.@) + */ +NTSTATUS WINAPI RtlUnicodeToUTF8N( char *dst, DWORD dstlen, DWORD *reslen, const WCHAR *src, DWORD srclen ) +{ + char *end; + unsigned int val, len; + NTSTATUS status = STATUS_SUCCESS; + + if (!src) return STATUS_INVALID_PARAMETER_4; + if (!reslen) return STATUS_INVALID_PARAMETER; + if (dst && (srclen & 1)) return STATUS_INVALID_PARAMETER_5; + + srclen /= sizeof(WCHAR); + + if (!dst) + { + for (len = 0; srclen; srclen--, src++) + { + if (*src < 0x80) len++; /* 0x00-0x7f: 1 byte */ + else if (*src < 0x800) len += 2; /* 0x80-0x7ff: 2 bytes */ + else + { + if (!(val = get_surrogate_value( src, srclen ))) + { + val = 0xfffd; + status = STATUS_SOME_NOT_MAPPED; + } + if (val < 0x10000) len += 3; /* 0x800-0xffff: 3 bytes */ + else /* 0x10000-0x10ffff: 4 bytes */ + { + len += 4; + src++; + srclen--; + } + } + } + *reslen = len; + return status; + } + + for (end = dst + dstlen; srclen; srclen--, src++) + { + WCHAR ch = *src; + + if (ch < 0x80) /* 0x00-0x7f: 1 byte */ + { + if (dst > end - 1) break; + *dst++ = ch; + continue; + } + if (ch < 0x800) /* 0x80-0x7ff: 2 bytes */ + { + if (dst > end - 2) break; + dst[1] = 0x80 | (ch & 0x3f); + ch >>= 6; + dst[0] = 0xc0 | ch; + dst += 2; + continue; + } + if (!(val = get_surrogate_value( src, srclen ))) + { + val = 0xfffd; + status = STATUS_SOME_NOT_MAPPED; + } + if (val < 0x10000) /* 0x800-0xffff: 3 bytes */ + { + if (dst > end - 3) break; + dst[2] = 0x80 | (val & 0x3f); + val >>= 6; + dst[1] = 0x80 | (val & 0x3f); + val >>= 6; + dst[0] = 0xe0 | val; + dst += 3; + } + else /* 0x10000-0x10ffff: 4 bytes */ + { + if (dst > end - 4) break; + dst[3] = 0x80 | (val & 0x3f); + val >>= 6; + dst[2] = 0x80 | (val & 0x3f); + val >>= 6; + dst[1] = 0x80 | (val & 0x3f); + val >>= 6; + dst[0] = 0xf0 | val; + dst += 4; + src++; + srclen--; + } + } + if (srclen) status = STATUS_BUFFER_TOO_SMALL; + *reslen = dstlen - (end - dst); + return status; +} + + /****************************************************************************** * RtlIsNormalizedString (NTDLL.@) */ diff --git a/dlls/ntdll/rtlstr.c b/dlls/ntdll/rtlstr.c index 8196dd71786..c0d8ec64775 100644 --- a/dlls/ntdll/rtlstr.c +++ b/dlls/ntdll/rtlstr.c @@ -709,39 +709,6 @@ NTSTATUS WINAPI RtlUnicodeStringToOemString( STRING *oem, } -/************************************************************************** - * RtlUnicodeToUTF8N (NTDLL.@) - * - * Converts a Unicode string to a UTF-8 string. - * - * RETURNS - * NTSTATUS code - */ -NTSTATUS WINAPI RtlUnicodeToUTF8N( LPSTR dst, DWORD dstlen, LPDWORD reslen, - LPCWSTR src, DWORD srclen) -{ - int ret; - - if (!src) return STATUS_INVALID_PARAMETER_4; - if (!reslen) return STATUS_INVALID_PARAMETER; - if (dst && (srclen & 1)) return STATUS_INVALID_PARAMETER_5; - - if (!dstlen && dst) - { - char c; - dst = &c; - ret = wine_utf8_wcstombs( 0, src, srclen / sizeof(WCHAR), dst, 1 ); - if (ret > 0) ret--; - } - else - ret = wine_utf8_wcstombs( 0, src, srclen / sizeof(WCHAR), dst, dstlen ); - if (reslen) - *reslen = (ret >= 0) ? ret : dstlen; /* overflow -> we filled up to dstlen */ - if (ret < 0) return STATUS_BUFFER_TOO_SMALL; - return STATUS_SUCCESS; -} - - /* CASE CONVERSIONS */ diff --git a/dlls/ntdll/tests/rtlstr.c b/dlls/ntdll/tests/rtlstr.c index 4583ddeb4b2..6be036f4068 100644 --- a/dlls/ntdll/tests/rtlstr.c +++ b/dlls/ntdll/tests/rtlstr.c @@ -2152,11 +2152,8 @@ static void test_RtlUnicodeToUTF8N(void) length_expect(0, 0, STATUS_SUCCESS); length_expect(1, 1, STATUS_SUCCESS); length_expect(2, 3, STATUS_SUCCESS); -todo_wine -{ length_expect(3, 6, STATUS_SOME_NOT_MAPPED); length_expect(4, 7, STATUS_SOME_NOT_MAPPED); -} #undef length_expect for (i = 0; i <= 6; i++) @@ -2164,20 +2161,14 @@ todo_wine memset(buffer, 0x55, sizeof(buffer)); bytes_out = 0xdeadbeef; status = pRtlUnicodeToUTF8N(buffer, i, &bytes_out, special_string, sizeof(special_string)); -todo_wine_if (i == 4 || i == 5 || i == 6) ok(status == STATUS_BUFFER_TOO_SMALL, "%d: status = 0x%x\n", i, status); -todo_wine_if (bytes_out != special_string_len[i]) ok(bytes_out == special_string_len[i], "%d: expected %u, got %u\n", i, special_string_len[i], bytes_out); -todo_wine_if (i == 6) ok(memcmp(buffer, special_expected, special_string_len[i]) == 0, "%d: bad conversion\n", i); } status = pRtlUnicodeToUTF8N(buffer, 7, &bytes_out, special_string, sizeof(special_string)); -todo_wine ok(status == STATUS_SOME_NOT_MAPPED, "status = 0x%x\n", status); -todo_wine ok(bytes_out == special_string_len[7], "expected %u, got %u\n", special_string_len[7], bytes_out); -todo_wine ok(memcmp(buffer, special_expected, 7) == 0, "bad conversion\n"); /* conversion behavior with varying input length */ @@ -2225,19 +2216,15 @@ todo_wine status = pRtlUnicodeToUTF8N( buffer, sizeof(buffer), &bytes_out, unicode_to_utf8[i].unicode, lstrlenW(unicode_to_utf8[i].unicode) * sizeof(WCHAR)); -todo_wine_if(unicode_to_utf8[i].status == STATUS_SOME_NOT_MAPPED) ok(status == unicode_to_utf8[i].status, "(test %d): status is 0x%x, expected 0x%x\n", i, status, unicode_to_utf8[i].status); -todo_wine_if(i == 9 || i == 10 || i == 11) -{ ok(bytes_out == strlen(unicode_to_utf8[i].expected), "(test %d): bytes_out is %u, expected %u\n", i, bytes_out, lstrlenA(unicode_to_utf8[i].expected)); ok(!memcmp(buffer, unicode_to_utf8[i].expected, bytes_out), "(test %d): got \"%.*s\", expected \"%s\"\n", i, bytes_out, buffer, unicode_to_utf8[i].expected); -} ok(buffer[bytes_out] == 0x55, "(test %d): behind string: 0x%x\n", i, buffer[bytes_out]); @@ -2247,8 +2234,6 @@ todo_wine_if(i == 9 || i == 10 || i == 11) status = pRtlUnicodeToUTF8N( buffer, sizeof(buffer), &bytes_out, unicode_to_utf8[i].unicode, (lstrlenW(unicode_to_utf8[i].unicode) + 1) * sizeof(WCHAR)); -todo_wine_if(i == 9 || i == 10 || i == 11) -{ ok(status == unicode_to_utf8[i].status, "(test %d): status is 0x%x, expected 0x%x\n", i, status, unicode_to_utf8[i].status); @@ -2258,7 +2243,6 @@ todo_wine_if(i == 9 || i == 10 || i == 11) ok(!memcmp(buffer, unicode_to_utf8[i].expected, bytes_out), "(test %d): got \"%.*s\", expected \"%s\"\n", i, bytes_out, buffer, unicode_to_utf8[i].expected); -} ok(buffer[bytes_out] == 0x55, "(test %d): behind string: 0x%x\n", i, buffer[bytes_out]); }