ntdll: Reimplement RtlUnicodeToUTF8N.

This avoids relying on wine_utf8_wcstombs().

Signed-off-by: Alexandre Julliard <julliard@winehq.org>
This commit is contained in:
Alexandre Julliard 2019-12-03 10:10:39 +01:00
parent 0d0b7a1451
commit 0b39344ef0
3 changed files with 111 additions and 49 deletions

View File

@ -1307,6 +1307,117 @@ found:
}
/* decode the code point that starts at src[0], combining a surrogate pair if present;
 * returns 0 when src[0] is a surrogate that does not form a valid pair
 * (srclen is the number of WCHARs available starting at src) */
static inline unsigned int get_surrogate_value( const WCHAR *src, unsigned int srclen )
{
    const WCHAR lead = src[0];
    WCHAR trail;

    if (lead < 0xd800 || lead > 0xdfff) return lead;  /* not a surrogate: value is the char itself */
    if (lead >= 0xdc00) return 0;  /* low surrogate in the high surrogate position */
    if (srclen < 2) return 0;      /* high surrogate is the last char of the input */

    trail = src[1];
    if (trail < 0xdc00 || trail > 0xdfff) return 0;  /* high surrogate not followed by a low one */

    return 0x10000 + ((lead & 0x3ff) << 10) + (trail & 0x3ff);
}
/**************************************************************************
 *	RtlUnicodeToUTF8N   (NTDLL.@)
 *
 * Convert a UTF-16 string to UTF-8.
 *
 * PARAMS
 *  dst    [O] Destination buffer, or NULL to only compute the required size
 *  dstlen [I] Size of dst, in bytes
 *  reslen [O] Number of bytes written to dst (number of bytes required when dst is NULL)
 *  src    [I] Source string
 *  srclen [I] Size of src, in bytes (must be even when dst is not NULL)
 *
 * RETURNS
 *  STATUS_SUCCESS              Everything was converted.
 *  STATUS_SOME_NOT_MAPPED      An invalid surrogate was replaced by U+FFFD.
 *  STATUS_BUFFER_TOO_SMALL     dst filled up before src was exhausted; only whole
 *                              UTF-8 sequences are written and counted in reslen.
 *  STATUS_INVALID_PARAMETER_4  src is NULL.
 *  STATUS_INVALID_PARAMETER    reslen is NULL.
 *  STATUS_INVALID_PARAMETER_5  dst is not NULL and srclen is odd.
 */
NTSTATUS WINAPI RtlUnicodeToUTF8N( char *dst, DWORD dstlen, DWORD *reslen, const WCHAR *src, DWORD srclen )
{
    char *end;
    unsigned int val, len;
    NTSTATUS status = STATUS_SUCCESS;

    if (!src) return STATUS_INVALID_PARAMETER_4;
    if (!reslen) return STATUS_INVALID_PARAMETER;
    if (dst && (srclen & 1)) return STATUS_INVALID_PARAMETER_5;

    srclen /= sizeof(WCHAR);  /* from here on srclen counts WCHARs, not bytes */

    if (!dst)
    {
        /* size query: compute the encoded length without writing anything */
        for (len = 0; srclen; srclen--, src++)
        {
            if (*src < 0x80) len++;  /* 0x00-0x7f: 1 byte */
            else if (*src < 0x800) len += 2;  /* 0x80-0x7ff: 2 bytes */
            else
            {
                if (!(val = get_surrogate_value( src, srclen )))
                {
                    val = 0xfffd;  /* invalid surrogate: counted as the replacement character */
                    status = STATUS_SOME_NOT_MAPPED;
                }
                if (val < 0x10000) len += 3; /* 0x800-0xffff: 3 bytes */
                else   /* 0x10000-0x10ffff: 4 bytes */
                {
                    len += 4;
                    /* a valid pair used two WCHARs; skip the low surrogate as well */
                    src++;
                    srclen--;
                }
            }
        }
        *reslen = len;
        return status;
    }

    /* Remaining room is tested as (end - dst < n) rather than (dst > end - n):
     * the latter forms a pointer before the start of the buffer when dstlen < n. */
    for (end = dst + dstlen; srclen; srclen--, src++)
    {
        WCHAR ch = *src;

        if (ch < 0x80)  /* 0x00-0x7f: 1 byte */
        {
            if (end - dst < 1) break;
            *dst++ = ch;
            continue;
        }
        if (ch < 0x800)  /* 0x80-0x7ff: 2 bytes */
        {
            if (end - dst < 2) break;
            dst[1] = 0x80 | (ch & 0x3f);
            ch >>= 6;
            dst[0] = 0xc0 | ch;
            dst += 2;
            continue;
        }
        if (!(val = get_surrogate_value( src, srclen )))
        {
            val = 0xfffd;  /* invalid surrogate: emit the replacement character */
            status = STATUS_SOME_NOT_MAPPED;
        }
        if (val < 0x10000)  /* 0x800-0xffff: 3 bytes */
        {
            if (end - dst < 3) break;
            dst[2] = 0x80 | (val & 0x3f);
            val >>= 6;
            dst[1] = 0x80 | (val & 0x3f);
            val >>= 6;
            dst[0] = 0xe0 | val;
            dst += 3;
        }
        else   /* 0x10000-0x10ffff: 4 bytes */
        {
            if (end - dst < 4) break;
            dst[3] = 0x80 | (val & 0x3f);
            val >>= 6;
            dst[2] = 0x80 | (val & 0x3f);
            val >>= 6;
            dst[1] = 0x80 | (val & 0x3f);
            val >>= 6;
            dst[0] = 0xf0 | val;
            dst += 4;
            /* a valid pair used two WCHARs; skip the low surrogate as well */
            src++;
            srclen--;
        }
    }
    /* input left over means the loop stopped for lack of room; this overrides SOME_NOT_MAPPED */
    if (srclen) status = STATUS_BUFFER_TOO_SMALL;
    *reslen = dstlen - (end - dst);
    return status;
}
/******************************************************************************
* RtlIsNormalizedString (NTDLL.@)
*/

View File

@ -709,39 +709,6 @@ NTSTATUS WINAPI RtlUnicodeStringToOemString( STRING *oem,
}
/**************************************************************************
 *	RtlUnicodeToUTF8N   (NTDLL.@)
 *
 * Converts a Unicode string to a UTF-8 string by delegating to
 * wine_utf8_wcstombs().
 *
 * srclen is given in bytes and is divided by sizeof(WCHAR) before being
 * handed to the converter. On return *reslen holds the converter's result,
 * or dstlen when the converter reports a negative value (overflow).
 *
 * RETURNS
 *  STATUS_SUCCESS, STATUS_BUFFER_TOO_SMALL on overflow, or one of the
 *  STATUS_INVALID_PARAMETER codes when an argument is rejected.
 */
NTSTATUS WINAPI RtlUnicodeToUTF8N( LPSTR dst, DWORD dstlen, LPDWORD reslen,
                                   LPCWSTR src, DWORD srclen)
{
    char scratch;
    int written;

    if (!src) return STATUS_INVALID_PARAMETER_4;
    if (!reslen) return STATUS_INVALID_PARAMETER;
    if (dst && (srclen & 1)) return STATUS_INVALID_PARAMETER_5;

    if (dst && !dstlen)
    {
        /* zero-sized destination: convert into a one byte scratch buffer instead and
         * do not count that byte (presumably because a zero dstlen has a different
         * meaning for wine_utf8_wcstombs -- TODO confirm) */
        written = wine_utf8_wcstombs( 0, src, srclen / sizeof(WCHAR), &scratch, 1 );
        if (written > 0) written--;
    }
    else
    {
        written = wine_utf8_wcstombs( 0, src, srclen / sizeof(WCHAR), dst, dstlen );
    }

    if (written < 0)
    {
        /* overflow -> we filled up to dstlen */
        *reslen = dstlen;
        return STATUS_BUFFER_TOO_SMALL;
    }

    *reslen = written;
    return STATUS_SUCCESS;
}
/*
CASE CONVERSIONS
*/

View File

@ -2152,11 +2152,8 @@ static void test_RtlUnicodeToUTF8N(void)
length_expect(0, 0, STATUS_SUCCESS);
length_expect(1, 1, STATUS_SUCCESS);
length_expect(2, 3, STATUS_SUCCESS);
todo_wine
{
length_expect(3, 6, STATUS_SOME_NOT_MAPPED);
length_expect(4, 7, STATUS_SOME_NOT_MAPPED);
}
#undef length_expect
for (i = 0; i <= 6; i++)
@ -2164,20 +2161,14 @@ todo_wine
memset(buffer, 0x55, sizeof(buffer));
bytes_out = 0xdeadbeef;
status = pRtlUnicodeToUTF8N(buffer, i, &bytes_out, special_string, sizeof(special_string));
todo_wine_if (i == 4 || i == 5 || i == 6)
ok(status == STATUS_BUFFER_TOO_SMALL, "%d: status = 0x%x\n", i, status);
todo_wine_if (bytes_out != special_string_len[i])
ok(bytes_out == special_string_len[i], "%d: expected %u, got %u\n", i, special_string_len[i], bytes_out);
todo_wine_if (i == 6)
ok(memcmp(buffer, special_expected, special_string_len[i]) == 0, "%d: bad conversion\n", i);
}
status = pRtlUnicodeToUTF8N(buffer, 7, &bytes_out, special_string, sizeof(special_string));
todo_wine
ok(status == STATUS_SOME_NOT_MAPPED, "status = 0x%x\n", status);
todo_wine
ok(bytes_out == special_string_len[7], "expected %u, got %u\n", special_string_len[7], bytes_out);
todo_wine
ok(memcmp(buffer, special_expected, 7) == 0, "bad conversion\n");
/* conversion behavior with varying input length */
@ -2225,19 +2216,15 @@ todo_wine
status = pRtlUnicodeToUTF8N(
buffer, sizeof(buffer), &bytes_out,
unicode_to_utf8[i].unicode, lstrlenW(unicode_to_utf8[i].unicode) * sizeof(WCHAR));
todo_wine_if(unicode_to_utf8[i].status == STATUS_SOME_NOT_MAPPED)
ok(status == unicode_to_utf8[i].status,
"(test %d): status is 0x%x, expected 0x%x\n",
i, status, unicode_to_utf8[i].status);
todo_wine_if(i == 9 || i == 10 || i == 11)
{
ok(bytes_out == strlen(unicode_to_utf8[i].expected),
"(test %d): bytes_out is %u, expected %u\n",
i, bytes_out, lstrlenA(unicode_to_utf8[i].expected));
ok(!memcmp(buffer, unicode_to_utf8[i].expected, bytes_out),
"(test %d): got \"%.*s\", expected \"%s\"\n",
i, bytes_out, buffer, unicode_to_utf8[i].expected);
}
ok(buffer[bytes_out] == 0x55,
"(test %d): behind string: 0x%x\n", i, buffer[bytes_out]);
@ -2247,8 +2234,6 @@ todo_wine_if(i == 9 || i == 10 || i == 11)
status = pRtlUnicodeToUTF8N(
buffer, sizeof(buffer), &bytes_out,
unicode_to_utf8[i].unicode, (lstrlenW(unicode_to_utf8[i].unicode) + 1) * sizeof(WCHAR));
todo_wine_if(i == 9 || i == 10 || i == 11)
{
ok(status == unicode_to_utf8[i].status,
"(test %d): status is 0x%x, expected 0x%x\n",
i, status, unicode_to_utf8[i].status);
@ -2258,7 +2243,6 @@ todo_wine_if(i == 9 || i == 10 || i == 11)
ok(!memcmp(buffer, unicode_to_utf8[i].expected, bytes_out),
"(test %d): got \"%.*s\", expected \"%s\"\n",
i, bytes_out, buffer, unicode_to_utf8[i].expected);
}
ok(buffer[bytes_out] == 0x55,
"(test %d): behind string: 0x%x\n", i, buffer[bytes_out]);
}