unicode: Add support for high Unicode planes in decomposition tables.
Signed-off-by: Alexandre Julliard <julliard@winehq.org>
This commit is contained in:
parent
148f564d10
commit
c658731975
|
@ -6257,17 +6257,19 @@ static void test_NormalizeString(void)
|
|||
ok( dstlen == (i < 2 ? 15 : 64), "%d: wrong len %d\n", i, dstlen );
|
||||
SetLastError( 0xdeadbeef );
|
||||
dstlen = pNormalizeString( norm_forms[i], L"AB\xd800Z", -1, dst, ARRAY_SIZE(dst) );
|
||||
todo_wine ok( dstlen == -3, "%d: wrong len %d\n", i, dstlen );
|
||||
todo_wine ok( GetLastError() == ERROR_NO_UNICODE_TRANSLATION, "%d: wrong error %d\n", i, GetLastError() );
|
||||
ok( dstlen == -3, "%d: wrong len %d\n", i, dstlen );
|
||||
ok( GetLastError() == ERROR_NO_UNICODE_TRANSLATION, "%d: wrong error %d\n", i, GetLastError() );
|
||||
dstlen = pNormalizeString( norm_forms[i], L"ABCD\xdc12Z", -1, NULL, 0 );
|
||||
ok( dstlen == (i < 2 ? 21 : 64), "%d: wrong len %d\n", i, dstlen );
|
||||
SetLastError( 0xdeadbeef );
|
||||
dstlen = pNormalizeString( norm_forms[i], L"ABCD\xdc12Z", -1, dst, ARRAY_SIZE(dst) );
|
||||
todo_wine ok( dstlen == -4, "%d: wrong len %d\n", i, dstlen );
|
||||
todo_wine ok( GetLastError() == ERROR_NO_UNICODE_TRANSLATION, "%d: wrong error %d\n", i, GetLastError() );
|
||||
ok( dstlen == -4, "%d: wrong len %d\n", i, dstlen );
|
||||
ok( GetLastError() == ERROR_NO_UNICODE_TRANSLATION, "%d: wrong error %d\n", i, GetLastError() );
|
||||
SetLastError( 0xdeadbeef );
|
||||
dstlen = pNormalizeString( norm_forms[i], L"ABCD\xdc12Z", -1, dst, 2 );
|
||||
todo_wine ok( dstlen == (i < 2 ? -18 : -74), "%d: wrong len %d\n", i, dstlen );
|
||||
todo_wine
|
||||
ok( dstlen == (i < 2 ? -18 : -74), "%d: wrong len %d\n", i, dstlen );
|
||||
todo_wine_if (i == 0 || i == 2)
|
||||
ok( GetLastError() == ERROR_INSUFFICIENT_BUFFER, "%d: wrong error %d\n", i, GetLastError() );
|
||||
if (pRtlNormalizeString)
|
||||
{
|
||||
|
@ -6277,17 +6279,18 @@ static void test_NormalizeString(void)
|
|||
ok( dstlen == (i < 2 ? 15 : 64), "%d: wrong len %d\n", i, dstlen );
|
||||
dstlen = ARRAY_SIZE(dst);
|
||||
status = pRtlNormalizeString( norm_forms[i], L"AB\xd800Z", -1, dst, &dstlen );
|
||||
todo_wine ok( status == STATUS_NO_UNICODE_TRANSLATION, "%d: failed %x\n", i, status );
|
||||
todo_wine ok( dstlen == 3, "%d: wrong len %d\n", i, dstlen );
|
||||
ok( status == STATUS_NO_UNICODE_TRANSLATION, "%d: failed %x\n", i, status );
|
||||
ok( dstlen == 3, "%d: wrong len %d\n", i, dstlen );
|
||||
dstlen = 1;
|
||||
status = pRtlNormalizeString( norm_forms[i], L"AB\xd800Z", -1, dst, &dstlen );
|
||||
todo_wine_if( i == 0 || i == 2)
|
||||
ok( status == STATUS_BUFFER_TOO_SMALL, "%d: failed %x\n", i, status );
|
||||
todo_wine_if (i != 3)
|
||||
todo_wine_if( i != 3)
|
||||
ok( dstlen == (i < 2 ? 14 : 73), "%d: wrong len %d\n", i, dstlen );
|
||||
dstlen = 2;
|
||||
status = pRtlNormalizeString( norm_forms[i], L"AB\xd800Z", -1, dst, &dstlen );
|
||||
todo_wine ok( status == STATUS_NO_UNICODE_TRANSLATION, "%d: failed %x\n", i, status );
|
||||
todo_wine ok( dstlen == 3, "%d: wrong len %d\n", i, dstlen );
|
||||
ok( status == STATUS_NO_UNICODE_TRANSLATION, "%d: failed %x\n", i, status );
|
||||
ok( dstlen == 3, "%d: wrong len %d\n", i, dstlen );
|
||||
}
|
||||
}
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -155,14 +155,14 @@ static WCHAR casemap_ascii( WCHAR ch )
|
|||
}
|
||||
|
||||
|
||||
/* Look up the decomposition of ch in a multi-level index table.
 *
 * table[ch >> 8] selects a sub-index, ((ch >> 4) & 0xf) selects an entry in
 * it, and (ch & 0xf) selects the final slot; that slot and the one after it
 * hold the start and end positions of the decomposition data inside table.
 *
 * Returns a pointer to the decomposition and stores its length in *len, or
 * returns NULL when the entry is empty (start == end). */
static const WCHAR *get_decomposition( const unsigned short *table, WCHAR ch, unsigned int *len )
|
||||
static const WCHAR *get_decomposition( const unsigned short *table, unsigned int ch, unsigned int *len )
|
||||
{
|
||||
unsigned short offset = table[table[ch >> 8] + ((ch >> 4) & 0xf)] + (ch & 0xf);
|
||||
unsigned short start = table[offset];
|
||||
unsigned short end = table[offset + 1];
|
||||
|
||||
if ((*len = end - start)) return table + start;
|
||||
*len = 1;
/* no decomposition: report 1 WCHAR, or 2 when ch is 0x10000 or above */
*len = 1 + (ch >= 0x10000);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
@ -174,13 +174,13 @@ static BYTE get_combining_class( unsigned int c )
|
|||
}
|
||||
|
||||
|
||||
/* Returns TRUE when get_combining_class() reports class 0 for c. */
static BOOL is_starter( WCHAR c )
|
||||
static BOOL is_starter( unsigned int c )
|
||||
{
|
||||
return !get_combining_class( c );
|
||||
}
|
||||
|
||||
|
||||
static BOOL reorderable_pair( WCHAR c1, WCHAR c2 )
|
||||
static BOOL reorderable_pair( unsigned int c1, unsigned int c2 )
|
||||
{
|
||||
BYTE ccc1, ccc2;
|
||||
|
||||
|
@ -191,23 +191,52 @@ static BOOL reorderable_pair( WCHAR c1, WCHAR c2 )
|
|||
return ccc2 && (ccc1 > ccc2);
|
||||
}
|
||||
|
||||
/* Decode the code point that starts at src[0].
 *
 * src    - UTF-16 input; src[0] is always read
 * srclen - number of WCHARs available at src
 * ch     - receives the decoded code point on success, untouched on failure
 *
 * Returns the number of WCHARs consumed (1 or 2), or 0 when src[0] is a low
 * surrogate or a high surrogate that is not followed by a low surrogate
 * within srclen. */
static int get_utf16( const WCHAR *src, unsigned int srclen, unsigned int *ch )
{
    const WCHAR first = src[0];

    if (!IS_HIGH_SURROGATE( first ))
    {
        /* a stray low surrogate is rejected; anything else stands for itself */
        if (IS_LOW_SURROGATE( first )) return 0;
        *ch = first;
        return 1;
    }

    /* a high surrogate needs a low surrogate right behind it */
    if (srclen < 2 || !IS_LOW_SURROGATE( src[1] )) return 0;

    *ch = 0x10000 + ((first & 0x3ff) << 10) + (src[1] & 0x3ff);
    return 2;
}
|
||||
|
||||
/* Store code point ch at dst as UTF-16.
 *
 * Values below 0x10000 are written as a single WCHAR; larger values are
 * written as two WCHARs (0xd800-based followed by 0xdc00-based), so dst must
 * have room for two WCHARs in that case.  Nothing is returned. */
static void put_utf16( WCHAR *dst, unsigned int ch )
{
    unsigned int offset;

    if (ch < 0x10000)
    {
        dst[0] = ch;
        return;
    }

    offset = ch - 0x10000;
    dst[0] = 0xd800 | (offset >> 10);   /* upper bits of the offset */
    dst[1] = 0xdc00 | (offset & 0x3ff); /* lower 10 bits of the offset */
}
|
||||
|
||||
/* Reorder a run of characters in place: adjacent code points are swapped
 * whenever reorderable_pair() says so, and passes are repeated until one
 * completes without a swap.
 *
 * str - start of the run (modified in place)
 * len - length of the run in WCHARs
 *
 * The revised loop advances by code point (1 or 2 WCHARs, as decoded by
 * get_utf16) rather than by WCHAR. */
static void canonical_order_substring( WCHAR *str, unsigned int len )
|
||||
{
|
||||
unsigned int i;
|
||||
unsigned int i, ch1, ch2, len1, len2;
|
||||
BOOL swapped;
|
||||
|
||||
do
|
||||
{
|
||||
swapped = FALSE;
|
||||
for (i = 0; i < len - 1; i++)
|
||||
for (i = 0; i < len - 1; i += len1)
|
||||
{
|
||||
if (reorderable_pair( str[i], str[i + 1] ))
|
||||
/* end the pass if a code point cannot be decoded or no second one remains */
if (!(len1 = get_utf16( str + i, len - i, &ch1 ))) break;
|
||||
if (i + len1 >= len) break;
|
||||
if (!(len2 = get_utf16( str + i + len1, len - i - len1, &ch2 ))) break;
|
||||
if (reorderable_pair( ch1, ch2 ))
|
||||
{
|
||||
WCHAR tmp = str[i];
|
||||
str[i] = str[i + 1];
|
||||
str[i + 1] = tmp;
|
||||
WCHAR tmp[2];
|
||||
memcpy( tmp, str + i, len1 * sizeof(WCHAR) );
|
||||
/* with len1 == 1 and len2 == 2 the source [i+1,i+3) and destination
 * [i,i+2) overlap, which memcpy does not permit; memmove does */
memmove( str + i, str + i + len1, len2 * sizeof(WCHAR) );
|
||||
memcpy( str + i + len2, tmp, len1 * sizeof(WCHAR) );
|
||||
swapped = TRUE;
|
||||
/* combined with the loop's i += len1 this advances by len2, leaving i
 * on the code point that was just moved back */
i += len2 - len1;
|
||||
}
|
||||
}
|
||||
} while (swapped);
|
||||
|
@ -224,38 +253,43 @@ static void canonical_order_substring( WCHAR *str, unsigned int len )
|
|||
*/
|
||||
/* Split str into runs delimited by starters (is_starter) and hand every run
 * longer than one WCHAR to canonical_order_substring().
 *
 * str - string to reorder in place
 * len - length of str in WCHARs
 *
 * The revised loop walks by code point using get_utf16 and flushes the last
 * run after the loop instead of iterating up to i == len. */
static void canonical_order_string( WCHAR *str, unsigned int len )
|
||||
{
|
||||
unsigned int i, next = 0;
|
||||
unsigned int ch, i, r, next = 0;
|
||||
|
||||
for (i = 1; i <= len; i++)
|
||||
for (i = 0; i < len; i += r)
|
||||
{
|
||||
if (i == len || is_starter( str[i] ))
|
||||
/* give up on an unpaired surrogate (get_utf16 returned 0) */
if (!(r = get_utf16( str + i, len - i, &ch ))) return;
|
||||
/* the character at index 0 is never treated as a run boundary */
if (i && is_starter( ch ))
|
||||
{
|
||||
if (i > next + 1) /* at least two successive non-starters */
|
||||
canonical_order_substring( str + next, i - next );
|
||||
next = i + 1;
|
||||
/* the next run begins right after this starter (r WCHARs long) */
next = i + r;
|
||||
}
|
||||
}
|
||||
/* reorder whatever follows the last starter */
if (i > next + 1) canonical_order_substring( str + next, i - next );
|
||||
}
|
||||
|
||||
|
||||
static NTSTATUS decompose_string( int compat, const WCHAR *src, int src_len, WCHAR *dst, int *dst_len )
|
||||
{
|
||||
const unsigned short *table = compat ? nfkd_table : nfd_table;
|
||||
int src_pos, dst_pos = 0;
|
||||
unsigned int decomp_len;
|
||||
int src_pos, dst_pos;
|
||||
unsigned int ch, len, decomp_len;
|
||||
const WCHAR *decomp;
|
||||
|
||||
for (src_pos = 0; src_pos < src_len; src_pos++)
|
||||
for (src_pos = dst_pos = 0; src_pos < src_len; src_pos += len, dst_pos += decomp_len)
|
||||
{
|
||||
if (dst_pos == *dst_len) break;
|
||||
if ((decomp = get_decomposition( table, src[src_pos], &decomp_len )))
|
||||
if (!(len = get_utf16( src + src_pos, src_len - src_pos, &ch )) ||
|
||||
(ch >= 0xfdd0 && ch <= 0xfdef) || ((ch & 0xffff) >= 0xfffe))
|
||||
{
|
||||
if (dst_pos + decomp_len > *dst_len) break;
|
||||
memcpy( dst + dst_pos, decomp, decomp_len * sizeof(WCHAR) );
|
||||
*dst_len = src_pos + IS_HIGH_SURROGATE( src[src_pos] );
|
||||
return STATUS_NO_UNICODE_TRANSLATION;
|
||||
}
|
||||
else dst[dst_pos] = src[src_pos];
|
||||
dst_pos += decomp_len;
|
||||
decomp = get_decomposition( table, ch, &decomp_len );
|
||||
if (dst_pos + decomp_len > *dst_len) break;
|
||||
if (decomp) memcpy( dst + dst_pos, decomp, decomp_len * sizeof(WCHAR) );
|
||||
else put_utf16( dst + dst_pos, ch );
|
||||
}
|
||||
|
||||
if (src_pos < src_len)
|
||||
{
|
||||
*dst_len += (src_len - src_pos) * (compat ? 18 : 3);
|
||||
|
@ -1554,21 +1588,6 @@ NTSTATUS WINAPI RtlUTF8ToUnicodeN( WCHAR *dst, DWORD dstlen, DWORD *reslen, cons
|
|||
}
|
||||
|
||||
|
||||
/* get the next char value taking surrogates into account */
|
||||
static inline unsigned int get_surrogate_value( const WCHAR *src, unsigned int srclen )
{
    const WCHAR lead = src[0];
    WCHAR trail;

    /* outside the surrogate range: the unit is its own value */
    if (lead < 0xd800 || lead > 0xdfff) return lead;

    /* a low surrogate cannot start a pair */
    if (lead >= 0xdc00) return 0;

    /* the low surrogate must be present... */
    if (srclen < 2) return 0;

    /* ...and must lie in the low surrogate range */
    trail = src[1];
    if (trail < 0xdc00 || trail > 0xdfff) return 0;

    return 0x10000 + ((lead & 0x3ff) << 10) + (trail & 0x3ff);
}
|
||||
|
||||
|
||||
/**************************************************************************
|
||||
* RtlUnicodeToUTF8N (NTDLL.@)
|
||||
*/
|
||||
|
@ -1592,7 +1611,7 @@ NTSTATUS WINAPI RtlUnicodeToUTF8N( char *dst, DWORD dstlen, DWORD *reslen, const
|
|||
else if (*src < 0x800) len += 2; /* 0x80-0x7ff: 2 bytes */
|
||||
else
|
||||
{
|
||||
if (!(val = get_surrogate_value( src, srclen )))
|
||||
if (!get_utf16( src, srclen, &val ))
|
||||
{
|
||||
val = 0xfffd;
|
||||
status = STATUS_SOME_NOT_MAPPED;
|
||||
|
@ -1629,7 +1648,7 @@ NTSTATUS WINAPI RtlUnicodeToUTF8N( char *dst, DWORD dstlen, DWORD *reslen, const
|
|||
dst += 2;
|
||||
continue;
|
||||
}
|
||||
if (!(val = get_surrogate_value( src, srclen )))
|
||||
if (!get_utf16( src, srclen, &val ))
|
||||
{
|
||||
val = 0xfffd;
|
||||
status = STATUS_SOME_NOT_MAPPED;
|
||||
|
|
|
@ -480,7 +480,7 @@ sub build_decompositions(@)
|
|||
my @src = @_;
|
||||
my @dst;
|
||||
|
||||
for (my $i = 0; $i < 65536; $i++)
|
||||
for (my $i = 0; $i < @src; $i++)
|
||||
{
|
||||
next unless defined $src[$i];
|
||||
my @decomp = get_decomposition( $i, \@src );
|
||||
|
@ -2092,10 +2092,13 @@ sub dump_decompositions($@)
|
|||
|
||||
# first determine all the 16-char subsets that contain something
|
||||
|
||||
my @filled = (0) x 4096;
|
||||
my $level1 = ($MAX_CHAR + 1) / 16;
|
||||
my $level2 = $level1 / 16;
|
||||
|
||||
my @filled = (0) x $level1;
|
||||
my $pos = 16; # for the null subset
|
||||
my $data_total = 0;
|
||||
for (my $i = 0; $i < 65536; $i++)
|
||||
for (my $i = 0; $i <= $MAX_CHAR; $i++)
|
||||
{
|
||||
next unless defined $decomp[$i];
|
||||
if ($filled[$i >> 4] == 0)
|
||||
|
@ -2109,9 +2112,9 @@ sub dump_decompositions($@)
|
|||
|
||||
# now count the 256-char subsets that contain something
|
||||
|
||||
my @filled_idx = (256) x 256;
|
||||
$pos = 256 + 16;
|
||||
for (my $i = 0; $i < 4096; $i++)
|
||||
my @filled_idx = ($level2) x $level2;
|
||||
$pos = $level2 + 16;
|
||||
for (my $i = 0; $i < $level1; $i++)
|
||||
{
|
||||
next unless $filled[$i];
|
||||
$filled_idx[$i >> 4] = $pos;
|
||||
|
@ -2123,7 +2126,7 @@ sub dump_decompositions($@)
|
|||
|
||||
# add the index offsets to the subsets positions
|
||||
|
||||
for (my $i = 0; $i < 4096; $i++)
|
||||
for (my $i = 0; $i < $level1; $i++)
|
||||
{
|
||||
next unless $filled[$i];
|
||||
$filled[$i] += $null_offset;
|
||||
|
@ -2138,9 +2141,9 @@ sub dump_decompositions($@)
|
|||
|
||||
# dump the second-level indexes
|
||||
|
||||
for (my $i = 0; $i < 256; $i++)
|
||||
for (my $i = 0; $i < $level2; $i++)
|
||||
{
|
||||
next unless ($filled_idx[$i] > 256);
|
||||
next unless ($filled_idx[$i] > $level2);
|
||||
my @table = @filled[($i<<4)..($i<<4)+15];
|
||||
for (my $j = 0; $j < 16; $j++) { $table[$j] ||= $null_offset; }
|
||||
printf OUTPUT ",\n /* sub-index %02x */\n", $i;
|
||||
|
@ -2155,7 +2158,7 @@ sub dump_decompositions($@)
|
|||
$pos = $total;
|
||||
|
||||
my @data;
|
||||
for (my $i = 0; $i < 4096; $i++)
|
||||
for (my $i = 0; $i < $level1; $i++)
|
||||
{
|
||||
next unless $filled[$i];
|
||||
my @table = (0) x (16);
|
||||
|
|
Loading…
Reference in New Issue