kernelbase: Use the mapping tables from locale.nls in LCMapStringEx().

Signed-off-by: Alexandre Julliard <julliard@winehq.org>
This commit is contained in:
Alexandre Julliard 2022-04-25 11:53:46 +02:00
parent 9e6d0e459f
commit ccbb341a8c
1 changed file with 63 additions and 183 deletions

View File

@ -253,6 +253,21 @@ static const struct geo_index
static unsigned int geo_ids_count;
static unsigned int geo_index_count;
/* Indices of the character mapping tables read from locale.nls.
 *
 * load_locale_nls() walks the tables one after another starting at
 * header->charmaps and stores them into charmaps[] in enumerator order,
 * so the order here has to match the order of the tables in the file.
 */
enum charmaps
{
CHARMAP_FOLDDIGITS,   /* not referenced in the code shown here */
CHARMAP_COMPAT,       /* not referenced in the code shown here */
CHARMAP_HIRAGANA,     /* applied for LCMAP_HIRAGANA */
CHARMAP_KATAKANA,     /* applied for LCMAP_KATAKANA */
CHARMAP_HALFWIDTH,    /* used by map_to_halfwidth() */
CHARMAP_FULLWIDTH,    /* used by map_to_fullwidth() */
CHARMAP_TRADITIONAL,  /* applied for LCMAP_TRADITIONAL_CHINESE */
CHARMAP_SIMPLIFIED,   /* applied for LCMAP_SIMPLIFIED_CHINESE */
NB_CHARMAPS           /* number of tables; must stay last */
};
/* Mapping tables indexed by enum charmaps. Each entry points one USHORT past
 * the start of its table, i.e. past the leading word that load_locale_nls()
 * uses to step to the next table. Tables are looked up through casemap(). */
static const USHORT *charmaps[NB_CHARMAPS];
/* NLS normalization file */
struct norm_table
{
@ -366,6 +381,8 @@ static void load_locale_nls(void)
} *geo_header;
LARGE_INTEGER dummy;
const USHORT *map_ptr;
unsigned int i;
RtlGetLocaleFileMappingAddress( (void **)&header, &system_lcid, &dummy );
locale_table = (const NLS_LOCALE_HEADER *)((char *)header + header->locales);
@ -377,6 +394,8 @@ static void load_locale_nls(void)
geo_index = (const struct geo_index *)((char *)geo_header + geo_header->index_offset);
geo_ids_count = geo_header->ids_count;
geo_index_count = geo_header->index_count;
map_ptr = (const USHORT *)((char *)header + header->charmaps);
for (i = 0; i < NB_CHARMAPS; i++, map_ptr += *map_ptr) charmaps[i] = map_ptr + 1;
}
@ -3162,42 +3181,9 @@ static int get_sortkey( DWORD flags, const WCHAR *src, int srclen, char *dst, in
/* compose a full-width katakana. return consumed source characters. */
static int compose_katakana( const WCHAR *src, int srclen, WCHAR *dst )
static int map_to_fullwidth( const WCHAR *src, int srclen, WCHAR *dst )
{
static const BYTE katakana_map[] =
{
/* */ 0x02, 0x0c, 0x0d, 0x01, 0xfb, 0xf2, 0xa1, /* U+FF61- */
0xa3, 0xa5, 0xa7, 0xa9, 0xe3, 0xe5, 0xe7, 0xc3, /* U+FF68- */
0xfc, 0xa2, 0xa4, 0xa6, 0xa8, 0xaa, 0xab, 0xad, /* U+FF70- */
0xaf, 0xb1, 0xb3, 0xb5, 0xb7, 0xb9, 0xbb, 0xbd, /* U+FF78- */
0xbf, 0xc1, 0xc4, 0xc6, 0xc8, 0xca, 0xcb, 0xcc, /* U+FF80- */
0xcd, 0xce, 0xcf, 0xd2, 0xd5, 0xd8, 0xdb, 0xde, /* U+FF88- */
0xdf, 0xe0, 0xe1, 0xe2, 0xe4, 0xe6, 0xe8, 0xe9, /* U+FF90- */
0xea, 0xeb, 0xec, 0xed, 0xef, 0xf3, 0x99, 0x9a, /* U+FF98- */
};
WCHAR dummy;
int shift;
if (!dst) dst = &dummy;
switch (*src)
{
case 0x309b:
case 0x309c:
*dst = *src - 2;
return 1;
case 0x30f0:
case 0x30f1:
case 0x30fd:
*dst = *src;
break;
default:
shift = *src - 0xff61;
if (shift < 0 || shift >= ARRAY_SIZE( katakana_map )) return 0;
*dst = katakana_map[shift] | 0x3000;
break;
}
*dst = casemap( charmaps[CHARMAP_FULLWIDTH], *src );
if (srclen <= 1) return 1;
switch (src[1])
@ -3230,141 +3216,36 @@ static int compose_katakana( const WCHAR *src, int srclen, WCHAR *dst )
return 2;
}
/* map one or two half-width characters to one full-width character
 *
 * src    - input characters; src[0] is always read
 * srclen - number of characters available at src (only forwarded to
 *          compose_katakana() here)
 * dst    - receives the single mapped character; written unconditionally,
 *          so it must not be NULL
 *
 * Returns the number of source characters consumed: the value returned by
 * compose_katakana() when that helper handles the character, otherwise 1.
 * Characters without a mapping are copied through unchanged.
 */
static int map_to_fullwidth( const WCHAR *src, int srclen, WCHAR *dst )
{
INT n;
/* printable ASCII '!'..'~' -> U+FF01..U+FF5E; backslash is excluded and
 * falls through to the later branches */
if (*src <= '~' && *src > ' ' && *src != '\\')
*dst = *src - 0x20 + 0xff00;
else if (*src == ' ')
*dst = 0x3000;
else if (*src <= 0x00af && *src >= 0x00a2)
{
/* low byte of the U+FFxx target for U+00A2..U+00AF; 0 means "no mapping" */
static const BYTE misc_symbols_table[] =
{
0xe0, 0xe1, 0x00, 0xe5, 0xe4, 0x00, 0x00, /* U+00A2- */
0x00, 0x00, 0x00, 0xe2, 0x00, 0x00, 0xe3 /* U+00A9- */
};
if (misc_symbols_table[*src - 0x00a2])
*dst = misc_symbols_table[*src - 0x00a2] | 0xff00;
else
*dst = *src;
}
else if (*src == 0x20a9) /* WON SIGN */
*dst = 0xffe6;
/* katakana is tried before the hangul range; a positive result is the number
 * of source characters consumed (presumably dst was filled by the helper) */
else if ((n = compose_katakana(src, srclen, dst)) > 0)
return n;
else if (*src >= 0xffa0 && *src <= 0xffdc)
{
/* low byte of the U+31xx target for U+FFA0..U+FFDC; 0 means "no mapping" */
static const BYTE hangul_mapping_table[] =
{
0x64, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* U+FFA0- */
0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* U+FFA8- */
0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* U+FFB0- */
0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x00, /* U+FFB8- */
0x00, 0x00, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, /* U+FFC0- */
0x00, 0x00, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, /* U+FFC8- */
0x00, 0x00, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, /* U+FFD0- */
0x00, 0x00, 0x61, 0x62, 0x63 /* U+FFD8- */
};
if (hangul_mapping_table[*src - 0xffa0])
*dst = hangul_mapping_table[*src - 0xffa0] | 0x3100;
else
*dst = *src;
}
else
*dst = *src;
return 1;
}
/* decompose a full-width katakana character into one or two half-width characters.
 *
 * c      - character to decompose; only U+3099..U+30FE are handled
 * dst    - output buffer, may be NULL when dstlen is 0
 * dstlen - capacity of dst in characters
 *
 * Returns 0 when c is outside the table range, otherwise the number of
 * characters the result needs (1 or 2). The length is returned even when
 * dst is too small, in which case nothing is written, so the function can
 * be called with dstlen 0 to measure the output.
 */
static int decompose_katakana( WCHAR c, WCHAR *dst, int dstlen )
{
/* One entry per character, indexed by c - 0x3099:
 *   0            no mapping, the character is copied unchanged
 *   > 0x60       low byte of the single half-width result U+FF00|entry
 *   0x51..0x60   two characters: the full-width character (entry & 0xf)
 *                positions before c, left as is, plus a sound mark
 *   otherwise    two characters: the half-width form of the character
 *                'entry' positions before c, plus a sound mark
 */
static const BYTE katakana_map[] =
{
/* */ 0x9e, 0x9f, 0x9e, 0x9f, 0x00, 0x00, 0x00, /* U+3099- */
0x00, 0x67, 0x71, 0x68, 0x72, 0x69, 0x73, 0x6a, /* U+30a1- */
0x74, 0x6b, 0x75, 0x76, 0x01, 0x77, 0x01, 0x78, /* U+30a8- */
0x01, 0x79, 0x01, 0x7a, 0x01, 0x7b, 0x01, 0x7c, /* U+30b0- */
0x01, 0x7d, 0x01, 0x7e, 0x01, 0x7f, 0x01, 0x80, /* U+30b8- */
0x01, 0x81, 0x01, 0x6f, 0x82, 0x01, 0x83, 0x01, /* U+30c0- */
0x84, 0x01, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, /* U+30c8- */
0x01, 0x02, 0x8b, 0x01, 0x02, 0x8c, 0x01, 0x02, /* U+30d0- */
0x8d, 0x01, 0x02, 0x8e, 0x01, 0x02, 0x8f, 0x90, /* U+30d8- */
0x91, 0x92, 0x93, 0x6c, 0x94, 0x6d, 0x95, 0x6e, /* U+30e0- */
0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x00, 0x9c, /* U+30e8- */
0x00, 0x00, 0x66, 0x9d, 0x4e, 0x00, 0x00, 0x08, /* U+30f0- */
0x58, 0x58, 0x08, 0x65, 0x70, 0x00, 0x51 /* U+30f8- */
};
/* shift is a signed int so that characters below U+3099 are rejected below */
int len = 0, shift = c - 0x3099;
BYTE k;
if (shift < 0 || shift >= ARRAY_SIZE( katakana_map )) return 0;
k = katakana_map[shift];
if (!k)
{
/* in range but no half-width form: pass through */
if (dstlen > 0) *dst = c;
len++;
}
else if (k > 0x60)
{
/* direct one-to-one mapping into the U+FFxx block */
if (dstlen > 0) *dst = k | 0xff00;
len++;
}
else
{
/* base character followed by a sound mark; written only if both fit */
if (dstlen >= 2)
{
dst[0] = (k > 0x50) ? (c - (k & 0xf)) : (katakana_map[shift - k] | 0xff00);
/* entry 2 selects the semi-voiced mark U+FF9F, every other entry the voiced mark U+FF9E */
dst[1] = (k == 2) ? 0xff9f : 0xff9e;
}
len += 2;
}
return len;
}
/* map single full-width character to single or double half-width characters. */
static int map_to_halfwidth( WCHAR c, WCHAR *dst, int dstlen )
{
int n = decompose_katakana( c, dst, dstlen );
if (n > 0) return n;
if (c == 0x3000)
*dst = ' ';
else if (c == 0x3001)
*dst = 0xff64;
else if (c == 0x3002)
*dst = 0xff61;
else if (c == 0x300c || c == 0x300d)
*dst = (c - 0x300c) + 0xff62;
else if (c >= 0x3131 && c <= 0x3163)
static const BYTE katakana_map[] =
{
*dst = c - 0x3131 + 0xffa1;
if (*dst >= 0xffbf) *dst += 3;
if (*dst >= 0xffc8) *dst += 2;
if (*dst >= 0xffd0) *dst += 2;
if (*dst >= 0xffd8) *dst += 2;
}
else if (c == 0x3164)
*dst = 0xffa0;
else if (c == 0x2019)
*dst = '\'';
else if (c == 0x201d)
*dst = '"';
else if (c > 0xff00 && c < 0xff5f && c != 0xff3c)
*dst = c - 0xff00 + 0x20;
else if (c >= 0xffe0 && c <= 0xffe6)
{
static const WCHAR misc_symbol_map[] = { 0x00a2, 0x00a3, 0x00ac, 0x00af, 0x00a6, 0x00a5, 0x20a9 };
*dst = misc_symbol_map[c - 0xffe0];
}
else
*dst = c;
0x01, 0x00, 0x01, 0x00, /* U+30a8- */
0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, /* U+30b0- */
0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, /* U+30b8- */
0x01, 0x00, 0x01, 0x00, 0x00, 0x01, 0x00, 0x01, /* U+30c0- */
0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* U+30c8- */
0x01, 0x02, 0x00, 0x01, 0x02, 0x00, 0x01, 0x02, /* U+30d0- */
0x00, 0x01, 0x02, 0x00, 0x01, 0x02, 0x00, 0x00, /* U+30d8- */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* U+30e0- */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* U+30e8- */
0x00, 0x00, 0x00, 0x00, 0x4e, 0x00, 0x00, 0x08, /* U+30f0- */
0x08, 0x08, 0x08, 0x00, 0x00, 0x00, 0x01 /* U+30f8- */
};
USHORT shift = c - 0x30ac;
BYTE k;
if (shift < ARRAY_SIZE(katakana_map) && (k = katakana_map[shift]))
{
if (dstlen >= 2)
{
dst[0] = casemap( charmaps[CHARMAP_HALFWIDTH], c - k );
dst[1] = (k == 2) ? 0xff9f : 0xff9e;
}
return 2;
}
if (dstlen >= 1) dst[0] = casemap( charmaps[CHARMAP_HALFWIDTH], c );
return 1;
}
@ -5877,9 +5758,10 @@ INT WINAPI DECLSPEC_HOTPATCH LCMapStringEx( const WCHAR *locale, DWORD flags, co
}
else if (flags & LCMAP_FULLWIDTH)
{
WCHAR wch;
for (len = 0; srclen; src++, srclen--, len++)
{
if (compose_katakana( src, srclen, NULL ) == 2)
if (map_to_fullwidth( src, srclen, &wch ) == 2)
{
src++;
srclen--;
@ -5892,11 +5774,8 @@ INT WINAPI DECLSPEC_HOTPATCH LCMapStringEx( const WCHAR *locale, DWORD flags, co
{
WCHAR wch = *src;
/* map Hiragana to Katakana before decomposition if needed */
if ((flags & LCMAP_KATAKANA) &&
((wch >= 0x3041 && wch <= 0x3096) || wch == 0x309D || wch == 0x309E))
wch += 0x60;
if (decompose_katakana( wch, NULL, 0 ) == 2) len++;
if (flags & LCMAP_KATAKANA) wch = casemap( charmaps[CHARMAP_KATAKANA], wch );
if (map_to_halfwidth( wch, NULL, 0 ) == 2) len++;
}
}
else len = srclen;
@ -5921,6 +5800,17 @@ INT WINAPI DECLSPEC_HOTPATCH LCMapStringEx( const WCHAR *locale, DWORD flags, co
goto done;
}
if (flags & LCMAP_TRADITIONAL_CHINESE)
{
for (len = dstlen, dst_ptr = dst; srclen && len; srclen--, len--)
*dst_ptr++ = casemap( charmaps[CHARMAP_TRADITIONAL], *src++ );
}
if (flags & LCMAP_SIMPLIFIED_CHINESE)
{
for (len = dstlen, dst_ptr = dst; srclen && len; srclen--, len--)
*dst_ptr++ = casemap( charmaps[CHARMAP_SIMPLIFIED], *src++ );
}
if (flags & (LCMAP_FULLWIDTH | LCMAP_HALFWIDTH | LCMAP_HIRAGANA | LCMAP_KATAKANA))
{
for (len = dstlen, dst_ptr = dst; len && srclen; src++, srclen--, len--, dst_ptr++)
@ -5938,18 +5828,8 @@ INT WINAPI DECLSPEC_HOTPATCH LCMapStringEx( const WCHAR *locale, DWORD flags, co
}
else wch = *src;
if (flags & LCMAP_KATAKANA)
{
/* map hiragana to katakana, e.g. U+3041 -> U+30A1.
we can't use C3_HIRAGANA as some characters can't map to katakana */
if ((wch >= 0x3041 && wch <= 0x3096) || wch == 0x309D || wch == 0x309E) wch += 0x60;
}
else if (flags & LCMAP_HIRAGANA)
{
/* map katakana to hiragana, e.g. U+30A1 -> U+3041.
we can't use C3_KATAKANA as some characters can't map to hiragana */
if ((wch >= 0x30A1 && wch <= 0x30F6) || wch == 0x30FD || wch == 0x30FE) wch -= 0x60;
}
if (flags & LCMAP_KATAKANA) wch = casemap( charmaps[CHARMAP_KATAKANA], wch );
else if (flags & LCMAP_HIRAGANA) wch = casemap( charmaps[CHARMAP_HIRAGANA], wch );
if (flags & LCMAP_HALFWIDTH)
{