diff --git a/dlls/kernelbase/locale.c b/dlls/kernelbase/locale.c index 42dcfff50e0..3830a3fae2c 100644 --- a/dlls/kernelbase/locale.c +++ b/dlls/kernelbase/locale.c @@ -253,6 +253,21 @@ static const struct geo_index static unsigned int geo_ids_count; static unsigned int geo_index_count; +enum charmaps +{ + CHARMAP_FOLDDIGITS, + CHARMAP_COMPAT, + CHARMAP_HIRAGANA, + CHARMAP_KATAKANA, + CHARMAP_HALFWIDTH, + CHARMAP_FULLWIDTH, + CHARMAP_TRADITIONAL, + CHARMAP_SIMPLIFIED, + NB_CHARMAPS +}; + +static const USHORT *charmaps[NB_CHARMAPS]; + /* NLS normalization file */ struct norm_table { @@ -366,6 +381,8 @@ static void load_locale_nls(void) } *geo_header; LARGE_INTEGER dummy; + const USHORT *map_ptr; + unsigned int i; RtlGetLocaleFileMappingAddress( (void **)&header, &system_lcid, &dummy ); locale_table = (const NLS_LOCALE_HEADER *)((char *)header + header->locales); @@ -377,6 +394,8 @@ static void load_locale_nls(void) geo_index = (const struct geo_index *)((char *)geo_header + geo_header->index_offset); geo_ids_count = geo_header->ids_count; geo_index_count = geo_header->index_count; + map_ptr = (const USHORT *)((char *)header + header->charmaps); + for (i = 0; i < NB_CHARMAPS; i++, map_ptr += *map_ptr) charmaps[i] = map_ptr + 1; } @@ -3162,42 +3181,9 @@ static int get_sortkey( DWORD flags, const WCHAR *src, int srclen, char *dst, in /* compose a full-width katakana. return consumed source characters. */ -static int compose_katakana( const WCHAR *src, int srclen, WCHAR *dst ) +static int map_to_fullwidth( const WCHAR *src, int srclen, WCHAR *dst ) { - static const BYTE katakana_map[] = - { - /* */ 0x02, 0x0c, 0x0d, 0x01, 0xfb, 0xf2, 0xa1, /* U+FF61- */ - 0xa3, 0xa5, 0xa7, 0xa9, 0xe3, 0xe5, 0xe7, 0xc3, /* U+FF68- */ - 0xfc, 0xa2, 0xa4, 0xa6, 0xa8, 0xaa, 0xab, 0xad, /* U+FF70- */ - 0xaf, 0xb1, 0xb3, 0xb5, 0xb7, 0xb9, 0xbb, 0xbd, /* U+FF78- */ - 0xbf, 0xc1, 0xc4, 0xc6, 0xc8, 0xca, 0xcb, 0xcc, /* U+FF80- */ - 0xcd, 0xce, 0xcf, 0xd2, 0xd5, 0xd8, 0xdb, 0xde, /* U+FF88- */ - 0xdf, 0xe0, 0xe1, 0xe2, 0xe4, 0xe6, 0xe8, 0xe9, /* U+FF90- */ - 0xea, 0xeb, 0xec, 0xed, 0xef, 0xf3, 0x99, 0x9a, /* U+FF98- */ - }; - WCHAR dummy; - int shift; - - if (!dst) dst = &dummy; - - switch (*src) - { - case 0x309b: - case 0x309c: - *dst = *src - 2; - return 1; - case 0x30f0: - case 0x30f1: - case 0x30fd: - *dst = *src; - break; - default: - shift = *src - 0xff61; - if (shift < 0 || shift >= ARRAY_SIZE( katakana_map )) return 0; - *dst = katakana_map[shift] | 0x3000; - break; - } - + *dst = casemap( charmaps[CHARMAP_FULLWIDTH], *src ); if (srclen <= 1) return 1; switch (src[1]) @@ -3230,141 +3216,36 @@ static int compose_katakana( const WCHAR *src, int srclen, WCHAR *dst ) return 2; } -/* map one or two half-width characters to one full-width character */ -static int map_to_fullwidth( const WCHAR *src, int srclen, WCHAR *dst ) -{ - INT n; - - if (*src <= '~' && *src > ' ' && *src != '\\') - *dst = *src - 0x20 + 0xff00; - else if (*src == ' ') - *dst = 0x3000; - else if (*src <= 0x00af && *src >= 0x00a2) - { - static const BYTE misc_symbols_table[] = - { - 0xe0, 0xe1, 0x00, 0xe5, 0xe4, 0x00, 0x00, /* U+00A2- */ - 0x00, 0x00, 0x00, 0xe2, 0x00, 0x00, 0xe3 /* U+00A9- */ - }; - if (misc_symbols_table[*src - 0x00a2]) - *dst = misc_symbols_table[*src - 0x00a2] | 0xff00; - else - *dst = *src; - } - else if (*src == 0x20a9) /* WON SIGN */ - *dst = 0xffe6; - else if ((n = compose_katakana(src, srclen, dst)) > 0) - return n; - else if (*src >= 0xffa0 && *src <= 0xffdc) - { - static const BYTE hangul_mapping_table[] = - { - 0x64, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* U+FFA0- */ - 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* U+FFA8- */ - 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* U+FFB0- */ - 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x00, /* U+FFB8- */ - 0x00, 0x00, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, /* U+FFC0- */ - 0x00, 0x00, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, /* U+FFC8- */ - 0x00, 0x00, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, /* U+FFD0- */ - 0x00, 0x00, 0x61, 0x62, 0x63 /* U+FFD8- */ - }; - - if (hangul_mapping_table[*src - 0xffa0]) - *dst = hangul_mapping_table[*src - 0xffa0] | 0x3100; - else - *dst = *src; - } - else - *dst = *src; - - return 1; -} - -/* decompose a full-width katakana character into one or two half-width characters. */ -static int decompose_katakana( WCHAR c, WCHAR *dst, int dstlen ) -{ - static const BYTE katakana_map[] = - { - /* */ 0x9e, 0x9f, 0x9e, 0x9f, 0x00, 0x00, 0x00, /* U+3099- */ - 0x00, 0x67, 0x71, 0x68, 0x72, 0x69, 0x73, 0x6a, /* U+30a1- */ - 0x74, 0x6b, 0x75, 0x76, 0x01, 0x77, 0x01, 0x78, /* U+30a8- */ - 0x01, 0x79, 0x01, 0x7a, 0x01, 0x7b, 0x01, 0x7c, /* U+30b0- */ - 0x01, 0x7d, 0x01, 0x7e, 0x01, 0x7f, 0x01, 0x80, /* U+30b8- */ - 0x01, 0x81, 0x01, 0x6f, 0x82, 0x01, 0x83, 0x01, /* U+30c0- */ - 0x84, 0x01, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, /* U+30c8- */ - 0x01, 0x02, 0x8b, 0x01, 0x02, 0x8c, 0x01, 0x02, /* U+30d0- */ - 0x8d, 0x01, 0x02, 0x8e, 0x01, 0x02, 0x8f, 0x90, /* U+30d8- */ - 0x91, 0x92, 0x93, 0x6c, 0x94, 0x6d, 0x95, 0x6e, /* U+30e0- */ - 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x00, 0x9c, /* U+30e8- */ - 0x00, 0x00, 0x66, 0x9d, 0x4e, 0x00, 0x00, 0x08, /* U+30f0- */ - 0x58, 0x58, 0x08, 0x65, 0x70, 0x00, 0x51 /* U+30f8- */ - }; - int len = 0, shift = c - 0x3099; - BYTE k; - - if (shift < 0 || shift >= ARRAY_SIZE( katakana_map )) return 0; - - k = katakana_map[shift]; - if (!k) - { - if (dstlen > 0) *dst = c; - len++; - } - else if (k > 0x60) - { - if (dstlen > 0) *dst = k | 0xff00; - len++; - } - else - { - if (dstlen >= 2) - { - dst[0] = (k > 0x50) ? (c - (k & 0xf)) : (katakana_map[shift - k] | 0xff00); - dst[1] = (k == 2) ? 0xff9f : 0xff9e; - } - len += 2; - } - return len; -} - /* map single full-width character to single or double half-width characters. */ static int map_to_halfwidth( WCHAR c, WCHAR *dst, int dstlen ) { - int n = decompose_katakana( c, dst, dstlen ); - if (n > 0) return n; - - if (c == 0x3000) - *dst = ' '; - else if (c == 0x3001) - *dst = 0xff64; - else if (c == 0x3002) - *dst = 0xff61; - else if (c == 0x300c || c == 0x300d) - *dst = (c - 0x300c) + 0xff62; - else if (c >= 0x3131 && c <= 0x3163) + static const BYTE katakana_map[] = { - *dst = c - 0x3131 + 0xffa1; - if (*dst >= 0xffbf) *dst += 3; - if (*dst >= 0xffc8) *dst += 2; - if (*dst >= 0xffd0) *dst += 2; - if (*dst >= 0xffd8) *dst += 2; - } - else if (c == 0x3164) - *dst = 0xffa0; - else if (c == 0x2019) - *dst = '\''; - else if (c == 0x201d) - *dst = '"'; - else if (c > 0xff00 && c < 0xff5f && c != 0xff3c) - *dst = c - 0xff00 + 0x20; - else if (c >= 0xffe0 && c <= 0xffe6) - { - static const WCHAR misc_symbol_map[] = { 0x00a2, 0x00a3, 0x00ac, 0x00af, 0x00a6, 0x00a5, 0x20a9 }; - *dst = misc_symbol_map[c - 0xffe0]; - } - else - *dst = c; + 0x01, 0x00, 0x01, 0x00, /* U+30a8- */ + 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, /* U+30b0- */ + 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, /* U+30b8- */ + 0x01, 0x00, 0x01, 0x00, 0x00, 0x01, 0x00, 0x01, /* U+30c0- */ + 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* U+30c8- */ + 0x01, 0x02, 0x00, 0x01, 0x02, 0x00, 0x01, 0x02, /* U+30d0- */ + 0x00, 0x01, 0x02, 0x00, 0x01, 0x02, 0x00, 0x00, /* U+30d8- */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* U+30e0- */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* U+30e8- */ + 0x00, 0x00, 0x00, 0x00, 0x4e, 0x00, 0x00, 0x08, /* U+30f0- */ + 0x08, 0x08, 0x08, 0x00, 0x00, 0x00, 0x01 /* U+30f8- */ + }; + USHORT shift = c - 0x30ac; + BYTE k; + if (shift < ARRAY_SIZE(katakana_map) && (k = katakana_map[shift])) + { + if (dstlen >= 2) + { + dst[0] = casemap( charmaps[CHARMAP_HALFWIDTH], c - k ); + dst[1] = (k == 2) ? 0xff9f : 0xff9e; + } + return 2; + } + if (dstlen >= 1) dst[0] = casemap( charmaps[CHARMAP_HALFWIDTH], c ); return 1; } @@ -5877,9 +5758,10 @@ INT WINAPI DECLSPEC_HOTPATCH LCMapStringEx( const WCHAR *locale, DWORD flags, co } else if (flags & LCMAP_FULLWIDTH) { + WCHAR wch; for (len = 0; srclen; src++, srclen--, len++) { - if (compose_katakana( src, srclen, NULL ) == 2) + if (map_to_fullwidth( src, srclen, &wch ) == 2) { src++; srclen--; @@ -5892,11 +5774,8 @@ INT WINAPI DECLSPEC_HOTPATCH LCMapStringEx( const WCHAR *locale, DWORD flags, co { WCHAR wch = *src; /* map Hiragana to Katakana before decomposition if needed */ - if ((flags & LCMAP_KATAKANA) && - ((wch >= 0x3041 && wch <= 0x3096) || wch == 0x309D || wch == 0x309E)) - wch += 0x60; - - if (decompose_katakana( wch, NULL, 0 ) == 2) len++; + if (flags & LCMAP_KATAKANA) wch = casemap( charmaps[CHARMAP_KATAKANA], wch ); + if (map_to_halfwidth( wch, NULL, 0 ) == 2) len++; } } else len = srclen; @@ -5921,6 +5800,17 @@ INT WINAPI DECLSPEC_HOTPATCH LCMapStringEx( const WCHAR *locale, DWORD flags, co goto done; } + if (flags & LCMAP_TRADITIONAL_CHINESE) + { + for (len = dstlen, dst_ptr = dst; srclen && len; srclen--, len--) + *dst_ptr++ = casemap( charmaps[CHARMAP_TRADITIONAL], *src++ ); + } + if (flags & LCMAP_SIMPLIFIED_CHINESE) + { + for (len = dstlen, dst_ptr = dst; srclen && len; srclen--, len--) + *dst_ptr++ = casemap( charmaps[CHARMAP_SIMPLIFIED], *src++ ); + } + if (flags & (LCMAP_FULLWIDTH | LCMAP_HALFWIDTH | LCMAP_HIRAGANA | LCMAP_KATAKANA)) { for (len = dstlen, dst_ptr = dst; len && srclen; src++, srclen--, len--, dst_ptr++) @@ -5938,18 +5828,8 @@ INT WINAPI DECLSPEC_HOTPATCH LCMapStringEx( const WCHAR *locale, DWORD flags, co } else wch = *src; - if (flags & LCMAP_KATAKANA) - { - /* map hiragana to katakana, e.g. U+3041 -> U+30A1. - we can't use C3_HIRAGANA as some characters can't map to katakana */ - if ((wch >= 0x3041 && wch <= 0x3096) || wch == 0x309D || wch == 0x309E) wch += 0x60; - } - else if (flags & LCMAP_HIRAGANA) - { - /* map katakana to hiragana, e.g. U+30A1 -> U+3041. - we can't use C3_KATAKANA as some characters can't map to hiragana */ - if ((wch >= 0x30A1 && wch <= 0x30F6) || wch == 0x30FD || wch == 0x30FE) wch -= 0x60; - } + if (flags & LCMAP_KATAKANA) wch = casemap( charmaps[CHARMAP_KATAKANA], wch ); + else if (flags & LCMAP_HIRAGANA) wch = casemap( charmaps[CHARMAP_HIRAGANA], wch ); if (flags & LCMAP_HALFWIDTH) {