wrc: Use the locale.nls data to determine a language's codepage.

Signed-off-by: Alexandre Julliard <julliard@winehq.org>
This commit is contained in:
Alexandre Julliard 2022-04-01 13:39:58 +02:00
parent d0b7fa9da7
commit f445f21a38
1 changed files with 78 additions and 175 deletions

View File

@ -29,6 +29,7 @@
#include "../tools.h"
#include "wrc.h"
#include "winternl.h"
#include "utils.h"
#include "parser.h"
@ -212,6 +213,16 @@ static WCHAR *codepage_to_unicode( int codepage, const char *src, int srclen, in
return dst;
}
int get_language_codepage( language_t lang )
{
DWORD codepage;
if (!lang) return 1252;
if (!GetLocaleInfoW( lang, LOCALE_IDEFAULTANSICODEPAGE | LOCALE_RETURN_NUMBER,
(WCHAR *)&codepage, sizeof(codepage)/sizeof(WCHAR) )) return -1;
return codepage;
}
#else /* _WIN32 */
struct nls_info
@ -312,6 +323,73 @@ static WCHAR *codepage_to_unicode( int codepage, const char *src, int srclen, in
return dst;
}
static const NLS_LOCALE_LCID_INDEX *lcids_index;
static const NLS_LOCALE_HEADER *locale_table;
static const NLS_LOCALE_LCNAME_INDEX *lcnames_index;
static const WCHAR *locale_strings;
static void load_locale_nls(void)
{
struct
{
unsigned int ctypes;
unsigned int unknown1;
unsigned int unknown2;
unsigned int unknown3;
unsigned int locales;
unsigned int charmaps;
unsigned int geoids;
unsigned int scripts;
} *header;
char *path;
unsigned int i;
size_t size;
for (i = 0; nlsdirs[i]; i++)
{
path = strmake( "%s/locale.nls", nlsdirs[i] );
header = read_file( path, &size );
free( path );
if (!header) continue;
locale_table = (const NLS_LOCALE_HEADER *)((char *)header + header->locales);
lcids_index = (const NLS_LOCALE_LCID_INDEX *)((char *)locale_table + locale_table->lcids_offset);
lcnames_index = (const NLS_LOCALE_LCNAME_INDEX *)((char *)locale_table + locale_table->lcnames_offset);
locale_strings = (const WCHAR *)((char *)locale_table + locale_table->strings_offset);
return;
}
error( "unable to load locale.nls\n" );
}
static const NLS_LOCALE_LCID_INDEX *find_lcid_entry( LCID lcid )
{
int min = 0, max = locale_table->nb_lcids - 1;
while (min <= max)
{
int pos = (min + max) / 2;
if (lcid < lcids_index[pos].id) max = pos - 1;
else if (lcid > lcids_index[pos].id) min = pos + 1;
else return &lcids_index[pos];
}
return NULL;
}
static const NLS_LOCALE_DATA *get_locale_data( UINT idx )
{
ULONG offset = locale_table->locales_offset + idx * locale_table->locale_size;
return (const NLS_LOCALE_DATA *)((const char *)locale_table + offset);
}
int get_language_codepage( language_t lang )
{
const NLS_LOCALE_LCID_INDEX *entry;
if (!lang) return 1252;
if (!locale_table) load_locale_nls();
if (!(entry = find_lcid_entry( lang ))) return -1;
return get_locale_data( entry->idx )->idefaultansicodepage;
}
#endif /* _WIN32 */
static WCHAR *utf8_to_unicode( const char *src, int srclen, int *dstlen )
@ -517,178 +595,3 @@ const char *get_nameid_str(const name_id_t *n)
if (n->name.s_name->type == str_char) return n->name.s_name->str.cstr;
return unicode_to_utf8( n->name.s_name->str.wstr, n->name.s_name->size, &len );
}
struct lang2cp
{
unsigned short lang;
unsigned short sublang;
unsigned int cp;
};
/* language to codepage conversion table */
/* specific sublanguages need only be specified if their codepage */
/* differs from the default (SUBLANG_NEUTRAL) */
static const struct lang2cp lang2cps[] =
{
{ LANG_AFRIKAANS, SUBLANG_NEUTRAL, 1252 },
{ LANG_ALBANIAN, SUBLANG_NEUTRAL, 1250 },
{ LANG_ALSATIAN, SUBLANG_NEUTRAL, 1252 },
{ LANG_AMHARIC, SUBLANG_NEUTRAL, 0 },
{ LANG_ARABIC, SUBLANG_NEUTRAL, 1256 },
{ LANG_ARMENIAN, SUBLANG_NEUTRAL, 0 },
{ LANG_ASSAMESE, SUBLANG_NEUTRAL, 0 },
{ LANG_ASTURIAN, SUBLANG_NEUTRAL, 1252 },
{ LANG_AZERI, SUBLANG_NEUTRAL, 1254 },
{ LANG_AZERI, SUBLANG_AZERI_CYRILLIC, 1251 },
{ LANG_BASHKIR, SUBLANG_NEUTRAL, 1251 },
{ LANG_BASQUE, SUBLANG_NEUTRAL, 1252 },
{ LANG_BELARUSIAN, SUBLANG_NEUTRAL, 1251 },
{ LANG_BENGALI, SUBLANG_NEUTRAL, 0 },
{ LANG_BOSNIAN, SUBLANG_NEUTRAL, 1250 },
{ LANG_BOSNIAN, SUBLANG_BOSNIAN_BOSNIA_HERZEGOVINA_CYRILLIC, 1251 },
{ LANG_BRETON, SUBLANG_NEUTRAL, 1252 },
{ LANG_BULGARIAN, SUBLANG_NEUTRAL, 1251 },
{ LANG_CATALAN, SUBLANG_NEUTRAL, 1252 },
{ LANG_CHINESE, SUBLANG_NEUTRAL, 950 },
{ LANG_CHINESE, SUBLANG_CHINESE_SIMPLIFIED, 936 },
{ LANG_CHINESE, SUBLANG_CHINESE_SINGAPORE, 936 },
#ifdef LANG_CORNISH
{ LANG_CORNISH, SUBLANG_NEUTRAL, 1252 },
#endif /* LANG_CORNISH */
{ LANG_CORSICAN, SUBLANG_NEUTRAL, 1252 },
{ LANG_CROATIAN, SUBLANG_NEUTRAL, 1250 },
{ LANG_CZECH, SUBLANG_NEUTRAL, 1250 },
{ LANG_DANISH, SUBLANG_NEUTRAL, 1252 },
{ LANG_DARI, SUBLANG_NEUTRAL, 1256 },
{ LANG_DIVEHI, SUBLANG_NEUTRAL, 0 },
{ LANG_DUTCH, SUBLANG_NEUTRAL, 1252 },
{ LANG_ENGLISH, SUBLANG_NEUTRAL, 1252 },
#ifdef LANG_ESPERANTO
{ LANG_ESPERANTO, SUBLANG_NEUTRAL, 1252 },
#endif /* LANG_ESPERANTO */
{ LANG_ESTONIAN, SUBLANG_NEUTRAL, 1257 },
{ LANG_FAEROESE, SUBLANG_NEUTRAL, 1252 },
{ LANG_FILIPINO, SUBLANG_NEUTRAL, 1252 },
{ LANG_FINNISH, SUBLANG_NEUTRAL, 1252 },
{ LANG_FRENCH, SUBLANG_NEUTRAL, 1252 },
{ LANG_FRISIAN, SUBLANG_NEUTRAL, 1252 },
#ifdef LANG_MANX_GAELIC
{ LANG_MANX_GAELIC, SUBLANG_NEUTRAL, 1252 },
#endif /* LANG_MANX_GAELIC */
{ LANG_GALICIAN, SUBLANG_NEUTRAL, 1252 },
{ LANG_GEORGIAN, SUBLANG_NEUTRAL, 0 },
{ LANG_GERMAN, SUBLANG_NEUTRAL, 1252 },
{ LANG_GREEK, SUBLANG_NEUTRAL, 1253 },
{ LANG_GREENLANDIC, SUBLANG_NEUTRAL, 1252 },
{ LANG_GUJARATI, SUBLANG_NEUTRAL, 0 },
{ LANG_HAUSA, SUBLANG_NEUTRAL, 1252 },
{ LANG_HEBREW, SUBLANG_NEUTRAL, 1255 },
{ LANG_HINDI, SUBLANG_NEUTRAL, 0 },
{ LANG_HUNGARIAN, SUBLANG_NEUTRAL, 1250 },
{ LANG_ICELANDIC, SUBLANG_NEUTRAL, 1252 },
{ LANG_IGBO, SUBLANG_NEUTRAL, 1252 },
{ LANG_INDONESIAN, SUBLANG_NEUTRAL, 1252 },
{ LANG_INUKTITUT, SUBLANG_NEUTRAL, 0 },
{ LANG_INUKTITUT, SUBLANG_INUKTITUT_CANADA_LATIN, 0 },
{ LANG_INVARIANT, SUBLANG_NEUTRAL, 0 },
{ LANG_IRISH, SUBLANG_NEUTRAL, 1252 },
{ LANG_ITALIAN, SUBLANG_NEUTRAL, 1252 },
{ LANG_JAPANESE, SUBLANG_NEUTRAL, 932 },
{ LANG_KANNADA, SUBLANG_NEUTRAL, 0 },
{ LANG_KAZAK, SUBLANG_NEUTRAL, 1251 },
{ LANG_KHMER, SUBLANG_NEUTRAL, 0 },
{ LANG_KICHE, SUBLANG_NEUTRAL, 1252 },
{ LANG_KINYARWANDA, SUBLANG_NEUTRAL, 1252 },
{ LANG_KONKANI, SUBLANG_NEUTRAL, 0 },
{ LANG_KOREAN, SUBLANG_NEUTRAL, 949 },
{ LANG_KYRGYZ, SUBLANG_NEUTRAL, 1251 },
{ LANG_LAO, SUBLANG_NEUTRAL, 0 },
{ LANG_LATVIAN, SUBLANG_NEUTRAL, 1257 },
{ LANG_LITHUANIAN, SUBLANG_NEUTRAL, 1257 },
{ LANG_LOWER_SORBIAN, SUBLANG_NEUTRAL, 1252 },
{ LANG_LUXEMBOURGISH, SUBLANG_NEUTRAL, 1252 },
{ LANG_MACEDONIAN, SUBLANG_NEUTRAL, 1251 },
{ LANG_MALAY, SUBLANG_NEUTRAL, 1252 },
{ LANG_MALAYALAM, SUBLANG_NEUTRAL, 0 },
{ LANG_MALTESE, SUBLANG_NEUTRAL, 0 },
{ LANG_MAORI, SUBLANG_NEUTRAL, 0 },
{ LANG_MAPUDUNGUN, SUBLANG_NEUTRAL, 1252 },
{ LANG_MARATHI, SUBLANG_NEUTRAL, 0 },
{ LANG_MOHAWK, SUBLANG_NEUTRAL, 1252 },
{ LANG_MONGOLIAN, SUBLANG_NEUTRAL, 1251 },
{ LANG_NEPALI, SUBLANG_NEUTRAL, 0 },
{ LANG_NEUTRAL, SUBLANG_NEUTRAL, 1252 },
{ LANG_NORWEGIAN, SUBLANG_NEUTRAL, 1252 },
{ LANG_OCCITAN, SUBLANG_NEUTRAL, 1252 },
{ LANG_ORIYA, SUBLANG_NEUTRAL, 0 },
{ LANG_PASHTO, SUBLANG_NEUTRAL, 0 },
{ LANG_PERSIAN, SUBLANG_NEUTRAL, 1256 },
{ LANG_POLISH, SUBLANG_NEUTRAL, 1250 },
{ LANG_PORTUGUESE, SUBLANG_NEUTRAL, 1252 },
{ LANG_PUNJABI, SUBLANG_NEUTRAL, 0 },
{ LANG_QUECHUA, SUBLANG_NEUTRAL, 1252 },
{ LANG_ROMANIAN, SUBLANG_NEUTRAL, 1250 },
{ LANG_ROMANSH, SUBLANG_NEUTRAL, 1252 },
{ LANG_RUSSIAN, SUBLANG_NEUTRAL, 1251 },
{ LANG_SAMI, SUBLANG_NEUTRAL, 1252 },
{ LANG_SANSKRIT, SUBLANG_NEUTRAL, 0 },
{ LANG_SCOTTISH_GAELIC,SUBLANG_NEUTRAL, 1252 },
{ LANG_SERBIAN, SUBLANG_NEUTRAL, 1250 },
{ LANG_SERBIAN, SUBLANG_SERBIAN_CYRILLIC, 1251 },
{ LANG_SINHALESE, SUBLANG_NEUTRAL, 0 },
{ LANG_SLOVAK, SUBLANG_NEUTRAL, 1250 },
{ LANG_SLOVENIAN, SUBLANG_NEUTRAL, 1250 },
{ LANG_SOTHO, SUBLANG_NEUTRAL, 1252 },
{ LANG_SPANISH, SUBLANG_NEUTRAL, 1252 },
{ LANG_SWAHILI, SUBLANG_NEUTRAL, 1252 },
{ LANG_SWEDISH, SUBLANG_NEUTRAL, 1252 },
{ LANG_SYRIAC, SUBLANG_NEUTRAL, 0 },
{ LANG_TAJIK, SUBLANG_NEUTRAL, 1251 },
{ LANG_TAMAZIGHT, SUBLANG_NEUTRAL, 1252 },
{ LANG_TAMIL, SUBLANG_NEUTRAL, 0 },
{ LANG_TATAR, SUBLANG_NEUTRAL, 1251 },
{ LANG_TELUGU, SUBLANG_NEUTRAL, 0 },
{ LANG_THAI, SUBLANG_NEUTRAL, 874 },
{ LANG_TIBETAN, SUBLANG_NEUTRAL, 0 },
{ LANG_TSWANA, SUBLANG_NEUTRAL, 1252 },
{ LANG_TURKISH, SUBLANG_NEUTRAL, 1254 },
{ LANG_TURKMEN, SUBLANG_NEUTRAL, 1250 },
{ LANG_UIGHUR, SUBLANG_NEUTRAL, 1256 },
{ LANG_UKRAINIAN, SUBLANG_NEUTRAL, 1251 },
{ LANG_UPPER_SORBIAN, SUBLANG_NEUTRAL, 1252 },
{ LANG_URDU, SUBLANG_NEUTRAL, 1256 },
{ LANG_UZBEK, SUBLANG_NEUTRAL, 1254 },
{ LANG_UZBEK, SUBLANG_UZBEK_CYRILLIC, 1251 },
{ LANG_VIETNAMESE, SUBLANG_NEUTRAL, 1258 },
#ifdef LANG_WALON
{ LANG_WALON, SUBLANG_NEUTRAL, 1252 },
#endif /* LANG_WALON */
{ LANG_WELSH, SUBLANG_NEUTRAL, 1252 },
{ LANG_WOLOF, SUBLANG_NEUTRAL, 1252 },
{ LANG_XHOSA, SUBLANG_NEUTRAL, 1252 },
{ LANG_YAKUT, SUBLANG_NEUTRAL, 1251 },
{ LANG_YI, SUBLANG_NEUTRAL, 0 },
{ LANG_YORUBA, SUBLANG_NEUTRAL, 1252 },
{ LANG_ZULU, SUBLANG_NEUTRAL, 1252 }
};
int get_language_codepage( language_t lang )
{
unsigned int i;
int cp = -1, defcp = -1;
for (i = 0; i < ARRAY_SIZE(lang2cps); i++)
{
if (lang2cps[i].lang != PRIMARYLANGID(lang)) continue;
if (lang2cps[i].sublang == SUBLANGID(lang))
{
cp = lang2cps[i].cp;
break;
}
if (lang2cps[i].sublang == SUBLANG_NEUTRAL) defcp = lang2cps[i].cp;
}
if (cp == -1) cp = defcp;
return cp;
}