/*
 * Utility routines
 *
 * Copyright 1998 Bertho A. Stultiens
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
 */

#include "config.h"
#include "wine/port.h"

#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "wrc.h"
#include "utils.h"
#include "parser.h"

/* #define WANT_NEAR_INDICATION */

#ifdef WANT_NEAR_INDICATION
/* Replace every non-printable character of str with a space, in place. */
void make_print(char *str)
{
    for (; *str; str++)
    {
        /* <ctype.h> classifiers need a value representable as unsigned char */
        if (!isprint((unsigned char)*str))
            *str = ' ';
    }
}
#endif

/*
 * Common backend of parser_error() and parser_warning().
 *
 * Prints "<input>:<line>:<char>: <t>: " followed by the message built from
 * the printf-style format s and the arguments in ap.  "stdin" is shown when
 * input_name is not set.  n is only used when WANT_NEAR_INDICATION is
 * defined, in which case a sanitized copy of it is appended as " near '...'".
 */
static void generic_msg(const char *s, const char *t, const char *n, va_list ap)
{
    fprintf(stderr, "%s:%d:%d: %s: ", input_name ? input_name : "stdin", line_number, char_number, t);
    vfprintf(stderr, s, ap);
#ifdef WANT_NEAR_INDICATION
    {
        char *cpy;
        if (n)
        {
            cpy = xstrdup(n);
            make_print(cpy);
            fprintf(stderr, " near '%s'", cpy);
            free(cpy);
        }
    }
#endif
}

/*
 * Report a parse error at the current input position, terminate the line
 * and exit with status 1.  The trailing return is never reached; it only
 * satisfies the int return type.
 */
int parser_error(const char *s, ...)
{
    va_list ap;
    va_start(ap, s);
    generic_msg(s, "Error", parser_text, ap);
    fputc( '\n', stderr );
    va_end(ap);
    exit(1);
    return 1;
}

/*
 * Report a parse warning at the current input position.  No newline is
 * appended here.  Always returns 0.
 */
int parser_warning(const char *s, ...)
{
    va_list ap;
    va_start(ap, s);
    generic_msg(s, "Warning", parser_text, ap);
    va_end(ap);
    return 0;
}

/*
 * Report an internal error, tagged with the given source file and line,
 * then exit with status 3.
 */
void internal_error(const char *file, int line, const char *s, ...)
{
    va_list ap;
    va_start(ap, s);
    fprintf(stderr, "Internal error (please report) %s %d: ", file, line);
    vfprintf(stderr, s, ap);
    va_end(ap);
    exit(3);
}

/*
 * Print "Error: " plus the formatted message, then the description of the
 * errno value that was current on entry (via perror), and exit with status 2.
 */
void fatal_perror( const char *msg, ... )
{
    /* the stdio calls below may overwrite errno, so capture it first */
    int saved_errno = errno;
    va_list valist;

    va_start( valist, msg );
    fprintf(stderr, "Error: ");
    vfprintf( stderr, msg, valist );
    errno = saved_errno;
    perror( " " );
    va_end( valist );
    exit(2);
}

/* Print "Error: " plus the formatted message and exit with status 2. */
void error(const char *s, ...)
{
    va_list ap;
    va_start(ap, s);
    fprintf(stderr, "Error: ");
    vfprintf(stderr, s, ap);
    va_end(ap);
    exit(2);
}

/* Print "Warning: " plus the formatted message; execution continues. */
void warning(const char *s, ...)
{
    va_list ap;
    va_start(ap, s);
    fprintf(stderr, "Warning: ");
    vfprintf(stderr, s, ap);
    va_end(ap);
}

/* Print "FYI: " plus the formatted message when DEBUGLEVEL_CHAT is enabled. */
void chat(const char *s, ...)
{
    if (debuglevel & DEBUGLEVEL_CHAT)
    {
        va_list ap;
        va_start(ap, s);
        fprintf(stderr, "FYI: ");
        vfprintf(stderr, s, ap);
        va_end(ap);
    }
}

/*
 * Return a newly allocated copy of the file-name part of name (everything
 * after the last '/'), with a trailing ext removed when present; the
 * extension match is case-insensitive.  A NULL name stands for "wrc.tab".
 * The buffer has room for 4 extra characters so that the caller can append
 * a new extension.  The caller owns the result.
 */
char *dup_basename(const char *name, const char *ext)
{
    size_t namelen;
    size_t extlen = strlen(ext);
    char *base;
    const char *slash;

    if (!name)
        name = "wrc.tab";

    slash = strrchr(name, '/');
    if (slash)
        name = slash + 1;

    namelen = strlen(name);

    /* +4 for later extension and +1 for '\0' */
    base = xmalloc(namelen + 4 + 1);
    strcpy(base, name);

    /* only look for the extension when the name is long enough to hold it;
     * otherwise name + namelen - extlen would point before the string */
    if (namelen >= extlen && !strcasecmp(name + namelen - extlen, ext))
        base[namelen - extlen] = '\0';

    return base;
}

/*
 * malloc() wrapper: a failed allocation is reported through error(), which
 * exits.  size must be non-zero.  The new memory is filled with 0x55
 * (presumably so that use of uninitialized memory stands out).
 */
void *xmalloc(size_t size)
{
    void *res;

    assert(size > 0);
    res = malloc(size);
    if (res == NULL)
    {
        error("Virtual memory exhausted.\n");
    }
    memset(res, 0x55, size);
    return res;
}

/*
 * realloc() wrapper: a failed reallocation is reported through error(),
 * which exits.  size must be non-zero.
 */
void *xrealloc(void *p, size_t size)
{
    void *res;

    assert(size > 0);
    res = realloc(p, size);
    if (res == NULL)
    {
        error("Virtual memory exhausted.\n");
    }
    return res;
}

/*
 * printf-style formatting into a newly allocated string.  The buffer starts
 * at 100 bytes and is reallocated until the output fits.  The caller owns
 * the result.
 */
char *strmake( const char* fmt, ... )
{
    int n;
    size_t size = 100;
    va_list ap;

    for (;;)
    {
        char *p = xmalloc( size );
        va_start( ap, fmt );
        n = vsnprintf( p, size, fmt, ap );
        va_end( ap );
        if (n == -1) size *= 2;                       /* no length reported: just grow */
        else if ((size_t)n >= size) size = n + 1;     /* exact length known */
        else return p;
        free( p );
    }
}

/* Duplicate a non-NULL string with xmalloc(); the caller owns the result. */
char *xstrdup(const char *str)
{
    char *s;

    assert(str != NULL);
    s = xmalloc(strlen(str)+1);
    return strcpy(s, str);
}

/*
 * Compare two char strings, folding only a-z to upper case.  Returns the
 * difference of the first pair of characters that differ, or 0 when the
 * strings are equal.
 */
int compare_striA( const char *str1, const char *str2 )
{
    for (;;)
    {
        /* only the A-Z range is case-insensitive */
        char ch1 = (*str1 >= 'a' && *str1 <= 'z') ? *str1 + 'A' - 'a' : *str1;
        char ch2 = (*str2 >= 'a' && *str2 <= 'z') ? *str2 + 'A' - 'a' : *str2;
        if (!ch1 || ch1 != ch2) return ch1 - ch2;
        str1++;
        str2++;
    }
}

/* WCHAR counterpart of compare_striA(). */
int compare_striW( const WCHAR *str1, const WCHAR *str2 )
{
    for (;;)
    {
        /* only the A-Z range is case-insensitive */
        WCHAR ch1 = (*str1 >= 'a' && *str1 <= 'z') ? *str1 + 'A' - 'a' : *str1;
        WCHAR ch2 = (*str2 >= 'a' && *str2 <= 'z') ? *str2 + 'A' - 'a' : *str2;
        if (!ch1 || ch1 != ch2) return ch1 - ch2;
        str1++;
        str2++;
    }
}

/*
 *****************************************************************************
 * Function     : compare_name_id
 * Syntax       : int compare_name_id(const name_id_t *n1, const name_id_t *n2)
 * Input        : n1, n2 - the two name-ids to compare
 * Output       : <0, 0 or >0 when n1 sorts before, equal to or after n2
 * Description  : Two ordinals are compared by numeric difference.  Two
 *                string names of the same string type are compared with
 *                compare_striA() or compare_striW().  An ordinal compared
 *                with a string yields 1, a string compared with an ordinal
 *                yields -1.
 * Remarks      : Comparing strings of mixed type, or name-ids of unknown
 *                type, is reported through internal_error().
 *****************************************************************************
 */
int compare_name_id(const name_id_t *n1, const name_id_t *n2)
{
    if (n1->type == name_ord && n2->type == name_ord)
    {
        return n1->name.i_name - n2->name.i_name;
    }
    else if (n1->type == name_str && n2->type == name_str)
    {
        if (n1->name.s_name->type == str_char
        && n2->name.s_name->type == str_char)
        {
            return compare_striA(n1->name.s_name->str.cstr, n2->name.s_name->str.cstr);
        }
        else if (n1->name.s_name->type == str_unicode
        && n2->name.s_name->type == str_unicode)
        {
            return compare_striW(n1->name.s_name->str.wstr, n2->name.s_name->str.wstr);
        }
        else
        {
            internal_error(__FILE__, __LINE__, "Can't yet compare strings of mixed type\n");
        }
    }
    else if (n1->type == name_ord && n2->type == name_str)
        return 1;
    else if (n1->type == name_str && n2->type == name_ord)
        return -1;
    else
        internal_error(__FILE__, __LINE__, "Comparing name-ids with unknown types (%d, %d)\n",
                       n1->type, n2->type);

    return 0; /* Keep the compiler happy */
}

/*
 * Codepage helpers.  On Windows they forward to the native API; elsewhere
 * they use the wine_* conversion routines, with CP_UTF8 handled separately
 * from table-based codepages.
 */
#ifdef _WIN32

int is_valid_codepage(int id)
{
    return IsValidCodePage( id );
}

int wrc_mbstowcs( int codepage, int flags, const char *src, int srclen, WCHAR *dst, int dstlen )
{
    return MultiByteToWideChar( codepage, flags, src, srclen, dst, dstlen );
}

int wrc_wcstombs( int codepage, int flags, const WCHAR *src, int srclen, char *dst, int dstlen )
{
    return WideCharToMultiByte( codepage, flags, src, srclen, dst, dstlen, NULL, NULL );
}

#else  /* _WIN32 */

#include "wine/unicode.h"

int is_valid_codepage(int cp)
{
    return cp == CP_UTF8 || wine_cp_get_table(cp);
}

int wrc_mbstowcs( int codepage, int flags, const char *src, int srclen, WCHAR *dst, int dstlen )
{
    if (codepage == CP_UTF8) return wine_utf8_mbstowcs( flags, src, srclen, dst, dstlen );
    return wine_cp_mbstowcs( wine_cp_get_table( codepage ), flags, src, srclen, dst, dstlen );
}

int wrc_wcstombs( int codepage, int flags, const WCHAR *src, int srclen, char *dst, int dstlen )
{
    if (codepage == CP_UTF8) return wine_utf8_wcstombs( flags, src, srclen, dst, dstlen );
    return wine_cp_wcstombs( wine_cp_get_table( codepage ), flags, src, srclen, dst, dstlen, NULL, NULL );
}

#endif  /* _WIN32 */

/*
 * Return a newly allocated copy of str, converted to the requested string
 * type using codepage when the types differ.  The copy is always
 * 0-terminated and keeps the source location of str.  Requesting a
 * conversion with a zero codepage, or feeding bytes that are invalid for
 * the codepage, is reported through parser_error().  Release the result
 * with free_string().
 */
string_t *convert_string(const string_t *str, enum str_e type, int codepage)
{
    string_t *ret = xmalloc(sizeof(*ret));
    int res;

    ret->loc = str->loc;

    if (!codepage && str->type != type)
        parser_error( "Current language is Unicode only, cannot convert string" );

    if ((str->type == str_char) && (type == str_unicode))
    {
        ret->type = str_unicode;
        /* first call only computes the required length */
        ret->size = wrc_mbstowcs( codepage, 0, str->str.cstr, str->size, NULL, 0 );
        ret->str.wstr = xmalloc( (ret->size+1) * sizeof(WCHAR) );
        res = wrc_mbstowcs( codepage, MB_ERR_INVALID_CHARS, str->str.cstr, str->size,
                            ret->str.wstr, ret->size );
        if (res == -2)
            parser_error( "Invalid character in string '%.*s' for codepage %u",
                          str->size, str->str.cstr, codepage );
        ret->str.wstr[ret->size] = 0;
    }
    else if ((str->type == str_unicode) && (type == str_char))
    {
        ret->type = str_char;
        /* first call only computes the required length */
        ret->size = wrc_wcstombs( codepage, 0, str->str.wstr, str->size, NULL, 0 );
        ret->str.cstr = xmalloc( ret->size + 1 );
        wrc_wcstombs( codepage, 0, str->str.wstr, str->size, ret->str.cstr, ret->size );
        ret->str.cstr[ret->size] = 0;
    }
    else if (str->type == str_unicode)
    {
        ret->type = str_unicode;
        ret->size = str->size;
        ret->str.wstr = xmalloc(sizeof(WCHAR)*(ret->size+1));
        memcpy( ret->str.wstr, str->str.wstr, ret->size * sizeof(WCHAR) );
        ret->str.wstr[ret->size] = 0;
    }
    else /* str->type == str_char */
    {
        ret->type = str_char;
        ret->size = str->size;
        ret->str.cstr = xmalloc( ret->size + 1 );
        memcpy( ret->str.cstr, str->str.cstr, ret->size );
        ret->str.cstr[ret->size] = 0;
    }
    return ret;
}

/* Free a string_t together with its character buffer. */
void free_string(string_t *str)
{
    if (str->type == str_unicode) free( str->str.wstr );
    else free( str->str.cstr );
    free( str );
}

/*
 * check if the string is valid utf8 despite a different codepage being in use
 *
 * Returns 1 when str contains at least one 8-bit character and converts
 * cleanly as UTF-8, 0 otherwise.  The check is skipped when check_utf8 is
 * off, when codepage is 0, CP_UTF8 or not a valid codepage.  The first
 * string with 8-bit data that is not UTF-8 clears check_utf8.
 */
int check_valid_utf8( const string_t *str, int codepage )
{
    unsigned int i;

    if (!check_utf8) return 0;
    if (!codepage) return 0;
    if (codepage == CP_UTF8) return 0;
    if (!is_valid_codepage( codepage )) return 0;

    /* classify the first 8-bit byte: 0xc2..0xf4 is worth a full check,
     * 0x80..0xc1 and 0xf5..0xff are rejected immediately */
    for (i = 0; i < str->size; i++)
    {
        if ((unsigned char)str->str.cstr[i] >= 0xf5) goto done;
        if ((unsigned char)str->str.cstr[i] >= 0xc2) break;
        if ((unsigned char)str->str.cstr[i] >= 0x80) goto done;
    }
    if (i == str->size) return 0;  /* no 8-bit chars at all */

    if (wrc_mbstowcs( CP_UTF8, MB_ERR_INVALID_CHARS, str->str.cstr, str->size, NULL, 0 ) >= 0) return 1;

done:
    check_utf8 = 0;  /* at least one 8-bit non-utf8 string found, stop checking */
    return 0;
}

/*
 * Convert str_w back to codepage and compare the result with str_a.
 * Returns non-zero when they are byte-for-byte identical; otherwise dumps
 * the source, the Unicode form and the round-tripped bytes to stderr and
 * returns 0.
 */
int check_unicode_conversion( const string_t *str_a, const string_t *str_w, int codepage )
{
    int ok;
    string_t *teststr = convert_string( str_w, str_char, codepage );

    ok = (teststr->size == str_a->size && !memcmp( teststr->str.cstr, str_a->str.cstr, str_a->size ));

    if (!ok)
    {
        int i;

        fprintf( stderr, "Source: %s", str_a->str.cstr );
        for (i = 0; i < str_a->size; i++)
            fprintf( stderr, " %02x", (unsigned char)str_a->str.cstr[i] );
        fprintf( stderr, "\nUnicode: " );
        for (i = 0; i < str_w->size; i++)
            fprintf( stderr, " %04x", str_w->str.wstr[i] );
        fprintf( stderr, "\nBack: %s", teststr->str.cstr );
        for (i = 0; i < teststr->size; i++)
            fprintf( stderr, " %02x", (unsigned char)teststr->str.cstr[i] );
        fprintf( stderr, "\n" );
    }
    free_string( teststr );
    return ok;
}


/* One language/sublanguage pair and the codepage associated with it. */
struct lang2cp
{
    unsigned short lang;
    unsigned short sublang;
    unsigned int   cp;
};

/* language to codepage conversion table */
/* specific sublanguages need only be specified if their codepage */
/* differs from the default (SUBLANG_NEUTRAL) */
static const struct lang2cp lang2cps[] =
{
    { LANG_AFRIKAANS,      SUBLANG_NEUTRAL,              1252 },
    { LANG_ALBANIAN,       SUBLANG_NEUTRAL,              1250 },
    { LANG_ALSATIAN,       SUBLANG_NEUTRAL,              1252 },
    { LANG_AMHARIC,        SUBLANG_NEUTRAL,              0    },
    { LANG_ARABIC,         SUBLANG_NEUTRAL,              1256 },
    { LANG_ARMENIAN,       SUBLANG_NEUTRAL,              0    },
    { LANG_ASSAMESE,       SUBLANG_NEUTRAL,              0    },
    { LANG_AZERI,          SUBLANG_NEUTRAL,              1254 },
    { LANG_AZERI,          SUBLANG_AZERI_CYRILLIC,       1251 },
    { LANG_BASHKIR,        SUBLANG_NEUTRAL,              1251 },
    { LANG_BASQUE,         SUBLANG_NEUTRAL,              1252 },
    { LANG_BELARUSIAN,     SUBLANG_NEUTRAL,              1251 },
    { LANG_BENGALI,        SUBLANG_NEUTRAL,              0    },
    { LANG_BOSNIAN,        SUBLANG_NEUTRAL,              1250 },
    { LANG_BOSNIAN,        SUBLANG_BOSNIAN_BOSNIA_HERZEGOVINA_CYRILLIC, 1251 },
    { LANG_BRETON,         SUBLANG_NEUTRAL,              1252 },
    { LANG_BULGARIAN,      SUBLANG_NEUTRAL,              1251 },
    { LANG_CATALAN,        SUBLANG_NEUTRAL,              1252 },
    { LANG_CHINESE,        SUBLANG_NEUTRAL,              950  },
    { LANG_CHINESE,        SUBLANG_CHINESE_SIMPLIFIED,   936  },
    { LANG_CHINESE,        SUBLANG_CHINESE_SINGAPORE,    936  },
#ifdef LANG_CORNISH
    { LANG_CORNISH,        SUBLANG_NEUTRAL,              1252 },
#endif /* LANG_CORNISH */
    { LANG_CORSICAN,       SUBLANG_NEUTRAL,              1252 },
    { LANG_CROATIAN,       SUBLANG_NEUTRAL,              1250 },
    { LANG_CZECH,          SUBLANG_NEUTRAL,              1250 },
    { LANG_DANISH,         SUBLANG_NEUTRAL,              1252 },
    { LANG_DARI,           SUBLANG_NEUTRAL,              1256 },
    { LANG_DIVEHI,         SUBLANG_NEUTRAL,              0    },
    { LANG_DUTCH,          SUBLANG_NEUTRAL,              1252 },
    { LANG_ENGLISH,        SUBLANG_NEUTRAL,              1252 },
#ifdef LANG_ESPERANTO
    { LANG_ESPERANTO,      SUBLANG_NEUTRAL,              1252 },
#endif /* LANG_ESPERANTO */
    { LANG_ESTONIAN,       SUBLANG_NEUTRAL,              1257 },
    { LANG_FAEROESE,       SUBLANG_NEUTRAL,              1252 },
    { LANG_FILIPINO,       SUBLANG_NEUTRAL,              1252 },
    { LANG_FINNISH,        SUBLANG_NEUTRAL,              1252 },
    { LANG_FRENCH,         SUBLANG_NEUTRAL,              1252 },
    { LANG_FRISIAN,        SUBLANG_NEUTRAL,              1252 },
#ifdef LANG_MANX_GAELIC
    { LANG_MANX_GAELIC,    SUBLANG_NEUTRAL,              1252 },
#endif /* LANG_MANX_GAELIC */
    { LANG_GALICIAN,       SUBLANG_NEUTRAL,              1252 },
    { LANG_GEORGIAN,       SUBLANG_NEUTRAL,              0    },
    { LANG_GERMAN,         SUBLANG_NEUTRAL,              1252 },
    { LANG_GREEK,          SUBLANG_NEUTRAL,              1253 },
    { LANG_GREENLANDIC,    SUBLANG_NEUTRAL,              1252 },
    { LANG_GUJARATI,       SUBLANG_NEUTRAL,              0    },
    { LANG_HAUSA,          SUBLANG_NEUTRAL,              1252 },
    { LANG_HEBREW,         SUBLANG_NEUTRAL,              1255 },
    { LANG_HINDI,          SUBLANG_NEUTRAL,              0    },
    { LANG_HUNGARIAN,      SUBLANG_NEUTRAL,              1250 },
    { LANG_ICELANDIC,      SUBLANG_NEUTRAL,              1252 },
    { LANG_IGBO,           SUBLANG_NEUTRAL,              1252 },
    { LANG_INDONESIAN,     SUBLANG_NEUTRAL,              1252 },
    { LANG_INUKTITUT,      SUBLANG_NEUTRAL,              0    },
    { LANG_INUKTITUT,      SUBLANG_INUKTITUT_CANADA_LATIN, 0  },
    { LANG_INVARIANT,      SUBLANG_NEUTRAL,              0    },
    { LANG_IRISH,          SUBLANG_NEUTRAL,              1252 },
    { LANG_ITALIAN,        SUBLANG_NEUTRAL,              1252 },
    { LANG_JAPANESE,       SUBLANG_NEUTRAL,              932  },
    { LANG_KANNADA,        SUBLANG_NEUTRAL,              0    },
    { LANG_KAZAK,          SUBLANG_NEUTRAL,              1251 },
    { LANG_KHMER,          SUBLANG_NEUTRAL,              0    },
    { LANG_KICHE,          SUBLANG_NEUTRAL,              1252 },
    { LANG_KINYARWANDA,    SUBLANG_NEUTRAL,              1252 },
    { LANG_KONKANI,        SUBLANG_NEUTRAL,              0    },
    { LANG_KOREAN,         SUBLANG_NEUTRAL,              949  },
    { LANG_KYRGYZ,         SUBLANG_NEUTRAL,              1251 },
    { LANG_LAO,            SUBLANG_NEUTRAL,              0    },
    { LANG_LATVIAN,        SUBLANG_NEUTRAL,              1257 },
    { LANG_LITHUANIAN,     SUBLANG_NEUTRAL,              1257 },
    { LANG_LOWER_SORBIAN,  SUBLANG_NEUTRAL,              1252 },
    { LANG_LUXEMBOURGISH,  SUBLANG_NEUTRAL,              1252 },
    { LANG_MACEDONIAN,     SUBLANG_NEUTRAL,              1251 },
    { LANG_MALAY,          SUBLANG_NEUTRAL,              1252 },
    { LANG_MALAYALAM,      SUBLANG_NEUTRAL,              0    },
    { LANG_MALTESE,        SUBLANG_NEUTRAL,              0    },
    { LANG_MAORI,          SUBLANG_NEUTRAL,              0    },
    { LANG_MAPUDUNGUN,     SUBLANG_NEUTRAL,              1252 },
    { LANG_MARATHI,        SUBLANG_NEUTRAL,              0    },
    { LANG_MOHAWK,         SUBLANG_NEUTRAL,              1252 },
    { LANG_MONGOLIAN,      SUBLANG_NEUTRAL,              1251 },
    { LANG_NEPALI,         SUBLANG_NEUTRAL,              0    },
    { LANG_NEUTRAL,        SUBLANG_NEUTRAL,              1252 },
    { LANG_NORWEGIAN,      SUBLANG_NEUTRAL,              1252 },
    { LANG_OCCITAN,        SUBLANG_NEUTRAL,              1252 },
    { LANG_ORIYA,          SUBLANG_NEUTRAL,              0    },
    { LANG_PASHTO,         SUBLANG_NEUTRAL,              0    },
    { LANG_PERSIAN,        SUBLANG_NEUTRAL,              1256 },
    { LANG_POLISH,         SUBLANG_NEUTRAL,              1250 },
    { LANG_PORTUGUESE,     SUBLANG_NEUTRAL,              1252 },
    { LANG_PUNJABI,        SUBLANG_NEUTRAL,              0    },
    { LANG_QUECHUA,        SUBLANG_NEUTRAL,              1252 },
    { LANG_ROMANIAN,       SUBLANG_NEUTRAL,              1250 },
    { LANG_ROMANSH,        SUBLANG_NEUTRAL,              1252 },
    { LANG_RUSSIAN,        SUBLANG_NEUTRAL,              1251 },
    { LANG_SAMI,           SUBLANG_NEUTRAL,              1252 },
    { LANG_SANSKRIT,       SUBLANG_NEUTRAL,              0    },
    { LANG_SCOTTISH_GAELIC,SUBLANG_NEUTRAL,              1252 },
    { LANG_SERBIAN,        SUBLANG_NEUTRAL,              1250 },
    { LANG_SERBIAN,        SUBLANG_SERBIAN_CYRILLIC,     1251 },
    { LANG_SINHALESE,      SUBLANG_NEUTRAL,              0    },
    { LANG_SLOVAK,         SUBLANG_NEUTRAL,              1250 },
    { LANG_SLOVENIAN,      SUBLANG_NEUTRAL,              1250 },
    { LANG_SOTHO,          SUBLANG_NEUTRAL,              1252 },
    { LANG_SPANISH,        SUBLANG_NEUTRAL,              1252 },
    { LANG_SWAHILI,        SUBLANG_NEUTRAL,              1252 },
    { LANG_SWEDISH,        SUBLANG_NEUTRAL,              1252 },
    { LANG_SYRIAC,         SUBLANG_NEUTRAL,              0    },
    { LANG_TAJIK,          SUBLANG_NEUTRAL,              1251 },
    { LANG_TAMAZIGHT,      SUBLANG_NEUTRAL,              1252 },
    { LANG_TAMIL,          SUBLANG_NEUTRAL,              0    },
    { LANG_TATAR,          SUBLANG_NEUTRAL,              1251 },
    { LANG_TELUGU,         SUBLANG_NEUTRAL,              0    },
    { LANG_THAI,           SUBLANG_NEUTRAL,              874  },
    { LANG_TIBETAN,        SUBLANG_NEUTRAL,              0    },
    { LANG_TSWANA,         SUBLANG_NEUTRAL,              1252 },
    { LANG_TURKISH,        SUBLANG_NEUTRAL,              1254 },
    { LANG_TURKMEN,        SUBLANG_NEUTRAL,              1250 },
    { LANG_UIGHUR,         SUBLANG_NEUTRAL,              1256 },
    { LANG_UKRAINIAN,      SUBLANG_NEUTRAL,              1251 },
    { LANG_UPPER_SORBIAN,  SUBLANG_NEUTRAL,              1252 },
    { LANG_URDU,           SUBLANG_NEUTRAL,              1256 },
    { LANG_UZBEK,          SUBLANG_NEUTRAL,              1254 },
    { LANG_UZBEK,          SUBLANG_UZBEK_CYRILLIC,       1251 },
    { LANG_VIETNAMESE,     SUBLANG_NEUTRAL,              1258 },
#ifdef LANG_WALON
    { LANG_WALON,          SUBLANG_NEUTRAL,              1252 },
#endif /* LANG_WALON */
    { LANG_WELSH,          SUBLANG_NEUTRAL,              1252 },
    { LANG_WOLOF,          SUBLANG_NEUTRAL,              1252 },
    { LANG_XHOSA,          SUBLANG_NEUTRAL,              1252 },
    { LANG_YAKUT,          SUBLANG_NEUTRAL,              1251 },
    { LANG_YI,             SUBLANG_NEUTRAL,              0    },
    { LANG_YORUBA,         SUBLANG_NEUTRAL,              1252 },
    { LANG_ZULU,           SUBLANG_NEUTRAL,              1252 }
};

/*
 * Look up the codepage for a language/sublanguage pair in lang2cps.
 * An exact sublanguage entry wins; otherwise the SUBLANG_NEUTRAL entry of
 * the language is used.  Returns -1 when the language is not in the table.
 * Any positive result is asserted to be a valid codepage.
 */
int get_language_codepage( unsigned short lang, unsigned short sublang )
{
    unsigned int i;
    int cp = -1, defcp = -1;

    for (i = 0; i < ARRAY_SIZE(lang2cps); i++)
    {
        if (lang2cps[i].lang != lang) continue;
        if (lang2cps[i].sublang == sublang)
        {
            cp = lang2cps[i].cp;
            break;
        }
        /* remember the language default in case no exact match follows */
        if (lang2cps[i].sublang == SUBLANG_NEUTRAL) defcp = lang2cps[i].cp;
    }

    if (cp == -1) cp = defcp;
    assert( cp <= 0 || is_valid_codepage(cp) );
    return cp;
}