Sweden-Number/tools/wrc/utils.c

614 lines
20 KiB
C

/*
* Utility routines
*
* Copyright 1998 Bertho A. Stultiens
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
*/
#include "config.h"
#include "wine/port.h"
#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "wrc.h"
#include "utils.h"
#include "parser.h"
/* #define WANT_NEAR_INDICATION */
#ifdef WANT_NEAR_INDICATION
/*
 * Replace every non-printable character of str with a space, in place.
 *
 * str must be a writable, NUL-terminated string.
 */
void make_print(char *str)
{
    for (; *str; str++)
    {
        /* isprint() is only defined for values representable as unsigned
         * char (or EOF); a plain char may be negative for 8-bit input. */
        if (!isprint((unsigned char)*str))
            *str = ' ';
    }
}
#endif
/*
 * Common back end for parser diagnostics.
 *
 * Prints "<input>:<line>:<column>: <t>: " to stderr followed by the
 * printf-style message s formatted with ap.  The input name falls back to
 * "stdin" when input_name is not set.  When WANT_NEAR_INDICATION is defined
 * and n is non-NULL, a sanitized copy of n is appended as " near '...'".
 * No trailing newline is written.
 */
static void generic_msg(const char *s, const char *t, const char *n, va_list ap)
{
    const char *source = input_name ? input_name : "stdin";

    fprintf(stderr, "%s:%d:%d: %s: ", source, line_number, char_number, t);
    vfprintf(stderr, s, ap);
#ifdef WANT_NEAR_INDICATION
    if(n)
    {
        /* Work on a copy so the caller's text is left untouched */
        char *printable = xstrdup(n);
        make_print(printable);
        fprintf(stderr, " near '%s'", printable);
        free(printable);
    }
#endif
}
int parser_error(const char *s, ...)
{
va_list ap;
va_start(ap, s);
generic_msg(s, "Error", parser_text, ap);
fputc( '\n', stderr );
va_end(ap);
exit(1);
return 1;
}
/*
 * Report a parser warning at the current input position.
 *
 * The printf-style message is passed to generic_msg() with the "Warning"
 * tag.  No newline is appended here.  Always returns 0.
 */
int parser_warning(const char *s, ...)
{
    va_list args;

    va_start(args, s);
    generic_msg(s, "Warning", parser_text, args);
    va_end(args);

    return 0;
}
/*
 * Report an internal error and terminate with exit status 3.
 *
 * file and line identify the reporting location (callers in this file pass
 * __FILE__ and __LINE__); s is a printf-style format for the details.
 */
void internal_error(const char *file, int line, const char *s, ...)
{
    va_list args;

    fprintf(stderr, "Internal error (please report) %s %d: ", file, line);

    va_start(args, s);
    vfprintf(stderr, s, args);
    va_end(args);

    exit(3);
}
/*
 * Print "Error: " plus the printf-style message, followed by the system
 * error text for the errno value that was current on entry, then terminate
 * with exit status 2.
 */
void fatal_perror( const char *msg, ... )
{
    /* Capture errno first: the stdio calls below are allowed to modify it,
     * which would make perror() describe the wrong failure. */
    int saved_errno = errno;
    va_list valist;

    va_start( valist, msg );
    fprintf(stderr, "Error: ");
    vfprintf( stderr, msg, valist );
    va_end( valist );

    errno = saved_errno;
    perror( " " );
    exit(2);
}
/*
 * Print "Error: " followed by the printf-style message to stderr and
 * terminate with exit status 2.
 */
void error(const char *s, ...)
{
    va_list args;

    fputs("Error: ", stderr);

    va_start(args, s);
    vfprintf(stderr, s, args);
    va_end(args);

    exit(2);
}
/*
 * Print "Warning: " followed by the printf-style message to stderr.
 * Execution continues afterwards.
 */
void warning(const char *s, ...)
{
    va_list args;

    fputs("Warning: ", stderr);

    va_start(args, s);
    vfprintf(stderr, s, args);
    va_end(args);
}
/*
 * Print an informational "FYI: " message to stderr, but only when the
 * DEBUGLEVEL_CHAT bit is set in debuglevel; otherwise do nothing.
 */
void chat(const char *s, ...)
{
    va_list args;

    if(!(debuglevel & DEBUGLEVEL_CHAT))
        return;

    fputs("FYI: ", stderr);

    va_start(args, s);
    vfprintf(stderr, s, args);
    va_end(args);
}
/*
 * Return a newly allocated copy of the file-name part of name with the
 * extension ext removed.
 *
 * name: path to process; NULL is treated as "wrc.tab".  Everything up to and
 *       including the last '/' is dropped.
 * ext:  extension to strip (compared case-insensitively against the tail of
 *       the file name); must not be NULL.
 *
 * The buffer is allocated with xmalloc() and has room for four additional
 * characters so that the caller can append a new extension.  The caller
 * owns the result and releases it with free().
 */
char *dup_basename(const char *name, const char *ext)
{
    size_t namelen;
    size_t extlen = strlen(ext);
    const char *slash;
    char *base;

    if(!name)
        name = "wrc.tab";

    slash = strrchr(name, '/');
    if(slash)
        name = slash + 1;

    namelen = strlen(name);

    /* +4 for later extension and +1 for '\0' */
    base = xmalloc(namelen + 4 + 1);
    strcpy(base, name);

    /* Only look at the tail when the name is at least as long as the
     * extension; otherwise the comparison would start before the string. */
    if(namelen >= extlen && !strcasecmp(name + namelen - extlen, ext))
        base[namelen - extlen] = '\0';

    return base;
}
/*
 * malloc() wrapper that never returns NULL.
 *
 * size must be greater than zero (asserted).  On allocation failure the
 * failure is reported through error().  The returned memory is filled with
 * the byte 0x55 rather than being left uninitialized.
 */
void *xmalloc(size_t size)
{
    void *mem;

    assert(size > 0);

    mem = malloc(size);
    if(!mem)
        error("Virtual memory exhausted.\n");

    memset(mem, 0x55, size);
    return mem;
}
/*
 * realloc() wrapper that never returns NULL.
 *
 * size must be greater than zero (asserted).  On allocation failure the
 * failure is reported through error().
 */
void *xrealloc(void *p, size_t size)
{
    void *mem;

    assert(size > 0);

    mem = realloc(p, size);
    if(!mem)
        error("Virtual memory exhausted.\n");

    return mem;
}
/*
 * printf-style formatting into a freshly allocated string.
 *
 * Starts with a 100-byte buffer and retries with a larger one until the
 * formatted text fits: the size is doubled when vsnprintf() reports -1 and
 * set to the exact required size when it reports the needed length.
 * The result is allocated with xmalloc(); the caller releases it with free().
 */
char *strmake( const char* fmt, ... )
{
    size_t capacity = 100;

    for (;;)
    {
        va_list args;
        int written;
        char *buf = xmalloc( capacity );

        va_start( args, fmt );
        written = vsnprintf( buf, capacity, fmt, args );
        va_end( args );

        if (written != -1 && (size_t)written < capacity)
            return buf;

        /* Did not fit: drop this attempt and pick the next buffer size */
        free( buf );
        if (written == -1)
            capacity *= 2;
        else
            capacity = written + 1;
    }
}
/*
 * Duplicate a NUL-terminated string into memory obtained from xmalloc().
 *
 * str must not be NULL (asserted).  The caller releases the copy with free().
 */
char *xstrdup(const char *str)
{
    size_t bytes;
    char *copy;

    assert(str != NULL);

    bytes = strlen(str) + 1; /* include the terminating NUL */
    copy = xmalloc(bytes);
    memcpy(copy, str, bytes);
    return copy;
}
/*
 * Case-insensitive comparison of two NUL-terminated char strings.
 *
 * Only the ASCII letters a-z are folded to upper case; every other
 * character is compared as-is.  Returns 0 when the strings are equal,
 * otherwise the difference between the first pair of (folded) characters
 * that differ.
 */
int compare_striA( const char *str1, const char *str2 )
{
    char up1, up2;

    do
    {
        up1 = *str1++;
        up2 = *str2++;
        /* only the A-Z range is case-insensitive */
        if (up1 >= 'a' && up1 <= 'z') up1 -= 'a' - 'A';
        if (up2 >= 'a' && up2 <= 'z') up2 -= 'a' - 'A';
    } while (up1 && up1 == up2);

    return up1 - up2;
}
/*
 * Case-insensitive comparison of two NUL-terminated WCHAR strings.
 *
 * Only the ASCII letters a-z are folded to upper case; every other
 * character is compared as-is.  Returns 0 when the strings are equal,
 * otherwise the difference between the first pair of (folded) characters
 * that differ.
 */
int compare_striW( const WCHAR *str1, const WCHAR *str2 )
{
    WCHAR up1, up2;

    do
    {
        up1 = *str1++;
        up2 = *str2++;
        /* only the A-Z range is case-insensitive */
        if (up1 >= 'a' && up1 <= 'z') up1 -= 'a' - 'A';
        if (up2 >= 'a' && up2 <= 'z') up2 -= 'a' - 'A';
    } while (up1 && up1 == up2);

    return up1 - up2;
}
/*
 *****************************************************************************
 * Function	: compare_name_id
 * Syntax	: int compare_name_id(const name_id_t *n1, const name_id_t *n2)
 * Input	: n1, n2 - the two name-ids to compare
 * Output	: 0 if equal, otherwise a negative or positive ordering value
 * Description	: Two ordinals compare by numeric difference.  Two strings
 *		  of the same kind compare case-insensitively (a-z only) via
 *		  compare_striA/compare_striW.  An ordinal compared with a
 *		  string yields 1, a string compared with an ordinal yields -1.
 * Remarks	: Strings of mixed kind (char vs. unicode) and unknown name
 *		  types are reported through internal_error().
 *****************************************************************************
*/
int compare_name_id(const name_id_t *n1, const name_id_t *n2)
{
    /* Mixed ordinal/string pairs have a fixed ordering */
    if(n1->type == name_ord && n2->type == name_str)
        return 1;
    if(n1->type == name_str && n2->type == name_ord)
        return -1;

    if(n1->type == name_ord && n2->type == name_ord)
        return n1->name.i_name - n2->name.i_name;

    if(n1->type != name_str || n2->type != name_str)
    {
        internal_error(__FILE__, __LINE__, "Comparing name-ids with unknown types (%d, %d)\n",
                       n1->type, n2->type);
        return 0; /* Keep the compiler happy */
    }

    /* Both are strings from here on */
    if(n1->name.s_name->type == str_char && n2->name.s_name->type == str_char)
        return compare_striA(n1->name.s_name->str.cstr, n2->name.s_name->str.cstr);

    if(n1->name.s_name->type == str_unicode && n2->name.s_name->type == str_unicode)
        return compare_striW(n1->name.s_name->str.wstr, n2->name.s_name->str.wstr);

    internal_error(__FILE__, __LINE__, "Can't yet compare strings of mixed type\n");
    return 0; /* Keep the compiler happy */
}
#ifdef _WIN32
/* Windows build: report whether codepage id is usable by forwarding to
 * IsValidCodePage() and returning its result unchanged. */
int is_valid_codepage(int id)
{
return IsValidCodePage( id );
}
/* Windows build: multibyte -> wide conversion; all arguments are passed
 * straight to MultiByteToWideChar() and its return value is returned. */
int wrc_mbstowcs( int codepage, int flags, const char *src, int srclen, WCHAR *dst, int dstlen )
{
return MultiByteToWideChar( codepage, flags, src, srclen, dst, dstlen );
}
/* Windows build: wide -> multibyte conversion; arguments are passed to
 * WideCharToMultiByte() with NULL for its last two parameters, and its
 * return value is returned. */
int wrc_wcstombs( int codepage, int flags, const WCHAR *src, int srclen, char *dst, int dstlen )
{
return WideCharToMultiByte( codepage, flags, src, srclen, dst, dstlen, NULL, NULL );
}
#else /* _WIN32 */
#include "wine/unicode.h"
/* Non-Windows build: a codepage is valid when it is CP_UTF8 or when
 * wine_cp_get_table() has a table for it.  Returns 1 or 0. */
int is_valid_codepage(int cp)
{
    if (cp == CP_UTF8)
        return 1;
    return wine_cp_get_table(cp) ? 1 : 0;
}
/* Non-Windows build: multibyte -> wide conversion.  CP_UTF8 is handled by
 * wine_utf8_mbstowcs(); any other codepage goes through wine_cp_mbstowcs()
 * with the table returned by wine_cp_get_table().  The helper's return
 * value is returned unchanged. */
int wrc_mbstowcs( int codepage, int flags, const char *src, int srclen, WCHAR *dst, int dstlen )
{
    if (codepage != CP_UTF8)
        return wine_cp_mbstowcs( wine_cp_get_table( codepage ), flags, src, srclen, dst, dstlen );

    return wine_utf8_mbstowcs( flags, src, srclen, dst, dstlen );
}
/* Non-Windows build: wide -> multibyte conversion.  CP_UTF8 is handled by
 * wine_utf8_wcstombs(); any other codepage goes through wine_cp_wcstombs()
 * with the table returned by wine_cp_get_table() and NULL for its last two
 * parameters.  The helper's return value is returned unchanged. */
int wrc_wcstombs( int codepage, int flags, const WCHAR *src, int srclen, char *dst, int dstlen )
{
    if (codepage != CP_UTF8)
        return wine_cp_wcstombs( wine_cp_get_table( codepage ), flags, src, srclen, dst, dstlen, NULL, NULL );

    return wine_utf8_wcstombs( flags, src, srclen, dst, dstlen );
}
#endif /* _WIN32 */
/*
 * Return a newly allocated copy of str converted to the requested type.
 *
 * str:      source string (left untouched).
 * type:     desired string type (str_char or str_unicode).
 * codepage: codepage used for char <-> unicode conversion.  A codepage of 0
 *           combined with a type change is reported through parser_error().
 *
 * When no conversion is needed the data is simply copied.  The result always
 * carries one extra terminating 0 element beyond ->size and inherits the
 * source location.  An invalid multibyte sequence (conversion result -2) is
 * reported through parser_error().  Release the result with free_string().
 */
string_t *convert_string(const string_t *str, enum str_e type, int codepage)
{
    string_t *out = xmalloc(sizeof(*out));
    int status;

    out->loc = str->loc;

    if (str->type != type && !codepage)
        parser_error( "Current language is Unicode only, cannot convert string" );

    if (str->type == str_unicode)
    {
        if (type == str_char)
        {
            /* wide -> multibyte: first call only measures the output */
            out->type = str_char;
            out->size = wrc_wcstombs( codepage, 0, str->str.wstr, str->size, NULL, 0 );
            out->str.cstr = xmalloc( out->size + 1 );
            wrc_wcstombs( codepage, 0, str->str.wstr, str->size, out->str.cstr, out->size );
            out->str.cstr[out->size] = 0;
        }
        else
        {
            /* already wide: plain copy */
            out->type = str_unicode;
            out->size = str->size;
            out->str.wstr = xmalloc(sizeof(WCHAR)*(out->size+1));
            memcpy( out->str.wstr, str->str.wstr, out->size * sizeof(WCHAR) );
            out->str.wstr[out->size] = 0;
        }
    }
    else if (str->type == str_char && type == str_unicode)
    {
        /* multibyte -> wide: measure leniently, then convert strictly */
        out->type = str_unicode;
        out->size = wrc_mbstowcs( codepage, 0, str->str.cstr, str->size, NULL, 0 );
        out->str.wstr = xmalloc( (out->size+1) * sizeof(WCHAR) );
        status = wrc_mbstowcs( codepage, MB_ERR_INVALID_CHARS, str->str.cstr, str->size,
                               out->str.wstr, out->size );
        if (status == -2)
            parser_error( "Invalid character in string '%.*s' for codepage %u",
                          str->size, str->str.cstr, codepage );
        out->str.wstr[out->size] = 0;
    }
    else
    {
        /* str->type == str_char: plain copy */
        out->type = str_char;
        out->size = str->size;
        out->str.cstr = xmalloc( out->size + 1 );
        memcpy( out->str.cstr, str->str.cstr, out->size );
        out->str.cstr[out->size] = 0;
    }

    return out;
}
/* Release a string_t: first the character buffer that matches its type,
 * then the structure itself. */
void free_string(string_t *str)
{
    if (str->type != str_unicode)
        free( str->str.cstr );
    else
        free( str->str.wstr );

    free( str );
}
/*
 * Check if the string is valid utf8 despite a different codepage being in use.
 *
 * Returns 1 only when the check is still enabled (check_utf8), codepage is a
 * valid non-zero codepage other than CP_UTF8, the string contains at least
 * one 8-bit byte, and the whole string converts cleanly as UTF-8.
 * Returns 0 otherwise.  As soon as a string with 8-bit bytes turns out not
 * to be UTF-8, check_utf8 is cleared so later calls return 0 immediately.
 */
int check_valid_utf8( const string_t *str, int codepage )
{
    unsigned int pos;
    unsigned char lead;

    if (!check_utf8 || !codepage || codepage == CP_UTF8 || !is_valid_codepage( codepage ))
        return 0;

    /* locate the first 8-bit byte */
    for (pos = 0; pos < str->size; pos++)
        if ((unsigned char)str->str.cstr[pos] >= 0x80) break;

    if (pos == str->size) return 0;  /* no 8-bit chars at all */

    /* only bytes 0xc2..0xf4 are accepted as a candidate first 8-bit byte;
     * if so, let the converter validate the whole string */
    lead = (unsigned char)str->str.cstr[pos];
    if (lead >= 0xc2 && lead < 0xf5 &&
        wrc_mbstowcs( CP_UTF8, MB_ERR_INVALID_CHARS, str->str.cstr, str->size, NULL, 0 ) >= 0)
        return 1;

    check_utf8 = 0;  /* at least one 8-bit non-utf8 string found, stop checking */
    return 0;
}
/*
 * Verify that str_w converts back to exactly str_a in the given codepage.
 *
 * str_w is converted to a char string with convert_string() and compared
 * with str_a by size and content.  Returns 1 on a match and 0 otherwise;
 * on a mismatch the source, Unicode and round-tripped strings are dumped
 * to stderr, each followed by its elements in hex.
 */
int check_unicode_conversion( const string_t *str_a, const string_t *str_w, int codepage )
{
    string_t *back = convert_string( str_w, str_char, codepage );
    int same = (back->size == str_a->size &&
                memcmp( back->str.cstr, str_a->str.cstr, str_a->size ) == 0);

    if (!same)
    {
        int pos;

        fprintf( stderr, "Source: %s", str_a->str.cstr );
        for (pos = 0; pos < str_a->size; pos++)
            fprintf( stderr, " %02x", (unsigned char)str_a->str.cstr[pos] );

        fprintf( stderr, "\nUnicode: " );
        for (pos = 0; pos < str_w->size; pos++)
            fprintf( stderr, " %04x", str_w->str.wstr[pos] );

        fprintf( stderr, "\nBack: %s", back->str.cstr );
        for (pos = 0; pos < back->size; pos++)
            fprintf( stderr, " %02x", (unsigned char)back->str.cstr[pos] );

        fprintf( stderr, "\n" );
    }

    free_string( back );
    return same;
}
/* One row of the language -> codepage table (lang2cps). */
struct lang2cp
{
unsigned short lang; /* primary language identifier (LANG_*) */
unsigned short sublang; /* sublanguage identifier (SUBLANG_*) */
unsigned int cp; /* codepage number for this lang/sublang pair */
};
/* Language to codepage conversion table, searched by get_language_codepage(). */
/* Each language has a SUBLANG_NEUTRAL row that supplies its default codepage; */
/* specific sublanguages need only be specified if their codepage */
/* differs from that default. */
/* NOTE(review): a codepage of 0 presumably marks a Unicode-only language */
/* (convert_string() reports "Current language is Unicode only" for */
/* codepage 0) - confirm against the callers. */
static const struct lang2cp lang2cps[] =
{
{ LANG_AFRIKAANS, SUBLANG_NEUTRAL, 1252 },
{ LANG_ALBANIAN, SUBLANG_NEUTRAL, 1250 },
{ LANG_ALSATIAN, SUBLANG_NEUTRAL, 1252 },
{ LANG_AMHARIC, SUBLANG_NEUTRAL, 0 },
{ LANG_ARABIC, SUBLANG_NEUTRAL, 1256 },
{ LANG_ARMENIAN, SUBLANG_NEUTRAL, 0 },
{ LANG_ASSAMESE, SUBLANG_NEUTRAL, 0 },
{ LANG_AZERI, SUBLANG_NEUTRAL, 1254 },
{ LANG_AZERI, SUBLANG_AZERI_CYRILLIC, 1251 },
{ LANG_BASHKIR, SUBLANG_NEUTRAL, 1251 },
{ LANG_BASQUE, SUBLANG_NEUTRAL, 1252 },
{ LANG_BELARUSIAN, SUBLANG_NEUTRAL, 1251 },
{ LANG_BENGALI, SUBLANG_NEUTRAL, 0 },
{ LANG_BOSNIAN, SUBLANG_NEUTRAL, 1250 },
{ LANG_BOSNIAN, SUBLANG_BOSNIAN_BOSNIA_HERZEGOVINA_CYRILLIC, 1251 },
{ LANG_BRETON, SUBLANG_NEUTRAL, 1252 },
{ LANG_BULGARIAN, SUBLANG_NEUTRAL, 1251 },
{ LANG_CATALAN, SUBLANG_NEUTRAL, 1252 },
{ LANG_CHINESE, SUBLANG_NEUTRAL, 950 },
{ LANG_CHINESE, SUBLANG_CHINESE_SIMPLIFIED, 936 },
{ LANG_CHINESE, SUBLANG_CHINESE_SINGAPORE, 936 },
#ifdef LANG_CORNISH
{ LANG_CORNISH, SUBLANG_NEUTRAL, 1252 },
#endif /* LANG_CORNISH */
{ LANG_CORSICAN, SUBLANG_NEUTRAL, 1252 },
{ LANG_CROATIAN, SUBLANG_NEUTRAL, 1250 },
{ LANG_CZECH, SUBLANG_NEUTRAL, 1250 },
{ LANG_DANISH, SUBLANG_NEUTRAL, 1252 },
{ LANG_DARI, SUBLANG_NEUTRAL, 1256 },
{ LANG_DIVEHI, SUBLANG_NEUTRAL, 0 },
{ LANG_DUTCH, SUBLANG_NEUTRAL, 1252 },
{ LANG_ENGLISH, SUBLANG_NEUTRAL, 1252 },
#ifdef LANG_ESPERANTO
{ LANG_ESPERANTO, SUBLANG_NEUTRAL, 1252 },
#endif /* LANG_ESPERANTO */
{ LANG_ESTONIAN, SUBLANG_NEUTRAL, 1257 },
{ LANG_FAEROESE, SUBLANG_NEUTRAL, 1252 },
{ LANG_FILIPINO, SUBLANG_NEUTRAL, 1252 },
{ LANG_FINNISH, SUBLANG_NEUTRAL, 1252 },
{ LANG_FRENCH, SUBLANG_NEUTRAL, 1252 },
{ LANG_FRISIAN, SUBLANG_NEUTRAL, 1252 },
#ifdef LANG_MANX_GAELIC
{ LANG_MANX_GAELIC, SUBLANG_NEUTRAL, 1252 },
#endif /* LANG_MANX_GAELIC */
{ LANG_GALICIAN, SUBLANG_NEUTRAL, 1252 },
{ LANG_GEORGIAN, SUBLANG_NEUTRAL, 0 },
{ LANG_GERMAN, SUBLANG_NEUTRAL, 1252 },
{ LANG_GREEK, SUBLANG_NEUTRAL, 1253 },
{ LANG_GREENLANDIC, SUBLANG_NEUTRAL, 1252 },
{ LANG_GUJARATI, SUBLANG_NEUTRAL, 0 },
{ LANG_HAUSA, SUBLANG_NEUTRAL, 1252 },
{ LANG_HEBREW, SUBLANG_NEUTRAL, 1255 },
{ LANG_HINDI, SUBLANG_NEUTRAL, 0 },
{ LANG_HUNGARIAN, SUBLANG_NEUTRAL, 1250 },
{ LANG_ICELANDIC, SUBLANG_NEUTRAL, 1252 },
{ LANG_IGBO, SUBLANG_NEUTRAL, 1252 },
{ LANG_INDONESIAN, SUBLANG_NEUTRAL, 1252 },
{ LANG_INUKTITUT, SUBLANG_NEUTRAL, 0 },
{ LANG_INUKTITUT, SUBLANG_INUKTITUT_CANADA_LATIN, 0 },
{ LANG_INVARIANT, SUBLANG_NEUTRAL, 0 },
{ LANG_IRISH, SUBLANG_NEUTRAL, 1252 },
{ LANG_ITALIAN, SUBLANG_NEUTRAL, 1252 },
{ LANG_JAPANESE, SUBLANG_NEUTRAL, 932 },
{ LANG_KANNADA, SUBLANG_NEUTRAL, 0 },
{ LANG_KAZAK, SUBLANG_NEUTRAL, 1251 },
{ LANG_KHMER, SUBLANG_NEUTRAL, 0 },
{ LANG_KICHE, SUBLANG_NEUTRAL, 1252 },
{ LANG_KINYARWANDA, SUBLANG_NEUTRAL, 1252 },
{ LANG_KONKANI, SUBLANG_NEUTRAL, 0 },
{ LANG_KOREAN, SUBLANG_NEUTRAL, 949 },
{ LANG_KYRGYZ, SUBLANG_NEUTRAL, 1251 },
{ LANG_LAO, SUBLANG_NEUTRAL, 0 },
{ LANG_LATVIAN, SUBLANG_NEUTRAL, 1257 },
{ LANG_LITHUANIAN, SUBLANG_NEUTRAL, 1257 },
{ LANG_LOWER_SORBIAN, SUBLANG_NEUTRAL, 1252 },
{ LANG_LUXEMBOURGISH, SUBLANG_NEUTRAL, 1252 },
{ LANG_MACEDONIAN, SUBLANG_NEUTRAL, 1251 },
{ LANG_MALAY, SUBLANG_NEUTRAL, 1252 },
{ LANG_MALAYALAM, SUBLANG_NEUTRAL, 0 },
{ LANG_MALTESE, SUBLANG_NEUTRAL, 0 },
{ LANG_MAORI, SUBLANG_NEUTRAL, 0 },
{ LANG_MAPUDUNGUN, SUBLANG_NEUTRAL, 1252 },
{ LANG_MARATHI, SUBLANG_NEUTRAL, 0 },
{ LANG_MOHAWK, SUBLANG_NEUTRAL, 1252 },
{ LANG_MONGOLIAN, SUBLANG_NEUTRAL, 1251 },
{ LANG_NEPALI, SUBLANG_NEUTRAL, 0 },
{ LANG_NEUTRAL, SUBLANG_NEUTRAL, 1252 },
{ LANG_NORWEGIAN, SUBLANG_NEUTRAL, 1252 },
{ LANG_OCCITAN, SUBLANG_NEUTRAL, 1252 },
{ LANG_ORIYA, SUBLANG_NEUTRAL, 0 },
{ LANG_PASHTO, SUBLANG_NEUTRAL, 0 },
{ LANG_PERSIAN, SUBLANG_NEUTRAL, 1256 },
{ LANG_POLISH, SUBLANG_NEUTRAL, 1250 },
{ LANG_PORTUGUESE, SUBLANG_NEUTRAL, 1252 },
{ LANG_PUNJABI, SUBLANG_NEUTRAL, 0 },
{ LANG_QUECHUA, SUBLANG_NEUTRAL, 1252 },
{ LANG_ROMANIAN, SUBLANG_NEUTRAL, 1250 },
{ LANG_ROMANSH, SUBLANG_NEUTRAL, 1252 },
{ LANG_RUSSIAN, SUBLANG_NEUTRAL, 1251 },
{ LANG_SAMI, SUBLANG_NEUTRAL, 1252 },
{ LANG_SANSKRIT, SUBLANG_NEUTRAL, 0 },
{ LANG_SCOTTISH_GAELIC,SUBLANG_NEUTRAL, 1252 },
{ LANG_SERBIAN, SUBLANG_NEUTRAL, 1250 },
{ LANG_SERBIAN, SUBLANG_SERBIAN_CYRILLIC, 1251 },
{ LANG_SINHALESE, SUBLANG_NEUTRAL, 0 },
{ LANG_SLOVAK, SUBLANG_NEUTRAL, 1250 },
{ LANG_SLOVENIAN, SUBLANG_NEUTRAL, 1250 },
{ LANG_SOTHO, SUBLANG_NEUTRAL, 1252 },
{ LANG_SPANISH, SUBLANG_NEUTRAL, 1252 },
{ LANG_SWAHILI, SUBLANG_NEUTRAL, 1252 },
{ LANG_SWEDISH, SUBLANG_NEUTRAL, 1252 },
{ LANG_SYRIAC, SUBLANG_NEUTRAL, 0 },
{ LANG_TAJIK, SUBLANG_NEUTRAL, 1251 },
{ LANG_TAMAZIGHT, SUBLANG_NEUTRAL, 1252 },
{ LANG_TAMIL, SUBLANG_NEUTRAL, 0 },
{ LANG_TATAR, SUBLANG_NEUTRAL, 1251 },
{ LANG_TELUGU, SUBLANG_NEUTRAL, 0 },
{ LANG_THAI, SUBLANG_NEUTRAL, 874 },
{ LANG_TIBETAN, SUBLANG_NEUTRAL, 0 },
{ LANG_TSWANA, SUBLANG_NEUTRAL, 1252 },
{ LANG_TURKISH, SUBLANG_NEUTRAL, 1254 },
{ LANG_TURKMEN, SUBLANG_NEUTRAL, 1250 },
{ LANG_UIGHUR, SUBLANG_NEUTRAL, 1256 },
{ LANG_UKRAINIAN, SUBLANG_NEUTRAL, 1251 },
{ LANG_UPPER_SORBIAN, SUBLANG_NEUTRAL, 1252 },
{ LANG_URDU, SUBLANG_NEUTRAL, 1256 },
{ LANG_UZBEK, SUBLANG_NEUTRAL, 1254 },
{ LANG_UZBEK, SUBLANG_UZBEK_CYRILLIC, 1251 },
{ LANG_VIETNAMESE, SUBLANG_NEUTRAL, 1258 },
#ifdef LANG_WALON
{ LANG_WALON, SUBLANG_NEUTRAL, 1252 },
#endif /* LANG_WALON */
{ LANG_WELSH, SUBLANG_NEUTRAL, 1252 },
{ LANG_WOLOF, SUBLANG_NEUTRAL, 1252 },
{ LANG_XHOSA, SUBLANG_NEUTRAL, 1252 },
{ LANG_YAKUT, SUBLANG_NEUTRAL, 1251 },
{ LANG_YI, SUBLANG_NEUTRAL, 0 },
{ LANG_YORUBA, SUBLANG_NEUTRAL, 1252 },
{ LANG_ZULU, SUBLANG_NEUTRAL, 1252 }
};
int get_language_codepage( unsigned short lang, unsigned short sublang )
{
unsigned int i;
int cp = -1, defcp = -1;
for (i = 0; i < ARRAY_SIZE(lang2cps); i++)
{
if (lang2cps[i].lang != lang) continue;
if (lang2cps[i].sublang == sublang)
{
cp = lang2cps[i].cp;
break;
}
if (lang2cps[i].sublang == SUBLANG_NEUTRAL) defcp = lang2cps[i].cp;
}
if (cp == -1) cp = defcp;
assert( cp <= 0 || is_valid_codepage(cp) );
return cp;
}