/* * Utility routines * * Copyright 1998,2000 Bertho A. Stultiens * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA */ #include "config.h" #include #include #include #include #include #include #include "wmc.h" #include "winnls.h" #include "utils.h" #define SUPPRESS_YACC_ERROR_MESSAGE static void generic_msg(const char *s, const char *t, va_list ap) { fprintf(stderr, "%s:%d:%d: %s: ", input_name ? input_name : "stdin", line_number, char_number, t); vfprintf(stderr, s, ap); } /* * The yyerror routine should not exit because we use the error-token * to determine the syntactic error in the source. However, YACC * uses the same routine to print an error just before the error * token is reduced. * The extra routine 'xyyerror' is used to exit after giving a real * message. */ int mcy_error(const char *s, ...) { #ifndef SUPPRESS_YACC_ERROR_MESSAGE va_list ap; va_start(ap, s); generic_msg(s, "Yacc error", ap); va_end(ap); #endif return 1; } int xyyerror(const char *s, ...) { va_list ap; va_start(ap, s); generic_msg(s, "Error", ap); va_end(ap); exit(1); return 1; } int mcy_warning(const char *s, ...) { va_list ap; va_start(ap, s); generic_msg(s, "Warning", ap); va_end(ap); return 0; } void internal_error(const char *file, int line, const char *s, ...) { va_list ap; va_start(ap, s); fprintf(stderr, "Internal error (please report) %s %d: ", file, line); vfprintf(stderr, s, ap); va_end(ap); exit(3); } void fatal_perror( const char *msg, ... ) { va_list valist; va_start( valist, msg ); fprintf(stderr, "Error: "); vfprintf( stderr, msg, valist ); perror( " " ); va_end( valist ); exit(2); } void error(const char *s, ...) { va_list ap; va_start(ap, s); fprintf(stderr, "Error: "); vfprintf(stderr, s, ap); va_end(ap); exit(2); } void warning(const char *s, ...) { va_list ap; va_start(ap, s); fprintf(stderr, "Warning: "); vfprintf(stderr, s, ap); va_end(ap); } int unistrlen(const WCHAR *s) { int n; for(n = 0; *s; n++, s++) ; return n; } WCHAR *unistrcpy(WCHAR *dst, const WCHAR *src) { WCHAR *t = dst; while(*src) *t++ = *src++; *t = 0; return dst; } WCHAR *xunistrdup(const WCHAR * str) { WCHAR *s; assert(str != NULL); s = xmalloc((unistrlen(str)+1) * sizeof(WCHAR)); return unistrcpy(s, str); } int unistricmp(const WCHAR *s1, const WCHAR *s2) { int i; int once = 0; static const char warn[] = "Don't know the uppercase equivalent of non ascii characters;" "comparison might yield wrong results"; while(*s1 && *s2) { if((*s1 & 0xffff) > 0x7f || (*s2 & 0xffff) > 0x7f) { if(!once) { once++; mcy_warning(warn); } i = *s1++ - *s2++; } else i = toupper(*s1++) - toupper(*s2++); if(i) return i; } if((*s1 & 0xffff) > 0x7f || (*s2 & 0xffff) > 0x7f) { if(!once) mcy_warning(warn); return *s1 - *s2; } else return toupper(*s1) - toupper(*s2); } int unistrcmp(const WCHAR *s1, const WCHAR *s2) { int i; while(*s1 && *s2) { i = *s1++ - *s2++; if(i) return i; } return *s1 - *s2; } WCHAR *utf8_to_unicode( const char *src, int srclen, int *dstlen ) { static const char utf8_length[128] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x80-0x8f */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x90-0x9f */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xa0-0xaf */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xb0-0xbf */ 0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0xc0-0xcf */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0xd0-0xdf */ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, /* 0xe0-0xef */ 3,3,3,3,3,0,0,0,0,0,0,0,0,0,0,0 /* 0xf0-0xff */ }; static const unsigned char utf8_mask[4] = { 0x7f, 0x1f, 0x0f, 0x07 }; const char *srcend = src + srclen; int len, res; WCHAR *ret, *dst; dst = ret = xmalloc( (srclen + 1) * sizeof(WCHAR) ); while (src < srcend) { unsigned char ch = *src++; if (ch < 0x80) /* special fast case for 7-bit ASCII */ { *dst++ = ch; continue; } len = utf8_length[ch - 0x80]; if (len && src + len <= srcend) { res = ch & utf8_mask[len]; switch (len) { case 3: if ((ch = *src ^ 0x80) >= 0x40) break; res = (res << 6) | ch; src++; if (res < 0x10) break; case 2: if ((ch = *src ^ 0x80) >= 0x40) break; res = (res << 6) | ch; if (res >= 0x110000 >> 6) break; src++; if (res < 0x20) break; if (res >= 0xd800 >> 6 && res <= 0xdfff >> 6) break; case 1: if ((ch = *src ^ 0x80) >= 0x40) break; res = (res << 6) | ch; src++; if (res < 0x80) break; if (res <= 0xffff) *dst++ = res; else { res -= 0x10000; *dst++ = 0xd800 | (res >> 10); *dst++ = 0xdc00 | (res & 0x3ff); } continue; } } *dst++ = 0xfffd; } *dst = 0; *dstlen = dst - ret; return ret; } char *unicode_to_utf8( const WCHAR *src, int srclen, int *dstlen ) { char *ret, *dst; dst = ret = xmalloc( srclen * 3 + 1 ); for ( ; srclen; srclen--, src++) { unsigned int ch = *src; if (ch < 0x80) /* 0x00-0x7f: 1 byte */ { *dst++ = ch; continue; } if (ch < 0x800) /* 0x80-0x7ff: 2 bytes */ { dst[1] = 0x80 | (ch & 0x3f); ch >>= 6; dst[0] = 0xc0 | ch; dst += 2; continue; } if (ch >= 0xd800 && ch <= 0xdbff && srclen > 1 && src[1] >= 0xdc00 && src[1] <= 0xdfff) { /* 0x10000-0x10ffff: 4 bytes */ ch = 0x10000 + ((ch & 0x3ff) << 10) + (src[1] & 0x3ff); dst[3] = 0x80 | (ch & 0x3f); ch >>= 6; dst[2] = 0x80 | (ch & 0x3f); ch >>= 6; dst[1] = 0x80 | (ch & 0x3f); ch >>= 6; dst[0] = 0xf0 | ch; dst += 4; src++; srclen--; continue; } if (ch >= 0xd800 && ch <= 0xdfff) ch = 0xfffd; /* invalid surrogate pair */ /* 0x800-0xffff: 3 bytes */ dst[2] = 0x80 | (ch & 0x3f); ch >>= 6; dst[1] = 0x80 | (ch & 0x3f); ch >>= 6; dst[0] = 0xe0 | ch; dst += 3; } *dst = 0; *dstlen = dst - ret; return ret; } #ifdef _WIN32 int is_valid_codepage(int id) { return IsValidCodePage( id ); } WCHAR *codepage_to_unicode( int codepage, const char *src, int srclen, int *dstlen ) { WCHAR *dst = xmalloc( (srclen + 1) * sizeof(WCHAR) ); DWORD ret = MultiByteToWideChar( codepage, MB_ERR_INVALID_CHARS, src, srclen, dst, srclen ); if (!ret) return NULL; dst[ret] = 0; *dstlen = ret; return dst; } #else /* _WIN32 */ struct nls_info { unsigned short codepage; unsigned short unidef; unsigned short trans_unidef; unsigned short *cp2uni; unsigned short *dbcs_offsets; }; static struct nls_info nlsinfo[128]; static void init_nls_info( struct nls_info *info, unsigned short *ptr ) { unsigned short hdr_size = ptr[0]; info->codepage = ptr[1]; info->unidef = ptr[4]; info->trans_unidef = ptr[6]; ptr += hdr_size; info->cp2uni = ++ptr; ptr += 256; if (*ptr++) ptr += 256; /* glyph table */ info->dbcs_offsets = *ptr ? ptr + 1 : NULL; } static const struct nls_info *get_nls_info( unsigned int codepage ) { unsigned short *data; char *path; unsigned int i; size_t size; for (i = 0; i < ARRAY_SIZE(nlsinfo) && nlsinfo[i].codepage; i++) if (nlsinfo[i].codepage == codepage) return &nlsinfo[i]; assert( i < ARRAY_SIZE(nlsinfo) ); for (i = 0; nlsdirs[i]; i++) { path = strmake( "%s/c_%03u.nls", nlsdirs[i], codepage ); if ((data = read_file( path, &size ))) { free( path ); init_nls_info( &nlsinfo[i], data ); return &nlsinfo[i]; } free( path ); } return NULL; } int is_valid_codepage(int cp) { return cp == CP_UTF8 || get_nls_info( cp ); } WCHAR *codepage_to_unicode( int codepage, const char *src, int srclen, int *dstlen ) { const struct nls_info *info = get_nls_info( codepage ); unsigned int i; WCHAR dbch, *dst = xmalloc( (srclen + 1) * sizeof(WCHAR) ); if (!info) error( "codepage %u not supported\n", codepage ); if (info->dbcs_offsets) { for (i = 0; srclen; i++, srclen--, src++) { unsigned short off = info->dbcs_offsets[(unsigned char)*src]; if (off) { if (srclen == 1) return NULL; dbch = (src[0] << 8) | (unsigned char)src[1]; src++; srclen--; dst[i] = info->dbcs_offsets[off + (unsigned char)*src]; if (dst[i] == info->unidef && dbch != info->trans_unidef) return NULL; } else { dst[i] = info->cp2uni[(unsigned char)*src]; if (dst[i] == info->unidef && *src != info->trans_unidef) return NULL; } } } else { for (i = 0; i < srclen; i++) { dst[i] = info->cp2uni[(unsigned char)src[i]]; if (dst[i] == info->unidef && src[i] != info->trans_unidef) return NULL; } } dst[i] = 0; *dstlen = i; return dst; } #endif /* _WIN32 */ unsigned char *output_buffer; size_t output_buffer_pos; size_t output_buffer_size;