wrc: Avoid converting Unicode strings back to Ansi.

Signed-off-by: Alexandre Julliard <julliard@winehq.org>
This commit is contained in:
Alexandre Julliard 2020-02-07 08:47:00 +01:00
parent 1d8f4fe4a0
commit fe888bbbb6
5 changed files with 62 additions and 133 deletions

View File

@ -363,33 +363,21 @@ static int parse_accel_string( const string_t *key, int flags )
static void put_string(res_t *res, const string_t *str, int isterm, const language_t *lang) static void put_string(res_t *res, const string_t *str, int isterm, const language_t *lang)
{ {
int cnt, codepage; int cnt, codepage;
string_t *newstr;
assert(res != NULL);
assert(str != NULL);
if (lang) codepage = get_language_codepage( lang->id, lang->sub );
else codepage = get_language_codepage( 0, 0 );
assert( codepage != -1 );
if (win32) if (win32)
{ {
newstr = convert_string(str, str_unicode, codepage); string_t *newstr;
if (str->type == str_char)
if (lang) codepage = get_language_codepage( lang->id, lang->sub );
else codepage = get_language_codepage( 0, 0 );
assert( codepage != -1 );
newstr = convert_string_unicode( str, codepage );
if (str->type == str_char && check_valid_utf8( str, codepage ))
{ {
if (!check_unicode_conversion( str, newstr, codepage )) print_location( &str->loc );
{ warning( "string \"%s\" seems to be UTF-8 but codepage %u is in use, maybe use --utf8?\n",
print_location( &str->loc ); str->str.cstr, codepage );
error( "String %s does not convert identically to Unicode and back in codepage %d. "
"Try using a Unicode string instead\n", str->str.cstr, codepage );
}
if (check_valid_utf8( str, codepage ))
{
print_location( &str->loc );
warning( "string \"%s\" seems to be UTF-8 but codepage %u is in use.\n",
str->str.cstr, codepage );
}
} }
if (!isterm) put_word(res, newstr->size); if (!isterm) put_word(res, newstr->size);
for(cnt = 0; cnt < newstr->size; cnt++) for(cnt = 0; cnt < newstr->size; cnt++)
@ -399,20 +387,22 @@ static void put_string(res_t *res, const string_t *str, int isterm, const langua
put_word(res, c); put_word(res, c);
} }
if (isterm) put_word(res, 0); if (isterm) put_word(res, 0);
free_string(newstr);
} }
else else
{ {
newstr = convert_string(str, str_char, codepage); if (str->type == str_unicode)
if (!isterm) put_byte(res, newstr->size); internal_error(__FILE__, __LINE__, "Unicode string %s in 16-bit\n",
for(cnt = 0; cnt < newstr->size; cnt++) convert_string_utf8( str, 0 ));
if (!isterm) put_byte(res, str->size);
for(cnt = 0; cnt < str->size; cnt++)
{ {
char c = newstr->str.cstr[cnt]; char c = str->str.cstr[cnt];
if (isterm && !c) break; if (isterm && !c) break;
put_byte(res, c); put_byte(res, c);
} }
if (isterm) put_byte(res, 0); if (isterm) put_byte(res, 0);
} }
free_string(newstr);
} }
/* /*
@ -1458,21 +1448,15 @@ static void versionblock2res(res_t *res, ver_block_t *blk, int level, const lang
*/ */
static res_t *versioninfo2res(name_id_t *name, versioninfo_t *ver) static res_t *versioninfo2res(name_id_t *name, versioninfo_t *ver)
{ {
int restag; static const char info[] = "VS_VERSION_INFO";
int rootblocksizetag; unsigned int i;
int valsizetag; int restag, rootblocksizetag, valsizetag, tag;
int tag;
res_t *res; res_t *res;
string_t vsvi;
ver_block_t *blk; ver_block_t *blk;
assert(name != NULL); assert(name != NULL);
assert(ver != NULL); assert(ver != NULL);
vsvi.type = str_char;
vsvi.str.cstr = xstrdup("VS_VERSION_INFO");
vsvi.size = 15; /* Excl. termination */
res = new_res(); res = new_res();
restag = put_res_header(res, WRC_RT_VERSION, NULL, name, ver->memopt, &(ver->lvc)); restag = put_res_header(res, WRC_RT_VERSION, NULL, name, ver->memopt, &(ver->lvc));
rootblocksizetag = res->size; rootblocksizetag = res->size;
@ -1480,10 +1464,15 @@ static res_t *versioninfo2res(name_id_t *name, versioninfo_t *ver)
valsizetag = res->size; valsizetag = res->size;
put_word(res, 0); /* ValueSize filled in later*/ put_word(res, 0); /* ValueSize filled in later*/
if(win32) if(win32)
{
put_word(res, 0); /* Tree-level ? */ put_word(res, 0); /* Tree-level ? */
put_string(res, &vsvi, TRUE, NULL); for (i = 0; i < sizeof(info); i++) put_word(res, info[i]);
if(win32)
put_pad(res); put_pad(res);
}
else
{
for (i = 0; i < sizeof(info); i++) put_byte(res, info[i]);
}
tag = res->size; tag = res->size;
put_dword(res, VS_FFI_SIGNATURE); put_dword(res, VS_FFI_SIGNATURE);
put_dword(res, VS_FFI_STRUCVERSION); put_dword(res, VS_FFI_STRUCVERSION);
@ -1505,8 +1494,6 @@ static res_t *versioninfo2res(name_id_t *name, versioninfo_t *ver)
versionblock2res(res, blk, 0, win32 ? ver->lvc.language : NULL); versionblock2res(res, blk, 0, win32 ? ver->lvc.language : NULL);
/* Set root block's size */ /* Set root block's size */
set_word(res, rootblocksizetag, (WORD)(res->size - rootblocksizetag)); set_word(res, rootblocksizetag, (WORD)(res->size - rootblocksizetag));
free(vsvi.str.cstr);
return end_res(res, restag); return end_res(res, restag);
} }

View File

@ -429,7 +429,7 @@ L\" {
yy_push_state(tklstr); yy_push_state(tklstr);
wbufidx = 0; wbufidx = 0;
if(!win32) if(!win32)
parser_warning("16bit resource contains unicode strings\n"); parser_error("16bit resource contains unicode strings\n");
} }
<tklstr>\"{ws}+ | <tklstr>\"{ws}+ |
<tklstr>\" { <tklstr>\" {
@ -635,12 +635,9 @@ static string_t *get_buffered_cstring(void)
} }
else /* convert to Unicode before storing */ else /* convert to Unicode before storing */
{ {
string_t *str_w = convert_string( str, str_unicode, current_codepage ); string_t *str_w = convert_string_unicode( str, current_codepage );
if (!check_unicode_conversion( str, str_w, current_codepage ))
parser_error("String %s does not convert identically to Unicode and back in codepage %d. "
"Try using a Unicode string instead", str->str.cstr, current_codepage );
if (check_valid_utf8( str, current_codepage )) if (check_valid_utf8( str, current_codepage ))
parser_warning( "string \"%s\" seems to be UTF-8 but codepage %u is in use.\n", parser_warning( "string \"%s\" seems to be UTF-8 but codepage %u is in use, maybe use --utf8?\n",
str->str.cstr, current_codepage ); str->str.cstr, current_codepage );
free_string( str ); free_string( str );
return str_w; return str_w;
@ -660,10 +657,13 @@ static string_t *get_buffered_wstring(void)
static string_t *make_string(char *s) static string_t *make_string(char *s)
{ {
string_t *str = new_string(); string_t *ret, *str = new_string();
str->size = strlen(s); str->size = strlen(s);
str->type = str_char; str->type = str_char;
str->str.cstr = xmalloc(str->size+1); str->str.cstr = xmalloc(str->size+1);
memcpy(str->str.cstr, s, str->size+1); memcpy(str->str.cstr, s, str->size+1);
return str; if (current_codepage <= 0 || !win32) return str;
ret = convert_string_unicode( str, current_codepage );
free_string( str );
return ret;
} }

View File

@ -97,32 +97,30 @@ static name_id_t *dup_name_id( name_id_t *id )
if (!id || id->type != name_str) return id; if (!id || id->type != name_str) return id;
new = new_name_id(); new = new_name_id();
*new = *id; *new = *id;
new->name.s_name = convert_string( id->name.s_name, str_unicode, 1252 ); new->name.s_name = convert_string_unicode( id->name.s_name, 1252 );
return new; return new;
} }
static char *convert_msgid_ascii( const string_t *str, int error_on_invalid_char ) static char *convert_msgid_ascii( const string_t *str, int error_on_invalid_char )
{ {
int i; int i;
string_t *newstr = convert_string( str, str_unicode, 1252 ); char *buffer = xmalloc( str->size + 1 );
char *buffer = xmalloc( newstr->size + 1 );
for (i = 0; i < newstr->size; i++) for (i = 0; i < str->size; i++)
{ {
buffer[i] = newstr->str.wstr[i]; WCHAR ch = (str->type == str_unicode ? str->str.wstr[i] : (unsigned char)str->str.cstr[i]);
if (newstr->str.wstr[i] >= 32 && newstr->str.wstr[i] <= 127) continue; buffer[i] = ch;
if (newstr->str.wstr[i] == '\t' || newstr->str.wstr[i] == '\n') continue; if (ch >= 32 && ch <= 127) continue;
if (ch == '\t' || ch == '\n') continue;
if (error_on_invalid_char) if (error_on_invalid_char)
{ {
print_location( &newstr->loc ); print_location( &str->loc );
error( "Invalid character %04x in source string\n", newstr->str.wstr[i] ); error( "Invalid character %04x in source string\n", ch );
} }
free( buffer); free( buffer);
free_string( newstr );
return NULL; return NULL;
} }
buffer[i] = 0; buffer[i] = 0;
free_string( newstr );
return buffer; return buffer;
} }
@ -1205,7 +1203,7 @@ static string_t *translate_string( string_t *str, int *found )
char *buffer, *msgid, *context; char *buffer, *msgid, *context;
if (!str->size || !(buffer = convert_msgid_ascii( str, 0 ))) if (!str->size || !(buffer = convert_msgid_ascii( str, 0 )))
return convert_string( str, str_unicode, 1252 ); return convert_string_unicode( str, 1252 );
msgid = buffer; msgid = buffer;
context = get_message_context( &msgid ); context = get_message_context( &msgid );
@ -1216,7 +1214,7 @@ static string_t *translate_string( string_t *str, int *found )
ustr.str.cstr = (char *)transl; ustr.str.cstr = (char *)transl;
ustr.loc = str->loc; ustr.loc = str->loc;
new = convert_string( &ustr, str_unicode, CP_UTF8 ); new = convert_string_unicode( &ustr, CP_UTF8 );
free( buffer ); free( buffer );
return new; return new;
} }
@ -1310,7 +1308,7 @@ static void translate_dialog( dialog_t *dlg, dialog_t *new, int *found )
new->font = xmalloc( sizeof(*dlg->font) ); new->font = xmalloc( sizeof(*dlg->font) );
*new->font = *dlg->font; *new->font = *dlg->font;
if (uses_larger_font( new->lvc.language )) new->font->size++; if (uses_larger_font( new->lvc.language )) new->font->size++;
new->font->name = convert_string( dlg->font->name, str_unicode, 1252 ); new->font->name = convert_string_unicode( dlg->font->name, 1252 );
} }
new->controls = translate_controls( dlg->controls, found ); new->controls = translate_controls( dlg->controls, found );
} }
@ -1383,12 +1381,12 @@ static ver_value_t *translate_stringfileinfo( ver_value_t *val, language_t *lang
{ {
if (!strcasecmp( name, english_block_name[i] )) if (!strcasecmp( name, english_block_name[i] ))
{ {
string_t *str; string_t str;
str = new_string(); str.type = str_char;
str->type = str_char; str.size = strlen( block_name[i] ) + 1;
str->size = strlen( block_name[i] ) + 1; str.str.cstr = block_name[i];
str->str.cstr = xstrdup( block_name[i] ); str.loc = blk->name->loc;
new_blk->name = str; new_blk->name = convert_string_unicode( &str, CP_UTF8 );
new_blk->values = translate_langcharset_values( blk->values, lang, found ); new_blk->values = translate_langcharset_values( blk->values, lang, found );
} }
} }

View File

@ -301,11 +301,6 @@ static int wrc_mbstowcs( int codepage, int flags, const char *src, int srclen, W
return MultiByteToWideChar( codepage, flags, src, srclen, dst, dstlen ); return MultiByteToWideChar( codepage, flags, src, srclen, dst, dstlen );
} }
static int wrc_wcstombs( int codepage, int flags, const WCHAR *src, int srclen, char *dst, int dstlen )
{
return WideCharToMultiByte( codepage, flags, src, srclen, dst, dstlen, NULL, NULL );
}
#else /* _WIN32 */ #else /* _WIN32 */
#include "wine/unicode.h" #include "wine/unicode.h"
@ -320,11 +315,6 @@ static int wrc_mbstowcs( int codepage, int flags, const char *src, int srclen, W
return wine_cp_mbstowcs( wine_cp_get_table( codepage ), flags, src, srclen, dst, dstlen ); return wine_cp_mbstowcs( wine_cp_get_table( codepage ), flags, src, srclen, dst, dstlen );
} }
static int wrc_wcstombs( int codepage, int flags, const WCHAR *src, int srclen, char *dst, int dstlen )
{
return wine_cp_wcstombs( wine_cp_get_table( codepage ), flags, src, srclen, dst, dstlen, NULL, NULL );
}
#endif /* _WIN32 */ #endif /* _WIN32 */
static WCHAR *utf8_to_unicode( const char *src, int srclen, int *dstlen ) static WCHAR *utf8_to_unicode( const char *src, int srclen, int *dstlen )
@ -448,19 +438,18 @@ static char *unicode_to_utf8( const WCHAR *src, int srclen, int *dstlen )
return ret; return ret;
} }
string_t *convert_string(const string_t *str, enum str_e type, int codepage) string_t *convert_string_unicode( const string_t *str, int codepage )
{ {
string_t *ret = xmalloc(sizeof(*ret)); string_t *ret = xmalloc(sizeof(*ret));
int res; int res;
ret->type = type; ret->type = str_unicode;
ret->loc = str->loc; ret->loc = str->loc;
if (!codepage && str->type != type) if (str->type == str_char)
parser_error( "Current language is Unicode only, cannot convert string" );
if((str->type == str_char) && (type == str_unicode))
{ {
if (!codepage) parser_error( "Current language is Unicode only, cannot convert string" );
if (codepage == CP_UTF8) if (codepage == CP_UTF8)
ret->str.wstr = utf8_to_unicode( str->str.cstr, str->size, &ret->size ); ret->str.wstr = utf8_to_unicode( str->str.cstr, str->size, &ret->size );
else else
@ -475,38 +464,20 @@ string_t *convert_string(const string_t *str, enum str_e type, int codepage)
ret->str.wstr[ret->size] = 0; ret->str.wstr[ret->size] = 0;
} }
} }
else if((str->type == str_unicode) && (type == str_char)) else
{
if (codepage == CP_UTF8)
ret->str.cstr = unicode_to_utf8( str->str.wstr, str->size, &ret->size );
else
{
ret->str.cstr = xmalloc( str->size * 2 + 1 );
ret->size = wrc_wcstombs( codepage, 0, str->str.wstr, str->size, ret->str.cstr, str->size * 2 );
ret->str.cstr[ret->size] = 0;
}
}
else if(str->type == str_unicode)
{ {
ret->size = str->size; ret->size = str->size;
ret->str.wstr = xmalloc(sizeof(WCHAR)*(ret->size+1)); ret->str.wstr = xmalloc(sizeof(WCHAR)*(ret->size+1));
memcpy( ret->str.wstr, str->str.wstr, ret->size * sizeof(WCHAR) ); memcpy( ret->str.wstr, str->str.wstr, ret->size * sizeof(WCHAR) );
ret->str.wstr[ret->size] = 0; ret->str.wstr[ret->size] = 0;
} }
else /* str->type == str_char */
{
ret->size = str->size;
ret->str.cstr = xmalloc( ret->size + 1 );
memcpy( ret->str.cstr, str->str.cstr, ret->size );
ret->str.cstr[ret->size] = 0;
}
return ret; return ret;
} }
char *convert_string_utf8( const string_t *str, int codepage ) char *convert_string_utf8( const string_t *str, int codepage )
{ {
int len; int len;
string_t *wstr = convert_string( str, str_unicode, codepage ); string_t *wstr = convert_string_unicode( str, codepage );
char *ret = unicode_to_utf8( wstr->str.wstr, wstr->size, &len ); char *ret = unicode_to_utf8( wstr->str.wstr, wstr->size, &len );
free_string( wstr ); free_string( wstr );
return ret; return ret;
@ -548,32 +519,6 @@ done:
return 0; return 0;
} }
int check_unicode_conversion( const string_t *str_a, const string_t *str_w, int codepage )
{
int ok;
string_t *teststr = convert_string( str_w, str_char, codepage );
ok = (teststr->size == str_a->size && !memcmp( teststr->str.cstr, str_a->str.cstr, str_a->size ));
if (!ok)
{
int i;
fprintf( stderr, "Source: %s", str_a->str.cstr );
for (i = 0; i < str_a->size; i++)
fprintf( stderr, " %02x", (unsigned char)str_a->str.cstr[i] );
fprintf( stderr, "\nUnicode: " );
for (i = 0; i < str_w->size; i++)
fprintf( stderr, " %04x", str_w->str.wstr[i] );
fprintf( stderr, "\nBack: %s", teststr->str.cstr );
for (i = 0; i < teststr->size; i++)
fprintf( stderr, " %02x", (unsigned char)teststr->str.cstr[i] );
fprintf( stderr, "\n" );
}
free_string( teststr );
return ok;
}
struct lang2cp struct lang2cp
{ {

View File

@ -46,11 +46,10 @@ void chat(const char *s, ...) __attribute__((format (printf, 1, 2)));
char *dup_basename(const char *name, const char *ext); char *dup_basename(const char *name, const char *ext);
int compare_name_id(const name_id_t *n1, const name_id_t *n2); int compare_name_id(const name_id_t *n1, const name_id_t *n2);
string_t *convert_string(const string_t *str, enum str_e type, int codepage); string_t *convert_string_unicode( const string_t *str, int codepage );
char *convert_string_utf8( const string_t *str, int codepage ); char *convert_string_utf8( const string_t *str, int codepage );
void free_string( string_t *str ); void free_string( string_t *str );
int check_valid_utf8( const string_t *str, int codepage ); int check_valid_utf8( const string_t *str, int codepage );
int check_unicode_conversion( const string_t *str_a, const string_t *str_w, int codepage );
int get_language_codepage( unsigned short lang, unsigned short sublang ); int get_language_codepage( unsigned short lang, unsigned short sublang );
int is_valid_codepage(int cp); int is_valid_codepage(int cp);