wrc: Avoid converting Unicode strings back to Ansi.

Signed-off-by: Alexandre Julliard <julliard@winehq.org>
This commit is contained in:
Alexandre Julliard 2020-02-07 08:47:00 +01:00
parent 1d8f4fe4a0
commit fe888bbbb6
5 changed files with 62 additions and 133 deletions

View File

@ -363,34 +363,22 @@ static int parse_accel_string( const string_t *key, int flags )
static void put_string(res_t *res, const string_t *str, int isterm, const language_t *lang)
{
int cnt, codepage;
string_t *newstr;
assert(res != NULL);
assert(str != NULL);
if (lang) codepage = get_language_codepage( lang->id, lang->sub );
else codepage = get_language_codepage( 0, 0 );
assert( codepage != -1 );
if (win32)
{
newstr = convert_string(str, str_unicode, codepage);
if (str->type == str_char)
{
if (!check_unicode_conversion( str, newstr, codepage ))
string_t *newstr;
if (lang) codepage = get_language_codepage( lang->id, lang->sub );
else codepage = get_language_codepage( 0, 0 );
assert( codepage != -1 );
newstr = convert_string_unicode( str, codepage );
if (str->type == str_char && check_valid_utf8( str, codepage ))
{
print_location( &str->loc );
error( "String %s does not convert identically to Unicode and back in codepage %d. "
"Try using a Unicode string instead\n", str->str.cstr, codepage );
}
if (check_valid_utf8( str, codepage ))
{
print_location( &str->loc );
warning( "string \"%s\" seems to be UTF-8 but codepage %u is in use.\n",
warning( "string \"%s\" seems to be UTF-8 but codepage %u is in use, maybe use --utf8?\n",
str->str.cstr, codepage );
}
}
if (!isterm) put_word(res, newstr->size);
for(cnt = 0; cnt < newstr->size; cnt++)
{
@ -399,20 +387,22 @@ static void put_string(res_t *res, const string_t *str, int isterm, const langua
put_word(res, c);
}
if (isterm) put_word(res, 0);
free_string(newstr);
}
else
{
newstr = convert_string(str, str_char, codepage);
if (!isterm) put_byte(res, newstr->size);
for(cnt = 0; cnt < newstr->size; cnt++)
if (str->type == str_unicode)
internal_error(__FILE__, __LINE__, "Unicode string %s in 16-bit\n",
convert_string_utf8( str, 0 ));
if (!isterm) put_byte(res, str->size);
for(cnt = 0; cnt < str->size; cnt++)
{
char c = newstr->str.cstr[cnt];
char c = str->str.cstr[cnt];
if (isterm && !c) break;
put_byte(res, c);
}
if (isterm) put_byte(res, 0);
}
free_string(newstr);
}
/*
@ -1458,21 +1448,15 @@ static void versionblock2res(res_t *res, ver_block_t *blk, int level, const lang
*/
static res_t *versioninfo2res(name_id_t *name, versioninfo_t *ver)
{
int restag;
int rootblocksizetag;
int valsizetag;
int tag;
static const char info[] = "VS_VERSION_INFO";
unsigned int i;
int restag, rootblocksizetag, valsizetag, tag;
res_t *res;
string_t vsvi;
ver_block_t *blk;
assert(name != NULL);
assert(ver != NULL);
vsvi.type = str_char;
vsvi.str.cstr = xstrdup("VS_VERSION_INFO");
vsvi.size = 15; /* Excl. termination */
res = new_res();
restag = put_res_header(res, WRC_RT_VERSION, NULL, name, ver->memopt, &(ver->lvc));
rootblocksizetag = res->size;
@ -1480,10 +1464,15 @@ static res_t *versioninfo2res(name_id_t *name, versioninfo_t *ver)
valsizetag = res->size;
put_word(res, 0); /* ValueSize filled in later*/
if(win32)
{
put_word(res, 0); /* Tree-level ? */
put_string(res, &vsvi, TRUE, NULL);
if(win32)
for (i = 0; i < sizeof(info); i++) put_word(res, info[i]);
put_pad(res);
}
else
{
for (i = 0; i < sizeof(info); i++) put_byte(res, info[i]);
}
tag = res->size;
put_dword(res, VS_FFI_SIGNATURE);
put_dword(res, VS_FFI_STRUCVERSION);
@ -1505,8 +1494,6 @@ static res_t *versioninfo2res(name_id_t *name, versioninfo_t *ver)
versionblock2res(res, blk, 0, win32 ? ver->lvc.language : NULL);
/* Set root block's size */
set_word(res, rootblocksizetag, (WORD)(res->size - rootblocksizetag));
free(vsvi.str.cstr);
return end_res(res, restag);
}

View File

@ -429,7 +429,7 @@ L\" {
yy_push_state(tklstr);
wbufidx = 0;
if(!win32)
parser_warning("16bit resource contains unicode strings\n");
parser_error("16bit resource contains unicode strings\n");
}
<tklstr>\"{ws}+ |
<tklstr>\" {
@ -635,12 +635,9 @@ static string_t *get_buffered_cstring(void)
}
else /* convert to Unicode before storing */
{
string_t *str_w = convert_string( str, str_unicode, current_codepage );
if (!check_unicode_conversion( str, str_w, current_codepage ))
parser_error("String %s does not convert identically to Unicode and back in codepage %d. "
"Try using a Unicode string instead", str->str.cstr, current_codepage );
string_t *str_w = convert_string_unicode( str, current_codepage );
if (check_valid_utf8( str, current_codepage ))
parser_warning( "string \"%s\" seems to be UTF-8 but codepage %u is in use.\n",
parser_warning( "string \"%s\" seems to be UTF-8 but codepage %u is in use, maybe use --utf8?\n",
str->str.cstr, current_codepage );
free_string( str );
return str_w;
@ -660,10 +657,13 @@ static string_t *get_buffered_wstring(void)
static string_t *make_string(char *s)
{
string_t *str = new_string();
string_t *ret, *str = new_string();
str->size = strlen(s);
str->type = str_char;
str->str.cstr = xmalloc(str->size+1);
memcpy(str->str.cstr, s, str->size+1);
return str;
if (current_codepage <= 0 || !win32) return str;
ret = convert_string_unicode( str, current_codepage );
free_string( str );
return ret;
}

View File

@ -97,32 +97,30 @@ static name_id_t *dup_name_id( name_id_t *id )
if (!id || id->type != name_str) return id;
new = new_name_id();
*new = *id;
new->name.s_name = convert_string( id->name.s_name, str_unicode, 1252 );
new->name.s_name = convert_string_unicode( id->name.s_name, 1252 );
return new;
}
static char *convert_msgid_ascii( const string_t *str, int error_on_invalid_char )
{
int i;
string_t *newstr = convert_string( str, str_unicode, 1252 );
char *buffer = xmalloc( newstr->size + 1 );
char *buffer = xmalloc( str->size + 1 );
for (i = 0; i < newstr->size; i++)
for (i = 0; i < str->size; i++)
{
buffer[i] = newstr->str.wstr[i];
if (newstr->str.wstr[i] >= 32 && newstr->str.wstr[i] <= 127) continue;
if (newstr->str.wstr[i] == '\t' || newstr->str.wstr[i] == '\n') continue;
WCHAR ch = (str->type == str_unicode ? str->str.wstr[i] : (unsigned char)str->str.cstr[i]);
buffer[i] = ch;
if (ch >= 32 && ch <= 127) continue;
if (ch == '\t' || ch == '\n') continue;
if (error_on_invalid_char)
{
print_location( &newstr->loc );
error( "Invalid character %04x in source string\n", newstr->str.wstr[i] );
print_location( &str->loc );
error( "Invalid character %04x in source string\n", ch );
}
free( buffer);
free_string( newstr );
return NULL;
}
buffer[i] = 0;
free_string( newstr );
return buffer;
}
@ -1205,7 +1203,7 @@ static string_t *translate_string( string_t *str, int *found )
char *buffer, *msgid, *context;
if (!str->size || !(buffer = convert_msgid_ascii( str, 0 )))
return convert_string( str, str_unicode, 1252 );
return convert_string_unicode( str, 1252 );
msgid = buffer;
context = get_message_context( &msgid );
@ -1216,7 +1214,7 @@ static string_t *translate_string( string_t *str, int *found )
ustr.str.cstr = (char *)transl;
ustr.loc = str->loc;
new = convert_string( &ustr, str_unicode, CP_UTF8 );
new = convert_string_unicode( &ustr, CP_UTF8 );
free( buffer );
return new;
}
@ -1310,7 +1308,7 @@ static void translate_dialog( dialog_t *dlg, dialog_t *new, int *found )
new->font = xmalloc( sizeof(*dlg->font) );
*new->font = *dlg->font;
if (uses_larger_font( new->lvc.language )) new->font->size++;
new->font->name = convert_string( dlg->font->name, str_unicode, 1252 );
new->font->name = convert_string_unicode( dlg->font->name, 1252 );
}
new->controls = translate_controls( dlg->controls, found );
}
@ -1383,12 +1381,12 @@ static ver_value_t *translate_stringfileinfo( ver_value_t *val, language_t *lang
{
if (!strcasecmp( name, english_block_name[i] ))
{
string_t *str;
str = new_string();
str->type = str_char;
str->size = strlen( block_name[i] ) + 1;
str->str.cstr = xstrdup( block_name[i] );
new_blk->name = str;
string_t str;
str.type = str_char;
str.size = strlen( block_name[i] ) + 1;
str.str.cstr = block_name[i];
str.loc = blk->name->loc;
new_blk->name = convert_string_unicode( &str, CP_UTF8 );
new_blk->values = translate_langcharset_values( blk->values, lang, found );
}
}

View File

@ -301,11 +301,6 @@ static int wrc_mbstowcs( int codepage, int flags, const char *src, int srclen, W
return MultiByteToWideChar( codepage, flags, src, srclen, dst, dstlen );
}
/*
 * Convert the wide-character buffer src (srclen characters) into the
 * multibyte buffer dst (capacity dstlen) using the given codepage.
 * This is a thin forwarder to WideCharToMultiByte; both trailing arguments
 * of that call (default character and "used default" flag) are passed as NULL.
 * The return value is whatever WideCharToMultiByte returns.
 */
static int wrc_wcstombs( int codepage, int flags, const WCHAR *src, int srclen, char *dst, int dstlen )
{
return WideCharToMultiByte( codepage, flags, src, srclen, dst, dstlen, NULL, NULL );
}
#else /* _WIN32 */
#include "wine/unicode.h"
@ -320,11 +315,6 @@ static int wrc_mbstowcs( int codepage, int flags, const char *src, int srclen, W
return wine_cp_mbstowcs( wine_cp_get_table( codepage ), flags, src, srclen, dst, dstlen );
}
/*
 * Convert the wide-character buffer src (srclen characters) into the
 * multibyte buffer dst (capacity dstlen) using the given codepage.
 * The codepage number is first mapped to a table with wine_cp_get_table(),
 * and the conversion itself is delegated to wine_cp_wcstombs() with its
 * last two arguments passed as NULL.
 * NOTE(review): the result of wine_cp_get_table() is not checked here;
 * confirm that callers only pass codepages for which a table exists.
 */
static int wrc_wcstombs( int codepage, int flags, const WCHAR *src, int srclen, char *dst, int dstlen )
{
return wine_cp_wcstombs( wine_cp_get_table( codepage ), flags, src, srclen, dst, dstlen, NULL, NULL );
}
#endif /* _WIN32 */
static WCHAR *utf8_to_unicode( const char *src, int srclen, int *dstlen )
@ -448,19 +438,18 @@ static char *unicode_to_utf8( const WCHAR *src, int srclen, int *dstlen )
return ret;
}
string_t *convert_string(const string_t *str, enum str_e type, int codepage)
string_t *convert_string_unicode( const string_t *str, int codepage )
{
string_t *ret = xmalloc(sizeof(*ret));
int res;
ret->type = type;
ret->type = str_unicode;
ret->loc = str->loc;
if (!codepage && str->type != type)
parser_error( "Current language is Unicode only, cannot convert string" );
if((str->type == str_char) && (type == str_unicode))
if (str->type == str_char)
{
if (!codepage) parser_error( "Current language is Unicode only, cannot convert string" );
if (codepage == CP_UTF8)
ret->str.wstr = utf8_to_unicode( str->str.cstr, str->size, &ret->size );
else
@ -475,38 +464,20 @@ string_t *convert_string(const string_t *str, enum str_e type, int codepage)
ret->str.wstr[ret->size] = 0;
}
}
else if((str->type == str_unicode) && (type == str_char))
{
if (codepage == CP_UTF8)
ret->str.cstr = unicode_to_utf8( str->str.wstr, str->size, &ret->size );
else
{
ret->str.cstr = xmalloc( str->size * 2 + 1 );
ret->size = wrc_wcstombs( codepage, 0, str->str.wstr, str->size, ret->str.cstr, str->size * 2 );
ret->str.cstr[ret->size] = 0;
}
}
else if(str->type == str_unicode)
{
ret->size = str->size;
ret->str.wstr = xmalloc(sizeof(WCHAR)*(ret->size+1));
memcpy( ret->str.wstr, str->str.wstr, ret->size * sizeof(WCHAR) );
ret->str.wstr[ret->size] = 0;
}
else /* str->type == str_char */
{
ret->size = str->size;
ret->str.cstr = xmalloc( ret->size + 1 );
memcpy( ret->str.cstr, str->str.cstr, ret->size );
ret->str.cstr[ret->size] = 0;
}
return ret;
}
char *convert_string_utf8( const string_t *str, int codepage )
{
int len;
string_t *wstr = convert_string( str, str_unicode, codepage );
string_t *wstr = convert_string_unicode( str, codepage );
char *ret = unicode_to_utf8( wstr->str.wstr, wstr->size, &len );
free_string( wstr );
return ret;
@ -548,32 +519,6 @@ done:
return 0;
}
/*
 * Round-trip check for a char -> Unicode conversion.
 *
 * str_a    - the original char string
 * str_w    - the Unicode string that was produced from str_a
 * codepage - codepage used to convert str_w back to a char string
 *
 * str_w is converted back to a char string with convert_string() and the
 * result is compared against str_a (same size and same bytes).
 * Returns non-zero when the round trip reproduces str_a exactly, 0 otherwise.
 * On mismatch a diagnostic dump is written to stderr; the temporary
 * converted string is always released before returning.
 */
int check_unicode_conversion( const string_t *str_a, const string_t *str_w, int codepage )
{
int ok;
/* convert the Unicode form back to the char form for comparison */
string_t *teststr = convert_string( str_w, str_char, codepage );
ok = (teststr->size == str_a->size && !memcmp( teststr->str.cstr, str_a->str.cstr, str_a->size ));
if (!ok)
{
int i;
/* dump all three forms (source bytes, Unicode units, round-tripped bytes) in hex */
fprintf( stderr, "Source: %s", str_a->str.cstr );
for (i = 0; i < str_a->size; i++)
fprintf( stderr, " %02x", (unsigned char)str_a->str.cstr[i] );
fprintf( stderr, "\nUnicode: " );
for (i = 0; i < str_w->size; i++)
fprintf( stderr, " %04x", str_w->str.wstr[i] );
fprintf( stderr, "\nBack: %s", teststr->str.cstr );
for (i = 0; i < teststr->size; i++)
fprintf( stderr, " %02x", (unsigned char)teststr->str.cstr[i] );
fprintf( stderr, "\n" );
}
free_string( teststr );
return ok;
}
struct lang2cp
{

View File

@ -46,11 +46,10 @@ void chat(const char *s, ...) __attribute__((format (printf, 1, 2)));
char *dup_basename(const char *name, const char *ext);
int compare_name_id(const name_id_t *n1, const name_id_t *n2);
string_t *convert_string(const string_t *str, enum str_e type, int codepage);
string_t *convert_string_unicode( const string_t *str, int codepage );
char *convert_string_utf8( const string_t *str, int codepage );
void free_string( string_t *str );
int check_valid_utf8( const string_t *str, int codepage );
int check_unicode_conversion( const string_t *str_a, const string_t *str_w, int codepage );
int get_language_codepage( unsigned short lang, unsigned short sublang );
int is_valid_codepage(int cp);