From be7558fc30f301f891ee8dfc6fba3bf569c44913 Mon Sep 17 00:00:00 2001 From: Alexandre Julliard Date: Thu, 20 Aug 2009 15:33:03 +0200 Subject: [PATCH] wrc: Print a warning when encountering utf8 strings in non-utf8 codepage. --- tools/wrc/genres.c | 3 +++ tools/wrc/parser.l | 3 +++ tools/wrc/utils.c | 23 +++++++++++++++++++++++ tools/wrc/utils.h | 1 + tools/wrc/wrc.c | 3 +++ tools/wrc/wrc.h | 1 + 6 files changed, 34 insertions(+) diff --git a/tools/wrc/genres.c b/tools/wrc/genres.c index e4317d5989a..131f6f79e50 100644 --- a/tools/wrc/genres.c +++ b/tools/wrc/genres.c @@ -314,6 +314,9 @@ static void put_string(res_t *res, const string_t *str, enum str_e type, int ist if (!check_unicode_conversion( str, newstr, codepage )) error( "String %s does not convert identically to Unicode and back in codepage %d. " "Try using a Unicode string instead\n", str->str.cstr, codepage ); + if (check_valid_utf8( str, codepage )) + warning( "string \"%s\" seems to be UTF-8 but codepage %u is in use.\n", + str->str.cstr, codepage ); } if (!isterm) put_word(res, newstr->size); for(cnt = 0; cnt < newstr->size; cnt++) diff --git a/tools/wrc/parser.l b/tools/wrc/parser.l index cafa35ad587..8ed04459429 100644 --- a/tools/wrc/parser.l +++ b/tools/wrc/parser.l @@ -637,6 +637,9 @@ static string_t *get_buffered_cstring(void) if (!check_unicode_conversion( str, str_w, current_codepage )) parser_error("String %s does not convert identically to Unicode and back in codepage %d. 
" "Try using a Unicode string instead", str->str.cstr, current_codepage ); + if (check_valid_utf8( str, current_codepage )) + parser_warning( "string \"%s\" seems to be UTF-8 but codepage %u is in use.", + str->str.cstr, current_codepage ); free_string( str ); return str_w; } diff --git a/tools/wrc/utils.c b/tools/wrc/utils.c index b6688af4abb..d3b8cf722f4 100644 --- a/tools/wrc/utils.c +++ b/tools/wrc/utils.c @@ -311,6 +311,29 @@ void free_string(string_t *str) free( str ); } +/* check if the string is valid utf8 despite a different codepage being in use. Returns 1 when the first 8-bit byte of str lies in 0xc2..0xf4 and wine_utf8_mbstowcs() with MB_ERR_INVALID_CHARS returns >= 0 for the whole buffer; returns 0 otherwise. Side effect: the first string that has 8-bit bytes but fails these checks clears the global check_utf8, so every later call returns 0 immediately. */ +int check_valid_utf8( const string_t *str, int codepage ) +{ + unsigned int i; + + if (!check_utf8) return 0; /* checking has been switched off */ + if (!codepage) return 0; + if (!wine_cp_get_table( codepage )) return 0; /* NOTE(review): presumably no table means an unknown codepage or UTF-8 itself - confirm */ + + for (i = 0; i < str->size; i++) /* locate the first 8-bit byte and classify it */ + { + if ((unsigned char)str->str.cstr[i] >= 0xf5) goto done; /* 0xf5..0xff never occur in UTF-8 (RFC 3629) */ + if ((unsigned char)str->str.cstr[i] >= 0xc2) break; /* 0xc2..0xf4: possible lead byte, run the full validation below */ + if ((unsigned char)str->str.cstr[i] >= 0x80) goto done; /* 0x80..0xc1: continuation byte or overlong lead, cannot start a sequence */ + } + if (i == str->size) return 0; /* no 8-bit chars at all */ + + if (wine_utf8_mbstowcs( MB_ERR_INVALID_CHARS, str->str.cstr, str->size, NULL, 0 ) >= 0) return 1; /* NULL/0 destination: only the return value is used; >= 0 is taken as valid UTF-8 */ + +done: + check_utf8 = 0; /* at least one 8-bit non-utf8 string found, stop checking */ + return 0; +} int check_unicode_conversion( const string_t *str_a, const string_t *str_w, int codepage ) { diff --git a/tools/wrc/utils.h b/tools/wrc/utils.h index 09144e79fd0..ced55abc3af 100644 --- a/tools/wrc/utils.h +++ b/tools/wrc/utils.h @@ -45,6 +45,7 @@ char *dup_basename(const char *name, const char *ext); int compare_name_id(const name_id_t *n1, const name_id_t *n2); string_t *convert_string(const string_t *str, enum str_e type, int codepage); void free_string( string_t *str ); +int check_valid_utf8( const string_t *str, int codepage ); int check_unicode_conversion( const string_t *str_a, const string_t *str_w, int codepage ); int get_language_codepage( unsigned short lang, unsigned short sublang ); diff --git a/tools/wrc/wrc.c b/tools/wrc/wrc.c index 
5879d92c1f7..7dd91fb0f98 100644 --- a/tools/wrc/wrc.c +++ b/tools/wrc/wrc.c @@ -155,6 +155,8 @@ int preprocess_only = 0; */ int no_preprocess = 0; +int check_utf8 = 1; /* whether to check for valid utf8; set back to 1 in load_file() before parsing starts, cleared by check_valid_utf8() once an 8-bit non-utf8 string is seen */ + static int verify_translations_mode; char *output_name = NULL; /* The name given by the -o option */ @@ -292,6 +294,7 @@ static int load_file( const char *input_name, const char *output_name ) /* Reset the language */ currentlanguage = dup_language( defaultlanguage ); + check_utf8 = 1; /* re-enable the utf8 check for this input file */ /* Go from .rc to .res */ chat("Starting parse\n"); diff --git a/tools/wrc/wrc.h b/tools/wrc/wrc.h index de8929a56e7..02835469337 100644 --- a/tools/wrc/wrc.h +++ b/tools/wrc/wrc.h @@ -43,6 +43,7 @@ extern int pedantic; extern int byteorder; extern int preprocess_only; extern int no_preprocess; +extern int check_utf8; extern char *output_name; extern char *input_name;