From 6659308fe9406502f194e0cbdf760b2df31b2cc5 Mon Sep 17 00:00:00 2001 From: Alexandre Julliard Date: Sat, 10 Jun 2000 04:29:16 +0000 Subject: [PATCH] Added support for WC_NO_BEST_FIT_CHAR and default char parameters in cp_wcstombs. Minor cosmetic fixes. --- include/wine/unicode.h | 23 +++++++++- include/winnls.h | 10 ++-- unicode/c_037.c | 2 +- unicode/c_10000.c | 2 +- unicode/c_10006.c | 2 +- unicode/c_10007.c | 2 +- unicode/c_10029.c | 2 +- unicode/c_1006.c | 2 +- unicode/c_10079.c | 2 +- unicode/c_10081.c | 2 +- unicode/c_1026.c | 2 +- unicode/c_1250.c | 2 +- unicode/c_1251.c | 2 +- unicode/c_1252.c | 2 +- unicode/c_1253.c | 2 +- unicode/c_1254.c | 2 +- unicode/c_1255.c | 2 +- unicode/c_1256.c | 2 +- unicode/c_1257.c | 2 +- unicode/c_1258.c | 2 +- unicode/c_20866.c | 2 +- unicode/c_28591.c | 2 +- unicode/c_28592.c | 2 +- unicode/c_28593.c | 2 +- unicode/c_28594.c | 2 +- unicode/c_28595.c | 2 +- unicode/c_28596.c | 2 +- unicode/c_28597.c | 2 +- unicode/c_28598.c | 2 +- unicode/c_28599.c | 2 +- unicode/c_424.c | 2 +- unicode/c_437.c | 2 +- unicode/c_500.c | 2 +- unicode/c_737.c | 2 +- unicode/c_775.c | 2 +- unicode/c_850.c | 2 +- unicode/c_852.c | 2 +- unicode/c_855.c | 2 +- unicode/c_856.c | 2 +- unicode/c_857.c | 2 +- unicode/c_860.c | 2 +- unicode/c_861.c | 2 +- unicode/c_862.c | 2 +- unicode/c_863.c | 2 +- unicode/c_864.c | 2 +- unicode/c_865.c | 2 +- unicode/c_866.c | 2 +- unicode/c_869.c | 2 +- unicode/c_874.c | 2 +- unicode/c_875.c | 2 +- unicode/c_878.c | 2 +- unicode/c_932.c | 4 +- unicode/c_936.c | 4 +- unicode/c_949.c | 4 +- unicode/c_950.c | 4 +- unicode/cpmap.pl | 6 +-- unicode/mbtowc.c | 7 ++- unicode/wctomb.c | 102 ++++++++++++++++++++++++++++++++++++++++- 58 files changed, 190 insertions(+), 72 deletions(-) diff --git a/include/wine/unicode.h b/include/wine/unicode.h index d8d65270435..6c81470d417 100644 --- a/include/wine/unicode.h +++ b/include/wine/unicode.h @@ -12,7 +12,7 @@ struct cp_info { unsigned int codepage; /* codepage id */ unsigned 
int char_size; /* char size (1 or 2 bytes) */ - char def_char[2]; /* default char value */ + unsigned short def_char; /* default char value (can be double-byte) */ unsigned short def_unicode_char; /* default Unicode char value */ const char *name; /* code page name */ }; @@ -50,11 +50,30 @@ extern int cp_mbstowcs( const union cptable *table, int flags, unsigned short *dst, int dstlen ); extern int cp_wcstombs( const union cptable *table, int flags, const unsigned short *src, int srclen, - char *dst, int dstlen ); + char *dst, int dstlen, const char *defchar, int *used ); + static inline int is_dbcs_leadbyte( const union cptable *table, unsigned char ch ) { return (table->info.char_size == 2) && (table->dbcs.cp2uni_leadbytes[ch]); } +static inline unsigned int strlenW( const unsigned short *str ) +{ +#if defined(__i386__) && defined(__GNUC__) + int dummy, res; + __asm__( "cld\n\t" + "repne\n\t" + "scasw\n\t" + "notl %0" + : "=c" (res), "=&D" (dummy) + : "0" (0xffffffff), "1" (str), "a" (0) ); + return res - 1; +#else + const unsigned short *s = str; + while (*s) s++; + return s - str; +#endif +} + #endif /* __WINE_UNICODE_H */ diff --git a/include/winnls.h b/include/winnls.h index 1be65201af4..485a2b7c1ed 100644 --- a/include/winnls.h +++ b/include/winnls.h @@ -153,11 +153,11 @@ #define CP_UTF7 65000 #define CP_UTF8 65001 -#define WC_DEFAULTCHECK 0x00000100 -#define WC_COMPOSITECHECK 0x00000200 -#define WC_DISCARDNS 0x00000010 -#define WC_SEPCHARS 0x00000020 -#define WC_DEFAULTCHAR 0x00000040 +#define WC_DISCARDNS 0x00000010 +#define WC_SEPCHARS 0x00000020 +#define WC_DEFAULTCHAR 0x00000040 +#define WC_COMPOSITECHECK 0x00000200 +#define WC_NO_BEST_FIT_CHARS 0x00000400 #define MAKELCID(l, s) (MAKELONG(l, s)) diff --git a/unicode/c_037.c b/unicode/c_037.c index 6a7049f4a46..aefe46a1fba 100644 --- a/unicode/c_037.c +++ b/unicode/c_037.c @@ -643,7 +643,7 @@ static const unsigned short uni2cp_high[256] = const struct sbcs_table cptable_037 = { - { 37, 1, { 0x3f, 0x00 }, 
0x003f, "IBM EBCDIC US Canada" }, + { 37, 1, 0x003f, 0x003f, "IBM EBCDIC US Canada" }, cp2uni, uni2cp_low, uni2cp_high diff --git a/unicode/c_10000.c b/unicode/c_10000.c index ee3493ee6e2..4e53c0c9570 100644 --- a/unicode/c_10000.c +++ b/unicode/c_10000.c @@ -643,7 +643,7 @@ static const unsigned short uni2cp_high[256] = const struct sbcs_table cptable_10000 = { - { 10000, 1, { 0x3f, 0x00 }, 0x003f, "Mac Roman" }, + { 10000, 1, 0x003f, 0x003f, "Mac Roman" }, cp2uni, uni2cp_low, uni2cp_high diff --git a/unicode/c_10006.c b/unicode/c_10006.c index 02d3b7435e9..05d5295983d 100644 --- a/unicode/c_10006.c +++ b/unicode/c_10006.c @@ -643,7 +643,7 @@ static const unsigned short uni2cp_high[256] = const struct sbcs_table cptable_10006 = { - { 10006, 1, { 0x3f, 0x00 }, 0x003f, "Mac Greek" }, + { 10006, 1, 0x003f, 0x003f, "Mac Greek" }, cp2uni, uni2cp_low, uni2cp_high diff --git a/unicode/c_10007.c b/unicode/c_10007.c index 7316ae3646e..a25946dafdb 100644 --- a/unicode/c_10007.c +++ b/unicode/c_10007.c @@ -676,7 +676,7 @@ static const unsigned short uni2cp_high[256] = const struct sbcs_table cptable_10007 = { - { 10007, 1, { 0x3f, 0x00 }, 0x003f, "Mac Cyrillic" }, + { 10007, 1, 0x003f, 0x003f, "Mac Cyrillic" }, cp2uni, uni2cp_low, uni2cp_high diff --git a/unicode/c_10029.c b/unicode/c_10029.c index 0d83b5a492a..8b58ce59eef 100644 --- a/unicode/c_10029.c +++ b/unicode/c_10029.c @@ -643,7 +643,7 @@ static const unsigned short uni2cp_high[256] = const struct sbcs_table cptable_10029 = { - { 10029, 1, { 0x3f, 0x00 }, 0x003f, "Mac Latin 2" }, + { 10029, 1, 0x003f, 0x003f, "Mac Latin 2" }, cp2uni, uni2cp_low, uni2cp_high diff --git a/unicode/c_1006.c b/unicode/c_1006.c index 2a6a80cb0c4..9200b23dc0e 100644 --- a/unicode/c_1006.c +++ b/unicode/c_1006.c @@ -676,7 +676,7 @@ static const unsigned short uni2cp_high[256] = const struct sbcs_table cptable_1006 = { - { 1006, 1, { 0x3f, 0x00 }, 0x003f, "IBM Arabic" }, + { 1006, 1, 0x003f, 0x003f, "IBM Arabic" }, cp2uni, uni2cp_low, 
uni2cp_high diff --git a/unicode/c_10079.c b/unicode/c_10079.c index 37c87771cb0..3ff25733a56 100644 --- a/unicode/c_10079.c +++ b/unicode/c_10079.c @@ -643,7 +643,7 @@ static const unsigned short uni2cp_high[256] = const struct sbcs_table cptable_10079 = { - { 10079, 1, { 0x3f, 0x00 }, 0x003f, "Mac Icelandic" }, + { 10079, 1, 0x003f, 0x003f, "Mac Icelandic" }, cp2uni, uni2cp_low, uni2cp_high diff --git a/unicode/c_10081.c b/unicode/c_10081.c index d4b6158327f..181adb37d71 100644 --- a/unicode/c_10081.c +++ b/unicode/c_10081.c @@ -643,7 +643,7 @@ static const unsigned short uni2cp_high[256] = const struct sbcs_table cptable_10081 = { - { 10081, 1, { 0x3f, 0x00 }, 0x003f, "Mac Turkish" }, + { 10081, 1, 0x003f, 0x003f, "Mac Turkish" }, cp2uni, uni2cp_low, uni2cp_high diff --git a/unicode/c_1026.c b/unicode/c_1026.c index 057f0adf1c5..ad7d6f6ca42 100644 --- a/unicode/c_1026.c +++ b/unicode/c_1026.c @@ -643,7 +643,7 @@ static const unsigned short uni2cp_high[256] = const struct sbcs_table cptable_1026 = { - { 1026, 1, { 0x3f, 0x00 }, 0x003f, "IBM EBCDIC Latin 5 Turkish" }, + { 1026, 1, 0x003f, 0x003f, "IBM EBCDIC Latin 5 Turkish" }, cp2uni, uni2cp_low, uni2cp_high diff --git a/unicode/c_1250.c b/unicode/c_1250.c index ba634bdf217..5f850f31287 100644 --- a/unicode/c_1250.c +++ b/unicode/c_1250.c @@ -643,7 +643,7 @@ static const unsigned short uni2cp_high[256] = const struct sbcs_table cptable_1250 = { - { 1250, 1, { 0x3f, 0x00 }, 0x003f, "ANSI Eastern Europe" }, + { 1250, 1, 0x003f, 0x003f, "ANSI Eastern Europe" }, cp2uni, uni2cp_low, uni2cp_high diff --git a/unicode/c_1251.c b/unicode/c_1251.c index 5dc694aac90..9040f5f3523 100644 --- a/unicode/c_1251.c +++ b/unicode/c_1251.c @@ -676,7 +676,7 @@ static const unsigned short uni2cp_high[256] = const struct sbcs_table cptable_1251 = { - { 1251, 1, { 0x3f, 0x00 }, 0x003f, "ANSI Cyrillic" }, + { 1251, 1, 0x003f, 0x003f, "ANSI Cyrillic" }, cp2uni, uni2cp_low, uni2cp_high diff --git a/unicode/c_1252.c b/unicode/c_1252.c index 
83eab2ccbfe..e8d657bcd18 100644 --- a/unicode/c_1252.c +++ b/unicode/c_1252.c @@ -643,7 +643,7 @@ static const unsigned short uni2cp_high[256] = const struct sbcs_table cptable_1252 = { - { 1252, 1, { 0x3f, 0x00 }, 0x003f, "ANSI Latin 1" }, + { 1252, 1, 0x003f, 0x003f, "ANSI Latin 1" }, cp2uni, uni2cp_low, uni2cp_high diff --git a/unicode/c_1253.c b/unicode/c_1253.c index fe96df126f4..b0ee18ef607 100644 --- a/unicode/c_1253.c +++ b/unicode/c_1253.c @@ -643,7 +643,7 @@ static const unsigned short uni2cp_high[256] = const struct sbcs_table cptable_1253 = { - { 1253, 1, { 0x3f, 0x00 }, 0x003f, "ANSI Greek" }, + { 1253, 1, 0x003f, 0x003f, "ANSI Greek" }, cp2uni, uni2cp_low, uni2cp_high diff --git a/unicode/c_1254.c b/unicode/c_1254.c index e7d95284213..cf178c39e5a 100644 --- a/unicode/c_1254.c +++ b/unicode/c_1254.c @@ -643,7 +643,7 @@ static const unsigned short uni2cp_high[256] = const struct sbcs_table cptable_1254 = { - { 1254, 1, { 0x3f, 0x00 }, 0x003f, "ANSI Turkish" }, + { 1254, 1, 0x003f, 0x003f, "ANSI Turkish" }, cp2uni, uni2cp_low, uni2cp_high diff --git a/unicode/c_1255.c b/unicode/c_1255.c index 58362c9c09f..aa32b321a7b 100644 --- a/unicode/c_1255.c +++ b/unicode/c_1255.c @@ -676,7 +676,7 @@ static const unsigned short uni2cp_high[256] = const struct sbcs_table cptable_1255 = { - { 1255, 1, { 0x3f, 0x00 }, 0x003f, "ANSI Hebrew" }, + { 1255, 1, 0x003f, 0x003f, "ANSI Hebrew" }, cp2uni, uni2cp_low, uni2cp_high diff --git a/unicode/c_1256.c b/unicode/c_1256.c index c456d78c304..0edf46b23f4 100644 --- a/unicode/c_1256.c +++ b/unicode/c_1256.c @@ -676,7 +676,7 @@ static const unsigned short uni2cp_high[256] = const struct sbcs_table cptable_1256 = { - { 1256, 1, { 0x3f, 0x00 }, 0x003f, "ANSI Arabic" }, + { 1256, 1, 0x003f, 0x003f, "ANSI Arabic" }, cp2uni, uni2cp_low, uni2cp_high diff --git a/unicode/c_1257.c b/unicode/c_1257.c index 1ad60f5f790..e0aa1c65543 100644 --- a/unicode/c_1257.c +++ b/unicode/c_1257.c @@ -643,7 +643,7 @@ static const unsigned short 
uni2cp_high[256] = const struct sbcs_table cptable_1257 = { - { 1257, 1, { 0x3f, 0x00 }, 0x003f, "ANSI Baltic" }, + { 1257, 1, 0x003f, 0x003f, "ANSI Baltic" }, cp2uni, uni2cp_low, uni2cp_high diff --git a/unicode/c_1258.c b/unicode/c_1258.c index b5a08b3289a..2265ae85412 100644 --- a/unicode/c_1258.c +++ b/unicode/c_1258.c @@ -643,7 +643,7 @@ static const unsigned short uni2cp_high[256] = const struct sbcs_table cptable_1258 = { - { 1258, 1, { 0x3f, 0x00 }, 0x003f, "ANSI/OEM Viet Nam" }, + { 1258, 1, 0x003f, 0x003f, "ANSI/OEM Viet Nam" }, cp2uni, uni2cp_low, uni2cp_high diff --git a/unicode/c_20866.c b/unicode/c_20866.c index bd3e0e7cb27..a45ada40b1a 100644 --- a/unicode/c_20866.c +++ b/unicode/c_20866.c @@ -676,7 +676,7 @@ static const unsigned short uni2cp_high[256] = const struct sbcs_table cptable_20866 = { - { 20866, 1, { 0x3f, 0x00 }, 0x003f, "Russian KOI8" }, + { 20866, 1, 0x003f, 0x003f, "Russian KOI8" }, cp2uni, uni2cp_low, uni2cp_high diff --git a/unicode/c_28591.c b/unicode/c_28591.c index 2827582b569..5c02c393ee8 100644 --- a/unicode/c_28591.c +++ b/unicode/c_28591.c @@ -643,7 +643,7 @@ static const unsigned short uni2cp_high[256] = const struct sbcs_table cptable_28591 = { - { 28591, 1, { 0x3f, 0x00 }, 0x003f, "ISO 8859-1 Latin 1" }, + { 28591, 1, 0x003f, 0x003f, "ISO 8859-1 Latin 1" }, cp2uni, uni2cp_low, uni2cp_high diff --git a/unicode/c_28592.c b/unicode/c_28592.c index 16e8aa30a7c..3f32c70176b 100644 --- a/unicode/c_28592.c +++ b/unicode/c_28592.c @@ -643,7 +643,7 @@ static const unsigned short uni2cp_high[256] = const struct sbcs_table cptable_28592 = { - { 28592, 1, { 0x3f, 0x00 }, 0x003f, "ISO 8859-2 Eastern Europe" }, + { 28592, 1, 0x003f, 0x003f, "ISO 8859-2 Eastern Europe" }, cp2uni, uni2cp_low, uni2cp_high diff --git a/unicode/c_28593.c b/unicode/c_28593.c index e030415c6fa..7da83c290e1 100644 --- a/unicode/c_28593.c +++ b/unicode/c_28593.c @@ -643,7 +643,7 @@ static const unsigned short uni2cp_high[256] = const struct sbcs_table 
cptable_28593 = { - { 28593, 1, { 0x3f, 0x00 }, 0x003f, "ISO 8859-3 Turkish" }, + { 28593, 1, 0x003f, 0x003f, "ISO 8859-3 Turkish" }, cp2uni, uni2cp_low, uni2cp_high diff --git a/unicode/c_28594.c b/unicode/c_28594.c index 680513445ec..017a4628917 100644 --- a/unicode/c_28594.c +++ b/unicode/c_28594.c @@ -643,7 +643,7 @@ static const unsigned short uni2cp_high[256] = const struct sbcs_table cptable_28594 = { - { 28594, 1, { 0x3f, 0x00 }, 0x003f, "ISO 8859-4 Baltic" }, + { 28594, 1, 0x003f, 0x003f, "ISO 8859-4 Baltic" }, cp2uni, uni2cp_low, uni2cp_high diff --git a/unicode/c_28595.c b/unicode/c_28595.c index 6c1c951ecff..1f2ad44573b 100644 --- a/unicode/c_28595.c +++ b/unicode/c_28595.c @@ -676,7 +676,7 @@ static const unsigned short uni2cp_high[256] = const struct sbcs_table cptable_28595 = { - { 28595, 1, { 0x3f, 0x00 }, 0x003f, "ISO 8859-5 Cyrillic" }, + { 28595, 1, 0x003f, 0x003f, "ISO 8859-5 Cyrillic" }, cp2uni, uni2cp_low, uni2cp_high diff --git a/unicode/c_28596.c b/unicode/c_28596.c index 9d838c76eef..29317d1c08f 100644 --- a/unicode/c_28596.c +++ b/unicode/c_28596.c @@ -676,7 +676,7 @@ static const unsigned short uni2cp_high[256] = const struct sbcs_table cptable_28596 = { - { 28596, 1, { 0x3f, 0x00 }, 0x003f, "ISO 8859-6 Arabic" }, + { 28596, 1, 0x003f, 0x003f, "ISO 8859-6 Arabic" }, cp2uni, uni2cp_low, uni2cp_high diff --git a/unicode/c_28597.c b/unicode/c_28597.c index 781e1033eaa..69144966fc5 100644 --- a/unicode/c_28597.c +++ b/unicode/c_28597.c @@ -643,7 +643,7 @@ static const unsigned short uni2cp_high[256] = const struct sbcs_table cptable_28597 = { - { 28597, 1, { 0x3f, 0x00 }, 0x003f, "ISO 8859-7 Greek" }, + { 28597, 1, 0x003f, 0x003f, "ISO 8859-7 Greek" }, cp2uni, uni2cp_low, uni2cp_high diff --git a/unicode/c_28598.c b/unicode/c_28598.c index 7ef91388e85..dbd16fcecf8 100644 --- a/unicode/c_28598.c +++ b/unicode/c_28598.c @@ -676,7 +676,7 @@ static const unsigned short uni2cp_high[256] = const struct sbcs_table cptable_28598 = { - { 28598, 1, { 
0x3f, 0x00 }, 0x003f, "ISO 8859-8 Hebrew" }, + { 28598, 1, 0x003f, 0x003f, "ISO 8859-8 Hebrew" }, cp2uni, uni2cp_low, uni2cp_high diff --git a/unicode/c_28599.c b/unicode/c_28599.c index 7b49f419cde..39641c38ce0 100644 --- a/unicode/c_28599.c +++ b/unicode/c_28599.c @@ -643,7 +643,7 @@ static const unsigned short uni2cp_high[256] = const struct sbcs_table cptable_28599 = { - { 28599, 1, { 0x3f, 0x00 }, 0x003f, "ISO 8859-9 Latin 5" }, + { 28599, 1, 0x003f, 0x003f, "ISO 8859-9 Latin 5" }, cp2uni, uni2cp_low, uni2cp_high diff --git a/unicode/c_424.c b/unicode/c_424.c index 86e3f9a5d01..10d3b4e7821 100644 --- a/unicode/c_424.c +++ b/unicode/c_424.c @@ -676,7 +676,7 @@ static const unsigned short uni2cp_high[256] = const struct sbcs_table cptable_424 = { - { 424, 1, { 0x3f, 0x00 }, 0x003f, "IBM EBCDIC Hebrew" }, + { 424, 1, 0x003f, 0x003f, "IBM EBCDIC Hebrew" }, cp2uni, uni2cp_low, uni2cp_high diff --git a/unicode/c_437.c b/unicode/c_437.c index bd7b4421d82..0461b97318c 100644 --- a/unicode/c_437.c +++ b/unicode/c_437.c @@ -643,7 +643,7 @@ static const unsigned short uni2cp_high[256] = const struct sbcs_table cptable_437 = { - { 437, 1, { 0x3f, 0x00 }, 0x003f, "OEM United States" }, + { 437, 1, 0x003f, 0x003f, "OEM United States" }, cp2uni, uni2cp_low, uni2cp_high diff --git a/unicode/c_500.c b/unicode/c_500.c index 608b79b7199..b5cd19b14aa 100644 --- a/unicode/c_500.c +++ b/unicode/c_500.c @@ -643,7 +643,7 @@ static const unsigned short uni2cp_high[256] = const struct sbcs_table cptable_500 = { - { 500, 1, { 0x3f, 0x00 }, 0x003f, "IBM EBCDIC International" }, + { 500, 1, 0x003f, 0x003f, "IBM EBCDIC International" }, cp2uni, uni2cp_low, uni2cp_high diff --git a/unicode/c_737.c b/unicode/c_737.c index 106c48dc3fd..666805bebb7 100644 --- a/unicode/c_737.c +++ b/unicode/c_737.c @@ -643,7 +643,7 @@ static const unsigned short uni2cp_high[256] = const struct sbcs_table cptable_737 = { - { 737, 1, { 0x3f, 0x00 }, 0x003f, "OEM Greek 437G" }, + { 737, 1, 0x003f, 0x003f, "OEM 
Greek 437G" }, cp2uni, uni2cp_low, uni2cp_high diff --git a/unicode/c_775.c b/unicode/c_775.c index b2cdde99475..5f2653f990b 100644 --- a/unicode/c_775.c +++ b/unicode/c_775.c @@ -643,7 +643,7 @@ static const unsigned short uni2cp_high[256] = const struct sbcs_table cptable_775 = { - { 775, 1, { 0x3f, 0x00 }, 0x003f, "OEM Baltic" }, + { 775, 1, 0x003f, 0x003f, "OEM Baltic" }, cp2uni, uni2cp_low, uni2cp_high diff --git a/unicode/c_850.c b/unicode/c_850.c index 3c7d785f139..4110a1669fc 100644 --- a/unicode/c_850.c +++ b/unicode/c_850.c @@ -643,7 +643,7 @@ static const unsigned short uni2cp_high[256] = const struct sbcs_table cptable_850 = { - { 850, 1, { 0x3f, 0x00 }, 0x003f, "OEM Multilingual Latin 1" }, + { 850, 1, 0x003f, 0x003f, "OEM Multilingual Latin 1" }, cp2uni, uni2cp_low, uni2cp_high diff --git a/unicode/c_852.c b/unicode/c_852.c index 92a6fb083de..e2bab90ef8b 100644 --- a/unicode/c_852.c +++ b/unicode/c_852.c @@ -643,7 +643,7 @@ static const unsigned short uni2cp_high[256] = const struct sbcs_table cptable_852 = { - { 852, 1, { 0x3f, 0x00 }, 0x003f, "OEM Slovak Latin 2" }, + { 852, 1, 0x003f, 0x003f, "OEM Slovak Latin 2" }, cp2uni, uni2cp_low, uni2cp_high diff --git a/unicode/c_855.c b/unicode/c_855.c index 208290aff56..c1bb3cba254 100644 --- a/unicode/c_855.c +++ b/unicode/c_855.c @@ -676,7 +676,7 @@ static const unsigned short uni2cp_high[256] = const struct sbcs_table cptable_855 = { - { 855, 1, { 0x3f, 0x00 }, 0x003f, "OEM Cyrillic" }, + { 855, 1, 0x003f, 0x003f, "OEM Cyrillic" }, cp2uni, uni2cp_low, uni2cp_high diff --git a/unicode/c_856.c b/unicode/c_856.c index ce04d354b64..2c700041f07 100644 --- a/unicode/c_856.c +++ b/unicode/c_856.c @@ -676,7 +676,7 @@ static const unsigned short uni2cp_high[256] = const struct sbcs_table cptable_856 = { - { 856, 1, { 0x3f, 0x00 }, 0x003f, "Hebrew PC" }, + { 856, 1, 0x003f, 0x003f, "Hebrew PC" }, cp2uni, uni2cp_low, uni2cp_high diff --git a/unicode/c_857.c b/unicode/c_857.c index dc12967512b..2faa29b85f0 100644 
--- a/unicode/c_857.c +++ b/unicode/c_857.c @@ -643,7 +643,7 @@ static const unsigned short uni2cp_high[256] = const struct sbcs_table cptable_857 = { - { 857, 1, { 0x3f, 0x00 }, 0x003f, "OEM Turkish" }, + { 857, 1, 0x003f, 0x003f, "OEM Turkish" }, cp2uni, uni2cp_low, uni2cp_high diff --git a/unicode/c_860.c b/unicode/c_860.c index be811f89f97..01442a9fe2a 100644 --- a/unicode/c_860.c +++ b/unicode/c_860.c @@ -643,7 +643,7 @@ static const unsigned short uni2cp_high[256] = const struct sbcs_table cptable_860 = { - { 860, 1, { 0x3f, 0x00 }, 0x003f, "OEM Portuguese" }, + { 860, 1, 0x003f, 0x003f, "OEM Portuguese" }, cp2uni, uni2cp_low, uni2cp_high diff --git a/unicode/c_861.c b/unicode/c_861.c index 1fb8cd67335..11e1db56cc9 100644 --- a/unicode/c_861.c +++ b/unicode/c_861.c @@ -643,7 +643,7 @@ static const unsigned short uni2cp_high[256] = const struct sbcs_table cptable_861 = { - { 861, 1, { 0x3f, 0x00 }, 0x003f, "OEM Icelandic" }, + { 861, 1, 0x003f, 0x003f, "OEM Icelandic" }, cp2uni, uni2cp_low, uni2cp_high diff --git a/unicode/c_862.c b/unicode/c_862.c index 86fc693d0ee..353395c7306 100644 --- a/unicode/c_862.c +++ b/unicode/c_862.c @@ -676,7 +676,7 @@ static const unsigned short uni2cp_high[256] = const struct sbcs_table cptable_862 = { - { 862, 1, { 0x3f, 0x00 }, 0x003f, "OEM Hebrew" }, + { 862, 1, 0x003f, 0x003f, "OEM Hebrew" }, cp2uni, uni2cp_low, uni2cp_high diff --git a/unicode/c_863.c b/unicode/c_863.c index 5e1a9a26354..4f8af4a9ab1 100644 --- a/unicode/c_863.c +++ b/unicode/c_863.c @@ -643,7 +643,7 @@ static const unsigned short uni2cp_high[256] = const struct sbcs_table cptable_863 = { - { 863, 1, { 0x3f, 0x00 }, 0x003f, "OEM Canadian French" }, + { 863, 1, 0x003f, 0x003f, "OEM Canadian French" }, cp2uni, uni2cp_low, uni2cp_high diff --git a/unicode/c_864.c b/unicode/c_864.c index 2d0212dfe1b..d7514d2b024 100644 --- a/unicode/c_864.c +++ b/unicode/c_864.c @@ -676,7 +676,7 @@ static const unsigned short uni2cp_high[256] = const struct sbcs_table 
cptable_864 = { - { 864, 1, { 0x3f, 0x00 }, 0x003f, "OEM Arabic" }, + { 864, 1, 0x003f, 0x003f, "OEM Arabic" }, cp2uni, uni2cp_low, uni2cp_high diff --git a/unicode/c_865.c b/unicode/c_865.c index fd1366585d7..ab9593b3ac7 100644 --- a/unicode/c_865.c +++ b/unicode/c_865.c @@ -643,7 +643,7 @@ static const unsigned short uni2cp_high[256] = const struct sbcs_table cptable_865 = { - { 865, 1, { 0x3f, 0x00 }, 0x003f, "OEM Nordic" }, + { 865, 1, 0x003f, 0x003f, "OEM Nordic" }, cp2uni, uni2cp_low, uni2cp_high diff --git a/unicode/c_866.c b/unicode/c_866.c index f383aab8d3a..53dded562f7 100644 --- a/unicode/c_866.c +++ b/unicode/c_866.c @@ -676,7 +676,7 @@ static const unsigned short uni2cp_high[256] = const struct sbcs_table cptable_866 = { - { 866, 1, { 0x3f, 0x00 }, 0x003f, "OEM Russian" }, + { 866, 1, 0x003f, 0x003f, "OEM Russian" }, cp2uni, uni2cp_low, uni2cp_high diff --git a/unicode/c_869.c b/unicode/c_869.c index 1d6d8f8c228..3971f2f77d9 100644 --- a/unicode/c_869.c +++ b/unicode/c_869.c @@ -643,7 +643,7 @@ static const unsigned short uni2cp_high[256] = const struct sbcs_table cptable_869 = { - { 869, 1, { 0x3f, 0x00 }, 0x003f, "OEM Greek" }, + { 869, 1, 0x003f, 0x003f, "OEM Greek" }, cp2uni, uni2cp_low, uni2cp_high diff --git a/unicode/c_874.c b/unicode/c_874.c index bf779a901c3..de546553f68 100644 --- a/unicode/c_874.c +++ b/unicode/c_874.c @@ -676,7 +676,7 @@ static const unsigned short uni2cp_high[256] = const struct sbcs_table cptable_874 = { - { 874, 1, { 0x3f, 0x00 }, 0x003f, "ANSI/OEM Thai" }, + { 874, 1, 0x003f, 0x003f, "ANSI/OEM Thai" }, cp2uni, uni2cp_low, uni2cp_high diff --git a/unicode/c_875.c b/unicode/c_875.c index 08a3d393bb5..7d7b6a5d46e 100644 --- a/unicode/c_875.c +++ b/unicode/c_875.c @@ -643,7 +643,7 @@ static const unsigned short uni2cp_high[256] = const struct sbcs_table cptable_875 = { - { 875, 1, { 0x3f, 0x00 }, 0x003f, "IBM EBCDIC Greek" }, + { 875, 1, 0x003f, 0x003f, "IBM EBCDIC Greek" }, cp2uni, uni2cp_low, uni2cp_high diff --git 
a/unicode/c_878.c b/unicode/c_878.c index 7822bf8031a..905b16de2f5 100644 --- a/unicode/c_878.c +++ b/unicode/c_878.c @@ -676,7 +676,7 @@ static const unsigned short uni2cp_high[256] = const struct sbcs_table cptable_878 = { - { 878, 1, { 0x3f, 0x00 }, 0x003f, "Russian KOI8" }, + { 878, 1, 0x003f, 0x003f, "Russian KOI8" }, cp2uni, uni2cp_low, uni2cp_high diff --git a/unicode/c_932.c b/unicode/c_932.c index 1e5bb61212c..c44014b9208 100644 --- a/unicode/c_932.c +++ b/unicode/c_932.c @@ -5200,10 +5200,10 @@ static const unsigned short uni2cp_high[256] = const struct dbcs_table cptable_932 = { - { 932, 2, { 0x3f, 0x00 }, 0x003f, "ANSI/OEM Japanese Shift-JIS" }, + { 932, 2, 0x003f, 0x003f, "ANSI/OEM Japanese Shift-JIS" }, cp2uni, cp2uni_leadbytes, uni2cp_low, uni2cp_high, - { 0x81, 0x9f, 0xe0, 0xfc, } + { 0x81, 0x9f, 0xe0, 0xfc, 0x00, 0x00 } }; diff --git a/unicode/c_936.c b/unicode/c_936.c index d36efac70ba..fefe753f907 100644 --- a/unicode/c_936.c +++ b/unicode/c_936.c @@ -7840,10 +7840,10 @@ static const unsigned short uni2cp_high[256] = const struct dbcs_table cptable_936 = { - { 936, 2, { 0x3f, 0x00 }, 0x003f, "ANSI/OEM Simplified Chinese GBK" }, + { 936, 2, 0x003f, 0x003f, "ANSI/OEM Simplified Chinese GBK" }, cp2uni, cp2uni_leadbytes, uni2cp_low, uni2cp_high, - { 0x81, 0xfe, } + { 0x81, 0xfe, 0x00, 0x00 } }; diff --git a/unicode/c_949.c b/unicode/c_949.c index 812a4765317..e7a0ed37d26 100644 --- a/unicode/c_949.c +++ b/unicode/c_949.c @@ -9259,10 +9259,10 @@ static const unsigned short uni2cp_high[256] = const struct dbcs_table cptable_949 = { - { 949, 2, { 0x3f, 0x00 }, 0x003f, "ANSI/OEM Korean Unified Hangul" }, + { 949, 2, 0x003f, 0x003f, "ANSI/OEM Korean Unified Hangul" }, cp2uni, cp2uni_leadbytes, uni2cp_low, uni2cp_high, - { 0x81, 0xfe, } + { 0x81, 0xfe, 0x00, 0x00 } }; diff --git a/unicode/c_950.c b/unicode/c_950.c index 7dbe1abea80..47c304cd91f 100644 --- a/unicode/c_950.c +++ b/unicode/c_950.c @@ -6553,10 +6553,10 @@ static const unsigned short 
uni2cp_high[256] = const struct dbcs_table cptable_950 = { - { 950, 2, { 0x3f, 0x00 }, 0x003f, "ANSI/OEM Traditional Chinese Big5" }, + { 950, 2, 0x003f, 0x003f, "ANSI/OEM Traditional Chinese Big5" }, cp2uni, cp2uni_leadbytes, uni2cp_low, uni2cp_high, - { 0x81, 0xfe, } + { 0x81, 0xfe, 0x00, 0x00 } }; diff --git a/unicode/cpmap.pl b/unicode/cpmap.pl index b93aa9ff9c4..40a6f82a66c 100755 --- a/unicode/cpmap.pl +++ b/unicode/cpmap.pl @@ -328,7 +328,7 @@ sub DUMP_SBCS_TABLE # output the code page descriptor printf OUTPUT "const struct sbcs_table cptable_%03d =\n{\n", $codepage; - printf OUTPUT " { %d, 1, { 0x%02x, 0x00 }, 0x%04x, \"%s\" },\n", + printf OUTPUT " { %d, 1, 0x%04x, 0x%04x, \"%s\" },\n", $codepage, $DEF_CHAR, $DEF_CHAR, $name; printf OUTPUT " cp2uni,\n"; printf OUTPUT " uni2cp_low,\n"; @@ -471,7 +471,7 @@ sub DUMP_DBCS_TABLE # output the code page descriptor printf OUTPUT "const struct dbcs_table cptable_%03d =\n{\n", $codepage; - printf OUTPUT " { %d, 2, { 0x%02x, 0x00 }, 0x%04x, \"%s\" },\n", + printf OUTPUT " { %d, 2, 0x%04x, 0x%04x, \"%s\" },\n", $codepage, $DEF_CHAR, $DEF_CHAR, $name; printf OUTPUT " cp2uni,\n"; printf OUTPUT " cp2uni_leadbytes,\n"; @@ -503,7 +503,7 @@ sub DUMP_LB_RANGES } } if ($on) { printf OUTPUT "0xff, "; } - printf OUTPUT "}\n"; + printf OUTPUT "0x00, 0x00 }\n"; } diff --git a/unicode/mbtowc.c b/unicode/mbtowc.c index a6b3ed191c5..ca86f5e53e8 100644 --- a/unicode/mbtowc.c +++ b/unicode/mbtowc.c @@ -16,7 +16,7 @@ static inline int check_invalid_chars_sbcs( const struct sbcs_table *table, const unsigned short * const cp2uni = table->cp2uni; while (srclen) { - if (cp2uni[*src] == table->info.def_unicode_char && *src != table->info.def_char[0]) + if (cp2uni[*src] == table->info.def_unicode_char && *src != table->info.def_char) break; src++; srclen--; @@ -102,13 +102,12 @@ static inline int check_invalid_chars_dbcs( const struct dbcs_table *table, { if (srclen == 1) break; /* partial char, error */ if (cp2uni[(off << 8) + src[1]] == 
table->info.def_unicode_char && - (src[0] != table->info.def_char[0] || src[1] != table->info.def_char[1])) - break; + ((src[0] << 8) | src[1]) != table->info.def_char) break; src++; srclen--; } else if (cp2uni[*src] == table->info.def_unicode_char && - (*src != table->info.def_char[0] || table->info.def_char[1])) break; + *src != table->info.def_char) break; src++; srclen--; } diff --git a/unicode/wctomb.c b/unicode/wctomb.c index 946eccfed17..f4ae8223cb2 100644 --- a/unicode/wctomb.c +++ b/unicode/wctomb.c @@ -55,6 +55,44 @@ static inline int wcstombs_sbcs( const struct sbcs_table *table, } } +/* slow version of wcstombs_sbcs that handles the flags, a caller-supplied default char and the 'used' output */ +static int wcstombs_sbcs_slow( const struct sbcs_table *table, int flags, + const unsigned short *src, unsigned int srclen, + char *dst, unsigned int dstlen, + const char *defchar, int *used ) +{ + const unsigned short * const cp2uni = table->cp2uni; + const unsigned char * const uni2cp_low = table->uni2cp_low; + const unsigned short * const uni2cp_high = table->uni2cp_high; + const unsigned char table_default = table->info.def_char & 0xff; + int ret = srclen, tmp; + + if (dstlen < srclen) + { + /* buffer too small: fill it up to dstlen and return error */ + srclen = dstlen; + ret = -1; + } + + if (!defchar) defchar = (const char *)&table_default; /* table_default is unsigned char, defchar is plain char */ + if (!used) used = &tmp; /* avoid checking on every char */ + *used = 0; /* stays 0 unless a default char gets substituted below */ + while (srclen) + { + unsigned char ch = uni2cp_low[uni2cp_high[*src >> 8] + (*src & 0xff)]; + if (((flags & WC_NO_BEST_FIT_CHARS) && (cp2uni[ch] != *src)) || + (ch == table_default && *src != table->info.def_unicode_char)) + { + ch = *defchar; + *used = 1; + } + *dst++ = ch; + src++; + srclen--; + } + return ret; +} + /* query necessary dst length for src string */ static inline int get_length_dbcs( const struct dbcs_table *table, const unsigned short *src, unsigned int srclen ) @@ -94,20 +132,82 @@ static inline int wcstombs_dbcs( const struct dbcs_table *table, return dstlen - len; } +/* slow version of
wcstombs_dbcs that handles the flags, a caller-supplied default char and the 'used' output */ +static int wcstombs_dbcs_slow( const struct dbcs_table *table, int flags, + const unsigned short *src, unsigned int srclen, + char *dst, unsigned int dstlen, + const char *defchar, int *used ) +{ + const unsigned short * const uni2cp_low = table->uni2cp_low; + const unsigned short * const uni2cp_high = table->uni2cp_high; + const unsigned short * const cp2uni = table->cp2uni; + const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes; + unsigned short defchar_value = table->info.def_char; + int len, tmp; + + if (defchar) defchar_value = defchar[1] ? (((unsigned char)defchar[0] << 8) | (unsigned char)defchar[1]) : (unsigned char)defchar[0]; /* bytes read as unsigned: no sign extension for values >= 0x80 */ + if (!used) used = &tmp; /* avoid checking on every char */ + *used = 0; /* stays 0 unless a default char gets substituted below */ + for (len = dstlen; srclen && len; len--, srclen--, src++) + { + unsigned short res = uni2cp_low[uni2cp_high[*src >> 8] + (*src & 0xff)]; + + if (res == table->info.def_char && *src != table->info.def_unicode_char) + { + res = defchar_value; + *used = 1; + } + else if (flags & WC_NO_BEST_FIT_CHARS) + { + /* check if char maps back to the same Unicode value */ + if (res & 0xff00) + { + unsigned char off = cp2uni_lb[res >> 8]; + if (cp2uni[(off << 8) + (res & 0xff)] != *src) + { + res = defchar_value; + *used = 1; + } + } + else if (cp2uni[res & 0xff] != *src) + { + res = defchar_value; + *used = 1; + } + } + + if (res & 0xff00) + { + if (len == 1) break; /* do not output a partial char */ + len--; + *dst++ = res >> 8; + } + *dst++ = (char)res; + } + if (srclen) return -1; /* overflow */ + return dstlen - len; +} + /* wide char to multi byte string conversion */ /* return -1 on dst buffer overflow */ int cp_wcstombs( const union cptable *table, int flags, const unsigned short *src, int srclen, - char *dst, int dstlen ) + char *dst, int dstlen, const char *defchar, int *used ) { if (table->info.char_size == 1) { if (!dstlen) return srclen; + if (flags || defchar || used) + return wcstombs_sbcs_slow( &table->sbcs, flags, src, srclen, + dst, dstlen, defchar,
used ); return wcstombs_sbcs( &table->sbcs, src, srclen, dst, dstlen ); } else /* mbcs */ { if (!dstlen) return get_length_dbcs( &table->dbcs, src, srclen ); + if (flags || defchar || used) + return wcstombs_dbcs_slow( &table->dbcs, flags, src, srclen, + dst, dstlen, defchar, used ); return wcstombs_dbcs( &table->dbcs, src, srclen, dst, dstlen ); } }