Added support for composite Unicode characters in MultiByteToWideChar
and WideCharToMultiByte.
This commit is contained in:
parent
441f874517
commit
e709cdbae3
|
@ -254,7 +254,6 @@ INT WINAPI MultiByteToWideChar( UINT page, DWORD flags, LPCSTR src, INT srclen,
|
|||
|
||||
if (srclen == -1) srclen = strlen(src) + 1;
|
||||
|
||||
if (flags & MB_COMPOSITE) FIXME("MB_COMPOSITE not supported\n");
|
||||
if (flags & MB_USEGLYPHCHARS) FIXME("MB_USEGLYPHCHARS not supported\n");
|
||||
|
||||
switch(page)
|
||||
|
@ -330,8 +329,6 @@ INT WINAPI WideCharToMultiByte( UINT page, DWORD flags, LPCWSTR src, INT srclen,
|
|||
|
||||
if (srclen == -1) srclen = strlenW(src) + 1;
|
||||
|
||||
/* if (flags & WC_COMPOSITECHECK) FIXME( "WC_COMPOSITECHECK (%lx) not supported\n", flags );*/
|
||||
|
||||
switch(page)
|
||||
{
|
||||
case CP_UTF7:
|
||||
|
|
|
@ -70,6 +70,7 @@ CODEPAGES = \
|
|||
|
||||
C_SRCS = \
|
||||
casemap.c \
|
||||
compose.c \
|
||||
cptable.c \
|
||||
mbtowc.c \
|
||||
string.c \
|
||||
|
|
File diff suppressed because it is too large
Load Diff
153
unicode/cpmap.pl
153
unicode/cpmap.pl
|
@ -166,6 +166,7 @@ $DEF_CHAR = ord '?';
|
|||
|
||||
READ_DEFAULTS();
|
||||
DUMP_CASE_MAPPINGS();
|
||||
DUMP_COMPOSE_TABLES();
|
||||
DUMP_CTYPE_TABLES();
|
||||
|
||||
foreach $file (@allfiles) { HANDLE_FILE( @$file ); }
|
||||
|
@ -185,6 +186,8 @@ sub READ_DEFAULTS
|
|||
@toupper_table = ();
|
||||
@category_table = ();
|
||||
@direction_table = ();
|
||||
@decomp_table = ();
|
||||
@compose_table = ();
|
||||
|
||||
# first setup a few default mappings
|
||||
|
||||
|
@ -285,6 +288,12 @@ sub READ_DEFAULTS
|
|||
# decomposition contains only char values without prefix -> use first char
|
||||
$dst = hex $1;
|
||||
$category_table[$src] |= $category_table[$dst];
|
||||
# store decomposition if it contains two chars
|
||||
if ($decomp =~ /^([0-9a-fA-F]+)\s+([0-9a-fA-F]+)$/)
|
||||
{
|
||||
$decomp_table[$src] = [ hex $1, hex $2 ];
|
||||
push @compose_table, [ hex $1, hex $2, $src ];
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -465,7 +474,7 @@ sub DUMP_SBCS_TABLE
|
|||
next unless defined $uni2cp[$i];
|
||||
$filled[$i >> 8] = 1;
|
||||
$subtables++;
|
||||
$i = ($i & ~255) + 256;
|
||||
$i |= 255;
|
||||
}
|
||||
|
||||
# output all the subtables into a single array
|
||||
|
@ -572,7 +581,7 @@ sub DUMP_DBCS_TABLE
|
|||
next unless defined $uni2cp[$i];
|
||||
$filled[$i >> 8] = 1;
|
||||
$subtables++;
|
||||
$i = ($i & ~255) + 256;
|
||||
$i |= 255;
|
||||
}
|
||||
|
||||
# output all the subtables into a single array
|
||||
|
@ -669,7 +678,7 @@ sub DUMP_CASE_TABLE
|
|||
next unless defined $table[$i];
|
||||
$filled[$i >> 8] = $pos;
|
||||
$pos += 256;
|
||||
$i = ($i & ~255) + 256;
|
||||
$i |= 255;
|
||||
}
|
||||
for ($i = 0; $i < 65536; $i++)
|
||||
{
|
||||
|
@ -737,6 +746,144 @@ sub DUMP_CTYPE_TABLES
|
|||
close OUTPUT;
|
||||
}
|
||||
|
||||
|
||||
################################################################
|
||||
# dump the char composition tables
|
||||
sub DUMP_COMPOSE_TABLES
{
    # Generate compose.c, which contains two tables:
    #   unicode_compose_table   - built from the global @compose_table,
    #                             whose entries are [ first, second, composed ]
    #   unicode_decompose_table - built from the global @decomp_table,
    #                             whose entries are [ first, second ] indexed
    #                             by the composed char
    # Formatting of the array contents is delegated to DUMP_ARRAY.
    # All working variables are lexical so that nothing left over in a
    # package global (e.g. @table) can leak into the generated file.

    open OUTPUT,">compose.c" or die "Cannot create compose.c";
    printf "Building compose.c\n";
    printf OUTPUT "/* Unicode char composition */\n";
    printf OUTPUT "/* Automatically generated; DO NOT EDIT!! */\n\n";
    printf OUTPUT "#include \"wine/unicode.h\"\n\n";

    ######### composition table

    # group the compositions by their second char:
    # $by_second[$second] is a list of [ first char, composed char ]
    my @by_second = ();
    foreach my $comp (@compose_table)
    {
        my ($first, $second, $composed) = @$comp;
        push @{$by_second[$second]}, [ $first, $composed ];
    }

    # collect the different second chars we have, in ascending order
    my @seconds = grep { defined $by_second[$_] } 0 .. 65535;
    my $count = @seconds;

    # build the table of second chars and offsets; offsets are counted in
    # (char,value) pairs from the start of the array, and the first
    # $count + 1 pairs are this header itself (terminator included)
    my @table = ();
    my $pos = $count + 1;
    foreach my $second (@seconds)
    {
        push @table, $second, $pos;
        $pos += @{$by_second[$second]};
    }
    # terminator with last position
    push @table, 0, $pos;
    printf OUTPUT "const WCHAR unicode_compose_table[0x%x] =\n{\n", 2*$pos;
    printf OUTPUT " /* second chars + offsets */\n%s", DUMP_ARRAY( "0x%04x", 0, @table );

    # build the table of first chars and mappings; each list is sorted by
    # first char so that it can be searched with a binary search
    foreach my $second (@seconds)
    {
        my @pairs = map { ($_->[0], $_->[1]) }
                    sort { $a->[0] <=> $b->[0] } @{$by_second[$second]};
        printf OUTPUT ",\n /* 0x%04x */\n%s", $second, DUMP_ARRAY( "0x%04x", 0, @pairs );
    }
    printf OUTPUT "\n};\n\nconst unsigned int unicode_compose_table_size = %d;\n\n", $count;

    ######### decomposition table

    # first determine all the 16-char subsets that contain something;
    # a subset stores 16 (first,second) pairs, i.e. 32 values
    my @subset_pos = (0) x 4096;
    my $subset_end = 16*2;  # for the null subset
    for (my $i = 0; $i < 65536; $i++)
    {
        next unless defined $decomp_table[$i];
        $subset_pos[$i >> 4] = $subset_end;
        $subset_end += 16*2;
        $i |= 15;  # jump to the last char of this subset
    }
    my $total = $subset_end;

    # now count the 256-char subsets that contain something; entries that
    # stay at 256 point to the null sub-index that follows the main index
    my @filled_idx = (256) x 256;
    my $index_end = 256 + 16;
    for (my $i = 0; $i < 4096; $i++)
    {
        next unless $subset_pos[$i];
        $filled_idx[$i >> 4] = $index_end;
        $index_end += 16;
        $i |= 15;  # jump to the last subset of this 256-char range
    }
    my $null_offset = $index_end;  # null mapping
    $total += $index_end;

    # add the index offsets to the subsets positions
    for (my $i = 0; $i < 4096; $i++)
    {
        next unless $subset_pos[$i];
        $subset_pos[$i] += $null_offset;
    }

    # dump the main index

    printf OUTPUT "const WCHAR unicode_decompose_table[%d] =\n", $total;
    printf OUTPUT "{\n /* index */\n";
    printf OUTPUT "%s", DUMP_ARRAY( "0x%04x", 0, @filled_idx );
    printf OUTPUT ",\n /* null sub-index */\n%s", DUMP_ARRAY( "0x%04x", 0, ($null_offset) x 16 );

    # dump the second-level indexes

    for (my $i = 0; $i < 256; $i++)
    {
        next unless ($filled_idx[$i] > 256);
        my @sub_index = @subset_pos[($i<<4)..($i<<4)+15];
        # empty subsets all share the null mapping
        for (my $j = 0; $j < 16; $j++) { $sub_index[$j] ||= $null_offset; }
        printf OUTPUT ",\n /* sub-index %02x */\n", $i;
        printf OUTPUT "%s", DUMP_ARRAY( "0x%04x", 0, @sub_index );
    }

    # dump the 16-char subsets

    printf OUTPUT ",\n /* null mapping */\n";
    printf OUTPUT "%s", DUMP_ARRAY( "0x%04x", 0, (0) x 32 );

    for (my $i = 0; $i < 4096; $i++)
    {
        next unless $subset_pos[$i];
        my @pairs = (0) x 32;
        for (my $j = 0; $j < 16; $j++)
        {
            my $decomp = $decomp_table[($i << 4) + $j];
            next unless defined $decomp;
            $pairs[2 * $j] = $decomp->[0];
            $pairs[2 * $j + 1] = $decomp->[1];
        }
        printf OUTPUT ",\n /* 0x%03x0 .. 0x%03xf */\n", $i, $i;
        printf OUTPUT "%s", DUMP_ARRAY( "0x%04x", 0, @pairs );
    }

    printf OUTPUT "\n};\n";
    # buffered write errors only show up at close time
    close OUTPUT or die "Cannot close compose.c: $!";
}
|
||||
|
||||
|
||||
################################################################
|
||||
# read an input file and generate the corresponding .c file
|
||||
sub HANDLE_FILE
|
||||
|
|
104
unicode/mbtowc.c
104
unicode/mbtowc.c
|
@ -9,6 +9,23 @@
|
|||
#include "winnls.h"
|
||||
#include "wine/unicode.h"
|
||||
|
||||
/* get the decomposition of a Unicode char */
|
||||
static int get_decomposition( WCHAR src, WCHAR *dst, unsigned int dstlen )
{
    /* Store the decomposition of 'src' into 'dst'.
     * Returns the number of chars stored, or 0 if 'dst' (of size 'dstlen')
     * is too small to hold the decomposed sequence.
     * Note that dst[0] is written unconditionally. */
    extern const WCHAR unicode_decompose_table[];
    const WCHAR *sub_index;
    const WCHAR *pair;
    int count;

    /* a char without decomposition maps to itself */
    dst[0] = src;

    /* three-level lookup: high byte -> sub-index, next 4 bits -> 16-char
     * subset, low 4 bits -> (first,second) pair inside the subset */
    sub_index = unicode_decompose_table + unicode_decompose_table[src >> 8];
    pair = unicode_decompose_table + sub_index[(src >> 4) & 0x0f] + 2 * (src & 0x0f);

    if (pair[0] == 0) return 1;  /* no decomposition for this char */
    if (dstlen <= 1) return 0;   /* no room for a second char */

    /* apply the decomposition recursively to the first char */
    count = get_decomposition( pair[0], dst, dstlen - 1 );
    if (!count) return 0;

    /* then append the second char after the expanded first one */
    dst[count] = pair[1];
    return count + 1;
}
|
||||
|
||||
/* check src string for invalid chars; return non-zero if invalid char found */
|
||||
static inline int check_invalid_chars_sbcs( const struct sbcs_table *table,
|
||||
const unsigned char *src, unsigned int srclen )
|
||||
|
@ -70,6 +87,33 @@ static inline int mbstowcs_sbcs( const struct sbcs_table *table,
|
|||
}
|
||||
}
|
||||
|
||||
/* mbstowcs for single-byte code page with char decomposition */
|
||||
static int mbstowcs_sbcs_decompose( const struct sbcs_table *table,
|
||||
const unsigned char *src, unsigned int srclen,
|
||||
WCHAR *dst, unsigned int dstlen )
|
||||
{
|
||||
const WCHAR * const cp2uni = table->cp2uni;
|
||||
unsigned int len;
|
||||
|
||||
if (!dstlen) /* compute length */
|
||||
{
|
||||
WCHAR dummy[4]; /* no decomposition is larger than 4 chars */
|
||||
for (len = 0; srclen; srclen--, src++)
|
||||
len += get_decomposition( cp2uni[*src], dummy, 4 );
|
||||
return len;
|
||||
}
|
||||
|
||||
for (len = dstlen; srclen && len; srclen--, src++)
|
||||
{
|
||||
int res = get_decomposition( cp2uni[*src], dst, len );
|
||||
if (!res) break;
|
||||
len -= res;
|
||||
dst += res;
|
||||
}
|
||||
if (srclen) return -1; /* overflow */
|
||||
return dstlen - len;
|
||||
}
|
||||
|
||||
/* query necessary dst length for src string */
|
||||
static inline int get_length_dbcs( const struct dbcs_table *table,
|
||||
const unsigned char *src, unsigned int srclen )
|
||||
|
@ -122,7 +166,9 @@ static inline int mbstowcs_dbcs( const struct dbcs_table *table,
|
|||
{
|
||||
const WCHAR * const cp2uni = table->cp2uni;
|
||||
const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes;
|
||||
int len;
|
||||
unsigned int len;
|
||||
|
||||
if (!dstlen) return get_length_dbcs( table, src, srclen );
|
||||
|
||||
for (len = dstlen; srclen && len; len--, srclen--, src++, dst++)
|
||||
{
|
||||
|
@ -140,6 +186,54 @@ static inline int mbstowcs_dbcs( const struct dbcs_table *table,
|
|||
}
|
||||
|
||||
|
||||
/* mbstowcs for double-byte code page with character decomposition */
|
||||
static int mbstowcs_dbcs_decompose( const struct dbcs_table *table,
|
||||
const unsigned char *src, unsigned int srclen,
|
||||
WCHAR *dst, unsigned int dstlen )
|
||||
{
|
||||
const WCHAR * const cp2uni = table->cp2uni;
|
||||
const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes;
|
||||
unsigned int len;
|
||||
WCHAR ch;
|
||||
int res;
|
||||
|
||||
if (!dstlen) /* compute length */
|
||||
{
|
||||
WCHAR dummy[4]; /* no decomposition is larger than 4 chars */
|
||||
for (len = 0; srclen; srclen--, src++)
|
||||
{
|
||||
unsigned char off = cp2uni_lb[*src];
|
||||
if (off)
|
||||
{
|
||||
if (!--srclen) break; /* partial char, ignore it */
|
||||
src++;
|
||||
ch = cp2uni[(off << 8) + *src];
|
||||
}
|
||||
else ch = cp2uni[*src];
|
||||
len += get_decomposition( ch, dummy, 4 );
|
||||
}
|
||||
return len;
|
||||
}
|
||||
|
||||
for (len = dstlen; srclen && len; srclen--, src++)
|
||||
{
|
||||
unsigned char off = cp2uni_lb[*src];
|
||||
if (off)
|
||||
{
|
||||
if (!--srclen) break; /* partial char, ignore it */
|
||||
src++;
|
||||
ch = cp2uni[(off << 8) + *src];
|
||||
}
|
||||
else ch = cp2uni[*src];
|
||||
if (!(res = get_decomposition( ch, dst, len ))) break;
|
||||
dst += res;
|
||||
len -= res;
|
||||
}
|
||||
if (srclen) return -1; /* overflow */
|
||||
return dstlen - len;
|
||||
}
|
||||
|
||||
|
||||
/* return -1 on dst buffer overflow, -2 on invalid input char */
|
||||
int cp_mbstowcs( const union cptable *table, int flags,
|
||||
const char *src, int srclen,
|
||||
|
@ -151,16 +245,22 @@ int cp_mbstowcs( const union cptable *table, int flags,
|
|||
{
|
||||
if (check_invalid_chars_sbcs( &table->sbcs, src, srclen )) return -2;
|
||||
}
|
||||
if (!(flags & MB_COMPOSITE))
|
||||
{
|
||||
if (!dstlen) return srclen;
|
||||
return mbstowcs_sbcs( &table->sbcs, src, srclen, dst, dstlen );
|
||||
}
|
||||
return mbstowcs_sbcs_decompose( &table->sbcs, src, srclen, dst, dstlen );
|
||||
}
|
||||
else /* mbcs */
|
||||
{
|
||||
if (flags & MB_ERR_INVALID_CHARS)
|
||||
{
|
||||
if (check_invalid_chars_dbcs( &table->dbcs, src, srclen )) return -2;
|
||||
}
|
||||
if (!dstlen) return get_length_dbcs( &table->dbcs, src, srclen );
|
||||
if (!(flags & MB_COMPOSITE))
|
||||
return mbstowcs_dbcs( &table->dbcs, src, srclen, dst, dstlen );
|
||||
else
|
||||
return mbstowcs_dbcs_decompose( &table->dbcs, src, srclen, dst, dstlen );
|
||||
}
|
||||
}
|
||||
|
|
267
unicode/wctomb.c
267
unicode/wctomb.c
|
@ -9,6 +9,90 @@
|
|||
#include "winnls.h"
|
||||
#include "wine/unicode.h"
|
||||
|
||||
/* search for a character in the unicode_compose_table; helper for compose() */
|
||||
static inline int binary_search( WCHAR ch, int low, int high )
{
    /* Look for 'ch' among the keys of the (key,value) pairs numbered
     * low..high (inclusive) in unicode_compose_table.
     * Returns the pair number, or -1 if the key is not present. */
    extern const WCHAR unicode_compose_table[];

    while (low <= high)
    {
        int mid = (low + high) / 2;
        WCHAR key = unicode_compose_table[2*mid];  /* keys sit at even slots */

        if (key == ch) return mid;
        if (key < ch) low = mid + 1;
        else high = mid - 1;
    }
    return -1;
}
|
||||
|
||||
/* return the result of the composition of two Unicode chars, or 0 if none */
|
||||
static WCHAR compose( const WCHAR *str )
{
    /* Compose str[0] and str[1] into a single char.
     * Returns the composed char, or 0 if the pair has no composition.
     * Always reads both str[0] and str[1]. */
    extern const WCHAR unicode_compose_table[];
    extern const unsigned int unicode_compose_table_size;
    int pos, first, last;

    /* step 1: find the second char in the header part of the table */
    pos = binary_search( str[1], 0, unicode_compose_table_size - 1 );
    if (pos == -1) return 0;

    /* the header entry gives the start of the list of first chars; the
     * list ends where the one of the next header entry begins */
    first = unicode_compose_table[2*pos+1];
    last = unicode_compose_table[2*pos+3] - 1;

    /* step 2: find the first char in that list, its value is the result */
    pos = binary_search( str[0], first, last );
    if (pos == -1) return 0;
    return unicode_compose_table[2*pos+1];
}
|
||||
|
||||
|
||||
/****************************************************************/
|
||||
/* sbcs support */
|
||||
|
||||
/* check if 'ch' is an acceptable sbcs mapping for 'wch' */
|
||||
static inline int is_valid_sbcs_mapping( const struct sbcs_table *table, int flags,
|
||||
WCHAR wch, unsigned char ch )
|
||||
{
|
||||
if (flags & WC_NO_BEST_FIT_CHARS) return (table->cp2uni[ch] == wch);
|
||||
if (ch != (unsigned char)table->info.def_char) return 1;
|
||||
return (wch == table->info.def_unicode_char);
|
||||
}
|
||||
|
||||
/* query necessary dst length for src string */
|
||||
static inline int get_length_sbcs( const struct sbcs_table *table, int flags,
|
||||
const WCHAR *src, unsigned int srclen )
|
||||
{
|
||||
unsigned int ret = srclen;
|
||||
|
||||
if (flags & WC_COMPOSITECHECK)
|
||||
{
|
||||
const unsigned char * const uni2cp_low = table->uni2cp_low;
|
||||
const unsigned short * const uni2cp_high = table->uni2cp_high;
|
||||
WCHAR composed;
|
||||
|
||||
for (ret = 0; srclen > 1; ret++, srclen--, src++)
|
||||
{
|
||||
if (!(composed = compose(src))) continue;
|
||||
/* check if we should skip the next char */
|
||||
|
||||
/* in WC_DEFAULTCHAR and WC_DISCARDNS mode, we always skip */
|
||||
/* the next char no matter if the composition is valid or not */
|
||||
if (!(flags & (WC_DEFAULTCHAR|WC_DISCARDNS)))
|
||||
{
|
||||
unsigned char ch = uni2cp_low[uni2cp_high[composed >> 8] + (composed & 0xff)];
|
||||
if (!is_valid_sbcs_mapping( table, flags, composed, ch )) continue;
|
||||
}
|
||||
src++;
|
||||
srclen--;
|
||||
}
|
||||
if (srclen) ret++; /* last char */
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* wcstombs for single-byte code page */
|
||||
static inline int wcstombs_sbcs( const struct sbcs_table *table,
|
||||
const WCHAR *src, unsigned int srclen,
|
||||
|
@ -61,46 +145,94 @@ static int wcstombs_sbcs_slow( const struct sbcs_table *table, int flags,
|
|||
char *dst, unsigned int dstlen,
|
||||
const char *defchar, int *used )
|
||||
{
|
||||
const WCHAR * const cp2uni = table->cp2uni;
|
||||
const unsigned char * const uni2cp_low = table->uni2cp_low;
|
||||
const unsigned short * const uni2cp_high = table->uni2cp_high;
|
||||
const unsigned char table_default = table->info.def_char & 0xff;
|
||||
int ret = srclen, tmp;
|
||||
|
||||
if (dstlen < srclen)
|
||||
{
|
||||
/* buffer too small: fill it up to dstlen and return error */
|
||||
srclen = dstlen;
|
||||
ret = -1;
|
||||
}
|
||||
unsigned int len;
|
||||
int tmp;
|
||||
WCHAR composed;
|
||||
|
||||
if (!defchar) defchar = &table_default;
|
||||
if (!used) used = &tmp; /* avoid checking on every char */
|
||||
|
||||
while (srclen)
|
||||
for (len = dstlen; srclen && len; dst++, len--, src++, srclen--)
|
||||
{
|
||||
unsigned char ch = uni2cp_low[uni2cp_high[*src >> 8] + (*src & 0xff)];
|
||||
if (((flags & WC_NO_BEST_FIT_CHARS) && (cp2uni[ch] != *src)) ||
|
||||
(ch == table_default && *src != table->info.def_unicode_char))
|
||||
WCHAR wch = *src;
|
||||
|
||||
if ((flags & WC_COMPOSITECHECK) && (srclen > 1) && (composed = compose(src)))
|
||||
{
|
||||
ch = *defchar;
|
||||
*used = 1;
|
||||
/* now check if we can use the composed char */
|
||||
*dst = uni2cp_low[uni2cp_high[composed >> 8] + (composed & 0xff)];
|
||||
if (is_valid_sbcs_mapping( table, flags, composed, *dst ))
|
||||
{
|
||||
/* we have a good mapping, use it */
|
||||
src++;
|
||||
srclen--;
|
||||
continue;
|
||||
}
|
||||
*dst++ = ch;
|
||||
/* no mapping for the composed char, check the other flags */
|
||||
if (flags & WC_DEFAULTCHAR) /* use the default char instead */
|
||||
{
|
||||
*dst = *defchar;
|
||||
*used = 1;
|
||||
src++; /* skip the non-spacing char */
|
||||
srclen--;
|
||||
continue;
|
||||
}
|
||||
if (flags & WC_DISCARDNS) /* skip the second char of the composition */
|
||||
{
|
||||
src++;
|
||||
srclen--;
|
||||
}
|
||||
return ret;
|
||||
/* WC_SEPCHARS is the default */
|
||||
}
|
||||
|
||||
*dst = uni2cp_low[uni2cp_high[wch >> 8] + (wch & 0xff)];
|
||||
if (!is_valid_sbcs_mapping( table, flags, wch, *dst ))
|
||||
{
|
||||
*dst = *defchar;
|
||||
*used = 1;
|
||||
}
|
||||
}
|
||||
if (srclen) return -1; /* overflow */
|
||||
return dstlen - len;
|
||||
}
|
||||
|
||||
|
||||
/****************************************************************/
|
||||
/* dbcs support */
|
||||
|
||||
/* check if 'ch' is an acceptable dbcs mapping for 'wch' */
|
||||
static inline int is_valid_dbcs_mapping( const struct dbcs_table *table, int flags,
|
||||
WCHAR wch, unsigned short ch )
|
||||
{
|
||||
if (ch == table->info.def_char && wch != table->info.def_unicode_char) return 0;
|
||||
if (flags & WC_NO_BEST_FIT_CHARS)
|
||||
{
|
||||
/* check if char maps back to the same Unicode value */
|
||||
if (ch & 0xff00)
|
||||
{
|
||||
unsigned char off = table->cp2uni_leadbytes[ch >> 8];
|
||||
return (table->cp2uni[(off << 8) + (ch & 0xff)] == wch);
|
||||
}
|
||||
return (table->cp2uni[ch & 0xff] == wch);
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* query necessary dst length for src string */
|
||||
static inline int get_length_dbcs( const struct dbcs_table *table,
|
||||
const WCHAR *src, unsigned int srclen )
|
||||
static int get_length_dbcs( const struct dbcs_table *table, int flags,
|
||||
const WCHAR *src, unsigned int srclen,
|
||||
const char *defchar )
|
||||
{
|
||||
const unsigned short * const uni2cp_low = table->uni2cp_low;
|
||||
const unsigned short * const uni2cp_high = table->uni2cp_high;
|
||||
WCHAR defchar_value = table->info.def_char;
|
||||
WCHAR composed;
|
||||
int len;
|
||||
|
||||
if (!defchar && !(flags & WC_COMPOSITECHECK))
|
||||
{
|
||||
for (len = 0; srclen; srclen--, src++, len++)
|
||||
{
|
||||
if (uni2cp_low[uni2cp_high[*src >> 8] + (*src & 0xff)] & 0xff00) len++;
|
||||
|
@ -108,6 +240,48 @@ static inline int get_length_dbcs( const struct dbcs_table *table,
|
|||
return len;
|
||||
}
|
||||
|
||||
if (defchar) defchar_value = defchar[1] ? ((defchar[0] << 8) | defchar[1]) : defchar[0];
|
||||
for (len = 0; srclen; len++, srclen--, src++)
|
||||
{
|
||||
unsigned short res;
|
||||
WCHAR wch = *src;
|
||||
|
||||
if ((flags & WC_COMPOSITECHECK) && (srclen > 1) && (composed = compose(src)))
|
||||
{
|
||||
/* now check if we can use the composed char */
|
||||
res = uni2cp_low[uni2cp_high[composed >> 8] + (composed & 0xff)];
|
||||
|
||||
if (is_valid_dbcs_mapping( table, flags, composed, res ))
|
||||
{
|
||||
/* we have a good mapping for the composed char, use it */
|
||||
if (res & 0xff00) len++;
|
||||
src++;
|
||||
srclen--;
|
||||
continue;
|
||||
}
|
||||
/* no mapping for the composed char, check the other flags */
|
||||
if (flags & WC_DEFAULTCHAR) /* use the default char instead */
|
||||
{
|
||||
if (defchar_value & 0xff00) len++;
|
||||
src++; /* skip the non-spacing char */
|
||||
srclen--;
|
||||
continue;
|
||||
}
|
||||
if (flags & WC_DISCARDNS) /* skip the second char of the composition */
|
||||
{
|
||||
src++;
|
||||
srclen--;
|
||||
}
|
||||
/* WC_SEPCHARS is the default */
|
||||
}
|
||||
|
||||
res = uni2cp_low[uni2cp_high[wch >> 8] + (wch & 0xff)];
|
||||
if (!is_valid_dbcs_mapping( table, flags, wch, res )) res = defchar_value;
|
||||
if (res & 0xff00) len++;
|
||||
}
|
||||
return len;
|
||||
}
|
||||
|
||||
/* wcstombs for double-byte code page */
|
||||
static inline int wcstombs_dbcs( const struct dbcs_table *table,
|
||||
const WCHAR *src, unsigned int srclen,
|
||||
|
@ -138,11 +312,10 @@ static int wcstombs_dbcs_slow( const struct dbcs_table *table, int flags,
|
|||
char *dst, unsigned int dstlen,
|
||||
const char *defchar, int *used )
|
||||
{
|
||||
const WCHAR * const cp2uni = table->cp2uni;
|
||||
const unsigned short * const uni2cp_low = table->uni2cp_low;
|
||||
const unsigned short * const uni2cp_high = table->uni2cp_high;
|
||||
const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes;
|
||||
WCHAR defchar_value = table->info.def_char;
|
||||
WCHAR composed;
|
||||
int len, tmp;
|
||||
|
||||
if (defchar) defchar_value = defchar[1] ? ((defchar[0] << 8) | defchar[1]) : defchar[0];
|
||||
|
@ -150,32 +323,46 @@ static int wcstombs_dbcs_slow( const struct dbcs_table *table, int flags,
|
|||
|
||||
for (len = dstlen; srclen && len; len--, srclen--, src++)
|
||||
{
|
||||
unsigned short res = uni2cp_low[uni2cp_high[*src >> 8] + (*src & 0xff)];
|
||||
unsigned short res;
|
||||
WCHAR wch = *src;
|
||||
|
||||
if (res == table->info.def_char && *src != table->info.def_unicode_char)
|
||||
if ((flags & WC_COMPOSITECHECK) && (srclen > 1) && (composed = compose(src)))
|
||||
{
|
||||
/* now check if we can use the composed char */
|
||||
res = uni2cp_low[uni2cp_high[composed >> 8] + (composed & 0xff)];
|
||||
|
||||
if (is_valid_dbcs_mapping( table, flags, composed, res ))
|
||||
{
|
||||
/* we have a good mapping for the composed char, use it */
|
||||
src++;
|
||||
srclen--;
|
||||
goto output_char;
|
||||
}
|
||||
/* no mapping for the composed char, check the other flags */
|
||||
if (flags & WC_DEFAULTCHAR) /* use the default char instead */
|
||||
{
|
||||
res = defchar_value;
|
||||
*used = 1;
|
||||
src++; /* skip the non-spacing char */
|
||||
srclen--;
|
||||
goto output_char;
|
||||
}
|
||||
else if (flags & WC_NO_BEST_FIT_CHARS)
|
||||
if (flags & WC_DISCARDNS) /* skip the second char of the composition */
|
||||
{
|
||||
/* check if char maps back to the same Unicode value */
|
||||
if (res & 0xff00)
|
||||
{
|
||||
unsigned char off = cp2uni_lb[res >> 8];
|
||||
if (cp2uni[(off << 8) + (res & 0xff)] != *src)
|
||||
{
|
||||
res = defchar_value;
|
||||
*used = 1;
|
||||
}
|
||||
}
|
||||
else if (cp2uni[res & 0xff] != *src)
|
||||
{
|
||||
res = defchar_value;
|
||||
*used = 1;
|
||||
src++;
|
||||
srclen--;
|
||||
}
|
||||
/* WC_SEPCHARS is the default */
|
||||
}
|
||||
|
||||
res = uni2cp_low[uni2cp_high[wch >> 8] + (wch & 0xff)];
|
||||
if (!is_valid_dbcs_mapping( table, flags, wch, res ))
|
||||
{
|
||||
res = defchar_value;
|
||||
*used = 1;
|
||||
}
|
||||
|
||||
output_char:
|
||||
if (res & 0xff00)
|
||||
{
|
||||
if (len == 1) break; /* do not output a partial char */
|
||||
|
@ -196,7 +383,7 @@ int cp_wcstombs( const union cptable *table, int flags,
|
|||
{
|
||||
if (table->info.char_size == 1)
|
||||
{
|
||||
if (!dstlen) return srclen;
|
||||
if (!dstlen) return get_length_sbcs( &table->sbcs, flags, src, srclen );
|
||||
if (flags || defchar || used)
|
||||
return wcstombs_sbcs_slow( &table->sbcs, flags, src, srclen,
|
||||
dst, dstlen, defchar, used );
|
||||
|
@ -204,7 +391,7 @@ int cp_wcstombs( const union cptable *table, int flags,
|
|||
}
|
||||
else /* mbcs */
|
||||
{
|
||||
if (!dstlen) return get_length_dbcs( &table->dbcs, src, srclen );
|
||||
if (!dstlen) return get_length_dbcs( &table->dbcs, flags, src, srclen, defchar );
|
||||
if (flags || defchar || used)
|
||||
return wcstombs_dbcs_slow( &table->dbcs, flags, src, srclen,
|
||||
dst, dstlen, defchar, used );
|
||||
|
|
Loading…
Reference in New Issue