2000-06-09 07:27:21 +02:00
|
|
|
/*
|
|
|
|
* MultiByteToWideChar implementation
|
|
|
|
*
|
|
|
|
* Copyright 2000 Alexandre Julliard
|
2002-03-10 00:29:33 +01:00
|
|
|
*
|
|
|
|
* This library is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
|
|
* License as published by the Free Software Foundation; either
|
|
|
|
* version 2.1 of the License, or (at your option) any later version.
|
|
|
|
*
|
|
|
|
* This library is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
* Lesser General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
|
|
* License along with this library; if not, write to the Free Software
|
2006-05-18 14:49:52 +02:00
|
|
|
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
|
2000-06-09 07:27:21 +02:00
|
|
|
*/
|
|
|
|
|
|
|
|
#include <string.h>
|
|
|
|
|
|
|
|
#include "wine/unicode.h"
|
|
|
|
|
2020-02-04 10:52:04 +01:00
|
|
|
extern const unsigned short nfd_table[] DECLSPEC_HIDDEN;
|
|
|
|
|
|
|
|
/* Look up the decomposition of a character in nfd_table.
 *
 * The table is walked in three steps: the high byte of ch selects a
 * first-level entry, bits 4-7 of ch are added to it to select a
 * second-level entry, and the low four bits of ch are added to that to
 * obtain the final index. The entry at that index and the one following
 * it are the start and end offsets of the decomposition inside nfd_table.
 *
 * Returns a pointer to the decomposed sequence and stores its length in
 * *len. When start and end are equal, NULL is returned and *len is set
 * to 1.
 */
static const WCHAR *get_decomposition( WCHAR ch, unsigned int *len )
{
    const unsigned short block = nfd_table[ch >> 8];
    const unsigned short row = nfd_table[block + ((ch >> 4) & 0xf)];
    const unsigned short offset = row + (ch & 0xf);
    const unsigned short start = nfd_table[offset];
    const unsigned short end = nfd_table[offset + 1];

    *len = end - start;
    if (*len) return nfd_table + start;

    /* empty range: no decomposition stored for this character */
    *len = 1;
    return NULL;
}
|
2000-12-29 04:56:06 +01:00
|
|
|
|
2011-04-04 12:01:01 +02:00
|
|
|
/* check the code whether it is in Unicode Private Use Area (PUA). */
|
|
|
|
/* MB_ERR_INVALID_CHARS raises an error converting from 1-byte character to PUA. */
|
|
|
|
static inline int is_private_use_area_char(WCHAR code)
|
|
|
|
{
|
|
|
|
return (code >= 0xe000 && code <= 0xf8ff);
|
|
|
|
}
|
|
|
|
|
2000-06-09 07:27:21 +02:00
|
|
|
/* check src string for invalid chars; return non-zero if invalid char found */
|
2007-05-29 23:31:14 +02:00
|
|
|
static inline int check_invalid_chars_sbcs( const struct sbcs_table *table, int flags,
|
2000-06-09 07:27:21 +02:00
|
|
|
const unsigned char *src, unsigned int srclen )
|
|
|
|
{
|
2007-05-29 23:31:14 +02:00
|
|
|
const WCHAR * const cp2uni = (flags & MB_USEGLYPHCHARS) ? table->cp2uni_glyphs : table->cp2uni;
|
2009-02-23 15:16:35 +01:00
|
|
|
const WCHAR def_unicode_char = table->info.def_unicode_char;
|
|
|
|
const unsigned char def_char = table->uni2cp_low[table->uni2cp_high[def_unicode_char >> 8]
|
|
|
|
+ (def_unicode_char & 0xff)];
|
2000-06-09 07:27:21 +02:00
|
|
|
while (srclen)
|
|
|
|
{
|
2011-04-04 12:01:01 +02:00
|
|
|
if ((cp2uni[*src] == def_unicode_char && *src != def_char) ||
|
|
|
|
is_private_use_area_char(cp2uni[*src])) break;
|
2000-06-09 07:27:21 +02:00
|
|
|
src++;
|
|
|
|
srclen--;
|
|
|
|
}
|
|
|
|
return srclen;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* mbstowcs for single-byte code page */
|
|
|
|
/* all lengths are in characters, not bytes */
|
2007-05-29 23:31:14 +02:00
|
|
|
static inline int mbstowcs_sbcs( const struct sbcs_table *table, int flags,
|
2000-06-09 07:27:21 +02:00
|
|
|
const unsigned char *src, unsigned int srclen,
|
2000-06-12 03:16:11 +02:00
|
|
|
WCHAR *dst, unsigned int dstlen )
|
2000-06-09 07:27:21 +02:00
|
|
|
{
|
2007-05-29 23:31:14 +02:00
|
|
|
const WCHAR * const cp2uni = (flags & MB_USEGLYPHCHARS) ? table->cp2uni_glyphs : table->cp2uni;
|
2000-06-09 07:27:21 +02:00
|
|
|
int ret = srclen;
|
|
|
|
|
|
|
|
if (dstlen < srclen)
|
|
|
|
{
|
|
|
|
/* buffer too small: fill it up to dstlen and return error */
|
|
|
|
srclen = dstlen;
|
|
|
|
ret = -1;
|
|
|
|
}
|
|
|
|
|
2019-01-23 12:29:54 +01:00
|
|
|
while (srclen >= 16)
|
2000-06-09 07:27:21 +02:00
|
|
|
{
|
2019-01-23 12:29:54 +01:00
|
|
|
dst[0] = cp2uni[src[0]];
|
|
|
|
dst[1] = cp2uni[src[1]];
|
|
|
|
dst[2] = cp2uni[src[2]];
|
|
|
|
dst[3] = cp2uni[src[3]];
|
|
|
|
dst[4] = cp2uni[src[4]];
|
|
|
|
dst[5] = cp2uni[src[5]];
|
|
|
|
dst[6] = cp2uni[src[6]];
|
|
|
|
dst[7] = cp2uni[src[7]];
|
|
|
|
dst[8] = cp2uni[src[8]];
|
|
|
|
dst[9] = cp2uni[src[9]];
|
|
|
|
dst[10] = cp2uni[src[10]];
|
|
|
|
dst[11] = cp2uni[src[11]];
|
|
|
|
dst[12] = cp2uni[src[12]];
|
|
|
|
dst[13] = cp2uni[src[13]];
|
|
|
|
dst[14] = cp2uni[src[14]];
|
|
|
|
dst[15] = cp2uni[src[15]];
|
2000-06-09 07:27:21 +02:00
|
|
|
src += 16;
|
2019-01-23 12:29:54 +01:00
|
|
|
dst += 16;
|
2000-06-09 07:27:21 +02:00
|
|
|
srclen -= 16;
|
|
|
|
}
|
2019-01-23 12:29:54 +01:00
|
|
|
|
|
|
|
/* now handle the remaining characters */
|
|
|
|
src += srclen;
|
|
|
|
dst += srclen;
|
|
|
|
switch (srclen)
|
|
|
|
{
|
|
|
|
case 15: dst[-15] = cp2uni[src[-15]];
|
|
|
|
case 14: dst[-14] = cp2uni[src[-14]];
|
|
|
|
case 13: dst[-13] = cp2uni[src[-13]];
|
|
|
|
case 12: dst[-12] = cp2uni[src[-12]];
|
|
|
|
case 11: dst[-11] = cp2uni[src[-11]];
|
|
|
|
case 10: dst[-10] = cp2uni[src[-10]];
|
|
|
|
case 9: dst[-9] = cp2uni[src[-9]];
|
|
|
|
case 8: dst[-8] = cp2uni[src[-8]];
|
|
|
|
case 7: dst[-7] = cp2uni[src[-7]];
|
|
|
|
case 6: dst[-6] = cp2uni[src[-6]];
|
|
|
|
case 5: dst[-5] = cp2uni[src[-5]];
|
|
|
|
case 4: dst[-4] = cp2uni[src[-4]];
|
|
|
|
case 3: dst[-3] = cp2uni[src[-3]];
|
|
|
|
case 2: dst[-2] = cp2uni[src[-2]];
|
|
|
|
case 1: dst[-1] = cp2uni[src[-1]];
|
|
|
|
case 0: break;
|
|
|
|
}
|
|
|
|
return ret;
|
2000-06-09 07:27:21 +02:00
|
|
|
}
|
|
|
|
|
2000-12-29 04:56:06 +01:00
|
|
|
/* mbstowcs for single-byte code page with char decomposition */
|
2007-05-29 23:31:14 +02:00
|
|
|
static int mbstowcs_sbcs_decompose( const struct sbcs_table *table, int flags,
|
2000-12-29 04:56:06 +01:00
|
|
|
const unsigned char *src, unsigned int srclen,
|
|
|
|
WCHAR *dst, unsigned int dstlen )
|
|
|
|
{
|
2007-05-29 23:31:14 +02:00
|
|
|
const WCHAR * const cp2uni = (flags & MB_USEGLYPHCHARS) ? table->cp2uni_glyphs : table->cp2uni;
|
2020-02-04 10:52:04 +01:00
|
|
|
const WCHAR *decomp;
|
|
|
|
unsigned int len, decomp_len;
|
2000-12-29 04:56:06 +01:00
|
|
|
|
|
|
|
if (!dstlen) /* compute length */
|
|
|
|
{
|
2020-02-04 10:52:04 +01:00
|
|
|
for (len = 0; srclen; srclen--, src++, len += decomp_len)
|
|
|
|
get_decomposition( cp2uni[*src], &decomp_len );
|
2000-12-29 04:56:06 +01:00
|
|
|
return len;
|
|
|
|
}
|
|
|
|
|
2020-02-04 10:52:04 +01:00
|
|
|
for (len = dstlen; srclen && len; srclen--, src++, dst += decomp_len, len -= decomp_len)
|
2000-12-29 04:56:06 +01:00
|
|
|
{
|
2020-02-04 10:52:04 +01:00
|
|
|
if ((decomp = get_decomposition( cp2uni[*src], &decomp_len )))
|
|
|
|
{
|
|
|
|
if (len < decomp_len) break;
|
|
|
|
memcpy( dst, decomp, decomp_len * sizeof(WCHAR) );
|
|
|
|
}
|
|
|
|
else *dst = cp2uni[*src];
|
2000-12-29 04:56:06 +01:00
|
|
|
}
|
|
|
|
if (srclen) return -1; /* overflow */
|
|
|
|
return dstlen - len;
|
|
|
|
}
|
|
|
|
|
2000-06-09 07:27:21 +02:00
|
|
|
/* query necessary dst length for src string */
|
|
|
|
static inline int get_length_dbcs( const struct dbcs_table *table,
|
|
|
|
const unsigned char *src, unsigned int srclen )
|
|
|
|
{
|
|
|
|
const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes;
|
|
|
|
int len;
|
|
|
|
|
|
|
|
for (len = 0; srclen; srclen--, src++, len++)
|
|
|
|
{
|
2016-05-15 14:08:47 +02:00
|
|
|
if (cp2uni_lb[*src] && srclen > 1 && src[1])
|
2000-06-09 07:27:21 +02:00
|
|
|
{
|
|
|
|
src++;
|
2016-02-09 05:44:26 +01:00
|
|
|
srclen--;
|
2000-06-09 07:27:21 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
return len;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* check src string for invalid chars; return non-zero if invalid char found */
|
|
|
|
static inline int check_invalid_chars_dbcs( const struct dbcs_table *table,
|
|
|
|
const unsigned char *src, unsigned int srclen )
|
|
|
|
{
|
2000-06-12 03:16:11 +02:00
|
|
|
const WCHAR * const cp2uni = table->cp2uni;
|
2000-06-09 07:27:21 +02:00
|
|
|
const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes;
|
2009-02-23 15:16:35 +01:00
|
|
|
const WCHAR def_unicode_char = table->info.def_unicode_char;
|
|
|
|
const unsigned short def_char = table->uni2cp_low[table->uni2cp_high[def_unicode_char >> 8]
|
|
|
|
+ (def_unicode_char & 0xff)];
|
2000-06-09 07:27:21 +02:00
|
|
|
while (srclen)
|
|
|
|
{
|
|
|
|
unsigned char off = cp2uni_lb[*src];
|
|
|
|
if (off) /* multi-byte char */
|
|
|
|
{
|
|
|
|
if (srclen == 1) break; /* partial char, error */
|
2009-02-23 15:16:35 +01:00
|
|
|
if (cp2uni[(off << 8) + src[1]] == def_unicode_char &&
|
|
|
|
((src[0] << 8) | src[1]) != def_char) break;
|
2000-06-09 07:27:21 +02:00
|
|
|
src++;
|
|
|
|
srclen--;
|
|
|
|
}
|
2011-04-04 12:01:01 +02:00
|
|
|
else if ((cp2uni[*src] == def_unicode_char && *src != def_char) ||
|
|
|
|
is_private_use_area_char(cp2uni[*src])) break;
|
2000-06-09 07:27:21 +02:00
|
|
|
src++;
|
|
|
|
srclen--;
|
|
|
|
}
|
|
|
|
return srclen;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* mbstowcs for double-byte code page */
|
|
|
|
/* all lengths are in characters, not bytes */
|
|
|
|
static inline int mbstowcs_dbcs( const struct dbcs_table *table,
|
|
|
|
const unsigned char *src, unsigned int srclen,
|
2000-06-12 03:16:11 +02:00
|
|
|
WCHAR *dst, unsigned int dstlen )
|
2000-06-09 07:27:21 +02:00
|
|
|
{
|
2000-06-12 03:16:11 +02:00
|
|
|
const WCHAR * const cp2uni = table->cp2uni;
|
2000-06-09 07:27:21 +02:00
|
|
|
const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes;
|
2000-12-29 04:56:06 +01:00
|
|
|
unsigned int len;
|
|
|
|
|
|
|
|
if (!dstlen) return get_length_dbcs( table, src, srclen );
|
2000-06-09 07:27:21 +02:00
|
|
|
|
|
|
|
for (len = dstlen; srclen && len; len--, srclen--, src++, dst++)
|
|
|
|
{
|
|
|
|
unsigned char off = cp2uni_lb[*src];
|
2016-05-15 14:08:47 +02:00
|
|
|
if (off && srclen > 1 && src[1])
|
2000-06-09 07:27:21 +02:00
|
|
|
{
|
|
|
|
src++;
|
2016-02-09 05:44:26 +01:00
|
|
|
srclen--;
|
2000-06-09 07:27:21 +02:00
|
|
|
*dst = cp2uni[(off << 8) + *src];
|
|
|
|
}
|
|
|
|
else *dst = cp2uni[*src];
|
|
|
|
}
|
|
|
|
if (srclen) return -1; /* overflow */
|
|
|
|
return dstlen - len;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2000-12-29 04:56:06 +01:00
|
|
|
/* mbstowcs for double-byte code page with character decomposition */
|
|
|
|
static int mbstowcs_dbcs_decompose( const struct dbcs_table *table,
|
|
|
|
const unsigned char *src, unsigned int srclen,
|
|
|
|
WCHAR *dst, unsigned int dstlen )
|
|
|
|
{
|
|
|
|
const WCHAR * const cp2uni = table->cp2uni;
|
|
|
|
const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes;
|
2020-02-04 10:52:04 +01:00
|
|
|
const WCHAR *decomp;
|
|
|
|
unsigned int len, decomp_len;
|
2000-12-29 04:56:06 +01:00
|
|
|
WCHAR ch;
|
|
|
|
|
|
|
|
if (!dstlen) /* compute length */
|
|
|
|
{
|
2020-02-04 10:52:04 +01:00
|
|
|
for (len = 0; srclen; srclen--, src++, len += decomp_len)
|
2000-12-29 04:56:06 +01:00
|
|
|
{
|
|
|
|
unsigned char off = cp2uni_lb[*src];
|
2016-05-15 14:08:47 +02:00
|
|
|
if (off && srclen > 1 && src[1])
|
2000-12-29 04:56:06 +01:00
|
|
|
{
|
|
|
|
src++;
|
2016-02-09 05:44:26 +01:00
|
|
|
srclen--;
|
2000-12-29 04:56:06 +01:00
|
|
|
ch = cp2uni[(off << 8) + *src];
|
|
|
|
}
|
|
|
|
else ch = cp2uni[*src];
|
2020-02-04 10:52:04 +01:00
|
|
|
get_decomposition( ch, &decomp_len );
|
2000-12-29 04:56:06 +01:00
|
|
|
}
|
|
|
|
return len;
|
|
|
|
}
|
|
|
|
|
2020-02-04 10:52:04 +01:00
|
|
|
for (len = dstlen; srclen && len; srclen--, src++, dst += decomp_len, len -= decomp_len)
|
2000-12-29 04:56:06 +01:00
|
|
|
{
|
|
|
|
unsigned char off = cp2uni_lb[*src];
|
2016-05-15 14:08:47 +02:00
|
|
|
if (off && srclen > 1 && src[1])
|
2000-12-29 04:56:06 +01:00
|
|
|
{
|
|
|
|
src++;
|
2016-02-09 05:44:26 +01:00
|
|
|
srclen--;
|
2000-12-29 04:56:06 +01:00
|
|
|
ch = cp2uni[(off << 8) + *src];
|
|
|
|
}
|
|
|
|
else ch = cp2uni[*src];
|
2020-02-04 10:52:04 +01:00
|
|
|
|
|
|
|
if ((decomp = get_decomposition( ch, &decomp_len )))
|
|
|
|
{
|
|
|
|
if (len < decomp_len) break;
|
|
|
|
memcpy( dst, decomp, decomp_len * sizeof(WCHAR) );
|
|
|
|
}
|
|
|
|
else *dst = ch;
|
2000-12-29 04:56:06 +01:00
|
|
|
}
|
|
|
|
if (srclen) return -1; /* overflow */
|
|
|
|
return dstlen - len;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2000-06-09 07:27:21 +02:00
|
|
|
/* return -1 on dst buffer overflow, -2 on invalid input char */
|
2003-03-21 22:30:51 +01:00
|
|
|
int wine_cp_mbstowcs( const union cptable *table, int flags,
|
2005-08-01 11:21:03 +02:00
|
|
|
const char *s, int srclen,
|
2003-03-21 22:30:51 +01:00
|
|
|
WCHAR *dst, int dstlen )
|
2000-06-09 07:27:21 +02:00
|
|
|
{
|
2005-08-01 11:21:03 +02:00
|
|
|
const unsigned char *src = (const unsigned char*) s;
|
|
|
|
|
2000-06-09 07:27:21 +02:00
|
|
|
if (table->info.char_size == 1)
|
|
|
|
{
|
|
|
|
if (flags & MB_ERR_INVALID_CHARS)
|
|
|
|
{
|
2007-05-29 23:31:14 +02:00
|
|
|
if (check_invalid_chars_sbcs( &table->sbcs, flags, src, srclen )) return -2;
|
2000-06-09 07:27:21 +02:00
|
|
|
}
|
2000-12-29 04:56:06 +01:00
|
|
|
if (!(flags & MB_COMPOSITE))
|
|
|
|
{
|
|
|
|
if (!dstlen) return srclen;
|
2007-05-29 23:31:14 +02:00
|
|
|
return mbstowcs_sbcs( &table->sbcs, flags, src, srclen, dst, dstlen );
|
2000-12-29 04:56:06 +01:00
|
|
|
}
|
2007-05-29 23:31:14 +02:00
|
|
|
return mbstowcs_sbcs_decompose( &table->sbcs, flags, src, srclen, dst, dstlen );
|
2000-06-09 07:27:21 +02:00
|
|
|
}
|
|
|
|
else /* mbcs */
|
|
|
|
{
|
|
|
|
if (flags & MB_ERR_INVALID_CHARS)
|
|
|
|
{
|
2000-06-15 02:31:12 +02:00
|
|
|
if (check_invalid_chars_dbcs( &table->dbcs, src, srclen )) return -2;
|
2000-06-09 07:27:21 +02:00
|
|
|
}
|
2000-12-29 04:56:06 +01:00
|
|
|
if (!(flags & MB_COMPOSITE))
|
|
|
|
return mbstowcs_dbcs( &table->dbcs, src, srclen, dst, dstlen );
|
|
|
|
else
|
|
|
|
return mbstowcs_dbcs_decompose( &table->dbcs, src, srclen, dst, dstlen );
|
2000-06-09 07:27:21 +02:00
|
|
|
}
|
|
|
|
}
|