From e4a9e2ff2e65a37333d1dd4f132a062d3380bce2 Mon Sep 17 00:00:00 2001 From: Huw Davies Date: Fri, 8 Feb 2019 10:41:23 +0000 Subject: [PATCH] kernel32: Initial support for canonical decomposition in NormalizeString(). Signed-off-by: Huw Davies Signed-off-by: Alexandre Julliard --- dlls/kernel32/locale.c | 56 ++++++++++++++++++++++++++++++++---- dlls/kernel32/tests/locale.c | 42 +++++++++++++-------------- include/wine/unicode.h | 4 +++ libs/port/Makefile.in | 1 + libs/port/decompose.c | 4 +-- libs/port/mbtowc.c | 10 +++---- libs/port/normalize.c | 38 ++++++++++++++++++++++++ libs/wine/sortkey.c | 2 +- tools/make_unicode | 4 +-- 9 files changed, 124 insertions(+), 37 deletions(-) create mode 100644 libs/port/normalize.c diff --git a/dlls/kernel32/locale.c b/dlls/kernel32/locale.c index e62b849d091..a4e5ef5a381 100644 --- a/dlls/kernel32/locale.c +++ b/dlls/kernel32/locale.c @@ -48,6 +48,7 @@ #include "winerror.h" #include "winver.h" #include "kernel_private.h" +#include "wine/heap.h" #include "wine/debug.h" WINE_DEFAULT_DEBUG_CHANNEL(nls); @@ -5361,12 +5362,57 @@ INT WINAPI GetUserDefaultLocaleName(LPWSTR localename, int buffersize) /****************************************************************************** * NormalizeString (KERNEL32.@) */ -INT WINAPI NormalizeString(NORM_FORM NormForm, LPCWSTR lpSrcString, INT cwSrcLength, - LPWSTR lpDstString, INT cwDstLength) +INT WINAPI NormalizeString(NORM_FORM form, const WCHAR *src, INT src_len, WCHAR *dst, INT dst_len) { - FIXME("%x %p %d %p %d\n", NormForm, lpSrcString, cwSrcLength, lpDstString, cwDstLength); - SetLastError(ERROR_CALL_NOT_IMPLEMENTED); - return 0; + int flags = 0, compose = 0; + unsigned int res, buf_len; + WCHAR *buf = NULL; + + TRACE("%x %s %d %p %d\n", form, debugstr_wn(src, src_len), src_len, dst, dst_len); + + if (src_len == -1) src_len = strlenW(src) + 1; + + if (form == NormalizationKC || form == NormalizationKD) flags |= WINE_DECOMPOSE_COMPAT; + if (form == NormalizationC || form == NormalizationKC) compose = 1; + + if (!compose && dst_len) + { + res = wine_decompose_string( flags, src, src_len, dst, dst_len ); + if (!res) + { + SetLastError( ERROR_INSUFFICIENT_BUFFER ); + goto done; + } + buf = dst; + } + else + { + buf_len = src_len * 4; + do + { + WCHAR *old_buf = buf; + + buf = heap_realloc( buf, buf_len ); + if (!buf) + { + heap_free( old_buf ); + SetLastError( ERROR_OUTOFMEMORY ); + return 0; + } + res = wine_decompose_string( flags, src, src_len, buf, buf_len ); + buf_len *= 2; + } while (!res); + } + + if (compose) + { + FIXME("Composing not yet implemented\n"); + res = 0; + } + +done: + if (buf != dst) heap_free( buf ); + return res; } /****************************************************************************** diff --git a/dlls/kernel32/tests/locale.c b/dlls/kernel32/tests/locale.c index d93fdb224e3..0fa6aeb7b45 100644 --- a/dlls/kernel32/tests/locale.c +++ b/dlls/kernel32/tests/locale.c @@ -5692,27 +5692,27 @@ static void test_NormalizeString(void) }; static const struct test_data_normal test_arr[] = { - { part0_str1, { part0_str1, part0_nfd1, part0_str1, part0_nfd1 }, { 1, 1, 1, 1 } }, - { part0_str2, { part0_nfc2, part0_str2, part0_nfc2, part0_str2 }, { 1, 1, 1, 1 } }, - { part0_str3, { part0_nfc3, part0_str3, part0_nfc3, part0_str3 }, { 1, 1, 1, 1 } }, - { part0_str4, { part0_nfc4, part0_str4, part0_nfc4, part0_str4 }, { 1, 1, 1, 1 } }, + { part0_str1, { part0_str1, part0_nfd1, part0_str1, part0_nfd1 }, { 1, 0, 1, 0 } }, + { part0_str2, { part0_nfc2, part0_str2, part0_nfc2, part0_str2 }, { 1, 0, 1, 0 } }, + { part0_str3, { part0_nfc3, part0_str3, part0_nfc3, part0_str3 }, { 1, 0, 1, 0 } }, + { part0_str4, { part0_nfc4, part0_str4, part0_nfc4, part0_str4 }, { 1, 0, 1, 0 } }, { part0_str5, { part0_nfc5, part0_nfc5, part0_nfc5, part0_nfc5 }, { 1, 1, 1, 1 } }, { part0_str6, { part0_nfc6, part0_nfc6, part0_nfc6, part0_nfc6 }, { 1, 1, 1, 1 } }, - { part0_str8, { part0_str8, part0_nfd8, part0_str8, part0_nfd8 }, { 1, 1, 1, 1 } }, + { part0_str8, { part0_str8, part0_nfd8, part0_str8, part0_nfd8 }, { 1, 0, 1, 0 } }, { part0_str9, { part0_nfc9, part0_nfd9, part0_nfc9, part0_nfd9 }, { 1, 1, 1, 1 } }, - { part0_str10, { part0_str10, part0_nfd10, part0_str10, part0_nfd10 }, { 1, 1, 1, 1 } }, - { part0_str11, { part0_str11, part0_nfd11, part0_str11, part0_nfd11 }, { 1, 1, 1, 1 } }, - { part0_str12, { part0_nfc12, part0_nfd12, part0_nfc12, part0_nfd12 }, { 1, 1, 1, 1 } }, - { part1_str1, { part1_str1, part1_str1, part1_nfkc1, part1_nfkc1 }, { 1, 1, 1, 1 } }, - { part1_str2, { part1_str2, part1_str2, part1_nfkc2, part1_nfkc2 }, { 1, 1, 1, 1 } }, - { part1_str3, { part1_str3, part1_nfd3, part1_str3, part1_nfd3 }, { 1, 1, 1, 1 } }, - { part1_str4, { part1_str4, part1_str4, part1_nfkc4, part1_nfkc4 }, { 1, 1, 1, 1 } }, - { part1_str5, { part1_str5, part1_nfd5, part1_str5, part1_nfd5 }, { 1, 1, 1, 1 } }, - { part1_str6, { part1_str6, part1_nfd6, part1_str6, part1_nfd6 }, { 1, 1, 1, 1 } }, - { part1_str7, { part1_str7, part1_str7, part1_nfkc7, part1_nfkc7 }, { 1, 1, 1, 1 } }, - { part1_str8, { part1_str8, part1_nfd8, part1_str8, part1_nfd8 }, { 1, 1, 1, 1 } }, - { part1_str9, { part1_str9, part1_str9, part1_nfkc9, part1_nfkc9 }, { 1, 1, 1, 1 } }, - { part1_str10, { part1_str10, part1_str10, part1_nfkc10, part1_nfkc10 }, { 1, 1, 1, 1 } }, + { part0_str10, { part0_str10, part0_nfd10, part0_str10, part0_nfd10 }, { 1, 0, 1, 0 } }, + { part0_str11, { part0_str11, part0_nfd11, part0_str11, part0_nfd11 }, { 1, 0, 1, 0 } }, + { part0_str12, { part0_nfc12, part0_nfd12, part0_nfc12, part0_nfd12 }, { 1, 0, 1, 0 } }, + { part1_str1, { part1_str1, part1_str1, part1_nfkc1, part1_nfkc1 }, { 1, 0, 1, 1 } }, + { part1_str2, { part1_str2, part1_str2, part1_nfkc2, part1_nfkc2 }, { 1, 0, 1, 1 } }, + { part1_str3, { part1_str3, part1_nfd3, part1_str3, part1_nfd3 }, { 1, 0, 1, 0 } }, + { part1_str4, { part1_str4, part1_str4, part1_nfkc4, part1_nfkc4 }, { 1, 0, 1, 1 } }, + { part1_str5, { part1_str5, part1_nfd5, part1_str5, part1_nfd5 }, { 1, 0, 1, 0 } }, + { part1_str6, { part1_str6, part1_nfd6, part1_str6, part1_nfd6 }, { 1, 0, 1, 0 } }, + { part1_str7, { part1_str7, part1_str7, part1_nfkc7, part1_nfkc7 }, { 1, 0, 1, 1 } }, + { part1_str8, { part1_str8, part1_nfd8, part1_str8, part1_nfd8 }, { 1, 0, 1, 0 } }, + { part1_str9, { part1_str9, part1_str9, part1_nfkc9, part1_nfkc9 }, { 1, 0, 1, 1 } }, + { part1_str10, { part1_str10, part1_str10, part1_nfkc10, part1_nfkc10 }, { 1, 0, 1, 1 } }, { 0 } }; const struct test_data_normal *ptest = test_arr; @@ -5726,10 +5726,8 @@ static void test_NormalizeString(void) return; } - todo_wine { - dstlen = pNormalizeString( NormalizationD, ptest->str, -1, dst, 1 ); - ok(GetLastError() == ERROR_INSUFFICIENT_BUFFER, "Should have failed with ERROR_INSUFFICIENT_BUFFER\n"); - } + dstlen = pNormalizeString( NormalizationD, ptest->str, -1, dst, 1 ); + ok(GetLastError() == ERROR_INSUFFICIENT_BUFFER, "Should have failed with ERROR_INSUFFICIENT_BUFFER\n"); /* * For each string, first test passing -1 as srclen to NormalizeString, diff --git a/include/wine/unicode.h b/include/wine/unicode.h index 35c61666d22..2c21cdfe482 100644 --- a/include/wine/unicode.h +++ b/include/wine/unicode.h @@ -101,6 +101,10 @@ extern int wine_compare_string( int flags, const WCHAR *str1, int len1, const WC extern int wine_get_sortkey( int flags, const WCHAR *src, int srclen, char *dst, int dstlen ); extern int wine_fold_string( int flags, const WCHAR *src, int srclen , WCHAR *dst, int dstlen ); +extern unsigned int wine_decompose_string( int flags, const WCHAR *src, unsigned int srclen, WCHAR *dst, unsigned int dstlen ); +#define WINE_DECOMPOSE_COMPAT 1 +#define WINE_DECOMPOSE_REORDER 2 + extern int strcmpiW( const WCHAR *str1, const WCHAR *str2 ); extern int strncmpiW( const WCHAR *str1, const WCHAR *str2, int n ); extern int memicmpW( const WCHAR *str1, const WCHAR *str2, int n ); diff --git a/libs/port/Makefile.in b/libs/port/Makefile.in index c87b99de6d0..1c753fd893f 100644 --- a/libs/port/Makefile.in +++ b/libs/port/Makefile.in @@ -92,6 +92,7 @@ C_SRCS = \ memcpy_unaligned.c \ memmove.c \ mkstemps.c \ + normalize.c \ poll.c \ pread.c \ pwrite.c \ diff --git a/libs/port/decompose.c b/libs/port/decompose.c index 5e6b2214ec8..c7568a01eaa 100644 --- a/libs/port/decompose.c +++ b/libs/port/decompose.c @@ -747,7 +747,7 @@ static const WCHAR table[4704] = 0x05d1, 0x05bf, 0x05db, 0x05bf, 0x05e4, 0x05bf, 0x0000, 0x0000 }; -unsigned int DECLSPEC_HIDDEN wine_decompose( WCHAR ch, WCHAR *dst, unsigned int dstlen ) +unsigned int DECLSPEC_HIDDEN wine_decompose( int flags, WCHAR ch, WCHAR *dst, unsigned int dstlen ) { const WCHAR *ptr = table + table[table[ch >> 8] + ((ch >> 4) & 0x0f)] + 2 * (ch & 0xf); unsigned int res; @@ -756,6 +756,6 @@ unsigned int DECLSPEC_HIDDEN wine_decompose( WCHAR ch, WCHAR *dst, unsigned int if (!*ptr) return 1; if (dstlen <= 1) return 0; /* apply the decomposition recursively to the first char */ - if ((res = wine_decompose( *ptr, dst, dstlen-1 ))) dst[res++] = ptr[1]; + if ((res = wine_decompose( flags, *ptr, dst, dstlen-1 ))) dst[res++] = ptr[1]; return res; } diff --git a/libs/port/mbtowc.c b/libs/port/mbtowc.c index f5d0059e154..471b87be6c0 100644 --- a/libs/port/mbtowc.c +++ b/libs/port/mbtowc.c @@ -22,7 +22,7 @@ #include "wine/unicode.h" -extern unsigned int wine_decompose( WCHAR ch, WCHAR *dst, unsigned int dstlen ) DECLSPEC_HIDDEN; +extern unsigned int wine_decompose( int flags, WCHAR ch, WCHAR *dst, unsigned int dstlen ) DECLSPEC_HIDDEN; /* check the code whether it is in Unicode Private Use Area (PUA). */ /* MB_ERR_INVALID_CHARS raises an error converting from 1-byte character to PUA. */ @@ -125,13 +125,13 @@ static int mbstowcs_sbcs_decompose( const struct sbcs_table *table, int flags, { WCHAR dummy[4]; /* no decomposition is larger than 4 chars */ for (len = 0; srclen; srclen--, src++) - len += wine_decompose( cp2uni[*src], dummy, 4 ); + len += wine_decompose( 0, cp2uni[*src], dummy, 4 ); return len; } for (len = dstlen; srclen && len; srclen--, src++) { - unsigned int res = wine_decompose( cp2uni[*src], dst, len ); + unsigned int res = wine_decompose( 0, cp2uni[*src], dst, len ); if (!res) break; len -= res; dst += res; @@ -237,7 +237,7 @@ static int mbstowcs_dbcs_decompose( const struct dbcs_table *table, ch = cp2uni[(off << 8) + *src]; } else ch = cp2uni[*src]; - len += wine_decompose( ch, dummy, 4 ); + len += wine_decompose( 0, ch, dummy, 4 ); } return len; } @@ -252,7 +252,7 @@ static int mbstowcs_dbcs_decompose( const struct dbcs_table *table, ch = cp2uni[(off << 8) + *src]; } else ch = cp2uni[*src]; - if (!(res = wine_decompose( ch, dst, len ))) break; + if (!(res = wine_decompose( 0, ch, dst, len ))) break; dst += res; len -= res; } diff --git a/libs/port/normalize.c b/libs/port/normalize.c new file mode 100644 index 00000000000..ba6e39fe0e4 --- /dev/null +++ b/libs/port/normalize.c @@ -0,0 +1,38 @@ +/* + * Unicode normalization functions + * + * Copyright 2019 Huw Davies + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#include "wine/unicode.h" + +extern unsigned int wine_decompose( int flags, WCHAR ch, WCHAR *dst, unsigned int dstlen ) DECLSPEC_HIDDEN; + +unsigned int wine_decompose_string( int flags, const WCHAR *src, unsigned int src_len, + WCHAR *dst, unsigned int dst_len ) +{ + unsigned int src_pos, dst_pos = 0, decomp_len; + + for (src_pos = 0; src_pos < src_len; src_pos++) + { + if (dst_pos == dst_len) return 0; + decomp_len = wine_decompose( flags, src[src_pos], dst + dst_pos, dst_len - dst_pos ); + if (decomp_len == 0) return 0; + dst_pos += decomp_len; + } + return dst_pos; +} diff --git a/libs/wine/sortkey.c b/libs/wine/sortkey.c index 634e910d4c3..ef4488908b6 100644 --- a/libs/wine/sortkey.c +++ b/libs/wine/sortkey.c @@ -19,7 +19,7 @@ */ #include "wine/unicode.h" -extern unsigned int wine_decompose( WCHAR ch, WCHAR *dst, unsigned int dstlen ); +extern unsigned int wine_decompose( int flags, WCHAR ch, WCHAR *dst, unsigned int dstlen ); extern const unsigned int collation_table[]; /* diff --git a/tools/make_unicode b/tools/make_unicode index 56d19056567..1533c46cea8 100755 --- a/tools/make_unicode +++ b/tools/make_unicode @@ -2353,7 +2353,7 @@ sub dump_decompose_table($) printf OUTPUT "\n};\n\n"; print OUTPUT <<"EOF"; -unsigned int DECLSPEC_HIDDEN wine_decompose( WCHAR ch, WCHAR *dst, unsigned int dstlen ) +unsigned int DECLSPEC_HIDDEN wine_decompose( int flags, WCHAR ch, WCHAR *dst, unsigned int dstlen ) { const WCHAR *ptr = table + table[table[ch >> 8] + ((ch >> 4) & 0x0f)] + 2 * (ch & 0xf); unsigned int res; @@ -2362,7 +2362,7 @@ unsigned int DECLSPEC_HIDDEN wine_decompose( WCHAR ch, WCHAR *dst, unsigned int if (!*ptr) return 1; if (dstlen <= 1) return 0; /* apply the decomposition recursively to the first char */ - if ((res = wine_decompose( *ptr, dst, dstlen-1 ))) dst[res++] = ptr[1]; + if ((res = wine_decompose( flags, *ptr, dst, dstlen-1 ))) dst[res++] = ptr[1]; return res; } EOF