From cc300a20b3847d510f0b8aa54b08fb15ac9e6cb3 Mon Sep 17 00:00:00 2001 From: Alexandre Julliard Date: Mon, 3 Feb 2020 13:15:07 +0100 Subject: [PATCH] libport: Move Unicode normalization implementation to ntdll. Signed-off-by: Alexandre Julliard --- dlls/ntdll/locale.c | 139 ++++++++++++++++++++++++++++++++++++++-- include/wine/unicode.h | 2 - libs/port/Makefile.in | 1 - libs/port/normalize.c | 141 ----------------------------------------- 4 files changed, 134 insertions(+), 149 deletions(-) delete mode 100644 libs/port/normalize.c diff --git a/dlls/ntdll/locale.c b/dlls/ntdll/locale.c index b3717b33160..dfd48eb0f8b 100644 --- a/dlls/ntdll/locale.c +++ b/dlls/ntdll/locale.c @@ -82,6 +82,10 @@ static NLSTABLEINFO nls_info; static HMODULE kernel32_handle; static const union cptable *unix_table; /* NULL if UTF8 */ +extern WCHAR wine_compose( const WCHAR *str ) DECLSPEC_HIDDEN; +extern unsigned int wine_decompose( int flags, WCHAR ch, WCHAR *dst, unsigned int dstlen ) DECLSPEC_HIDDEN; +extern const unsigned short combining_class_table[] DECLSPEC_HIDDEN; + static NTSTATUS load_string( ULONG id, LANGID lang, WCHAR *buffer, ULONG len ) { const IMAGE_RESOURCE_DATA_ENTRY *data; @@ -149,6 +153,130 @@ static WCHAR casemap_ascii( WCHAR ch ) } +static BYTE get_combining_class( WCHAR c ) +{ + return combining_class_table[combining_class_table[combining_class_table[c >> 8] + ((c >> 4) & 0xf)] + (c & 0xf)]; +} + + +static BOOL is_starter( WCHAR c ) +{ + return !get_combining_class( c ); +} + + +static BOOL reorderable_pair( WCHAR c1, WCHAR c2 ) +{ + BYTE ccc1, ccc2; + + /* reorderable if ccc1 > ccc2 > 0 */ + ccc1 = get_combining_class( c1 ); + if (ccc1 < 2) return FALSE; + ccc2 = get_combining_class( c2 ); + return ccc2 && (ccc1 > ccc2); +} + + +static void canonical_order_substring( WCHAR *str, unsigned int len ) +{ + unsigned int i; + BOOL swapped; + + do + { + swapped = FALSE; + for (i = 0; i < len - 1; i++) + { + if (reorderable_pair( str[i], str[i + 1] )) + { + WCHAR tmp = str[i]; + str[i] = str[i + 1]; + str[i + 1] = tmp; + swapped = TRUE; + } + } + } while (swapped); +} + + +/**************************************************************************** + * canonical_order_string + * + * Reorder the string into canonical order - D108/D109. + * + * Starters (chars with combining class == 0) don't move, so look for continuous + * substrings of non-starters and only reorder those. + */ +static void canonical_order_string( WCHAR *str, unsigned int len ) +{ + unsigned int i, next = 0; + + for (i = 1; i <= len; i++) + { + if (i == len || is_starter( str[i] )) + { + if (i > next + 1) /* at least two successive non-starters */ + canonical_order_substring( str + next, i - next ); + next = i + 1; + } + } +} + + +static unsigned int decompose_string( int flags, const WCHAR *src, unsigned int src_len, + WCHAR *dst, unsigned int dst_len ) +{ + unsigned int src_pos, dst_pos = 0, decomp_len; + + for (src_pos = 0; src_pos < src_len; src_pos++) + { + if (dst_pos == dst_len) return 0; + decomp_len = wine_decompose( flags, src[src_pos], dst + dst_pos, dst_len - dst_pos ); + if (decomp_len == 0) return 0; + dst_pos += decomp_len; + } + + if (flags & WINE_DECOMPOSE_REORDER) canonical_order_string( dst, dst_pos ); + return dst_pos; +} + + +static BOOL is_blocked( WCHAR *starter, WCHAR *ptr ) +{ + if (ptr == starter + 1) return FALSE; + /* Because the string is already canonically ordered, the chars are blocked + only if the previous char's combining class is equal to the test char. */ + if (get_combining_class( *(ptr - 1) ) == get_combining_class( *ptr )) return TRUE; + return FALSE; +} + + +static unsigned int compose_string( WCHAR *str, unsigned int len ) +{ + unsigned int i, last_starter = len; + WCHAR pair[2], comp; + + for (i = 0; i < len; i++) + { + pair[1] = str[i]; + if (last_starter == len || is_blocked( str + last_starter, str + i ) || !(comp = wine_compose( pair ))) + { + if (is_starter( str[i] )) + { + last_starter = i; + pair[0] = str[i]; + } + continue; + } + str[last_starter] = pair[0] = comp; + len--; + memmove( str + i, str + i + 1, (len - i) * sizeof(WCHAR) ); + i = last_starter; + } + return len; +} + + static NTSTATUS open_nls_data_file( ULONG type, ULONG id, HANDLE *file ) { static const WCHAR pathfmtW[] = {'\\','?','?','\\','%','s','%','s',0}; @@ -646,7 +774,7 @@ int ntdll_umbstowcs( DWORD flags, const char *src, int srclen, WCHAR *dst, int d if (status && status != STATUS_SOME_NOT_MAPPED) return -1; reslen /= sizeof(WCHAR); #ifdef __APPLE__ /* work around broken Mac OS X filesystem that enforces decomposed Unicode */ - if (reslen && dst) RtlNormalizeString( NormalizationC, dst, reslen, dst, (int *)&reslen ); + if (reslen && dst) reslen = compose_string( dst, reslen ); #endif return reslen; } @@ -1574,7 +1702,7 @@ NTSTATUS WINAPI RtlNormalizeString( ULONG form, const WCHAR *src, INT src_len, W if (!compose && *dst_len) { - res = wine_decompose_string( flags, src, src_len, dst, *dst_len ); + res = decompose_string( flags, src, src_len, dst, *dst_len ); if (!res) { status = STATUS_BUFFER_TOO_SMALL; @@ -1582,23 +1710,24 @@ NTSTATUS WINAPI RtlNormalizeString( ULONG form, const WCHAR *src, INT src_len, W } buf = dst; } - else + else if (src_len) { buf_len = src_len * 4; for (;;) { buf = RtlAllocateHeap( GetProcessHeap(), 0, buf_len * sizeof(WCHAR) ); if (!buf) return STATUS_NO_MEMORY; - res = wine_decompose_string( flags, src, src_len, buf, buf_len ); + res = decompose_string( flags, src, src_len, buf, buf_len ); if (res) break; buf_len *= 2; RtlFreeHeap( GetProcessHeap(), 0, buf ); } } + else res = 0; if (compose) { - res = wine_compose_string( buf, res ); + res = compose_string( buf, res ); if (*dst_len >= res) memcpy( dst, buf, res * sizeof(WCHAR) ); } diff --git a/include/wine/unicode.h b/include/wine/unicode.h index ff6f6568a53..c9393062b73 100644 --- a/include/wine/unicode.h +++ b/include/wine/unicode.h @@ -97,8 +97,6 @@ extern int wine_compare_string( int flags, const WCHAR *str1, int len1, const WC extern int wine_get_sortkey( int flags, const WCHAR *src, int srclen, char *dst, int dstlen ); extern int wine_fold_string( int flags, const WCHAR *src, int srclen , WCHAR *dst, int dstlen ); -extern unsigned int wine_compose_string( WCHAR *str, unsigned int len ); -extern unsigned int wine_decompose_string( int flags, const WCHAR *src, unsigned int srclen, WCHAR *dst, unsigned int dstlen ); #define WINE_DECOMPOSE_COMPAT 1 #define WINE_DECOMPOSE_REORDER 2 diff --git a/libs/port/Makefile.in b/libs/port/Makefile.in index cb57be61b8b..f9e45035bd6 100644 --- a/libs/port/Makefile.in +++ b/libs/port/Makefile.in @@ -93,7 +93,6 @@ C_SRCS = \ lstat.c \ mbtowc.c \ mkstemps.c \ - normalize.c \ poll.c \ pread.c \ pwrite.c \ diff --git a/libs/port/normalize.c b/libs/port/normalize.c deleted file mode 100644 index 954a33cfdf1..00000000000 --- a/libs/port/normalize.c +++ /dev/null @@ -1,141 +0,0 @@ -/* - * Unicode normalization functions - * - * Copyright 2019 Huw Davies - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA - */ - -#include "wine/unicode.h" - -extern WCHAR wine_compose( const WCHAR *str ) DECLSPEC_HIDDEN; -extern unsigned int wine_decompose( int flags, WCHAR ch, WCHAR *dst, unsigned int dstlen ) DECLSPEC_HIDDEN; -extern const unsigned short combining_class_table[] DECLSPEC_HIDDEN; - -static BYTE get_combining_class( WCHAR c ) -{ - return combining_class_table[combining_class_table[combining_class_table[c >> 8] + ((c >> 4) & 0xf)] + (c & 0xf)]; -} - -static BOOL is_starter( WCHAR c ) -{ - return !get_combining_class( c ); -} - -static BOOL reorderable_pair( WCHAR c1, WCHAR c2 ) -{ - BYTE ccc1, ccc2; - - /* reorderable if ccc1 > ccc2 > 0 */ - ccc1 = get_combining_class( c1 ); - if (ccc1 < 2) return FALSE; - ccc2 = get_combining_class( c2 ); - return ccc2 && (ccc1 > ccc2); -} - -static void canonical_order_substring( WCHAR *str, unsigned int len ) -{ - unsigned int i; - BOOL swapped; - - do - { - swapped = FALSE; - for (i = 0; i < len - 1; i++) - { - if (reorderable_pair( str[i], str[i + 1] )) - { - WCHAR tmp = str[i]; - str[i] = str[i + 1]; - str[i + 1] = tmp; - swapped = TRUE; - } - } - } while (swapped); -} - -/**************************************************************************** - * canonical_order_string - * - * Reorder the string into canonical order - D108/D109. - * - * Starters (chars with combining class == 0) don't move, so look for continuous - * substrings of non-starters and only reorder those. - */ -static void canonical_order_string( WCHAR *str, unsigned int len ) -{ - unsigned int i, next = 0; - - for (i = 1; i <= len; i++) - { - if (i == len || is_starter( str[i] )) - { - if (i > next + 1) /* at least two successive non-starters */ - canonical_order_substring( str + next, i - next ); - next = i + 1; - } - } -} - -unsigned int wine_decompose_string( int flags, const WCHAR *src, unsigned int src_len, - WCHAR *dst, unsigned int dst_len ) -{ - unsigned int src_pos, dst_pos = 0, decomp_len; - - for (src_pos = 0; src_pos < src_len; src_pos++) - { - if (dst_pos == dst_len) return 0; - decomp_len = wine_decompose( flags, src[src_pos], dst + dst_pos, dst_len - dst_pos ); - if (decomp_len == 0) return 0; - dst_pos += decomp_len; - } - - if (flags & WINE_DECOMPOSE_REORDER) canonical_order_string( dst, dst_pos ); - return dst_pos; -} - -static BOOL is_blocked( WCHAR *starter, WCHAR *ptr ) -{ - if (ptr == starter + 1) return FALSE; - /* Because the string is already canonically ordered, the chars are blocked - only if the previous char's combining class is equal to the test char. */ - if (get_combining_class( *(ptr - 1) ) == get_combining_class( *ptr )) return TRUE; - return FALSE; -} - -unsigned int wine_compose_string( WCHAR *str, unsigned int len ) -{ - unsigned int i, last_starter = len; - WCHAR pair[2], comp; - - for (i = 0; i < len; i++) - { - pair[1] = str[i]; - if (last_starter == len || is_blocked( str + last_starter, str + i ) || !(comp = wine_compose( pair ))) - { - if (is_starter( str[i] )) - { - last_starter = i; - pair[0] = str[i]; - } - continue; - } - str[last_starter] = pair[0] = comp; - len--; - memmove( str + i, str + i + 1, (len - i) * sizeof(WCHAR) ); - i = last_starter; - } - return len; -}