diff --git a/dlls/kernel32/tests/locale.c b/dlls/kernel32/tests/locale.c
index 097cd1fcb4d..397dd0eeb1f 100644
--- a/dlls/kernel32/tests/locale.c
+++ b/dlls/kernel32/tests/locale.c
@@ -4220,11 +4220,11 @@ static void test_GetCPInfo(void)
         case NormalizationKC:
         case NormalizationKD:
         case 13: /* IDN */
-            todo_wine ok( !status, "%u: failed %x\n", i, status );
+            ok( !status, "%u: failed %x\n", i, status );
             if (status) break;
             ok( size > 0x8000 && size <= 0x30000 , "wrong size %lx\n", size );
             ret = UnmapViewOfFile( ptr );
-            ok( ret, "UnmapViewOfFile failed err %u\n", GetLastError() );
+            todo_wine ok( ret, "UnmapViewOfFile failed err %u\n", GetLastError() );
             break;
         default:
             ok( status == STATUS_OBJECT_NAME_NOT_FOUND, "%u: failed %x\n", i, status );
diff --git a/loader/wine.inf.in b/loader/wine.inf.in
index 3a73f1fd046..f83309a9514 100644
--- a/loader/wine.inf.in
+++ b/loader/wine.inf.in
@@ -3948,6 +3948,11 @@ c_936.nls
 c_949.nls
 c_950.nls
 l_intl.nls
+normidna.nls
+normnfc.nls
+normnfd.nls
+normnfkc.nls
+normnfkd.nls
 
 [WineSourceDirs]
 NlsFiles=nls
diff --git a/nls/Makefile.in b/nls/Makefile.in
index aa20e9bffbd..f3e517a48ef 100644
--- a/nls/Makefile.in
+++ b/nls/Makefile.in
@@ -64,4 +64,9 @@ SOURCES = \
 	c_936.nls \
 	c_949.nls \
 	c_950.nls \
-	l_intl.nls
+	l_intl.nls \
+	normidna.nls \
+	normnfc.nls \
+	normnfd.nls \
+	normnfkc.nls \
+	normnfkd.nls
diff --git a/nls/normidna.nls b/nls/normidna.nls
new file mode 100644
index 00000000000..f6f4f551c04
Binary files /dev/null and b/nls/normidna.nls differ
diff --git a/nls/normnfc.nls b/nls/normnfc.nls
new file mode 100644
index 00000000000..c3c9fb09f45
Binary files /dev/null and b/nls/normnfc.nls differ
diff --git a/nls/normnfd.nls b/nls/normnfd.nls
new file mode 100644
index 00000000000..013980e02f1
Binary files /dev/null and b/nls/normnfd.nls differ
diff --git a/nls/normnfkc.nls b/nls/normnfkc.nls
new file mode 100644
index 00000000000..dc44b8539d1
Binary files /dev/null and b/nls/normnfkc.nls differ
diff --git a/nls/normnfkd.nls b/nls/normnfkd.nls
new file mode 100644
index 00000000000..0dd33edf8ab
Binary files /dev/null and b/nls/normnfkd.nls differ
diff --git a/tools/make_unicode b/tools/make_unicode
index 7b6fad57026..e5a35188331 100755
--- a/tools/make_unicode
+++ b/tools/make_unicode
@@ -22,8 +22,10 @@
 use strict;
 
 # base URLs for www.unicode.org files
+my $UNIVERSION = "12.1.0"; # shared by the UCD and IDNA URLs, also written to the normalization table header
 my $MAPPINGS = "http://www.unicode.org/Public/MAPPINGS";
-my $UNIDATA = "http://www.unicode.org/Public/12.1.0/ucd/UCD.zip";
+my $UNIDATA = "http://www.unicode.org/Public/$UNIVERSION/ucd/UCD.zip";
+my $IDNADATA = "https://www.unicode.org/Public/idna/$UNIVERSION";
 my $REPORTS = "http://www.unicode.org/reports";
 my $RFCS = "http://www.rfc-editor.org/rfc";
 my $MSDATA = "https://download.microsoft.com/download/C/F/7/CF713A5E-9FBC-4FD6-9246-275F65C0E498";
@@ -408,6 +410,8 @@ my @decomp_table = ();
 my @combining_class_table = ();
 my @decomp_compat_table = ();
 my @comp_exclusions = ();
+my @idna_decomp_table = (); # per-char IDNA mapping (array ref), filled by load_data
+my @idna_disallowed = (); # set to 1 for chars whose IDNA status is "disallowed"
 
 my $default_char;
 my $default_wchar;
@@ -494,8 +498,13 @@ sub get_composition($$)
     return () if $comp_exclusions[$ch]; # composition exclusion
     return () if $combining_class_table[$ch]; # non-starter
     return () if $combining_class_table[$ret[0]]; # first char is non-starter
-    return () if $compat && !defined $decomp_table[$ret[0]] &&
-        defined $decomp_compat_table[$ret[0]]; # first char has compat decomposition
+    return () if $compat == 1 && !defined $decomp_table[$ret[0]] &&
+        defined $decomp_compat_table[$ret[0]]; # first char has compat decomposition
+    return () if $compat == 2 && !defined $decomp_table[$ret[0]] &&
+        defined $idna_decomp_table[$ret[0]]; # first char has IDNA decomposition
+    return () if $compat == 2 && defined $idna_decomp_table[$ret[0]] &&
+        defined $idna_decomp_table[$idna_decomp_table[$ret[0]]->[0]]; # first char's decomposition has IDNA decomposition
+    return () if $compat == 2 && defined $idna_decomp_table[$ret[1]]; # second char has IDNA decomposition
     return @ret;
 }
 
@@ -515,6 +524,44 @@ sub build_decompositions(@)
     return @dst;
 }
 
+################################################################
+# compose Hangul sequences: returns the input list with L+V and LV+T jamo pairs folded into syllables
+sub compose_hangul(@)
+{
+    my $SBASE = 0xac00; # first precomposed syllable
+    my $LBASE = 0x1100; # first leading consonant (L)
+    my $VBASE = 0x1161; # first vowel (V)
+    my $TBASE = 0x11a7; # one below the first trailing consonant (T); the test below uses '>'
+    my $LCOUNT = 19;
+    my $VCOUNT = 21;
+    my $TCOUNT = 28;
+    my $NCOUNT = $VCOUNT * $TCOUNT; # syllables per leading consonant
+    my $SCOUNT = $LCOUNT * $NCOUNT; # total number of precomposed syllables
+
+    my @seq = @_;
+    my @ret;
+    my $i;
+
+    for ($i = 0; $i < @seq; $i++)
+    {
+        my $ch = $seq[$i];
+        if ($ch >= $LBASE && $ch < $LBASE + $LCOUNT && $i < @seq - 1 &&
+            $seq[$i+1] >= $VBASE && $seq[$i+1] < $VBASE + $VCOUNT)
+        {
+            $ch = $SBASE + (($seq[$i] - $LBASE) * $VCOUNT + ($seq[$i+1] - $VBASE)) * $TCOUNT; # L + V => LV syllable
+            $i++; # the vowel has been consumed
+        }
+        if ($ch >= $SBASE && $ch < $SBASE + $SCOUNT && !(($ch - $SBASE) % $TCOUNT) && $i < @seq - 1 &&
+            $seq[$i+1] > $TBASE && $seq[$i+1] < $TBASE + $TCOUNT)
+        {
+            $ch += $seq[$i+1] - $TBASE; # LV + T => LVT syllable
+            $i++; # the trailing consonant has been consumed
+        }
+        push @ret, $ch;
+    }
+    return @ret;
+}
+
 ################################################################
 # read in the Unicode database files
 sub load_data()
@@ -556,10 +603,7 @@ sub load_data()
         {
             $digitmap_table[$src] = ord $dig;
         }
-        if ($comb ne "")
-        {
-            $combining_class_table[$src] = $comb;
-        }
+        $combining_class_table[$src] = ($cat ne "Co") ? $comb : 0x100; # Private Use
 
         # copy the category and direction for everything between First/Last pairs
         if ($name =~ /, First>/) { $start = $src; }
@@ -569,6 +613,7 @@ sub load_data()
             {
                 $category_table[$start] = $category_table[$src];
                 $direction_table[$start] = $direction_table[$src];
+                $combining_class_table[$start] = $combining_class_table[$src]; # ranges inherit the class too
                 $start++;
             }
         }
@@ -667,6 +712,50 @@ sub load_data()
         }
     }
     close $EXCL;
+
+    # load the IDNA mappings
+
+    @idna_decomp_table = @decomp_compat_table; # start from the compatibility decompositions
+    my $IDNA = open_data_file( $IDNADATA, "IdnaMappingTable.txt" );
+    while (<$IDNA>)
+    {
+        s/\#.*//; # remove comments
+        next if /^\s*$/;
+        my ($char, $type, $mapping) = split /;/;
+        my ($ch1, $ch2); # first and last code point covered by this line
+        if ($char =~ /([0-9a-fA-F]+)\.\.([0-9a-fA-F]+)/)
+        {
+            $ch1 = hex $1;
+            $ch2 = hex $2;
+        }
+        elsif ($char =~ /([0-9a-fA-F]+)/)
+        {
+            $ch1 = $ch2 = hex $1;
+        }
+
+        if ($type =~ /mapped/ || $type =~ /deviation/) # unanchored: any type containing these words
+        {
+            $mapping =~ s/^\s*(([0-9a-fA-F]+\s+)+)\s*$/$1/;
+            my @seq = map { hex $_; } split /\s+/, $mapping;
+            foreach my $i ($ch1 .. $ch2) { $idna_decomp_table[$i] = @seq ? \@seq : [ 0 ]; } # empty mapping is stored as [ 0 ]
+        }
+        elsif ($type =~ /valid/) # tested before /disallowed/, so a type containing both words lands here
+        {
+        }
+        elsif ($type =~ /ignored/)
+        {
+            foreach my $i ($ch1 .. $ch2) { $idna_decomp_table[$i] = [ 0 ]; }
+        }
+        elsif ($type =~ /disallowed/)
+        {
+            foreach my $i ($ch1 .. $ch2)
+            {
+                $idna_decomp_table[$i] = undef; # drop any inherited compatibility decomposition
+                $idna_disallowed[$i] = 1;
+            }
+        }
+    }
+    close $IDNA;
 }
 
 
@@ -2190,6 +2279,274 @@ sub dump_decompose_table($$)
     save_file($filename);
 }
 
+sub rol($$) # 8-bit rotate left of $byte by $count; result is masked to 0xff
+{
+    my ($byte, $count) = @_;
+    return (($byte << $count) | ($byte >> (8 - $count))) & 0xff;
+}
+
+################################################################
+# compress the character properties table: returns $rows row indices followed by the data of each distinct row
+sub compress_char_props_table($@)
+{
+    my $rows = shift;
+    my @table = @_;
+    my $len = @table / $rows; # entries per row
+    my $pos = 0; # index given to the last stored row
+    my @array = (0) x $rows;
+    my %sequences; # packed row contents => row index
+
+    # add some predefined sequences
+    foreach my $i (0, 0xfb .. 0xff) { $sequences{pack "L*", (rol($i,5)) x $len} = $i; } # uniform rows get index $i and are never stored
+
+    # try to merge table rows
+    for (my $row = 0; $row < $rows; $row++)
+    {
+        my @table_row = map { defined $_ ? $_ : 0x7f; } @table[($row * $len)..(($row + 1) * $len - 1)]; # undefined entries become 0x7f
+        my $rowtxt = pack "L*", @table_row;
+        if (defined($sequences{$rowtxt}))
+        {
+            # reuse an existing row
+            $array[$row] = $sequences{$rowtxt};
+        }
+        else
+        {
+            # create a new row
+            $sequences{$rowtxt} = $array[$row] = ++$pos;
+            push @array, @table_row;
+        }
+    }
+    return @array;
+}
+
+################################################################
+# dump a normalization table in binary format; the form is taken from the "norm<form>.nls" file name
+sub dump_norm_table($)
+{
+    my $filename = shift;
+
+    my %forms = ( "nfc" => 1, "nfd" => 2, "nfkc" => 5, "nfkd" => 6, "idna" => 13 ); # value written to the file header
+    my %decomp = ( "nfc" => \@decomp_table,
+                   "nfd" => \@decomp_table,
+                   "nfkc" => \@decomp_compat_table,
+                   "nfkd" => \@decomp_compat_table ,
+                   "idna" => \@idna_decomp_table );
+
+    open OUTPUT,">$filename.new" or die "Cannot create $filename";
+    print "Building $filename\n";
+
+    my $type = $filename;
+    $type =~ s!.*/norm(\w+)\.nls!$1!;
+
+    my $compose = $forms{$type} & 1; # odd form values (nfc, nfkc, idna) also get composition data
+    my $compat = !!($forms{$type} & 4) + ($type eq "idna"); # 0 for nfc/nfd, 1 for nfkc/nfkd, 2 for idna
+
+    my @version = split /\./, $UNIVERSION;
+
+    # combining classes
+
+    my @classes; # class value => index into @class_values
+    my @class_values; # distinct class values in increasing order
+
+    foreach my $c (grep defined, @combining_class_table)
+    {
+        $classes[$c] = 1 if $c < 0x100; # 0x100 is the Private Use marker set by load_data, not a class
+    }
+    for (my $i = 0; $i < @classes; $i++)
+    {
+        next unless defined $classes[$i];
+        $classes[$i] = @class_values;
+        push @class_values, $i;
+    }
+    push @class_values, 0 if (@class_values % 2); # pad to an even number of bytes
+    die "too many classes" if @class_values >= 0x40;
+
+    # character properties
+
+    my @char_props;
+    my @decomposed; # char => array ref of UTF-16 units of its decomposition
+    my @comp_hash_table; # bucket => flat list of to_utf16( first, second, composed )
+    my $comp_hash_size = $compose ? 254 : 0;
+
+    for (my $i = 0; $i <= $MAX_CHAR; $i++)
+    {
+        next unless defined $combining_class_table[$i];
+        if (defined $decomp{$type}->[$i])
+        {
+            my @dec = get_decomposition( $i, $decomp{$type} );
+            if ($compose && (my @comp = get_composition( $i, $compat )))
+            {
+                my $hash = ($comp[0] + 95 * $comp[1]) % $comp_hash_size; # bucket is derived from the two source chars
+                push @{$comp_hash_table[$hash]}, to_utf16( @comp, $i );
+
+                my $val = 0;
+                foreach my $d (@dec)
+                {
+                    $val = $combining_class_table[$d];
+                    last if $val; # stop at the first non-zero class
+                }
+                $char_props[$i] = $classes[$val];
+            }
+            else
+            {
+                $char_props[$i] = 0xbf; # has a decomposition but no composition entry
+            }
+            @dec = compose_hangul( @dec ) if $compose;
+            @dec = to_utf16( @dec );
+            push @dec, 0 if @dec >= 7; # the stored length saturates at 7 (see below), so long sequences get a 0 appended
+            $decomposed[$i] = \@dec;
+        }
+        else
+        {
+            if ($combining_class_table[$i] == 0x100)
+            {
+                $char_props[$i] = 0x7f; # Private Use
+            }
+            elsif ($combining_class_table[$i])
+            {
+                $char_props[$i] = $classes[$combining_class_table[$i]] | 0x80; # non-starter: class index with 0x80 set
+            }
+            elsif ($type eq "idna" && defined $idna_disallowed[$i])
+            {
+                $char_props[$i] = 0xff;
+            }
+            else
+            {
+                $char_props[$i] = 0;
+            }
+        }
+    }
+
+    if ($compose)
+    {
+        for (my $i = 0; $i <= $MAX_CHAR; $i++) # second pass: flag both source chars of every composition
+        {
+            my @comp = get_composition( $i, $compat );
+            next unless @comp;
+            if ($combining_class_table[$comp[1]]) # second char is a non-starter
+            {
+                $char_props[$comp[0]] |= 0x40 unless $char_props[$comp[0]] & 0x80;
+                $char_props[$comp[1]] |= 0x40;
+            }
+            else
+            {
+                $char_props[$comp[0]] = ($char_props[$comp[0]] & ~0x40) | 0x80;
+                $char_props[$comp[1]] |= 0xc0;
+            }
+        }
+    }
+
+    # surrogates
+    foreach my $i (0xd800..0xdbff) { $char_props[$i] = 0xdf; }
+    foreach my $i (0xdc00..0xdfff) { $char_props[$i] = 0x9f; }
+
+    # Hangul
+    if ($type eq "nfc") { foreach my $i (0x1100..0x117f) { $char_props[$i] = 0xff; } }
+    elsif ($compose) { foreach my $i (0x1100..0x11ff) { $char_props[$i] = 0xff; } }
+    foreach my $i (0xac00..0xd7ff) { $char_props[$i] = 0xff; }
+
+    # invalid chars
+    if ($type eq "idna") { foreach my $i (0x00..0x1f, 0x7f) { $char_props[$i] = 0xff; } }
+    foreach my $i (0xfdd0..0xfdef) { $char_props[$i] = 0xff; }
+    foreach my $i (0x00..0x10) # last two code points of each of the 17 planes
+    {
+        $char_props[($i << 16) | 0xfffe] = 0xff;
+        $char_props[($i << 16) | 0xffff] = 0xff;
+    }
+
+    # decomposition hash table
+
+    my @decomp_hash_table; # bucket => list of [ char, (length << 13) | offset ]
+    my @decomp_hash_index;
+    my @decomp_hash_data;
+    my $decomp_hash_size = 944;
+
+    # build string of character data, reusing substrings when possible
+    my $decomp_char_data = "";
+    foreach my $i (sort { @{$b} <=> @{$a} } grep defined, @decomposed) # longest sequences first
+    {
+        my $str = pack "U*", @{$i};
+        $decomp_char_data .= $str if index( $decomp_char_data, $str) == -1;
+    }
+    for (my $i = 0; $i < @decomposed; $i++)
+    {
+        next unless defined $decomposed[$i];
+        my $pos = index( $decomp_char_data, pack( "U*", @{$decomposed[$i]} ));
+        die "sequence not found" if $pos == -1;
+        my $len = @{$decomposed[$i]};
+        $len = 7 if $len > 7;
+        my $hash = $i % $decomp_hash_size;
+        push @{$decomp_hash_table[$hash]}, [ $i, ($len << 13) | $pos ];
+    }
+    for (my $i = 0; $i < $decomp_hash_size; $i++)
+    {
+        $decomp_hash_index[$i] = @decomp_hash_data / 2; # entries are stored as pairs
+        next unless defined $decomp_hash_table[$i];
+        if (@{$decomp_hash_table[$i]} == 1)
+        {
+            my $entry = $decomp_hash_table[$i]->[0];
+            if ($char_props[$entry->[0]] == 0xbf)
+            {
+                $decomp_hash_index[$i] = $entry->[1]; # lone 0xbf entry: its length/offset goes straight into the index
+                next;
+            }
+        }
+        foreach my $entry (@{$decomp_hash_table[$i]})
+        {
+            push @decomp_hash_data, $entry->[0] & 0xffff, $entry->[1]; # only the low 16 bits of the char are kept
+        }
+    }
+    push @decomp_hash_data, 0, 0;
+
+    # composition hash table
+
+    my @comp_hash_index;
+    my @comp_hash_data;
+    if (@comp_hash_table)
+    {
+        for (my $i = 0; $i < $comp_hash_size; $i++)
+        {
+            $comp_hash_index[$i] = @comp_hash_data; # start of bucket $i in the data array
+            push @comp_hash_data, @{$comp_hash_table[$i]} if defined $comp_hash_table[$i];
+        }
+        $comp_hash_index[$comp_hash_size] = @comp_hash_data; # extra index entry marks the end of the last bucket
+        push @comp_hash_data, 0, 0, 0;
+    }
+
+    my $level1 = ($MAX_CHAR + 1) / 128; # number of rows, 128 chars per row
+    my @rows = compress_char_props_table( $level1, @char_props[0..$MAX_CHAR] );
+
+    my @header = ( $version[0], $version[1], $version[2], 0, $forms{$type}, $compat ? 18 : 3,
+                   0, $decomp_hash_size, $comp_hash_size, 0 );
+    my @tables = (0) x 8;
+
+    $tables[0] = 16 + @header + @tables; # offsets count 16-bit words; 16 is the name field written first
+    $tables[1] = $tables[0] + @class_values / 2; # byte arrays take half as many words
+    $tables[2] = $tables[1] + $level1 / 2;
+    $tables[3] = $tables[2] + (@rows - $level1) / 2;
+    $tables[4] = $tables[3] + @decomp_hash_index;
+    $tables[5] = $tables[4] + @decomp_hash_data;
+    $tables[6] = $tables[5] + length $decomp_char_data;
+    $tables[7] = $tables[6] + @comp_hash_index;
+
+    print OUTPUT pack "S<16", unpack "U*", "norm$type.nlp";
+    print OUTPUT pack "S<*", @header;
+    print OUTPUT pack "S<*", @tables;
+    print OUTPUT pack "C*", @class_values;
+
+    print OUTPUT pack "C*", @rows[0..$level1-1]; # row index for each 128-char row
+    print OUTPUT pack "C*", @rows[$level1..$#rows]; # contents of the distinct rows
+    print OUTPUT pack "S<*", @decomp_hash_index;
+    print OUTPUT pack "S<*", @decomp_hash_data;
+    print OUTPUT pack "S<*", unpack "U*", $decomp_char_data;
+    print OUTPUT pack "S<*", @comp_hash_index;
+    print OUTPUT pack "S<*", @comp_hash_data;
+
+    close OUTPUT;
+    save_file($filename);
+}
+
+
 ################################################################
 # dump the combining class table
 sub dump_combining_class($)
@@ -2203,7 +2560,7 @@ sub dump_combining_class($)
     print OUTPUT "/* DO NOT EDIT!! */\n\n";
     print OUTPUT "#include \"windef.h\"\n\n";
 
-    dump_three_level_mapping( "combining_class_table", 0, 16, @combining_class_table );
+    dump_three_level_mapping( "combining_class_table", 0, 16, map { defined $_ ? $_ & 0xff : 0; } @combining_class_table );
     close OUTPUT;
     save_file($filename);
 }
@@ -2395,6 +2752,11 @@ dump_vertical( "dlls/gdi32/vertical.c" );
 dump_vertical( "dlls/wineps.drv/vertical.c" );
 dump_nameprep( "dlls/kernel32/nameprep.c" );
 dump_intl_nls("nls/l_intl.nls");
+dump_norm_table( "nls/normnfc.nls" );
+dump_norm_table( "nls/normnfd.nls" );
+dump_norm_table( "nls/normnfkc.nls" );
+dump_norm_table( "nls/normnfkd.nls" );
+dump_norm_table( "nls/normidna.nls" );
 foreach my $file (@allfiles) { dump_msdata_codepage( $file ); }
 dump_eucjp_codepage();
 