Added EUC-JP encoding as codepage 20932 (with the help of Aric
Stewart).
This commit is contained in:
parent
65f4436566
commit
141966b052
|
@ -54,6 +54,7 @@ CODEPAGES = \
|
|||
10079 \
|
||||
10081 \
|
||||
20866 \
|
||||
20932 \
|
||||
28591 \
|
||||
28592 \
|
||||
28593 \
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -79,6 +79,7 @@ $DEF_CHAR = ord '?';
|
|||
[ 10079, "VENDORS/MICSFT/MAC/ICELAND.TXT", "Mac Icelandic" ],
|
||||
[ 10081, "VENDORS/MICSFT/MAC/TURKISH.TXT", "Mac Turkish" ],
|
||||
[ 20866, "VENDORS/MISC/KOI8-R.TXT", "Russian KOI8" ],
|
||||
[ 20932, "OBSOLETE/EASTASIA/JIS/JIS0208.TXT", "EUC-JP" ],
|
||||
[ 28591, "ISO8859/8859-1.TXT", "ISO 8859-1 Latin 1" ],
|
||||
[ 28592, "ISO8859/8859-2.TXT", "ISO 8859-2 Latin 2 (East European)" ],
|
||||
[ 28593, "ISO8859/8859-3.TXT", "ISO 8859-3 Latin 3 (South European)" ],
|
||||
|
@ -401,6 +402,71 @@ sub READ_SYMBOL_FILE
|
|||
}
|
||||
|
||||
|
||||
################################################################
# build EUC-JP table from the JIS 0208 file
#
# Takes the path of the mapping file as its only argument and
# rebuilds the global @cp2uni, @uni2cp and @lead_bytes tables from
# scratch.  Dies if the file cannot be opened or if it contains a
# line that is neither a comment, an empty line, a ^Z line nor a
# three-column "0x.... 0x.... 0x...." mapping entry.
#
# FIXME: for proper EUC-JP we should probably read JIS 0212 too
# but this would require 3-byte DBCS characters
sub READ_JIS0208_FILE
{
    my $name = shift;
    @cp2uni = ();
    @lead_bytes = ();
    @uni2cp = ();

    # ASCII chars
    for my $i (0x00 .. 0x7f)
    {
        $cp2uni[$i] = $i;
        $uni2cp[$i] = $i;
    }

    # JIS X 0201 right plane, reached through the 0x8e lead byte
    for my $i (0xa1 .. 0xdf)
    {
        $cp2uni[0x8e00 + $i] = 0xfec0 + $i;
        $uni2cp[0xfec0 + $i] = 0x8e00 + $i;
    }

    # lead bytes
    for my $i (0x8e, 0x8f, 0xa1 .. 0xfe)
    {
        push @lead_bytes, $i;
        $cp2uni[$i] = 0;
    }

    # undefined chars
    for my $i (0x80 .. 0x8d, 0x90 .. 0xa0, 0xff)
    {
        $cp2uni[$i] = $DEF_CHAR;
    }

    # Shift-JIS compatibility
    $uni2cp[0x00a5] = 0x5c;
    $uni2cp[0x203e] = 0x7e;

    # Fix backslash conversion
    $cp2uni[0xa1c0] = 0xff3c;
    $uni2cp[0xff3c] = 0xa1c0;

    # Three-argument open on a lexical handle: the file name is never
    # interpreted as a mode or a pipe, and the handle is released even
    # when we die on a malformed line below.
    open my $input, '<', $name or die "Cannot open $name: $!";
    while (<$input>)
    {
        next if /^\#/;  # skip comments
        next if /^$/;  # skip empty lines
        next if /\x1a/;  # skip ^Z

        # The first column is matched but not used; the second column is
        # the code that gets turned into the EUC-JP value, the third one
        # is the Unicode value.
        if (/^0x[0-9a-fA-F]+\s+0x([0-9a-fA-F]+)\s+0x([0-9a-fA-F]+)\s+(\#.*)?/)
        {
            # adding 0x8080 sets the high bit of both bytes
            my $cp = 0x8080 + hex $1;
            my $uni = hex $2;

            # entries set up above take precedence over the file, and the
            # first mapping found in the file wins over later duplicates
            $cp2uni[$cp] = $uni unless defined($cp2uni[$cp]);
            $uni2cp[$uni] = $cp unless defined($uni2cp[$uni]);
            next;
        }
        die "$name: Unrecognized line $_\n";
    }
    close $input;
}
|
||||
|
||||
|
||||
################################################################
|
||||
# add default mappings once the file has been read
|
||||
sub ADD_DEFAULT_MAPPINGS
|
||||
|
@ -907,6 +973,7 @@ sub HANDLE_FILE
|
|||
|
||||
# symbol codepage file is special
|
||||
if ($codepage == 42) { READ_SYMBOL_FILE($MAPPREFIX . $filename); }
|
||||
elsif ($codepage == 20932) { READ_JIS0208_FILE($MAPPREFIX . $filename); }
|
||||
else { READ_FILE($MAPPREFIX . $filename); }
|
||||
|
||||
ADD_DEFAULT_MAPPINGS();
|
||||
|
|
|
@ -69,6 +69,7 @@ extern union cptable cptable_10029;
|
|||
extern union cptable cptable_10079;
|
||||
extern union cptable cptable_10081;
|
||||
extern union cptable cptable_20866;
|
||||
extern union cptable cptable_20932;
|
||||
extern union cptable cptable_28591;
|
||||
extern union cptable cptable_28592;
|
||||
extern union cptable cptable_28593;
|
||||
|
@ -84,7 +85,7 @@ extern union cptable cptable_28604;
|
|||
extern union cptable cptable_28605;
|
||||
extern union cptable cptable_28606;
|
||||
|
||||
static const union cptable * const cptables[59] =
|
||||
static const union cptable * const cptables[60] =
|
||||
{
|
||||
&cptable_037,
|
||||
&cptable_042,
|
||||
|
@ -131,6 +132,7 @@ static const union cptable * const cptables[59] =
|
|||
&cptable_10079,
|
||||
&cptable_10081,
|
||||
&cptable_20866,
|
||||
&cptable_20932,
|
||||
&cptable_28591,
|
||||
&cptable_28592,
|
||||
&cptable_28593,
|
||||
|
|
Loading…
Reference in New Issue