mlang: Correct and fill out Japanese locale information.

Implement Japanese auto detect, JIS and EUC encoding conversions to
Unicode and back. These conversions are not done in code pages but are
simple algorithms.
This commit is contained in:
Aric Stewart 2008-07-22 20:13:41 +09:00 committed by Alexandre Julliard
parent 34a17650f3
commit db726e4165
1 changed files with 436 additions and 8 deletions

View File

@ -258,16 +258,46 @@ static const MIME_CP_INFO hebrew_cp[] =
};
static const MIME_CP_INFO japanese_cp[] =
{
{ "Japanese (Auto-Select)",
50932, MIMECONTF_MAILNEWS | MIMECONTF_BROWSER | MIMECONTF_MINIMAL |
MIMECONTF_IMPORT | MIMECONTF_VALID | MIMECONTF_VALID_NLS |
MIMECONTF_MIME_IE4 | MIMECONTF_MIME_LATEST,
"_autodetect", "_autodetect", "_autodetect" },
{ "Japanese (EUC)",
51932, MIMECONTF_MAILNEWS | MIMECONTF_BROWSER | MIMECONTF_MINIMAL |
MIMECONTF_IMPORT | MIMECONTF_SAVABLE_MAILNEWS |
MIMECONTF_SAVABLE_BROWSER | MIMECONTF_EXPORT | MIMECONTF_VALID |
MIMECONTF_VALID_NLS | MIMECONTF_MIME_IE4 | MIMECONTF_MIME_LATEST,
"euc-jp", "euc-jp", "euc-jp" },
{ "Japanese (JIS)",
50220, MIMECONTF_IMPORT | MIMECONTF_MAILNEWS | MIMECONTF_EXPORT |
MIMECONTF_SAVABLE_MAILNEWS | MIMECONTF_VALID_NLS |
MIMECONTF_PRIVCONVERTER | MIMECONTF_MIME_LATEST |
MIMECONTF_MIME_IE4,
"iso-2022-jp","iso-2022-jp","iso-2022-jp"},
{ "Japanese (JIS 0208-1990 and 0212-1990)",
20932, MIMECONTF_IMPORT | MIMECONTF_EXPORT | MIMECONTF_VALID_NLS |
MIMECONTF_VALID | MIMECONTF_PRIVCONVERTER | MIMECONTF_MIME_LATEST,
"EUC-JP","EUC-JP","EUC-JP"},
{ "Japanese (JIS-Allow 1 byte Kana)",
50221, MIMECONTF_MAILNEWS | MIMECONTF_EXPORT | MIMECONTF_SAVABLE_BROWSER |
MIMECONTF_SAVABLE_MAILNEWS | MIMECONTF_VALID_NLS |
MIMECONTF_VALID | MIMECONTF_PRIVCONVERTER | MIMECONTF_MIME_LATEST,
"csISO2022JP","iso-2022-jp","iso-2022-jp"},
{ "Japanese (JIS-Allow 1 byte Kana - SO/SI)",
50222, MIMECONTF_EXPORT | MIMECONTF_VALID_NLS | MIMECONTF_VALID |
MIMECONTF_PRIVCONVERTER | MIMECONTF_MIME_LATEST,
"iso-2022-jp","iso-2022-jp","iso-2022-jp"},
{ "Japanese (Mac)",
10001, MIMECONTF_IMPORT | MIMECONTF_EXPORT | MIMECONTF_VALID_NLS |
MIMECONTF_VALID | MIMECONTF_PRIVCONVERTER | MIMECONTF_MIME_LATEST,
"x-mac-japanese","x-mac-japanese","x-mac-japanese"},
{ "Japanese (Shift-JIS)",
932, MIMECONTF_MAILNEWS | MIMECONTF_BROWSER | MIMECONTF_MINIMAL |
MIMECONTF_IMPORT | MIMECONTF_SAVABLE_MAILNEWS |
MIMECONTF_SAVABLE_BROWSER | MIMECONTF_EXPORT | MIMECONTF_VALID_NLS |
MIMECONTF_MIME_IE4 | MIMECONTF_MIME_LATEST,
"shift_jis", "iso-2022-jp", "iso-2022-jp" },
{ "Japanese (JIS 0208-1990 and 0212-1990)",
20932, MIMECONTF_IMPORT | MIMECONTF_EXPORT | MIMECONTF_VALID_NLS |
MIMECONTF_MIME_LATEST,
"euc-jp", "euc-jp", "euc-jp" }
MIMECONTF_SAVABLE_BROWSER | MIMECONTF_EXPORT | MIMECONTF_VALID |
MIMECONTF_VALID_NLS | MIMECONTF_MIME_IE4 | MIMECONTF_MIME_LATEST,
"shift_jis", "iso-2022-jp", "iso-2022-jp" }
};
static const MIME_CP_INFO korean_cp[] =
{
@ -439,7 +469,7 @@ static const struct mlang_data
{ "Hebrew",1255,sizeof(hebrew_cp)/sizeof(hebrew_cp[0]),hebrew_cp,
"Courier","Arial" }, /* FIXME */
{ "Japanese",932,sizeof(japanese_cp)/sizeof(japanese_cp[0]),japanese_cp,
"Courier","Arial" }, /* FIXME */
"MS Gothic","MS PGothic" },
{ "Korean",949,sizeof(korean_cp)/sizeof(korean_cp[0]),korean_cp,
"Courier","Arial" }, /* FIXME */
{ "Thai",874,sizeof(thai_cp)/sizeof(thai_cp[0]),thai_cp,
@ -458,6 +488,371 @@ static void fill_cp_info(const struct mlang_data *ml_data, UINT index, MIMECPINF
static LONG dll_count;
/*
* Japanese Detection and Converstion Functions
*/
#define HANKATA(A) ((A >= 161) && (A <= 223))
#define ISEUC(A) ((A >= 161) && (A <= 254))
#define NOTEUC(A,B) (((A >= 129) && (A <= 159)) && ((B >= 64) && (B <= 160)))
#define SJIS1(A) (((A >= 129) && (A <= 159)) || ((A >= 224) && (A <= 239)))
#define SJIS2(A) ((A >= 64) && (A <= 252))
#define ISMARU(A) ((A >= 202) && (A <= 206))
#define ISNIGORI(A) (((A >= 182) && (A <= 196)) || ((A >= 202) && (A <= 206)))
static UINT DetectJapaneseCode(LPCSTR input, DWORD count)
{
UINT code = 0;
int i = 0;
unsigned char c1,c2;
while ((code == 0 || code == 51932) && i < count)
{
c1 = input[i];
if (c1 == 0x1b /* ESC */)
{
i++;
if (i >= count)
return code;
c1 = input[i];
if (c1 == '$')
{
i++;
if (i >= count)
return code;
c1 = input[i];
if (c1 =='B' || c1 == '@')
code = 50220;
}
if (c1 == 'K')
code = 50220;
}
else if (c1 >= 129)
{
i++;
if (i >= count)
return code;
c2 = input[i];
if NOTEUC(c1,c2)
code = 932;
else if (ISEUC(c1) && ISEUC(c2))
code = 51932;
else if (((c1 == 142)) && HANKATA(c2))
code = 51932;
}
i++;
}
return code;
}
static inline void jis2sjis(unsigned char *p1, unsigned char *p2)
{
unsigned char c1 = *p1;
unsigned char c2 = *p2;
int row = c1 < 95 ? 112 : 176;
int cell = c1 % 2 ? 31 + (c2 > 95) : 126;
*p1 = ((c1 + 1) >> 1) + row;
*p2 = c2 + cell;
}
static inline void sjis2jis(unsigned char *p1, unsigned char *p2)
{
unsigned char c1 = *p1;
unsigned char c2 = *p2;
int shift = c2 < 159;
int row = c1 < 160 ? 112 : 176;
int cell = shift ? (31 + (c2 > 127)): 126;
*p1 = ((c1 - row) << 1) - shift;
*p2 -= cell;
}
static int han2zen(unsigned char *p1, unsigned char *p2)
{
int maru = FALSE;
int nigori = FALSE;
static const unsigned char char1[] = {129,129,129,129,129,131,131,131,131,
131,131,131,131,131,131,129,131,131,131,131,131,131,131,131,131,131,
131,131,131,131,131,131,131,131,131,131,131,131,131,131,131,131,131,
131,131,131,131,131,131,131,131,131,131,131,131,131,131,131,131,131,
131,129,129 };
static const unsigned char char2[] = {66,117,118,65,69,146,64,66,68,70,
72,131,133,135,98,91,65,67,69,71,73,74,76,78,80,82,84,86,88,90,92,94,
96,99,101,103,105,106,107,108,109,110,113,116,119,122,125,126,128,
129,130,132,134,136,137,138,139,140,141,143,147,74,75};
if (( *p2 == 222) && ((ISNIGORI(*p1) || (*p1 == 179))))
nigori = TRUE;
else if ((*p2 == 223) && (ISMARU(*p1)))
maru = TRUE;
if (*p1 >= 161 && *p1 <= 223)
{
unsigned char index = *p1 - 161;
*p1 = char1[index];
*p2 = char2[index];
}
if (maru || nigori)
{
if (nigori)
{
if (((*p2 >= 74) && (*p2 <= 103)) || ((*p2 >= 110) && (*p2 <= 122)))
(*p2)++;
else if ((*p1 == 131) && (*p2 == 69))
*p2 = 148;
}
else if ((maru) && ((*p2 >= 110) && (*p2 <= 122)))
*p2+= 2;
return 1;
}
return 0;
}
static UINT ConvertJIS2SJIS(LPCSTR input, DWORD count, LPSTR output)
{
int i = 0;
int j = 0;
unsigned char p2,p;
int shifted = FALSE;
while (i < count)
{
p = input[i];
if (p == 0x1b /* ESC */)
{
i++;
if (i >= count)
return 0;
p2 = input[i];
if (p2 == '$' || p2 =='(')
i++;
if (p2 == 'K' || p2 =='$')
shifted = TRUE;
else
shifted = FALSE;
}
else
{
if (shifted)
{
i++;
if (i >= count)
return 0;
p2 = input[i];
jis2sjis(&p,&p2);
output[j++]=p;
output[j++]=p2;
}
else
{
output[j++] = p;
}
}
i++;
}
return j;
}
static inline int exit_shift(LPSTR out, int c)
{
if (out)
{
out[c] = 0x1b;
out[c+1] = '(';
out[c+2] = 'B';
}
return 3;
}
static inline int enter_shift(LPSTR out, int c)
{
if (out)
{
out[c] = 0x1b;
out[c+1] = '$';
out[c+2] = 'B';
}
return 3;
}
static UINT ConvertSJIS2JIS(LPCSTR input, DWORD count, LPSTR output)
{
int i = 0;
int j = 0;
unsigned char p2,p;
int shifted = FALSE;
while (i < count)
{
p = input[i] & 0xff;
if (p == 10 || p == 13) /* NL and CR */
{
if (shifted)
{
shifted = FALSE;
j += exit_shift(output,j);
}
if (output)
output[j++] = p;
else
j++;
}
else
{
if (SJIS1(p))
{
i++;
if (i >= count)
return 0;
p2 = input[i] & 0xff;
if (SJIS2(p2))
{
sjis2jis(&p,&p2);
if (!shifted)
{
shifted = TRUE;
j+=enter_shift(output,j);
}
}
if (output)
{
output[j++]=p;
output[j++]=p2;
}
else
j+=2;
}
else
{
if (HANKATA(p))
{
if ((i+1) >= count)
return 0;
p2 = input[i+1] & 0xff;
i+=han2zen(&p,&p2);
sjis2jis(&p,&p2);
if (!shifted)
{
shifted = TRUE;
j+=enter_shift(output,j);
}
if (output)
{
output[j++]=p;
output[j++]=p2;
}
else
j+=2;
}
else
{
if (shifted)
{
shifted = FALSE;
j += exit_shift(output,j);
}
if (output)
output[j++]=p;
else
j++;
}
}
}
i++;
}
if (shifted)
j += exit_shift(output,j);
return j;
}
static UINT ConvertJISJapaneseToUnicode(LPCSTR input, DWORD count,
LPWSTR output, DWORD out_count)
{
CHAR *sjis_string;
UINT rc = 0;
sjis_string = HeapAlloc(GetProcessHeap(),0,count);
rc = ConvertJIS2SJIS(input,count,sjis_string);
if (rc)
{
TRACE("%s\n",debugstr_an(sjis_string,rc));
if (output)
rc = MultiByteToWideChar(932,0,sjis_string,rc,output,out_count);
else
rc = MultiByteToWideChar(932,0,sjis_string,rc,0,0);
}
HeapFree(GetProcessHeap(),0,sjis_string);
return rc;
}
static UINT ConvertUnknownJapaneseToUnicode(LPCSTR input, DWORD count,
LPWSTR output, DWORD out_count)
{
CHAR *sjis_string;
UINT rc = 0;
int code = DetectJapaneseCode(input,count);
TRACE("Japanese code %i\n",code);
if (code == 932)
{
if (output)
rc = MultiByteToWideChar(932,0,input,count,output,out_count);
else
rc = MultiByteToWideChar(932,0,input,count,0,0);
}
else if (code == 51932)
{
if (output)
rc = MultiByteToWideChar(20932,0,input,count,output,out_count);
else
rc = MultiByteToWideChar(20932,0,input,count,0,0);
}
else if (code == 50220)
{
sjis_string = HeapAlloc(GetProcessHeap(),0,count);
rc = ConvertJIS2SJIS(input,count,sjis_string);
if (rc)
{
TRACE("%s\n",debugstr_an(sjis_string,rc));
if (output)
rc = MultiByteToWideChar(932,0,sjis_string,rc,output,out_count);
else
rc = MultiByteToWideChar(932,0,sjis_string,rc,0,0);
}
HeapFree(GetProcessHeap(),0,sjis_string);
}
return rc;
}
static UINT ConvertJapaneseUnicodeToJIS(LPCWSTR input, DWORD count,
LPSTR output, DWORD out_count)
{
CHAR *sjis_string;
INT len;
UINT rc = 0;
len = WideCharToMultiByte(932,0,input,count,0,0,NULL,NULL);
sjis_string = HeapAlloc(GetProcessHeap(),0,len);
WideCharToMultiByte(932,0,input,count,sjis_string,len,NULL,NULL);
TRACE("%s\n",debugstr_an(sjis_string,len));
rc = ConvertSJIS2JIS(sjis_string, len, NULL);
if (out_count >= rc)
{
ConvertSJIS2JIS(sjis_string, len, output);
}
HeapFree(GetProcessHeap(),0,sjis_string);
return rc;
}
/*
* Dll lifetime tracking declaration
*/
@ -510,6 +905,10 @@ HRESULT WINAPI ConvertINetMultiByteToUnicode(
return S_OK;
}
/* forwarding euc-jp to EUC-JP */
if (dwEncoding == 51932)
dwEncoding = 20932;
switch (dwEncoding)
{
case CP_UNICODE:
@ -521,6 +920,15 @@ HRESULT WINAPI ConvertINetMultiByteToUnicode(
memmove(pDstStr, pSrcStr, *pcDstSize * sizeof(WCHAR));
break;
case 50220:
case 50221:
case 50222:
*pcDstSize = ConvertJISJapaneseToUnicode(pSrcStr,*pcSrcSize,pDstStr,*pcDstSize);
break;
case 50932:
*pcDstSize = ConvertUnknownJapaneseToUnicode(pSrcStr,*pcSrcSize,pDstStr,*pcDstSize);
break;
default:
if (*pcSrcSize == -1)
*pcSrcSize = lstrlenA(pSrcStr);
@ -567,6 +975,10 @@ HRESULT WINAPI ConvertINetUnicodeToMultiByte(
if (*pcSrcSize == -1)
*pcSrcSize = lstrlenW(pSrcStr);
/* forwarding euc-jp to EUC-JP */
if (dwEncoding == 51932)
dwEncoding = 20932;
if (dwEncoding == CP_UNICODE)
{
if (*pcSrcSize == -1)
@ -579,6 +991,22 @@ HRESULT WINAPI ConvertINetUnicodeToMultiByte(
if (size >= destsz)
goto fail;
}
else if (dwEncoding == 50220 || dwEncoding == 50221 || dwEncoding == 50222)
{
size = ConvertJapaneseUnicodeToJIS(pSrcStr, *pcSrcSize, NULL, 0);
if (!size)
goto fail;
if (pDstStr)
{
size = min(size, destsz);
size = ConvertJapaneseUnicodeToJIS(pSrcStr, *pcSrcSize, pDstStr,
destsz);
if (!size)
goto fail;
}
}
else
{
size = WideCharToMultiByte(dwEncoding, 0, pSrcStr, *pcSrcSize,