2009-07-14 23:28:49 +02:00
|
|
|
// Copyright (c) 2009, Thomas Goyne
|
|
|
|
// All rights reserved.
|
|
|
|
//
|
|
|
|
// Redistribution and use in source and binary forms, with or without
|
|
|
|
// modification, are permitted provided that the following conditions are met:
|
|
|
|
//
|
|
|
|
// * Redistributions of source code must retain the above copyright notice,
|
|
|
|
// this list of conditions and the following disclaimer.
|
|
|
|
// * Redistributions in binary form must reproduce the above copyright notice,
|
|
|
|
// this list of conditions and the following disclaimer in the documentation
|
|
|
|
// and/or other materials provided with the distribution.
|
|
|
|
// * Neither the name of the Aegisub Group nor the names of its contributors
|
|
|
|
// may be used to endorse or promote products derived from this software
|
|
|
|
// without specific prior written permission.
|
|
|
|
//
|
|
|
|
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
|
|
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
|
|
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
|
|
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
|
|
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
|
|
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
|
|
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
|
|
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
|
|
// POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
//
|
2009-07-29 07:43:02 +02:00
|
|
|
// Aegisub Project http://www.aegisub.org/
|
2009-07-14 23:28:49 +02:00
|
|
|
//
|
2009-07-29 07:43:02 +02:00
|
|
|
// $Id$
|
|
|
|
|
|
|
|
/// @file charset_conv.cpp
|
|
|
|
/// @brief Iconv-based implementation of character set conversions
|
|
|
|
/// @ingroup utility
|
|
|
|
///
|
2009-07-14 23:28:49 +02:00
|
|
|
|
2009-10-09 18:34:38 +02:00
|
|
|
#include "config.h"
|
|
|
|
|
2009-07-14 23:28:49 +02:00
|
|
|
#include "charset_conv.h"
|
2009-07-18 02:58:13 +02:00
|
|
|
|
2009-09-10 15:06:40 +02:00
|
|
|
#ifndef AGI_PRE
|
2009-07-18 02:58:13 +02:00
|
|
|
#include <errno.h>
|
2009-09-10 15:06:40 +02:00
|
|
|
#include <stdint.h>
|
|
|
|
|
2009-07-18 02:58:13 +02:00
|
|
|
#include <wx/hashmap.h>
|
|
|
|
#include <wx/intl.h>
|
2009-09-10 15:06:40 +02:00
|
|
|
#endif
|
2009-07-18 02:58:13 +02:00
|
|
|
|
|
|
|
WX_DECLARE_STRING_HASH_MAP(wxString, PrettyNamesHash);
|
2009-07-14 23:28:49 +02:00
|
|
|
|
|
|
|
#if wxUSE_THREADS
|
|
|
|
static wxMutex encodingListMutex;
|
|
|
|
#endif
|
|
|
|
|
|
|
|
static const iconv_t iconv_invalid = (iconv_t)-1;
|
|
|
|
static const size_t iconv_failed = (size_t)-1;
|
|
|
|
#define ICONV_CONST_CAST(a) const_cast<ICONV_CONST char *>(a)
|
|
|
|
|
|
|
|
static wxArrayString *supportedEncodings = NULL;
|
|
|
|
static wxArrayString *prettyEncodingList = NULL;
|
|
|
|
static PrettyNamesHash *prettyEncodingHash = NULL;
|
|
|
|
|
|
|
|
/// @brief Create a converter between wchar_t and a multibyte encoding
/// @param mbEncName   Name of the multibyte encoding; passed through
///                    GetRealEncodingName() before being handed to iconv
/// @param enableSubst Whether unconvertible characters should be substituted
///                    rather than making the conversion fail
/// @throws wxString if either conversion direction could not be opened
AegisubCSConv::AegisubCSConv(const wxChar *mbEncName, bool enableSubst)
: wcCharsetName(WCHAR_T_ENCODING)
, mbCharsetName(GetRealEncodingName(mbEncName))
, mbNulLen(0)
, enableSubst(enableSubst)
, m2w(wcCharsetName, mbCharsetName)
, w2m(mbCharsetName, wcCharsetName)
{
    if (m2w == iconv_invalid || w2m == iconv_invalid) {
        throw wxString::Format(L"Character set %s is not supported.", mbEncName);
    }

    if (enableSubst) {
        // Everything the substitution path in iconvWrapper()/ucToMbFallback()
        // reads must be valid *before* the first conversion below, because that
        // conversion already runs with substitution enabled.
        invalidRepSize = 0;

#ifndef ICONV_POSIX
        fallbacks.data = this;
        fallbacks.mb_to_uc_fallback = NULL;
        fallbacks.mb_to_wc_fallback = NULL;
        fallbacks.uc_to_mb_fallback = ucToMbFallback;
        fallbacks.wc_to_mb_fallback = NULL;
#endif

        // Encode the replacement character once; the stored size excludes the
        // terminating NUL that FromWChar converts along with it.
        const size_t repLen = FromWChar(invalidRep, sizeof(invalidRep), L"?");
        const size_t nulLen = GetMBNulLen();
        if (repLen != wxCONV_FAILED && nulLen != (size_t)-1 && repLen >= nulLen) {
            invalidRepSize = repLen - nulLen;
        }
        // Otherwise the replacement stays empty instead of getting a wrapped-around size.
    }
}
|
Note: This was done using a script! it's far from perfect but 95% of the work has been done already formatting-wise.
Document all functions, class, struct, union, enum, macro, variable, typedefs. This isn't the actual document in itself but empty documentation using any old documentation if it was there.
This was done using exuberant ctags to get tag info, then a TCL script to parse/remove old comments and convert them into Doxygen-style.
Some notes:
* Anything labeled 'DOCME' needs to be documented, @param and @return have been left blank as it would be annoying to delete the 'DOCME' from every one of those.
* Some multiline comments may have been munged into single line comments
* Leave the /// comments above global variables with a space, if they're harder to read then we'll be less likely to use them.
* Enum comments can go after the enumeration itself '[value] /// comment'
* include/aegisub/*.h haven't been converted yet, this will be done in a later commit
* Some documentation blocks are in the wrong place, in the .h when it should be in the .cpp, or vice versa.
See http://devel.aegisub.org/wiki/Doxygen for some details on Doxygen and a 'style guide'.
Originally committed to SVN as r3312.
2009-07-30 00:59:22 +02:00
|
|
|
|
2009-07-14 23:28:49 +02:00
|
|
|
/// @brief Create a heap-allocated copy of this converter
/// @return A new AegisubCSConv for the same multibyte encoding, with the same
///         substitution setting and the already computed NUL length
wxMBConv * AegisubCSConv::Clone() const {
    // Pass enableSubst along explicitly so the copy behaves like the original
    // rather than falling back to whatever the constructor does by default.
    AegisubCSConv *c = new AegisubCSConv(mbCharsetName, enableSubst);
    c->mbNulLen = mbNulLen;
    return c;
}
|
|
|
|
|
Note: This was done using a script! it's far from perfect but 95% of the work has been done already formatting-wise.
Document all functions, class, struct, union, enum, macro, variable, typedefs. This isn't the actual document in itself but empty documentation using any old documentation if it was there.
This was done using exuberant ctags to get tag info, then a TCL script to parse/remove old comments and convert them into Doxygen-style.
Some notes:
* Anything labeled 'DOCME' needs to be documented, @param and @return have been left blank as it would be annoying to delete the 'DOCME' from every one of those.
* Some multiline comments may have been munged into single line comments
* Leave the /// comments above global variables with a space, if they're harder to read then we'll be less likely to use them.
* Enum comments can go after the enumeration itself '[value] /// comment'
* include/aegisub/*.h haven't been converted yet, this will be done in a later commit
* Some documentation blocks are in the wrong place, in the .h when it should be in the .cpp, or vice versa.
See http://devel.aegisub.org/wiki/Doxygen for some details on Doxygen and a 'style guide'.
Originally committed to SVN as r3312.
2009-07-30 00:59:22 +02:00
|
|
|
/// @brief Calculate the size of NUL in the target encoding via iconv
|
2010-01-24 19:56:51 +01:00
|
|
|
/// @return The size in bytes of NUL
|
2009-07-14 23:28:49 +02:00
|
|
|
size_t AegisubCSConv::GetMBNulLen() const {
|
|
|
|
if (mbNulLen == 0) {
|
|
|
|
const wchar_t nulStr[] = L"";
|
|
|
|
char outBuff[8];
|
|
|
|
size_t inLen = sizeof(wchar_t);
|
|
|
|
size_t outLen = sizeof(outBuff);
|
2009-09-02 11:40:52 +02:00
|
|
|
char * inPtr = (char *)nulStr;
|
2009-07-14 23:28:49 +02:00
|
|
|
char * outPtr = outBuff;
|
|
|
|
|
|
|
|
size_t res = iconv(w2m, &inPtr, &inLen, &outPtr, &outLen);
|
|
|
|
|
|
|
|
if (res != 0)
|
2010-01-24 19:56:51 +01:00
|
|
|
mbNulLen = (size_t)-1;
|
2009-07-14 23:28:49 +02:00
|
|
|
else
|
2010-01-24 19:56:51 +01:00
|
|
|
mbNulLen = sizeof(outBuff) - outLen;
|
2009-07-14 23:28:49 +02:00
|
|
|
}
|
|
|
|
return mbNulLen;
|
|
|
|
}
|
|
|
|
|
|
|
|
size_t AegisubCSConv::MBBuffLen(const char * str) const {
|
|
|
|
size_t nulLen = GetMBNulLen();
|
|
|
|
const char *ptr;
|
|
|
|
switch (nulLen) {
|
|
|
|
case 1:
|
|
|
|
return strlen(str);
|
|
|
|
case 2:
|
|
|
|
for (ptr = str; *reinterpret_cast<const uint16_t *>(ptr) != 0; ptr += 2) ;
|
|
|
|
return ptr - str;
|
|
|
|
case 4:
|
|
|
|
for (ptr = str; *reinterpret_cast<const uint32_t *>(ptr) != 0; ptr += 4) ;
|
|
|
|
return ptr - str;
|
|
|
|
default:
|
|
|
|
return (size_t)-1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2010-01-24 19:56:51 +01:00
|
|
|
/// @brief Convert a string from multibyte to wide characters
|
|
|
|
/// @param dst Destination buffer.
|
|
|
|
/// @param dstSize Length of destination buffer in wchar_ts
|
|
|
|
/// @param src Source multibyte string
|
|
|
|
/// @param srcLen Length of source buffer in bytes, or -1 to autodetect
|
|
|
|
/// @return The number of wchar_ts needed to store the string in the target charset
|
2009-07-14 23:28:49 +02:00
|
|
|
size_t AegisubCSConv::ToWChar(wchar_t *dst, size_t dstSize, const char *src, size_t srcLen) const {
|
|
|
|
return doConversion(
|
|
|
|
m2w,
|
|
|
|
reinterpret_cast<char *>(dst),
|
|
|
|
dstSize * sizeof(wchar_t),
|
|
|
|
const_cast<char *>(src),
|
|
|
|
srcLen == wxNO_LEN ? MBBuffLen(src) + GetMBNulLen() : srcLen
|
|
|
|
) / sizeof(wchar_t);
|
|
|
|
}
|
|
|
|
|
2010-01-24 19:56:51 +01:00
|
|
|
/// @brief Convert a string from wide characters to multibyte
|
|
|
|
/// @param dst Destination buffer
|
|
|
|
/// @param dstSize Length of destination buffer in bytes
|
|
|
|
/// @param src Source wide character string
|
|
|
|
/// @param srcLen Length in wchar_ts of source, or -1 to autodetect
|
|
|
|
/// @return The number of bytes needed to store the string in the target charset
|
2009-07-14 23:28:49 +02:00
|
|
|
size_t AegisubCSConv::FromWChar(char *dst, size_t dstSize, const wchar_t *src, size_t srcLen) const {
|
|
|
|
return doConversion(
|
|
|
|
w2m,
|
|
|
|
dst,
|
|
|
|
dstSize,
|
|
|
|
reinterpret_cast<char *>(const_cast<wchar_t *>(src)),
|
|
|
|
(srcLen == wxNO_LEN ? wcslen(src) + 1 : srcLen) * sizeof(wchar_t)
|
|
|
|
);
|
|
|
|
}
|
|
|
|
|
2010-01-24 19:56:51 +01:00
|
|
|
// Perform a conversion if a buffer is given or calculate the needed buffer size if not
|
2009-09-02 11:40:52 +02:00
|
|
|
size_t AegisubCSConv::doConversion(iconv_t cd, char *dst, size_t dstSize, char *src, size_t srcSize) const {
|
2009-07-14 23:28:49 +02:00
|
|
|
if (dstSize > 0) {
|
|
|
|
return iconvWrapper(cd, &src, &srcSize, &dst, &dstSize);
|
|
|
|
}
|
|
|
|
|
|
|
|
// No destination given, so calculate the needed buffer size instead
|
|
|
|
char buff[32];
|
|
|
|
size_t buffSize = 32;
|
|
|
|
size_t charsWritten = 0;
|
|
|
|
size_t res;
|
|
|
|
|
|
|
|
do {
|
|
|
|
dst = buff;
|
|
|
|
dstSize = buffSize;
|
|
|
|
res = iconvWrapper(cd, &src, &srcSize, &dst, &dstSize);
|
|
|
|
|
|
|
|
charsWritten += dst - buff;
|
|
|
|
} while (res == iconv_failed && errno == E2BIG);
|
|
|
|
|
|
|
|
if (res == iconv_failed) return wxCONV_FAILED;
|
|
|
|
return charsWritten;
|
|
|
|
}
|
|
|
|
|
2010-01-24 19:56:51 +01:00
|
|
|
// Actually perform a conversion via iconv
|
2009-09-02 11:40:52 +02:00
|
|
|
size_t AegisubCSConv::iconvWrapper(iconv_t cd, char **inbuf, size_t *inbytesleft,
|
2010-01-24 19:56:51 +01:00
|
|
|
char **outbuf, size_t *outbytesleft) const {
|
2009-07-14 23:28:49 +02:00
|
|
|
|
|
|
|
#if wxUSE_THREADS
|
2010-01-24 19:56:51 +01:00
|
|
|
wxMutexLocker lock(iconvMutex);
|
2009-07-14 23:28:49 +02:00
|
|
|
#endif
|
|
|
|
|
|
|
|
char *outbuforig = *outbuf;
|
|
|
|
size_t res = iconv(cd, inbuf, inbytesleft, outbuf, outbytesleft);
|
|
|
|
|
|
|
|
if (res != iconv_failed)
|
|
|
|
return *outbuf - outbuforig;
|
|
|
|
if (!enableSubst)
|
|
|
|
return iconv_failed;
|
|
|
|
|
|
|
|
#ifdef ICONV_POSIX
|
|
|
|
if (errno == EILSEQ) {
|
2010-01-24 19:56:51 +01:00
|
|
|
throw
|
|
|
|
L"One or more characters do not fit in the selected "
|
|
|
|
L"encoding and the version of iconv Aegisub was built with"
|
|
|
|
L" does not have useful fallbacks. For best results, "
|
|
|
|
L"please rebuild Aegisub using a recent version of GNU iconv.";
|
2009-07-14 23:28:49 +02:00
|
|
|
}
|
|
|
|
return wxCONV_FAILED;
|
|
|
|
#else
|
|
|
|
// Save original errno so we can return it rather than the result from iconvctl
|
|
|
|
int err = errno;
|
|
|
|
|
|
|
|
// Some characters in the input string do not exist in the output encoding
|
|
|
|
if (res == iconv_failed && err == EILSEQ) {
|
|
|
|
// first try transliteration only
|
|
|
|
int transliterate = 1;
|
|
|
|
iconvctl(cd, ICONV_SET_TRANSLITERATE, &transliterate);
|
|
|
|
res = iconv(cd, inbuf, inbytesleft, outbuf, outbytesleft);
|
|
|
|
err = errno;
|
|
|
|
transliterate = 0;
|
|
|
|
iconvctl(cd, ICONV_SET_TRANSLITERATE, &transliterate);
|
|
|
|
}
|
|
|
|
if (res == iconv_failed && err == EILSEQ) {
|
|
|
|
// Conversion still failed with transliteration enabled, so try our substitution
|
2010-01-24 19:56:51 +01:00
|
|
|
iconvctl(cd, ICONV_SET_FALLBACKS, &fallbacks);
|
2009-07-14 23:28:49 +02:00
|
|
|
res = iconv(cd, inbuf, inbytesleft, outbuf, outbytesleft);
|
|
|
|
err = errno;
|
|
|
|
iconvctl(cd, ICONV_SET_FALLBACKS, NULL);
|
|
|
|
}
|
|
|
|
if (res == iconv_failed && err == EILSEQ) {
|
|
|
|
// Conversion still failed, so just drop any invalid characters
|
|
|
|
int discard = 1;
|
|
|
|
iconvctl(cd, ICONV_SET_DISCARD_ILSEQ, &discard);
|
|
|
|
res = iconv(cd, inbuf, inbytesleft, outbuf, outbytesleft);
|
|
|
|
err = errno;
|
|
|
|
discard = 0;
|
|
|
|
iconvctl(cd, ICONV_SET_DISCARD_ILSEQ, &discard);
|
|
|
|
}
|
|
|
|
|
|
|
|
errno = err;
|
|
|
|
if (res == iconv_failed) return wxCONV_FAILED;
|
|
|
|
return *outbuf - outbuforig;
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
Note: This was done using a script! it's far from perfect but 95% of the work has been done already formatting-wise.
Document all functions, class, struct, union, enum, macro, variable, typedefs. This isn't the actual document in itself but empty documentation using any old documentation if it was there.
This was done using exuberant ctags to get tag info, then a TCL script to parse/remove old comments and convert them into Doxygen-style.
Some notes:
* Anything labeled 'DOCME' needs to be documented, @param and @return have been left blank as it would be annoying to delete the 'DOCME' from every one of those.
* Some multiline comments may have been munged into single line comments
* Leave the /// comments above global variables with a space, if they're harder to read then we'll be less likely to use them.
* Enum comments can go after the enumeration itself '[value] /// comment'
* include/aegisub/*.h haven't been converted yet, this will be done in a later commit
* Some documentation blocks are in the wrong place, in the .h when it should be in the .cpp, or vice versa.
See http://devel.aegisub.org/wiki/Doxygen for some details on Doxygen and a 'style guide'.
Originally committed to SVN as r3312.
2009-07-30 00:59:22 +02:00
|
|
|
|
2010-01-24 19:56:51 +01:00
|
|
|
/// @brief GNU iconv character substitution callback
|
|
|
|
/// @param code Unicode character which could not be converted
|
|
|
|
/// @param callback Callback to tell iconv what string to use instead
|
|
|
|
/// @param callback_arg Iconv userdata for callback
|
|
|
|
/// @param convPtr AegisubCSConv instance to use
|
2009-07-14 23:28:49 +02:00
|
|
|
void AegisubCSConv::ucToMbFallback(
|
|
|
|
unsigned int code,
|
|
|
|
void (*callback) (const char *buf, size_t buflen, void* callback_arg),
|
|
|
|
void *callback_arg,
|
|
|
|
void *convPtr)
|
|
|
|
{
|
|
|
|
// At some point in the future, this should probably switch to a real mapping
|
2010-01-24 19:56:51 +01:00
|
|
|
// For now, there's just three cases: BOM to nothing, '\' to itself
|
|
|
|
// (for Shift-JIS, which does not have \) and everything else to '?'
|
2009-07-14 23:28:49 +02:00
|
|
|
if (code == 0xFEFF) return;
|
|
|
|
if (code == 0x5C) callback("\\", 1, callback_arg);
|
|
|
|
else {
|
|
|
|
AegisubCSConv *self = static_cast<AegisubCSConv *>(convPtr);
|
|
|
|
callback(self->invalidRep, self->invalidRepSize, callback_arg);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#ifndef ICONV_POSIX
|
2010-01-24 19:56:51 +01:00
|
|
|
/// @brief Callback for iconvlist
|
|
|
|
/// @param namescount Number of names in names
|
|
|
|
/// @param names Names to add to the list
|
|
|
|
/// @param data Unused userdata field
|
2009-07-14 23:28:49 +02:00
|
|
|
int addEncoding(unsigned int namescount, const char * const * names, void* data) {
|
|
|
|
for (unsigned int i = 0; i < namescount; i++) {
|
|
|
|
supportedEncodings->Add(wxString::FromAscii(names[i]));
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/// @brief Get the names of all encodings iconv reports as supported
///
/// The list is built on the first call and reused afterwards. Builds with
/// ICONV_POSIX defined do not query iconv, so the list stays empty there.
/// @return A copy of the encoding name list
wxArrayString AegisubCSConv::GetAllSupportedEncodings() {
#if wxUSE_THREADS
    wxMutexLocker lock(encodingListMutex);
#endif

    if (supportedEncodings != NULL) {
        return *supportedEncodings;
    }

    supportedEncodings = new wxArrayString();
#ifndef ICONV_POSIX
    iconvlist(addEncoding, NULL);
    supportedEncodings->Sort();
#endif
    return *supportedEncodings;
}
|
|
|
|
|
|
|
|
/// @brief Translate a display name into the encoding name to give to iconv
/// @param name Encoding name: "local" (any case), a display name produced by
///             GetEncodingsList(), or an encoding name to use unchanged
/// @return The system encoding name for "local", the mapped name if name is a
///         known display name, and name itself otherwise
wxString AegisubCSConv::GetRealEncodingName(wxString name) {
    if (name.Lower() == L"local") return wxLocale::GetSystemEncodingName();

    // Guard the pointer that is dereferenced below. GetEncodingsList()
    // publishes the list before the hash, so testing the list pointer does not
    // guarantee that the hash exists yet.
    if (prettyEncodingHash == NULL) return name;

    PrettyNamesHash::iterator realName = prettyEncodingHash->find(name);
    if (realName != prettyEncodingHash->end()) {
        return realName->second;
    }
    return name;
}
|
|
|
|
|
|
|
|
/// @brief Get the display names of the encodings which can be offered to users
///
/// On the first call every entry of the built-in table is probed with
/// iconv_open() in both directions; entries iconv cannot open either way are
/// left out. The resulting list (which starts with "Local") and the display
/// name -> iconv name map are kept for later calls.
/// @return A copy of the display name list
wxArrayString AegisubCSConv::GetEncodingsList() {
#if wxUSE_THREADS
    wxMutexLocker lock(encodingListMutex);
#endif

    if (prettyEncodingList != NULL) {
        return *prettyEncodingList;
    }

    struct EncodingName {
        const char *pretty;
        const char *real;
    };

    // Terminated by an entry whose names are NULL
    EncodingName encodingNames[] = {
        {"Unicode (UTF-8)", "utf-8"},
        {"Unicode (UTF-16)", "utf-16"},
        {"Unicode (UTF-16BE)", "utf-16be"},
        {"Unicode (UTF-16LE)", "utf-16le"},
        {"Unicode (UTF-32)", "utf-32"},
        {"Unicode (UTF-32BE)", "utf-32be"},
        {"Unicode (UTF-32LE)", "utf-32le"},
        {"Unicode (UTF-7)", "utf-7"},

        {"Arabic (IBM-864)", "ibm864"},
        {"Arabic (IBM-864-I)", "ibm864i"},
        {"Arabic (ISO-8859-6)", "iso-8859-6"},
        {"Arabic (ISO-8859-6-E)", "iso-8859-6-e"},
        {"Arabic (ISO-8859-6-I)", "iso-8859-6-i"},
        {"Arabic (Langbox ISO-8859-6.16)", "x-iso-8859-6-16"},
        {"Arabic (Langbox ISO-8859-6.8x)", "x-iso-8859-6-8-x"},
        {"Arabic (MacArabic)", "x-mac-arabic"},
        {"Arabic (Windows-1256)", "windows-1256"},

        {"Armenian (ARMSCII-8)", "armscii-8"},

        {"Baltic (ISO-8859-13)", "iso-8859-13"},
        {"Baltic (ISO-8859-4)", "iso-8859-4"},
        {"Baltic (Windows-1257)", "windows-1257"},

        {"Celtic (ISO-8859-14)", "iso-8859-14"},

        {"Central European (IBM-852)", "ibm852"},
        {"Central European (ISO-8859-2)", "iso-8859-2"},
        {"Central European (MacCE)", "x-mac-ce"},
        {"Central European (Windows-1250)", "windows-1250"},

        {"Chinese Simplified (GB18030)", "gb18030"},
        {"Chinese Simplified (GB2312)", "gb2312"},
        {"Chinese Simplified (GBK)", "x-gbk"},
        {"Chinese Simplified (HZ)", "hz-gb-2312"},
        {"Chinese Simplified (ISO-2022-CN)", "iso-2022-cn"},
        {"Chinese Traditional (Big5)", "big5"},
        {"Chinese Traditional (Big5-HKSCS)", "big5-hkscs"},
        {"Chinese Traditional (EUC-TW)", "x-euc-tw"},

        {"Croatian (MacCroatian)", "x-mac-croatian"},

        {"Cyrillic (IBM-855)", "ibm855"},
        {"Cyrillic (ISO-8859-5)", "iso-8859-5"},
        {"Cyrillic (ISO-IR-111)", "iso-ir-111"},
        {"Cyrillic (KOI8-R)", "koi8-r"},
        {"Cyrillic (MacCyrillic)", "x-mac-cyrillic"},
        {"Cyrillic (Windows-1251)", "windows-1251"},
        {"Cyrillic/Russian (CP-866)", "ibm866"},
        {"Cyrillic/Ukrainian (KOI8-U)", "koi8-u"},
        {"Cyrillic/Ukrainian (MacUkrainian)", "x-mac-ukrainian"},

        {"English (US-ASCII)", "us-ascii"},

        {"Farsi (MacFarsi)", "x-mac-farsi"},

        {"Georgian (GEOSTD8)", "geostd8"},

        {"Greek (ISO-8859-7)", "iso-8859-7"},
        {"Greek (MacGreek)", "x-mac-greek"},
        {"Greek (Windows-1253)", "windows-1253"},

        {"Gujarati (MacGujarati)", "x-mac-gujarati"},
        {"Gurmukhi (MacGurmukhi)", "x-mac-gurmukhi"},

        {"Hebrew (IBM-862)", "ibm862"},
        {"Hebrew (ISO-8859-8-E)", "iso-8859-8-e"},
        {"Hebrew (ISO-8859-8-I)", "iso-8859-8-i"},
        {"Hebrew (MacHebrew)", "x-mac-hebrew"},
        {"Hebrew (Windows-1255)", "windows-1255"},
        {"Hebrew Visual (ISO-8859-8)", "iso-8859-8"},

        {"Hindi (MacDevanagari)", "x-mac-devanagari"},
        {"Hindi (SunDevanagari)", "x-sun-unicode-india-0"},

        {"Icelandic (MacIcelandic)", "x-mac-icelandic"},

        {"Japanese (EUC-JP)", "euc-jp"},
        {"Japanese (ISO-2022-JP)", "iso-2022-jp"},
        {"Japanese (Shift_JIS)", "shift_jis"},

        {"Korean (EUC-KR)", "euc-kr"},
        {"Korean (ISO-2022-KR)", "iso-2022-kr"},
        {"Korean (JOHAB)", "x-johab"},
        {"Korean (UHC)", "x-windows-949"},

        {"Nordic (ISO-8859-10)", "iso-8859-10"},

        {"Romanian (ISO-8859-16)", "iso-8859-16"},
        {"Romanian (MacRomanian)", "x-mac-romanian"},

        {"South European (ISO-8859-3)", "iso-8859-3"},

        {"Thai (IBM-874)", "ibm874"},
        {"Thai (ISO-8859-11)", "iso-8859-11"},
        {"Thai (TIS-620)", "tis-620"},
        {"Thai (Windows-874)", "windows-874"},

        {"Turkish (IBM-857)", "ibm857"},
        {"Turkish (ISO-8859-9)", "iso-8859-9"},
        {"Turkish (MacTurkish)", "x-mac-turkish"},
        {"Turkish (Windows-1254)", "windows-1254"},

        {"Vietnamese (TCVN)", "x-viet-tcvn5712"},
        {"Vietnamese (VISCII)", "viscii"},
        {"Vietnamese (VPS)", "x-viet-vps"},
        {"Vietnamese (Windows-1258)", "windows-1258"},

        {"Western (IBM-850)", "ibm850"},
        {"Western (ISO-8859-1)", "iso-8859-1"},
        {"Western (ISO-8859-15)", "iso-8859-15"},
        {"Western (MacRoman)", "x-mac-roman"},
        {"Western (Windows-1252)", "windows-1252"},

        {NULL, NULL}
    };

    PrettyNamesHash *nameMap = new PrettyNamesHash(100);
    wxArrayString *names = new wxArrayString();
    names->Add(L"Local");

    for (const EncodingName *entry = encodingNames; entry->real != NULL; ++entry) {
        // Only offer encodings iconv can convert both to and from
        iconv_t fromWide = iconv_open(entry->real, WCHAR_T_ENCODING);
        if (fromWide == iconv_invalid) continue;
        iconv_close(fromWide);

        iconv_t toWide = iconv_open(WCHAR_T_ENCODING, entry->real);
        if (toWide == iconv_invalid) continue;
        iconv_close(toWide);

        wxString displayName = wxString::FromAscii(entry->pretty);
        names->Add(displayName);
        (*nameMap)[displayName] = wxString::FromAscii(entry->real);
    }

    prettyEncodingList = names;
    prettyEncodingHash = nameMap;

    return *prettyEncodingList;
}
|
2010-01-24 19:56:51 +01:00
|
|
|
/// Converter for the "Local" encoding (resolved to the system encoding by
/// GetRealEncodingName), created with substitution disabled
static AegisubCSConv localConv(L"Local", false);

/// Externally visible reference to the file-local converter above
AegisubCSConv& csConvLocal = localConv;
|