Move most character set conversion code to libaegisub and make everything use the new conversion functionality.

Originally committed to SVN as r4423.
This commit is contained in:
Thomas Goyne 2010-06-03 20:32:25 +00:00
parent 7337a11745
commit b6d29443a3
32 changed files with 967 additions and 644 deletions

View File

@ -20,6 +20,7 @@
<Configurations>
<Configuration
Name="Debug|Win32"
IntermediateDirectory="$(ConfigurationName)"
ConfigurationType="4"
InheritedPropertySheets=".\wxlib_include.vsprops;.\libraries_outdirs.vsprops;.\compiler_options_debug.vsprops;..\aegisub_vs2008\wxlib_lib32.vsprops;..\aegisub_vs2008\suffix_debug32.vsprops;.\precomp_header.vsprops;.\src_msvc_include_dir.vsprops"
CharacterSet="1"
@ -190,11 +191,11 @@
>
</File>
<File
RelativePath="..\..\libaegisub\common\charset_ucd.cpp"
RelativePath="..\..\libaegisub\common\charset_conv.cpp"
>
</File>
<File
RelativePath="..\..\libaegisub\common\log.cpp"
RelativePath="..\..\libaegisub\common\charset_ucd.cpp"
>
</File>
<File
@ -231,6 +232,10 @@
RelativePath="..\..\libaegisub\windows\access.cpp"
>
</File>
<File
RelativePath="..\..\libaegisub\windows\charset_conv_win.cpp"
>
</File>
<File
RelativePath="..\..\libaegisub\windows\io.cpp"
>
@ -303,6 +308,18 @@
RelativePath="..\..\libaegisub\include\libaegisub\access.h"
>
</File>
<File
RelativePath="..\..\libaegisub\include\libaegisub\charset_conv.h"
>
</File>
<File
RelativePath="..\..\libaegisub\include\libaegisub\charset_conv_win.h"
>
</File>
<File
RelativePath="..\..\libaegisub\include\libaegisub\charsets.def"
>
</File>
<File
RelativePath="..\..\libaegisub\include\libaegisub\colour.h"
>

View File

@ -228,6 +228,10 @@
RelativePath="..\..\tests\libaegisub_cajun.cpp"
>
</File>
<File
RelativePath="..\..\tests\libaegisub_iconv.cpp"
>
</File>
<File
RelativePath="..\..\tests\libaegisub_mru.cpp"
>

View File

@ -21,6 +21,7 @@ endif
libaegisub_2_2_la_SOURCES = \
common/charset.cpp \
common/charset_conv.cpp \
common/charset_ucd.cpp \
common/mru.cpp \
common/option.cpp \

View File

@ -0,0 +1,327 @@
// Copyright (c) 2010, Thomas Goyne <plorkyeran@aegisub.org>
//
// Permission to use, copy, modify, and distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
//
// $Id$
/// @file charset_conv.cpp
/// @brief Wrapper for libiconv to present a more C++-friendly API
/// @ingroup libaegisub
#ifndef LAGI_PRE
#endif
#include <libaegisub/charset_conv.h>
#include <iconv.h>
// Check if we can use advanced fallback capabilities added in GNU's iconv
// implementation
#if !defined(_LIBICONV_VERSION) || _LIBICONV_VERSION < 0x010A || defined(LIBICONV_PLUG)
#define ICONV_POSIX
#endif
static const iconv_t iconv_invalid = (iconv_t)-1;
static const size_t iconv_failed = (size_t)-1;
namespace {
	/// Ordering functor for C strings: compares the characters pointed to
	/// rather than the pointer values, so it can be used as a std::map
	/// comparator with const char* keys
	struct ltstr {
		bool operator()(const char* lhs, const char* rhs) const {
			const int order = strcmp(lhs, rhs);
			return order < 0;
		}
	};
}
/// @brief Map a user-friendly encoding name to the real encoding name
/// @param name A pretty name listed in charsets.def, or any other encoding name
/// @return The real name registered for a pretty name, or name itself when
///         it is not present in the table
static const char* GetRealEncodingName(const char* name) {
	typedef std::map<const char*, const char*, ltstr> NameMap;
	// Filled from charsets.def the first time the function is called
	static NameMap prettyNames;
	if (prettyNames.empty()) {
# define ADD(pretty, real) prettyNames[pretty] = real
# include <libaegisub/charsets.def>
# undef ADD
	}
	const NameMap::iterator found = prettyNames.find(name);
	return found == prettyNames.end() ? name : found->second;
}
namespace agi {
namespace charset {
#ifdef ICONV_POSIX
/// Converter used when only plain POSIX iconv is available: both constructor
/// arguments are ignored and every call is passed straight through to iconv()
class IconvWrapper::Converter {
public:
	Converter(bool, const char*) { }
	/// Forward all arguments to iconv() unchanged and hand back its result
	size_t operator()(iconv_t cd, char** inbuf, size_t* inbytesleft, char** outbuf, size_t* outbytesleft) {
		const size_t result = iconv(cd, inbuf, inbytesleft, outbuf, outbytesleft);
		return result;
	}
};
#else
/// Converter used with GNU libiconv. It derives from iconv_fallbacks so that
/// the object itself can be handed to iconvctl(ICONV_SET_FALLBACKS), and it
/// layers transliteration and '?' substitution on top of a plain iconv() call.
class IconvWrapper::Converter : public iconv_fallbacks {
private:
/// If false, operator() returns the result of the first iconv() call untouched
bool subst;
/// The string "?" converted to the target encoding by the constructor
/// NOTE(review): only 4 bytes are reserved; a target encoding that needs more
/// than that for "?" (e.g. if it prepends a byte order mark) would not fit and
/// is only caught by the asserts in the constructor — confirm
char invalidRep[4];
/// Number of bytes of invalidRep that are in use
size_t invalidRepSize;
/// @brief Substitution callback installed as uc_to_mb_fallback
/// @param code         Unicode code point which could not be converted
/// @param callback     Function to call with the replacement bytes
/// @param callback_arg Opaque argument to pass back to callback
/// @param convPtr      The Converter instance (the constructor stores this in data)
static void fallback(
unsigned int code,
void (*callback) (const char *buf, size_t buflen, void* callback_arg),
void *callback_arg,
void *convPtr)
{
// At some point in the future, this should probably switch to a real mapping
// For now, there's just three cases: BOM to nothing, '\' to itself
// (for Shift-JIS, which does not have \) and everything else to '?'
if (code == 0xFEFF) return;
if (code == 0x5C) callback("\\", 1, callback_arg);
else {
Converter *self = static_cast<Converter *>(convPtr);
callback(self->invalidRep, self->invalidRepSize, callback_arg);
}
}
public:
/// @param subst     Enable the transliteration/substitution retries in operator()
/// @param targetEnc Destination encoding name (may be a pretty name); used to
///                  pre-convert the "?" replacement string
Converter(bool subst, const char* targetEnc)
: subst(subst)
{
// Fill in the inherited iconv_fallbacks fields; only the unicode -> multibyte
// fallback is used
data = this;
mb_to_uc_fallback = NULL;
mb_to_wc_fallback = NULL;
uc_to_mb_fallback = fallback;
wc_to_mb_fallback = NULL;
// Convert "?" from UTF-8 to the target encoding with a temporary descriptor
// and remember the resulting bytes for use in fallback()
char sbuff[] = "?";
char* src = sbuff;
char* dst = invalidRep;
size_t dstLen = 4;
size_t srcLen = 1;
iconv_t cd = iconv_open(GetRealEncodingName(targetEnc), "UTF-8");
assert(cd != iconv_invalid);
size_t res = iconv(cd, &src, &srcLen, &dst, &dstLen);
assert(res != iconv_failed);
assert(srcLen == 0);
iconv_close(cd);
// dstLen is the space left over, so this is the number of bytes written
invalidRepSize = 4 - dstLen;
}
/// @brief Run iconv(), retrying with progressively lossier settings on EILSEQ
/// @return The result of the last iconv() call made, or (size_t)-1 with errno
///         set; the arguments have the same meaning as for iconv()
size_t operator()(iconv_t cd, char** inbuf, size_t* inbytesleft, char** outbuf, size_t* outbytesleft) {
size_t res = iconv(cd, inbuf, inbytesleft, outbuf, outbytesleft);
if (!subst) return res;
// Save original errno so we can return it rather than the result from iconvctl
int err = errno;
// Some characters in the input string do not exist in the output encoding
if (res == iconv_failed && err == EILSEQ) {
// first try transliteration only
int transliterate = 1;
iconvctl(cd, ICONV_SET_TRANSLITERATE, &transliterate);
res = iconv(cd, inbuf, inbytesleft, outbuf, outbytesleft);
err = errno;
// Turn transliteration back off so the descriptor is left as it was found
transliterate = 0;
iconvctl(cd, ICONV_SET_TRANSLITERATE, &transliterate);
}
if (res == iconv_failed && err == EILSEQ) {
// Conversion still failed with transliteration enabled, so try our substitution
iconvctl(cd, ICONV_SET_FALLBACKS, this);
res = iconv(cd, inbuf, inbytesleft, outbuf, outbytesleft);
err = errno;
iconvctl(cd, ICONV_SET_FALLBACKS, NULL);
}
if (res == iconv_failed && err == E2BIG && *outbytesleft == 0) {
// Check for E2BIG false positives
// The output buffer is exactly full, so probe the remaining input into a
// scratch buffer using copies of the input pointer and size; the caller's
// inbuf/inbytesleft are not advanced by this probe
char buff[4];
size_t buffsize = 4;
char* out = buff;
char* in = *inbuf;
size_t insize = *inbytesleft;
iconvctl(cd, ICONV_SET_FALLBACKS, this);
res = iconv(cd, &in, &insize, &out, &buffsize);
// If no bytes of the output buffer were used, the original
// conversion may have been successful
if (buffsize == 4) {
err = errno;
}
else {
// The probe produced output, so the caller's buffer really was too small;
// err still holds E2BIG from the earlier attempt
res = iconv_failed;
}
iconvctl(cd, ICONV_SET_FALLBACKS, NULL);
}
// Report the errno of the last conversion attempt rather than whatever the
// iconvctl calls left behind
errno = err;
return res;
}
};
#endif
/// @brief Calculate the size of NUL in the given character set
/// @param encoding Encoding name; may be a pretty name
/// @return Number of bytes iconv produces for one NUL in that encoding
///
/// Two NULs are converted and only the second one is measured. Encodings
/// such as "utf-16" and "utf-32" prepend a byte order mark to the first
/// character they emit; measuring the first character would count the BOM as
/// part of the terminator (or overflow a 4 byte buffer in the UTF-32 case).
static size_t NulSize(const char* encoding) {
	// We need a character set to convert from with a known encoding of NUL
	// UTF-8 seems like the obvious choice
	iconv_t cd = iconv_open(GetRealEncodingName(encoding), "UTF-8");
	assert(cd != iconv_invalid);

	// Room for a 4 byte BOM followed by two 4 byte NULs
	char dbuff[16];
	char sbuff[2] = { 0, 0 };
	char* dst = dbuff;
	char* src = sbuff;
	size_t dstLen = sizeof(dbuff);

	// First NUL: absorbs any byte order mark the encoder emits at the start
	size_t srcLen = 1;
	size_t ret = iconv(cd, &src, &srcLen, &dst, &dstLen);
	assert(ret != iconv_failed);
	char* const secondStart = dst;

	// Second NUL: this is the one whose size is reported
	srcLen = 1;
	ret = iconv(cd, &src, &srcLen, &dst, &dstLen);
	assert(ret != iconv_failed);
	assert(dst - secondStart > 0);
	// Keep release builds (where assert compiles away) free of unused warnings
	(void)ret;

	iconv_close(cd);
	return dst - secondStart;
}
/// @brief Open a conversion descriptor from sourceEncoding to destEncoding
/// @param sourceEncoding Source encoding name, may be a pretty name
/// @param destEncoding   Destination encoding name, may be a pretty name
/// @param enableSubst    Passed on to the Converter helper
/// @throws UnsupportedConversion if iconv_open rejects the encoding pair
IconvWrapper::IconvWrapper(const char* sourceEncoding, const char* destEncoding, bool enableSubst)
: toNulLen(0)
, fromNulLen(0)
, conv(NULL)
{
	const char* const realDest = GetRealEncodingName(destEncoding);
	const char* const realSource = GetRealEncodingName(sourceEncoding);
	cd = iconv_open(realDest, realSource);
	if (cd == iconv_invalid) {
		std::string message("Cannot convert from ");
		message += sourceEncoding;
		message += " to ";
		message += destEncoding;
		throw UnsupportedConversion(message);
	}

	// The NUL sizes are computed only after the source and dest charsets
	// have been verified to be valid
	toNulLen = NulSize(destEncoding);
	fromNulLen = NulSize(sourceEncoding);

	conv.reset(new Converter(enableSubst, destEncoding));
}
/// Close the iconv descriptor, unless it is the invalid sentinel value
IconvWrapper::~IconvWrapper() {
	if (cd == iconv_invalid) return;
	iconv_close(cd);
}
/// @brief Convert a string and return the result by value
/// @param source String in the source charset
/// @return The converted bytes, produced by the two-argument overload
std::string IconvWrapper::Convert(std::string const& source) {
	std::string converted;
	Convert(source, converted);
	return converted;
}
/// @brief Convert a string from the source to destination charset
/// @param source     String to convert
/// @param[out] dest  Receives the converted bytes; resized to exactly fit them
/// @throws ConversionFailure (or a subclass) if either conversion pass fails
void IconvWrapper::Convert(std::string const& source, std::string &dest) {
	/// @todo Investigate if it's worth using ropes to avoid having to convert
	///       everything twice. It probably isn't.
	size_t len = RequiredBufferSize(source);
	dest.resize(len);

	// Nothing to write. Taking &dest[0] of an empty string is not valid in
	// C++03 (and asserts in checked standard library builds), so stop here.
	if (len == 0) return;

	// This is technically invalid as C++03 does not require that strings use
	// a single contiguous block of memory. However, no implementation has ever
	// not done so and C++0x does require that it be contiguous
	Convert(source.data(), source.size(), &dest[0], len);
}
/// @brief Convert a raw buffer into a caller-supplied destination buffer
/// @param source     Bytes in the source charset
/// @param sourceSize Length of source in bytes, or (size_t)-1 to measure it
///                   with SrcStrLen
/// @param dest       Destination buffer
/// @param destSize   Size of dest in bytes
/// @return The value returned by the underlying converter call
/// @throws BufferTooSmall on E2BIG, BadInput on EINVAL, BadOutput on EILSEQ,
///         ConversionFailure for any other errno
size_t IconvWrapper::Convert(const char* source, size_t sourceSize, char *dest, size_t destSize) {
	if (sourceSize == (size_t)-1) {
		sourceSize = SrcStrLen(source);
	}

	// The converter takes a non-const char** for its input, hence the cast
	const size_t res = (*conv)(cd, const_cast<char **>(&source), &sourceSize, &dest, &destSize);
	if (res != iconv_failed) {
		return res;
	}

	const int err = errno;
	if (err == E2BIG) {
		throw BufferTooSmall(
			"Destination buffer was not large enough to fit converted "
			"string.");
	}
	if (err == EINVAL) {
		throw BadInput(
			"One or more characters in the input string were not valid "
			"characters in the given input encoding");
	}
	if (err == EILSEQ) {
		throw BadOutput(
			"One or more characters could not be converted to the "
			"selected target encoding and the version of iconv "
			"Aegisub was built with does not have useful fallbacks. "
			"For best results, please build Aegisub using a recent "
			"version of GNU iconv.");
	}
	throw ConversionFailure("An unknown conversion failure occured");
}
size_t IconvWrapper::Convert(const char** source, size_t* sourceSize, char** dest, size_t* destSize) {
return (*conv)(cd, const_cast<char **>(source), sourceSize, dest, destSize);
}
/// Size in bytes needed to hold str once converted; measures exactly
/// str.size() bytes of input via the pointer/length overload
size_t IconvWrapper::RequiredBufferSize(std::string const& str) {
	const char* const bytes = str.data();
	const size_t byteCount = str.size();
	return RequiredBufferSize(bytes, byteCount);
}
size_t IconvWrapper::RequiredBufferSize(const char* src, size_t srcLen) {
char buff[512];
size_t charsWritten = 0;
size_t res;
do {
char* dst = buff;
size_t dstSize = sizeof(buff);
res = (*conv)(cd, const_cast<char **>(&src), &srcLen, &dst, &dstSize);
charsWritten += dst - buff;
} while (res == iconv_failed && errno == E2BIG);
if (res == iconv_failed) {
switch (errno) {
case EINVAL:
throw BadInput(
"One or more characters in the input string were not valid "
"characters in the given input encoding");
case EILSEQ:
throw BadOutput(
"One or more characters could not be converted to the "
"selected target encoding and the version of iconv "
"Aegisub was built with does not have useful fallbacks. "
"For best results, please build Aegisub using a recent "
"version of GNU iconv.");
default:
throw ConversionFailure("An unknown conversion failure occured");
}
}
return charsWritten;
}
/// @brief strlen for strings whose terminator is nulLen zero bytes wide
/// @param str    String terminated by nulLen consecutive zero bytes located
///               at a multiple of nulLen from the start
/// @param nulLen Width of the terminator in bytes; 1, 2 and 4 are supported
/// @return Length in bytes excluding the terminator, or (size_t)-1 if nulLen
///         is not a supported width
static size_t mbstrlen(const char* str, size_t nulLen) {
	switch (nulLen) {
		case 1:
			return strlen(str);
		case 2:
		case 4: {
			// Compare bytewise instead of reading through a casted integer
			// pointer: str carries no alignment guarantee, and a unit is the
			// terminator exactly when all of its bytes are zero
			static const char zero[4] = { 0, 0, 0, 0 };
			const char *ptr = str;
			while (memcmp(ptr, zero, nulLen) != 0) {
				ptr += nulLen;
			}
			return ptr - str;
		}
		default:
			return (size_t)-1;
	}
}
/// Length in bytes of a terminated string in the source charset, measured
/// using the NUL width determined for the source encoding
size_t IconvWrapper::SrcStrLen(const char* str) {
	const size_t nulWidth = fromNulLen;
	return mbstrlen(str, nulWidth);
}
/// Length in bytes of a terminated string in the destination charset,
/// measured using the NUL width determined for the destination encoding
size_t IconvWrapper::DstStrLen(const char* str) {
	const size_t nulWidth = toNulLen;
	return mbstrlen(str, nulWidth);
}
}
}

View File

@ -0,0 +1,107 @@
// Copyright (c) 2010, Thomas Goyne <plorkyeran@aegisub.org>
//
// Permission to use, copy, modify, and distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
//
// $Id$
/// @file charset_conv.h
/// @brief Wrapper for libiconv to present a more C++-friendly API
/// @ingroup libaegisub
#ifndef LAGI_PRE
#include <string.h>
#include <memory>
#include <string>
#include <vector>
#endif
#include <libaegisub/exception.h>
namespace agi {
namespace charset {
// Exception hierarchy for charset conversion, declared with the macros from
// libaegisub/exception.h:
//   ConvError
//     UnsupportedConversion  - thrown by the IconvWrapper constructor
//     ConversionFailure      - a conversion call failed
//       BufferTooSmall / BadInput / BadOutput
// NOTE(review): the last macro argument is presumably the exception's name
// string — see exception.h. The EILSEQ/EINVAL suffixes on BadInput and
// BadOutput are the reverse of how charset_conv.cpp uses them (it throws
// BadInput for EINVAL and BadOutput for EILSEQ) — confirm which is intended.
DEFINE_BASE_EXCEPTION_NOINNER(ConvError, Exception)
DEFINE_SIMPLE_EXCEPTION_NOINNER(UnsupportedConversion, ConvError, "iconv/unsupported")
DEFINE_SIMPLE_EXCEPTION_NOINNER(ConversionFailure, ConvError, "iconv/failed")
DEFINE_SIMPLE_EXCEPTION_NOINNER(BufferTooSmall, ConversionFailure, "iconv/failed/E2BIG")
DEFINE_SIMPLE_EXCEPTION_NOINNER(BadInput, ConversionFailure, "iconv/failed/EILSEQ")
DEFINE_SIMPLE_EXCEPTION_NOINNER(BadOutput, ConversionFailure, "iconv/failed/EINVAL")
/// @brief Get a list of supported encodings with user-friendly names
/// @tparam T Container type; it must provide empty() and a push_back() that
///           accepts a string literal
/// @return Reference to a function-local static list which is filled from
///         charsets.def the first time it is requested
template<class T>
T const& GetEncodingsList() {
	static T nameList;
	if (!nameList.empty()) {
		return nameList;
	}
# define ADD(pretty, real) nameList.push_back(pretty)
# include <libaegisub/charsets.def>
# undef ADD
	return nameList;
}
typedef void* iconv_t;
/// @brief A C++ wrapper for iconv
///
/// NOTE(review): the class holds a raw iconv_t which its destructor closes,
/// and no copy operations are declared here, so copying an instance looks
/// unsafe — confirm that no caller copies it.
class IconvWrapper {
private:
// Helper class that abstracts away the differences between libiconv and
// POSIX iconv implementations
class Converter;
/// iconv conversion descriptor for source -> destination
iconv_t cd;
/// Size in bytes of NUL in the destination charset
size_t toNulLen;
/// Size in bytes of NUL in the source charset
size_t fromNulLen;
/// Helper through which all conversion calls are made
std::auto_ptr<Converter> conv;
public:
/// @brief Create a converter
/// @param sourceEncoding Source encoding name, may be a pretty name
/// @param destEncoding   Destination encoding name, may be a pretty name
/// @param enableSubst    If true, when possible characters will be
///                       mutilated or dropped rather than letting a
///                       conversion fail
/// @throws UnsupportedConversion if the encoding pair cannot be opened
IconvWrapper(const char* sourceEncoding, const char* destEncoding, bool enableSubst = true);
~IconvWrapper();
/// @brief Convert a string from the source to destination charset
/// @param source String to convert
/// @return Converted string. Note that std::string always uses a single byte
///         terminator, so c_str() may not return a valid string if the dest
///         charset has wider terminators
std::string Convert(std::string const& source);
/// @brief Convert a string from the source to destination charset
/// @param source String to convert
/// @param[out] dest String to place the result in
void Convert(std::string const& source, std::string &dest);
/// @brief Convert a raw buffer into a caller-supplied buffer
/// @param source     Bytes in the source charset
/// @param sourceSize Length of source in bytes, or (size_t)-1 to have it
///                   measured with SrcStrLen
/// @param dest       Buffer to write the converted bytes to
/// @param destSize   Size of dest in bytes
/// @return The value returned by the underlying iconv call
/// @throws BufferTooSmall, BadInput, BadOutput or ConversionFailure,
///         depending on the errno reported for a failed conversion
size_t Convert(const char* source, size_t sourceSize, char* dest, size_t destSize);
/// Bare wrapper around iconv; see iconv documentation for details
size_t Convert(const char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft);
/// @brief Get the buffer size required to fit the source string in the target charset
/// @param source A string in the source charset
/// @param sourceSize Length of the source in bytes
/// @return Bytes required for exactly sourceSize bytes of input; a NUL
///         terminator is therefore counted only if sourceSize covers it
size_t RequiredBufferSize(const char* source, size_t sourceSize);
/// @brief Get the buffer size required to fit the source string in the target charset
/// @param str A string in the source charset
/// @return Bytes required, not including space needed for NUL terminator
size_t RequiredBufferSize(std::string const& str);
/// Encoding-aware strlen for strings encoded in the source charset
/// @return Length in bytes excluding the terminator, or (size_t)-1 if the
///         charset's NUL is not 1, 2 or 4 bytes wide
size_t SrcStrLen(const char* str);
/// Encoding-aware strlen for strings encoded in the destination charset
/// @return Length in bytes excluding the terminator, or (size_t)-1 if the
///         charset's NUL is not 1, 2 or 4 bytes wide
size_t DstStrLen(const char* str);
};
}
}

View File

@ -0,0 +1,29 @@
// Copyright (c) 2010, Thomas Goyne <plorkyeran@aegisub.org>
//
// Permission to use, copy, modify, and distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
//
// $Id$
/// @file charset_conv_win.h
/// @brief Windows-specific charset conversion stuff
/// @ingroup libaegisub windows
#include <libaegisub/charset_conv.h>
namespace agi {
namespace charset {
/// Convert a UTF-8 string to a string suitable for use with Win32 API functions
/// @param src UTF-8 encoded string
/// @return src converted to UTF-16LE
std::wstring ConvertW(std::string const& src);
/// Convert a wide string, such as one returned by a Win32 API function, to UTF-8
/// @param src UTF-16LE encoded string
/// @return src converted to UTF-8
std::string ConvertW(std::wstring const& src);
}
}

View File

@ -0,0 +1,116 @@
// Table of supported encodings, one ADD(pretty name, encoding name) per entry.
// This file defines nothing by itself: every includer must #define
// ADD(pretty, real) before including it and #undef it again afterwards.
// NOTE(review): "Local" maps to the empty string, which presumably selects
// the current locale's encoding when handed to iconv_open — confirm.
ADD("Local", "");
ADD("Unicode (UTF-8)", "utf-8");
ADD("Unicode (UTF-16)", "utf-16");
ADD("Unicode (UTF-16BE)", "utf-16be");
ADD("Unicode (UTF-16LE)", "utf-16le");
ADD("Unicode (UTF-32)", "utf-32");
ADD("Unicode (UTF-32BE)", "utf-32be");
ADD("Unicode (UTF-32LE)", "utf-32le");
ADD("Unicode (UTF-7)", "utf-7");
ADD("Arabic (IBM-864)", "ibm864");
ADD("Arabic (IBM-864-I)", "ibm864i");
ADD("Arabic (ISO-8859-6)", "iso-8859-6");
ADD("Arabic (ISO-8859-6-E)", "iso-8859-6-e");
ADD("Arabic (ISO-8859-6-I)", "iso-8859-6-i");
ADD("Arabic (Langbox ISO-8859-6.16)", "x-iso-8859-6-16");
ADD("Arabic (Langbox ISO-8859-6.8x)", "x-iso-8859-6-8-x");
ADD("Arabic (MacArabic)", "x-mac-arabic");
ADD("Arabic (Windows-1256)", "windows-1256");
ADD("Armenian (ARMSCII-8)", "armscii-8");
ADD("Baltic (ISO-8859-13)", "iso-8859-13");
ADD("Baltic (ISO-8859-4)", "iso-8859-4");
ADD("Baltic (Windows-1257)", "windows-1257");
ADD("Celtic (ISO-8859-14)", "iso-8859-14");
ADD("Central European (IBM-852)", "ibm852");
ADD("Central European (ISO-8859-2)", "iso-8859-2");
ADD("Central European (MacCE)", "x-mac-ce");
ADD("Central European (Windows-1250)", "windows-1250");
ADD("Chinese Simplified (GB18030)", "gb18030");
ADD("Chinese Simplified (GB2312)", "gb2312");
ADD("Chinese Simplified (GBK)", "x-gbk");
ADD("Chinese Simplified (HZ)", "hz-gb-2312");
ADD("Chinese Simplified (ISO-2022-CN)", "iso-2022-cn");
ADD("Chinese Traditional (Big5)", "big5");
ADD("Chinese Traditional (Big5-HKSCS)", "big5-hkscs");
ADD("Chinese Traditional (EUC-TW)", "x-euc-tw");
ADD("Croatian (MacCroatian)", "x-mac-croatian");
ADD("Cyrillic (IBM-855)", "ibm855");
ADD("Cyrillic (ISO-8859-5)", "iso-8859-5");
ADD("Cyrillic (ISO-IR-111)", "iso-ir-111");
ADD("Cyrillic (KOI8-R)", "koi8-r");
ADD("Cyrillic (MacCyrillic)", "x-mac-cyrillic");
ADD("Cyrillic (Windows-1251)", "windows-1251");
ADD("Cyrillic/Russian (CP-866)", "ibm866");
ADD("Cyrillic/Ukrainian (KOI8-U)", "koi8-u");
ADD("Cyrillic/Ukrainian (MacUkrainian)", "x-mac-ukrainian");
ADD("English (US-ASCII)", "us-ascii");
ADD("Farsi (MacFarsi)", "x-mac-farsi");
ADD("Georgian (GEOSTD8)", "geostd8");
ADD("Greek (ISO-8859-7)", "iso-8859-7");
ADD("Greek (MacGreek)", "x-mac-greek");
ADD("Greek (Windows-1253)", "windows-1253");
ADD("Gujarati (MacGujarati)", "x-mac-gujarati");
ADD("Gurmukhi (MacGurmukhi)", "x-mac-gurmukhi");
ADD("Hebrew (IBM-862)", "ibm862");
ADD("Hebrew (ISO-8859-8-E)", "iso-8859-8-e");
ADD("Hebrew (ISO-8859-8-I)", "iso-8859-8-i");
ADD("Hebrew (MacHebrew)", "x-mac-hebrew");
ADD("Hebrew (Windows-1255)", "windows-1255");
ADD("Hebrew Visual (ISO-8859-8)", "iso-8859-8");
ADD("Hindi (MacDevanagari)", "x-mac-devanagari");
ADD("Hindi (SunDevanagari)", "x-sun-unicode-india-0");
ADD("Icelandic (MacIcelandic)", "x-mac-icelandic");
ADD("Japanese (EUC-JP)", "euc-jp");
ADD("Japanese (ISO-2022-JP)", "iso-2022-jp");
ADD("Japanese (Shift_JIS)", "shift_jis");
ADD("Korean (EUC-KR)", "euc-kr");
ADD("Korean (ISO-2022-KR)", "iso-2022-kr");
ADD("Korean (JOHAB)", "x-johab");
ADD("Korean (UHC)", "x-windows-949");
ADD("Nordic (ISO-8859-10)", "iso-8859-10");
ADD("Romanian (ISO-8859-16)", "iso-8859-16");
ADD("Romanian (MacRomanian)", "x-mac-romanian");
ADD("South European (ISO-8859-3)", "iso-8859-3");
ADD("Thai (IBM-874)", "ibm874");
ADD("Thai (ISO-8859-11)", "iso-8859-11");
ADD("Thai (TIS-620)", "tis-620");
ADD("Thai (Windows-874)", "windows-874");
ADD("Turkish (IBM-857)", "ibm857");
ADD("Turkish (ISO-8859-9)", "iso-8859-9");
ADD("Turkish (MacTurkish)", "x-mac-turkish");
ADD("Turkish (Windows-1254)", "windows-1254");
ADD("Vietnamese (TCVN)", "x-viet-tcvn5712");
ADD("Vietnamese (VISCII)", "viscii");
ADD("Vietnamese (VPS)", "x-viet-vps");
ADD("Vietnamese (Windows-1258)", "windows-1258");
ADD("Western (IBM-850)", "ibm850");
ADD("Western (ISO-8859-1)", "iso-8859-1");
ADD("Western (ISO-8859-15)", "iso-8859-15");
ADD("Western (MacRoman)", "x-mac-roman");
ADD("Western (Windows-1252)", "windows-1252");

View File

@ -45,9 +45,9 @@ class Save {
const std::string file_name;
public:
Save(const std::string& file);
~Save();
std::ofstream& Get();
Save(const std::string& file);
~Save();
std::ofstream& Get();
};

View File

@ -6,6 +6,7 @@
#include <stdarg.h>
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <sys/stat.h>
#include <time.h>

View File

@ -25,8 +25,9 @@
#include <fstream>
#endif
#include "libaegisub/util.h"
#include "libaegisub/util_win.h"
#include <libaegisub/charset_conv_win.h>
#include <libaegisub/util.h>
#include <libaegisub/util_win.h>
namespace agi {
namespace acs {
@ -57,8 +58,7 @@ is a short (and incomplete) todo
requires detecting the filesystem being used.
*/
void Check(const std::string &file, acs::Type type) {
std::wstring wfile;
wfile.assign(file.begin(), file.end());
std::wstring wfile = agi::charset::ConvertW(file);
SECURITY_DESCRIPTOR* sd;
DWORD len = 0;

View File

@ -0,0 +1,49 @@
// Copyright (c) 2010, Thomas Goyne <plorkyeran@aegisub.org>
//
// Permission to use, copy, modify, and distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
//
// $Id$
/// @file charset_conv_win.cpp
/// @brief Windows-specific charset conversion stuff
/// @ingroup libaegisub windows
#include <libaegisub/charset_conv_win.h>
namespace agi {
namespace charset {
/// @brief Convert a UTF-8 string to UTF-16LE for use with wide Win32 APIs
/// @param source UTF-8 encoded string
/// @return The converted string; empty if the conversion produces no output
/// @throws agi::charset::ConversionFailure (or a subclass) on invalid input
std::wstring ConvertW(std::string const& source) {
	static IconvWrapper w32Conv("utf-8", "utf-16le", false);

	std::wstring dest;
	size_t len = w32Conv.RequiredBufferSize(source);
	// Taking &dest[0] of an empty string is not valid (and asserts in checked
	// standard library builds), so stop here when there is nothing to write
	if (len == 0) return dest;

	// len is in bytes; the string is sized in wchar_t units
	dest.resize(len / sizeof(wchar_t));
	w32Conv.Convert(source.data(), source.size(), reinterpret_cast<char *>(&dest[0]), len);
	return dest;
}
/// @brief Convert a UTF-16LE wide string to UTF-8
/// @param source Wide string, e.g. one returned by a Win32 API function
/// @return The converted string; empty if the conversion produces no output
/// @throws agi::charset::ConversionFailure (or a subclass) on invalid input
std::string ConvertW(std::wstring const& source) {
	static IconvWrapper w32Conv("utf-16le", "utf-8", false);

	std::string dest;
	// The converter works on bytes, so express the source length in bytes
	size_t srcLen = source.size() * sizeof(wchar_t);
	const char* src = reinterpret_cast<const char *>(source.c_str());
	size_t len = w32Conv.RequiredBufferSize(src, srcLen);
	// Taking &dest[0] of an empty string is not valid (and asserts in checked
	// standard library builds), so stop here when there is nothing to write
	if (len == 0) return dest;

	dest.resize(len);
	w32Conv.Convert(src, srcLen, &dest[0], len);
	return dest;
}
}
}

View File

@ -26,6 +26,7 @@
#include <fstream>
#endif
#include <libaegisub/charset_conv_win.h>
#include "libaegisub/io.h"
#include "libaegisub/log.h"
#include "libaegisub/util.h"
@ -34,11 +35,13 @@
namespace agi {
namespace io {
using agi::charset::ConvertW;
std::ifstream* Open(const std::string &file) {
LOG_D("agi/io/open/file") << file;
acs::CheckFileRead(file);
std::ifstream *stream = new std::ifstream(file.c_str());
std::ifstream *stream = new std::ifstream(ConvertW(file).c_str());
if (stream->fail()) {
delete stream;
@ -53,7 +56,7 @@ Save::Save(const std::string& file): file_name(file) {
LOG_D("agi/io/save/file") << file;
const std::string pwd = util::DirName(file);
acs::CheckDirWrite(pwd.c_str());
acs::CheckDirWrite(pwd);
try {
acs::CheckFileWrite(file);
@ -61,23 +64,19 @@ Save::Save(const std::string& file): file_name(file) {
// If the file doesn't exist we create a 0 byte file, this so so
// util::Rename will find it, and to let users know something went
// wrong by leaving a 0 byte file.
std::ofstream fp_touch(file.c_str());
std::ofstream fp_touch(ConvertW(file).c_str());
}
/// @todo This is a temp hack, proper implementation needs to come after
/// Windows support is added. The code in the destructor needs fixing
/// as well.
const std::string tmp = file + "_tmp";
// This will open to file.XXXX. (tempfile)
fp = new std::ofstream(tmp.c_str());
fp = new std::ofstream(ConvertW(file + "_tmp").c_str());
}
Save::~Save() {
const std::string tmp(file_name + "_tmp");
delete fp;
util::Rename(tmp, file_name);
util::Rename(file_name + "_tmp", file_name);
}
std::ofstream& Save::Get() {

View File

@ -30,23 +30,22 @@
#endif
//#include <string.h>
#include "libaegisub/types.h"
#include <libaegisub/charset_conv_win.h>
#include "libaegisub/util.h"
#include "libaegisub/util_win.h"
namespace agi {
namespace util {
using agi::charset::ConvertW;
const std::string DirName(const std::string& path) {
if (path.find('/') == std::string::npos) {
const std::string cwd(".");
return cwd;
return ".";
}
const std::string stripped = path.substr(0, path.rfind("/")+1);
return stripped;
return path.substr(0, path.rfind("/")+1);
}
void Rename(const std::string& from, const std::string& to) {
@ -58,19 +57,18 @@ void Rename(const std::string& from, const std::string& to) {
acs::CheckDirWrite(DirName(to));
}
MoveFileExA(from.c_str(), to.c_str(), MOVEFILE_REPLACE_EXISTING);
MoveFileEx(ConvertW(from).c_str(), ConvertW(to).c_str(), MOVEFILE_REPLACE_EXISTING);
}
std::string ErrorString(DWORD error) {
LPSTR lpstr = NULL;
LPWSTR lpstr = NULL;
if(FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM, NULL, error, 0, (LPSTR)&lpstr, 0, NULL) == 0) {
if(FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM, NULL, error, 0, reinterpret_cast<LPWSTR>(&lpstr), 0, NULL) == 0) {
/// @todo Return the actual 'unknown error' string from windows.
std::string str("Unknown Error");
return str;
return "Unknown Error";
}
std::string str(lpstr);
std::string str = ConvertW(lpstr);
LocalFree(lpstr);
return str;
}

View File

@ -42,107 +42,21 @@
#include <errno.h>
#include <stdint.h>
#include <wx/hashmap.h>
#include <wx/intl.h>
#endif
WX_DECLARE_STRING_HASH_MAP(wxString, PrettyNamesHash);
class AegisubCSConvImpl : public AegisubCSConv {
public:
AegisubCSConvImpl() { }
};
#if wxUSE_THREADS
static wxMutex encodingListMutex;
#endif
static const iconv_t iconv_invalid = (iconv_t)-1;
static const size_t iconv_failed = (size_t)-1;
#define ICONV_CONST_CAST(a) const_cast<ICONV_CONST char *>(a)
static wxArrayString *supportedEncodings = NULL;
static wxArrayString *prettyEncodingList = NULL;
static PrettyNamesHash *prettyEncodingHash = NULL;
AegisubCSConv::AegisubCSConv(const wxChar *mbEncName, bool enableSubst)
: wcCharsetName(WCHAR_T_ENCODING)
, mbCharsetName(GetRealEncodingName(mbEncName))
, mbNulLen(0)
, enableSubst(enableSubst)
, m2w(wcCharsetName, mbCharsetName)
, w2m(mbCharsetName, wcCharsetName)
AegisubCSConv::AegisubCSConv()
: conv("wchar_t", "")
{
if (m2w == iconv_invalid || w2m == iconv_invalid) {
throw wxString::Format(L"Character set %s is not supported.", mbEncName);
}
if (enableSubst) {
invalidRepSize = FromWChar(invalidRep, sizeof(invalidRep), L"?") - GetMBNulLen();
#ifndef ICONV_POSIX
fallbacks.data = this;
fallbacks.mb_to_uc_fallback = NULL;
fallbacks.mb_to_wc_fallback = NULL;
fallbacks.uc_to_mb_fallback = ucToMbFallback;
fallbacks.wc_to_mb_fallback = NULL;
#endif
}
}
wxMBConv * AegisubCSConv::Clone() const {
AegisubCSConv *c = new AegisubCSConv(mbCharsetName);
c->mbNulLen = mbNulLen;
return c;
}
/// @brief Calculate the size of NUL in the target encoding via iconv
/// @return The size in bytes of NUL
size_t AegisubCSConv::GetMBNulLen() const {
if (mbNulLen == 0) {
const wchar_t nulStr[] = L"";
char outBuff[8];
size_t inLen = sizeof(wchar_t);
size_t outLen = sizeof(outBuff);
char * inPtr = (char *)nulStr;
char * outPtr = outBuff;
size_t res = iconv(w2m, &inPtr, &inLen, &outPtr, &outLen);
if (res != 0)
mbNulLen = (size_t)-1;
else
mbNulLen = sizeof(outBuff) - outLen;
}
return mbNulLen;
}
size_t AegisubCSConv::MBBuffLen(const char * str) const {
size_t nulLen = GetMBNulLen();
const char *ptr;
switch (nulLen) {
case 1:
return strlen(str);
case 2:
for (ptr = str; *reinterpret_cast<const uint16_t *>(ptr) != 0; ptr += 2) ;
return ptr - str;
case 4:
for (ptr = str; *reinterpret_cast<const uint32_t *>(ptr) != 0; ptr += 4) ;
return ptr - str;
default:
return (size_t)-1;
}
}
/// @brief Convert a string from multibyte to wide characters
/// @param dst Destination buffer.
/// @param dstSize Length of destination buffer in wchar_ts
/// @param src Source multibyte string
/// @param srcLen Length of source buffer in bytes, or -1 to autodetect
/// @return The number of wchar_ts needed to store the string in the target charset
size_t AegisubCSConv::ToWChar(wchar_t *dst, size_t dstSize, const char *src, size_t srcLen) const {
return doConversion(
m2w,
reinterpret_cast<char *>(dst),
dstSize * sizeof(wchar_t),
const_cast<char *>(src),
srcLen == wxNO_LEN ? MBBuffLen(src) + GetMBNulLen() : srcLen
) / sizeof(wchar_t);
throw agi::charset::UnsupportedConversion("Cannot convert to local with csConvLocal");
}
/// @brief Convert a string from wide characters to multibyte
@ -152,309 +66,19 @@ size_t AegisubCSConv::ToWChar(wchar_t *dst, size_t dstSize, const char *src, siz
/// @param srcLen Length in wchar_ts of source, or -1 to autodetect
/// @return The number of bytes needed to store the string in the target charset
size_t AegisubCSConv::FromWChar(char *dst, size_t dstSize, const wchar_t *src, size_t srcLen) const {
return doConversion(
w2m,
dst,
dstSize,
reinterpret_cast<char *>(const_cast<wchar_t *>(src)),
(srcLen == wxNO_LEN ? wcslen(src) + 1 : srcLen) * sizeof(wchar_t)
);
}
// Perform a conversion if a buffer is given or calculate the needed buffer size if not
size_t AegisubCSConv::doConversion(iconv_t cd, char *dst, size_t dstSize, char *src, size_t srcSize) const {
if (dstSize > 0) {
return iconvWrapper(cd, &src, &srcSize, &dst, &dstSize);
}
// No destination given, so calculate the needed buffer size instead
char buff[32];
size_t buffSize = 32;
size_t charsWritten = 0;
size_t res;
do {
dst = buff;
dstSize = buffSize;
res = iconvWrapper(cd, &src, &srcSize, &dst, &dstSize);
charsWritten += dst - buff;
} while (res == iconv_failed && errno == E2BIG);
if (res == iconv_failed) return wxCONV_FAILED;
return charsWritten;
}
/// @brief Run iconv under the conversion lock, retrying with substitutions if enabled
/// @param cd           Conversion descriptor
/// @param inbuf        In/out pointer to the remaining input; advanced by iconv
/// @param inbytesleft  In/out count of input bytes remaining
/// @param outbuf       In/out pointer to the remaining output space; advanced by iconv
/// @param outbytesleft In/out count of output bytes remaining
/// @return Number of bytes written to the output on success; iconv_failed if the
///         first attempt fails and substitution is disabled; wxCONV_FAILED if
///         the substitution attempts also fail
///
/// When built against an iconv without GNU extensions (ICONV_POSIX) and
/// substitution is enabled, an EILSEQ failure throws a wide string literal
/// describing the problem instead of returning.
size_t AegisubCSConv::iconvWrapper(iconv_t cd, char **inbuf, size_t *inbytesleft,
char **outbuf, size_t *outbytesleft) const {
#if wxUSE_THREADS
// Hold the mutex for the whole call, including every retry below
wxMutexLocker lock(iconvMutex);
#endif
// Remember where the output started so the byte count can be computed afterwards
char *outbuforig = *outbuf;
size_t res = iconv(cd, inbuf, inbytesleft, outbuf, outbytesleft);
if (res != iconv_failed)
return *outbuf - outbuforig;
// Without substitution there is nothing more to try
if (!enableSubst)
return iconv_failed;
#ifdef ICONV_POSIX
// No iconvctl available, so an unconvertible character cannot be worked around
if (errno == EILSEQ) {
throw
L"One or more characters do not fit in the selected "
L"encoding and the version of iconv Aegisub was built with"
L" does not have useful fallbacks. For best results, "
L"please rebuild Aegisub using a recent version of GNU iconv.";
}
return wxCONV_FAILED;
#else
// Save original errno so we can return it rather than the result from iconvctl
int err = errno;
// Each stage below resumes from the current buffer positions and only runs
// while the previous attempt failed with EILSEQ
// Some characters in the input string do not exist in the output encoding
if (res == iconv_failed && err == EILSEQ) {
// first try transliteration only
int transliterate = 1;
iconvctl(cd, ICONV_SET_TRANSLITERATE, &transliterate);
res = iconv(cd, inbuf, inbytesleft, outbuf, outbytesleft);
err = errno;
// Switch transliteration back off so the descriptor is left as it was found
transliterate = 0;
iconvctl(cd, ICONV_SET_TRANSLITERATE, &transliterate);
}
if (res == iconv_failed && err == EILSEQ) {
// Conversion still failed with transliteration enabled, so try our substitution
iconvctl(cd, ICONV_SET_FALLBACKS, &fallbacks);
res = iconv(cd, inbuf, inbytesleft, outbuf, outbytesleft);
err = errno;
// Remove the fallback handlers again
iconvctl(cd, ICONV_SET_FALLBACKS, NULL);
}
if (res == iconv_failed && err == EILSEQ) {
// Conversion still failed, so just drop any invalid characters
int discard = 1;
iconvctl(cd, ICONV_SET_DISCARD_ILSEQ, &discard);
res = iconv(cd, inbuf, inbytesleft, outbuf, outbytesleft);
err = errno;
discard = 0;
iconvctl(cd, ICONV_SET_DISCARD_ILSEQ, &discard);
}
// Report the errno of the last iconv call, not whatever iconvctl left behind
errno = err;
if (res == iconv_failed) return wxCONV_FAILED;
return *outbuf - outbuforig;
#endif
}
/// @brief GNU iconv character substitution callback
/// @param code Unicode character which could not be converted
/// @param callback Callback to tell iconv what string to use instead
/// @param callback_arg Iconv userdata for callback
/// @param convPtr AegisubCSConv instance to use
void AegisubCSConv::ucToMbFallback(
unsigned int code,
void (*callback) (const char *buf, size_t buflen, void* callback_arg),
void *callback_arg,
void *convPtr)
{
// At some point in the future, this should probably switch to a real mapping
// For now, there's just three cases: BOM to nothing, '\' to itself
// (for Shift-JIS, which does not have \) and everything else to '?'
if (code == 0xFEFF) return;
if (code == 0x5C) callback("\\", 1, callback_arg);
else {
AegisubCSConv *self = static_cast<AegisubCSConv *>(convPtr);
callback(self->invalidRep, self->invalidRepSize, callback_arg);
}
}
#ifndef ICONV_POSIX
/// @brief iconvlist callback which records every encoding name it is given
/// @param namescount Number of entries in names
/// @param names      Encoding names to append to supportedEncodings
/// @param data       Userdata passed through by iconvlist; not used
/// @return Always 0
int addEncoding(unsigned int namescount, const char * const * names, void* data) {
	const char * const * const end = names + namescount;
	for (const char * const * cur = names; cur != end; ++cur) {
		supportedEncodings->Add(wxString::FromAscii(*cur));
	}
	return 0;
}
#endif
// Return every encoding name iconv reports, building the list on first use.
// With a POSIX-only iconv (ICONV_POSIX) the list is created but left empty.
wxArrayString AegisubCSConv::GetAllSupportedEncodings() {
#if wxUSE_THREADS
	wxMutexLocker lock(encodingListMutex);
#endif
	// Already built by an earlier call
	if (supportedEncodings != NULL)
		return *supportedEncodings;

	supportedEncodings = new wxArrayString();
#ifndef ICONV_POSIX
	iconvlist(addEncoding, NULL);
	supportedEncodings->Sort();
#endif
	return *supportedEncodings;
}
/// @brief Map a user-friendly encoding name to the real encoding name
/// @param name A pretty name, "local" (in any case), or an already-real name
/// @return The system encoding name for "local", the mapped real name when the
///         pretty-name map has an entry for name, and name unchanged otherwise
wxString AegisubCSConv::GetRealEncodingName(wxString name) {
	if (name.Lower() == L"local") return wxLocale::GetSystemEncodingName();

	// Test the pointer that is dereferenced below. GetEncodingsList assigns
	// prettyEncodingList before prettyEncodingHash, so checking the list does
	// not guarantee that the map exists yet.
	if (prettyEncodingHash == NULL) return name;

	PrettyNamesHash::iterator realName = prettyEncodingHash->find(name);
	if (realName == prettyEncodingHash->end()) return name;
	return realName->second;
}
// Return the list of user-friendly encoding names, building it (and the
// pretty-name to real-name map) on the first call. "Local" is always the first
// entry; every other candidate is only listed if iconv can open a conversion
// both to and from WCHAR_T_ENCODING for it.
//
// NOTE(review): the body below contains a try/catch and several statements
// using srcLen, src, dst, dstSize and conv, none of which are declared in this
// function, and its braces do not balance. They look like lines belonging to a
// different function that were merged in here - confirm against version
// control before building.
wxArrayString AegisubCSConv::GetEncodingsList() {
#if wxUSE_THREADS
wxMutexLocker lock(encodingListMutex);
#endif
if (prettyEncodingList == NULL) {
// Candidate encodings as { pretty name, iconv name }, ended by a NULL pair
struct { const char *pretty, *real; } encodingNames[] = {
{"Unicode (UTF-8)", "utf-8"},
{"Unicode (UTF-16)", "utf-16"},
{"Unicode (UTF-16BE)", "utf-16be"},
{"Unicode (UTF-16LE)", "utf-16le"},
{"Unicode (UTF-32)", "utf-32"},
{"Unicode (UTF-32BE)", "utf-32be"},
{"Unicode (UTF-32LE)", "utf-32le"},
{"Unicode (UTF-7)", "utf-7"},
{"Arabic (IBM-864)", "ibm864"},
{"Arabic (IBM-864-I)", "ibm864i"},
{"Arabic (ISO-8859-6)", "iso-8859-6"},
{"Arabic (ISO-8859-6-E)", "iso-8859-6-e"},
{"Arabic (ISO-8859-6-I)", "iso-8859-6-i"},
{"Arabic (Langbox ISO-8859-6.16)", "x-iso-8859-6-16"},
{"Arabic (Langbox ISO-8859-6.8x)", "x-iso-8859-6-8-x"},
{"Arabic (MacArabic)", "x-mac-arabic"},
{"Arabic (Windows-1256)", "windows-1256"},
{"Armenian (ARMSCII-8)", "armscii-8"},
{"Baltic (ISO-8859-13)", "iso-8859-13"},
{"Baltic (ISO-8859-4)", "iso-8859-4"},
{"Baltic (Windows-1257)", "windows-1257"},
{"Celtic (ISO-8859-14)", "iso-8859-14"},
{"Central European (IBM-852)", "ibm852"},
{"Central European (ISO-8859-2)", "iso-8859-2"},
{"Central European (MacCE)", "x-mac-ce"},
{"Central European (Windows-1250)", "windows-1250"},
{"Chinese Simplified (GB18030)", "gb18030"},
{"Chinese Simplified (GB2312)", "gb2312"},
{"Chinese Simplified (GBK)", "x-gbk"},
{"Chinese Simplified (HZ)", "hz-gb-2312"},
{"Chinese Simplified (ISO-2022-CN)", "iso-2022-cn"},
{"Chinese Traditional (Big5)", "big5"},
{"Chinese Traditional (Big5-HKSCS)", "big5-hkscs"},
{"Chinese Traditional (EUC-TW)", "x-euc-tw"},
{"Croatian (MacCroatian)", "x-mac-croatian"},
{"Cyrillic (IBM-855)", "ibm855"},
{"Cyrillic (ISO-8859-5)", "iso-8859-5"},
{"Cyrillic (ISO-IR-111)", "iso-ir-111"},
{"Cyrillic (KOI8-R)", "koi8-r"},
{"Cyrillic (MacCyrillic)", "x-mac-cyrillic"},
{"Cyrillic (Windows-1251)", "windows-1251"},
{"Cyrillic/Russian (CP-866)", "ibm866"},
{"Cyrillic/Ukrainian (KOI8-U)", "koi8-u"},
{"Cyrillic/Ukrainian (MacUkrainian)", "x-mac-ukrainian"},
{"English (US-ASCII)", "us-ascii"},
{"Farsi (MacFarsi)", "x-mac-farsi"},
{"Georgian (GEOSTD8)", "geostd8"},
{"Greek (ISO-8859-7)", "iso-8859-7"},
{"Greek (MacGreek)", "x-mac-greek"},
{"Greek (Windows-1253)", "windows-1253"},
{"Gujarati (MacGujarati)", "x-mac-gujarati"},
{"Gurmukhi (MacGurmukhi)", "x-mac-gurmukhi"},
{"Hebrew (IBM-862)", "ibm862"},
{"Hebrew (ISO-8859-8-E)", "iso-8859-8-e"},
{"Hebrew (ISO-8859-8-I)", "iso-8859-8-i"},
{"Hebrew (MacHebrew)", "x-mac-hebrew"},
{"Hebrew (Windows-1255)", "windows-1255"},
{"Hebrew Visual (ISO-8859-8)", "iso-8859-8"},
{"Hindi (MacDevanagari)", "x-mac-devanagari"},
{"Hindi (SunDevanagari)", "x-sun-unicode-india-0"},
{"Icelandic (MacIcelandic)", "x-mac-icelandic"},
{"Japanese (EUC-JP)", "euc-jp"},
{"Japanese (ISO-2022-JP)", "iso-2022-jp"},
{"Japanese (Shift_JIS)", "shift_jis"},
{"Korean (EUC-KR)", "euc-kr"},
{"Korean (ISO-2022-KR)", "iso-2022-kr"},
{"Korean (JOHAB)", "x-johab"},
{"Korean (UHC)", "x-windows-949"},
{"Nordic (ISO-8859-10)", "iso-8859-10"},
{"Romanian (ISO-8859-16)", "iso-8859-16"},
{"Romanian (MacRomanian)", "x-mac-romanian"},
{"South European (ISO-8859-3)", "iso-8859-3"},
{"Thai (IBM-874)", "ibm874"},
{"Thai (ISO-8859-11)", "iso-8859-11"},
{"Thai (TIS-620)", "tis-620"},
{"Thai (Windows-874)", "windows-874"},
{"Turkish (IBM-857)", "ibm857"},
{"Turkish (ISO-8859-9)", "iso-8859-9"},
{"Turkish (MacTurkish)", "x-mac-turkish"},
{"Turkish (Windows-1254)", "windows-1254"},
{"Vietnamese (TCVN)", "x-viet-tcvn5712"},
{"Vietnamese (VISCII)", "viscii"},
{"Vietnamese (VPS)", "x-viet-vps"},
{"Vietnamese (Windows-1258)", "windows-1258"},
{"Western (IBM-850)", "ibm850"},
{"Western (ISO-8859-1)", "iso-8859-1"},
{"Western (ISO-8859-15)", "iso-8859-15"},
{"Western (MacRoman)", "x-mac-roman"},
{"Western (Windows-1252)", "windows-1252"},
{NULL, NULL}
};
// Build into locals first; the statics are only assigned once both are filled
PrettyNamesHash *map = new PrettyNamesHash(100);
wxArrayString *arr = new wxArrayString();
arr->Add(L"Local");
for (int i = 0; encodingNames[i].real != NULL; i++) {
// Verify that iconv actually supports converting to and from this encoding
iconv_t cd = iconv_open(encodingNames[i].real, WCHAR_T_ENCODING);
if (cd == iconv_invalid) continue;
iconv_close(cd);
cd = iconv_open(WCHAR_T_ENCODING, encodingNames[i].real);
if (cd == iconv_invalid) continue;
iconv_close(cd);
wxString pretty = wxString::FromAscii(encodingNames[i].pretty);
arr->Add(pretty);
(*map)[pretty] = wxString::FromAscii(encodingNames[i].real);
// NOTE(review): from here to the catch block, the statements using srcLen,
// src, dst, dstSize and conv do not belong to the list-building logic
try {
if (srcLen != (size_t)-1) {
if (src[srcLen - 1] == 0) srcLen -= 1;
srcLen *= sizeof(wchar_t);
}
// Publish the finished list and map
prettyEncodingList = arr;
prettyEncodingHash = map;
if (dstSize == 0) {
return conv.RequiredBufferSize(reinterpret_cast<const char*>(src), srcLen);
}
return conv.Convert(reinterpret_cast<const char*>(src), srcLen, dst, dstSize);
}
catch (agi::charset::ConvError const&) {
return (size_t)-1;
}
return *prettyEncodingList;
}
static AegisubCSConv localConv(L"Local", false);
AegisubCSConv& csConvLocal(localConv);
static AegisubCSConvImpl localConv;
AegisubCSConv& csConvLocal = localConv;

View File

@ -35,135 +35,38 @@
///
#ifndef AGI_PRE
#include <iconv.h>
#include <wchar.h>
#include <wx/arrstr.h>
#include <wx/string.h>
#include <wx/strconv.h>
#include <wx/thread.h>
#endif
#include "aegisub_endian.h"
#if !defined(_LIBICONV_VERSION) || _LIBICONV_VERSION < 0x010A || defined(LIBICONV_PLUG)
#define ICONV_POSIX
#endif
/// @class iconv_wrapper
/// @brief RAII wrapper for iconv
///
/// Opens a conversion descriptor on construction and closes it on destruction.
/// A failed iconv_open is not reported by the constructors: the wrapped
/// descriptor is then (iconv_t)-1, which callers can detect through the
/// iconv_t conversion operators.
class iconv_wrapper {
private:
	/// The wrapped descriptor, or (iconv_t)-1 if iconv_open failed
	iconv_t conv;

	// A copy would leave two wrappers closing the same descriptor, so the copy
	// operations are declared private and intentionally left undefined
	iconv_wrapper(const iconv_wrapper&);
	iconv_wrapper& operator=(const iconv_wrapper&);
public:
	/// @param to   Name of the encoding to convert to
	/// @param from Name of the encoding to convert from
	iconv_wrapper(const char *to, const char *from)
	: conv(iconv_open(to, from))
	{ }
	/// @overload
	iconv_wrapper(wxString const& to, wxString const& from)
	: conv(iconv_open(to.ToAscii(), from.ToAscii()))
	{ }
	/// @overload
	iconv_wrapper(const char *to, wxString const& from)
	: conv(iconv_open(to, from.ToAscii()))
	{ }
	/// @overload
	iconv_wrapper(wxString const& to, const char *from)
	: conv(iconv_open(to.ToAscii(), from))
	{ }
	/// Close the descriptor if one was successfully opened
	~iconv_wrapper() {
		if (conv != (iconv_t)-1) iconv_close(conv);
	}
	/// Access the raw descriptor for use with the iconv functions
	operator iconv_t() {
		return conv;
	}
	/// Access the raw descriptor from a const wrapper
	operator const iconv_t() const {
		return conv;
	}
};
#include <libaegisub/charset_conv.h>
/// @class AegisubCSConv
/// @brief wxMBConv implementation for converting to and from unicode
///
/// NOTE(review): this declaration appears to mix members of two revisions of
/// the class: Clone() is declared twice, there are two protected sections, and
/// both the iconv_wrapper descriptors (m2w/w2m) and an
/// agi::charset::IconvWrapper (conv) are present. Confirm against version
/// control which set is intended before building.
class AegisubCSConv : public wxMBConv {
public:
/// @param mbEncName Multibyte encoding to convert to/from
/// @param enableSubst Whether to substitute characters when needed.
/// By default, any conversion that would be lossy will fail
/// When enableSubst is true, conversions to multibyte with a sufficiently
/// large buffer are guaranteed to succeed, with characters dropped or
/// changed as needed to fit the string into the target encoding.
AegisubCSConv(const wxChar *mbEncName, bool enableSubst = false);
// wxMBConv implementation; see strconv.h for usage details
size_t ToWChar(wchar_t *dst, size_t dstLen, const char *src, size_t srcLen = wxNO_LEN) const;
size_t FromWChar(char *dst, size_t dstLen, const wchar_t *src, size_t srcLen = wxNO_LEN) const;
size_t GetMBNulLen() const;
wxMBConv *Clone() const;
/// @brief Multibyte-aware strlen
/// @return Length in bytes of str (excluding terminator)
size_t MBBuffLen(const char *str) const;
/// @brief Get a list of supported encodings with user-friendly names
static wxArrayString GetEncodingsList();
/// @brief Get a list of all encodings supported by iconv
/// Requires GNU iconv for useful results
static wxArrayString GetAllSupportedEncodings();
/// @brief Map a user-friendly encoding name to the real encoding name
static wxString GetRealEncodingName(wxString name);
// NOTE(review): second declaration of Clone(); this one is defined inline and
// returns NULL
wxMBConv *Clone() const { return NULL; };
protected:
/// Default constructor, accessible to derived classes only
AegisubCSConv();
private:
// The smattering of mutable variables here are due to that ToWChar and
// FromWChar are const in wxMBConv, but we require minor mutation for
// things like locks (as iconv is not thread-safe)
wxString wcCharsetName;
wxString mbCharsetName;
mutable size_t mbNulLen;
/// Whether unconvertible characters are substituted instead of failing
bool enableSubst;
size_t doConversion(iconv_t cd, char *dst, size_t dstSize, char *src, size_t srcSize) const;
size_t iconvWrapper(iconv_t cd, char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft) const;
static void ucToMbFallback(
unsigned int code,
void (*callback) (const char *buf, size_t buflen, void* callback_arg),
void *callback_arg,
void *convPtr);
/// Replacement character for characters which do not fit in the target
/// encoding and iconv does not have an appropriate substitute for
char invalidRep[8];
size_t invalidRepSize;
#ifndef ICONV_POSIX
mutable iconv_fallbacks fallbacks;
#endif
// Not copyable: declared but not defined in this header
AegisubCSConv(const AegisubCSConv&);
AegisubCSConv& operator=(const AegisubCSConv&);
wxString localCharset;
#if wxUSE_THREADS
/// Serialises access to the conversion descriptors
mutable wxMutex iconvMutex;
#endif
protected:
/// Descriptors for multibyte-to-wide and wide-to-multibyte conversion
iconv_wrapper m2w, w2m;
// ToWChar and FromWChar are const in wxMBConv, but iconv can't be used
// immutably
mutable agi::charset::IconvWrapper conv;
};
// Predefined conversion for the current locale, intended to be a drop-in
// replacement for wxConvLocal
extern AegisubCSConv& csConvLocal;
// WCHAR_T_ENCODING: name of the iconv encoding that matches wchar_t on this
// platform. The width comes from SIZEOF_WCHAR_T (4 selects UTF-32, 2 selects
// UTF-16). The byte order comes from HAVE_BIG_ENDIAN / HAVE_LITTLE_ENDIAN; if
// neither is defined it is decided at run time with Endian::MachineToBig, in
// which case the macro expands to an expression rather than a string literal.
// NOTE(review): the macro is left undefined when SIZEOF_WCHAR_T is neither 2
// nor 4.
#ifdef HAVE_BIG_ENDIAN
# if SIZEOF_WCHAR_T == 4
# define WCHAR_T_ENCODING "UTF-32BE"
# elif SIZEOF_WCHAR_T == 2
# define WCHAR_T_ENCODING "UTF-16BE"
# endif
#elif defined(HAVE_LITTLE_ENDIAN)
# if SIZEOF_WCHAR_T == 4
# define WCHAR_T_ENCODING "UTF-32LE"
# elif SIZEOF_WCHAR_T == 2
# define WCHAR_T_ENCODING "UTF-16LE"
# endif
#else
// Endianness unknown at compile time: pick the name at run time
# if SIZEOF_WCHAR_T == 4
# define WCHAR_T_ENCODING ((Endian::MachineToBig((uint32_t)1) == 1) ? "UTF-32BE" : "UTF-32LE")
# elif SIZEOF_WCHAR_T == 2
# define WCHAR_T_ENCODING ((Endian::MachineToBig((uint32_t)1) == 1) ? "UTF-16BE" : "UTF-16LE")
# endif
#endif

View File

@ -7,8 +7,8 @@ wxArrayString lagi_MRU_wxAS(const wxString &list) {
const agi::MRUManager::MRUListMap *map_list = AegisubApp::Get()->mru->Get(STD_STR(list));
for (agi::MRUManager::MRUListMap::const_iterator i_lst = map_list->begin(); i_lst != map_list->end(); ++i_lst) {
work.Add(wxString(i_lst->second));
work.Add(wxString(i_lst->second.c_str(), wxConvUTF8));
}
return work;
return work;
}

View File

@ -8,8 +8,8 @@
#include <libaegisub/colour.h>
// Convert a wxString to a UTF-8 encoded std::string.
// Defined once only: a second definition using mb_str() would be a
// conflicting redefinition of the macro.
#define STD_STR(x) std::string(x.utf8_str())

// Build a wxColour from an agi::Colour
inline wxColour lagi_wxColour(const agi::Colour &colour) { return wxColour(colour); }

// Build a wxString from a std::string, decoding its bytes as UTF-8
inline wxString lagi_wxString(const std::string &str) { return wxString(str.c_str(), wxConvUTF8); }
wxArrayString lagi_MRU_wxAS(const wxString &list);

View File

@ -34,9 +34,6 @@
/// @ingroup export
///
///////////
// Headers
#include "config.h"
#ifndef AGI_PRE
@ -102,7 +99,7 @@ DialogExport::DialogExport (wxWindow *parent)
// Charset dropdown list
wxStaticText *charset_list_label = new wxStaticText(this, -1, _("Text encoding:"));
CharsetList = new wxChoice(this, Charset_List_Box, wxDefaultPosition, wxDefaultSize, AegisubCSConv::GetEncodingsList());
CharsetList = new wxChoice(this, Charset_List_Box, wxDefaultPosition, wxDefaultSize, agi::charset::GetEncodingsList<wxArrayString>());
wxSizer *charset_list_sizer = new wxBoxSizer(wxHORIZONTAL);
charset_list_sizer->Add(charset_list_label, 0, wxALIGN_CENTER | wxRIGHT, 5);
charset_list_sizer->Add(CharsetList, 1, wxEXPAND);
@ -219,6 +216,9 @@ void DialogExport::OnProcess(wxCommandEvent &event) {
wxString err(error);
wxMessageBox(err, _T("Error exporting subtitles"), wxOK | wxICON_ERROR, this);
}
catch (const agi::charset::ConvError& err) {
wxMessageBox(err.GetMessage(), _T("Error exporting subtitles"), wxOK | wxICON_ERROR, this);
}
catch (...) {
wxMessageBox(_T("Unknown error"), _T("Error exporting subtitles"), wxOK | wxICON_ERROR, this);
}

View File

@ -713,8 +713,7 @@ void FrameMain::LoadSubtitles (wxString filename,wxString charset) {
// Make sure that file isn't actually a timecode file
try {
TextFileReader testSubs(filename,charset);
charset = testSubs.GetCurrentEncoding();
isBinary = charset == _T("binary");
isBinary = testSubs.IsBinary();
if (!isBinary && testSubs.HasMoreLines()) {
wxString cur = testSubs.ReadLineFromFile();
if (cur.Left(10) == _T("# timecode")) {
@ -817,8 +816,7 @@ bool FrameMain::SaveSubtitles(bool saveas,bool withCharset) {
// Get charset
wxString charset = _T("");
if (withCharset) {
wxArrayString choices = AegisubCSConv::GetEncodingsList();
charset = wxGetSingleChoice(_("Choose charset code:"), _T("Charset"),choices,this,-1, -1,true,250,200);
charset = wxGetSingleChoice(_("Choose charset code:"), _T("Charset"),agi::charset::GetEncodingsList<wxArrayString>(),this,-1, -1,true,250,200);
if (charset.IsEmpty()) return false;
}

View File

@ -538,7 +538,7 @@ int FrameMain::AddMacroMenuItems(wxMenu *menu, const std::vector<Automation4::Fe
///
void FrameMain::OnOpenRecentSubs(wxCommandEvent &event) {
	// The offset of the menu id from Menu_File_Recent is the index into the MRU list
	int number = event.GetId()-Menu_File_Recent;
	// Load the entry exactly once, converting the MRU's std::string through lagi_wxString
	LoadSubtitles(lagi_wxString(AegisubApp::Get()->mru->GetEntry("Subtitle", number)));
}
@ -548,7 +548,7 @@ void FrameMain::OnOpenRecentSubs(wxCommandEvent &event) {
///
void FrameMain::OnOpenRecentVideo(wxCommandEvent &event) {
	// The offset of the menu id from Menu_Video_Recent is the index into the MRU list
	int number = event.GetId()-Menu_Video_Recent;
	// Load the entry exactly once, converting the MRU's std::string through lagi_wxString
	LoadVideo(lagi_wxString(AegisubApp::Get()->mru->GetEntry("Video", number)));
}
@ -558,7 +558,7 @@ void FrameMain::OnOpenRecentVideo(wxCommandEvent &event) {
///
void FrameMain::OnOpenRecentTimecodes(wxCommandEvent &event) {
	// The offset of the menu id from Menu_Timecodes_Recent is the index into the MRU list
	int number = event.GetId()-Menu_Timecodes_Recent;
	// Load the entry exactly once, converting the MRU's std::string through lagi_wxString
	LoadVFR(lagi_wxString(AegisubApp::Get()->mru->GetEntry("Timecodes", number)));
}
@ -568,7 +568,7 @@ void FrameMain::OnOpenRecentTimecodes(wxCommandEvent &event) {
///
void FrameMain::OnOpenRecentKeyframes(wxCommandEvent &event) {
int number = event.GetId()-Menu_Keyframes_Recent;
KeyFrameFile::Load(AegisubApp::Get()->mru->GetEntry("Keyframes", number));
KeyFrameFile::Load(lagi_wxString(AegisubApp::Get()->mru->GetEntry("Keyframes", number)));
videoBox->videoSlider->Refresh();
audioBox->audioDisplay->Update();
Refresh();
@ -581,7 +581,7 @@ void FrameMain::OnOpenRecentKeyframes(wxCommandEvent &event) {
///
void FrameMain::OnOpenRecentAudio(wxCommandEvent &event) {
	// The offset of the menu id from Menu_Audio_Recent is the index into the MRU list
	int number = event.GetId()-Menu_Audio_Recent;
	// A recent *audio* entry has to go to LoadAudio; passing it to
	// LoadSubtitles would try to open the audio file as a subtitle file
	LoadAudio(lagi_wxString(AegisubApp::Get()->mru->GetEntry("Audio", number)));
}
@ -805,13 +805,12 @@ void FrameMain::OnOpenSubtitles(wxCommandEvent& WXUNUSED(event)) {
///
void FrameMain::OnOpenSubtitlesCharset(wxCommandEvent& WXUNUSED(event)) {
// Initialize charsets
wxArrayString choices = AegisubCSConv::GetEncodingsList();
wxString path = lagi_wxString(OPT_GET("Path/Last/Subtitles")->GetString());
// Get options and load
wxString filename = wxFileSelector(_("Open subtitles file"),path,_T(""),_T(""),AssFile::GetWildcardList(0),wxFD_OPEN | wxFD_FILE_MUST_EXIST);
if (!filename.empty()) {
wxString charset = wxGetSingleChoice(_("Choose charset code:"), _("Charset"),choices,this,-1, -1,true,250,200);
wxString charset = wxGetSingleChoice(_("Choose charset code:"), _("Charset"),agi::charset::GetEncodingsList<wxArrayString>(),this,-1, -1,true,250,200);
if (!charset.empty()) {
LoadSubtitles(filename,charset);
}

View File

@ -300,7 +300,7 @@ void HotkeyManager::Load() {
TextFileReader file(filename);
wxString header;
try {
if (file.GetCurrentEncoding() != _T("binary"))
if (!file.IsBinary())
header = file.ReadLineFromFile();
}
catch (wxString e) {

View File

@ -263,6 +263,10 @@ emit_stdout->Enable();
wxMessageBox(err,_T("Fatal error while initializing"));
return false;
}
catch (agi::Exception const& e) {
wxMessageBox(e.GetMessage(),_T("Fatal error while initializing"));
return false;
}
catch (...) {
wxMessageBox(_T("Unhandled exception"),_T("Fatal error while initializing"));

View File

@ -29,6 +29,7 @@
#include <libaegisub/exception.h>
#include "colour_button.h"
#include "compat.h"
#include "libresrc/libresrc.h"
#include "preferences.h"
#include "main.h"
@ -172,7 +173,7 @@ void Preferences::OptionAdd(wxPanel *parent, wxFlexGridSizer *flex, const wxStri
case agi::OptionValue::Type_String: {
flex->Add(new wxStaticText(parent, wxID_ANY, name), 1, wxALIGN_CENTRE_VERTICAL);
wxTextCtrl *text = new wxTextCtrl(parent, wxID_ANY , opt->GetString(), wxDefaultPosition, wxDefaultSize);
wxTextCtrl *text = new wxTextCtrl(parent, wxID_ANY , lagi_wxString(opt->GetString()), wxDefaultPosition, wxDefaultSize);
flex->Add(text, 1, wxEXPAND);
break;
}

View File

@ -59,6 +59,8 @@
#include "options.h"
#include "spellchecker_hunspell.h"
#include "standard_paths.h"
#include "text_file_reader.h"
#include "text_file_writer.h"
#include "utils.h"
@ -66,6 +68,7 @@
// Start with no dictionary and no converters, then load the language that is
// configured in the "Tool/Spell Checker/Language" option
HunspellSpellChecker::HunspellSpellChecker()
: hunspell(NULL)
, conv(NULL)
, rconv(NULL)
{
	SetLanguage(lagi_wxString(OPT_GET("Tool/Spell Checker/Language")->GetString()));
}
@ -84,6 +87,8 @@ void HunspellSpellChecker::Reset() {
hunspell = NULL;
delete conv;
conv = NULL;
delete rconv;
rconv = NULL;
affpath.Clear();
dicpath.Clear();
}
@ -96,8 +101,13 @@ void HunspellSpellChecker::Reset() {
///
bool HunspellSpellChecker::CanAddWord(wxString word) {
	// Nothing can be added without a loaded dictionary
	if (!hunspell) return false;

	// The word can be added if conv is able to convert it; the converted
	// text itself is not needed here, only whether the conversion throws
	try {
		conv->Convert(word);
		return true;
	}
	catch (agi::charset::ConvError const&) {
		return false;
	}
}
@ -111,9 +121,9 @@ void HunspellSpellChecker::AddWord(wxString word) {
// Add to currently loaded file
#ifdef WITH_OLD_HUNSPELL
hunspell->put_word(word.mb_str(*conv));
hunspell->put_word(conv->Convert(word).c_str());
#else
hunspell->add(word.mb_str(*conv));
hunspell->add(conv->Convert(word).c_str());
#endif
// Ensure that the path exists
@ -124,22 +134,14 @@ void HunspellSpellChecker::AddWord(wxString word) {
// Load dictionary
wxArrayString dic;
wxString curLine;
bool added = false;
if (fn.FileExists()) { // Even if you ever want to remove this "if", keep the braces, so the stream closes at the end
bool first = true;
wxFileInputStream in(usrdicpath);
if (!in.IsOk()) return;
wxTextInputStream textIn(in,_T(" \t"),*conv);
// Read it
while (in.CanRead() && !in.Eof()) {
// Read line
curLine = textIn.ReadLine();
curLine.Trim();
TextFileReader reader(usrdicpath, L"UTF-8");
while (reader.HasMoreLines()) {
wxString curLine = reader.ReadLineFromFile();
if (curLine.IsEmpty()) continue;
// First
if (first) {
first = false;
if (curLine.IsNumber()) continue;
@ -160,11 +162,14 @@ void HunspellSpellChecker::AddWord(wxString word) {
if (!added) dic.Add(word);
// Write back to disk
wxFileOutputStream out(usrdicpath);
if (!out.IsOk()) return;
wxTextOutputStream textOut(out,wxEOL_UNIX,*conv);
textOut.WriteString(wxString::Format(_T("%i"),dic.Count())+_T("\n"));
for (unsigned int i=0;i<dic.Count();i++) textOut.WriteString(dic[i]+_T("\n"));
try {
TextFileWriter writer(usrdicpath, L"UTF-8");
writer.WriteLineToFile(wxString::Format(L"%i", dic.Count()));
for (unsigned int i=0;i<dic.Count();i++) writer.WriteLineToFile(dic[i]);
}
catch (const wchar_t*) {
// Failed to open file
}
}
@ -175,9 +180,12 @@ void HunspellSpellChecker::AddWord(wxString word) {
///
bool HunspellSpellChecker::CheckWord(wxString word) {
	// With no dictionary loaded every word is accepted
	if (!hunspell) return true;

	try {
		// The converted string is a temporary that lives until spell() returns
		return hunspell->spell(conv->Convert(word).c_str()) == 1;
	}
	catch (agi::charset::ConvError const&) {
		// A word that cannot be converted for the dictionary is reported as misspelled
		return false;
	}
}
@ -187,31 +195,26 @@ bool HunspellSpellChecker::CheckWord(wxString word) {
/// @return List of suggestions
///
wxArrayString HunspellSpellChecker::GetSuggestions(wxString word) {
	wxArrayString suggestions;
	if (!hunspell) return suggestions;

	try {
		// Grab raw from Hunspell. If converting the word throws, suggest() is
		// never called and there is nothing to release.
		char **results;
		int n = hunspell->suggest(&results,conv->Convert(word).c_str());

		// Convert each
		for (int i=0;i<n;i++) {
			// Convert per entry so that one unconvertible suggestion neither
			// discards the others nor leaves the remaining entries unreleased
			try {
				suggestions.Add(rconv->Convert(results[i]));
			}
			catch (agi::charset::ConvError const&) {
				// Skip this suggestion
			}
			// NOTE(review): Hunspell documents free_list() for releasing the
			// array returned by suggest(); `delete` here and below may not be
			// the matching deallocator - confirm against the Hunspell version
			// in use.
			delete results[i];
		}

		delete results;
	}
	catch (agi::charset::ConvError const&) {
		// The word itself could not be converted for the dictionary
		return suggestions;
	}

	return suggestions;
}
@ -279,25 +282,23 @@ void HunspellSpellChecker::SetLanguage(wxString language) {
hunspell = new Hunspell(affpath.mb_str(csConvLocal),dicpath.mb_str(csConvLocal));
conv = NULL;
if (hunspell) {
conv = new AegisubCSConv(wxString(hunspell->get_dic_encoding(),wxConvUTF8));
// Load user dictionary
if (wxFileExists(usrdicpath)) {
wxFileInputStream in(usrdicpath);
if (!in.IsOk()) return;
wxTextInputStream textIn(in,_T(" \t"),*conv);
while (in.CanRead() && !in.Eof()) {
// Read line
wxString curLine = textIn.ReadLine();
curLine.Trim();
conv = new agi::charset::IconvWrapper("wchar_t", hunspell->get_dic_encoding());
rconv = new agi::charset::IconvWrapper(hunspell->get_dic_encoding(), "wchar_t");
try {
TextFileReader reader(usrdicpath, L"UTF-8");
while (reader.HasMoreLines()) {
wxString curLine = reader.ReadLineFromFile();
if (curLine.IsEmpty() || curLine.IsNumber()) continue;
#ifdef WITH_OLD_HUNSPELL
hunspell->put_word(curLine.mb_str(*conv));
hunspell->put_word(conv->Convert(curLine).c_str());
#else
hunspell->add(curLine.mb_str(*conv));
hunspell->add(conv->Convert(curLine).c_str());
#endif
}
}
catch (const wchar_t *) {
// file not found
}
}
}

View File

@ -43,6 +43,11 @@
#include <hunspell/hunspell.hxx>
#include "include/aegisub/spellchecker.h"
namespace agi {
namespace charset {
class IconvWrapper;
}
}
/// @class HunspellSpellChecker
@ -55,7 +60,8 @@ private:
Hunspell *hunspell;
/// Conversion buffer
wxMBConv *conv;
agi::charset::IconvWrapper *conv;
agi::charset::IconvWrapper *rconv;
/// Path to .aff file
wxString affpath;

View File

@ -51,8 +51,15 @@
#include "charset_detect.h"
#include "text_file_reader.h"
TextFileReader::TextFileReader(wxString filename, wxString enc, bool trim)
: encoding(enc), conv((iconv_t)-1), trim(trim), readComplete(false), currout(0), outptr(0), currentLine(0) {
TextFileReader::TextFileReader(wxString const& filename, wxString encoding, bool trim)
: isBinary(false)
, conv()
, trim(trim)
, readComplete(false)
, currout(0)
, outptr(0)
, currentLine(0)
{
#ifdef __WINDOWS__
file.open(filename.wc_str(),std::ios::in | std::ios::binary);
#else
@ -61,16 +68,14 @@ TextFileReader::TextFileReader(wxString filename, wxString enc, bool trim)
if (!file.is_open()) throw L"Failed opening file for reading.";
if (encoding.IsEmpty()) encoding = CharSetDetect::GetEncoding(filename);
if (encoding == L"binary") return;
encoding = AegisubCSConv::GetRealEncodingName(encoding);
conv = iconv_open(WCHAR_T_ENCODING, encoding.ToAscii());
if (conv == (iconv_t)-1) {
throw wxString::Format(L"Character set '%s' is not supported.", enc.c_str());
if (encoding == L"binary") {
isBinary = true;
return;
}
conv.reset(new agi::charset::IconvWrapper(encoding.c_str(), "wchar_t"));
}
// Nothing to release by hand: the constructor stores the converter through
// conv.reset(), so conv owns it and destroys it. The destructor is still
// defined out of line because the header only forward-declares IconvWrapper.
TextFileReader::~TextFileReader() {
}
wchar_t TextFileReader::GetWChar() {
@ -98,7 +103,8 @@ wchar_t TextFileReader::GetWChar() {
return 0;
do {
size_t ret = iconv(conv, &inptr, &inbytesleft, reinterpret_cast<char **>(&outptr), &outbytesleft);
// Without this const_cast the wrong overload is chosen
size_t ret = conv->Convert(const_cast<const char**>(&inptr), &inbytesleft, reinterpret_cast<char **>(&outptr), &outbytesleft);
if (ret != (size_t)-1) break;
int err = errno;
@ -144,7 +150,6 @@ wxString TextFileReader::ReadLineFromFile() {
if (ch == 0)
readComplete = true;
// Trim
if (trim) {
buffer.Trim(true);
buffer.Trim(false);
@ -155,7 +160,3 @@ wxString TextFileReader::ReadLineFromFile() {
// True until the readComplete flag has been set
bool TextFileReader::HasMoreLines() {
	const bool finished = readComplete;
	return !finished;
}
// Return a copy of the reader's encoding member
wxString TextFileReader::GetCurrentEncoding() {
return encoding;
}

View File

@ -38,21 +38,23 @@
#ifndef AGI_PRE
#include <fstream>
#include <iconv.h>
#include <memory>
#include <wx/dynarray.h>
#include <wx/string.h>
#endif
namespace agi { namespace charset {
class IconvWrapper;
} }
/// @class TextFileReader
/// @brief A line-based text file reader
class TextFileReader {
private:
/// Encoding of the file being read
wxString encoding;
bool isBinary;
std::ifstream file;
iconv_t conv;
std::auto_ptr<agi::charset::IconvWrapper> conv;
bool trim;
bool readComplete;
@ -76,7 +78,7 @@ public:
/// @param filename File to open
/// @param enc Encoding to use, or empty to autodetect
/// @param trim Whether to trim whitespace from lines read
TextFileReader(wxString filename,wxString encoding=L"", bool trim=true);
TextFileReader(wxString const& filename,wxString encoding=L"", bool trim=true);
/// @brief Destructor
~TextFileReader();
@ -85,8 +87,5 @@ public:
wxString ReadLineFromFile();
/// @brief Check if there are any more lines to read
bool HasMoreLines();
/// @brief Get the file encoding used by this reader
/// @return "unknown", "binary", or a character encoding name
wxString GetCurrentEncoding();
bool IsBinary() { return isBinary; }
};

View File

@ -51,7 +51,7 @@
/// @param filename
/// @param encoding
///
TextFileWriter::TextFileWriter(wxString filename, wxString encoding)
TextFileWriter::TextFileWriter(wxString const& filename, wxString encoding)
: conv() {
#ifdef WIN32
file.open(filename.wc_str(),std::ios::out | std::ios::binary | std::ios::trunc);
@ -59,17 +59,17 @@ TextFileWriter::TextFileWriter(wxString filename, wxString encoding)
file.open(wxFNCONV(filename),std::ios::out | std::ios::binary | std::ios::trunc);
#endif
if (!file.is_open()) {
throw _T("Failed opening file for writing.");
throw L"Failed opening file for writing.";
}
if (encoding.IsEmpty()) encoding = lagi_wxString(OPT_GET("App/Save Charset")->GetString());
conv.reset(new AegisubCSConv(encoding, true));
if (encoding.empty()) encoding = lagi_wxString(OPT_GET("App/Save Charset")->GetString());
conv.reset(new agi::charset::IconvWrapper("utf-8", encoding.c_str(), true));
// Write the BOM
try {
WriteLineToFile(_T("\uFEFF"), false);
WriteLineToFile(L"\uFEFF", false);
}
catch (wxString ignore) {
catch (agi::charset::ConversionFailure&) {
// If the BOM could not be converted to the target encoding it isn't needed
}
}
@ -85,14 +85,11 @@ TextFileWriter::~TextFileWriter() {
/// @brief Write a line of text to the file in the writer's target encoding
/// @param line         Text to write
/// @param addLineBreak Whether a line break is appended after the text
///
/// NOTE(review): the body below contains two complete write paths. The first
/// (temp / mb_str(*conv) / MBBuffLen) appends "\r\n"; the second
/// (conv->Convert on the UTF-8 form of line) appends "\n". It also declares
/// `buf` twice. As written, each line would be emitted twice with different
/// line endings - confirm which path and which line ending are intended.
void TextFileWriter::WriteLineToFile(wxString line, bool addLineBreak) {
// First path: copy of the line with a CRLF line break
wxString temp = line;
if (addLineBreak) temp += _T("\r\n");
// Second path: the line itself with an LF line break
if (addLineBreak) line += L"\n";
wxCharBuffer buf = temp.mb_str(*conv);
// Only written when the conversion produced a buffer
if (buf.data())
file.write(buf.data(), conv->MBBuffLen(buf.data()));
// Convert the UTF-8 form of the line and write exactly the bytes produced
std::string buf = conv->Convert(line.utf8_str().data());
file.write(buf.data(), buf.size());
}

View File

@ -43,8 +43,11 @@
#include <wx/string.h>
#endif
class AegisubCSConv;
namespace agi {
namespace charset {
class IconvWrapper;
}
}
/// DOCME
@ -59,13 +62,13 @@ private:
std::ofstream file;
/// DOCME
std::auto_ptr<AegisubCSConv> conv;
std::auto_ptr<agi::charset::IconvWrapper> conv;
TextFileWriter(const TextFileWriter&);
TextFileWriter& operator=(const TextFileWriter&);
public:
TextFileWriter(wxString filename, wxString encoding=_T(""));
TextFileWriter(wxString const& filename, wxString encoding="");
~TextFileWriter();
void WriteLineToFile(wxString line, bool addLineBreak=true);

View File

@ -69,7 +69,7 @@ VideoProvider *VideoProviderFactoryManager::GetProvider(wxString video) {
}
try {
VideoProvider *y4m_provider = new YUV4MPEGVideoProvider(video.wc_str());
VideoProvider *y4m_provider = new YUV4MPEGVideoProvider(video);
if (y4m_provider)
y4m_provider = new VideoProviderCache(y4m_provider);
return y4m_provider;
@ -92,7 +92,7 @@ VideoProvider *VideoProviderFactoryManager::GetProvider(wxString video) {
for (unsigned int i=0;i<list.Count();i++) {
try {
// Create provider
VideoProvider *provider = GetFactory(list[i])->CreateProvider(video.wc_str());
VideoProvider *provider = GetFactory(list[i])->CreateProvider(video);
if (provider) {
// Cache if necessary
if (provider->WantsCaching()) {

View File

@ -12,6 +12,7 @@ run_SOURCES = \
util_unix.cpp \
libaegisub_access.cpp \
libaegisub_cajun.cpp \
libaegisub_iconv.cpp \
libaegisub_util.cpp \
libaegisub_mru.cpp

View File

@ -0,0 +1,138 @@
// Copyright (c) 2010, Thomas Goyne <plorkyeran@aegisub.org>
//
// Permission to use, copy, modify, and distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
//
// $Id$
/// @file libaegisub_iconv.cpp
/// @brief agi::charset
/// @ingroup iconv
#include <stdint.h>
#include <libaegisub/charset_conv.h>
#include "main.h"
#include "util.h"
using namespace agi::charset;
/// Constructing a UTF-8 to UTF-16LE converter is expected not to throw.
TEST(lagi_iconv, BasicSetup) {
	const char *const source_charset = "UTF-8";
	const char *const dest_charset = "UTF-16LE";
	EXPECT_NO_THROW(IconvWrapper(source_charset, dest_charset));
}
/// An unknown charset name on either side (or both) of the conversion is
/// expected to make the constructor throw UnsupportedConversion.
TEST(lagi_iconv, InvalidConversions) {
	const char *const bogus = "nonexistent charset";
	const char *const known = "UTF-16LE";

	// Unknown source
	EXPECT_THROW(IconvWrapper(bogus, known), UnsupportedConversion);
	// Unknown destination
	EXPECT_THROW(IconvWrapper(known, bogus), UnsupportedConversion);
	// Both unknown
	EXPECT_THROW(IconvWrapper(bogus, bogus), UnsupportedConversion);
}
/// For a UTF-8 to UTF-8 converter, SrcStrLen and DstStrLen are both expected
/// to return the number of spaces in a string of 0 through 9 spaces.
TEST(lagi_iconv, StrLen1) {
	IconvWrapper conv("UTF-8", "UTF-8", false);

	// Grow the string by one space per pass rather than rebuilding it
	std::string spaces;
	for (int len = 0; len < 10; ++len) {
		const char *text = spaces.c_str();
		ASSERT_EQ(len, conv.SrcStrLen(text));
		ASSERT_EQ(len, conv.DstStrLen(text));
		spaces += ' ';
	}
}
/// For a UTF-16LE to UTF-16LE converter, SrcStrLen and DstStrLen are both
/// expected to return the length in bytes (two per unit, terminator excluded)
/// of a buffer holding 0 through 9 two-byte spaces.
TEST(lagi_iconv, StrLen2) {
	IconvWrapper conv("UTF-16LE", "UTF-16LE", false);
	for (int i = 0; i < 10; i++) {
		// std::basic_string<int16_t> would need std::char_traits<int16_t>,
		// which the standard library is not required to provide, so the
		// null-terminated buffer is built by hand instead.
		std::vector<int16_t> str(static_cast<size_t>(i), static_cast<int16_t>(' '));
		str.push_back(0);
		// Never empty thanks to the terminator, so &str[0] is always valid
		const char *bytes = reinterpret_cast<const char *>(&str[0]);
		ASSERT_EQ(2*i, conv.SrcStrLen(bytes));
		ASSERT_EQ(2*i, conv.DstStrLen(bytes));
	}
}
/// For a UTF-32LE to UTF-32LE converter, SrcStrLen and DstStrLen are both
/// expected to return the length in bytes (four per unit, terminator excluded)
/// of a buffer holding 0 through 9 four-byte spaces.
TEST(lagi_iconv, StrLen4) {
	IconvWrapper conv("UTF-32LE", "UTF-32LE", false);
	for (int i = 0; i < 10; i++) {
		// std::basic_string<int32_t> would need std::char_traits<int32_t>,
		// which the standard library is not required to provide, so the
		// null-terminated buffer is built by hand instead.
		std::vector<int32_t> str(static_cast<size_t>(i), static_cast<int32_t>(' '));
		str.push_back(0);
		// Never empty thanks to the terminator, so &str[0] is always valid
		const char *bytes = reinterpret_cast<const char *>(&str[0]);
		ASSERT_EQ(4*i, conv.SrcStrLen(bytes));
		ASSERT_EQ(4*i, conv.DstStrLen(bytes));
	}
}
/// Checks how characters that the destination charset cannot represent are
/// handled, depending on the third constructor argument.
///
/// Each UTF-8 input is run through three converters: UTF-8 to Shift-JIS with
/// the flag off (expected to throw BadOutput), UTF-8 to Shift-JIS with the
/// flag on (expected to succeed with a substitute), and UTF-8 to UTF-16LE
/// with the flag off (expected to succeed, as no substitute is needed).
TEST(lagi_iconv, Fallbacks) {
	IconvWrapper strict("UTF-8", "Shift-JIS", false);
	IconvWrapper lenient("UTF-8", "Shift-JIS", true);
	IconvWrapper lossless("UTF-8", "UTF-16LE", false);

	// Shift-JIS does not have a backslash; the lenient converter is expected
	// to hand it back unchanged
	const char backslash[] = "\\";
	EXPECT_THROW(strict.Convert(backslash), BadOutput);
	ASSERT_NO_THROW(lenient.Convert(backslash));
	EXPECT_EQ(backslash, lenient.Convert(backslash));
	EXPECT_NO_THROW(lossless.Convert(backslash));

	// UTF-8 BOM into a non-unicode charset; the lenient converter is expected
	// to drop it entirely
	const char utf8_bom[] = "\xEF\xBB\xBF";
	EXPECT_THROW(strict.Convert(utf8_bom), BadOutput);
	ASSERT_NO_THROW(lenient.Convert(utf8_bom));
	EXPECT_EQ("", lenient.Convert(utf8_bom));
	EXPECT_NO_THROW(lossless.Convert(utf8_bom));

	// A snowman (U+2603); the lenient converter is expected to substitute '?'
	const char utf8_snowman[] = "\xE2\x98\x83";
	EXPECT_THROW(strict.Convert(utf8_snowman), BadOutput);
	EXPECT_NO_THROW(lossless.Convert(utf8_snowman));
	ASSERT_NO_THROW(lenient.Convert(utf8_snowman));
	EXPECT_EQ("?", lenient.Convert(utf8_snowman));
}
/// Input that is not valid in the source charset is expected to make
/// Convert throw BadInput.
TEST(lagi_iconv, BadInput) {
	// A single byte cannot form a complete two-byte UTF-16 code unit
	const char lone_byte[] = " ";
	IconvWrapper from_utf16("UTF-16LE", "UTF-8");
	EXPECT_THROW(from_utf16.Convert(lone_byte), BadInput);

	// 0xE2 opens a multi-byte sequence, but 0xFF is never valid in UTF-8
	const char broken_sequence[] = "\xE2\xFF";
	IconvWrapper from_utf8("UTF-8", "UTF-16LE");
	EXPECT_THROW(from_utf8.Convert(broken_sequence), BadInput);
}
/// Converts a single space between UTF-8, UTF-16LE and UTF-16BE and checks
/// that the exact expected bytes come out.
TEST(lagi_iconv, Conversions) {
	IconvWrapper from_utf16le("UTF-16LE", "UTF-8", false);
	IconvWrapper from_utf16be("UTF-16BE", "UTF-8", false);
	IconvWrapper to_utf16le("UTF-8", "UTF-16LE", false);

	// U+0020 in each encoding; the UTF-16 forms contain a zero byte, so they
	// are built with an explicit length
	const char be_bytes[] = {0, 32};
	const char le_bytes[] = {32, 0};
	const std::string utf8_space(" ");
	const std::string utf16be_space(be_bytes, sizeof(be_bytes));
	const std::string utf16le_space(le_bytes, sizeof(le_bytes));

	EXPECT_EQ(utf8_space, from_utf16le.Convert(utf16le_space));
	EXPECT_EQ(utf8_space, from_utf16be.Convert(utf16be_space));
	EXPECT_EQ(utf16le_space, to_utf16le.Convert(utf8_space));
}
/// Basic overflow tests for the raw-buffer form of Convert.
///
/// One source byte (the terminator of "") is converted to UTF-16LE, which
/// needs two bytes of output. With less room, BufferTooSmall is expected and
/// the buffer's first byte must be left alone; with exactly two bytes, only
/// those two may be written.
TEST(lagi_iconv, Buffer) {
	IconvWrapper conv("UTF-8", "UTF-16LE", false);

	// Pre-fill with a sentinel so any write into the buffer is detectable
	const char sentinel = '\xFF';
	char out[32];
	memset(out, 0xFF, sizeof(out));

	// No room at all
	EXPECT_THROW(conv.Convert("", 1, out, 0), BufferTooSmall);
	EXPECT_EQ(sentinel, out[0]);

	// Room for only half of the output
	EXPECT_THROW(conv.Convert("", 1, out, 1), BufferTooSmall);
	EXPECT_EQ(sentinel, out[0]);

	// Exactly enough room: two zero bytes written, third byte untouched
	EXPECT_NO_THROW(conv.Convert("", 1, out, 2));
	EXPECT_EQ('\0', out[0]);
	EXPECT_EQ('\0', out[1]);
	EXPECT_EQ(sentinel, out[2]);
}
/// An empty destination charset name must be accepted, and a space must
/// survive the conversion unchanged.
/// NOTE(review): "" presumably selects the current locale's charset, going by
/// the test name -- confirm against IconvWrapper.
TEST(lagi_iconv, LocalSupport) {
	const char *const unnamed_charset = "";
	ASSERT_NO_THROW(IconvWrapper("UTF-8", unnamed_charset));

	IconvWrapper conv("UTF-8", unnamed_charset);
	const char space[] = " ";
	ASSERT_NO_THROW(conv.Convert(space));
	EXPECT_EQ(space, conv.Convert(space));
}
/// "wchar_t" must be accepted as a destination charset name.
TEST(lagi_iconv, wchar_tSupport) {
	const char *const source_charset = "UTF-8";
	const char *const dest_charset = "wchar_t";
	EXPECT_NO_THROW(IconvWrapper(source_charset, dest_charset));
}