Replaced most wx-based charset conversions with a custom iconv-based conversion. Closes #639, #666, #837, #849 and #877.

Originally committed to SVN as r3137.
This commit is contained in:
Thomas Goyne 2009-07-14 21:28:49 +00:00
parent dac40729e0
commit 0ea2c53c1a
30 changed files with 847 additions and 520 deletions

View File

@ -823,6 +823,14 @@
RelativePath="..\..\src\aegisublocale.h"
>
</File>
<File
RelativePath="..\..\src\charset_conv.cpp"
>
</File>
<File
RelativePath="..\..\src\charset_conv.h"
>
</File>
<File
RelativePath="..\..\src\charset_detect.cpp"
>

View File

@ -247,6 +247,7 @@ aegisub_2_1_SOURCES = \
avisynth_wrap.cpp \
base_grid.cpp \
browse_button.cpp \
charset_conv.cpp \
colorspace.cpp \
colour_button.cpp \
dialog_about.cpp \

View File

@ -98,14 +98,14 @@ namespace Endian {
inline uint64_t Reverse(uint64_t val)
{
return
((val & 0x00000000000000FF) << 56) |
((val & 0x000000000000FF00) << 40) |
((val & 0x0000000000FF0000) << 24) |
((val & 0x00000000FF000000) << 8) |
((val & 0x000000FF00000000) >> 8) |
((val & 0x0000FF0000000000) >> 24) |
((val & 0x00FF000000000000) >> 40) |
((val & 0xFF00000000000000) >> 56);
((val & 0x00000000000000FFULL) << 56) |
((val & 0x000000000000FF00ULL) << 40) |
((val & 0x0000000000FF0000ULL) << 24) |
((val & 0x00000000FF000000ULL) << 8) |
((val & 0x000000FF00000000ULL) >> 8) |
((val & 0x0000FF0000000000ULL) >> 24) |
((val & 0x00FF000000000000ULL) >> 40) |
((val & 0xFF00000000000000ULL) >> 56);
}
@ -283,28 +283,28 @@ namespace Endian {
inline uint64_t MachineToBig(uint64_t val)
{
bytes64 pack;
pack.byte[0] = (val & 0xFF00000000000000) >> 56;
pack.byte[1] = (val & 0x00FF000000000000) >> 48;
pack.byte[2] = (val & 0x0000FF0000000000) >> 40;
pack.byte[3] = (val & 0x000000FF00000000) >> 32;
pack.byte[4] = (val & 0x00000000FF000000) >> 24;
pack.byte[5] = (val & 0x0000000000FF0000) >> 16;
pack.byte[6] = (val & 0x000000000000FF00) >> 8;
pack.byte[7] = val & 0x00000000000000FF ;
pack.byte[0] = (val & 0xFF00000000000000ULL) >> 56;
pack.byte[1] = (val & 0x00FF000000000000ULL) >> 48;
pack.byte[2] = (val & 0x0000FF0000000000ULL) >> 40;
pack.byte[3] = (val & 0x000000FF00000000ULL) >> 32;
pack.byte[4] = (val & 0x00000000FF000000ULL) >> 24;
pack.byte[5] = (val & 0x0000000000FF0000ULL) >> 16;
pack.byte[6] = (val & 0x000000000000FF00ULL) >> 8;
pack.byte[7] = val & 0x00000000000000FFULL ;
return pack.word;
}
inline uint64_t MachineToLittle(uint64_t val)
{
bytes64 pack;
pack.byte[0] = val & 0x00000000000000FF ;
pack.byte[1] = (val & 0x000000000000FF00) >> 8;
pack.byte[2] = (val & 0x0000000000FF0000) >> 16;
pack.byte[3] = (val & 0x00000000FF000000) >> 24;
pack.byte[4] = (val & 0x000000FF00000000) >> 32;
pack.byte[5] = (val & 0x0000FF0000000000) >> 40;
pack.byte[6] = (val & 0x00FF000000000000) >> 48;
pack.byte[7] = (val & 0xFF00000000000000) >> 56;
pack.byte[0] = val & 0x00000000000000FFULL ;
pack.byte[1] = (val & 0x000000000000FF00ULL) >> 8;
pack.byte[2] = (val & 0x0000000000FF0000ULL) >> 16;
pack.byte[3] = (val & 0x00000000FF000000ULL) >> 24;
pack.byte[4] = (val & 0x000000FF00000000ULL) >> 32;
pack.byte[5] = (val & 0x0000FF0000000000ULL) >> 40;
pack.byte[6] = (val & 0x00FF000000000000ULL) >> 48;
pack.byte[7] = (val & 0xFF00000000000000ULL) >> 56;
return pack.word;
}

View File

@ -45,6 +45,7 @@
#include "audio_provider_manager.h"
#include "options.h"
#include "utils.h"
#include "charset_conv.h"
// Uncomment to enable debug features.
//#define PORTAUDIO_DEBUG
@ -120,7 +121,7 @@ void PortAudioPlayer::OpenStream() {
if (pa_err->errorCode != 0) {
wxLogDebug(_T("PortAudioPlayer::OpenStream HostError: API: %d, %s (%ld)\n"), pa_err->hostApiType, pa_err->errorText, pa_err->errorCode);
}
throw wxString(_T("Failed initializing PortAudio stream with error: ") + wxString(Pa_GetErrorText(err),wxConvLocal));
throw wxString(_T("Failed initializing PortAudio stream with error: ") + wxString(Pa_GetErrorText(err),csConvLocal));
}
}

View File

@ -47,6 +47,7 @@
#include "utils.h"
#include "options.h"
#include "standard_paths.h"
#include "charset_conv.h"
//////////////
@ -92,7 +93,7 @@ void AvisynthAudioProvider::OpenAVSAudio() {
// Include
if (filename.EndsWith(_T(".avs"))) {
wxFileName fn(filename);
char *fname = env->SaveString(fn.GetShortPath().mb_str(wxConvLocal));
char *fname = env->SaveString(fn.GetShortPath().mb_str(csConvLocal));
script = env->Invoke("Import", fname);
}
@ -100,12 +101,12 @@ void AvisynthAudioProvider::OpenAVSAudio() {
else {
wxFileName fn(filename);
const char * argnames[3] = { 0, "video", "audio" };
AVSValue args[3] = { env->SaveString(fn.GetShortPath().mb_str(wxConvLocal)), false, true };
AVSValue args[3] = { env->SaveString(fn.GetShortPath().mb_str(csConvLocal)), false, true };
// Load DirectShowSource.dll from app dir if it exists
wxFileName dsspath(StandardPaths::DecodePath(_T("?data/DirectShowSource.dll")));
if (dsspath.FileExists()) {
env->Invoke("LoadPlugin",env->SaveString(dsspath.GetShortPath().mb_str(wxConvLocal)));
env->Invoke("LoadPlugin",env->SaveString(dsspath.GetShortPath().mb_str(csConvLocal)));
}
// Load audio with DSS if it exists
@ -122,7 +123,7 @@ void AvisynthAudioProvider::OpenAVSAudio() {
}
catch (AvisynthError &err) {
throw wxString::Format(_T("AviSynth error: %s"), wxString(err.msg,wxConvLocal));
throw wxString::Format(_T("AviSynth error: %s"), wxString(err.msg,csConvLocal));
}
}
@ -139,7 +140,7 @@ void AvisynthAudioProvider::LoadFromClip(AVSValue _clip) {
// Convert to one channel
char buffer[1024];
strcpy(buffer,Options.AsText(_T("Audio Downmixer")).mb_str(wxConvLocal));
strcpy(buffer,Options.AsText(_T("Audio Downmixer")).mb_str(csConvLocal));
script = env->Invoke(buffer, _clip);
// Convert to 16 bits per sample

View File

@ -0,0 +1,432 @@
// Copyright (c) 2009, Thomas Goyne
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of the Aegisub Group nor the names of its contributors
// may be used to endorse or promote products derived from this software
// without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// -----------------------------------------------------------------------------
//
// AEGISUB
//
// Website: http://www.aegisub.net/
// Contact: mailto:zeratul@cellosoft.com
//
#include "charset_conv.h"
#include <stdint.h>
#if wxUSE_THREADS
// Serialises the lazy construction of supportedEncodings and the pretty-name tables below;
// locked by GetAllSupportedEncodings() and GetEncodingsList()
static wxMutex encodingListMutex;
#endif
// Values this file compares against to detect failure of iconv_open() and iconv() respectively
static const iconv_t iconv_invalid = (iconv_t)-1;
static const size_t iconv_failed = (size_t)-1;
// Cast a pointer to the input-buffer type iconv() expects on this platform.
// NOTE(review): ICONV_CONST is not defined in this file and the macro is not used in the
// code visible here -- confirm the build system provides ICONV_CONST before relying on it.
#define ICONV_CONST_CAST(a) const_cast<ICONV_CONST char *>(a)
#ifndef ICONV_POSIX
// Callback handed to iconvlist(); appends every reported name to supportedEncodings
static int addEncoding(unsigned int namescount, const char * const * names, void* data);
#endif
// Lazily-created caches. They are allocated on first use and never freed in this file.
// All encoding names reported by iconvlist() (stays empty when ICONV_POSIX is defined)
static wxArrayString *supportedEncodings = NULL;
// User-friendly encoding names, in display order, starting with "Local"
static wxArrayString *prettyEncodingList = NULL;
// Maps each user-friendly name in prettyEncodingList to the name passed to iconv_open()
static PrettyNamesHash *prettyEncodingHash = NULL;
// Open a pair of iconv descriptors converting between wchar_t and the given encoding.
//
// mbEncName   - encoding name; user-friendly names and "Local" are resolved through
//               GetRealEncodingName()
// enableSubst - when true, characters that do not fit the target encoding are
//               transliterated, replaced or dropped instead of failing the conversion
//
// Throws a wxString if iconv cannot open either conversion direction.
AegisubCSConv::AegisubCSConv(const wxChar *mbEncName, bool enableSubst)
: mbCharsetName(GetRealEncodingName(mbEncName)), mbNulLen(0), enableSubst(enableSubst)
{
	wcCharsetName = wxString::FromAscii(WCHAR_T_ENCODING);

	m2w = iconv_open(wcCharsetName.ToAscii(), mbCharsetName.ToAscii());
	w2m = iconv_open(mbCharsetName.ToAscii(), wcCharsetName.ToAscii());

	if (m2w == iconv_invalid || w2m == iconv_invalid) {
		// Don't leak whichever descriptor did open
		if (m2w != iconv_invalid) iconv_close(m2w);
		if (w2m != iconv_invalid) iconv_close(w2m);

		throw wxString::Format(_T("Character set %s is not supported."), mbEncName);
	}

	// Start with an empty replacement so the members are never read uninitialised,
	// including when substitution is disabled
	invalidRep[0] = 0;
	invalidRepSize = 0;

	if (enableSubst) {
#ifndef ICONV_POSIX
		// Set the fallback table up before the first conversion: iconvWrapper() hands it
		// to iconvctl() if that conversion hits an unrepresentable character
		fallbacks.data = this;
		fallbacks.mb_to_uc_fallback = NULL;
		fallbacks.mb_to_wc_fallback = NULL;
		fallbacks.uc_to_mb_fallback = ucToMbFallback;
		fallbacks.wc_to_mb_fallback = NULL;
#endif

		// Pre-convert the replacement character. The result includes the terminator,
		// which is not part of the replacement, so subtract its length.
		const size_t repLen = FromWChar(invalidRep, sizeof(invalidRep), L"?");
		const size_t nulLen = GetMBNulLen();

		// Only trust the result if both steps succeeded; otherwise keep the empty
		// replacement rather than storing a wrapped-around (huge) length, e.g. when
		// "?" plus terminator does not fit in invalidRep
		if (repLen != wxCONV_FAILED && nulLen != (size_t)-1 && repLen >= nulLen) {
			invalidRepSize = repLen - nulLen;
		}
	}
}
// Release both iconv descriptors owned by this converter
AegisubCSConv::~AegisubCSConv() {
	iconv_t owned[] = { m2w, w2m };
	for (size_t i = 0; i < sizeof(owned) / sizeof(owned[0]); ++i) {
		// Skip anything that was never successfully opened
		if (owned[i] == iconv_invalid) continue;
		iconv_close(owned[i]);
	}
}
// Create an independent converter for the same encoding (wxMBConv interface).
// The copy opens its own iconv descriptors, keeps this object's substitution setting and
// reuses the cached terminator length. The caller owns the returned object.
wxMBConv * AegisubCSConv::Clone() const {
	// Pass enableSubst along; the constructor's default would silently turn a
	// substituting converter into a strict one
	AegisubCSConv *c = new AegisubCSConv(mbCharsetName, enableSubst);
	c->mbNulLen = mbNulLen;
	return c;
}
// Calculate the size of NUL in the target encoding via iconv
size_t AegisubCSConv::GetMBNulLen() const {
if (mbNulLen == 0) {
const wchar_t nulStr[] = L"";
char outBuff[8];
size_t inLen = sizeof(wchar_t);
size_t outLen = sizeof(outBuff);
char * inPtr = (char *)nulStr;
char * outPtr = outBuff;
size_t res = iconv(w2m, &inPtr, &inLen, &outPtr, &outLen);
if (res != 0)
const_cast<AegisubCSConv *>(this)->mbNulLen = (size_t)-1;
else
const_cast<AegisubCSConv *>(this)->mbNulLen = sizeof(outBuff) - outLen;
}
return mbNulLen;
}
// Calculate the length (in bytes) of a MB string, not including the terminator.
// The terminator width comes from GetMBNulLen(); the string is scanned in units of that
// width until a unit consisting only of zero bytes is found.
// Returns (size_t)-1 if the terminator width is not 1, 2 or 4.
size_t AegisubCSConv::MBBuffLen(const char * str) const {
	const size_t nulLen = GetMBNulLen();

	switch (nulLen) {
		case 1:
			return strlen(str);

		case 2:
		case 4: {
			// Compare byte by byte instead of reading through a uint16_t/uint32_t
			// pointer: str carries no alignment guarantee, and a wider read through a
			// cast pointer is undefined behaviour
			const char *ptr = str;
			for (;;) {
				bool isTerminator = true;
				for (size_t i = 0; i < nulLen; ++i) {
					if (ptr[i] != 0) {
						isTerminator = false;
						break;
					}
				}
				if (isTerminator) return ptr - str;
				ptr += nulLen;
			}
		}

		default:
			return (size_t)-1;
	}
}
size_t AegisubCSConv::ToWChar(wchar_t *dst, size_t dstSize, const char *src, size_t srcLen) const {
return doConversion(
m2w,
reinterpret_cast<char *>(dst),
dstSize * sizeof(wchar_t),
const_cast<char *>(src),
srcLen == wxNO_LEN ? MBBuffLen(src) + GetMBNulLen() : srcLen
) / sizeof(wchar_t);
}
// Convert a wchar_t string to the multibyte encoding (wxMBConv interface).
// srcLen is in wchar_t units; wxNO_LEN means src is NUL-terminated and the terminator
// is converted too. Returns whatever doConversion() reports (a byte count or its failure value).
size_t AegisubCSConv::FromWChar(char *dst, size_t dstSize, const wchar_t *src, size_t srcLen) const {
	size_t srcChars = srcLen;
	if (srcChars == wxNO_LEN) {
		srcChars = wcslen(src) + 1;
	}

	// iconv works on untyped byte buffers
	char *srcBytes = reinterpret_cast<char *>(const_cast<wchar_t *>(src));

	return doConversion(w2m, dst, dstSize, srcBytes, srcChars * sizeof(wchar_t));
}
// Shared implementation of ToWChar/FromWChar; all sizes are in bytes.
// With a non-zero dstSize the conversion is written to dst. With dstSize == 0 the input
// is converted into a small scratch buffer over and over, purely to count the bytes a
// real conversion would need.
size_t AegisubCSConv::doConversion(iconv_t cd, char *dst, size_t dstSize, char *src, size_t srcSize) const {
	if (dstSize > 0)
		return iconvWrapper(cd, &src, &srcSize, &dst, &dstSize);

	// No destination given, so calculate the needed buffer size instead
	char scratch[32];
	size_t total = 0;

	for (;;) {
		dst = scratch;
		dstSize = sizeof(scratch);

		const size_t res = iconvWrapper(cd, &src, &srcSize, &dst, &dstSize);
		total += dst - scratch;

		// Finished converting all of the input
		if (res != iconv_failed) return total;
		// Anything other than "scratch buffer full" is a genuine error
		if (errno != E2BIG) return wxCONV_FAILED;
	}
}
// Run iconv() on cd, holding iconvMutex for the whole call when wxUSE_THREADS is set.
// The buffer pointers and remaining-size counters are updated in place by iconv().
//
// Returns the number of bytes written to the output buffer when the conversion succeeds.
// If it fails and substitution is disabled, returns iconv_failed with errno as iconv left it.
// If substitution is enabled:
//  - with ICONV_POSIX defined, an EILSEQ failure throws a message string and any other
//    failure returns wxCONV_FAILED;
//  - otherwise EILSEQ failures are retried with progressively lossier settings, and
//    wxCONV_FAILED is returned only if the last attempt still fails. errno is set to the
//    value from the last iconv() call.
size_t AegisubCSConv::iconvWrapper(iconv_t cd, char **inbuf, size_t *inbytesleft,
char **outbuf, size_t *outbytesleft) const {
#if wxUSE_THREADS
// The method is const but locking needs a non-const mutex, hence the const_cast
wxMutexLocker lock(const_cast<AegisubCSConv *>(this)->iconvMutex);
#endif
// Remember where output started so the total bytes written can be computed at the end
char *outbuforig = *outbuf;
size_t res = iconv(cd, inbuf, inbytesleft, outbuf, outbytesleft);
// Common case: the conversion succeeded outright
if (res != iconv_failed)
return *outbuf - outbuforig;
// Strict mode: report the failure without attempting any recovery
if (!enableSubst)
return iconv_failed;
#ifdef ICONV_POSIX
// None of the recovery steps used in the other branch are attempted here
if (errno == EILSEQ) {
throw _T("One or more characters do not fit in the selected ")
_T("encoding and the version of iconv Aegisub was built with")
_T(" does not have useful fallbacks. For best results, ")
_T("please rebuild Aegisub using a recent version of GNU iconv.");
}
return wxCONV_FAILED;
#else
// Save original errno so we can return it rather than the result from iconvctl
int err = errno;
// Each retry below calls iconv() again with the same (already advanced) buffer pointers,
// and each option is switched back off right after its attempt.
// Some characters in the input string do not exist in the output encoding
if (res == iconv_failed && err == EILSEQ) {
// first try transliteration only
int transliterate = 1;
iconvctl(cd, ICONV_SET_TRANSLITERATE, &transliterate);
res = iconv(cd, inbuf, inbytesleft, outbuf, outbytesleft);
err = errno;
transliterate = 0;
iconvctl(cd, ICONV_SET_TRANSLITERATE, &transliterate);
}
if (res == iconv_failed && err == EILSEQ) {
// Conversion still failed with transliteration enabled, so try our substitution
// (the fallbacks member, whose uc_to_mb_fallback the constructor sets to ucToMbFallback)
iconvctl(cd, ICONV_SET_FALLBACKS, const_cast<iconv_fallbacks *>(&fallbacks));
res = iconv(cd, inbuf, inbytesleft, outbuf, outbytesleft);
err = errno;
iconvctl(cd, ICONV_SET_FALLBACKS, NULL);
}
if (res == iconv_failed && err == EILSEQ) {
// Conversion still failed, so just drop any invalid characters
int discard = 1;
iconvctl(cd, ICONV_SET_DISCARD_ILSEQ, &discard);
res = iconv(cd, inbuf, inbytesleft, outbuf, outbytesleft);
err = errno;
discard = 0;
iconvctl(cd, ICONV_SET_DISCARD_ILSEQ, &discard);
}
// Restore the errno of the last iconv() call; doConversion() checks it for E2BIG
errno = err;
if (res == iconv_failed) return wxCONV_FAILED;
// Counts everything written since entry, including output from the failed attempts
return *outbuf - outbuforig;
#endif
}
void AegisubCSConv::ucToMbFallback(
unsigned int code,
void (*callback) (const char *buf, size_t buflen, void* callback_arg),
void *callback_arg,
void *convPtr)
{
// At some point in the future, this should probably switch to a real mapping
// For now, there's just three cases: BOM to nothing, \ to itself (lol Shift-JIS) and everything else to ?
if (code == 0xFEFF) return;
if (code == 0x5C) callback("\\", 1, callback_arg);
else {
AegisubCSConv *self = static_cast<AegisubCSConv *>(convPtr);
callback(self->invalidRep, self->invalidRepSize, callback_arg);
}
}
#ifndef ICONV_POSIX
// iconvlist() callback: receives a group of names and records each of them in
// supportedEncodings. The data argument is unused. Returning 0 signals no error.
int addEncoding(unsigned int namescount, const char * const * names, void* data) {
	const char * const * const last = names + namescount;
	for (const char * const * cur = names; cur != last; ++cur) {
		supportedEncodings->Add(wxString::FromAscii(*cur));
	}
	return 0;
}
#endif
// Return a copy of the list of every encoding name iconv reports, sorted.
// The list is built on the first call and cached; when ICONV_POSIX is defined there is no
// way to enumerate encodings here, so the list stays empty.
wxArrayString AegisubCSConv::GetAllSupportedEncodings() {
#if wxUSE_THREADS
	wxMutexLocker lock(encodingListMutex);
#endif
	// Already built by an earlier call
	if (supportedEncodings != NULL)
		return *supportedEncodings;

	supportedEncodings = new wxArrayString();
#ifndef ICONV_POSIX
	iconvlist(addEncoding, NULL);
	supportedEncodings->Sort();
#endif
	return *supportedEncodings;
}
// Map pretty names to the real encoding names.
// "local" (case-insensitive) resolves to the system encoding name reported by wxLocale.
// Any other name is looked up in the pretty-name table built by GetEncodingsList(); if the
// table has not been built yet, or has no entry for the name, the name is returned unchanged.
wxString AegisubCSConv::GetRealEncodingName(wxString name) {
	if (name.Lower() == _T("local")) return wxLocale::GetSystemEncodingName();

	// Test the pointer that is actually dereferenced. GetEncodingsList() publishes the
	// list before the hash, so checking the list does not guarantee the hash exists.
	PrettyNamesHash *hash = prettyEncodingHash;
	if (hash == NULL) return name;

	PrettyNamesHash::iterator realName = hash->find(name);
	if (realName != hash->end()) {
		return realName->second;
	}
	return name;
}
// Return a copy of the list of user-friendly encoding names, beginning with "Local".
// On the first call every candidate in the table below is probed with iconv_open() in both
// directions, and only the ones iconv accepts are kept. The same pass fills the hash that
// GetRealEncodingName() uses to translate a friendly name back to iconv's name.
wxArrayString AegisubCSConv::GetEncodingsList() {
#if wxUSE_THREADS
	wxMutexLocker lock(encodingListMutex);
#endif
	// Already built by an earlier call
	if (prettyEncodingList != NULL)
		return *prettyEncodingList;

	// Candidate encodings as { display name, iconv name }, terminated by a NULL pair
	static const struct { const char *pretty, *real; } encodingNames[] = {
		{"Unicode (UTF-8)", "utf-8"},
		{"Unicode (UTF-16)", "utf-16"},
		{"Unicode (UTF-16BE)", "utf-16be"},
		{"Unicode (UTF-16LE)", "utf-16le"},
		{"Unicode (UTF-32)", "utf-32"},
		{"Unicode (UTF-32BE)", "utf-32be"},
		{"Unicode (UTF-32LE)", "utf-32le"},
		{"Unicode (UTF-7)", "utf-7"},
		{"Arabic (IBM-864)", "ibm864"},
		{"Arabic (IBM-864-I)", "ibm864i"},
		{"Arabic (ISO-8859-6)", "iso-8859-6"},
		{"Arabic (ISO-8859-6-E)", "iso-8859-6-e"},
		{"Arabic (ISO-8859-6-I)", "iso-8859-6-i"},
		{"Arabic (Langbox ISO-8859-6.16)", "x-iso-8859-6-16"},
		{"Arabic (Langbox ISO-8859-6.8x)", "x-iso-8859-6-8-x"},
		{"Arabic (MacArabic)", "x-mac-arabic"},
		{"Arabic (Windows-1256)", "windows-1256"},
		{"Armenian (ARMSCII-8)", "armscii-8"},
		{"Baltic (ISO-8859-13)", "iso-8859-13"},
		{"Baltic (ISO-8859-4)", "iso-8859-4"},
		{"Baltic (Windows-1257)", "windows-1257"},
		{"Celtic (ISO-8859-14)", "iso-8859-14"},
		{"Central European (IBM-852)", "ibm852"},
		{"Central European (ISO-8859-2)", "iso-8859-2"},
		{"Central European (MacCE)", "x-mac-ce"},
		{"Central European (Windows-1250)", "windows-1250"},
		{"Chinese Simplified (GB18030)", "gb18030"},
		{"Chinese Simplified (GB2312)", "gb2312"},
		{"Chinese Simplified (GBK)", "x-gbk"},
		{"Chinese Simplified (HZ)", "hz-gb-2312"},
		{"Chinese Simplified (ISO-2022-CN)", "iso-2022-cn"},
		{"Chinese Traditional (Big5)", "big5"},
		{"Chinese Traditional (Big5-HKSCS)", "big5-hkscs"},
		{"Chinese Traditional (EUC-TW)", "x-euc-tw"},
		{"Croatian (MacCroatian)", "x-mac-croatian"},
		{"Cyrillic (IBM-855)", "ibm855"},
		{"Cyrillic (ISO-8859-5)", "iso-8859-5"},
		{"Cyrillic (ISO-IR-111)", "iso-ir-111"},
		{"Cyrillic (KOI8-R)", "koi8-r"},
		{"Cyrillic (MacCyrillic)", "x-mac-cyrillic"},
		{"Cyrillic (Windows-1251)", "windows-1251"},
		{"Cyrillic/Russian (CP-866)", "ibm866"},
		{"Cyrillic/Ukrainian (KOI8-U)", "koi8-u"},
		{"Cyrillic/Ukrainian (MacUkrainian)", "x-mac-ukrainian"},
		{"English (US-ASCII)", "us-ascii"},
		{"Farsi (MacFarsi)", "x-mac-farsi"},
		{"Georgian (GEOSTD8)", "geostd8"},
		{"Greek (ISO-8859-7)", "iso-8859-7"},
		{"Greek (MacGreek)", "x-mac-greek"},
		{"Greek (Windows-1253)", "windows-1253"},
		{"Gujarati (MacGujarati)", "x-mac-gujarati"},
		{"Gurmukhi (MacGurmukhi)", "x-mac-gurmukhi"},
		{"Hebrew (IBM-862)", "ibm862"},
		{"Hebrew (ISO-8859-8-E)", "iso-8859-8-e"},
		{"Hebrew (ISO-8859-8-I)", "iso-8859-8-i"},
		{"Hebrew (MacHebrew)", "x-mac-hebrew"},
		{"Hebrew (Windows-1255)", "windows-1255"},
		{"Hebrew Visual (ISO-8859-8)", "iso-8859-8"},
		{"Hindi (MacDevanagari)", "x-mac-devanagari"},
		{"Hindi (SunDevanagari)", "x-sun-unicode-india-0"},
		{"Icelandic (MacIcelandic)", "x-mac-icelandic"},
		{"Japanese (EUC-JP)", "euc-jp"},
		{"Japanese (ISO-2022-JP)", "iso-2022-jp"},
		{"Japanese (Shift_JIS)", "shift_jis"},
		{"Korean (EUC-KR)", "euc-kr"},
		{"Korean (ISO-2022-KR)", "iso-2022-kr"},
		{"Korean (JOHAB)", "x-johab"},
		{"Korean (UHC)", "x-windows-949"},
		{"Nordic (ISO-8859-10)", "iso-8859-10"},
		{"Romanian (ISO-8859-16)", "iso-8859-16"},
		{"Romanian (MacRomanian)", "x-mac-romanian"},
		{"South European (ISO-8859-3)", "iso-8859-3"},
		{"Thai (IBM-874)", "ibm874"},
		{"Thai (ISO-8859-11)", "iso-8859-11"},
		{"Thai (TIS-620)", "tis-620"},
		{"Thai (Windows-874)", "windows-874"},
		{"Turkish (IBM-857)", "ibm857"},
		{"Turkish (ISO-8859-9)", "iso-8859-9"},
		{"Turkish (MacTurkish)", "x-mac-turkish"},
		{"Turkish (Windows-1254)", "windows-1254"},
		{"Vietnamese (TCVN)", "x-viet-tcvn5712"},
		{"Vietnamese (VISCII)", "viscii"},
		{"Vietnamese (VPS)", "x-viet-vps"},
		{"Vietnamese (Windows-1258)", "windows-1258"},
		{"Western (IBM-850)", "ibm850"},
		{"Western (ISO-8859-1)", "iso-8859-1"},
		{"Western (ISO-8859-15)", "iso-8859-15"},
		{"Western (MacRoman)", "x-mac-roman"},
		{"Western (Windows-1252)", "windows-1252"},
		{NULL, NULL}
	};

	PrettyNamesHash *nameMap = new PrettyNamesHash(100);
	wxArrayString *names = new wxArrayString();
	names->Add(_T("Local"));

	for (size_t idx = 0; encodingNames[idx].real != NULL; ++idx) {
		const char *realName = encodingNames[idx].real;

		// Verify that iconv actually supports this encoding, in both directions
		iconv_t probe = iconv_open(realName, WCHAR_T_ENCODING);
		if (probe == iconv_invalid) continue;
		iconv_close(probe);

		probe = iconv_open(WCHAR_T_ENCODING, realName);
		if (probe == iconv_invalid) continue;
		iconv_close(probe);

		const wxString prettyName = wxString::FromAscii(encodingNames[idx].pretty);
		names->Add(prettyName);
		(*nameMap)[prettyName] = wxString::FromAscii(realName);
	}

	prettyEncodingList = names;
	prettyEncodingHash = nameMap;

	return *prettyEncodingList;
}
// Converter for the "Local" encoding (resolved to the system encoding name by
// GetRealEncodingName) with substitution disabled, so lossy conversions fail.
// NOTE(review): the constructor throws if iconv cannot open the encoding; here that would
// happen during static initialisation, where nothing can catch it -- confirm this is acceptable.
static AegisubCSConv localConv(_T("Local"), false);
// Public handle to the converter above
AegisubCSConv& csConvLocal(localConv);

135
aegisub/src/charset_conv.h Normal file
View File

@ -0,0 +1,135 @@
// Copyright (c) 2009, Thomas Goyne
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of the Aegisub Group nor the names of its contributors
// may be used to endorse or promote products derived from this software
// without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// -----------------------------------------------------------------------------
//
// AEGISUB
//
// Website: http://www.aegisub.net/
// Contact: mailto:zeratul@cellosoft.com
//
#ifndef AEGISUB_STRCONV
#define AEGISUB_STRCONV
#include <iconv.h>
#include <wchar.h>
#include <wx/intl.h>
#include <wx/hashmap.h>
#include <wx/thread.h>
#include <wx/arrstr.h>
#include <errno.h>
#include "aegisub_endian.h"
// Hash map type with wxString keys and wxString values; charset_conv.cpp uses it to map
// user-friendly encoding names to the names passed to iconv_open()
WX_DECLARE_STRING_HASH_MAP(wxString, PrettyNamesHash);
// ICONV_POSIX is defined when the iconv in use is not a sufficiently recent GNU libiconv
// (or LIBICONV_PLUG is set). Code guarded by it must not use the iconvctl(), iconvlist()
// or iconv_fallbacks extensions.
// NOTE(review): 0x010A presumably encodes libiconv version 1.10 -- confirm against iconv.h.
#if !defined(_LIBICONV_VERSION) || _LIBICONV_VERSION < 0x010A || defined(LIBICONV_PLUG)
#define ICONV_POSIX
#endif
// wxMBConv implementation that performs conversions between wchar_t and a named multibyte
// encoding using iconv.
class AegisubCSConv : public wxMBConv {
public:
// By default, any conversion that would be lossy will fail.
// When enableSubst is true, conversions to multibyte with a sufficiently large buffer
// are guaranteed to succeed, with characters dropped or changed as needed to fit the
// string into the target encoding.
// mbEncName may be a user-friendly name or "Local"; it is resolved with GetRealEncodingName().
// Throws a wxString if iconv does not support the encoding.
AegisubCSConv(const wxChar *mbEncName, bool enableSubst = false);
virtual ~AegisubCSConv();
// wxMBConv implementation; see strconv.h for usage details
virtual size_t ToWChar(wchar_t *dst, size_t dstLen, const char *src, size_t srcLen = wxNO_LEN) const;
virtual size_t FromWChar(char *dst, size_t dstLen, const wchar_t *src, size_t srcLen = wxNO_LEN) const;
// Size in bytes of the string terminator in the multibyte encoding, or (size_t)-1 if it
// could not be determined
virtual size_t GetMBNulLen() const;
// Create a new converter for the same multibyte encoding
virtual wxMBConv *Clone() const;
// Get the length (in bytes) of a null-terminated string whose encoding is mbEncName,
// not counting the terminator. Returns (size_t)-1 if the terminator is not 1, 2 or 4 bytes wide.
size_t MBBuffLen(const char *str) const;
// Get a list of supported encodings with somewhat user-friendly names
static wxArrayString GetEncodingsList();
// Get a list of all encodings supported by iconv
static wxArrayString GetAllSupportedEncodings();
// Map a user-friendly encoding name to iconv's name
static wxString GetRealEncodingName(wxString name);
// NOTE(review): no definition of IconvOpen appears in the visible part of charset_conv.cpp --
// confirm it is defined somewhere or drop the declaration.
static iconv_t IconvOpen(const char *toEncoding);
protected:
// iconv descriptors: m2w converts multibyte to wchar_t, w2m converts wchar_t to multibyte
iconv_t m2w, w2m;
private:
// iconv name of the wchar_t encoding (WCHAR_T_ENCODING)
wxString wcCharsetName;
// iconv name of the multibyte encoding
wxString mbCharsetName;
// Cached result of GetMBNulLen(); 0 means not calculated yet
size_t mbNulLen;
// Whether lossy conversions are permitted (see the constructor)
bool enableSubst;
// Convert srcSize bytes from src into dst, or just calculate the required output size
// when dstSize is 0
size_t doConversion(iconv_t cd, char *dst, size_t dstSize, char *src, size_t srcSize) const;
// Call iconv() under iconvMutex and apply the substitution strategies when enabled
size_t iconvWrapper(iconv_t cd, char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft) const;
// Fallback handler given to iconv for characters missing from the target encoding;
// convPtr is the owning AegisubCSConv
static void ucToMbFallback(
unsigned int code,
void (*callback) (const char *buf, size_t buflen, void* callback_arg),
void *callback_arg,
void *convPtr);
// "?" converted to the multibyte encoding, and its length in bytes; only set up by the
// constructor when enableSubst is true
char invalidRep[8];
size_t invalidRepSize;
#ifndef ICONV_POSIX
// Fallback table passed to iconvctl(ICONV_SET_FALLBACKS)
iconv_fallbacks fallbacks;
#endif
#if wxUSE_THREADS
// While iconv itself is thread-safe, using the same iconv_t on multiple threads is not
wxMutex iconvMutex;
#endif
};
// Predefined conversion for the current locale. Should be a drop-in replacement for wxConvLocal
extern AegisubCSConv& csConvLocal;
// WCHAR_T_ENCODING is the iconv name of the encoding used for wchar_t strings on this
// platform: UTF-32 when SIZEOF_WCHAR_T is 4, UTF-16 when it is 2, with the byte order
// suffix chosen from HAVE_BIG_ENDIAN / HAVE_LITTLE_ENDIAN when either is defined.
// NOTE(review): if SIZEOF_WCHAR_T is undefined or has any other value, the macro is left
// undefined and every use fails to compile -- confirm the build always provides it.
#ifdef HAVE_BIG_ENDIAN
# if SIZEOF_WCHAR_T == 4
# define WCHAR_T_ENCODING "UTF-32BE"
# elif SIZEOF_WCHAR_T == 2
# define WCHAR_T_ENCODING "UTF-16BE"
# endif
#elif defined(HAVE_LITTLE_ENDIAN)
# if SIZEOF_WCHAR_T == 4
# define WCHAR_T_ENCODING "UTF-32LE"
# elif SIZEOF_WCHAR_T == 2
# define WCHAR_T_ENCODING "UTF-16LE"
# endif
#else
// Byte order not known at compile time: pick the name at run time. The machine is treated
// as big-endian when converting 1 to big-endian leaves it unchanged. In this branch the
// macro expands to an expression rather than a string literal.
# if SIZEOF_WCHAR_T == 4
# define WCHAR_T_ENCODING ((Endian::MachineToBig((uint32_t)1) == 1) ? "UTF-32BE" : "UTF-32LE")
# elif SIZEOF_WCHAR_T == 2
# define WCHAR_T_ENCODING ((Endian::MachineToBig((uint32_t)1) == 1) ? "UTF-16BE" : "UTF-16LE")
# endif
#endif
#endif

View File

@ -57,14 +57,22 @@ struct CharDetResult {
////////////////
// Get encoding
wxString CharSetDetect::GetEncoding(wxString filename) {
// Open file
TextFileReader reader(filename,_T("Local"));
std::ifstream file;
#ifdef __WINDOWS__
file.open(filename.wc_str(),std::ios::in | std::ios::binary);
#else
file.open(wxFNCONV(filename),std::ios::in | std::ios::binary);
#endif
if (!file.is_open()) {
throw _T("Failed opening file for reading.");
}
// Loop through it until it finds interesting lines
while (reader.HasMoreLines() && !done()) {
wxString line = reader.ReadLineFromFile();
wxCharBuffer buffer = line.mb_str(wxConvLocal);
HandleData(buffer,line.Length());
while (!file.eof() && !done()) {
char buffer[512];
file.read(buffer, 512);
size_t bytesRead = file.gcount();
HandleData(buffer, bytesRead);
}
// Flag as finished

View File

@ -46,7 +46,7 @@
#include "dialog_export.h"
#include "ass_file.h"
#include "ass_exporter.h"
#include "frame_main.h"
#include "charset_conv.h"
#include "help_button.h"
@ -97,12 +97,12 @@ DialogExport::DialogExport (wxWindow *parent)
// Charset dropdown list
wxStaticText *charset_list_label = new wxStaticText(this, -1, _("Text encoding:"));
CharsetList = new wxChoice(this, Charset_List_Box, wxDefaultPosition, wxDefaultSize, FrameMain::GetEncodings());
CharsetList = new wxChoice(this, Charset_List_Box, wxDefaultPosition, wxDefaultSize, AegisubCSConv::GetEncodingsList());
wxSizer *charset_list_sizer = new wxBoxSizer(wxHORIZONTAL);
charset_list_sizer->Add(charset_list_label, 0, wxALIGN_CENTER | wxRIGHT, 5);
charset_list_sizer->Add(CharsetList, 1, wxEXPAND);
if (!CharsetList->SetStringSelection(Export->GetOriginalSubs()->GetScriptInfo(_T("Export Encoding")))) {
CharsetList->SetStringSelection(_T("UTF-8"));
CharsetList->SetStringSelection(_T("Unicode (UTF-8)"));
}
// Top sizer

View File

@ -55,6 +55,7 @@
#include "subs_edit_box.h"
#include "utils.h"
#include "help_button.h"
#include "charset_conv.h"
///////////////
@ -329,7 +330,7 @@ void DialogShiftTimes::AppendToHistory(wxString text) {
if (HistoryFile.IsEmpty()) return;
using namespace std;
ofstream file;
file.open(HistoryFile.mb_str(wxConvLocal),ios::out | ios::app);
file.open(HistoryFile.mb_str(csConvLocal),ios::out | ios::app);
if (!file.is_open()) {
return;
}
@ -349,7 +350,7 @@ void DialogShiftTimes::LoadHistory(wxString filename) {
using namespace std;
HistoryFile = filename;
ifstream file;
file.open(filename.mb_str(wxConvLocal));
file.open(filename.mb_str(csConvLocal));
if (!file.is_open()) {
return;
}

View File

@ -41,6 +41,7 @@
#include <wx/wxprec.h>
#ifndef WIN32
#include "font_file_lister_fontconfig.h"
#include "charset_conv.h"
///////////////////////////////////
@ -69,7 +70,7 @@ wxArrayString FontConfigFontFileLister::DoGetFilesWithFace(wxString facename) {
if (FcPatternGetString(final, FC_FILE, 0, &filename) == FcResultMatch && FcPatternGetInteger(final, FC_INDEX, 0, &fontindex) == FcResultMatch) {
FcPatternGetString(final, FC_FAMILY, fontindex, &gotfamily);
if (strcmp(gotfamily,buffer) == 0) {
results.Add(wxString((char*) filename,wxConvLocal));
results.Add(wxString((char*) filename,csConvLocal));
}
}
FcPatternDestroy(final);

View File

@ -49,6 +49,7 @@
# include <shlobj.h>
#endif
#include <wx/dir.h>
#include "charset_conv.h"
///////////////
@ -80,7 +81,7 @@ wxArrayString GetName(FT_Face &face,int id) {
memcpy(str,name.string,name.string_len);
str[name.string_len] = 0;
str[name.string_len+1] = 0;
if (name.encoding_id == 0) final.Add(wxString(str, wxConvLocal));
if (name.encoding_id == 0) final.Add(wxString(str, csConvLocal));
else if (name.encoding_id == 1) {
wxMBConvUTF16BE conv;
wxString string(str,conv);
@ -155,10 +156,10 @@ void FreetypeFontFileLister::DoInitialize() {
// Ordinary fonts
else {
if (face->style_name) {
AddFont(fontfiles[i],wxString(face->family_name, wxConvLocal) + _T(" ") + wxString(face->style_name, wxConvLocal));
AddFont(fontfiles[i],_T("*")+wxString(face->family_name, wxConvLocal));
AddFont(fontfiles[i],wxString(face->family_name, csConvLocal) + _T(" ") + wxString(face->style_name, csConvLocal));
AddFont(fontfiles[i],_T("*")+wxString(face->family_name, csConvLocal));
}
else AddFont(fontfiles[i],wxString(face->family_name, wxConvLocal));
else AddFont(fontfiles[i],wxString(face->family_name, csConvLocal));
}
FT_Done_Face(face);
}

View File

@ -80,6 +80,7 @@
#ifdef WITH_AUTOMATION
#include "auto4_base.h"
#endif
#include "charset_conv.h"
@ -674,17 +675,23 @@ void FrameMain::LoadSubtitles (wxString filename,wxString charset) {
if (!fileCheck.FileExists()) throw _T("Selected file does not exist.");
// Make sure that file isn't actually a timecode file
TextFileReader testSubs(filename,charset);
charset = testSubs.GetCurrentEncoding();
isBinary = charset == _T("binary");
if (!isBinary && testSubs.HasMoreLines()) {
wxString cur = testSubs.ReadLineFromFile();
if (cur.Left(10) == _T("# timecode")) {
LoadVFR(filename);
Options.SetText(_T("Last open timecodes path"), fileCheck.GetPath());
return;
try {
TextFileReader testSubs(filename,charset);
charset = testSubs.GetCurrentEncoding();
isBinary = charset == _T("binary");
if (!isBinary && testSubs.HasMoreLines()) {
wxString cur = testSubs.ReadLineFromFile();
if (cur.Left(10) == _T("# timecode")) {
LoadVFR(filename);
Options.SetText(_T("Last open timecodes path"), fileCheck.GetPath());
return;
}
}
}
catch (...) {
// if trying to load the file as timecodes fails it's fairly safe to assume that
// it is in fact not a timecode file
}
}
// Proceed into loading
@ -706,6 +713,10 @@ void FrameMain::LoadSubtitles (wxString filename,wxString charset) {
wxMessageBox(wxString(err), _T("Error"), wxOK | wxICON_ERROR, NULL);
return;
}
catch (wxString err) {
wxMessageBox(err, _T("Error"), wxOK | wxICON_ERROR, NULL);
return;
}
catch (...) {
wxMessageBox(_T("Unknown error"), _T("Error"), wxOK | wxICON_ERROR, NULL);
return;
@ -766,7 +777,7 @@ bool FrameMain::SaveSubtitles(bool saveas,bool withCharset) {
// Get charset
wxString charset = _T("");
if (withCharset) {
wxArrayString choices = GetEncodings();
wxArrayString choices = AegisubCSConv::GetEncodingsList();
charset = wxGetSingleChoice(_("Choose charset code:"), _T("Charset"),choices,this,-1, -1,true,250,200);
if (charset.IsEmpty()) return false;
}
@ -1233,53 +1244,6 @@ void FrameMain::DetachVideo(bool detach) {
}
/////////////////
// Get encodings
wxArrayString FrameMain::GetEncodings() {
wxArrayString choices;
choices.Add(_T("UTF-8"));
choices.Add(_T("UTF-16"));
choices.Add(_T("UTF-16BE"));
choices.Add(_T("UTF-16LE"));
choices.Add(_T("UTF-7"));
choices.Add(_T("Local"));
choices.Add(_T("US-ASCII"));
choices.Add(_T("SHIFT_JIS"));
choices.Add(_T("GB2312"));
choices.Add(_T("BIG5"));
choices.Add(_T("EUC-JP"));
choices.Add(_T("KOI8-R"));
choices.Add(_T("KOI8-RU"));
choices.Add(_T("KOI8-U"));
choices.Add(_T("ISO-8859-1"));
choices.Add(_T("ISO-8859-2"));
choices.Add(_T("ISO-8859-3"));
choices.Add(_T("ISO-8859-4"));
choices.Add(_T("ISO-8859-5"));
choices.Add(_T("ISO-8859-6"));
choices.Add(_T("ISO-8859-7"));
choices.Add(_T("ISO-8859-8"));
choices.Add(_T("ISO-8859-9"));
choices.Add(_T("ISO-8859-13"));
choices.Add(_T("ISO-8859-15"));
choices.Add(_T("WINDOWS-1250"));
choices.Add(_T("WINDOWS-1251"));
choices.Add(_T("WINDOWS-1252"));
choices.Add(_T("WINDOWS-1253"));
choices.Add(_T("WINDOWS-1254"));
choices.Add(_T("WINDOWS-1255"));
choices.Add(_T("WINDOWS-1256"));
choices.Add(_T("WINDOWS-1257"));
choices.Add(_T("WINDOWS-1258"));
choices.Add(_T("WINDOWS-874"));
choices.Add(_T("WINDOWS-932"));
choices.Add(_T("WINDOWS-936"));
choices.Add(_T("WINDOWS-949"));
choices.Add(_T("WINDOWS-950"));
return choices;
}
/////////////////////////////////////////////
// Sets status and clear after n miliseconds
void FrameMain::StatusTimeout(wxString text,int ms) {

View File

@ -281,7 +281,6 @@ public:
bool LoadList(wxArrayString list);
static void OpenHelp(wxString page=_T(""));
static wxArrayString GetEncodings();
void UpdateTitle();
void StatusTimeout(wxString text,int ms=10000);
void DetachVideo(bool detach=true);

View File

@ -701,7 +701,7 @@ void FrameMain::OnOpenSubtitles(wxCommandEvent& WXUNUSED(event)) {
// Open subtitles with specific charset
void FrameMain::OnOpenSubtitlesCharset(wxCommandEvent& WXUNUSED(event)) {
// Initialize charsets
wxArrayString choices = GetEncodings();
wxArrayString choices = AegisubCSConv::GetEncodingsList();
wxString path = Options.AsText(_T("Last open subtitles path"));
// Get options and load

View File

@ -273,7 +273,14 @@ void HotkeyManager::Load() {
// Open file
using namespace std;
TextFileReader file(filename);
wxString header = file.ReadLineFromFile();
wxString header;
try {
if (file.GetCurrentEncoding() != _T("binary"))
header = file.ReadLineFromFile();
}
catch (wxString e) {
header = _T("");
}
if (header != _T("[Hotkeys]")) {
wxFileName backupfn(filename);
backupfn.SetFullName(_T("hotkeys.bak"));
@ -289,7 +296,18 @@ void HotkeyManager::Load() {
map<wxString,HotkeyType>::iterator cur;
while (file.HasMoreLines()) {
// Parse line
curLine = file.ReadLineFromFile();
try {
curLine = file.ReadLineFromFile();
}
catch (wxString e) {
wxFileName backupfn(filename);
backupfn.SetFullName(_T("hotkeys.bak"));
wxCopyFile(filename, backupfn.GetFullPath());
modified = true;
Save();
wxLogWarning(_T("Hotkeys file corrupted, defaults restored.\nA backup of the corrupted file was made."));
return;
}
if (curLine.IsEmpty()) continue;
size_t pos = curLine.Find(_T("="));
if (pos == wxString::npos) continue;

View File

@ -60,7 +60,7 @@ LAVCFile::LAVCFile(Aegisub::String _filename)
filename = fn.GetShortPath();
#endif
result = av_open_input_file(&fctx,filename.mb_str(wxConvLocal),NULL,0,NULL);
result = av_open_input_file(&fctx,filename.mb_str(csConvLocal),NULL,0,NULL);
if (result != 0) throw _T("Failed opening file.");
// Get stream info

View File

@ -69,6 +69,7 @@
#endif
#include "version.h"
#include "plugin_manager.h"
#include "charset_conv.h"
///////////////////
@ -327,7 +328,7 @@ StackWalker::StackWalker(wxString cause) {
wxDateTime time = wxDateTime::Now();
wxString timeStr = _T("---") + time.FormatISODate() + _T(" ") + time.FormatISOTime() + _T("------------------");
formatLen = timeStr.Length();
file << std::endl << timeStr.mb_str(wxConvLocal);
file << std::endl << timeStr.mb_str(csConvLocal);
file << "\nVER - " << GetAegisubLongVersionString().mb_str(wxConvUTF8);
file << "\nFTL - Begining stack dump for \"" << cause.mb_str(wxConvUTF8) <<"\":\n";
}
@ -373,9 +374,9 @@ int AegisubApp::OnRun() {
if (file.is_open()) {
wxDateTime time = wxDateTime::Now();
wxString timeStr = _T("---") + time.FormatISODate() + _T(" ") + time.FormatISOTime() + _T("------------------");
file << std::endl << timeStr.mb_str(wxConvLocal);
file << std::endl << timeStr.mb_str(csConvLocal);
file << "\nVER - " << GetAegisubLongVersionString().mb_str(wxConvUTF8);
file << "\nEXC - Aegisub has crashed with unhandled exception \"" << error.mb_str(wxConvLocal) <<"\".\n";
file << "\nEXC - Aegisub has crashed with unhandled exception \"" << error.mb_str(csConvLocal) <<"\".\n";
int formatLen = timeStr.Length();
char dashes[1024];
int i = 0;

View File

@ -455,7 +455,14 @@ void OptionsManager::Load() {
// Read header
TextFileReader file(filename);
wxString header = file.ReadLineFromFile();
wxString header;
try {
if (file.GetCurrentEncoding() != _T("binary"))
header = file.ReadLineFromFile();
}
catch (wxString e) {
header = _T("");
}
if (header != _T("[Config]")) {
wxMessageBox(_("Configuration file is either invalid or corrupt. The current file will be backed up and replaced with a default file."),_("Error"),wxCENTRE|wxICON_WARNING);
wxRenameFile(filename,filename + wxString::Format(_T(".%i.backup"),wxGetUTCTime()));
@ -468,7 +475,15 @@ void OptionsManager::Load() {
wxString curLine;
while (file.HasMoreLines()) {
// Parse line
curLine = file.ReadLineFromFile();
try {
curLine = file.ReadLineFromFile();
}
catch (wxString e) {
wxMessageBox(_("Configuration file is either invalid or corrupt. The current file will be backed up and replaced with a default file."),_("Error"),wxCENTRE|wxICON_WARNING);
wxRenameFile(filename,filename + wxString::Format(_T(".%i.backup"),wxGetUTCTime()));
modified = true;
return;
}
if (curLine.IsEmpty()) continue;
size_t pos = curLine.Find(_T("="));
if (pos == wxString::npos) continue;

View File

@ -45,6 +45,7 @@
#include "standard_paths.h"
#include "utils.h"
#include "options.h"
#include "charset_conv.h"
#include <hunspell/hunspell.hxx>
#include <wx/wxprec.h>
#include <wx/wxprec.h>
@ -254,10 +255,10 @@ void HunspellSpellChecker::SetLanguage(wxString language) {
if (!wxFileExists(affpath) || !wxFileExists(dicpath)) return;
// Load
hunspell = new Hunspell(affpath.mb_str(wxConvLocal),dicpath.mb_str(wxConvLocal));
hunspell = new Hunspell(affpath.mb_str(csConvLocal),dicpath.mb_str(csConvLocal));
conv = NULL;
if (hunspell) {
conv = new wxCSConv(wxString(hunspell->get_dic_encoding(),wxConvUTF8));
conv = new AegisubCSConv(wxString(hunspell->get_dic_encoding(),wxConvUTF8));
// Load user dictionary
if (wxFileExists(usrdicpath)) {

View File

@ -50,7 +50,7 @@
class HunspellSpellChecker : public SpellChecker {
private:
Hunspell *hunspell;
wxCSConv *conv;
wxMBConv *conv;
wxString affpath;
wxString dicpath;
wxString usrdicpath;

View File

@ -189,7 +189,7 @@ void Spline::InsertCurve(SplineCurve &curve,int index) {
else {
std::list<SplineCurve>::iterator cur;
int i=0;
for (cur=curves.begin();cur!=curves.end() && i < index;cur++,i++);
for (cur=curves.begin();cur!=curves.end() && i < index;cur++,i++) ;
curves.insert(cur,curve);
}
}

View File

@ -55,6 +55,7 @@
#include "utils.h"
#include "ass_override.h"
#include "dialog_paste_over.h"
#include "charset_conv.h"
///////////////
@ -693,7 +694,7 @@ void SubtitlesGrid::OnAudioClip(wxCommandEvent &event) {
wxString filename = wxFileSelector(_("Save audio clip"),_T(""),_T(""),_T("wav"),_T(""),wxFD_SAVE|wxFD_OVERWRITE_PROMPT,this);
if (!filename.empty()) {
std::ofstream outfile(filename.mb_str(wxConvLocal),std::ios::binary);
std::ofstream outfile(filename.mb_str(csConvLocal),std::ios::binary);
size_t bufsize=(end-start)*provider->GetChannels()*provider->GetBytesPerSample();
int intval;

View File

@ -57,6 +57,7 @@
#include "utils.h"
#include "md5.h"
#include "dialog_progress.h"
#include "charset_conv.h"
#include "../prs/prs.h"
@ -114,7 +115,7 @@ void PRSSubtitleFormat::WriteFile(wxString filename,wxString encoding) {
AVSValue script1 = env1->Invoke("Eval",AVSValue(wxString(val + _T(",color=$000000)")).mb_str(wxConvUTF8)));
AVSValue script2 = env2->Invoke("Eval",AVSValue(wxString(val + _T(",color=$FFFFFF)")).mb_str(wxConvUTF8)));
char temp[512];
strcpy(temp,tempFile.mb_str(wxConvLocal));
strcpy(temp,tempFile.mb_str(csConvLocal));
AVSValue args1[2] = { script1.AsClip(), temp };
AVSValue args2[2] = { script2.AsClip(), temp };
try {
@ -122,7 +123,7 @@ void PRSSubtitleFormat::WriteFile(wxString filename,wxString encoding) {
script2 = env2->Invoke("TextSub", AVSValue(args2,2));
}
catch (AvisynthError &err) {
throw _T("AviSynth error: ") + wxString(err.msg,wxConvLocal);
throw _T("AviSynth error: ") + wxString(err.msg,csConvLocal);
}
PClip clip1 = script1.AsClip();
PClip clip2 = script2.AsClip();
@ -203,9 +204,9 @@ void PRSSubtitleFormat::WriteFile(wxString filename,wxString encoding) {
else return;
// Save file
file.Save((const char*)filename.mb_str(wxConvLocal));
file.Save((const char*)filename.mb_str(csConvLocal));
wxString filename2 = filename + _T(".prsa");
file.SaveText((const char*)filename2.mb_str(wxConvLocal));
file.SaveText((const char*)filename2.mb_str(csConvLocal));
// Delete temp file
wxRemoveFile(tempFile);
@ -243,7 +244,7 @@ void PRSSubtitleFormat::InsertFrame(PRSFile &file,int &framen,std::vector<int> &
}
// Read file back
FILE *fp = fopen(tempOut.mb_str(wxConvLocal),"rb");
FILE *fp = fopen(tempOut.mb_str(csConvLocal),"rb");
fseek(fp,0,SEEK_END);
datasize = ftell(fp);
data.resize(datasize);

View File

@ -33,85 +33,46 @@
// Contact: mailto:zeratul@cellosoft.com
//
///////////
// Headers
#include "config.h"
#include <fstream>
#include <algorithm>
#include <string>
#include <assert.h>
#include "text_file_reader.h"
#ifdef WITH_UNIVCHARDET
#include "charset_detect.h"
#endif
TextFileReader::TextFileReader(wxString filename, wxString enc, bool trim)
: encoding(enc), conv((iconv_t)-1), trim(trim), readComplete(false), currout(0), outptr(0), currentLine(0) {
#ifdef __WINDOWS__
file.open(filename.wc_str(),std::ios::in | std::ios::binary);
#else
file.open(wxFNCONV(filename),std::ios::in | std::ios::binary);
#endif
if (!file.is_open()) {
throw _T("Failed opening file for reading.");
}
///////////////
// Constructor
TextFileReader::TextFileReader(wxString _filename,wxString enc,bool _trim) {
// Setup
open = false;
customConv = false;
trim = _trim;
filename = _filename;
// Open file
Open();
// Set encoding
encoding = enc;
if (encoding.IsEmpty()) encoding = GetEncoding(filename);
if (encoding == _T("binary")) return;
SetEncodingConfiguration();
encoding = AegisubCSConv::GetRealEncodingName(encoding);
conv = iconv_open(WCHAR_T_ENCODING, encoding.ToAscii());
}
//////////////
// Destructor
TextFileReader::~TextFileReader() {
Close();
// Clean up conversion
if (customConv) delete conv;
if (conv != (iconv_t)-1) iconv_close(conv);
}
///////////////////////////
// Determine file encoding
wxString TextFileReader::GetEncoding(const wxString _filename) {
// Prepare
using namespace std;
unsigned char b[4];
for (int i=0;i<4;i++) b[i] = 0;
memset(b, 0, sizeof(b));
// Read four bytes from file
#ifdef TEXT_READER_USE_STDIO
// TODO: maybe make this use posix-style fopen() api's instead as well?
HANDLE ifile = CreateFile(
_filename.c_str(), // filename
FILE_READ_DATA, // access mode
FILE_SHARE_READ, // share mode
0, // security descriptor
OPEN_EXISTING, // creation disposition
FILE_FLAG_SEQUENTIAL_SCAN, // flags
0); // template file
if (ifile == INVALID_HANDLE_VALUE) {
return _T("unknown");
}
DWORD numread;
if (!ReadFile(ifile, (char*)b, 4, &numread, 0)) {
// Unable to open
return _T("unknown");
}
if (numread < 4) {
// File too short to decide, assume local
return _T("Local");
}
CloseHandle(ifile);
#else
ifstream ifile;
std::ifstream ifile;
#ifdef __WINDOWS__
ifile.open(_filename.wc_str());
#else
@ -120,9 +81,8 @@ wxString TextFileReader::GetEncoding(const wxString _filename) {
if (!ifile.is_open()) {
return _T("unknown");
}
ifile.read((char*)b,4);
ifile.read(reinterpret_cast<char *>(b),4);
ifile.close();
#endif
// Try to get the byte order mark from them
if (b[0] == 0xEF && b[1] == 0xBB && b[2] == 0xBF) return _T("UTF-8");
@ -142,213 +102,110 @@ wxString TextFileReader::GetEncoding(const wxString _filename) {
if (b[i] < 9 || (b[i] > 13 && b[i] < 32)) return _T("binary");
}
#ifdef WITH_UNIVCHARDET
#ifdef WITH_UNIVCHARDET
// Use universalchardet library to detect charset
CharSetDetect det;
return det.GetEncoding(_filename);
#else
#else
// Fall back to local
return _T("Local");
#endif
#endif
}
wchar_t TextFileReader::GetWChar() {
// If there's already some converted characters waiting, return the next one
if (++currout < outptr) {
return *currout;
}
//////////////////////////////
// Set encoding configuration
void TextFileReader::SetEncodingConfiguration() {
// Set encoding configuration
swap = false;
Is16 = false;
customConv = false;
conv = NULL;
if (encoding == _T("UTF-8")) {
conv = new wxMBConvUTF8;
customConv = true;
}
else if (encoding == _T("UTF-16LE")) {
Is16 = true;
}
else if (encoding == _T("UTF-16BE")) {
Is16 = true;
swap = true;
}
else if (encoding == _T("UTF-7")) {
conv = new wxCSConv(encoding);
customConv = true;
}
else if (encoding == _T("Local")) {
conv = wxConvCurrent;
}
else {
conv = new wxCSConv(encoding);
customConv = true;
}
if (file.eof()) return 0;
// Otherwise convert another block
char inbuf[64];
char *inptr = inbuf;
size_t inbytesleft = sizeof(inbuf) - 4;
int bytesAdded = 0;
memset(inbuf, 0, inbytesleft);
outptr = outbuf;
outbytesleft = sizeof(outbuf);
currout = outbuf;
file.read(inbuf, inbytesleft);
inbytesleft = file.gcount();
do {
size_t ret = iconv(conv, &inptr, &inbytesleft, reinterpret_cast<char **>(&outptr), &outbytesleft);
if (ret != (size_t)-1) break;
int err = errno;
// If 64 chars do not fit into 256 wchar_ts the environment is so bizzare that doing
// anything is probably futile
assert(err != E2BIG);
// (Hopefully) the edge of the buffer happened to split a multibyte character, so keep
// adding one byte to the input buffer until either it succeeds or we add enough bytes to
// complete any character
if (++bytesAdded > 3)
throw wxString::Format(_T("Invalid input character found near line %u"), currentLine);
file.read(inptr + inbytesleft, 1);
inbytesleft++;
} while (!file.eof());
if (outptr > outbuf)
return *currout;
throw wxString::Format(_T("Invalid input character found near line %u"), currentLine);
}
//////////////////////////
// Reads a line from file
wxString TextFileReader::ReadLineFromFile() {
Open();
wxString wxbuffer;
wxString buffer;
size_t bufAlloc = 1024;
wxbuffer.Alloc(bufAlloc);
#ifdef TEXT_READER_USE_STDIO
char buffer[512];
buffer[0] = 0;
#else
std::string buffer = "";
#endif
buffer.Alloc(bufAlloc);
// Read UTF-16 line from file
if (Is16) {
char charbuffer[3];
charbuffer[2] = 0;
wchar_t ch = 0;
size_t len = 0;
#ifdef TEXT_READER_USE_STDIO
while (ch != L'\n' && !feof(file)) {
// Read two chars from file
fread(charbuffer, 2, 1, file);
#else
while (ch != L'\n' && !file.eof()) {
// Read two chars from file
charbuffer[0] = 0;
charbuffer[1] = 0;
file.read(charbuffer,2);
#endif
currentLine++;
// Read a line
wchar_t ch;
size_t len = 0;
for (ch = GetWChar(); ch != L'\n' && ch != 0; ch = GetWChar()) {
if (ch == L'\r') continue;
if (ch == 0xFEFF && len == 0) continue;
// Swap bytes for big endian
if (swap) {
register char aux = charbuffer[0];
charbuffer[0] = charbuffer[1];
charbuffer[1] = aux;
}
// Convert two chars into a widechar and append to string
ch = *((wchar_t*)charbuffer);
if (len >= bufAlloc - 1) {
bufAlloc *= 2;
wxbuffer.Alloc(bufAlloc);
}
wxbuffer += ch;
len++;
if (len >= bufAlloc - 1) {
bufAlloc *= 2;
buffer.Alloc(bufAlloc);
}
buffer += ch;
len++;
}
// Read ASCII/UTF-8 line from file
else {
#ifdef TEXT_READER_USE_STDIO
while (1) {
buffer[511] = '\1';
if (fgets(buffer, 512, file)) {
// read succeeded
// FIXME, this might break on incomplete multibyte characters
wxString linepart(buffer, *conv);
wxbuffer += linepart;
if (buffer[511] == '\1' || buffer[510] == '\n') {
// our sentinel \1 wasn't overwritten, meaning an EOL was found
break;
}
// otherwise the sentinel \1 was overwritten (presumably with \0), so just loop on
}
else {
// hit EOF
break;
}
}
#else
getline(file,buffer);
wxbuffer.Clear();
if (buffer.length()) wxbuffer = wxString(buffer.c_str(),*conv);
#endif
}
// Remove line breaks
//wxbuffer.Replace(_T("\r"),_T("\0"));
//wxbuffer.Replace(_T("\n"),_T("\0"));
size_t len=wxbuffer.Length();
for (size_t i=0;i<len;i++) {
if (wxbuffer[i] == _T('\r') || wxbuffer[i] == _T('\n')) wxbuffer[i] = _T(' ');
}
// Remove BOM
if (wxbuffer.Length() > 0 && wxbuffer[0] == 0xFEFF) {
wxbuffer = wxbuffer.Mid(1);
}
if (ch == 0)
readComplete = true;
// Trim
if (trim) {
wxbuffer.Trim(true);
wxbuffer.Trim(false);
buffer.Trim(true);
buffer.Trim(false);
}
return wxbuffer;
return buffer;
}
/////////////
// Open file
void TextFileReader::Open() {
	// Opens `filename` for binary reading unless it is already open.
	// Throws a string literal if the file cannot be opened; `open` is only
	// set once the open has succeeded.
	if (!open) {
#ifdef TEXT_READER_USE_STDIO
		// binary mode, because ascii mode is never to be trusted
		file = _tfopen(filename.c_str(), _T("rb"));
		if (!file) {
			throw _T("Failed opening file for reading.");
		}
#else
		const std::ios::openmode readBinary = std::ios::in | std::ios::binary;
#ifdef __WINDOWS__
		file.open(filename.wc_str(), readBinary);
#else
		file.open(wxFNCONV(filename), readBinary);
#endif
		if (!file.is_open()) {
			throw _T("Failed opening file for reading.");
		}
#endif
		open = true;
	}
}
//////////////
// Close file
void TextFileReader::Close() {
	// Releases the file handle; does nothing when no file is open.
	if (open) {
#ifdef TEXT_READER_USE_STDIO
		fclose(file);
#else
		file.close();
#endif
		open = false;
	}
}
//////////////////////////////////
// Checks if there's more to read
bool TextFileReader::HasMoreLines() {
#ifdef TEXT_READER_USE_STDIO
if (encoding == _T("binary")) return false;
return !feof(file);
#else
return (!file.eof());
#endif
return !readComplete;
}
////////////////////////////////
// Ensure that charset is valid
void TextFileReader::EnsureValid(wxString enc) {
	// Checks that iconv can convert from the given character set.
	// "binary" is accepted without consulting iconv. Throws a wxString naming
	// the (resolved) character set when no converter can be opened for it.
	if (enc == _T("binary")) return;

	enc = AegisubCSConv::GetRealEncodingName(enc);
	iconv_t cd = iconv_open(WCHAR_T_ENCODING, enc.ToAscii());
	if (cd == (iconv_t)-1) {
		// iconv_open failed, so there is no descriptor to release; passing
		// (iconv_t)-1 to iconv_close is invalid and must be avoided
		throw wxString::Format(_T("Character set %s is not supported."), enc.c_str());
	}

	// The descriptor was only opened as a probe; release it again
	iconv_close(cd);
}
///////////////////////////
// Get encoding being used
wxString TextFileReader::GetCurrentEncoding() {
// Returns a copy of the `encoding` member. Callers in this change compare the
// result against "binary" before attempting to read lines.
return encoding;
}

View File

@ -33,46 +33,36 @@
// Contact: mailto:zeratul@cellosoft.com
//
#pragma once
///////////
// Headers
#include <wx/wxprec.h>
#include <wx/dynarray.h>
#include <wx/string.h>
#ifdef TEXT_READER_USE_STDIO
#include <stdio.h>
#else
#include <fstream>
#endif
#include "charset_conv.h"
/////////
// Class
class TextFileReader {
private:
wxString filename;
wxString encoding;
#ifdef TEXT_READER_USE_STDIO
FILE *file;
#else
std::ifstream file;
#endif
wxMBConv *conv;
bool Is16;
bool swap;
bool open;
bool customConv;
iconv_t conv;
bool trim;
bool readComplete;
wchar_t outbuf[256];
wchar_t *currout;
wchar_t *outptr;
size_t outbytesleft;
unsigned int currentLine;
void Open();
void Close();
void SetEncodingConfiguration();
wchar_t GetWChar();
public:
TextFileReader(wxString filename,wxString encoding=_T(""),bool trim=true);
TextFileReader(wxString filename,wxString encoding=_T(""), bool trim=true);
~TextFileReader();
wxString ReadLineFromFile();
@ -82,5 +72,3 @@ public:
wxString GetCurrentEncoding();
static wxString GetEncoding(const wxString filename);
};

View File

@ -33,61 +33,15 @@
// Contact: mailto:zeratul@cellosoft.com
//
///////////
// Headers
#include "config.h"
#include <fstream>
#include "text_file_writer.h"
#include "options.h"
#include "aegisub_endian.h"
///////////////
// Constructor
TextFileWriter::TextFileWriter(wxString _filename,wxString enc) {
// Setup
open = false;
customConv = false;
IsFirst = true;
filename = _filename;
// Set encoding
encoding = enc;
if (encoding == _T("Local") || (encoding.IsEmpty() && Options.AsText(_T("Save Charset")).Lower() == _T("local"))) {
conv = &wxConvLocal;
wxFontEncoding sysenc = wxLocale::GetSystemEncoding();
if (sysenc == wxFONTENCODING_UTF8 || sysenc == wxFONTENCODING_UTF7 ||
sysenc == wxFONTENCODING_UNICODE) // that last one may be a bit questionable
IsUnicode = true;
else
IsUnicode = false;
}
else {
if (encoding.IsEmpty()) encoding = Options.AsText(_T("Save Charset"));
if (encoding == _T("US-ASCII")) encoding = _T("ISO-8859-1");
conv = new wxCSConv(encoding);
customConv = true;
IsUnicode = encoding.Left(3) == _T("UTF");
}
// Open file
Open();
}
//////////////
// Destructor
TextFileWriter::~TextFileWriter() {
Close();
}
/////////////
// Open file
void TextFileWriter::Open() {
// Open file
if (open) return;
TextFileWriter::TextFileWriter(wxString filename, wxString encoding)
: conv() {
#ifdef WIN32
file.open(filename.wc_str(),std::ios::out | std::ios::binary | std::ios::trunc);
#else
@ -96,68 +50,24 @@ void TextFileWriter::Open() {
if (!file.is_open()) {
throw _T("Failed opening file for writing.");
}
open = true;
// Set encoding
SetEncoding();
if (encoding.IsEmpty()) encoding = Options.AsText(_T("Save Charset"));
conv.reset(new AegisubCSConv(encoding, true));
// Write the BOM
try {
WriteLineToFile(_T("\uFEFF"), false);
}
catch (wxString ignore) {
// If the BOM could not be converted to the target encoding it isn't needed
}
}
//////////////
// Close file
void TextFileWriter::Close() {
if (!open) return;
file.close();
open = false;
if (customConv) delete conv;
}
/////////////////
// Write to file
void TextFileWriter::WriteLineToFile(wxString line,bool addLineBreak) {
// Make sure it's loaded
if (!open) Open();
// Add line break
void TextFileWriter::WriteLineToFile(wxString line, bool addLineBreak) {
wxString temp = line;
if (addLineBreak) temp += _T("\r\n");
// Add BOM if it's the first line and the target format is Unicode
if (IsFirst && IsUnicode) {
wchar_t bom = 0xFEFF;
temp = wxString(bom) + temp;
}
IsFirst = false;
// 16-bit
if (Is16) {
wxWCharBuffer buf = temp.wc_str(*conv);
if (!buf.data())
return;
size_t len = wcslen(buf.data());
file.write((const char*)buf.data(),len*sizeof(wchar_t));
}
// 8-bit
else {
wxCharBuffer buf = temp.mb_str(*conv);
if (!buf.data())
return;
size_t len = strlen(buf.data());
file.write(buf.data(),len);
}
}
////////////////
// Set encoding
void TextFileWriter::SetEncoding() {
// Prepare
Is16 = false;
// UTF-16
if (encoding.Left(6) == _T("UTF-16")) {
Is16 = true;
}
wxCharBuffer buf = temp.mb_str(*conv);
if (buf.data())
file.write(buf.data(), conv->MBBuffLen(buf.data()));
}

View File

@ -37,40 +37,21 @@
#ifndef TEXT_FILE_WRITER_H
#define TEXT_FILE_WRITER_H
///////////
// Headers
#include <wx/wxprec.h>
#include <wx/string.h>
#include <wx/intl.h>
#include <fstream>
#include <memory>
#include "charset_conv.h"
/////////
// Class
class TextFileWriter {
private:
wxString filename;
wxString encoding;
std::ofstream file;
wxMBConv *conv;
bool customConv;
bool open;
bool Is16;
bool IsFirst;
bool IsUnicode;
void Open();
void Close();
void SetEncoding();
std::auto_ptr<AegisubCSConv> conv;
public:
TextFileWriter(wxString filename,wxString encoding=_T(""));
~TextFileWriter();
void WriteLineToFile(wxString line,bool addLineBreak=true);
TextFileWriter(wxString filename, wxString encoding=_T(""));
void WriteLineToFile(wxString line, bool addLineBreak=true);
};
#endif

View File

@ -51,6 +51,7 @@
#include "gl_wrap.h"
#include "mkv_wrap.h"
#include "vfw_wrap.h"
#include "charset_conv.h"
///////////////
@ -111,7 +112,7 @@ PClip AvisynthVideoProvider::OpenVideo(Aegisub::String _filename, bool mpeg2dec3
// Prepare filename
//char *videoFilename = env->SaveString(_filename.mb_str(wxConvLocal));
wxFileName fname(_filename);
char *videoFilename = env->SaveString(fname.GetShortPath().mb_str(wxConvLocal));
char *videoFilename = env->SaveString(fname.GetShortPath().mb_str(csConvLocal));
// Avisynth file, just import it
if (extension == _T(".avs")) {
@ -183,7 +184,7 @@ PClip AvisynthVideoProvider::OpenVideo(Aegisub::String _filename, bool mpeg2dec3
wxFileName ffsourcepath(StandardPaths::DecodePath(_T("?data/ffms2.dll")));
if (ffsourcepath.FileExists()) {
AVSTRACE(_T("AvisynthVideoProvider::OpenVideo: Loading FFMpegSource2"));
env->Invoke("LoadPlugin",env->SaveString(ffsourcepath.GetFullPath().mb_str(wxConvLocal)));
env->Invoke("LoadPlugin",env->SaveString(ffsourcepath.GetFullPath().mb_str(csConvLocal)));
AVSTRACE(_T("AvisynthVideoProvider::OpenVideo: Loaded FFMpegSource2"));
byFrame = true;
}
@ -213,7 +214,7 @@ PClip AvisynthVideoProvider::OpenVideo(Aegisub::String _filename, bool mpeg2dec3
wxFileName dss2path(StandardPaths::DecodePath(_T("?data/avss.dll")));
if (dss2path.FileExists()) {
AVSTRACE(_T("AvisynthVideoProvider::OpenVideo: Loading DirectShowSource2"));
env->Invoke("LoadPlugin",env->SaveString(dss2path.GetFullPath().mb_str(wxConvLocal)));
env->Invoke("LoadPlugin",env->SaveString(dss2path.GetFullPath().mb_str(csConvLocal)));
AVSTRACE(_T("AvisynthVideoProvider::OpenVideo: Loaded DirectShowSource2"));
}
}
@ -239,7 +240,7 @@ PClip AvisynthVideoProvider::OpenVideo(Aegisub::String _filename, bool mpeg2dec3
wxFileName dsspath(StandardPaths::DecodePath(_T("?data/DirectShowSource.dll")));
if (dsspath.FileExists()) {
AVSTRACE(_T("AvisynthVideoProvider::OpenVideo: Loading DirectShowSource"));
env->Invoke("LoadPlugin",env->SaveString(dsspath.GetFullPath().mb_str(wxConvLocal)));
env->Invoke("LoadPlugin",env->SaveString(dsspath.GetFullPath().mb_str(csConvLocal)));
AVSTRACE(_T("AvisynthVideoProvider::OpenVideo: Loaded DirectShowSource"));
}
@ -272,8 +273,8 @@ PClip AvisynthVideoProvider::OpenVideo(Aegisub::String _filename, bool mpeg2dec3
// Catch errors
catch (AvisynthError &err) {
AVSTRACE(_T("AvisynthVideoProvider::OpenVideo: Avisynth error: ") + wxString(err.msg,wxConvLocal));
throw _T("AviSynth error: ") + wxString(err.msg,wxConvLocal);
AVSTRACE(_T("AvisynthVideoProvider::OpenVideo: Avisynth error: ") + wxString(err.msg,csConvLocal));
throw _T("AviSynth error: ") + wxString(err.msg,csConvLocal);
}
// Check if video was loaded properly

View File

@ -45,6 +45,7 @@
#include "video_context.h"
#include "options.h"
#include "aegisub_endian.h"
#include "charset_conv.h"
#ifdef WIN32
#include <objbase.h>
#endif
@ -167,7 +168,7 @@ void FFmpegSourceVideoProvider::LoadVideo(Aegisub::String filename, double fps)
throw ErrorMsg;
}
VideoSource = FFMS_CreateVideoSource(FileNameWX.mb_str(wxConvUTF8), TrackNumber, Index, "", Threads, SeekMode, FFMSErrorMessage, MessageSize);
VideoSource = FFMS_CreateVideoSource(FileNameWX.mb_str(csConvLocal), TrackNumber, Index, "", Threads, SeekMode, FFMSErrorMessage, MessageSize);
FFMS_DestroyIndex(Index);
Index = NULL;
if (VideoSource == NULL) {