From c2087304fcfdeb58de7954f3196cdb213d5c4177 Mon Sep 17 00:00:00 2001 From: Thomas Goyne Date: Sat, 18 Jul 2009 00:58:13 +0000 Subject: [PATCH] A few minor cleanups to the new charset conversion code. Originally committed to SVN as r3159. --- aegisub/src/charset_conv.cpp | 6 ++++++ aegisub/src/charset_conv.h | 17 ++++++----------- aegisub/src/frame_main_events.cpp | 1 + aegisub/src/text_file_reader.cpp | 15 ++++++++++----- aegisub/src/text_file_reader.h | 8 ++++---- aegisub/src/text_file_writer.cpp | 5 +++++ aegisub/src/text_file_writer.h | 7 ++++++- 7 files changed, 38 insertions(+), 21 deletions(-) diff --git a/aegisub/src/charset_conv.cpp b/aegisub/src/charset_conv.cpp index 00eaf0044..5aefecafa 100644 --- a/aegisub/src/charset_conv.cpp +++ b/aegisub/src/charset_conv.cpp @@ -34,7 +34,13 @@ // #include "charset_conv.h" + #include +#include +#include +#include + +WX_DECLARE_STRING_HASH_MAP(wxString, PrettyNamesHash); #if wxUSE_THREADS static wxMutex encodingListMutex; diff --git a/aegisub/src/charset_conv.h b/aegisub/src/charset_conv.h index 5e8778947..e5b60d900 100644 --- a/aegisub/src/charset_conv.h +++ b/aegisub/src/charset_conv.h @@ -33,21 +33,18 @@ // Contact: mailto:zeratul@cellosoft.com // -#ifndef AEGISUB_STRCONV -#define AEGISUB_STRCONV +#ifndef AEGISUB_CHARSET_CONV_H +#define AEGISUB_CHARSET_CONV_H #include #include -#include -#include -#include #include -#include +#include +#include +#include #include "aegisub_endian.h" -WX_DECLARE_STRING_HASH_MAP(wxString, PrettyNamesHash); - #if !defined(_LIBICONV_VERSION) || _LIBICONV_VERSION < 0x010A || defined(LIBICONV_PLUG) #define ICONV_POSIX #endif @@ -56,7 +53,7 @@ class AegisubCSConv : public wxMBConv { public: // By default, any conversion that would be lossy will fail // When enableSubst is true, conversions to multibyte with a sufficiently large buffer - // are guarunteed to succeed, with characters dropped or changed as needed to fit the + // are guaranteed to succeed, with characters dropped or changed as needed to fit the // string into the target encoding. AegisubCSConv(const wxChar *mbEncName, bool enableSubst = false); virtual ~AegisubCSConv(); @@ -77,8 +74,6 @@ public: // Map a user-friendly encoding name to iconv's name static wxString GetRealEncodingName(wxString name); - static iconv_t IconvOpen(const char *toEncoding); - protected: iconv_t m2w, w2m; diff --git a/aegisub/src/frame_main_events.cpp b/aegisub/src/frame_main_events.cpp index b99feb650..bac0ff7ce 100644 --- a/aegisub/src/frame_main_events.cpp +++ b/aegisub/src/frame_main_events.cpp @@ -88,6 +88,7 @@ #include "standard_paths.h" #include "dialog_video_details.h" #include "keyframe.h" +#include "charset_conv.h" //////////////////// diff --git a/aegisub/src/text_file_reader.cpp b/aegisub/src/text_file_reader.cpp index 17fa4fe43..1fb4c9599 100644 --- a/aegisub/src/text_file_reader.cpp +++ b/aegisub/src/text_file_reader.cpp @@ -39,7 +39,10 @@ #include #include #include +#include #include "text_file_reader.h" +#include "charset_conv.h" + #ifdef WITH_UNIVCHARDET #include "charset_detect.h" @@ -66,7 +69,7 @@ TextFileReader::~TextFileReader() { if (conv != (iconv_t)-1) iconv_close(conv); } -wxString TextFileReader::GetEncoding(const wxString _filename) { +wxString TextFileReader::GetEncoding(const wxString filename) { // Prepare unsigned char b[4]; memset(b, 0, sizeof(b)); @@ -74,9 +77,9 @@ wxString TextFileReader::GetEncoding(const wxString _filename) { // Read four bytes from file std::ifstream ifile; #ifdef __WINDOWS__ - ifile.open(_filename.wc_str()); + ifile.open(filename.wc_str()); #else - ifile.open(wxFNCONV(_filename)); + ifile.open(wxFNCONV(filename)); #endif if (!ifile.is_open()) { return _T("unknown"); @@ -105,7 +108,7 @@ wxString TextFileReader::GetEncoding(const wxString _filename) { #ifdef WITH_UNIVCHARDET // Use universalchardet library to detect charset CharSetDetect det; - return det.GetEncoding(_filename); + return det.GetEncoding(filename); #else // Fall back to local return _T("Local"); @@ -153,7 +156,7 @@ wchar_t TextFileReader::GetWChar() { file.read(inptr + inbytesleft, 1); inbytesleft++; - } while (!file.eof()); + } while (!file.eof() && file.gcount()); if (outptr > outbuf) return *currout; @@ -172,6 +175,8 @@ wxString TextFileReader::ReadLineFromFile() { size_t len = 0; for (ch = GetWChar(); ch != L'\n' && ch != 0; ch = GetWChar()) { if (ch == L'\r') continue; + // Skip the BOM -- we don't need it as the encoding is already known + // and it sometimes causes conversion problems if (ch == 0xFEFF && len == 0) continue; if (len >= bufAlloc - 1) { diff --git a/aegisub/src/text_file_reader.h b/aegisub/src/text_file_reader.h index d1032392b..2d45c2823 100644 --- a/aegisub/src/text_file_reader.h +++ b/aegisub/src/text_file_reader.h @@ -39,8 +39,7 @@ #include #include #include - -#include "charset_conv.h" +#include class TextFileReader { private: @@ -57,10 +56,11 @@ private: unsigned int currentLine; - void Open(); - void Close(); wchar_t GetWChar(); + TextFileReader(const TextFileReader&); + TextFileReader& operator=(const TextFileReader&); + public: TextFileReader(wxString filename,wxString encoding=_T(""), bool trim=true); ~TextFileReader(); diff --git a/aegisub/src/text_file_writer.cpp b/aegisub/src/text_file_writer.cpp index edb0dd02b..7cd2c5085 100644 --- a/aegisub/src/text_file_writer.cpp +++ b/aegisub/src/text_file_writer.cpp @@ -39,6 +39,7 @@ #include "text_file_writer.h" #include "options.h" #include "aegisub_endian.h" +#include "charset_conv.h" TextFileWriter::TextFileWriter(wxString filename, wxString encoding) : conv() { @@ -63,6 +64,10 @@ TextFileWriter::TextFileWriter(wxString filename, wxString encoding) } } +TextFileWriter::~TextFileWriter() { + // Explicit empty destructor required with an auto_ptr to an incomplete class +} + void TextFileWriter::WriteLineToFile(wxString line, bool addLineBreak) { wxString temp = line; if (addLineBreak) temp += _T("\r\n"); diff --git a/aegisub/src/text_file_writer.h b/aegisub/src/text_file_writer.h index f2c03d7ec..fa51469dc 100644 --- a/aegisub/src/text_file_writer.h +++ b/aegisub/src/text_file_writer.h @@ -42,15 +42,20 @@ #include #include -#include "charset_conv.h" +class AegisubCSConv; class TextFileWriter { private: std::ofstream file; std::auto_ptr conv; + TextFileWriter(const TextFileWriter&); + TextFileWriter& operator=(const TextFileWriter&); + public: TextFileWriter(wxString filename, wxString encoding=_T("")); + ~TextFileWriter(); + void WriteLineToFile(wxString line, bool addLineBreak=true); };