A few minor cleanups to the new charset conversion code.

Originally committed to SVN as r3159.
2009-07-18 00:58:13 +00:00 · 2009-07-18 00:58:13 +00:00 · c2087304fc
parent 779dcadc69
commit c2087304fc
7 changed files with 38 additions and 21 deletions
--- a/aegisub/src/charset_conv.cpp
+++ b/aegisub/src/charset_conv.cpp
@@ -34,7 +34,13 @@
 //

 #include "charset_conv.h"
+
 #include <stdint.h>
+#include <errno.h>
+#include <wx/hashmap.h>
+#include <wx/intl.h>
+
+WX_DECLARE_STRING_HASH_MAP(wxString, PrettyNamesHash);

 #if wxUSE_THREADS
 static wxMutex encodingListMutex;
--- a/aegisub/src/charset_conv.h
+++ b/aegisub/src/charset_conv.h
@@ -33,21 +33,18 @@
 // Contact: mailto:zeratul@cellosoft.com
 //

-#ifndef AEGISUB_STRCONV
-#define AEGISUB_STRCONV
+#ifndef AEGISUB_CHARSET_CONV_H
+#define AEGISUB_CHARSET_CONV_H

 #include <iconv.h>
 #include <wchar.h>
-#include <wx/intl.h>
-#include <wx/hashmap.h>
-#include <wx/thread.h>
 #include <wx/arrstr.h>
-#include <errno.h>
+#include <wx/thread.h>
+#include <wx/string.h>
+#include <wx/strconv.h>

 #include "aegisub_endian.h"

-WX_DECLARE_STRING_HASH_MAP(wxString, PrettyNamesHash);
-
 #if !defined(_LIBICONV_VERSION) || _LIBICONV_VERSION < 0x010A || defined(LIBICONV_PLUG)
 #define ICONV_POSIX
 #endif
@@ -56,7 +53,7 @@ class AegisubCSConv : public wxMBConv {
 public:
 	// By default, any conversion that would be lossy will fail
 	// When enableSubst is true, conversions to multibyte with a sufficiently large buffer
-	// are guarunteed to succeed, with characters dropped or changed as needed to fit the
+	// are guaranteed to succeed, with characters dropped or changed as needed to fit the
 	// string into the target encoding.
 	AegisubCSConv(const wxChar *mbEncName, bool enableSubst = false);
 	virtual ~AegisubCSConv();
@@ -77,8 +74,6 @@ public:
 	// Map a user-friendly encoding name to iconv's name
 	static wxString GetRealEncodingName(wxString name);

-	static iconv_t IconvOpen(const char *toEncoding);
-
 protected:
 	iconv_t m2w, w2m;

--- a/aegisub/src/frame_main_events.cpp
+++ b/aegisub/src/frame_main_events.cpp
@@ -88,6 +88,7 @@
 #include "standard_paths.h"
 #include "dialog_video_details.h"
 #include "keyframe.h"
+#include "charset_conv.h"


 ////////////////////
--- a/aegisub/src/text_file_reader.cpp
+++ b/aegisub/src/text_file_reader.cpp
@@ -39,7 +39,10 @@
 #include <algorithm>
 #include <string>
 #include <assert.h>
+#include <errno.h>
 #include "text_file_reader.h"
+#include "charset_conv.h"
+

 #ifdef WITH_UNIVCHARDET
 #include "charset_detect.h"
@@ -66,7 +69,7 @@ TextFileReader::~TextFileReader() {
 	if (conv != (iconv_t)-1) iconv_close(conv);
 }

-wxString TextFileReader::GetEncoding(const wxString _filename) {
+wxString TextFileReader::GetEncoding(const wxString filename) {
 	// Prepare
 	unsigned char b[4];
 	memset(b, 0, sizeof(b));
@@ -74,9 +77,9 @@ wxString TextFileReader::GetEncoding(const wxString _filename) {
 	// Read four bytes from file
 	std::ifstream ifile;
 #ifdef __WINDOWS__
-	ifile.open(_filename.wc_str());
+	ifile.open(filename.wc_str());
 #else
-	ifile.open(wxFNCONV(_filename));
+	ifile.open(wxFNCONV(filename));
 #endif
 	if (!ifile.is_open()) {
 		return _T("unknown");
@@ -105,7 +108,7 @@ wxString TextFileReader::GetEncoding(const wxString _filename) {
 #ifdef WITH_UNIVCHARDET
 	// Use universalchardet library to detect charset
 	CharSetDetect det;
-	return det.GetEncoding(_filename);
+	return det.GetEncoding(filename);
 #else
 	// Fall back to local
 	return _T("Local");
@@ -153,7 +156,7 @@ wchar_t TextFileReader::GetWChar() {

 		file.read(inptr + inbytesleft, 1);
 		inbytesleft++;
-	} while (!file.eof());
+	} while (!file.eof() && file.gcount());

 	if (outptr > outbuf)
 		return *currout;
@@ -172,6 +175,8 @@ wxString TextFileReader::ReadLineFromFile() {
 	size_t len = 0;
 	for (ch = GetWChar(); ch != L'\n' && ch != 0; ch = GetWChar()) {
 		if (ch == L'\r') continue;
+		// Skip the BOM -- we don't need it as the encoding is already known
+		// and it sometimes causes conversion problems
 		if (ch == 0xFEFF && len == 0) continue;

 		if (len >= bufAlloc - 1) {
--- a/aegisub/src/text_file_reader.h
+++ b/aegisub/src/text_file_reader.h
@@ -39,8 +39,7 @@
 #include <wx/dynarray.h>
 #include <wx/string.h>
 #include <fstream>
-
-#include "charset_conv.h"
+#include <iconv.h>

 class TextFileReader {
 private:
@@ -57,10 +56,11 @@ private:

 	unsigned int currentLine;

-	void Open();
-	void Close();
 	wchar_t GetWChar();

+	TextFileReader(const TextFileReader&);
+	TextFileReader& operator=(const TextFileReader&);
+
 public:
 	TextFileReader(wxString filename,wxString encoding=_T(""), bool trim=true);
 	~TextFileReader();
--- a/aegisub/src/text_file_writer.cpp
+++ b/aegisub/src/text_file_writer.cpp
@@ -39,6 +39,7 @@
 #include "text_file_writer.h"
 #include "options.h"
 #include "aegisub_endian.h"
+#include "charset_conv.h"

 TextFileWriter::TextFileWriter(wxString filename, wxString encoding)
 : conv() {
@@ -63,6 +64,10 @@ TextFileWriter::TextFileWriter(wxString filename, wxString encoding)
 	}
 }

+TextFileWriter::~TextFileWriter() {
+	// Explicit empty destructor required with an auto_ptr to an incomplete class
+}
+
 void TextFileWriter::WriteLineToFile(wxString line, bool addLineBreak) {
 	wxString temp = line;
 	if (addLineBreak) temp += _T("\r\n");
--- a/aegisub/src/text_file_writer.h
+++ b/aegisub/src/text_file_writer.h
@@ -42,15 +42,20 @@
 #include <fstream>
 #include <memory>

-#include "charset_conv.h"
+class AegisubCSConv;

 class TextFileWriter {
 private:
 	std::ofstream file;
 	std::auto_ptr<AegisubCSConv> conv;

+	TextFileWriter(const TextFileWriter&);
+	TextFileWriter& operator=(const TextFileWriter&);
+
 public:
 	TextFileWriter(wxString filename, wxString encoding=_T(""));
+	~TextFileWriter();
+
 	void WriteLineToFile(wxString line, bool addLineBreak=true);
 };