A few minor cleanups to the new charset conversion code.

Originally committed to SVN as r3159.
This commit is contained in:
Thomas Goyne 2009-07-18 00:58:13 +00:00
parent 779dcadc69
commit c2087304fc
7 changed files with 38 additions and 21 deletions

View File

@ -34,7 +34,13 @@
//
#include "charset_conv.h"
#include <stdint.h>
#include <errno.h>
#include <wx/hashmap.h>
#include <wx/intl.h>
WX_DECLARE_STRING_HASH_MAP(wxString, PrettyNamesHash);
#if wxUSE_THREADS
static wxMutex encodingListMutex;

View File

@ -33,21 +33,18 @@
// Contact: mailto:zeratul@cellosoft.com
//
#ifndef AEGISUB_STRCONV
#define AEGISUB_STRCONV
#ifndef AEGISUB_CHARSET_CONV_H
#define AEGISUB_CHARSET_CONV_H
#include <iconv.h>
#include <wchar.h>
#include <wx/intl.h>
#include <wx/hashmap.h>
#include <wx/thread.h>
#include <wx/arrstr.h>
#include <errno.h>
#include <wx/thread.h>
#include <wx/string.h>
#include <wx/strconv.h>
#include "aegisub_endian.h"
WX_DECLARE_STRING_HASH_MAP(wxString, PrettyNamesHash);
#if !defined(_LIBICONV_VERSION) || _LIBICONV_VERSION < 0x010A || defined(LIBICONV_PLUG)
#define ICONV_POSIX
#endif
@ -56,7 +53,7 @@ class AegisubCSConv : public wxMBConv {
public:
// By default, any conversion that would be lossy will fail
// When enableSubst is true, conversions to multibyte with a sufficiently large buffer
// are guarunteed to succeed, with characters dropped or changed as needed to fit the
// are guaranteed to succeed, with characters dropped or changed as needed to fit the
// string into the target encoding.
AegisubCSConv(const wxChar *mbEncName, bool enableSubst = false);
virtual ~AegisubCSConv();
@ -77,8 +74,6 @@ public:
// Map a user-friendly encoding name to iconv's name
static wxString GetRealEncodingName(wxString name);
static iconv_t IconvOpen(const char *toEncoding);
protected:
iconv_t m2w, w2m;

View File

@ -88,6 +88,7 @@
#include "standard_paths.h"
#include "dialog_video_details.h"
#include "keyframe.h"
#include "charset_conv.h"
////////////////////

View File

@ -39,7 +39,10 @@
#include <algorithm>
#include <string>
#include <assert.h>
#include <errno.h>
#include "text_file_reader.h"
#include "charset_conv.h"
#ifdef WITH_UNIVCHARDET
#include "charset_detect.h"
@ -66,7 +69,7 @@ TextFileReader::~TextFileReader() {
if (conv != (iconv_t)-1) iconv_close(conv);
}
wxString TextFileReader::GetEncoding(const wxString _filename) {
wxString TextFileReader::GetEncoding(const wxString filename) {
// Prepare
unsigned char b[4];
memset(b, 0, sizeof(b));
@ -74,9 +77,9 @@ wxString TextFileReader::GetEncoding(const wxString _filename) {
// Read four bytes from file
std::ifstream ifile;
#ifdef __WINDOWS__
ifile.open(_filename.wc_str());
ifile.open(filename.wc_str());
#else
ifile.open(wxFNCONV(_filename));
ifile.open(wxFNCONV(filename));
#endif
if (!ifile.is_open()) {
return _T("unknown");
@ -105,7 +108,7 @@ wxString TextFileReader::GetEncoding(const wxString _filename) {
#ifdef WITH_UNIVCHARDET
// Use universalchardet library to detect charset
CharSetDetect det;
return det.GetEncoding(_filename);
return det.GetEncoding(filename);
#else
// Fall back to local
return _T("Local");
@ -153,7 +156,7 @@ wchar_t TextFileReader::GetWChar() {
file.read(inptr + inbytesleft, 1);
inbytesleft++;
} while (!file.eof());
} while (!file.eof() && file.gcount());
if (outptr > outbuf)
return *currout;
@ -172,6 +175,8 @@ wxString TextFileReader::ReadLineFromFile() {
size_t len = 0;
for (ch = GetWChar(); ch != L'\n' && ch != 0; ch = GetWChar()) {
if (ch == L'\r') continue;
// Skip the BOM -- we don't need it as the encoding is already known
// and it sometimes causes conversion problems
if (ch == 0xFEFF && len == 0) continue;
if (len >= bufAlloc - 1) {

View File

@ -39,8 +39,7 @@
#include <wx/dynarray.h>
#include <wx/string.h>
#include <fstream>
#include "charset_conv.h"
#include <iconv.h>
class TextFileReader {
private:
@ -57,10 +56,11 @@ private:
unsigned int currentLine;
void Open();
void Close();
wchar_t GetWChar();
TextFileReader(const TextFileReader&);
TextFileReader& operator=(const TextFileReader&);
public:
TextFileReader(wxString filename,wxString encoding=_T(""), bool trim=true);
~TextFileReader();

View File

@ -39,6 +39,7 @@
#include "text_file_writer.h"
#include "options.h"
#include "aegisub_endian.h"
#include "charset_conv.h"
TextFileWriter::TextFileWriter(wxString filename, wxString encoding)
: conv() {
@ -63,6 +64,10 @@ TextFileWriter::TextFileWriter(wxString filename, wxString encoding)
}
}
// Destructor, deliberately defined out-of-line with an empty body.
TextFileWriter::~TextFileWriter() {
// Explicit empty destructor required with an auto_ptr to an incomplete class
// (the auto_ptr member deletes its pointee when destroyed, and deleting
// through a pointer to an incomplete type is not safe; defining the
// destructor in this file, which includes charset_conv.h, means the member
// is destroyed where the pointee type is complete).
}
void TextFileWriter::WriteLineToFile(wxString line, bool addLineBreak) {
wxString temp = line;
if (addLineBreak) temp += _T("\r\n");

View File

@ -42,15 +42,20 @@
#include <fstream>
#include <memory>
#include "charset_conv.h"
class AegisubCSConv;
// Writes lines of text to a file through an output stream, holding an
// AegisubCSConv character-set converter.
// NOTE(review): presumably the converter is used to encode each line into the
// requested encoding before it is written -- confirm against the .cpp file.
class TextFileWriter {
private:
// Output stream for the destination file.
std::ofstream file;
// Owned charset converter. The pointee type is held via auto_ptr so that the
// full class definition is only needed where the converter is created and
// destroyed.
std::auto_ptr<AegisubCSConv> conv;
// Copy construction and copy assignment are declared private so that code
// outside the class cannot copy a writer.
// NOTE(review): presumably these are intentionally left undefined -- confirm.
TextFileWriter(const TextFileWriter&);
TextFileWriter& operator=(const TextFileWriter&);
public:
// filename: path of the file to write.
// encoding: name of the target encoding; defaults to an empty string.
// NOTE(review): the meaning of an empty encoding is decided in the
// constructor body, which is not visible here -- confirm.
TextFileWriter(wxString filename, wxString encoding=_T(""));
// Declared explicitly so that it can be defined out-of-line in the .cpp file.
~TextFileWriter();
// Writes `line` to the file; when addLineBreak is true a "\r\n" line break is
// appended to the text first.
void WriteLineToFile(wxString line, bool addLineBreak=true);
};