Simplify charset detection

Originally committed to SVN as r4419.
This commit is contained in:
Thomas Goyne 2010-06-03 20:31:43 +00:00
parent 401560b190
commit f064624ecd
6 changed files with 16 additions and 45 deletions

View File

@ -50,6 +50,7 @@
#include "ass_file.h"
#include "ass_override.h"
#include "ass_style.h"
#include "charset_detect.h"
#include "compat.h"
#include "main.h"
#include "options.h"
@ -74,7 +75,7 @@ AssFile::~AssFile() {
/// @param file
/// @param charset
/// @param addToRecent
void AssFile::Load (const wxString _filename,const wxString charset,bool addToRecent) {
void AssFile::Load (const wxString &_filename,wxString charset,bool addToRecent) {
bool ok = true;
try {
@ -91,9 +92,9 @@ void AssFile::Load (const wxString _filename,const wxString charset,bool addToRe
fclose(file);
// Find file encoding
wxString enc;
if (charset.IsEmpty()) enc = TextFileReader::GetEncoding(_filename);
else enc = charset;
if (charset.empty()) {
charset = CharSetDetect::GetEncoding(_filename);
}
// Generic preparation
Clear();
@ -104,7 +105,7 @@ void AssFile::Load (const wxString _filename,const wxString charset,bool addToRe
// Read file
if (reader) {
reader->SetTarget(this);
reader->ReadFile(_filename,enc);
reader->ReadFile(_filename,charset);
}
// Couldn't find a type

View File

@ -109,7 +109,7 @@ public:
AssStyle *GetStyle(wxString name); // Gets style by its name
//wxString GetString(); // Returns the whole file as a single string
void Load(wxString file,wxString charset=_T(""),bool addToRecent=true); // Load from a file
void Load(const wxString &file,wxString charset=_T(""),bool addToRecent=true); // Load from a file
void Save(wxString file,bool setfilename=false,bool addToRecent=true,const wxString encoding=_T("")); // Save to a file. Pass true to second argument if this isn't a copy
void SaveMemory(std::vector<char> &dst,const wxString encoding=_T("")); // Save to a memory string
void Export(wxString file); // Saves exported copy, with effects applied

View File

@ -34,9 +34,6 @@
/// @ingroup utility
///
///////////
// Headers
#include "config.h"
#ifndef AGI_PRE
@ -52,16 +49,11 @@
#include <libaegisub/log.h>
#include "charset_detect.h"
#include "text_file_reader.h"
#include "compat.h"
namespace CharSetDetect {
/// @brief Get encoding
/// @param filename
/// @return
///
wxString CharSetDetect::GetEncoding(wxString filename) {
wxString GetEncoding(wxString const& filename) {
LOG_I("charset/file") << filename;
bool unknown = 0;
@ -70,7 +62,7 @@ wxString CharSetDetect::GetEncoding(wxString filename) {
try {
agi::charset::DetectAll(STD_STR(filename), list);
} catch (const agi::charset::UnknownCharset&) {
} catch (const agi::charset::UnknownCharset&) {
unknown = 1;
}
@ -93,3 +85,5 @@ wxString CharSetDetect::GetEncoding(wxString filename) {
return i_lst->second;
}
}

View File

@ -34,17 +34,9 @@
/// @ingroup utility
///
/// DOCME
/// @class CharSetDetect
/// @brief Detect character set of a file
class CharSetDetect {
private:
/// Character set
wxString result;
public:
namespace CharSetDetect {
/// @brief Get character set name.
/// @param filename File to check
/// @return Character set name
wxString GetEncoding(wxString filename);
};
wxString GetEncoding(wxString const& filename);
}

View File

@ -48,9 +48,7 @@
#include <libaegisub/log.h>
#include "charset_conv.h"
#ifdef WITH_UNIVCHARDET
#include "charset_detect.h"
#endif
#include "text_file_reader.h"
TextFileReader::TextFileReader(wxString filename, wxString enc, bool trim)
@ -62,7 +60,7 @@ TextFileReader::TextFileReader(wxString filename, wxString enc, bool trim)
#endif
if (!file.is_open()) throw L"Failed opening file for reading.";
if (encoding.IsEmpty()) encoding = GetEncoding(filename);
if (encoding.IsEmpty()) encoding = CharSetDetect::GetEncoding(filename);
if (encoding == L"binary") return;
encoding = AegisubCSConv::GetRealEncodingName(encoding);
conv = iconv_open(WCHAR_T_ENCODING, encoding.ToAscii());
@ -75,15 +73,6 @@ TextFileReader::~TextFileReader() {
if (conv != (iconv_t)-1) iconv_close(conv);
}
/// @brief Attempt to detect a file's encoding
/// @param filename The file to check
/// @return The encoding name reported by CharSetDetect for the file
wxString TextFileReader::GetEncoding(wxString const& filename) {
	// Delegate detection to CharSetDetect (per the original note, this is
	// backed by the universalchardet library)
	CharSetDetect detector;
	wxString detectedName = detector.GetEncoding(filename);

	// Record the detected name in the log before handing it back
	LOG_I("file/reader/text/encoding") << detectedName;

	return detectedName;
}
wchar_t TextFileReader::GetWChar() {
// If there's already some converted characters waiting, return the next one
if (++currout < outptr) {

View File

@ -89,9 +89,4 @@ public:
/// @brief Get the file encoding used by this reader
/// @return "unknown", "binary", or a character encoding name
wxString GetCurrentEncoding();
/// @brief Attempt to detect a file's encoding
/// @param filename The file to check
/// @return "unknown", "binary", or a character encoding name
static wxString GetEncoding(wxString const& filename);
};