2012-10-30 16:59:47 +01:00
|
|
|
// Copyright (c) 2012, Thomas Goyne <plorkyeran@aegisub.org>
|
2006-01-16 22:02:54 +01:00
|
|
|
//
|
2012-10-30 16:59:47 +01:00
|
|
|
// Permission to use, copy, modify, and distribute this software for any
|
|
|
|
// purpose with or without fee is hereby granted, provided that the above
|
|
|
|
// copyright notice and this permission notice appear in all copies.
|
2006-01-16 22:02:54 +01:00
|
|
|
//
|
2012-10-30 16:59:47 +01:00
|
|
|
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
|
|
|
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
|
|
|
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
|
|
|
// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
|
|
|
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
|
|
|
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
|
|
|
// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
2006-01-16 22:02:54 +01:00
|
|
|
//
|
2009-07-29 07:43:02 +02:00
|
|
|
|
2007-12-31 07:46:22 +01:00
|
|
|
#ifdef WITH_HUNSPELL
|
2012-01-31 01:43:49 +01:00
|
|
|
#include "spellchecker_hunspell.h"
|
|
|
|
|
2013-01-04 16:01:50 +01:00
|
|
|
#include "options.h"
|
2012-01-31 01:43:49 +01:00
|
|
|
|
2013-01-04 16:01:50 +01:00
|
|
|
#include <libaegisub/charset_conv.h>
|
2014-05-29 00:19:05 +02:00
|
|
|
#include <libaegisub/format.h>
|
2013-01-04 16:01:50 +01:00
|
|
|
#include <libaegisub/fs.h>
|
2010-11-17 06:43:56 +01:00
|
|
|
#include <libaegisub/io.h>
|
|
|
|
#include <libaegisub/line_iterator.h>
|
2010-06-01 10:21:30 +02:00
|
|
|
#include <libaegisub/log.h>
|
2013-01-30 04:35:37 +01:00
|
|
|
#include <libaegisub/path.h>
|
2014-04-23 22:53:24 +02:00
|
|
|
#include <libaegisub/make_unique.h>
|
2009-09-10 15:06:40 +02:00
|
|
|
|
2013-12-24 18:42:20 +01:00
|
|
|
#include <boost/range/algorithm.hpp>
|
2014-06-25 18:28:25 +02:00
|
|
|
|
|
|
|
#define HUNSPELL_STATIC
|
|
|
|
#undef near
|
2013-11-19 17:59:27 +01:00
|
|
|
#include <hunspell/hunspell.hxx>
|
2007-04-22 04:03:40 +02:00
|
|
|
|
2012-01-31 01:43:49 +01:00
|
|
|
// Construct the spell checker: hook OnLanguageChanged and OnPathChanged up to
// their respective options via OPT_SUB, then run OnLanguageChanged once so the
// currently configured language is loaded right away.
HunspellSpellChecker::HunspellSpellChecker()
	: lang_listener(
		OPT_SUB("Tool/Spell Checker/Language", &HunspellSpellChecker::OnLanguageChanged, this))
	, dict_path_listener(
		OPT_SUB("Path/Dictionary", &HunspellSpellChecker::OnPathChanged, this))
{
	// Perform the initial dictionary load
	OnLanguageChanged();
}
|
|
|
|
|
|
|
|
// Nothing to release by hand; the destructor is still defined in this
// translation unit rather than left implicit.
HunspellSpellChecker::~HunspellSpellChecker() = default;
|
|
|
|
|
2012-10-30 16:59:47 +01:00
|
|
|
bool HunspellSpellChecker::CanAddWord(std::string const& word) {
|
2012-01-31 01:43:49 +01:00
|
|
|
if (!hunspell) return false;
|
2010-06-03 22:32:25 +02:00
|
|
|
try {
|
2012-10-30 16:59:47 +01:00
|
|
|
conv->Convert(word);
|
2010-06-03 22:32:25 +02:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
catch (agi::charset::ConvError const&) {
|
|
|
|
return false;
|
|
|
|
}
|
2006-12-26 02:08:46 +01:00
|
|
|
}
|
|
|
|
|
2012-12-22 00:59:25 +01:00
|
|
|
bool HunspellSpellChecker::CanRemoveWord(std::string const& word) {
|
|
|
|
return !!customWords.count(word);
|
|
|
|
}
|
|
|
|
|
2012-10-30 16:59:47 +01:00
|
|
|
void HunspellSpellChecker::AddWord(std::string const& word) {
|
2012-01-31 01:43:49 +01:00
|
|
|
if (!hunspell) return;
|
2010-11-17 06:43:56 +01:00
|
|
|
|
|
|
|
// Add it to the in-memory dictionary
|
2012-10-30 16:59:47 +01:00
|
|
|
hunspell->add(conv->Convert(word).c_str());
|
2007-01-02 01:47:47 +01:00
|
|
|
|
2012-12-18 17:54:53 +01:00
|
|
|
// Add the word
|
2012-12-22 00:59:25 +01:00
|
|
|
if (customWords.insert(word).second)
|
|
|
|
WriteUserDictionary();
|
2012-12-18 17:54:53 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
void HunspellSpellChecker::RemoveWord(std::string const& word) {
|
|
|
|
if (!hunspell) return;
|
|
|
|
|
|
|
|
// Remove it from the in-memory dictionary
|
|
|
|
hunspell->remove(conv->Convert(word).c_str());
|
|
|
|
|
2012-12-22 00:59:25 +01:00
|
|
|
auto word_iter = customWords.find(word);
|
|
|
|
if (word_iter != customWords.end()) {
|
|
|
|
customWords.erase(word_iter);
|
2012-12-18 17:54:53 +01:00
|
|
|
|
2012-12-22 00:59:25 +01:00
|
|
|
WriteUserDictionary();
|
2012-12-18 17:54:53 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-12-22 00:59:25 +01:00
|
|
|
void HunspellSpellChecker::ReadUserDictionary() {
|
|
|
|
customWords.clear();
|
|
|
|
|
2010-11-17 06:43:56 +01:00
|
|
|
// Read the old contents of the user's dictionary
|
2012-12-31 16:50:17 +01:00
|
|
|
try {
|
2014-05-27 04:44:33 +02:00
|
|
|
auto stream = agi::io::Open(userDicPath);
|
2012-12-22 00:59:25 +01:00
|
|
|
copy_if(
|
2013-02-17 04:47:31 +01:00
|
|
|
++agi::line_iterator<std::string>(*stream), agi::line_iterator<std::string>(),
|
2012-12-22 00:59:25 +01:00
|
|
|
inserter(customWords, customWords.end()),
|
|
|
|
[](std::string const& str) { return !str.empty(); });
|
2007-01-02 01:47:47 +01:00
|
|
|
}
|
2013-01-04 16:01:50 +01:00
|
|
|
catch (agi::fs::FileNotFound const&) {
|
2012-12-31 16:50:17 +01:00
|
|
|
// Not an error; user dictionary just doesn't exist
|
|
|
|
}
|
2012-12-18 17:54:53 +01:00
|
|
|
}
|
2008-01-14 03:01:50 +01:00
|
|
|
|
2012-12-22 00:59:25 +01:00
|
|
|
void HunspellSpellChecker::WriteUserDictionary() {
|
2012-12-31 16:50:17 +01:00
|
|
|
// Ensure that the path exists
|
2013-01-04 16:01:50 +01:00
|
|
|
agi::fs::CreateDirectory(userDicPath.parent_path());
|
2012-12-31 16:50:17 +01:00
|
|
|
|
2010-11-17 06:43:56 +01:00
|
|
|
// Write the new dictionary
|
2012-01-31 01:43:59 +01:00
|
|
|
{
|
2013-01-04 16:01:50 +01:00
|
|
|
agi::io::Save writer(userDicPath);
|
2012-12-22 00:59:25 +01:00
|
|
|
writer.Get() << customWords.size() << "\n";
|
|
|
|
copy(customWords.begin(), customWords.end(), std::ostream_iterator<std::string>(writer.Get(), "\n"));
|
2012-01-31 01:43:59 +01:00
|
|
|
}
|
|
|
|
|
2012-12-18 17:54:53 +01:00
|
|
|
// Announce a language change so that any other spellcheckers reload the
|
|
|
|
// current dictionary to get the addition/removal
|
2012-01-31 01:43:59 +01:00
|
|
|
lang_listener.Block();
|
|
|
|
OPT_SET("Tool/Spell Checker/Language")->SetString(OPT_GET("Tool/Spell Checker/Language")->GetString());
|
|
|
|
lang_listener.Unblock();
|
2006-12-25 06:43:00 +01:00
|
|
|
}
|
|
|
|
|
2012-10-30 16:59:47 +01:00
|
|
|
bool HunspellSpellChecker::CheckWord(std::string const& word) {
|
2012-01-31 01:43:49 +01:00
|
|
|
if (!hunspell) return true;
|
2010-06-03 22:32:25 +02:00
|
|
|
try {
|
2012-10-30 16:59:47 +01:00
|
|
|
return hunspell->spell(conv->Convert(word).c_str()) == 1;
|
2010-06-03 22:32:25 +02:00
|
|
|
}
|
|
|
|
catch (agi::charset::ConvError const&) {
|
|
|
|
return false;
|
|
|
|
}
|
2006-12-24 22:52:54 +01:00
|
|
|
}
|
2006-01-16 22:02:54 +01:00
|
|
|
|
2012-10-30 16:59:47 +01:00
|
|
|
std::vector<std::string> HunspellSpellChecker::GetSuggestions(std::string const& word) {
|
|
|
|
std::vector<std::string> suggestions;
|
2012-01-31 01:43:49 +01:00
|
|
|
if (!hunspell) return suggestions;
|
2006-12-25 06:43:00 +01:00
|
|
|
|
2010-11-17 06:43:56 +01:00
|
|
|
char **results;
|
2012-10-30 16:59:47 +01:00
|
|
|
int n = hunspell->suggest(&results, conv->Convert(word).c_str());
|
2006-12-25 06:43:00 +01:00
|
|
|
|
2010-11-17 06:43:56 +01:00
|
|
|
suggestions.reserve(n);
|
2012-10-30 16:59:47 +01:00
|
|
|
// Convert suggestions to UTF-8
|
2010-11-17 06:43:56 +01:00
|
|
|
for (int i = 0; i < n; ++i) {
|
|
|
|
try {
|
2012-10-30 16:59:47 +01:00
|
|
|
suggestions.push_back(rconv->Convert(results[i]));
|
2010-11-17 06:43:56 +01:00
|
|
|
}
|
|
|
|
catch (agi::charset::ConvError const&) {
|
|
|
|
// Shouldn't ever actually happen...
|
|
|
|
}
|
2014-05-27 04:58:41 +02:00
|
|
|
free(results[i]);
|
2010-06-03 22:32:25 +02:00
|
|
|
}
|
2006-12-25 06:43:00 +01:00
|
|
|
|
2014-05-27 04:58:41 +02:00
|
|
|
free(results);
|
2010-11-17 06:43:56 +01:00
|
|
|
|
2006-12-24 22:52:54 +01:00
|
|
|
return suggestions;
|
|
|
|
}
|
|
|
|
|
2013-12-24 18:42:20 +01:00
|
|
|
static std::vector<std::string> langs(const char *filter) {
|
|
|
|
std::vector<std::string> paths;
|
2014-06-04 02:12:25 +02:00
|
|
|
auto data_path = config::path->Decode("?dictionary/");
|
2013-12-24 18:42:20 +01:00
|
|
|
auto user_path = config::path->Decode(OPT_GET("Path/Dictionary")->GetString());
|
2010-11-17 06:43:56 +01:00
|
|
|
|
2013-12-24 18:42:20 +01:00
|
|
|
agi::fs::DirectoryIterator(data_path, filter).GetAll(paths);
|
|
|
|
agi::fs::DirectoryIterator(user_path, filter).GetAll(paths);
|
2013-01-04 16:01:50 +01:00
|
|
|
|
2013-12-24 18:42:20 +01:00
|
|
|
// Drop extensions
|
|
|
|
for (auto& fn : paths) fn.resize(fn.size() - 4);
|
2013-01-04 16:01:50 +01:00
|
|
|
|
2013-12-24 18:42:20 +01:00
|
|
|
boost::sort(paths);
|
|
|
|
paths.erase(unique(begin(paths), end(paths)), end(paths));
|
2010-11-17 06:43:56 +01:00
|
|
|
|
2013-12-24 18:42:20 +01:00
|
|
|
return paths;
|
|
|
|
}
|
2010-11-17 06:43:56 +01:00
|
|
|
|
2013-12-24 18:42:20 +01:00
|
|
|
std::vector<std::string> HunspellSpellChecker::GetLanguageList() {
|
|
|
|
if (languages.empty())
|
|
|
|
boost::set_intersection(langs("*.dic"), langs("*.aff"), back_inserter(languages));
|
2010-11-18 04:00:08 +01:00
|
|
|
return languages;
|
2006-12-24 22:52:54 +01:00
|
|
|
}
|
|
|
|
|
2013-12-24 18:42:20 +01:00
|
|
|
// Build the paths of the affix and dictionary files for `language` inside
// `path` and report whether both files exist.
//
// `aff` and `dic` are always overwritten with "<path>/<language>.aff" and
// "<path>/<language>.dic", whether or not the files exist.
static bool check_path(agi::fs::path const& path, std::string const& language, agi::fs::path& aff, agi::fs::path& dic) {
	aff = path/agi::format("%s.aff", language);
	dic = path/agi::format("%s.dic", language);

	if (!agi::fs::FileExists(aff))
		return false;
	return agi::fs::FileExists(dic);
}
|
|
|
|
|
2012-01-31 01:43:49 +01:00
|
|
|
void HunspellSpellChecker::OnLanguageChanged() {
|
|
|
|
hunspell.reset();
|
|
|
|
|
2013-01-04 16:01:50 +01:00
|
|
|
auto language = OPT_GET("Tool/Spell Checker/Language")->GetString();
|
2012-01-31 01:43:49 +01:00
|
|
|
if (language.empty()) return;
|
2010-11-17 06:43:56 +01:00
|
|
|
|
2013-12-24 18:42:20 +01:00
|
|
|
agi::fs::path aff, dic;
|
|
|
|
auto path = config::path->Decode(OPT_GET("Path/Dictionary")->GetString() + "/");
|
|
|
|
if (!check_path(path, language, aff, dic)) {
|
2014-06-04 02:12:25 +02:00
|
|
|
path = config::path->Decode("?dictionary/");
|
2013-12-24 18:42:20 +01:00
|
|
|
if (!check_path(path, language, aff, dic))
|
|
|
|
return;
|
2010-11-17 06:43:56 +01:00
|
|
|
}
|
|
|
|
|
2013-12-24 18:42:20 +01:00
|
|
|
LOG_I("dictionary/file") << dic;
|
2010-11-17 06:43:56 +01:00
|
|
|
|
2014-06-25 18:36:55 +02:00
|
|
|
#ifdef _WIN32
|
|
|
|
// The prefix makes hunspell assume the paths are UTF-8 and use _wfopen
|
|
|
|
hunspell = agi::make_unique<Hunspell>(("\\\\?\\" + aff.string()).c_str(), ("\\\\?\\" + dic.string()).c_str());
|
|
|
|
#else
|
|
|
|
hunspell = agi::make_unique<Hunspell>(aff.string().c_str(), dic.string().c_str());
|
|
|
|
#endif
|
2012-01-31 01:43:49 +01:00
|
|
|
if (!hunspell) return;
|
2010-11-17 06:43:56 +01:00
|
|
|
|
2014-04-23 22:53:24 +02:00
|
|
|
conv = agi::make_unique<agi::charset::IconvWrapper>("utf-8", hunspell->get_dic_encoding());
|
|
|
|
rconv = agi::make_unique<agi::charset::IconvWrapper>(hunspell->get_dic_encoding(), "utf-8");
|
2010-11-17 06:43:56 +01:00
|
|
|
|
2014-05-29 00:19:05 +02:00
|
|
|
userDicPath = config::path->Decode("?user/dictionaries")/agi::format("user_%s.dic", language);
|
2012-12-22 00:59:25 +01:00
|
|
|
ReadUserDictionary();
|
|
|
|
|
|
|
|
for (auto const& word : customWords) {
|
|
|
|
try {
|
|
|
|
hunspell->add(conv->Convert(word).c_str());
|
|
|
|
}
|
|
|
|
catch (agi::charset::ConvError const&) {
|
|
|
|
// Normally this shouldn't happen, but some versions of Aegisub
|
|
|
|
// wrote words in the wrong charset
|
2008-01-14 03:01:50 +01:00
|
|
|
}
|
|
|
|
}
|
2006-12-24 22:52:54 +01:00
|
|
|
}
|
2007-12-31 07:46:22 +01:00
|
|
|
|
2012-01-31 01:43:49 +01:00
|
|
|
// React to a change of the "Path/Dictionary" option by dropping the cached
// language list; GetLanguageList rebuilds it the next time it finds the list
// empty.
void HunspellSpellChecker::OnPathChanged() {
	languages.clear();
}
|
|
|
|
|
2007-12-31 07:46:22 +01:00
|
|
|
#endif // WITH_HUNSPELL
|