Fix some edge cases in the thesaurus code

Handle missing parts of speech and trailing pipes on lines.
This commit is contained in:
Thomas Goyne 2013-11-02 07:52:45 -07:00
parent 3948bdc6f4
commit 766b82d71b
5 changed files with 37 additions and 26 deletions

View File

@ -55,15 +55,15 @@ Thesaurus::Thesaurus(agi::fs::path const& dat_path, agi::fs::path const& idx_pat
Thesaurus::~Thesaurus() { } Thesaurus::~Thesaurus() { }
void Thesaurus::Lookup(std::string const& word, std::vector<Entry> *out) { std::vector<Thesaurus::Entry> Thesaurus::Lookup(std::string const& word) {
out->clear(); std::vector<Entry> out;
if (!dat.get()) return; if (!dat.get()) return out;
std::map<std::string, int>::const_iterator it = offsets.find(word); auto it = offsets.find(word);
if (it == offsets.end()) return; if (it == offsets.end()) return out;
dat->seekg(it->second, std::ios::beg); dat->seekg(it->second, std::ios::beg);
if (!dat->good()) return; if (!dat->good()) return out;
// First line is the word and meaning count // First line is the word and meaning count
std::string temp; std::string temp;
@ -71,22 +71,36 @@ void Thesaurus::Lookup(std::string const& word, std::vector<Entry> *out) {
std::vector<std::string> header; std::vector<std::string> header;
std::string converted(conv->Convert(temp)); std::string converted(conv->Convert(temp));
boost::split(header, converted, _1 == '|'); boost::split(header, converted, _1 == '|');
if (header.size() != 2) return; if (header.size() != 2) return out;
int meanings = atoi(header[1].c_str()); int meanings = atoi(header[1].c_str());
out->resize(meanings); out.reserve(meanings);
for (int i = 0; i < meanings; ++i) { for (int i = 0; i < meanings; ++i) {
std::vector<std::string> line;
getline(*dat, temp); getline(*dat, temp);
std::string converted(conv->Convert(temp)); auto converted = conv->Convert(temp);
std::vector<std::string> line;
boost::split(line, converted, _1 == '|'); boost::split(line, converted, _1 == '|');
// The "definition" is just the part of speech plus the word it's if (line.size() < 2)
// giving synonyms for (which may not be the passed word) continue;
(*out)[i].first = line[0] + ' ' + line[1];
(*out)[i].second.reserve(line.size() - 2); Entry e;
copy(line.begin() + 2, line.end(), back_inserter((*out)[i].second)); // The "definition" is just the part of speech (which may be empty)
// plus the word it's giving synonyms for (which may not be the passed word)
if (!line[0].empty())
e.first = line[0] + ' ';
e.first += line[1];
e.second.reserve(line.size() - 2);
for (size_t i = 2; i < line.size(); ++i) {
if (line[i].size())
e.second.emplace_back(std::move(line[i]));
}
out.emplace_back(std::move(e));
} }
return out;
} }
} }

View File

@ -38,7 +38,7 @@ class Thesaurus {
public: public:
/// A pair of a word and synonyms for that word /// A pair of a word and synonyms for that word
typedef std::pair<std::string, std::vector<std::string> > Entry; typedef std::pair<std::string, std::vector<std::string>> Entry;
/// Constructor /// Constructor
/// @param dat_path Path to data file /// @param dat_path Path to data file
@ -48,8 +48,7 @@ public:
/// Look up synonyms for a word /// Look up synonyms for a word
/// @param word Word to look up /// @param word Word to look up
/// @param[out] out Vector to fill with word/synonym lists std::vector<Entry> Lookup(std::string const& word);
void Lookup(std::string const& word, std::vector<Entry> *out);
}; };
} }

View File

@ -387,8 +387,7 @@ void SubsTextEditCtrl::AddThesaurusEntries(wxMenu &menu) {
if (!thesaurus) if (!thesaurus)
thesaurus = agi::util::make_unique<Thesaurus>(); thesaurus = agi::util::make_unique<Thesaurus>();
std::vector<Thesaurus::Entry> results; auto results = thesaurus->Lookup(currentWord);
thesaurus->Lookup(currentWord, &results);
thesSugs.clear(); thesSugs.clear();
@ -398,7 +397,7 @@ void SubsTextEditCtrl::AddThesaurusEntries(wxMenu &menu) {
int curThesEntry = 0; int curThesEntry = 0;
for (auto const& result : results) { for (auto const& result : results) {
// Single word, insert directly // Single word, insert directly
if (result.second.size() == 1) { if (result.second.empty()) {
thesMenu->Append(EDIT_MENU_THESAURUS_SUGS+curThesEntry, to_wx(result.first)); thesMenu->Append(EDIT_MENU_THESAURUS_SUGS+curThesEntry, to_wx(result.first));
thesSugs.push_back(result.first); thesSugs.push_back(result.first);
++curThesEntry; ++curThesEntry;

View File

@ -44,10 +44,10 @@ Thesaurus::~Thesaurus() {
// Explicit empty destructor needed for scoped_ptr with incomplete types // Explicit empty destructor needed for scoped_ptr with incomplete types
} }
void Thesaurus::Lookup(std::string word, std::vector<Entry> *result) { std::vector<Thesaurus::Entry> Thesaurus::Lookup(std::string word) {
if (!impl.get()) return; if (!impl.get()) return std::vector<Entry>();
boost::to_lower(word); boost::to_lower(word);
impl->Lookup(word, result); return impl->Lookup(word);
} }
std::vector<std::string> Thesaurus::GetLanguageList() const { std::vector<std::string> Thesaurus::GetLanguageList() const {

View File

@ -53,8 +53,7 @@ public:
/// Get a list of synonyms for a word, grouped by possible meanings of the word /// Get a list of synonyms for a word, grouped by possible meanings of the word
/// @param word Word to get synonyms for /// @param word Word to get synonyms for
/// @param[out] result Output list std::vector<Entry> Lookup(std::string word);
void Lookup(std::string word, std::vector<Entry> *result);
/// Get a list of language codes which thesauri are available for /// Get a list of language codes which thesauri are available for
std::vector<std::string> GetLanguageList() const; std::vector<std::string> GetLanguageList() const;