From 6c6f60eb98cf1b90a9436144e79bc1a024124709 Mon Sep 17 00:00:00 2001 From: Thomas Goyne Date: Fri, 18 Apr 2014 20:04:16 -0700 Subject: [PATCH] Add an option to ignore punctuation for the character counter --- libaegisub/common/character_count.cpp | 29 +++++++++++++------ .../include/libaegisub/character_count.h | 10 +++++-- src/grid_column.cpp | 15 ++++++++-- src/libresrc/default_config.json | 3 +- src/libresrc/osx/default_config.json | 3 +- src/preferences.cpp | 1 + src/subs_edit_box.cpp | 8 +++-- 7 files changed, 52 insertions(+), 17 deletions(-) diff --git a/libaegisub/common/character_count.cpp b/libaegisub/common/character_count.cpp index 3d7878e1f..1862ec8c4 100644 --- a/libaegisub/common/character_count.cpp +++ b/libaegisub/common/character_count.cpp @@ -40,7 +40,7 @@ utext_ptr to_utext(Iterator begin, Iterator end) { } template -size_t count_in_range(Iterator begin, Iterator end, bool ignore_whitespace) { +size_t count_in_range(Iterator begin, Iterator end, int mask) { if (begin == end) return 0; static std::unique_ptr character_bi; @@ -60,33 +60,43 @@ size_t count_in_range(Iterator begin, Iterator end, bool ignore_whitespace) { size_t count = 0; auto pos = character_bi->first(); for (auto end = character_bi->next(); end != BreakIterator::DONE; pos = end, end = character_bi->next()) { - if (!ignore_whitespace) + if (!mask) ++count; else { UChar32 c; int i = 0; U8_NEXT_UNSAFE(begin + pos, i, c); - if (!u_isUWhiteSpace(c)) + if ((U_GET_GC_MASK(c) & mask) == 0) ++count; } } return count; } + +int ignore_mask_to_icu_mask(int mask) { + int ret = 0; + if (mask & agi::IGNORE_PUNCTUATION) + ret |= U_GC_P_MASK; + if (mask & agi::IGNORE_WHITESPACE) + ret |= U_GC_Z_MASK; + return ret; +} } namespace agi { -size_t CharacterCount(std::string const& str) { +size_t CharacterCount(std::string const& str, int mask) { + mask = ignore_mask_to_icu_mask(mask); size_t characters = 0; auto pos = begin(str); do { auto it = std::find(pos, end(str), '{'); - characters += count_in_range(pos, it, true); + characters += count_in_range(pos, it, mask); if (it == end(str)) break; pos = std::find(pos, end(str), '}'); if (pos == end(str)) { - characters += count_in_range(it, pos, true); + characters += count_in_range(it, pos, mask); break; } } while (++pos != end(str)); @@ -94,7 +104,8 @@ size_t CharacterCount(std::string const& str) { return characters; } -size_t MaxLineLength(std::string const& text, bool ignore_whitespace) { +size_t MaxLineLength(std::string const& text, int mask) { + mask = ignore_mask_to_icu_mask(mask); auto tokens = agi::ass::TokenizeDialogueBody(text); agi::ass::MarkDrawings(text, tokens); @@ -104,7 +115,7 @@ size_t MaxLineLength(std::string const& text, bool ignore_whitespace) { for (auto token : tokens) { if (token.type == agi::ass::DialogueTokenType::LINE_BREAK) { if (text[pos + 1] == 'h') { - if (!ignore_whitespace) + if (!(mask & U_GC_Z_MASK)) current_line_length += 1; } else { // N or n @@ -113,7 +124,7 @@ size_t MaxLineLength(std::string const& text, bool ignore_whitespace) { } } else if (token.type == agi::ass::DialogueTokenType::TEXT) - current_line_length += count_in_range(begin(text) + pos, begin(text) + pos + token.length, ignore_whitespace); + current_line_length += count_in_range(begin(text) + pos, begin(text) + pos + token.length, mask); pos += token.length; } diff --git a/libaegisub/include/libaegisub/character_count.h b/libaegisub/include/libaegisub/character_count.h index 512197a93..d0e66346a 100644 --- a/libaegisub/include/libaegisub/character_count.h +++ b/libaegisub/include/libaegisub/character_count.h @@ -17,7 +17,13 @@ #include namespace agi { + enum { + IGNORE_NONE = 0, + IGNORE_WHITESPACE = 1, + IGNORE_PUNCTUATION = 2 + }; + /// Get the length in characters of the longest line in the given text - size_t MaxLineLength(std::string const& text, bool ignore_whitespace); - size_t CharacterCount(std::string const& str); + size_t MaxLineLength(std::string const& text, int ignore_mask); + size_t CharacterCount(std::string const& str, int ignore_mask); } \ No newline at end of file diff --git a/src/grid_column.cpp b/src/grid_column.cpp index c4de1d22e..e7d2935d7 100644 --- a/src/grid_column.cpp +++ b/src/grid_column.cpp @@ -217,20 +217,31 @@ struct GridColumnMarginVert final : GridColumnMargin<2> { COLUMN_DESCRIPTION(_("Vertical Margin")) }; -struct GridColumnCPS final : GridColumn { +class GridColumnCPS final : public GridColumn { + const agi::OptionValue *ignore_whitespace = OPT_GET("Subtitle/Character Counter/Ignore Whitespace"); + const agi::OptionValue *ignore_punctuation = OPT_GET("Subtitle/Character Counter/Ignore Punctuation"); + +public: COLUMN_HEADER(_("CPS")) COLUMN_DESCRIPTION(_("Characters Per Second")) bool Centered() const override { return true; } bool RefreshOnTextChange() const override { return true; } wxString Value(const AssDialogue *d, const agi::Context *) const override { + int duration = d->End - d->Start; auto const& text = d->Text.get(); if (duration <= 0 || text.size() > static_cast(duration)) return wxS(""); - return std::to_wstring(agi::CharacterCount(text) * 1000 / duration); + int ignore = agi::IGNORE_NONE; + if (ignore_whitespace->GetBool()) + ignore |= agi::IGNORE_WHITESPACE; + if (ignore_punctuation->GetBool()) + ignore |= agi::IGNORE_PUNCTUATION; + + return std::to_wstring(agi::CharacterCount(text, ignore) * 1000 / duration); } int Width(const agi::Context *c, WidthHelper &helper, bool) const override { diff --git a/src/libresrc/default_config.json b/src/libresrc/default_config.json index 8f3088db0..19b97358e 100644 --- a/src/libresrc/default_config.json +++ b/src/libresrc/default_config.json @@ -351,7 +351,8 @@ "Subtitle" : { "Character Counter" : { - "Ignore Whitespace" : false + "Ignore Whitespace" : true, + "Ignore Punctuation" : true }, "Character Limit" : 40, "Default Resolution" : { diff --git a/src/libresrc/osx/default_config.json b/src/libresrc/osx/default_config.json index 8d411aaf2..8236175a0 100644 --- a/src/libresrc/osx/default_config.json +++ b/src/libresrc/osx/default_config.json @@ -351,7 +351,8 @@ "Subtitle" : { "Character Counter" : { - "Ignore Whitespace" : false + "Ignore Whitespace" : true, + "Ignore Punctuation" : true }, "Character Limit" : 40, "Default Resolution" : { diff --git a/src/preferences.cpp b/src/preferences.cpp index 55732104b..06608f443 100644 --- a/src/preferences.cpp +++ b/src/preferences.cpp @@ -195,6 +195,7 @@ Interface::Interface(wxTreebook *book, Preferences *parent): OptionPage(book, pa wxFlexGridSizer *character_count = PageSizer(_("Character Counter")); OptionAdd(character_count, _("Maximum characters per line"), "Subtitle/Character Limit", 0, 1000); OptionAdd(character_count, _("Ignore whitespace"), "Subtitle/Character Counter/Ignore Whitespace"); + OptionAdd(character_count, _("Ignore punctuation"), "Subtitle/Character Counter/Ignore Punctuation"); wxFlexGridSizer *grid = PageSizer(_("Grid")); OptionAdd(grid, _("Focus grid on click"), "Subtitle/Grid/Focus Allow"); diff --git a/src/subs_edit_box.cpp b/src/subs_edit_box.cpp index d73593712..883fa843b 100644 --- a/src/subs_edit_box.cpp +++ b/src/subs_edit_box.cpp @@ -588,8 +588,12 @@ void SubsEditBox::CallCommand(const char *cmd_name) { } void SubsEditBox::UpdateCharacterCount(std::string const& text) { - auto ignore_whitespace = OPT_GET("Subtitle/Character Counter/Ignore Whitespace")->GetBool(); - size_t length = agi::MaxLineLength(text, ignore_whitespace); + int ignore = agi::IGNORE_NONE; + if (OPT_GET("Subtitle/Character Counter/Ignore Whitespace")->GetBool()) + ignore |= agi::IGNORE_WHITESPACE; + if (OPT_GET("Subtitle/Character Counter/Ignore Punctuation")->GetBool()) + ignore |= agi::IGNORE_PUNCTUATION; + size_t length = agi::MaxLineLength(text, ignore); char_count->SetValue(wxString::Format("%lu", (unsigned long)length)); size_t limit = (size_t)OPT_GET("Subtitle/Character Limit")->GetInt(); if (limit && length > limit)