Add an option to ignore punctuation for the character counter

This commit is contained in:
Thomas Goyne 2014-04-18 20:04:16 -07:00
parent cff3c0a3df
commit 6c6f60eb98
7 changed files with 52 additions and 17 deletions

View File

@ -40,7 +40,7 @@ utext_ptr to_utext(Iterator begin, Iterator end) {
}
template <typename Iterator>
size_t count_in_range(Iterator begin, Iterator end, bool ignore_whitespace) {
size_t count_in_range(Iterator begin, Iterator end, int mask) {
if (begin == end) return 0;
static std::unique_ptr<icu::BreakIterator> character_bi;
@ -60,33 +60,43 @@ size_t count_in_range(Iterator begin, Iterator end, bool ignore_whitespace) {
size_t count = 0;
auto pos = character_bi->first();
for (auto end = character_bi->next(); end != BreakIterator::DONE; pos = end, end = character_bi->next()) {
if (!ignore_whitespace)
if (!mask)
++count;
else {
UChar32 c;
int i = 0;
U8_NEXT_UNSAFE(begin + pos, i, c);
if (!u_isUWhiteSpace(c))
if ((U_GET_GC_MASK(c) & mask) == 0)
++count;
}
}
return count;
}
/// Translate a combination of agi::IGNORE_* flags into an ICU general
/// category mask.
/// @param mask Bitwise OR of agi::IGNORE_WHITESPACE and/or agi::IGNORE_PUNCTUATION
/// @return U_GC_Z_MASK and/or U_GC_P_MASK OR'd together, or 0 if neither flag is set
int ignore_mask_to_icu_mask(int mask) {
	// Each flag contributes its ICU category mask only when it is requested
	const int punctuation = (mask & agi::IGNORE_PUNCTUATION) ? U_GC_P_MASK : 0;
	const int whitespace = (mask & agi::IGNORE_WHITESPACE) ? U_GC_Z_MASK : 0;
	return punctuation | whitespace;
}
}
namespace agi {
/// Count the characters of str that lie outside of `{...}` blocks.
/// Text is split at each '{' ... '}' pair; only the runs between pairs are
/// passed to count_in_range and summed.
size_t CharacterCount(std::string const& str) {
size_t CharacterCount(std::string const& str, int mask) {
// Convert the agi::IGNORE_* flags into the ICU category mask used by count_in_range
mask = ignore_mask_to_icu_mask(mask);
size_t characters = 0;
auto pos = begin(str);
do {
// Count everything from the current position up to the next '{'
auto it = std::find(pos, end(str), '{');
characters += count_in_range(pos, it, true);
characters += count_in_range(pos, it, mask);
// No further '{': the rest of the string has been counted already
if (it == end(str)) break;
// NOTE(review): this search starts at pos, not at it, so a '}' that occurs
// before the '{' is found first and the text between them is counted again
// on the next iteration -- confirm whether std::find(it, ...) was intended.
pos = std::find(pos, end(str), '}');
// Unterminated '{': count it and everything after it as ordinary text
if (pos == end(str)) {
characters += count_in_range(it, pos, true);
characters += count_in_range(it, pos, mask);
break;
}
// Step past the '}' and stop once the end of the string is reached
} while (++pos != end(str));
@ -94,7 +104,8 @@ size_t CharacterCount(std::string const& str) {
return characters;
}
size_t MaxLineLength(std::string const& text, bool ignore_whitespace) {
size_t MaxLineLength(std::string const& text, int mask) {
mask = ignore_mask_to_icu_mask(mask);
auto tokens = agi::ass::TokenizeDialogueBody(text);
agi::ass::MarkDrawings(text, tokens);
@ -104,7 +115,7 @@ size_t MaxLineLength(std::string const& text, bool ignore_whitespace) {
for (auto token : tokens) {
if (token.type == agi::ass::DialogueTokenType::LINE_BREAK) {
if (text[pos + 1] == 'h') {
if (!ignore_whitespace)
if (!(mask & U_GC_Z_MASK))
current_line_length += 1;
}
else { // N or n
@ -113,7 +124,7 @@ size_t MaxLineLength(std::string const& text, bool ignore_whitespace) {
}
}
else if (token.type == agi::ass::DialogueTokenType::TEXT)
current_line_length += count_in_range(begin(text) + pos, begin(text) + pos + token.length, ignore_whitespace);
current_line_length += count_in_range(begin(text) + pos, begin(text) + pos + token.length, mask);
pos += token.length;
}

View File

@ -17,7 +17,13 @@
#include <string>
namespace agi {
/// Flags selecting which kinds of characters are left out of a character
/// count. Each flag is a distinct bit so they can be combined with bitwise OR.
enum {
	IGNORE_NONE        = 0,
	IGNORE_WHITESPACE  = 1 << 0,
	IGNORE_PUNCTUATION = 1 << 1
};
/// Get the length in characters of the longest line in the given text
size_t MaxLineLength(std::string const& text, bool ignore_whitespace);
size_t CharacterCount(std::string const& str);
/// Get the length in characters of the longest line in the given text
/// @param text Dialogue body text to measure
/// @param ignore_mask Bitwise OR of IGNORE_* flags selecting the characters to leave out of the count
size_t MaxLineLength(std::string const& text, int ignore_mask);
/// Get the number of characters in the string, skipping `{...}` blocks
/// @param str String to count the characters of
/// @param ignore_mask Bitwise OR of IGNORE_* flags selecting the characters to leave out of the count
size_t CharacterCount(std::string const& str, int ignore_mask);
}

View File

@ -217,20 +217,31 @@ struct GridColumnMarginVert final : GridColumnMargin<2> {
COLUMN_DESCRIPTION(_("Vertical Margin"))
};
struct GridColumnCPS final : GridColumn {
class GridColumnCPS final : public GridColumn {
const agi::OptionValue *ignore_whitespace = OPT_GET("Subtitle/Character Counter/Ignore Whitespace");
const agi::OptionValue *ignore_punctuation = OPT_GET("Subtitle/Character Counter/Ignore Punctuation");
public:
COLUMN_HEADER(_("CPS"))
COLUMN_DESCRIPTION(_("Characters Per Second"))
bool Centered() const override { return true; }
bool RefreshOnTextChange() const override { return true; }
wxString Value(const AssDialogue *d, const agi::Context *) const override {
int duration = d->End - d->Start;
auto const& text = d->Text.get();
if (duration <= 0 || text.size() > static_cast<size_t>(duration))
return wxS("");
return std::to_wstring(agi::CharacterCount(text) * 1000 / duration);
int ignore = agi::IGNORE_NONE;
if (ignore_whitespace->GetBool())
ignore |= agi::IGNORE_WHITESPACE;
if (ignore_punctuation->GetBool())
ignore |= agi::IGNORE_PUNCTUATION;
return std::to_wstring(agi::CharacterCount(text, ignore) * 1000 / duration);
}
int Width(const agi::Context *c, WidthHelper &helper, bool) const override {

View File

@ -351,7 +351,8 @@
"Subtitle" : {
"Character Counter" : {
"Ignore Whitespace" : false
"Ignore Whitespace" : true,
"Ignore Punctuation" : true
},
"Character Limit" : 40,
"Default Resolution" : {

View File

@ -351,7 +351,8 @@
"Subtitle" : {
"Character Counter" : {
"Ignore Whitespace" : false
"Ignore Whitespace" : true,
"Ignore Punctuation" : true
},
"Character Limit" : 40,
"Default Resolution" : {

View File

@ -195,6 +195,7 @@ Interface::Interface(wxTreebook *book, Preferences *parent): OptionPage(book, pa
wxFlexGridSizer *character_count = PageSizer(_("Character Counter"));
OptionAdd(character_count, _("Maximum characters per line"), "Subtitle/Character Limit", 0, 1000);
OptionAdd(character_count, _("Ignore whitespace"), "Subtitle/Character Counter/Ignore Whitespace");
OptionAdd(character_count, _("Ignore punctuation"), "Subtitle/Character Counter/Ignore Punctuation");
wxFlexGridSizer *grid = PageSizer(_("Grid"));
OptionAdd(grid, _("Focus grid on click"), "Subtitle/Grid/Focus Allow");

View File

@ -588,8 +588,12 @@ void SubsEditBox::CallCommand(const char *cmd_name) {
}
void SubsEditBox::UpdateCharacterCount(std::string const& text) {
auto ignore_whitespace = OPT_GET("Subtitle/Character Counter/Ignore Whitespace")->GetBool();
size_t length = agi::MaxLineLength(text, ignore_whitespace);
int ignore = agi::IGNORE_NONE;
if (OPT_GET("Subtitle/Character Counter/Ignore Whitespace")->GetBool())
ignore |= agi::IGNORE_WHITESPACE;
if (OPT_GET("Subtitle/Character Counter/Ignore Punctuation")->GetBool())
ignore |= agi::IGNORE_PUNCTUATION;
size_t length = agi::MaxLineLength(text, ignore);
char_count->SetValue(wxString::Format("%lu", (unsigned long)length));
size_t limit = (size_t)OPT_GET("Subtitle/Character Limit")->GetInt();
if (limit && length > limit)