mirror of https://github.com/odrling/Aegisub
Extract TEXT -> DRAWING conversion from SplitWords
This commit is contained in:
parent
3ec82952f8
commit
1f1cb36b6d
|
@ -95,9 +95,7 @@ class WordSplitter {
|
||||||
std::string const& text;
|
std::string const& text;
|
||||||
std::vector<DialogueToken> &tokens;
|
std::vector<DialogueToken> &tokens;
|
||||||
agi::scoped_holder<iconv_t, int(&)(iconv_t)> utf8_to_utf32;
|
agi::scoped_holder<iconv_t, int(&)(iconv_t)> utf8_to_utf32;
|
||||||
size_t last_ovr_end;
|
|
||||||
size_t pos;
|
size_t pos;
|
||||||
bool in_drawing;
|
|
||||||
|
|
||||||
bool IsWordSep(int chr) {
|
bool IsWordSep(int chr) {
|
||||||
static const int delims[] = {
|
static const int delims[] = {
|
||||||
|
@ -145,16 +143,10 @@ class WordSplitter {
|
||||||
tokens.insert(tokens.begin() + i + 1, DialogueToken(type, len));
|
tokens.insert(tokens.begin() + i + 1, DialogueToken(type, len));
|
||||||
tokens[i].length -= len;
|
tokens[i].length -= len;
|
||||||
++i;
|
++i;
|
||||||
++last_ovr_end;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void SplitText(size_t &i) {
|
void SplitText(size_t &i) {
|
||||||
if (in_drawing) {
|
|
||||||
tokens[i].type = dt::DRAWING;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
int chrlen = 0;
|
int chrlen = 0;
|
||||||
int len = tokens[i].length;
|
int len = tokens[i].length;
|
||||||
int tpos = pos;
|
int tpos = pos;
|
||||||
|
@ -174,60 +166,16 @@ public:
|
||||||
: text(text)
|
: text(text)
|
||||||
, tokens(tokens)
|
, tokens(tokens)
|
||||||
, utf8_to_utf32(iconv_open("utf-32le", "utf-8"), iconv_close)
|
, utf8_to_utf32(iconv_open("utf-32le", "utf-8"), iconv_close)
|
||||||
, last_ovr_end(0)
|
|
||||||
, pos(0)
|
, pos(0)
|
||||||
, in_drawing(false)
|
|
||||||
{ }
|
{ }
|
||||||
|
|
||||||
void SplitWords() {
|
void SplitWords() {
|
||||||
if (tokens.empty()) return;
|
if (tokens.empty()) return;
|
||||||
|
|
||||||
// VSFilter treats unclosed override blocks as plain text, so pretend
|
|
||||||
// all tokens after the last override block are TEXT
|
|
||||||
for (size_t i = tokens.size(); i > 0; --i) {
|
|
||||||
if (tokens[i - 1].type == dt::OVR_END) {
|
|
||||||
last_ovr_end = i;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for (size_t i = 0; i < tokens.size(); ++i) {
|
for (size_t i = 0; i < tokens.size(); ++i) {
|
||||||
size_t len = tokens[i].length;
|
size_t len = tokens[i].length;
|
||||||
switch (tokens[i].type) {
|
if (tokens[i].type == dt::TEXT)
|
||||||
case dt::KARAOKE_TEMPLATE: break;
|
SplitText(i);
|
||||||
case dt::KARAOKE_VARIABLE: break;
|
|
||||||
case dt::LINE_BREAK: break;
|
|
||||||
case dt::TEXT: SplitText(i); break;
|
|
||||||
case dt::TAG_NAME:
|
|
||||||
if (i + 1 > last_ovr_end) {
|
|
||||||
SplitText(i);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (len != 1 || i + 1 >= tokens.size() || text[pos] != 'p')
|
|
||||||
break;
|
|
||||||
|
|
||||||
in_drawing = false;
|
|
||||||
|
|
||||||
if (tokens[i + 1].type != dt::ARG)
|
|
||||||
break;
|
|
||||||
|
|
||||||
for (size_t j = pos + len; j < pos + len + tokens[i + 1].length; ++j) {
|
|
||||||
char c = text[j];
|
|
||||||
// I have no idea why one would use leading zeros for
|
|
||||||
// the scale, but vsfilter allows it
|
|
||||||
if (c >= '1' && c <= '9')
|
|
||||||
in_drawing = true;
|
|
||||||
else if (c != '0')
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
if (i + 1 > last_ovr_end)
|
|
||||||
SplitText(i);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
pos += len;
|
pos += len;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -241,7 +189,73 @@ std::vector<DialogueToken> SyntaxHighlight(std::string const& text, std::vector<
|
||||||
return SyntaxHighlighter(text, spellchecker).Highlight(tokens);
|
return SyntaxHighlighter(text, spellchecker).Highlight(tokens);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Retype the TEXT tokens that follow an active `p` (drawing) tag as DRAWING
/// and collapse everything after the last OVR_END token into plain
/// TEXT/DRAWING tokens.
///
/// @param str    The string the tokens were lexed from; only read, and only
///               indexed at offsets derived from the token lengths.
/// @param tokens Token list, modified in place: token types are rewritten
///               and tokens in the tail may be merged and erased.
void MarkDrawings(std::string const& str, std::vector<DialogueToken> &tokens) {
|
||||||

if (tokens.empty()) return;
|
||||||

|
||||||

// One past the index of the last OVR_END token; stays 0 when there is none
size_t last_ovr_end = 0;
|
||||||

// Scan backwards so the first OVR_END found is the last one in the list
for (size_t i = tokens.size(); i > 0; --i) {
|
||||||

if (tokens[i - 1].type == dt::OVR_END) {
|
||||||

last_ovr_end = i;
|
||||||

break;
|
||||||

}
|
||||||

}
|
||||||

|
||||||

// Offset into str of the token currently being examined
size_t pos = 0;
|
||||||

// Whether the most recently seen `p` tag had a non-zero argument
bool in_drawing = false;
|
||||||

|
||||||

// First pass: only the tokens up to and including the last OVR_END
for (size_t i = 0; i < last_ovr_end; ++i) {
|
||||||

size_t len = tokens[i].length;
|
||||||

switch (tokens[i].type) {
|
||||||

case dt::TEXT:
|
||||||

if (in_drawing)
|
||||||

tokens[i].type = dt::DRAWING;
|
||||||

break;
|
||||||

case dt::TAG_NAME:
|
||||||

// Only a one-character tag name `p` that has a following token is
// of interest here
if (len != 1 || i + 1 >= tokens.size() || str[pos] != 'p')
|
||||||

break;
|
||||||

|
||||||

// A `p` tag turns drawing mode off unless its argument says otherwise
in_drawing = false;
|
||||||

|
||||||

// No argument to inspect if the next token is past the scanned region
// or is not an ARG token
if (i + 1 == last_ovr_end || tokens[i + 1].type != dt::ARG)
|
||||||

break;
|
||||||

|
||||||

// Walk the characters of the ARG token that follows the tag name:
// any digit 1-9 enables drawing mode, '0' is skipped, and any other
// character ends the scan
for (size_t j = pos + len; j < pos + len + tokens[i + 1].length; ++j) {
|
||||||

char c = str[j];
|
||||||

// I have no idea why one would use leading zeros for
|
||||||

// the scale, but vsfilter allows it
|
||||||

if (c >= '1' && c <= '9')
|
||||||

in_drawing = true;
|
||||||

else if (c != '0')
|
||||||

break;
|
||||||

}
|
||||||

break;
|
||||||

default: break;
|
||||||

}
|
||||||

|
||||||

// Advance to the start of the next token
pos += len;
|
||||||

}
|
||||||

|
||||||

// VSFilter treats unclosed override blocks as plain text, so merge all
|
||||||

// the tokens after the last override block into a single TEXT (or DRAWING)
|
||||||

// token
|
||||||

for (size_t i = last_ovr_end; i < tokens.size(); ++i) {
|
||||||

switch (tokens[i].type) {
|
||||||

// These three token types keep their type and are never merged
case dt::KARAOKE_TEMPLATE: break;
|
||||||

case dt::KARAOKE_VARIABLE: break;
|
||||||

case dt::LINE_BREAK: break;
|
||||||

default:
|
||||||

// Uses the drawing state left over from the first pass
tokens[i].type = in_drawing ? dt::DRAWING : dt::TEXT;
|
||||||

// Fold this token into its predecessor when both now share a type
if (i > 0 && tokens[i - 1].type == tokens[i].type) {
|
||||||

tokens[i - 1].length += tokens[i].length;
|
||||||

tokens.erase(tokens.begin() + i);
|
||||||

// Step back so the loop's ++i lands on the token that moved into
// slot i
--i;
|
||||||

}
|
||||||

}
|
||||||

}
|
||||||

}
|
||||||
|
|
||||||
void SplitWords(std::string const& str, std::vector<DialogueToken> &tokens) {
|
void SplitWords(std::string const& str, std::vector<DialogueToken> &tokens) {
|
||||||
|
MarkDrawings(str, tokens);
|
||||||
WordSplitter(str, tokens).SplitWords();
|
WordSplitter(str, tokens).SplitWords();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -71,6 +71,9 @@ namespace agi {
|
||||||
/// Tokenize the passed string as the body of a dialogue line
|
/// Tokenize the passed string as the body of a dialogue line
|
||||||
std::vector<DialogueToken> TokenizeDialogueBody(std::string const& str, bool karaoke_templater=false);
|
std::vector<DialogueToken> TokenizeDialogueBody(std::string const& str, bool karaoke_templater=false);
|
||||||
|
|
||||||
|
/// Convert the body of drawings to DRAWING tokens
|
||||||
|
void MarkDrawings(std::string const& str, std::vector<DialogueToken> &tokens);
|
||||||
|
|
||||||
/// Split the words in the TEXT tokens of the lexed line into their
|
/// Split the words in the TEXT tokens of the lexed line into their
|
||||||
/// own tokens and convert the body of drawings to DRAWING tokens
|
/// own tokens and convert the body of drawings to DRAWING tokens
|
||||||
void SplitWords(std::string const& str, std::vector<DialogueToken> &tokens);
|
void SplitWords(std::string const& str, std::vector<DialogueToken> &tokens);
|
||||||
|
|
|
@ -126,11 +126,10 @@ TEST(lagi_word_split, unclosed_ovr) {
|
||||||
};
|
};
|
||||||
|
|
||||||
SplitWords(text, tokens);
|
SplitWords(text, tokens);
|
||||||
ASSERT_EQ(4u, tokens.size());
|
ASSERT_EQ(3u, tokens.size());
|
||||||
EXPECT_EQ(dt::WORD, tokens[0].type);
|
EXPECT_EQ(dt::WORD, tokens[0].type);
|
||||||
EXPECT_EQ(dt::TEXT, tokens[1].type);
|
EXPECT_EQ(dt::TEXT, tokens[1].type);
|
||||||
EXPECT_EQ(dt::TEXT, tokens[2].type);
|
EXPECT_EQ(dt::WORD, tokens[2].type);
|
||||||
EXPECT_EQ(dt::WORD, tokens[3].type);
|
|
||||||
|
|
||||||
text = "{";
|
text = "{";
|
||||||
tokens.clear();
|
tokens.clear();
|
||||||
|
|
Loading…
Reference in New Issue