Extract TEXT -> DRAWING conversion from SplitWords

This commit is contained in:
Thomas Goyne 2012-12-30 08:27:03 -08:00
parent 3ec82952f8
commit 1f1cb36b6d
3 changed files with 73 additions and 57 deletions

View File

@ -95,9 +95,7 @@ class WordSplitter {
std::string const& text;
std::vector<DialogueToken> &tokens;
agi::scoped_holder<iconv_t, int(&)(iconv_t)> utf8_to_utf32;
size_t last_ovr_end;
size_t pos;
bool in_drawing;
bool IsWordSep(int chr) {
static const int delims[] = {
@ -145,16 +143,10 @@ class WordSplitter {
tokens.insert(tokens.begin() + i + 1, DialogueToken(type, len));
tokens[i].length -= len;
++i;
++last_ovr_end;
}
}
void SplitText(size_t &i) {
if (in_drawing) {
tokens[i].type = dt::DRAWING;
return;
}
int chrlen = 0;
int len = tokens[i].length;
int tpos = pos;
@ -174,60 +166,16 @@ public:
: text(text)
, tokens(tokens)
, utf8_to_utf32(iconv_open("utf-32le", "utf-8"), iconv_close)
, last_ovr_end(0)
, pos(0)
, in_drawing(false)
{ }
/// Walk every token of the lexed line, tracking whether a \p tag has switched
/// drawing mode on, and hand tokens that should be treated as text to
/// SplitText().
///
/// NOTE(review): this listing looks like a diff with its +/- markers
/// stripped, so lines from before and after the change may be interleaved
/// here -- confirm which statements are actually live before editing.
void SplitWords() {
// Nothing to do for an empty line
if (tokens.empty()) return;
// VSFilter treats unclosed override blocks as plain text, so pretend
// all tokens after the last override block are TEXT
// Scan backwards; last_ovr_end becomes one past the index of the final
// OVR_END token and keeps its initial value if there is none.
for (size_t i = tokens.size(); i > 0; --i) {
if (tokens[i - 1].type == dt::OVR_END) {
last_ovr_end = i;
break;
}
}
for (size_t i = 0; i < tokens.size(); ++i) {
// Captured up front: pos advances by this original length at the end of
// the iteration, whatever happens to tokens[i] in between.
size_t len = tokens[i].length;
switch (tokens[i].type) {
// These token kinds are never split
case dt::KARAOKE_TEMPLATE: break;
case dt::KARAOKE_VARIABLE: break;
case dt::LINE_BREAK: break;
case dt::TEXT: SplitText(i); break;
case dt::TAG_NAME:
// Past the last closed override block: treat the tag as text
if (i + 1 > last_ovr_end) {
SplitText(i);
break;
}
// Only a single-character tag name 'p' that is not the final token
// affects the drawing state
if (len != 1 || i + 1 >= tokens.size() || text[pos] != 'p')
break;
// A \p tag resets drawing mode; it is switched back on below only if
// the argument contains a non-zero digit
in_drawing = false;
if (tokens[i + 1].type != dt::ARG)
break;
// Scan the characters of the following ARG token, which start right
// after this tag name in text
for (size_t j = pos + len; j < pos + len + tokens[i + 1].length; ++j) {
char c = text[j];
// I have no idea why one would use leading zeros for
// the scale, but vsfilter allows it
if (c >= '1' && c <= '9')
in_drawing = true;
else if (c != '0')
break;
}
break;
default:
// Any other token kind after the last closed override block is also
// handed to SplitText
if (i + 1 > last_ovr_end)
SplitText(i);
break;
}
// NOTE(review): as shown, a TEXT token reaches SplitText both from the
// switch above and from this check; SplitText takes i by reference, so i
// may no longer refer to the same token here -- confirm intent.
if (tokens[i].type == dt::TEXT)
SplitText(i);
pos += len;
}
}
@ -241,7 +189,73 @@ std::vector<DialogueToken> SyntaxHighlight(std::string const& text, std::vector<
return SyntaxHighlighter(text, spellchecker).Highlight(tokens);
}
void MarkDrawings(std::string const& str, std::vector<DialogueToken> &tokens) {
if (tokens.empty()) return;
size_t last_ovr_end = 0;
for (size_t i = tokens.size(); i > 0; --i) {
if (tokens[i - 1].type == dt::OVR_END) {
last_ovr_end = i;
break;
}
}
size_t pos = 0;
bool in_drawing = false;
for (size_t i = 0; i < last_ovr_end; ++i) {
size_t len = tokens[i].length;
switch (tokens[i].type) {
case dt::TEXT:
if (in_drawing)
tokens[i].type = dt::DRAWING;
break;
case dt::TAG_NAME:
if (len != 1 || i + 1 >= tokens.size() || str[pos] != 'p')
break;
in_drawing = false;
if (i + 1 == last_ovr_end || tokens[i + 1].type != dt::ARG)
break;
for (size_t j = pos + len; j < pos + len + tokens[i + 1].length; ++j) {
char c = str[j];
// I have no idea why one would use leading zeros for
// the scale, but vsfilter allows it
if (c >= '1' && c <= '9')
in_drawing = true;
else if (c != '0')
break;
}
break;
default: break;
}
pos += len;
}
// VSFilter treats unclosed override blocks as plain text, so merge all
// the tokens after the last override block into a single TEXT (or DRAWING)
// token
for (size_t i = last_ovr_end; i < tokens.size(); ++i) {
switch (tokens[i].type) {
case dt::KARAOKE_TEMPLATE: break;
case dt::KARAOKE_VARIABLE: break;
case dt::LINE_BREAK: break;
default:
tokens[i].type = in_drawing ? dt::DRAWING : dt::TEXT;
if (i > 0 && tokens[i - 1].type == tokens[i].type) {
tokens[i - 1].length += tokens[i].length;
tokens.erase(tokens.begin() + i);
--i;
}
}
}
}
void SplitWords(std::string const& str, std::vector<DialogueToken> &tokens) {
MarkDrawings(str, tokens);
WordSplitter(str, tokens).SplitWords();
}

View File

@ -71,6 +71,9 @@ namespace agi {
/// Tokenize the passed string as the body of a dialogue line
std::vector<DialogueToken> TokenizeDialogueBody(std::string const& str, bool karaoke_templater=false);
/// Convert the body of drawings to DRAWING tokens
void MarkDrawings(std::string const& str, std::vector<DialogueToken> &tokens);
/// Split the words in the TEXT tokens of the lexed line into their
/// own tokens and convert the body of drawings to DRAWING tokens
void SplitWords(std::string const& str, std::vector<DialogueToken> &tokens);

View File

@ -126,11 +126,10 @@ TEST(lagi_word_split, unclosed_ovr) {
};
SplitWords(text, tokens);
ASSERT_EQ(4u, tokens.size());
ASSERT_EQ(3u, tokens.size());
EXPECT_EQ(dt::WORD, tokens[0].type);
EXPECT_EQ(dt::TEXT, tokens[1].type);
EXPECT_EQ(dt::TEXT, tokens[2].type);
EXPECT_EQ(dt::WORD, tokens[3].type);
EXPECT_EQ(dt::WORD, tokens[2].type);
text = "{";
tokens.clear();