Extract TEXT -> DRAWING conversion from SplitWords

2025-04-11 22:56:02 +02:00 · 2012-12-30 08:27:03 -08:00 · 2012-12-30 08:27:03 -08:00 · 1f1cb36b6d
commit 1f1cb36b6d
parent 3ec82952f8
3 changed files with 73 additions and 57 deletions
--- a/aegisub/libaegisub/ass/dialogue_parser.cpp
+++ b/aegisub/libaegisub/ass/dialogue_parser.cpp
@ -95,9 +95,7 @@ class WordSplitter {
 	std::string const& text;
 	std::vector<DialogueToken> &tokens;
 	agi::scoped_holder<iconv_t, int(&)(iconv_t)> utf8_to_utf32;
 	size_t last_ovr_end;
 	size_t pos;
 	bool in_drawing;
 	bool IsWordSep(int chr) {
 		static const int delims[] = {
@ -145,16 +143,10 @@ class WordSplitter {
 			tokens.insert(tokens.begin() + i + 1, DialogueToken(type, len));
 			tokens[i].length -= len;
 			++i;
 			++last_ovr_end;
 		}
 	}
 	void SplitText(size_t &i) {
 		if (in_drawing) {
 			tokens[i].type = dt::DRAWING;
 			return;
 		}
 		int chrlen = 0;
 		int len = tokens[i].length;
 		int tpos = pos;
@ -174,60 +166,16 @@ public:
 	: text(text)
 	, tokens(tokens)
 	, utf8_to_utf32(iconv_open("utf-32le", "utf-8"), iconv_close)
 	, last_ovr_end(0)
 	, pos(0)
 	, in_drawing(false)
 	{ }
 	void SplitWords() {
 		if (tokens.empty()) return;
 		// VSFilter treats unclosed override blocks as plain text, so pretend
 		// all tokens after the last override block are TEXT
 		for (size_t i = tokens.size(); i > 0; --i) {
 			if (tokens[i - 1].type == dt::OVR_END) {
 				last_ovr_end = i;
 				break;
 			}
 		}
 		for (size_t i = 0; i < tokens.size(); ++i) {
 			size_t len = tokens[i].length;
-			switch (tokens[i].type) {
+			if (tokens[i].type == dt::TEXT)
-				case dt::KARAOKE_TEMPLATE: break;
+					SplitText(i);
 				case dt::KARAOKE_VARIABLE: break;
 				case dt::LINE_BREAK: break;
 				case dt::TEXT: SplitText(i); break;
 				case dt::TAG_NAME:
 					if (i + 1 > last_ovr_end) {
 						SplitText(i);
 						break;
 					}
 					if (len != 1 || i + 1 >= tokens.size() || text[pos] != 'p')
 						break;
 					in_drawing = false;
 					if (tokens[i + 1].type != dt::ARG)
 						break;
 					for (size_t j = pos + len; j < pos + len + tokens[i + 1].length; ++j) {
 						char c = text[j];
 						// I have no idea why one would use leading zeros for
 						// the scale, but vsfilter allows it
 						if (c >= '1' && c <= '9')
 							in_drawing = true;
 						else if (c != '0')
 							break;
 					}
 					break;
 				default:
 					if (i + 1 > last_ovr_end)
 						SplitText(i);
 					break;
 			}
 			pos += len;
 		}
 	}
@ -241,7 +189,73 @@ std::vector<DialogueToken> SyntaxHighlight(std::string const& text, std::vector<
 	return SyntaxHighlighter(text, spellchecker).Highlight(tokens);
 }
 /// Retype the TEXT tokens which follow a drawing-mode `p` tag as DRAWING.
 ///
 /// Tokens up to and including the last OVR_END are scanned in order: a
 /// one-character TAG_NAME equal to 'p' switches drawing mode on or off
 /// depending on its ARG token, and every TEXT token seen while drawing mode
 /// is on becomes DRAWING. Tokens after the last OVR_END are then collapsed
 /// into TEXT (or DRAWING) tokens, leaving karaoke template, karaoke variable
 /// and line break tokens as they are.
 ///
 /// @param str    Text that the token lengths index into
 /// @param tokens Token list; retyped, merged and shortened in place
 void MarkDrawings(std::string const& str, std::vector<DialogueToken> &tokens) {
 	if (tokens.empty()) return;
 	// One past the index of the last OVR_END token, or 0 when there is none.
 	// Everything from this index onwards is outside any closed override block.
 	size_t last_ovr_end = 0;
 	for (size_t i = tokens.size(); i > 0; --i) {
 		if (tokens[i - 1].type == dt::OVR_END) {
 			last_ovr_end = i;
 			break;
 		}
 	}
 	// Offset in str of the start of tokens[i]; advanced by each token's length
 	// NOTE(review): assumes the token lengths add up to offsets inside str --
 	// confirm against the tokenizer, as str is indexed without bounds checks
 	size_t pos = 0;
 	bool in_drawing = false;
 	for (size_t i = 0; i < last_ovr_end; ++i) {
 		size_t len = tokens[i].length;
 		switch (tokens[i].type) {
 			case dt::TEXT:
 				if (in_drawing)
 					tokens[i].type = dt::DRAWING;
 				break;
 			case dt::TAG_NAME:
 				// Only a single-character tag named 'p' which is not the very
 				// last token affects drawing mode
 				if (len != 1 || i + 1 >= tokens.size() || str[pos] != 'p')
 					break;
 				// A 'p' tag always leaves drawing mode unless its argument
 				// below turns it back on
 				in_drawing = false;
 				// The argument has to be an ARG token located before
 				// last_ovr_end
 				if (i + 1 == last_ovr_end || tokens[i + 1].type != dt::ARG)
 					break;
 				// Walk the characters of the ARG token: zeros are skipped, a
 				// digit 1-9 enables drawing mode, anything else ends the scan
 				for (size_t j = pos + len; j < pos + len + tokens[i + 1].length; ++j) {
 					char c = str[j];
 					// I have no idea why one would use leading zeros for
 					// the scale, but vsfilter allows it
 					if (c >= '1' && c <= '9')
 						in_drawing = true;
 					else if (c != '0')
 						break;
 				}
 				break;
 			default: break;
 		}
 		pos += len;
 	}
 	// VSFilter treats unclosed override blocks as plain text, so merge all
 	// the tokens after the last override block into a single TEXT (or DRAWING)
 	// token
 	// in_drawing still holds whatever state the scan above finished with
 	for (size_t i = last_ovr_end; i < tokens.size(); ++i) {
 		switch (tokens[i].type) {
 			case dt::KARAOKE_TEMPLATE: break;
 			case dt::KARAOKE_VARIABLE: break;
 			case dt::LINE_BREAK: break;
 			default:
 				tokens[i].type = in_drawing ? dt::DRAWING : dt::TEXT;
 				// Fold this token into the previous one when both now have the
 				// same type; erase() shifts the following tokens down, so step
 				// i back to have the loop's ++i land on the same index again
 				if (i > 0 && tokens[i - 1].type == tokens[i].type) {
 					tokens[i - 1].length += tokens[i].length;
 					tokens.erase(tokens.begin() + i);
 					--i;
 				}
 		}
 	}
 }
 /// Mark drawings in the token list and then split it into words.
 ///
 /// @param str    Text that the token lengths index into
 /// @param tokens Token list, modified in place by both steps
 void SplitWords(std::string const& str, std::vector<DialogueToken> &tokens) {
 	// MarkDrawings runs first, so the splitter sees the token list after
 	// drawing bodies have been retyped and the unclosed tail has been merged
 	MarkDrawings(str, tokens);
 	WordSplitter(str, tokens).SplitWords();
 }
--- a/aegisub/libaegisub/include/libaegisub/ass/dialogue_parser.h
+++ b/aegisub/libaegisub/include/libaegisub/ass/dialogue_parser.h
@ -71,6 +71,9 @@ namespace agi {
 		/// Tokenize the passed string as the body of a dialogue line
 		std::vector<DialogueToken> TokenizeDialogueBody(std::string const& str, bool karaoke_templater=false);
 		/// Convert the body of drawings to DRAWING tokens
 		///
 		/// TEXT tokens which follow a `p` tag whose argument is optional
 		/// zeros followed by a digit 1-9 are retyped as DRAWING. Tokens after
 		/// the last OVR_END are merged into TEXT (or DRAWING) tokens, apart
 		/// from karaoke template, karaoke variable and line break tokens.
 		/// @param str    Text that the token lengths index into
 		/// @param tokens Token list, modified in place
 		void MarkDrawings(std::string const& str, std::vector<DialogueToken> &tokens);
 		/// Split the words in the TEXT tokens of the lexed line into their
 		/// own tokens and convert the body of drawings to DRAWING tokens
 		///
 		/// The drawing conversion (MarkDrawings) is applied before the words
 		/// are split.
 		/// @param str    Text that the token lengths index into
 		/// @param tokens Token list, modified in place
 		void SplitWords(std::string const& str, std::vector<DialogueToken> &tokens);
--- a/aegisub/tests/libaegisub_word_split.cpp
+++ b/aegisub/tests/libaegisub_word_split.cpp
@ -126,11 +126,10 @@ TEST(lagi_word_split, unclosed_ovr) {
 	};
 	SplitWords(text, tokens);
-	ASSERT_EQ(4u, tokens.size());
+	ASSERT_EQ(3u, tokens.size());
 	EXPECT_EQ(dt::WORD, tokens[0].type);
 	EXPECT_EQ(dt::TEXT, tokens[1].type);
-	EXPECT_EQ(dt::TEXT, tokens[2].type);
+	EXPECT_EQ(dt::WORD, tokens[2].type);
 	EXPECT_EQ(dt::WORD, tokens[3].type);
 	text = "{";
 	tokens.clear();