From 9d4be82be8fed7534de32b3bd9a0bddca74564a1 Mon Sep 17 00:00:00 2001 From: Thomas Goyne Date: Mon, 29 Oct 2012 17:33:16 -0700 Subject: [PATCH] Replace syntax highlighting logic with new stuff in libaegisub --- .../libaegisub_vs2008.vcproj | 4 + aegisub/libaegisub/Makefile | 1 + aegisub/libaegisub/ass/dialogue_parser.cpp | 191 +++++++++++++++ aegisub/libaegisub/common/parser.cpp | 2 + .../include/libaegisub/ass/dialogue_parser.h | 26 +++ aegisub/src/subs_edit_ctrl.cpp | 217 ++---------------- 6 files changed, 242 insertions(+), 199 deletions(-) create mode 100644 aegisub/libaegisub/ass/dialogue_parser.cpp diff --git a/aegisub/build/libaegisub_vs2008/libaegisub_vs2008.vcproj b/aegisub/build/libaegisub_vs2008/libaegisub_vs2008.vcproj index 032c86d34..035c606ab 100644 --- a/aegisub/build/libaegisub_vs2008/libaegisub_vs2008.vcproj +++ b/aegisub/build/libaegisub_vs2008/libaegisub_vs2008.vcproj @@ -549,6 +549,10 @@ + + diff --git a/aegisub/libaegisub/Makefile b/aegisub/libaegisub/Makefile index 8b57339ae..49e9c9191 100644 --- a/aegisub/libaegisub/Makefile +++ b/aegisub/libaegisub/Makefile @@ -16,6 +16,7 @@ common/charset_conv.o: CXXFLAGS += $(CFLAGS_ICONV) unix/path.o: CXXFLAGS += -DP_DATA=\"$(P_DATA)\" -DP_DOC=\"$(P_DOC)\" -DP_LOCALE=\"$(P_LOCALE)\" SRC += \ + ass/dialogue_parser.cpp \ common/cajun/elements.cpp \ common/cajun/reader.cpp \ common/cajun/writer.cpp \ diff --git a/aegisub/libaegisub/ass/dialogue_parser.cpp b/aegisub/libaegisub/ass/dialogue_parser.cpp new file mode 100644 index 000000000..b0010e3c2 --- /dev/null +++ b/aegisub/libaegisub/ass/dialogue_parser.cpp @@ -0,0 +1,191 @@ +// Copyright (c) 2012, Thomas Goyne +// +// Permission to use, copy, modify, and distribute this software for any +// purpose with or without fee is hereby granted, provided that the above +// copyright notice and this permission notice appear in all copies. +// +// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +// +// Aegisub Project http://www.aegisub.org/ + +#include "../config.h" + +#include "libaegisub/ass/dialogue_parser.h" + +#include "libaegisub/scoped_ptr.h" +#include "libaegisub/spellchecker.h" + +#include "iconv.h" + +namespace { + +typedef std::vector TokenVec; +namespace dt = agi::ass::DialogueTokenType; +namespace ss = agi::ass::SyntaxStyle; + +class SyntaxHighlighter { + TokenVec ranges; + std::string const& text; + agi::SpellChecker *spellchecker; + agi::scoped_holder utf8_to_utf32; + + void SetStyling(int len, int type) { + if (ranges.size() && ranges.back().type == type) + ranges.back().length += len; + else + ranges.push_back(agi::ass::DialogueToken(type, len)); + } + + void CheckWord(int start, int end) { + int len = end - start; + if (!len) return; + + if (!spellchecker->CheckWord(text.substr(start, len))) + SetStyling(len, ss::SPELLING); + else + SetStyling(len, ss::NORMAL); + } + + int NextChar(int pos, int len, int& char_len) { + int chr = 0; + char *inptr = const_cast(&text[pos]); + size_t inlen = len; + char *outptr = (char *)&chr; + size_t outlen = sizeof chr; + + if (iconv(utf8_to_utf32, &inptr, &inlen, &outptr, &outlen) != 1) + return 0; + + char_len = len - inlen; + return chr; + } + + void StyleSpellCheck(int pos, int len) { + const int delims[] = { + 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0028, + 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, 0x003a, + 0x003b, 0x003d, 0x003f, 0x0040, 0x005b, 0x005c, 0x005d, 0x005e, + 0x005f, 0x0060, 0x007b, 0x007c, 0x007d, 0x007e, 0x00a1, 0x00a2, + 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, 0x00a8, 0x00aa, 0x00ab, + 0x00b0, 0x00b6, 0x00b7, 0x00ba, 0x00bb, 0x00bf, 0x02dc, 0x0e3f, + 0x2010, 0x2013, 0x2014, 0x2015, 0x2018, 0x2019, 0x201c, 0x201d, + 0x2020, 0x2021, 0x2022, 0x2025, 0x2026, 0x2026, 0x2030, 0x2031, + 0x2032, 0x203b, 0x203b, 0x203d, 0x2042, 0x2044, 0x20a6, 0x20a9, + 0x20aa, 0x20ac, 0x20ad, 0x2116, 0x2234, 0x2235, 0x2420, 0x2422, + 0x2423, 0x2506, 0x25ca, 0x2605, 0x261e, 0x2e2e, 0x3000, 0x3001, + 0x3002, 0x3008, 0x3009, 0x300a, 0x300b, 0x300c, 0x300d, 0x300e, + 0x300f, 0x3010, 0x3011, 0x3014, 0x3015, 0x3016, 0x3017, 0x3018, + 0x3019, 0x301a, 0x301b, 0x301c, 0x3030, 0x303d, 0x30fb, 0xff0a, + 0xff5b, 0xff5d, 0xff5e + }; + + int chrlen = 0; + int start = pos; + for (; len > 0; pos += chrlen, len -= chrlen) { + int chr = NextChar(pos, len, chrlen); + if (!chr) return; + + if (std::binary_search(std::begin(delims), std::end(delims), chr)) { + CheckWord(start, pos); + SetStyling(1, ss::NORMAL); + start = pos + 1; + } + } + + CheckWord(start, pos); + } + +public: + SyntaxHighlighter(std::string const& text, agi::SpellChecker *spellchecker) + : text(text) + , spellchecker(spellchecker) + , utf8_to_utf32(iconv_open("utf-32", "utf-8"), iconv_close) + { } + + TokenVec Highlight(TokenVec const& tokens, bool template_line) { + if (tokens.empty()) return ranges; + + bool in_drawing = false; + size_t pos = 0; + + // VSFilter treats unclosed override blocks as plain text, so pretend + // all tokens after the last override block are TEXT + size_t last_ovr_end = 0; + for (size_t i = tokens.size(); i > 0; --i) { + if (tokens[i - 1].type == dt::OVR_END) { + last_ovr_end = i - 1; + break; + } + } + + for (size_t i = 0; i < tokens.size(); ++i) { + size_t len = tokens[i].length; + switch (i > last_ovr_end ? dt::TEXT : tokens[i].type) { + case dt::LINE_BREAK: SetStyling(len, ss::LINE_BREAK); break; + case dt::ERROR: SetStyling(len, ss::ERROR); break; + case dt::ARG: SetStyling(len, ss::PARAMETER); break; + case dt::COMMENT: SetStyling(len, ss::COMMENT); break; + case dt::WHITESPACE: SetStyling(len, ss::NORMAL); break; + case dt::OPEN_PAREN: case dt::CLOSE_PAREN: case dt::ARG_SEP: case dt::TAG_START: + SetStyling(len, ss::PUNCTUATION); + break; + case dt::OVR_BEGIN: case dt::OVR_END: + SetStyling(len, ss::OVERRIDE); + break; + + case dt::TEXT: + if (in_drawing) + SetStyling(len, ss::DRAWING); + else if (spellchecker) + StyleSpellCheck(pos, len); + else + SetStyling(len, ss::NORMAL); + break; + + case dt::TAG_NAME: + SetStyling(len, ss::TAG); + + if (len != 1 || i + 1 >= tokens.size() || text[pos] != 'p') + break; + + in_drawing = false; + + if (tokens[i + 1].type != dt::ARG) + break; + + for (size_t j = pos + len; j < pos + len + tokens[i + 1].length; ++j) { + char c = text[j]; + // I have no idea why one would use leading zeros for + // the scale, but vsfilter allows it + if (c >= '1' && c <= '9') + in_drawing = true; + else if (c != '0') + break; + } + break; + } + + pos += len; + // karaoke templater + } + + return ranges; + } +}; +} + +namespace agi { +namespace ass { + +std::vector SyntaxHighlight(std::string const& text, std::vector const& tokens, bool template_line, SpellChecker *spellchecker) { + return SyntaxHighlighter(text, spellchecker).Highlight(tokens, template_line); +} + +} +} diff --git a/aegisub/libaegisub/common/parser.cpp b/aegisub/libaegisub/common/parser.cpp index bd383a26c..99d5e42fe 100644 --- a/aegisub/libaegisub/common/parser.cpp +++ b/aegisub/libaegisub/common/parser.cpp @@ -11,6 +11,8 @@ // WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN // ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF // OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +// +// Aegisub Project http://www.aegisub.org/ #include "../config.h" diff --git a/aegisub/libaegisub/include/libaegisub/ass/dialogue_parser.h b/aegisub/libaegisub/include/libaegisub/ass/dialogue_parser.h index 5c2a10f06..87624ef76 100644 --- a/aegisub/libaegisub/include/libaegisub/ass/dialogue_parser.h +++ b/aegisub/libaegisub/include/libaegisub/ass/dialogue_parser.h @@ -11,12 +11,18 @@ // WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN // ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF // OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +// +// Aegisub Project http://www.aegisub.org/ #ifndef LAGI_PRE #include #endif +#undef ERROR + namespace agi { + class SpellChecker; + namespace ass { namespace DialogueTokenType { enum { @@ -36,6 +42,24 @@ namespace agi { }; } + namespace SyntaxStyle { + enum { + NORMAL = 0, + COMMENT, + DRAWING, + OVERRIDE, + PUNCTUATION, + TAG, + ERROR, + PARAMETER, + LINE_BREAK, + KARAOKE_TEMPLATE, + KARAOKE_VARIABLE, + + SPELLING = 32 + }; + } + struct DialogueToken { int type; size_t length; @@ -43,5 +67,7 @@ namespace agi { }; std::vector TokenizeDialogueBody(std::string const& str); + + std::vector SyntaxHighlight(std::string const& text, std::vector const& tokens, bool template_line, SpellChecker *spellchecker); } } diff --git a/aegisub/src/subs_edit_ctrl.cpp b/aegisub/src/subs_edit_ctrl.cpp index db9027f4d..8a8ea8e9a 100644 --- a/aegisub/src/subs_edit_ctrl.cpp +++ b/aegisub/src/subs_edit_ctrl.cpp @@ -58,6 +58,7 @@ #include "thesaurus.h" #include "utils.h" +#include #include /// Event ids @@ -233,20 +234,6 @@ void SubsTextEditCtrl::OnKeyDown(wxKeyEvent &event) { event.Skip(); } -enum { - STYLE_NORMAL = 0, - STYLE_COMMENT, - STYLE_DRAWING, - STYLE_OVERRIDE, - STYLE_PUNCTUATION, - STYLE_TAG, - STYLE_ERROR, - STYLE_PARAMETER, - STYLE_LINE_BREAK, - STYLE_KARAOKE_TEMPLATE, - STYLE_KARAOKE_VARIABLE -}; - void SubsTextEditCtrl::SetSyntaxStyle(int id, wxFont &font, std::string const& name) { StyleSetFont(id, font); StyleSetBold(id, OPT_GET("Colour/Subtitle/Syntax/Bold/" + name)->GetBool()); @@ -263,17 +250,18 @@ void SubsTextEditCtrl::SetStyles() { if (!fontname.empty()) font.SetFaceName(fontname); font.SetPointSize(OPT_GET("Subtitle/Edit Box/Font Size")->GetInt()); - SetSyntaxStyle(STYLE_NORMAL, font, "Normal"); - SetSyntaxStyle(STYLE_COMMENT, font, "Comment"); - SetSyntaxStyle(STYLE_DRAWING, font, "Drawing"); - SetSyntaxStyle(STYLE_OVERRIDE, font, "Brackets"); - SetSyntaxStyle(STYLE_PUNCTUATION, font, "Slashes"); - SetSyntaxStyle(STYLE_TAG, font, "Tags"); - SetSyntaxStyle(STYLE_ERROR, font, "Error"); - SetSyntaxStyle(STYLE_PARAMETER, font, "Parameters"); - SetSyntaxStyle(STYLE_LINE_BREAK, font, "Line Break"); - SetSyntaxStyle(STYLE_KARAOKE_TEMPLATE, font, "Karaoke Template"); - SetSyntaxStyle(STYLE_KARAOKE_VARIABLE, font, "Karaoke Variable"); + namespace ss = agi::ass::SyntaxStyle; + SetSyntaxStyle(ss::NORMAL, font, "Normal"); + SetSyntaxStyle(ss::COMMENT, font, "Comment"); + SetSyntaxStyle(ss::DRAWING, font, "Drawing"); + SetSyntaxStyle(ss::OVERRIDE, font, "Brackets"); + SetSyntaxStyle(ss::PUNCTUATION, font, "Slashes"); + SetSyntaxStyle(ss::TAG, font, "Tags"); + SetSyntaxStyle(ss::ERROR, font, "Error"); + SetSyntaxStyle(ss::PARAMETER, font, "Parameters"); + SetSyntaxStyle(ss::LINE_BREAK, font, "Line Break"); + SetSyntaxStyle(ss::KARAOKE_TEMPLATE, font, "Karaoke Template"); + SetSyntaxStyle(ss::KARAOKE_VARIABLE, font, "Karaoke Variable"); // Misspelling indicator IndicatorSetStyle(0,wxSTC_INDIC_SQUIGGLE); @@ -292,154 +280,14 @@ void SubsTextEditCtrl::UpdateStyle() { if (text.empty()) return; - // Check if it's a template line AssDialogue *diag = context ? context->selectionController->GetActiveLine() : 0; - bool templateLine = diag && diag->Comment && diag->Effect.Lower().StartsWith("template"); + bool template_line = diag && diag->Comment && diag->Effect.Lower().StartsWith("template"); - size_t last_template = 0; - if (templateLine) - last_template = text.rfind('!'); - size_t last_ovr_end = text.rfind('}'); - if (last_ovr_end == text.npos) - last_ovr_end = 0; - - bool in_parens = false; - bool in_unparened_arg = false; - bool in_draw_mode = false; - bool in_ovr = false; - bool in_karaoke_template = false; - - int range_len = 0; - int style = STYLE_NORMAL; - - char cur_char = 0; - char next_char = text[0]; - - size_t eat_chars = 0; - - for (size_t i = 0; i < text.size(); ++i) { - // Current/previous characters - char prev_char = cur_char; - cur_char = next_char; - next_char = i + 1 < text.size() ? text[i + 1] : 0; - - if (eat_chars > 0) { - ++range_len; - --eat_chars; - continue; - } - - int new_style = style; - - // Start karaoke template variable - if (templateLine && cur_char == '$') { - new_style = STYLE_KARAOKE_VARIABLE; - } - // Continue karaoke template variable - else if (style == STYLE_KARAOKE_VARIABLE && ((cur_char >= 'A' && cur_char <= 'Z') || (cur_char >= 'a' && cur_char <= 'z') || cur_char == '_')) { - // Do nothing and just continue the karaoke variable style - } - // Start karaoke template - else if (templateLine && !in_karaoke_template && cur_char == '!' && i < last_template) { - new_style = STYLE_KARAOKE_TEMPLATE; - in_karaoke_template = true; - } - // End karaoke template - else if (in_karaoke_template && cur_char == '!') { - new_style = STYLE_KARAOKE_TEMPLATE; - in_karaoke_template = false; - } - // Continue karaoke template - else if (in_karaoke_template) { - new_style = STYLE_KARAOKE_TEMPLATE; - } - // Start override block - else if (cur_char == '{' && i < last_ovr_end) { - new_style = in_ovr ? STYLE_ERROR : STYLE_OVERRIDE; - in_ovr = true; - } - // End override block - else if (cur_char == '}') { - new_style = in_ovr ? STYLE_OVERRIDE : STYLE_ERROR; - in_ovr = false; - - in_parens = false; - in_unparened_arg = false; - } - // Plain text - else if (!in_ovr) { - // Is \n, \N or \h? - if (cur_char == '\\' && (next_char == 'n' || next_char == 'N' || next_char == 'h')) { - new_style = STYLE_LINE_BREAK; - eat_chars = 1; - } - else if (in_draw_mode) - new_style = STYLE_DRAWING; - else - new_style = STYLE_NORMAL; - } - // Inside override tag - else { - // Special character - if (cur_char == '\\' || cur_char == '(' || cur_char == ')' || cur_char == ',') { - new_style = STYLE_PUNCTUATION; - in_unparened_arg = false; - - if (style == STYLE_TAG && cur_char == '(') - in_parens = true; - // This is technically wrong for nested tags, but it doesn't - // matter as \t doesn't have any arguments after the subtag(s) - else if (cur_char == ')') - in_parens = false; - } - // Beginning of a tag - else if (prev_char == '\\') { - new_style = STYLE_TAG; - // \r and \fn are special as their argument is an - // unparenthesized string - if (cur_char == 'r') - in_unparened_arg = true; - else if (cur_char == 'f' && next_char == 'n') { - eat_chars = 1; - in_unparened_arg = true; - } - // For \p we need to check if it's entering or leaving draw mode - // Luckily draw mode can't be set in the style, so no argument - // always means leave draw mode - else if (cur_char == 'p' && (next_char < 'a' || next_char > 'z')) { - in_draw_mode = false; - for (size_t idx = i + 1; idx < text.size(); ++idx) { - char c = text[idx]; - // I have no idea why one would use leading zeros for - // the scale, but vsfilter allows it - if (c >= '1' && c <= '9') - in_draw_mode = true; - else if (c != '0') - break; - } - } - // All tags start with letters or numbers - else if (cur_char < '0' || (cur_char > '9' && cur_char < 'A') || (cur_char > 'Z' && cur_char < 'a') || cur_char > 'z') - new_style = STYLE_ERROR; - } - else if ((in_parens && style != STYLE_TAG) || in_unparened_arg || (style == STYLE_TAG && ((cur_char < 'A' || cur_char > 'z') || (cur_char > 'Z' && cur_char < 'a')))) { - new_style = STYLE_PARAMETER; - } - else if (style != STYLE_TAG && style != STYLE_PARAMETER && style != STYLE_ERROR) { - new_style = STYLE_COMMENT; - } - } - - if (new_style != style) { - SetStyling(range_len, style); - style = new_style; - range_len = 0; - } - - ++range_len; + std::vector tokens = agi::ass::TokenizeDialogueBody(text); + std::vector style_ranges = agi::ass::SyntaxHighlight(text, tokens, template_line, spellchecker.get()); + for (size_t i = 0; i < style_ranges.size(); ++i) { + SetStyling(style_ranges[i].length, style_ranges[i].type); } - SetStyling(range_len, style); - StyleSpellCheck(); } /// @brief Update call tip @@ -655,35 +503,6 @@ void SubsTextEditCtrl::UpdateCallTip(wxStyledTextEvent &) { CallTipSetHighlight(highStart,highEnd); } -void SubsTextEditCtrl::StyleSpellCheck() { - if (!spellchecker) return; - - // Results - wxString text = GetText(); - IntPairVector results; - GetWordBoundaries(text,results); - - // Style - int count = results.size(); - for (int i=0;iCheckWord(from_wx(curWord))) { - StartUnicodeStyling(s,32); - SetUnicodeStyling(s,e-s,32); - } - } - - // It seems like wxStyledTextCtrl wants you to finish styling at the end of the text. - // I don't really understand why, it's not documented anywhere I can find, but this fixes bug #595. - StartUnicodeStyling(text.Length(), 0); - SetUnicodeStyling(text.Length(), 0, 0); -} - void SubsTextEditCtrl::SetTextTo(wxString text) { SetEvtHandlerEnabled(false); Freeze();