Replace syntax highlighting logic with new stuff in libaegisub

This commit is contained in:
Thomas Goyne 2012-10-29 17:33:16 -07:00
parent 76adcad999
commit 9d4be82be8
6 changed files with 242 additions and 199 deletions

View File

@ -549,6 +549,10 @@
<Filter
Name="ASS"
>
<File
RelativePath="..\..\libaegisub\ass\dialogue_parser.cpp"
>
</File>
<File
RelativePath="..\..\libaegisub\include\libaegisub\ass\dialogue_parser.h"
>

View File

@ -16,6 +16,7 @@ common/charset_conv.o: CXXFLAGS += $(CFLAGS_ICONV)
unix/path.o: CXXFLAGS += -DP_DATA=\"$(P_DATA)\" -DP_DOC=\"$(P_DOC)\" -DP_LOCALE=\"$(P_LOCALE)\"
SRC += \
ass/dialogue_parser.cpp \
common/cajun/elements.cpp \
common/cajun/reader.cpp \
common/cajun/writer.cpp \

View File

@ -0,0 +1,191 @@
// Copyright (c) 2012, Thomas Goyne <plorkyeran@aegisub.org>
//
// Permission to use, copy, modify, and distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
//
// Aegisub Project http://www.aegisub.org/
#include "../config.h"
#include "libaegisub/ass/dialogue_parser.h"
#include "libaegisub/scoped_ptr.h"
#include "libaegisub/spellchecker.h"
#include "iconv.h"
namespace {
typedef std::vector<agi::ass::DialogueToken> TokenVec;
namespace dt = agi::ass::DialogueTokenType;
namespace ss = agi::ass::SyntaxStyle;

/// Name of the UTF-32 encoding whose byte order matches the host.
///
/// The generic "utf-32" name is not used because iconv implementations may
/// prepend a byte order mark and/or pick big-endian for it, neither of which
/// is usable when the converted code point is read back through an int.
const char *NativeUtf32Name() {
	const int probe = 1;
	return *reinterpret_cast<const char *>(&probe) == 1 ? "utf-32le" : "utf-32be";
}

/// Turns the token stream of a dialogue line into a list of style ranges.
///
/// The result is a vector of DialogueToken where `type` holds a SyntaxStyle
/// value and `length` the number of bytes of the UTF-8 text it covers.
/// Adjacent ranges with the same style are merged.
class SyntaxHighlighter {
	/// Style ranges produced so far, in text order
	TokenVec ranges;
	/// UTF-8 text being highlighted; not owned, must outlive this object
	std::string const& text;
	/// Optional spell checker; spell checking is skipped when null
	agi::SpellChecker *spellchecker;
	/// iconv descriptor used to decode one code point at a time
	agi::scoped_holder<iconv_t, int(&)(iconv_t)> utf8_to_utf32;

	/// Append `len` bytes of style `type`, extending the previous range when
	/// it already has the same style
	void SetStyling(int len, int type) {
		if (ranges.size() && ranges.back().type == type)
			ranges.back().length += len;
		else
			ranges.push_back(agi::ass::DialogueToken(type, len));
	}

	/// Style the bytes [start, end) as a single word, marking it as
	/// misspelled when the spell checker rejects it. Empty words are ignored.
	void CheckWord(int start, int end) {
		int len = end - start;
		if (!len) return;

		if (!spellchecker->CheckWord(text.substr(start, len)))
			SetStyling(len, ss::SPELLING);
		else
			SetStyling(len, ss::NORMAL);
	}

	/// Decode the code point starting at byte `pos`
	/// @param pos Byte offset into text
	/// @param len Number of bytes available starting at pos
	/// @param[out] char_len Number of bytes the code point occupies; only
	///                      written on success
	/// @return The code point, or 0 if nothing could be decoded
	int NextChar(int pos, int len, int& char_len) {
		int chr = 0;
		char *inptr = const_cast<char *>(&text[pos]);
		size_t inlen = len;
		char *outptr = reinterpret_cast<char *>(&chr);
		size_t outlen = sizeof chr;

		// The output buffer only has room for one code point, so iconv
		// normally stops with E2BIG after converting it; its return value
		// therefore says nothing about whether a character was produced.
		// A completely filled output buffer is the success condition.
		iconv(utf8_to_utf32, &inptr, &inlen, &outptr, &outlen);
		if (outlen != 0 || inlen >= static_cast<size_t>(len))
			return 0;

		char_len = len - static_cast<int>(inlen);
		return chr;
	}

	/// Split the bytes [pos, pos + len) into words at delimiter characters
	/// and style each word according to the spell checker. Exactly `len`
	/// bytes are styled, even if the text cannot be fully decoded.
	void StyleSpellCheck(int pos, int len) {
		// Word delimiters as code points. Must stay sorted for binary_search.
		static const int delims[] = {
			0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0028,
			0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, 0x003a,
			0x003b, 0x003d, 0x003f, 0x0040, 0x005b, 0x005c, 0x005d, 0x005e,
			0x005f, 0x0060, 0x007b, 0x007c, 0x007d, 0x007e, 0x00a1, 0x00a2,
			0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, 0x00a8, 0x00aa, 0x00ab,
			0x00b0, 0x00b6, 0x00b7, 0x00ba, 0x00bb, 0x00bf, 0x02dc, 0x0e3f,
			0x2010, 0x2013, 0x2014, 0x2015, 0x2018, 0x2019, 0x201c, 0x201d,
			0x2020, 0x2021, 0x2022, 0x2025, 0x2026, 0x2026, 0x2030, 0x2031,
			0x2032, 0x203b, 0x203b, 0x203d, 0x2042, 0x2044, 0x20a6, 0x20a9,
			0x20aa, 0x20ac, 0x20ad, 0x2116, 0x2234, 0x2235, 0x2420, 0x2422,
			0x2423, 0x2506, 0x25ca, 0x2605, 0x261e, 0x2e2e, 0x3000, 0x3001,
			0x3002, 0x3008, 0x3009, 0x300a, 0x300b, 0x300c, 0x300d, 0x300e,
			0x300f, 0x3010, 0x3011, 0x3014, 0x3015, 0x3016, 0x3017, 0x3018,
			0x3019, 0x301a, 0x301b, 0x301c, 0x3030, 0x303d, 0x30fb, 0xff0a,
			0xff5b, 0xff5d, 0xff5e
		};

		int chrlen = 0;
		int start = pos;
		for (; len > 0; pos += chrlen, len -= chrlen) {
			int chr = NextChar(pos, len, chrlen);
			if (!chr) {
				// Undecodable input: finish the pending word and style the
				// remainder as plain text so that the ranges still cover
				// every byte of the token
				CheckWord(start, pos);
				SetStyling(len, ss::NORMAL);
				return;
			}

			if (std::binary_search(std::begin(delims), std::end(delims), chr)) {
				CheckWord(start, pos);
				// Delimiters may be several bytes long in UTF-8
				SetStyling(chrlen, ss::NORMAL);
				start = pos + chrlen;
			}
		}

		CheckWord(start, pos);
	}

public:
	/// @param text UTF-8 text of the dialogue line; referenced, not copied
	/// @param spellchecker Spell checker to use, or null to disable checking
	SyntaxHighlighter(std::string const& text, agi::SpellChecker *spellchecker)
	: text(text)
	, spellchecker(spellchecker)
	, utf8_to_utf32(iconv_open(NativeUtf32Name(), "utf-8"), iconv_close)
	{ }

	/// Compute the style ranges for a tokenized line
	/// @param tokens Tokens covering `text`, in order
	/// @param template_line Whether the line is a karaoke template; not
	///                      used by the current implementation
	/// @return Style ranges (SyntaxStyle in `type`, byte count in `length`)
	TokenVec Highlight(TokenVec const& tokens, bool template_line) {
		if (tokens.empty()) return ranges;

		bool in_drawing = false;
		size_t pos = 0;

		// VSFilter treats unclosed override blocks as plain text, so pretend
		// all tokens after the last override block are TEXT
		// NOTE(review): this also applies to LINE_BREAK tokens following the
		// last override block, and when there is no OVR_END at all only the
		// first token keeps its real type -- confirm that this is intended
		size_t last_ovr_end = 0;
		for (size_t i = tokens.size(); i > 0; --i) {
			if (tokens[i - 1].type == dt::OVR_END) {
				last_ovr_end = i - 1;
				break;
			}
		}

		for (size_t i = 0; i < tokens.size(); ++i) {
			size_t len = tokens[i].length;
			switch (i > last_ovr_end ? dt::TEXT : tokens[i].type) {
				case dt::LINE_BREAK: SetStyling(len, ss::LINE_BREAK); break;
				case dt::ERROR:      SetStyling(len, ss::ERROR);      break;
				case dt::ARG:        SetStyling(len, ss::PARAMETER);  break;
				case dt::COMMENT:    SetStyling(len, ss::COMMENT);    break;
				case dt::WHITESPACE: SetStyling(len, ss::NORMAL);     break;
				case dt::OPEN_PAREN: case dt::CLOSE_PAREN: case dt::ARG_SEP: case dt::TAG_START:
					SetStyling(len, ss::PUNCTUATION);
					break;
				case dt::OVR_BEGIN: case dt::OVR_END:
					SetStyling(len, ss::OVERRIDE);
					break;
				case dt::TEXT:
					if (in_drawing)
						SetStyling(len, ss::DRAWING);
					else if (spellchecker)
						StyleSpellCheck(pos, len);
					else
						SetStyling(len, ss::NORMAL);
					break;
				case dt::TAG_NAME:
					SetStyling(len, ss::TAG);

					// Only \p followed by something can change drawing mode
					if (len != 1 || i + 1 >= tokens.size() || text[pos] != 'p')
						break;

					// \p without an argument leaves drawing mode
					in_drawing = false;

					if (tokens[i + 1].type != dt::ARG)
						break;

					for (size_t j = pos + len; j < pos + len + tokens[i + 1].length; ++j) {
						char c = text[j];
						// I have no idea why one would use leading zeros for
						// the scale, but vsfilter allows it
						if (c >= '1' && c <= '9')
							in_drawing = true;
						else if (c != '0')
							break;
					}
					break;
			}

			pos += len;
			// karaoke templater
		}

		return ranges;
	}
};
}
namespace agi {
namespace ass {
std::vector<DialogueToken> SyntaxHighlight(std::string const& text, std::vector<DialogueToken> const& tokens, bool template_line, SpellChecker *spellchecker) {
return SyntaxHighlighter(text, spellchecker).Highlight(tokens, template_line);
}
}
}

View File

@ -11,6 +11,8 @@
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
//
// Aegisub Project http://www.aegisub.org/
#include "../config.h"

View File

@ -11,12 +11,18 @@
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
//
// Aegisub Project http://www.aegisub.org/
#ifndef LAGI_PRE
#include <vector>
#endif
#undef ERROR
namespace agi {
class SpellChecker;
namespace ass {
namespace DialogueTokenType {
enum {
@ -36,6 +42,24 @@ namespace agi {
};
}
namespace SyntaxStyle {
	/// Style identifiers stored in the `type` field of the ranges returned
	/// by SyntaxHighlight. The numeric values are spelled out so that they
	/// cannot shift when an entry is added or reordered.
	enum {
		NORMAL           = 0,
		COMMENT          = 1,
		DRAWING          = 2,
		OVERRIDE         = 3,
		PUNCTUATION      = 4,
		TAG              = 5,
		ERROR            = 6,
		PARAMETER        = 7,
		LINE_BREAK       = 8,
		KARAOKE_TEMPLATE = 9,
		KARAOKE_VARIABLE = 10,

		/// Misspelled word; deliberately outside the contiguous range above
		SPELLING         = 32
	};
}
struct DialogueToken {
int type;
size_t length;
@ -43,5 +67,7 @@ namespace agi {
};
std::vector<DialogueToken> TokenizeDialogueBody(std::string const& str);
std::vector<DialogueToken> SyntaxHighlight(std::string const& text, std::vector<DialogueToken> const& tokens, bool template_line, SpellChecker *spellchecker);
}
}

View File

@ -58,6 +58,7 @@
#include "thesaurus.h"
#include "utils.h"
#include <libaegisub/ass/dialogue_parser.h>
#include <libaegisub/spellchecker.h>
/// Event ids
@ -233,20 +234,6 @@ void SubsTextEditCtrl::OnKeyDown(wxKeyEvent &event) {
event.Skip();
}
/// Style ids used by the edit box for each kind of syntax element
enum {
	STYLE_NORMAL           = 0,
	STYLE_COMMENT          = 1,
	STYLE_DRAWING          = 2,
	STYLE_OVERRIDE         = 3,
	STYLE_PUNCTUATION      = 4,
	STYLE_TAG              = 5,
	STYLE_ERROR            = 6,
	STYLE_PARAMETER        = 7,
	STYLE_LINE_BREAK       = 8,
	STYLE_KARAOKE_TEMPLATE = 9,
	STYLE_KARAOKE_VARIABLE = 10
};
void SubsTextEditCtrl::SetSyntaxStyle(int id, wxFont &font, std::string const& name) {
StyleSetFont(id, font);
StyleSetBold(id, OPT_GET("Colour/Subtitle/Syntax/Bold/" + name)->GetBool());
@ -263,17 +250,18 @@ void SubsTextEditCtrl::SetStyles() {
if (!fontname.empty()) font.SetFaceName(fontname);
font.SetPointSize(OPT_GET("Subtitle/Edit Box/Font Size")->GetInt());
SetSyntaxStyle(STYLE_NORMAL, font, "Normal");
SetSyntaxStyle(STYLE_COMMENT, font, "Comment");
SetSyntaxStyle(STYLE_DRAWING, font, "Drawing");
SetSyntaxStyle(STYLE_OVERRIDE, font, "Brackets");
SetSyntaxStyle(STYLE_PUNCTUATION, font, "Slashes");
SetSyntaxStyle(STYLE_TAG, font, "Tags");
SetSyntaxStyle(STYLE_ERROR, font, "Error");
SetSyntaxStyle(STYLE_PARAMETER, font, "Parameters");
SetSyntaxStyle(STYLE_LINE_BREAK, font, "Line Break");
SetSyntaxStyle(STYLE_KARAOKE_TEMPLATE, font, "Karaoke Template");
SetSyntaxStyle(STYLE_KARAOKE_VARIABLE, font, "Karaoke Variable");
namespace ss = agi::ass::SyntaxStyle;
SetSyntaxStyle(ss::NORMAL, font, "Normal");
SetSyntaxStyle(ss::COMMENT, font, "Comment");
SetSyntaxStyle(ss::DRAWING, font, "Drawing");
SetSyntaxStyle(ss::OVERRIDE, font, "Brackets");
SetSyntaxStyle(ss::PUNCTUATION, font, "Slashes");
SetSyntaxStyle(ss::TAG, font, "Tags");
SetSyntaxStyle(ss::ERROR, font, "Error");
SetSyntaxStyle(ss::PARAMETER, font, "Parameters");
SetSyntaxStyle(ss::LINE_BREAK, font, "Line Break");
SetSyntaxStyle(ss::KARAOKE_TEMPLATE, font, "Karaoke Template");
SetSyntaxStyle(ss::KARAOKE_VARIABLE, font, "Karaoke Variable");
// Misspelling indicator
IndicatorSetStyle(0,wxSTC_INDIC_SQUIGGLE);
@ -292,154 +280,14 @@ void SubsTextEditCtrl::UpdateStyle() {
if (text.empty()) return;
// Check if it's a template line
AssDialogue *diag = context ? context->selectionController->GetActiveLine() : 0;
bool templateLine = diag && diag->Comment && diag->Effect.Lower().StartsWith("template");
bool template_line = diag && diag->Comment && diag->Effect.Lower().StartsWith("template");
size_t last_template = 0;
if (templateLine)
last_template = text.rfind('!');
size_t last_ovr_end = text.rfind('}');
if (last_ovr_end == text.npos)
last_ovr_end = 0;
bool in_parens = false;
bool in_unparened_arg = false;
bool in_draw_mode = false;
bool in_ovr = false;
bool in_karaoke_template = false;
int range_len = 0;
int style = STYLE_NORMAL;
char cur_char = 0;
char next_char = text[0];
size_t eat_chars = 0;
for (size_t i = 0; i < text.size(); ++i) {
// Current/previous characters
char prev_char = cur_char;
cur_char = next_char;
next_char = i + 1 < text.size() ? text[i + 1] : 0;
if (eat_chars > 0) {
++range_len;
--eat_chars;
continue;
}
int new_style = style;
// Start karaoke template variable
if (templateLine && cur_char == '$') {
new_style = STYLE_KARAOKE_VARIABLE;
}
// Continue karaoke template variable
else if (style == STYLE_KARAOKE_VARIABLE && ((cur_char >= 'A' && cur_char <= 'Z') || (cur_char >= 'a' && cur_char <= 'z') || cur_char == '_')) {
// Do nothing and just continue the karaoke variable style
}
// Start karaoke template
else if (templateLine && !in_karaoke_template && cur_char == '!' && i < last_template) {
new_style = STYLE_KARAOKE_TEMPLATE;
in_karaoke_template = true;
}
// End karaoke template
else if (in_karaoke_template && cur_char == '!') {
new_style = STYLE_KARAOKE_TEMPLATE;
in_karaoke_template = false;
}
// Continue karaoke template
else if (in_karaoke_template) {
new_style = STYLE_KARAOKE_TEMPLATE;
}
// Start override block
else if (cur_char == '{' && i < last_ovr_end) {
new_style = in_ovr ? STYLE_ERROR : STYLE_OVERRIDE;
in_ovr = true;
}
// End override block
else if (cur_char == '}') {
new_style = in_ovr ? STYLE_OVERRIDE : STYLE_ERROR;
in_ovr = false;
in_parens = false;
in_unparened_arg = false;
}
// Plain text
else if (!in_ovr) {
// Is \n, \N or \h?
if (cur_char == '\\' && (next_char == 'n' || next_char == 'N' || next_char == 'h')) {
new_style = STYLE_LINE_BREAK;
eat_chars = 1;
}
else if (in_draw_mode)
new_style = STYLE_DRAWING;
else
new_style = STYLE_NORMAL;
}
// Inside override tag
else {
// Special character
if (cur_char == '\\' || cur_char == '(' || cur_char == ')' || cur_char == ',') {
new_style = STYLE_PUNCTUATION;
in_unparened_arg = false;
if (style == STYLE_TAG && cur_char == '(')
in_parens = true;
// This is technically wrong for nested tags, but it doesn't
// matter as \t doesn't have any arguments after the subtag(s)
else if (cur_char == ')')
in_parens = false;
}
// Beginning of a tag
else if (prev_char == '\\') {
new_style = STYLE_TAG;
// \r and \fn are special as their argument is an
// unparenthesized string
if (cur_char == 'r')
in_unparened_arg = true;
else if (cur_char == 'f' && next_char == 'n') {
eat_chars = 1;
in_unparened_arg = true;
}
// For \p we need to check if it's entering or leaving draw mode
// Luckily draw mode can't be set in the style, so no argument
// always means leave draw mode
else if (cur_char == 'p' && (next_char < 'a' || next_char > 'z')) {
in_draw_mode = false;
for (size_t idx = i + 1; idx < text.size(); ++idx) {
char c = text[idx];
// I have no idea why one would use leading zeros for
// the scale, but vsfilter allows it
if (c >= '1' && c <= '9')
in_draw_mode = true;
else if (c != '0')
break;
}
}
// All tags start with letters or numbers
else if (cur_char < '0' || (cur_char > '9' && cur_char < 'A') || (cur_char > 'Z' && cur_char < 'a') || cur_char > 'z')
new_style = STYLE_ERROR;
}
else if ((in_parens && style != STYLE_TAG) || in_unparened_arg || (style == STYLE_TAG && ((cur_char < 'A' || cur_char > 'z') || (cur_char > 'Z' && cur_char < 'a')))) {
new_style = STYLE_PARAMETER;
}
else if (style != STYLE_TAG && style != STYLE_PARAMETER && style != STYLE_ERROR) {
new_style = STYLE_COMMENT;
}
}
if (new_style != style) {
SetStyling(range_len, style);
style = new_style;
range_len = 0;
}
++range_len;
std::vector<agi::ass::DialogueToken> tokens = agi::ass::TokenizeDialogueBody(text);
std::vector<agi::ass::DialogueToken> style_ranges = agi::ass::SyntaxHighlight(text, tokens, template_line, spellchecker.get());
for (size_t i = 0; i < style_ranges.size(); ++i) {
SetStyling(style_ranges[i].length, style_ranges[i].type);
}
SetStyling(range_len, style);
StyleSpellCheck();
}
/// @brief Update call tip
@ -655,35 +503,6 @@ void SubsTextEditCtrl::UpdateCallTip(wxStyledTextEvent &) {
CallTipSetHighlight(highStart,highEnd);
}
/// @brief Mark every misspelled word in the edit box
///
/// Does nothing when no spell checker is available.
void SubsTextEditCtrl::StyleSpellCheck() {
	if (!spellchecker) return;

	// Split the current text into words
	wxString text = GetText();
	IntPairVector results;
	GetWordBoundaries(text,results);

	// Apply style 32 to each word the spell checker rejects
	int count = results.size();
	for (int i = 0; i < count; ++i) {
		const int word_start = results[i].first;
		const int word_len = results[i].second - word_start;

		if (spellchecker->CheckWord(from_wx(text.Mid(word_start, word_len))))
			continue;

		StartUnicodeStyling(word_start,32);
		SetUnicodeStyling(word_start,word_len,32);
	}

	// It seems like wxStyledTextCtrl wants you to finish styling at the end of the text.
	// I don't really understand why, it's not documented anywhere I can find, but this fixes bug #595.
	StartUnicodeStyling(text.Length(), 0);
	SetUnicodeStyling(text.Length(), 0, 0);
}
void SubsTextEditCtrl::SetTextTo(wxString text) {
SetEvtHandlerEnabled(false);
Freeze();