From c556a475098d9ad3adb23885bf08352c75bbc33f Mon Sep 17 00:00:00 2001 From: Thomas Goyne Date: Tue, 9 Apr 2013 20:46:59 -0700 Subject: [PATCH] Replace wxRegex bindings for lua with boost::regex bindings API is mostly unchanged other than the addition of a lot more flags. Should be less buggy since it has an actual test suite, and generally has a more powerful regex syntax with better support for Unicode. The bindings are written in MoonScript. For now the compiled form is stored in the repo for convenience. --- aegisub/automation/include/re.lua | 569 +++++++++--------- aegisub/automation/include/re.moon | 244 ++++++++ aegisub/build/Aegisub/Aegisub.vcxproj | 1 + aegisub/build/Aegisub/Aegisub.vcxproj.filters | 3 + aegisub/src/Makefile | 6 +- aegisub/src/auto4_lua.cpp | 122 +--- aegisub/src/auto4_lua_utils.h | 1 + aegisub/src/auto4_regex.cpp | 208 +++++++ 8 files changed, 753 insertions(+), 401 deletions(-) create mode 100644 aegisub/automation/include/re.moon create mode 100644 aegisub/src/auto4_regex.cpp diff --git a/aegisub/automation/include/re.lua b/aegisub/automation/include/re.lua index 1de7aa7e3..32413ed9c 100644 --- a/aegisub/automation/include/re.lua +++ b/aegisub/automation/include/re.lua @@ -1,301 +1,314 @@ --- Copyright (c) 2012, Thomas Goyne --- --- Permission to use, copy, modify, and distribute this software for any --- purpose with or without fee is hereby granted, provided that the above --- copyright notice and this permission notice appear in all copies. --- --- THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES --- WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF --- MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR --- ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES --- WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN --- ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF --- OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - --- Get the wxRegex binding local regex = aegisub.__init_regex() - --- Compiled regular expression type protoype -local re_proto = {} -local re_proto_mt = { __index = re_proto } - --- Convert an iterator to an array -local function to_table(...) - local arr = {} - local i = 1 - for v in ... do - arr[i] = v - i = i + 1 - end - return arr +local select_first +select_first = function(n, a, ...) + if n == 0 then + return + end + return a, select_first(n - 1, ...) end - --- Return the first n elements from ... -local function select_first(n, a, ...) - if n == 0 then return end - return a, select_first(n - 1, ...) +local unpack_args +unpack_args = function(...) + local userdata_start = nil + for i = 1, select('#', ...) do + local v = select(i, ...) + if type(v) == 'userdata' then + userdata_start = i + break + end + end + if not (userdata_start) then + return 0, ... + end + local flags = regex.process_flags(select(userdata_start, ...)) + if type(flags) == 'string' then + error(flags, 3) + end + return flags, select_first(userdata_start - 1, ...) end - --- Extract the flags from ..., bitwise OR them together, and move them to the --- front of ... -local function unpack_args(...) - local n = select('#', ...) - local userdata_start = nil - for i = 1, n do - local v = select(i, ...) - if type(v) == "userdata" then - userdata_start = i - break - end - end - - if not userdata_start then - return 0, ... - end - - flags = regex.process_flags(select(userdata_start, ...)) - if type(flags) == "string" then - error(flags, 3) - end - - return flags, select_first(userdata_start - 1, ...) 
+local check_arg +check_arg = function(arg, expected_type, argn, func_name, level) + if type(arg) ~= expected_type then + return error("Argument " .. tostring(argn) .. " to " .. tostring(func_name) .. " should be a '" .. tostring(expected_type) .. "', is '" .. tostring(type(arg)) .. "' (" .. tostring(arg) .. ")", level + 1) + end end - --- Verify that a valid value was passed for self -local function check_self(self) - if getmetatable(self) ~= re_proto_mt then - error("re method called with invalid self. You probably used . when : is needed.", 3) - end +local replace_match +replace_match = function(match, func, str, last, acc) + if last < match.last then + acc[#acc + 1] = str:sub(last, match.first - 1) + end + local repl = func(match.str, match.first, match.last) + if type(repl) == 'string' then + acc[#acc + 1] = repl + else + acc[#acc + 1] = match.str + end + return match.first, match.last + 1 end - --- Typecheck a variable and throw an error if it fails -local function check_arg(arg, expected_type, argn, func_name, level) - if type(arg) ~= expected_type then - error( - string.format("Argument %d to %s should be a '%s', is '%s' (%s)", - argn, func_name, expected_type, type(arg), tostring(arg)), - level + 1) - end +local do_single_replace_fun +do_single_replace_fun = function(re, func, str, acc, pos) + local matches = re:match(str, pos) + if not (matches) then + return pos + end + local start + if #matches == 1 then + start = 1 + else + start = 2 + end + local last = pos + local first + for i = start, #matches do + first, last = replace_match(matches[i], func, str, last, acc) + end + if first == last then + acc[#acc + 1] = str:sub(last, last) + last = last + 1 + end + return last, matches[1].first <= str:len() end - -function re_proto.gsplit(self, str, skip_empty, max_split) - check_self(self) - check_arg(str, "string", 2, "gsplit", self._level) - if not max_split or max_split <= 0 then max_split = str:len() end - - local function do_split() +local do_replace_fun 
+do_replace_fun = function(re, func, str, max) + local acc = { } + local pos = 1 + local i + for i = 1, max do + local more + pos, more = do_single_replace_fun(re, func, str, acc, pos) + if not (more) then + max = i + break + end + end + return table.concat(acc, '') .. str:sub(pos) +end +local RegEx +do + local start + local _parent_0 = nil + local _base_0 = { + _check_self = function(self) + if not (self.__class == RegEx) then + return error('re method called with invalid self. You probably used . when : is needed.', 3) + end + end, + gsplit = function(self, str, skip_empty, max_split) + self:_check_self() + check_arg(str, 'string', 2, 'gsplit', self._level) + if not max_split or max_split <= 0 then + max_split = str:len() + end + start = 1 + local prev = 1 + local do_split + do_split = function() if not str or str:len() == 0 then - return nil + return end - - if max_split == 0 or not regex.matches(self._regex, str) then - local ret = str - str = nil - return ret + local first, last + if max_split > 0 then + first, last = regex.search(self._regex, str, start) end - - local first, last = regex.get_match(self._regex, str, 0) - local ret = str:sub(1, first - 1) - str = str:sub(last + 1) - + if not first or first > str:len() then + local ret = str:sub(prev, str:len()) + str = nil + return ret + end + local ret = str:sub(prev, first - 1) + prev = last + 1 + start = 1 + (function() + if start >= last then + return start + else + return last + end + end)() if skip_empty and ret:len() == 0 then - return do_split() + return do_split() else - max_split = max_split - 1 - return ret + max_split = max_split - 1 + return ret end - end - - return do_split -end - -function re_proto.split(self, str, skip_empty, max_split) - check_self(self) - check_arg(str, "string", 2, "split", self._level) - return to_table(self:gsplit(str, skip_empty, max_split)) -end - -function re_proto.gfind(self, str) - check_self(self) - check_arg(str, "string", 2, "gfind", self._level) - - local offset = 
0 - return function() - local has_matches = regex.matches(self._regex, str) - if not has_matches then return end - - local first, last = regex.get_match(self._regex, str, 0) - local ret = str:sub(first, last) - str = str:sub(last + 1) - - last = last + offset - offset = offset + first - return ret, offset, last - end -end - -function re_proto.find(self, str) - check_self(self) - check_arg(str, "string", 2, "find", self._level) - - local i = 1 - local ret = {} - for s, f, l in self:gfind(str) do - ret[i] = { - str = s, + end + return do_split + end, + split = function(self, str, skip_empty, max_split) + self:_check_self() + check_arg(str, 'string', 2, 'split', self._level) + return (function() + local _accum_0 = { } + local _len_0 = 1 + for v in self:gsplit(str, skip_empty, max_split) do + _accum_0[_len_0] = v + _len_0 = _len_0 + 1 + end + return _accum_0 + end)() + end, + gfind = function(self, str) + self:_check_self() + check_arg(str, 'string', 2, 'gfind', self._level) + start = 1 + return function() + local first, last = regex.search(self._regex, str, start) + if not (first) then + return + end + if last >= start then + start = last + 1 + else + start = start + 1 + end + return str:sub(first, last), first, last + end + end, + find = function(self, str) + self:_check_self() + check_arg(str, 'string', 2, 'find', self._level) + local ret = (function() + local _accum_0 = { } + local _len_0 = 1 + for s, f, l in self:gfind(str) do + _accum_0[_len_0] = { + str = s, first = f, - last = l - } + last = l + } + _len_0 = _len_0 + 1 + end + return _accum_0 + end)() + return next(ret) and ret + end, + sub = function(self, str, repl, max_count) + self:_check_self() + check_arg(str, 'string', 2, 'sub', self._level) + if max_count ~= nil then + check_arg(max_count, 'number', 4, 'sub', self._level) + end + if not max_count or max_count == 0 then + max_count = str:len() + 1 + end + if type(repl) == 'function' then + return do_replace_fun(self, repl, str, max_count) + elseif 
type(repl) == 'string' then + return regex.replace(self._regex, repl, str, max_count) + else + return error("Argument 2 to sub should be a string or function, is '" .. tostring(type(repl)) .. "' (" .. tostring(repl) .. ")", self._level) + end + end, + gmatch = function(self, str, start) + self:_check_self() + check_arg(str, 'string', 2, 'gmatch', self._level) + if start then + start = start - 1 + else + start = 0 + end + local match = regex.match(self._regex, str, start) + local i = 1 + return function() + if not (match) then + return + end + local first, last = regex.get_match(match, i) + if not (first) then + return + end i = i + 1 - end - return ret -end - --- Replace a match with the value returned from func when passed the match -local function replace_match(match, func, str, last, acc) - if last < match.last then - acc[#acc + 1] = str:sub(last, match.first - 1) - end - - local ret = func(match.str, match.first, match.last) - if type(ret) == "string" then - acc[#acc + 1] = ret - else - -- If it didn't return a string just leave the old value - acc[#acc + 1] = match.str - end - - return match.last + 1 -end - --- Replace all matches from a single iteration of the regexp -local function do_single_replace_fun(re, func, str, acc) - local matches = re:match(str) - - -- No more matches so just return what we have so far - if not matches then - return str - end - - -- One match means no capturing groups, so pass the entire thing to - -- the replace function - if #matches == 1 then - local rest = replace_match(matches[1], func, str, 1, acc) - return str:sub(rest), true - end - - -- Multiple matches means there were capture groups, so skip the first one - -- and pass the rest to the replace function - local last = 1 - for i = 2, #matches do - last = replace_match(matches[i], func, str, last, acc) - end - - return str:sub(last), true -end - -local function do_replace_fun(re, func, str, max) - local acc = {} - local i - for i = 1, max do - str, continue = 
do_single_replace_fun(re, func, str, acc) - if not continue then max = i end - end - return table.concat(acc, "") .. str, max -end - -function re_proto.sub(self, str, repl, count) - check_self(self) - check_arg(str, "string", 2, "sub", self._level) - if count ~= nil then - check_arg(count, "number", 4, "sub", self._level) - end - - if not count or count == 0 then count = str:len() end - - if type(repl) == "function" then - return do_replace_fun(self, repl, str, count) - elseif type(repl) == "string" then - return regex.replace(self._regex, repl, str, count) - else - error( - string.format("Argument 2 to sub should be a string or function, is '%s' (%s)", - type(repl), tostring(repl)), - self._level) - end -end - -function re_proto.gmatch(self, str) - check_self(self) - check_arg(str, "string", 2, "gmatch", self._level) - - local match_count = regex.match_count(self._regex, str) - local i = 0 - return function() - if i == match_count then return end - i = i + 1 - local first, last = regex.get_match(self._regex, str, i - 1) return { - str = str:sub(first, last), - first = first, - last = last + str = str:sub(first + start, last + start), + first = first + start, + last = last + start } + end + end, + match = function(self, str, start) + self:_check_self() + check_arg(str, 'string', 2, 'match', self._level) + local ret = (function() + local _accum_0 = { } + local _len_0 = 1 + for v in self:gmatch(str, start) do + _accum_0[_len_0] = v + _len_0 = _len_0 + 1 + end + return _accum_0 + end)() + if next(ret) == nil then + return nil + end + return ret end -end - -function re_proto.match(self, str) - check_self(self) - check_arg(str, "string", 2, "match", self._level) - - local ret = to_table(self:gmatch(str)) - -- Return nil rather than a empty table so that if re.match(...) 
works - if next(ret) == nil then return end - return ret -end - --- Create a wxRegExp object from a pattern, flags, and error depth -local function real_compile(pattern, level, flags, stored_level) - local regex = regex.compile(pattern, flags) - if not regex then - error("Bad syntax in regular expression", level + 1) + } + _base_0.__index = _base_0 + if _parent_0 then + setmetatable(_base_0, _parent_0.__base) + end + local _class_0 = setmetatable({ + __init = function(self, _regex, _level) + self._regex, self._level = _regex, _level + end, + __base = _base_0, + __name = "RegEx", + __parent = _parent_0 + }, { + __index = function(cls, name) + local val = rawget(_base_0, name) + if val == nil and _parent_0 then + return _parent_0[name] + else + return val + end + end, + __call = function(cls, ...) + local _self_0 = setmetatable({}, _base_0) + cls.__init(_self_0, ...) + return _self_0 end - return setmetatable({ - _regex = regex, - _level = stored_level or level + 1 - }, - re_proto_mt) + }) + _base_0.__class = _class_0 + local self = _class_0 + start = 1 + if _parent_0 and _parent_0.__inherited then + _parent_0.__inherited(_parent_0, _class_0) + end + RegEx = _class_0 end - --- Compile a pattern then invoke a method on it -local function invoke(str, pattern, fn, flags, ...) - local comp = real_compile(pattern, 3, flags) - return comp[fn](comp, str, ...) +local real_compile +real_compile = function(pattern, level, flags, stored_level) + if pattern == '' then + error('Regular expression must not be empty', level + 1) + end + local re = regex.compile(pattern, flags) + if type(re) == 'string' then + error(regex, level + 1) + end + return RegEx(re, stored_level or level + 1) end - --- Generate a static version of a method with arg type checking -local function gen_wrapper(impl_name) - return function(str, pattern, ...) 
- check_arg(str, "string", 1, impl_name, 2) - check_arg(pattern, "string", 2, impl_name, 2) - return invoke(str, pattern, impl_name, unpack_args(...)) - end +local invoke +invoke = function(str, pattern, fn, flags, ...) + local compiled_regex = real_compile(pattern, 3, flags) + return compiled_regex[fn](compiled_regex, str, ...) +end +local gen_wrapper +gen_wrapper = function(impl_name) + return function(str, pattern, ...) + check_arg(str, 'string', 1, impl_name, 2) + check_arg(pattern, 'string', 2, impl_name, 2) + return invoke(str, pattern, impl_name, unpack_args(...)) + end end - --- And now at last the actual public API local re = regex.init_flags(re) - -function re.compile(pattern, ...) - check_arg(pattern, "string", 1, "compile", 2) - return real_compile(pattern, 2, regex.process_flags(...), 2) +re.compile = function(pattern, ...) + check_arg(pattern, 'string', 1, 'compile', 2) + return real_compile(pattern, 2, regex.process_flags(...), 2) end - -re.split = gen_wrapper("split") -re.gsplit = gen_wrapper("gsplit") -re.find = gen_wrapper("find") -re.gfind = gen_wrapper("gfind") -re.match = gen_wrapper("match") -re.gmatch = gen_wrapper("gmatch") -re.sub = gen_wrapper("sub") - -_G.re = re -return _G.re +re.split = gen_wrapper('split') +re.gsplit = gen_wrapper('gsplit') +re.find = gen_wrapper('find') +re.gfind = gen_wrapper('gfind') +re.match = gen_wrapper('match') +re.gmatch = gen_wrapper('gmatch') +re.sub = gen_wrapper('sub') +return re diff --git a/aegisub/automation/include/re.moon b/aegisub/automation/include/re.moon new file mode 100644 index 000000000..17a879a8c --- /dev/null +++ b/aegisub/automation/include/re.moon @@ -0,0 +1,244 @@ +-- Copyright (c) 2012, Thomas Goyne +-- +-- Permission to use, copy, modify, and distribute this software for any +-- purpose with or without fee is hereby granted, provided that the above +-- copyright notice and this permission notice appear in all copies. 
+-- +-- THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +-- WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +-- MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +-- ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +-- WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +-- ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +-- OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +-- Get the boost::regex binding +regex = aegisub.__init_regex() + +-- Return the first n elements from ... +select_first = (n, a, ...) -> + if n == 0 then return + a, select_first n - 1, ... + +-- Extract the flags from ..., bitwise OR them together, and move them to the +-- front of ... +unpack_args = (...) -> + userdata_start = nil + for i = 1, select '#', ... + v = select i, ... + if type(v) == 'userdata' + userdata_start = i + break + + return 0, ... unless userdata_start + + flags = regex.process_flags select userdata_start, ... + if type(flags) == 'string' + error(flags, 3) + + flags, select_first userdata_start - 1, ... 
+ + +-- Typecheck a variable and throw an error if it fails +check_arg = (arg, expected_type, argn, func_name, level) -> + if type(arg) != expected_type + error "Argument #{argn} to #{func_name} should be a '#{expected_type}', is '#{type(arg)}' (#{arg})", + level + 1 + +-- Replace a match with the value returned from func when passed the match +replace_match = (match, func, str, last, acc) -> + -- Copy everything between the last match and this match + if last < match.last + acc[#acc + 1] = str\sub last, match.first - 1 + + repl = func match.str, match.first, match.last + + -- If it didn't return a string just leave the old value + acc[#acc + 1] = if type(repl) == 'string' then repl else match.str + + match.first, match.last + 1 + +-- Replace all matches from a single iteration of the regexp +do_single_replace_fun = (re, func, str, acc, pos) -> + matches = re\match str, pos + + -- No more matches so just return what's left of the input + return pos unless matches + + -- If there's only one match then there's no capturing groups and we need + -- to pass the entire match to the replace function, but if there's + -- multiple then we want to skip the full match and only pass the capturing + -- groups. + start = if #matches == 1 then 1 else 2 + last = pos + local first + for i = start, #matches + first, last = replace_match matches[i], func, str, last, acc + + -- Always eat at least one character from the input or we'll just make the + -- same match max_count times + if first == last + acc[#acc + 1] = str\sub last, last + last += 1 + + return last, matches[1].first <= str\len() + +do_replace_fun = (re, func, str, max) -> + acc = {} + pos = 1 + local i + for i = 1, max do + pos, more = do_single_replace_fun re, func, str, acc, pos + unless more + max = i + break + table.concat(acc, '') .. 
str\sub pos + +-- Compiled regular expression type protoype +class RegEx + -- Verify that a valid value was passed for self + _check_self: => + unless @__class == RegEx + error 're method called with invalid self. You probably used . when : is needed.', 3 + + new: (@_regex, @_level) => + + start = 1 + gsplit: (str, skip_empty, max_split) => + @_check_self! + check_arg str, 'string', 2, 'gsplit', @_level + if not max_split or max_split <= 0 then max_split = str\len() + + start = 1 + prev = 1 + do_split = () -> + if not str or str\len() == 0 then return + + local first, last + if max_split > 0 + first, last = regex.search @_regex, str, start + + if not first or first > str\len() + ret = str\sub prev, str\len() + str = nil + return ret + + ret = str\sub prev, first - 1 + prev = last + 1 + + start = 1 + if start >= last then start else last + + if skip_empty and ret\len() == 0 + do_split() + else + max_split -= 1 + ret + + do_split + + split: (str, skip_empty, max_split) => + @_check_self! + check_arg str, 'string', 2, 'split', @_level + [v for v in @gsplit str, skip_empty, max_split] + + gfind: (str) => + @_check_self! + check_arg str, 'string', 2, 'gfind', @_level + + start = 1 + -> + first, last = regex.search(@_regex, str, start) + return unless first + + start = if last >= start then last + 1 else start + 1 + str\sub(first, last), first, last + + find: (str) => + @_check_self! + check_arg str, 'string', 2, 'find', @_level + + ret = [str: s, first: f, last: l for s, f, l in @gfind(str)] + next(ret) and ret + + sub: (str, repl, max_count) => + @_check_self! 
+ check_arg str, 'string', 2, 'sub', @_level + if max_count != nil + check_arg max_count, 'number', 4, 'sub', @_level + + max_count = str\len() + 1 if not max_count or max_count == 0 + + if type(repl) == 'function' + do_replace_fun @, repl, str, max_count + elseif type(repl) == 'string' + regex.replace @_regex, repl, str, max_count + else + error "Argument 2 to sub should be a string or function, is '#{type(repl)}' (#{repl})", @_level + + gmatch: (str, start) => + @_check_self! + check_arg str, 'string', 2, 'gmatch', @_level + start = if start then start - 1 else 0 + + match = regex.match @_regex, str, start + i = 1 + -> + return unless match + first, last = regex.get_match match, i + return unless first + i += 1 + + { + str: str\sub first + start, last + start + first: first + start + last: last + start + } + + match: (str, start) => + @_check_self! + check_arg(str, 'string', 2, 'match', @_level) + + ret = [v for v in @gmatch str, start] + -- Return nil rather than a empty table so that if re.match(...) works + return nil if next(ret) == nil + ret + +-- Create a regex object from a pattern, flags, and error depth +real_compile = (pattern, level, flags, stored_level) -> + if pattern == '' + error 'Regular expression must not be empty', level + 1 + + re = regex.compile pattern, flags + if type(re) == 'string' + error regex, level + 1 + + RegEx re, stored_level or level + 1 + +-- Compile a pattern then invoke a method on it +invoke = (str, pattern, fn, flags, ...) -> + compiled_regex = real_compile(pattern, 3, flags) + compiled_regex[fn](compiled_regex, str, ...) + +-- Generate a static version of a method with arg type checking +gen_wrapper = (impl_name) -> + (str, pattern, ...) -> + check_arg str, 'string', 1, impl_name, 2 + check_arg pattern, 'string', 2, impl_name, 2 + invoke str, pattern, impl_name, unpack_args ... + +-- And now at last the actual public API +re = regex.init_flags(re) + +re.compile = (pattern, ...) 
-> + check_arg pattern, 'string', 1, 'compile', 2 + real_compile pattern, 2, regex.process_flags(...), 2 + +re.split = gen_wrapper 'split' +re.gsplit = gen_wrapper 'gsplit' +re.find = gen_wrapper 'find' +re.gfind = gen_wrapper 'gfind' +re.match = gen_wrapper 'match' +re.gmatch = gen_wrapper 'gmatch' +re.sub = gen_wrapper 'sub' + +re diff --git a/aegisub/build/Aegisub/Aegisub.vcxproj b/aegisub/build/Aegisub/Aegisub.vcxproj index 69ec6704e..8cf1ebd6c 100644 --- a/aegisub/build/Aegisub/Aegisub.vcxproj +++ b/aegisub/build/Aegisub/Aegisub.vcxproj @@ -327,6 +327,7 @@ + diff --git a/aegisub/build/Aegisub/Aegisub.vcxproj.filters b/aegisub/build/Aegisub/Aegisub.vcxproj.filters index e7b2dd773..24222fdcd 100644 --- a/aegisub/build/Aegisub/Aegisub.vcxproj.filters +++ b/aegisub/build/Aegisub/Aegisub.vcxproj.filters @@ -1238,6 +1238,9 @@ ASS + + Automation\Lua + diff --git a/aegisub/src/Makefile b/aegisub/src/Makefile index 8227f95fe..7989ae0b2 100644 --- a/aegisub/src/Makefile +++ b/aegisub/src/Makefile @@ -92,11 +92,11 @@ endif # AUTOMATION ############ ifeq (yes, $(HAVE_AUTO4_LUA)) -auto4_lua.o auto4_lua_assfile.o auto4_lua_dialog.o auto4_lua_progresssink.o auto4_lua_scriptreader.o: CXXFLAGS += $(CFLAGS_LUA) +auto4_lua.o auto4_lua_assfile.o auto4_lua_dialog.o auto4_lua_progresssink.o auto4_lua_regex.o auto4_lua_scriptreader.o: CXXFLAGS += $(CFLAGS_LUA) LIBS += $(LIBS_LUA) -SRC += auto4_lua.cpp auto4_lua_assfile.cpp auto4_lua_dialog.cpp auto4_lua_progresssink.cpp auto4_lua_scriptreader.cpp +SRC += auto4_lua.cpp auto4_lua_assfile.cpp auto4_lua_dialog.cpp auto4_lua_progresssink.cpp auto4_regex.cpp auto4_lua_scriptreader.cpp else -EXTRA_DIST += auto4_lua.cpp auto4_lua_assfile.cpp auto4_lua_dialog.cpp auto4_lua_progresssink.cpp auto4_lua_scriptreader.cpp +EXTRA_DIST += auto4_lua.cpp auto4_lua_assfile.cpp auto4_lua_dialog.cpp auto4_lua_progresssink.cpp auto4_regex.cpp auto4_lua_scriptreader.cpp endif ############## diff --git a/aegisub/src/auto4_lua.cpp b/aegisub/src/auto4_lua.cpp 
index 60633aa3f..7aa7459cb 100644 --- a/aegisub/src/auto4_lua.cpp +++ b/aegisub/src/auto4_lua.cpp @@ -71,7 +71,6 @@ #include #include #include -#include #include namespace { @@ -111,125 +110,6 @@ namespace { return 1; } - inline wxRegEx *get_regex(lua_State *L) - { - return static_cast(luaL_checkudata(L, 1, "aegisub.regex")); - } - - int regex_matches(lua_State *L) - { - lua_pushboolean(L, get_regex(L)->Matches(check_wxstring(L, 2))); - return 1; - } - - int regex_match_count(lua_State *L) - { - wxRegEx *re = get_regex(L); - if (re->Matches(check_wxstring(L, 2))) - push_value(L, re->GetMatchCount()); - else - push_value(L, 0); - return 1; - } - - size_t utf8_len(wxString const& w) - { -#if wxUSE_UNICODE_UTF8 - return w.utf8_length(); -#else - return w.utf8_str().length(); -#endif - } - - int regex_get_match(lua_State *L) - { - wxString str(check_wxstring(L, 2)); - size_t start, len; - get_regex(L)->GetMatch(&start, &len, luaL_checkinteger(L, 3)); - push_value(L, utf8_len(str.Left(start)) + 1); - push_value(L, utf8_len(str.Left(start + len))); - return 2; - } - - int regex_replace(lua_State *L) - { - wxString str(check_wxstring(L, 3)); - int reps = get_regex(L)->Replace(&str, check_wxstring(L, 2), luaL_checkinteger(L, 4)); - push_value(L, str); - push_value(L, reps); - return 2; - } - - int regex_compile(lua_State *L) - { - wxString pattern(check_wxstring(L, 1)); - int flags = luaL_checkinteger(L, 2); - wxRegEx *re = static_cast(lua_newuserdata(L, sizeof(wxRegEx))); - new(re) wxRegEx(pattern, wxRE_ADVANCED | flags); - - luaL_getmetatable(L, "aegisub.regex"); - lua_setmetatable(L, -2); - - // return nil and handle the error in lua as it's a bit easier to - // report the actual call site from there - if (!re->IsValid()) { - lua_pop(L, 1); - lua_pushnil(L); - } - - return 1; - } - - int regex_gc(lua_State *L) { - get_regex(L)->~wxRegEx(); - return 0; - } - - int regex_process_flags(lua_State *L) - { - int ret = 0; - int nargs = lua_gettop(L); - for (int i = 1; i <= 
nargs; ++i) { - if (!lua_islightuserdata(L, i)) { - push_value(L, "Flags must follow all non-flag arguments"); - return 1; - } - ret |= (int)(intptr_t)lua_touserdata(L, i); - } - - push_value(L, ret); - return 1; - } - - int regex_init_flags(lua_State *L) - { - lua_newtable(L); - - set_field(L, "ICASE", (void*)wxRE_ICASE); - set_field(L, "NOSUB", (void*)wxRE_NOSUB); - set_field(L, "NEWLINE", (void*)wxRE_NEWLINE); - - return 1; - } - - int regex_init(lua_State *L) - { - if (luaL_newmetatable(L, "aegisub.regex")) { - set_field(L, "__gc", regex_gc); - lua_pop(L, 1); - } - - lua_newtable(L); - set_field(L, "matches", regex_matches); - set_field(L, "match_count", regex_match_count); - set_field(L, "get_match", regex_get_match); - set_field(L, "replace", regex_replace); - set_field(L, "compile", regex_compile); - set_field(L, "process_flags", regex_process_flags); - set_field(L, "init_flags", regex_init_flags); - return 1; - } - int clipboard_get(lua_State *L) { std::string data = GetClipboard(); @@ -274,6 +154,8 @@ namespace { } namespace Automation4 { + int regex_init(lua_State *L); + // LuaScript LuaScript::LuaScript(agi::fs::path const& filename) : Script(filename) diff --git a/aegisub/src/auto4_lua_utils.h b/aegisub/src/auto4_lua_utils.h index 607d3d32b..d64f37263 100644 --- a/aegisub/src/auto4_lua_utils.h +++ b/aegisub/src/auto4_lua_utils.h @@ -33,6 +33,7 @@ inline void push_value(lua_State *L, const char *value) { lua_pushstring(L, valu inline void push_value(lua_State *L, double value) { lua_pushnumber(L, value); } inline void push_value(lua_State *L, int value) { lua_pushinteger(L, value); } inline void push_value(lua_State *L, size_t value) { lua_pushinteger(L, value); } +inline void push_value(lua_State *L, long value) { lua_pushinteger(L, value); } inline void push_value(lua_State *L, void *p) { lua_pushlightuserdata(L, p); } inline void push_value(lua_State *L, wxString const& value) { diff --git a/aegisub/src/auto4_regex.cpp b/aegisub/src/auto4_regex.cpp 
new file mode 100644 index 000000000..4638f464d --- /dev/null +++ b/aegisub/src/auto4_regex.cpp @@ -0,0 +1,208 @@ +// Copyright (c) 2013, Thomas Goyne +// +// Permission to use, copy, modify, and distribute this software for any +// purpose with or without fee is hereby granted, provided that the above +// copyright notice and this permission notice appear in all copies. +// +// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +// +// Aegisub Project http://www.aegisub.org/ + +#include "config.h" + +#ifdef WITH_AUTO4_LUA +#include "auto4_lua_utils.h" + +#include + +namespace { +boost::u32regex& get_regex(lua_State *L) { + return *static_cast(luaL_checkudata(L, 1, "aegisub.regex")); +} + +boost::smatch& get_smatch(lua_State *L) { + return *static_cast(luaL_checkudata(L, 1, "aegisub.smatch")); +} + +int regex_matches(lua_State *L) { + lua_pushboolean(L, u32regex_match(luaL_checkstring(L, 2), get_regex(L))); + return 1; +} + +int regex_match(lua_State *L) { + auto re = get_regex(L); + std::string str = luaL_checkstring(L, 2); + int start = lua_tointeger(L, 3); + + auto result = static_cast(lua_newuserdata(L, sizeof(boost::smatch))); + new(result) boost::smatch; + luaL_getmetatable(L, "aegisub.smatch"); + lua_setmetatable(L, -2); + + if (!u32regex_search(str.cbegin() + start, str.cend(), *result, re, + start > 0 ? 
boost::match_prev_avail | boost::match_not_bob : boost::match_default)) + { + lua_pop(L, 1); + lua_pushnil(L); + } + + return 1; +} + +int regex_get_match(lua_State *L) { + auto match = get_smatch(L); + int idx = luaL_checkinteger(L, 2) - 1; + if (static_cast(idx) > match.size() || !match[idx].matched) { + lua_pushnil(L); + return 1; + } + + push_value(L, distance(match.prefix().first, match[idx].first + 1)); + push_value(L, distance(match.prefix().first, match[idx].second)); + return 2; +} + +int regex_search(lua_State *L) { + auto re = get_regex(L); + std::string str = luaL_checkstring(L, 2); + int start = luaL_checkinteger(L, 3) - 1; + boost::smatch result; + if (!u32regex_search(str.cbegin() + start, str.cend(), result, re, + start > 0 ? boost::match_prev_avail | boost::match_not_bob : boost::match_default)) + { + lua_pushnil(L); + return 1; + } + + push_value(L, start + result.position() + 1); + push_value(L, start + result.position() + result.length()); + return 2; +} + +int regex_replace(lua_State *L) { + auto re = get_regex(L); + const auto replacement = luaL_checkstring(L, 2); + const std::string str = luaL_checkstring(L, 3); + int max_count = luaL_checkinteger(L, 4); + + // Can't just use regex_replace here since it can only do one or infinite replacements + auto match = boost::u32regex_iterator(begin(str), end(str), re); + auto end_it = boost::u32regex_iterator(); + + auto suffix = begin(str); + + std::string ret; + auto out = back_inserter(ret); + while (match != end_it && max_count > 0) { + copy(suffix, match->prefix().second, out); + match->format(out, replacement); + suffix = match->suffix().first; + ++match; + --max_count; + } + + copy(suffix, end(str), out); + + push_value(L, ret); + return 1; +} + +int regex_compile(lua_State *L) { + std::string pattern(luaL_checkstring(L, 1)); + int flags = luaL_checkinteger(L, 2); + boost::u32regex *re = static_cast(lua_newuserdata(L, sizeof(boost::u32regex))); + + try { + new(re) boost::u32regex; + *re = 
boost::make_u32regex(pattern, boost::u32regex::perl | flags); + } + catch (std::exception const& e) { + lua_pop(L, 1); + push_value(L, e.what()); + return 1; + // Do the actual triggering of the error in the Lua code as that code + // can report the original call site + } + + luaL_getmetatable(L, "aegisub.regex"); + lua_setmetatable(L, -2); + + return 1; +} + +int regex_gc(lua_State *L) { + using boost::u32regex; + get_regex(L).~u32regex(); + return 0; +} + +int smatch_gc(lua_State *L) { + using boost::smatch; + get_smatch(L).~smatch(); + return 0; +} + +int regex_process_flags(lua_State *L) { + int ret = 0; + int nargs = lua_gettop(L); + for (int i = 1; i <= nargs; ++i) { + if (!lua_islightuserdata(L, i)) { + push_value(L, "Flags must follow all non-flag arguments"); + return 1; + } + ret |= (int)(intptr_t)lua_touserdata(L, i); + } + + push_value(L, ret); + return 1; +} + +int regex_init_flags(lua_State *L) { + lua_newtable(L); + + set_field(L, "ICASE", (void*)boost::u32regex::icase); + set_field(L, "NOSUB", (void*)boost::u32regex::nosubs); + set_field(L, "COLLATE", (void*)boost::u32regex::collate); + set_field(L, "NEWLINE_ALT", (void*)boost::u32regex::newline_alt); + set_field(L, "NO_MOD_M", (void*)boost::u32regex::no_mod_m); + set_field(L, "NO_MOD_S", (void*)boost::u32regex::no_mod_s); + set_field(L, "MOD_S", (void*)boost::u32regex::mod_s); + set_field(L, "MOD_X", (void*)boost::u32regex::mod_x); + set_field(L, "NO_EMPTY_SUBEXPRESSIONS", (void*)boost::u32regex::no_empty_expressions); + + return 1; +} + +} + +namespace Automation4 { +int regex_init(lua_State *L) { + if (luaL_newmetatable(L, "aegisub.regex")) { + set_field(L, "__gc", regex_gc); + lua_pop(L, 1); + } + + if (luaL_newmetatable(L, "aegisub.smatch")) { + set_field(L, "__gc", smatch_gc); + lua_pop(L, 1); + } + + lua_newtable(L); + set_field(L, "matches", regex_matches); + set_field(L, "search", regex_search); + set_field(L, "match", regex_match); + set_field(L, "get_match", regex_get_match); + 
set_field(L, "replace", regex_replace); + set_field(L, "compile", regex_compile); + set_field(L, "process_flags", regex_process_flags); + set_field(L, "init_flags", regex_init_flags); + return 1; +} +} +#endif