// Copyright (c) 2014, Thomas Goyne <plorkyeran@aegisub.org>
//
// Permission to use, copy, modify, and distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
//
// Aegisub Project http://www.aegisub.org/
|
|
|
|
|
2014-07-20 06:48:58 +02:00
|
|
|
#include "libaegisub/lua/ffi.h"
|
|
|
|
#include "libaegisub/make_unique.h"
|
2013-04-10 05:46:59 +02:00
|
|
|
|
|
|
|
#include <boost/regex/icu.hpp>
|
|
|
|
|
2014-07-20 06:48:58 +02:00
|
|
|
using boost::u32regex;
|
2013-04-10 05:46:59 +02:00
|
|
|
namespace {
|
2014-07-20 06:48:58 +02:00
|
|
|
// A cmatch with a match range attached to it so that we can return a pointer to
|
|
|
|
// an int pair without an extra heap allocation each time (LuaJIT can't compile
|
|
|
|
// ffi calls which return aggregates by value)
|
|
|
|
struct agi_re_match {
|
|
|
|
boost::cmatch m;
|
|
|
|
int range[2];
|
|
|
|
};
|
|
|
|
|
|
|
|
struct agi_re_flag {
|
|
|
|
const char *name;
|
|
|
|
int value;
|
|
|
|
};
|
2013-04-10 05:46:59 +02:00
|
|
|
}
|
|
|
|
|
2014-07-20 06:48:58 +02:00
|
|
|
namespace agi {
|
|
|
|
AGI_DEFINE_TYPE_NAME(u32regex);
|
|
|
|
AGI_DEFINE_TYPE_NAME(agi_re_match);
|
|
|
|
AGI_DEFINE_TYPE_NAME(agi_re_flag);
|
2013-04-10 05:46:59 +02:00
|
|
|
}
|
|
|
|
|
2014-07-20 06:48:58 +02:00
|
|
|
namespace {
|
|
|
|
// Short local alias for agi_re_match, used by the functions below.
using match = agi_re_match;
|
|
|
|
// Searches the bytes [str + start, str + len) for the first match of `re`.
//
// re     - compiled expression to apply
// str    - beginning of the subject buffer
// len    - size of the subject buffer in bytes
// start  - byte offset into `str` at which the search begins
// result - receives the match data when a match is found
//
// Returns true when a match was found. An offset outside [0, len] is
// reported as "no match" and leaves `result` untouched.
bool search(u32regex& re, const char *str, size_t len, int start, boost::cmatch& result) {
	// A negative offset or one past the end of the buffer would hand Boost
	// an invalid iterator range, which is undefined behaviour.
	if (start < 0 || static_cast<size_t>(start) > len)
		return false;

	// When the search resumes part-way through the buffer, tell the engine
	// that the position before `str + start` is readable and that this is
	// not the beginning of the buffer.
	auto const flags = start > 0
		? boost::match_prev_avail | boost::match_not_bob
		: boost::match_default;
	return u32regex_search(str + start, str + len, result, re, flags);
}
|
|
|
|
|
2014-07-20 06:48:58 +02:00
|
|
|
// Runs search() over `str` starting at byte offset `start` and, on success,
// returns a newly allocated match object holding the results. Returns
// nullptr when nothing matched. The returned object is released with
// match_free().
match *regex_match(u32regex& re, const char *str, size_t len, int start) {
	auto holder = agi::make_unique<match>();
	bool const found = search(re, str, len, start, holder->m);
	// On failure `holder` goes out of scope and frees the object itself.
	return found ? holder.release() : nullptr;
}
|
|
|
|
|
2014-07-20 06:48:58 +02:00
|
|
|
// Looks up sub-expression `idx` of a previous match (0 is the whole match).
//
// Returns nullptr when `idx` is out of range or that sub-expression did not
// take part in the match. Otherwise stores two values in match.range and
// returns a pointer to it: range[0] is the 1-based position of the first
// matched byte and range[1] the 1-based position of the last matched byte,
// both measured from match.m.prefix().first. The pointer refers to storage
// inside `match`, so it is overwritten by the next call on the same object.
int *regex_get_match(match& match, size_t idx) {
	// Valid indexes are 0 .. size() - 1.
	if (idx >= match.m.size())
		return nullptr;

	auto const& sub = match.m[static_cast<int>(idx)];
	if (!sub.matched)
		return nullptr;

	auto const base = match.m.prefix().first;
	// Add 1 to the distance instead of advancing `sub.first`, which may
	// already be at the end of the searched range.
	match.range[0] = static_cast<int>(std::distance(base, sub.first) + 1);
	match.range[1] = static_cast<int>(std::distance(base, sub.second));
	return match.range;
}
|
|
|
|
|
2014-07-20 06:48:58 +02:00
|
|
|
// Searches [str + start, str + len) for the first match of `re`.
//
// Returns nullptr when nothing matches, when `start` is not a usable offset
// into the buffer, or when the result buffer cannot be allocated. Otherwise
// returns a malloc()ed array of two ints, relative to `str`: the 1-based
// position of the first matched byte and the 1-based position of the last
// matched byte (one less than the first value for an empty match). The
// caller is responsible for free()ing the array.
int *regex_search(u32regex& re, const char *str, size_t len, size_t start) {
	// search() takes its offset as an int, so reject offsets that lie past
	// the end of the buffer or that do not survive the narrowing.
	int const offset = static_cast<int>(start);
	if (start > len || offset < 0 || static_cast<size_t>(offset) != start)
		return nullptr;

	boost::cmatch result;
	if (!search(re, str, len, offset, result))
		return nullptr;

	auto ret = static_cast<int *>(malloc(sizeof(int) * 2));
	// malloc() can fail; writing through a null pointer would crash.
	if (!ret)
		return nullptr;

	// position() is measured from the start of the searched range, so add
	// `start` back to get a position within `str`.
	ret[0] = static_cast<int>(start + result.position() + 1);
	ret[1] = static_cast<int>(start + result.position() + result.length());
	return ret;
}
|
|
|
|
|
2014-07-20 06:48:58 +02:00
|
|
|
// Replaces up to `max_count` matches of `re` in the `len` bytes at `str`
// with the expansion of the format string `replacement`, and returns the
// result as a copy made by agi::lua::strndup(). With max_count <= 0 the
// input is returned unchanged.
// NOTE(review): ownership of the returned buffer is decided by
// agi::lua::strndup(), which is defined outside this file - confirm how the
// caller is expected to release it.
char *regex_replace(u32regex& re, const char *replacement, const char *str, size_t len, int max_count) {
	// Can't just use regex_replace here since it can only do one or infinite replacements
	auto const end = str + len;
	boost::u32regex_iterator<const char *> it(str, end, re);
	boost::u32regex_iterator<const char *> const done;

	// Start of the input that has not been copied to the output yet.
	auto tail = str;

	std::string ret;
	auto out = std::back_inserter(ret);
	for (; it != done && max_count > 0; ++it, --max_count) {
		// Text between the previous match (or the start) and this match.
		std::copy(tail, it->prefix().second, out);
		it->format(out, replacement);
		tail = it->suffix().first;
	}

	// Append the remainder by explicit length: the input is described by
	// `len`, so it may contain NUL bytes and must not be read past `end`.
	ret.append(tail, end);
	return agi::lua::strndup(ret);
}
|
|
|
|
|
2014-07-20 06:48:58 +02:00
|
|
|
// Compiles `pattern` as a Perl-syntax u32regex with `flags` OR-ed in.
//
// Returns the new regex on success; it is released with regex_free().
// If compilation throws a std::exception, returns nullptr and stores a
// strdup()ed copy of the exception message in *err. *err is left untouched
// on success.
u32regex *regex_compile(const char *pattern, int flags, char **err) {
	auto compiled = agi::make_unique<u32regex>();
	try {
		*compiled = boost::make_u32regex(pattern, boost::u32regex::perl | flags);
	}
	catch (std::exception const& ex) {
		*err = strdup(ex.what());
		return nullptr;
	}
	return compiled.release();
}
|
|
|
|
|
2014-07-20 06:48:58 +02:00
|
|
|
// Deletes a regex created by regex_compile().
void regex_free(u32regex *re) {
	delete re;
}
|
|
|
|
// Deletes a match object created by regex_match().
void match_free(match *m) {
	delete m;
}
|
|
|
|
|
|
|
|
// Returns a pointer to a static table that maps flag names to the matching
// boost::u32regex option values. The table is terminated by an entry whose
// name is null.
const agi_re_flag *get_regex_flags() {
	static const agi_re_flag table[] = {
		{"ICASE",                   boost::u32regex::icase},
		{"NOSUB",                   boost::u32regex::nosubs},
		{"COLLATE",                 boost::u32regex::collate},
		{"NEWLINE_ALT",             boost::u32regex::newline_alt},
		{"NO_MOD_M",                boost::u32regex::no_mod_m},
		{"NO_MOD_S",                boost::u32regex::no_mod_s},
		{"MOD_S",                   boost::u32regex::mod_s},
		{"MOD_X",                   boost::u32regex::mod_x},
		{"NO_EMPTY_SUBEXPRESSIONS", boost::u32regex::no_empty_expressions},
		{nullptr, 0} // end-of-table marker
	};
	return table;
}
|
|
|
|
}
|
|
|
|
|
2014-03-27 03:24:37 +01:00
|
|
|
// Module entry point with C linkage: passes the regex helpers defined in
// this file to agi::lua::register_lib_table() under their Lua-facing names.
// NOTE(review): the braced list presumably names the FFI types the library
// needs, and the return value of 1 presumably reports the library table as
// the single result - confirm against libaegisub/lua/ffi.h.
extern "C" int luaopen_re_impl(lua_State *L) {
	agi::lua::register_lib_table(L, {"agi_re_match", "u32regex"},
		"search",     regex_search,
		"match",      regex_match,
		"get_match",  regex_get_match,
		"replace",    regex_replace,
		"compile",    regex_compile,
		"get_flags",  get_regex_flags,
		"match_free", match_free,
		"regex_free", regex_free);
	return 1;
}
|