Aegisub/libaegisub/lua/modules/re.cpp

142 lines
4.3 KiB
C++

// Copyright (c) 2014, Thomas Goyne <plorkyeran@aegisub.org>
//
// Permission to use, copy, modify, and distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
//
// Aegisub Project http://www.aegisub.org/
#include "libaegisub/lua/ffi.h"
#include "libaegisub/make_unique.h"
#include <boost/regex/icu.hpp>
using boost::u32regex;
namespace {
// A cmatch with a match range attached to it so that we can return a pointer to
// an int pair without an extra heap allocation each time (LuaJIT can't compile
// ffi calls which return aggregates by value)
struct agi_re_match {
// Sub-match results filled in by search()
boost::cmatch m;
// Scratch pair overwritten and returned by regex_get_match()
int range[2];
};
// One entry of the table returned by get_regex_flags(): a flag name paired
// with its integer value. The table ends with an entry whose name is nullptr.
struct agi_re_flag {
const char *name;
int value;
};
}
// Apply AGI_DEFINE_TYPE_NAME to each type this module passes across the FFI
// boundary. The macro is defined outside this file (presumably in
// libaegisub/lua/ffi.h -- TODO confirm exactly what it generates).
namespace agi {
AGI_DEFINE_TYPE_NAME(u32regex);
AGI_DEFINE_TYPE_NAME(agi_re_match);
AGI_DEFINE_TYPE_NAME(agi_re_flag);
}
namespace {
// Short local alias for the match-plus-range struct used by the functions below
using match = agi_re_match;
// Search the byte range [str + start, str + len) for `re`, storing the
// sub-match results in `result`.
//
// Returns true when a match is found. Returns false without searching when
// `start` is negative or greater than `len`, because the pointer arithmetic
// below would otherwise form an invalid range.
bool search(u32regex& re, const char *str, size_t len, int start, boost::cmatch& result) {
    if (start < 0 || static_cast<size_t>(start) > len)
        return false;

    // When the search does not begin at the start of the string, tell the
    // matcher that the preceding character is available and that the first
    // position is not the beginning of the buffer
    const auto flags = start > 0
        ? boost::match_prev_avail | boost::match_not_bob
        : boost::match_default;
    return u32regex_search(str + start, str + len, result, re, flags);
}
// Search `str` (of length `len`) from offset `start` and return a newly
// allocated match object on success, or nullptr when nothing matched. A
// non-null result is released from its unique_ptr, so the caller must hand it
// back to match_free().
match *regex_match(u32regex& re, const char *str, size_t len, int start) {
    auto found = agi::make_unique<match>();
    const bool matched = search(re, str, len, start, found->m);
    // On failure `found` still owns the object and destroys it on return
    return matched ? found.release() : nullptr;
}
// Get the range covered by sub-match `idx` of a previous match.
//
// Returns a pointer to a two-int array stored inside `match` (overwritten by
// the next call on the same object), or nullptr when `idx` is out of range or
// that sub-expression did not participate in the match. Both values are
// offsets from the start of the searched range (m.prefix().first): element 0
// is the 1-based index of the first byte of the sub-match and element 1 is the
// 1-based index of its last byte.
int *regex_get_match(match& match, size_t idx) {
    // Valid indices are 0 .. size() - 1, so reject idx == size() as well
    if (idx >= match.m.size())
        return nullptr;

    const auto& sub = match.m[static_cast<int>(idx)];
    if (!sub.matched)
        return nullptr;

    const auto base = match.m.prefix().first;
    match.range[0] = static_cast<int>(std::distance(base, sub.first)) + 1;
    match.range[1] = static_cast<int>(std::distance(base, sub.second));
    return match.range;
}
// Search `str` (of length `len`) beginning at byte offset `start`.
//
// On a match, returns a malloc()ed array of two ints: the 1-based index of the
// first byte of the match and the 1-based index of its last byte, both
// relative to the beginning of `str`. The caller owns the array and must
// release it with free(). Returns nullptr when `start` is past the end of the
// string, when nothing matches, or when the allocation fails.
int *regex_search(u32regex& re, const char *str, size_t len, size_t start) {
    // Reject offsets beyond the buffer before narrowing to the int that
    // search() takes
    if (start > len)
        return nullptr;

    boost::cmatch result;
    if (!search(re, str, len, static_cast<int>(start), result))
        return nullptr;

    auto ret = static_cast<int *>(malloc(sizeof(int) * 2));
    if (!ret)
        return nullptr;

    // position() is relative to the start of the searched range, so add
    // `start` back to get an offset into the whole string
    ret[0] = start + result.position() + 1;
    ret[1] = start + result.position() + result.length();
    return ret;
}
// Replace up to `max_count` matches of `re` in the first `len` bytes of `str`
// with the format string `replacement`, and return the result as a new C
// string produced by agi::lua::strndup (ownership presumably passes to the
// caller -- TODO confirm against the Lua side).
//
// A `max_count` of zero or less performs no replacements and returns a copy of
// the input.
char *regex_replace(u32regex& re, const char *replacement, const char *str, size_t len, int max_count) {
    // Can't just use regex_replace here since it can only do one or infinite replacements
    const char *const str_end = str + len;
    auto match = boost::u32regex_iterator<const char *>(str, str_end, re);
    auto end_it = boost::u32regex_iterator<const char *>();

    // Start of the text that has not been copied to the output yet
    auto suffix = str;

    std::string ret;
    auto out = back_inserter(ret);
    while (match != end_it && max_count > 0) {
        // Unmatched text between the previous match and this one
        copy(suffix, match->prefix().second, out);
        match->format(out, replacement);
        suffix = match->suffix().first;
        ++match;
        --max_count;
    }

    // Append the remaining tail using the explicit length rather than relying
    // on a NUL terminator, so embedded NULs are kept and nothing past `len`
    // is read
    ret.append(suffix, str_end);
    return agi::lua::strndup(ret);
}
// Compile `pattern` as a Perl-syntax regex with the extra syntax `flags` ORed
// in. Returns a heap-allocated regex to be released with regex_free(). If
// compilation throws, stores a strdup()ed copy of the exception message in
// `*err` and returns nullptr.
u32regex *regex_compile(const char *pattern, int flags, char **err) {
    auto compiled = agi::make_unique<u32regex>();
    try {
        *compiled = boost::make_u32regex(pattern, boost::u32regex::perl | flags);
    }
    catch (std::exception const& e) {
        *err = strdup(e.what());
        return nullptr;
    }
    return compiled.release();
}
// Destroy a regex allocated by regex_compile(); deleting nullptr is a no-op
void regex_free(u32regex *re) {
    delete re;
}
// Destroy a match object allocated by regex_match(); deleting nullptr is a no-op
void match_free(match *m) {
    delete m;
}
// Return a pointer to the first entry of a static table mapping flag names to
// the corresponding boost::u32regex syntax option values. The table is
// terminated by an entry with a null name.
const agi_re_flag *get_regex_flags() {
    static const agi_re_flag flag_table[] = {
        {"ICASE",                   boost::u32regex::icase},
        {"NOSUB",                   boost::u32regex::nosubs},
        {"COLLATE",                 boost::u32regex::collate},
        {"NEWLINE_ALT",             boost::u32regex::newline_alt},
        {"NO_MOD_M",                boost::u32regex::no_mod_m},
        {"NO_MOD_S",                boost::u32regex::no_mod_s},
        {"MOD_S",                   boost::u32regex::mod_s},
        {"MOD_X",                   boost::u32regex::mod_x},
        {"NO_EMPTY_SUBEXPRESSIONS", boost::u32regex::no_empty_expressions},
        {nullptr,                   0}  // terminator
    };
    return &flag_table[0];
}
}
// Lua module entry point. Passes the list of type names and the name/function
// pairs below to agi::lua::register_lib_table, then reports one return value
// to Lua (presumably the table that helper leaves on the stack -- TODO confirm
// against register_lib_table).
extern "C" int luaopen_re_impl(lua_State *L) {
agi::lua::register_lib_table(L, {"agi_re_match", "u32regex"},
"search", regex_search,
"match", regex_match,
"get_match", regex_get_match,
"replace", regex_replace,
"compile", regex_compile,
"get_flags", get_regex_flags,
"match_free", match_free,
"regex_free", regex_free);
return 1;
}