diff --git a/automation/include/aegisub/unicode.moon b/automation/include/aegisub/unicode.moon index 672ff7a2a..0b5053e6a 100644 --- a/automation/include/aegisub/unicode.moon +++ b/automation/include/aegisub/unicode.moon @@ -29,6 +29,26 @@ -- http://www.ietf.org/rfc/rfc2279.txt impl = require 'aegisub.__unicode_impl' +ffi = require 'ffi' +ffi.cdef[[ + void free(void *ptr); +]] + +transfer_string = (cdata) -> + return nil if cdata == nil + str = ffi.string cdata + ffi.C.free cdata + str + +conv_func = (f) -> + err = ffi.new 'char *[1]' + (str) -> + err[0] = nil + result = f str, err + errmsg = transfer_string err[0] + if errmsg + error errmsg, 2 + transfer_string result local unicode unicode = @@ -86,8 +106,8 @@ unicode = res = res*64 + s\byte(i) - 128 res - to_upper_case: impl.to_upper_case - to_lower_case: impl.to_lower_case - to_fold_case: impl.to_fold_case + to_upper_case: conv_func impl.to_upper_case + to_lower_case: conv_func impl.to_lower_case + to_fold_case: conv_func impl.to_fold_case return unicode diff --git a/automation/tests/aegisub.cpp b/automation/tests/aegisub.cpp index ea667cd94..61330b4a1 100644 --- a/automation/tests/aegisub.cpp +++ b/automation/tests/aegisub.cpp @@ -21,6 +21,7 @@ #include #include +#include #include #include @@ -47,6 +48,7 @@ int main(int argc, char **argv) { return 1; } + std::locale::global(boost::locale::generator().generate("")); agi::dispatch::Init([](agi::dispatch::Thunk f) { }); agi::log::log = new agi::log::LogSink; diff --git a/automation/tests/modules/unicode.moon b/automation/tests/modules/unicode.moon index 0db945e1e..d29bed8f2 100644 --- a/automation/tests/modules/unicode.moon +++ b/automation/tests/modules/unicode.moon @@ -45,3 +45,45 @@ describe 'codepoint', -> assert.is.equal 0x1F113, unicode.codepoint '🄓' it 'should give ignore codepoints after the first', -> assert.is.equal 97, unicode.codepoint 'abc' + +describe 'to_upper_case', -> + it 'should support plain ASCII', -> + assert.is.equal 'ABC', unicode.to_upper_case 'abc' + it 'should support accents', -> + assert.is.equal 'ÀÈÌ', unicode.to_upper_case 'àèì' + it 'should support fullwidth letters', -> + assert.is.equal 'ABC', unicode.to_upper_case 'abc' + it 'should support greek', -> + assert.is.equal 'ΑΒΓ', unicode.to_upper_case 'αβγ' + it 'should support sharp-s', -> + assert.is.equal 'SS', unicode.to_upper_case 'ß' + it 'should support ligatures', -> + assert.is.equal 'FFI', unicode.to_upper_case 'ffi' + +describe 'to_lower_case', -> + it 'should support plain ASCII', -> + assert.is.equal 'abc', unicode.to_lower_case 'ABC' + it 'should support accents', -> + assert.is.equal 'àèì', unicode.to_lower_case 'ÀÈÌ' + it 'should support fullwidth letters', -> + assert.is.equal 'abc', unicode.to_lower_case 'ABC' + it 'should support greek', -> + assert.is.equal 'αβγ', unicode.to_lower_case 'ΑΒΓ' + it 'should support sharp-s', -> + assert.is.equal 'ß', unicode.to_lower_case 'ẞ' + -- note: Unicode doesn't have any uppercase precomposed ligatures + +describe 'to_fold_case', -> + it 'should support plain ASCII', -> + assert.is.equal 'abc', unicode.to_fold_case 'ABC' + it 'should support accents', -> + assert.is.equal 'àèì', unicode.to_fold_case 'ÀÈÌ' + it 'should support fullwidth letters', -> + assert.is.equal 'abc', unicode.to_fold_case 'ABC' + it 'should support greek', -> + assert.is.equal 'αβγ', unicode.to_fold_case 'ΑΒΓ' + it 'should support sharp-s', -> + assert.is.equal 'ss', unicode.to_fold_case 'ẞ' + it 'should support ligatures', -> + assert.is.equal 'ffi', unicode.to_fold_case 'ffi' + diff --git a/libaegisub/lua/modules/unicode.cpp b/libaegisub/lua/modules/unicode.cpp index 7c8deb3a9..a11487449 100644 --- a/libaegisub/lua/modules/unicode.cpp +++ b/libaegisub/lua/modules/unicode.cpp @@ -14,33 +14,45 @@ // // Aegisub Project http://www.aegisub.org/ -#include "libaegisub/lua/utils.h" +#include #include +#include namespace { -using namespace agi::lua; - -int unicode_upper(lua_State *L) { - push_value(L, boost::locale::to_upper(check_string(L, 1))); - return 1; +template +void push_ffi_function(lua_State *L, const char *name, T *func) { + lua_pushvalue(L, -2); // push cast function + lua_pushstring(L, agi::type_name::name().c_str()); + // This cast isn't legal, but LuaJIT internally requires that it work + lua_pushlightuserdata(L, (void *)func); + lua_call(L, 2, 1); + lua_setfield(L, -2, name); } -int unicode_lower(lua_State *L) { - push_value(L, boost::locale::to_lower(check_string(L, 1))); - return 1; -} - -int unicode_fold(lua_State *L) { - push_value(L, boost::locale::fold_case(check_string(L, 1))); - return 1; +template +char *wrap(const char *str, char **err) { + try { + return strdup(func(str, std::locale()).c_str()); + } catch (std::exception const& e) { + *err = strdup(e.what()); + return nullptr; + } } } extern "C" int luaopen_unicode_impl(lua_State *L) { + lua_getglobal(L, "require"); + lua_pushstring(L, "ffi"); + lua_call(L, 1, 1); + lua_getfield(L, -1, "cast"); + lua_remove(L, -2); // ffi table + lua_createtable(L, 0, 3); - set_field(L, "to_upper_case"); - set_field(L, "to_lower_case"); - set_field(L, "to_fold_case"); + push_ffi_function(L, "to_upper_case", wrap>); + push_ffi_function(L, "to_lower_case", wrap>); + push_ffi_function(L, "to_fold_case", wrap>); + + lua_remove(L, -2); // ffi.cast function return 1; }