diff --git a/libaegisub/common/charset_conv.cpp b/libaegisub/common/charset_conv.cpp index e541f3328..c6f7becfc 100644 --- a/libaegisub/common/charset_conv.cpp +++ b/libaegisub/common/charset_conv.cpp @@ -20,7 +20,7 @@ #include #include -#include +#include #include #include @@ -44,28 +44,33 @@ static const size_t iconv_failed = (size_t)-1; namespace { using namespace agi::charset; - struct ltstr { - bool operator()(const char* s1, const char* s2) const { - return strcmp(s1, s2) < 0; - } - }; Converter *get_converter(bool subst, const char *src, const char *dst); /// @brief Map a user-friendly encoding name to the real encoding name - const char *get_real_encoding_name(const char* name) { - static std::map pretty_names; - - if (pretty_names.empty()) { -# define ADD(pretty, real) pretty_names[pretty] = real + const char *get_real_encoding_name(const char *name) { + struct pair { const char *pretty; const char *real; }; + static pair pretty_names[] = { +# define ADD(pretty, real) pair{pretty, real}, # include # undef ADD - } + }; - auto real = pretty_names.find(name); - if (real != pretty_names.end()) - return real->second; - return name; + static bool init = false; + if (!init) { + init = true; + boost::sort(pretty_names, [](pair a, pair b) { + return strcmp(a.pretty, b.pretty) < 0; + }); + } + + auto enc = boost::lower_bound(pretty_names, name, [](pair a, const char *b) { + return strcmp(a.pretty, b) < 0; + }); + + if (enc != std::end(pretty_names) && strcmp(enc->pretty, name) == 0) + return enc->real; + return name; } size_t get_bom_size(Iconv& cd) { diff --git a/libaegisub/include/libaegisub/charset_conv.h b/libaegisub/include/libaegisub/charset_conv.h index e9c60c21d..c283c36ca 100644 --- a/libaegisub/include/libaegisub/charset_conv.h +++ b/libaegisub/include/libaegisub/charset_conv.h @@ -120,7 +120,7 @@ template T const& GetEncodingsList() { static T name_list; if (name_list.empty()) { -# define ADD(pretty, real) if (IsConversionSupported(real, "utf-8")) name_list.push_back(pretty) +# define ADD(pretty, real) if (IsConversionSupported(real, "utf-8")) name_list.push_back(pretty); # include # undef ADD } diff --git a/libaegisub/include/libaegisub/charsets.def b/libaegisub/include/libaegisub/charsets.def index 1228c3153..72edba3ab 100644 --- a/libaegisub/include/libaegisub/charsets.def +++ b/libaegisub/include/libaegisub/charsets.def @@ -1,78 +1,78 @@ -ADD("Local", ""); +ADD("Local", "") -ADD("Unicode (UTF-8)", "utf-8"); -ADD("Unicode (UTF-16)", "utf-16"); -ADD("Unicode (UTF-16BE)", "utf-16be"); -ADD("Unicode (UTF-16LE)", "utf-16le"); -ADD("Unicode (UTF-32)", "utf-32"); -ADD("Unicode (UTF-32BE)", "utf-32be"); -ADD("Unicode (UTF-32LE)", "utf-32le"); -ADD("Unicode (UTF-7)", "utf-7"); +ADD("Unicode (UTF-8)", "utf-8") +ADD("Unicode (UTF-16)", "utf-16") +ADD("Unicode (UTF-16BE)", "utf-16be") +ADD("Unicode (UTF-16LE)", "utf-16le") +ADD("Unicode (UTF-32)", "utf-32") +ADD("Unicode (UTF-32BE)", "utf-32be") +ADD("Unicode (UTF-32LE)", "utf-32le") +ADD("Unicode (UTF-7)", "utf-7") -ADD("Arabic (IBM-864)", "ibm864"); -ADD("Arabic (ISO-8859-6)", "iso-8859-6"); -ADD("Arabic (Windows-1256)", "windows-1256"); +ADD("Arabic (IBM-864)", "ibm864") +ADD("Arabic (ISO-8859-6)", "iso-8859-6") +ADD("Arabic (Windows-1256)", "windows-1256") -ADD("Armenian (ARMSCII-8)", "armscii-8"); +ADD("Armenian (ARMSCII-8)", "armscii-8") -ADD("Baltic (ISO-8859-13)", "iso-8859-13"); -ADD("Baltic (ISO-8859-4)", "iso-8859-4"); -ADD("Baltic (Windows-1257)", "windows-1257"); +ADD("Baltic (ISO-8859-13)", "iso-8859-13") +ADD("Baltic (ISO-8859-4)", "iso-8859-4") +ADD("Baltic (Windows-1257)", "windows-1257") -ADD("Celtic (ISO-8859-14)", "iso-8859-14"); +ADD("Celtic (ISO-8859-14)", "iso-8859-14") -ADD("Central European (IBM-852)", "ibm852"); -ADD("Central European (ISO-8859-2)", "iso-8859-2"); -ADD("Central European (Windows-1250)", "windows-1250"); +ADD("Central European (IBM-852)", "ibm852") +ADD("Central European (ISO-8859-2)", "iso-8859-2") +ADD("Central European (Windows-1250)", "windows-1250") -ADD("Chinese Simplified (GB18030)", "gb18030"); -ADD("Chinese Simplified (GB2312)", "gb2312"); -ADD("Chinese Simplified (HZ)", "hz-gb-2312"); -ADD("Chinese Simplified (ISO-2022-CN)", "iso-2022-cn"); -ADD("Chinese Traditional (Big5)", "big5"); -ADD("Chinese Traditional (Big5-HKSCS)", "big5-hkscs"); +ADD("Chinese Simplified (GB18030)", "gb18030") +ADD("Chinese Simplified (GB2312)", "gb2312") +ADD("Chinese Simplified (HZ)", "hz-gb-2312") +ADD("Chinese Simplified (ISO-2022-CN)", "iso-2022-cn") +ADD("Chinese Traditional (Big5)", "big5") +ADD("Chinese Traditional (Big5-HKSCS)", "big5-hkscs") -ADD("Cyrillic (IBM-855)", "ibm855"); -ADD("Cyrillic (ISO-8859-5)", "iso-8859-5"); -ADD("Cyrillic (KOI8-R)", "koi8-r"); -ADD("Cyrillic (Windows-1251)", "windows-1251"); -ADD("Cyrillic/Russian (CP-866)", "ibm866"); -ADD("Cyrillic/Ukrainian (KOI8-U)", "koi8-u"); +ADD("Cyrillic (IBM-855)", "ibm855") +ADD("Cyrillic (ISO-8859-5)", "iso-8859-5") +ADD("Cyrillic (KOI8-R)", "koi8-r") +ADD("Cyrillic (Windows-1251)", "windows-1251") +ADD("Cyrillic/Russian (CP-866)", "ibm866") +ADD("Cyrillic/Ukrainian (KOI8-U)", "koi8-u") -ADD("English (US-ASCII)", "us-ascii"); +ADD("English (US-ASCII)", "us-ascii") -ADD("Greek (ISO-8859-7)", "iso-8859-7"); -ADD("Greek (Windows-1253)", "windows-1253"); +ADD("Greek (ISO-8859-7)", "iso-8859-7") +ADD("Greek (Windows-1253)", "windows-1253") -ADD("Hebrew (IBM-862)", "ibm862"); -ADD("Hebrew (Windows-1255)", "windows-1255"); -ADD("Hebrew Visual (ISO-8859-8)", "iso-8859-8"); +ADD("Hebrew (IBM-862)", "ibm862") +ADD("Hebrew (Windows-1255)", "windows-1255") +ADD("Hebrew Visual (ISO-8859-8)", "iso-8859-8") -ADD("Japanese (EUC-JP)", "euc-jp"); -ADD("Japanese (ISO-2022-JP)", "iso-2022-jp"); -ADD("Japanese (Shift_JIS)", "shift_jis"); +ADD("Japanese (EUC-JP)", "euc-jp") +ADD("Japanese (ISO-2022-JP)", "iso-2022-jp") +ADD("Japanese (Shift_JIS)", "shift_jis") -ADD("Korean (EUC-KR)", "euc-kr"); -ADD("Korean (ISO-2022-KR)", "iso-2022-kr"); +ADD("Korean (EUC-KR)", "euc-kr") +ADD("Korean (ISO-2022-KR)", "iso-2022-kr") -ADD("Nordic (ISO-8859-10)", "iso-8859-10"); +ADD("Nordic (ISO-8859-10)", "iso-8859-10") -ADD("Romanian (ISO-8859-16)", "iso-8859-16"); +ADD("Romanian (ISO-8859-16)", "iso-8859-16") -ADD("South European (ISO-8859-3)", "iso-8859-3"); +ADD("South European (ISO-8859-3)", "iso-8859-3") -ADD("Thai (ISO-8859-11)", "iso-8859-11"); -ADD("Thai (TIS-620)", "tis-620"); -ADD("Thai (Windows-874)", "windows-874"); +ADD("Thai (ISO-8859-11)", "iso-8859-11") +ADD("Thai (TIS-620)", "tis-620") +ADD("Thai (Windows-874)", "windows-874") -ADD("Turkish (IBM-857)", "ibm857"); -ADD("Turkish (ISO-8859-9)", "iso-8859-9"); -ADD("Turkish (Windows-1254)", "windows-1254"); +ADD("Turkish (IBM-857)", "ibm857") +ADD("Turkish (ISO-8859-9)", "iso-8859-9") +ADD("Turkish (Windows-1254)", "windows-1254") -ADD("Vietnamese (VISCII)", "viscii"); -ADD("Vietnamese (Windows-1258)", "windows-1258"); +ADD("Vietnamese (VISCII)", "viscii") +ADD("Vietnamese (Windows-1258)", "windows-1258") -ADD("Western (IBM-850)", "ibm850"); -ADD("Western (ISO-8859-1)", "iso-8859-1"); -ADD("Western (ISO-8859-15)", "iso-8859-15"); -ADD("Western (Windows-1252)", "windows-1252"); +ADD("Western (IBM-850)", "ibm850") +ADD("Western (ISO-8859-1)", "iso-8859-1") +ADD("Western (ISO-8859-15)", "iso-8859-15") +ADD("Western (Windows-1252)", "windows-1252") diff --git a/tests/tests/iconv.cpp b/tests/tests/iconv.cpp index 5746296ca..6d7197cb1 100644 --- a/tests/tests/iconv.cpp +++ b/tests/tests/iconv.cpp @@ -135,14 +135,13 @@ TEST(lagi_iconv, wchar_tSupport) { } TEST(lagi_iconv, Roundtrip) { - std::vector names = GetEncodingsList >(); - for (auto cur = names.begin(); cur != names.end(); ++cur) { - ASSERT_NO_THROW(IconvWrapper("utf-8", cur->c_str())); - ASSERT_NO_THROW(IconvWrapper(cur->c_str(), "utf-8")); + for (auto const& name : GetEncodingsList>()) { + ASSERT_NO_THROW(IconvWrapper("utf-8", name.c_str())); + ASSERT_NO_THROW(IconvWrapper(name.c_str(), "utf-8")); EXPECT_EQ( "Jackdaws love my big sphinx of quartz", - IconvWrapper(cur->c_str(), "utf-8").Convert( - IconvWrapper("utf-8", cur->c_str()).Convert( + IconvWrapper(name.c_str(), "utf-8").Convert( + IconvWrapper("utf-8", name.c_str()).Convert( "Jackdaws love my big sphinx of quartz"))); } }