// Aegisub/tests/tests/iconv.cpp
//
// Listing metadata: 181 lines, 5.9 KiB, C++

// Copyright (c) 2010, Thomas Goyne <plorkyeran@aegisub.org>
//
// Permission to use, copy, modify, and distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#include <libaegisub/charset_conv.h>
#include <main.h>
#include <cstdint>
#include <iconv.h>
using namespace agi::charset;
// Constructing a UTF-8 -> UTF-16LE converter must not throw.
TEST(lagi_iconv, BasicSetup) {
	const char *const source_charset = "UTF-8";
	const char *const target_charset = "UTF-16LE";
	EXPECT_NO_THROW(IconvWrapper(source_charset, target_charset));
}
// An unrecognised charset name as the source, the destination, or both must
// make the constructor throw UnsupportedConversion.
TEST(lagi_iconv, InvalidConversions) {
	const char *const bogus = "nonexistent charset";
	const char *const known = "UTF-16LE";

	EXPECT_THROW(IconvWrapper(bogus, known), UnsupportedConversion);
	EXPECT_THROW(IconvWrapper(known, bogus), UnsupportedConversion);
	EXPECT_THROW(IconvWrapper(bogus, bogus), UnsupportedConversion);
}
// For UTF-8 on both sides, SrcStrLen and DstStrLen must each report the
// number of bytes in a string of 0..9 spaces.
TEST(lagi_iconv, StrLen1) {
	IconvWrapper conv("UTF-8", "UTF-8", false);

	for (int length = 0; length != 10; ++length) {
		const std::string spaces(length, ' ');
		const char *const raw = spaces.c_str();

		ASSERT_EQ(length, conv.SrcStrLen(raw));
		ASSERT_EQ(length, conv.DstStrLen(raw));
	}
}
// For UTF-16LE on both sides, SrcStrLen and DstStrLen must each report the
// length in bytes (two per code unit) of a string of 0..9 spaces.
TEST(lagi_iconv, StrLen2) {
	IconvWrapper conv("UTF-16LE", "UTF-16LE", false);
	for (int i = 0; i < 10; i++) {
		// Use std::u16string rather than std::basic_string<int16_t>: the
		// standard only specifies char_traits for the character types, and
		// standard libraries that dropped the generic fallback reject the
		// int16_t instantiation. The in-memory layout of the 16-bit units is
		// the same.
		const std::u16string str(i, u' ');
		// The length functions take a byte pointer; reinterpret the 16-bit
		// units as raw bytes.
		const char *const bytes = reinterpret_cast<const char *>(str.c_str());
		ASSERT_EQ(2*i, conv.SrcStrLen(bytes));
		ASSERT_EQ(2*i, conv.DstStrLen(bytes));
	}
}
// For UTF-32LE on both sides, SrcStrLen and DstStrLen must each report the
// length in bytes (four per code unit) of a string of 0..9 spaces.
TEST(lagi_iconv, StrLen4) {
	IconvWrapper conv("UTF-32LE", "UTF-32LE", false);
	for (int i = 0; i < 10; i++) {
		// Use std::u32string rather than std::basic_string<int32_t>: the
		// standard only specifies char_traits for the character types, and
		// standard libraries that dropped the generic fallback reject the
		// int32_t instantiation. The in-memory layout of the 32-bit units is
		// the same.
		const std::u32string str(i, U' ');
		// The length functions take a byte pointer; reinterpret the 32-bit
		// units as raw bytes.
		const char *const bytes = reinterpret_cast<const char *>(str.c_str());
		ASSERT_EQ(4*i, conv.SrcStrLen(bytes));
		ASSERT_EQ(4*i, conv.DstStrLen(bytes));
	}
}
#ifdef _LIBICONV_VERSION
// Exercises UTF-8 -> Shift-JIS conversion of input the target cannot
// represent, once with the third constructor argument false (expected to
// throw BadInput) and once with it true (expected to yield a substitute).
// A UTF-8 -> UTF-16LE converter is checked alongside as a case where no
// substitution should be necessary.
TEST(lagi_iconv, Fallbacks) {
	IconvWrapper strict("UTF-8", "Shift-JIS", false);
	IconvWrapper lenient("UTF-8", "Shift-JIS", true);
	IconvWrapper to_utf16("UTF-8", "UTF-16LE", false);

	// Shift-JIS does not have a backslash
	const char backslash[] = "\\";
	EXPECT_THROW(strict.Convert(backslash), BadInput);
	ASSERT_NO_THROW(lenient.Convert(backslash));
	EXPECT_EQ("\\", lenient.Convert(backslash));
	EXPECT_NO_THROW(to_utf16.Convert(backslash));

	// BOM into non-unicode: expected to be dropped when fallbacks are enabled
	const char byte_order_mark[] = "\xEF\xBB\xBF";
	EXPECT_THROW(strict.Convert(byte_order_mark), BadInput);
	ASSERT_NO_THROW(lenient.Convert(byte_order_mark));
	EXPECT_EQ("", lenient.Convert(byte_order_mark));
	EXPECT_NO_THROW(to_utf16.Convert(byte_order_mark));

	// A snowman (U+2603): expected to become '?' when fallbacks are enabled
	const char snowman_utf8[] = "\xE2\x98\x83";
	EXPECT_THROW(strict.Convert(snowman_utf8), BadInput);
	EXPECT_NO_THROW(to_utf16.Convert(snowman_utf8));
	ASSERT_NO_THROW(lenient.Convert(snowman_utf8));
	EXPECT_EQ("?", lenient.Convert(snowman_utf8));
}
// Malformed source text must make Convert throw BadInput.
TEST(lagi_iconv, BadInput) {
	// A lone byte cannot form a complete 16-bit code unit.
	const char lone_byte[] = " ";
	IconvWrapper from_utf16("UTF-16LE", "UTF-8");
	EXPECT_THROW(from_utf16.Convert(lone_byte), BadInput);

	// 0xE2 opens a multi-byte UTF-8 sequence, but 0xFF is not a valid
	// continuation byte.
	const char broken_sequence[] = "\xE2\xFF";
	IconvWrapper from_utf8("UTF-8", "UTF-16LE");
	EXPECT_THROW(from_utf8.Convert(broken_sequence), BadInput);
}
#endif
// Converts a single space between UTF-8 and both byte orders of UTF-16 and
// checks the exact bytes produced.
TEST(lagi_iconv, Conversions) {
	IconvWrapper from_utf16le("UTF-16LE", "UTF-8", false);
	IconvWrapper from_utf16be("UTF-16BE", "UTF-8", false);
	IconvWrapper to_utf16le("UTF-8", "UTF-16LE", false);

	// U+0020 in each encoding. The UTF-16 forms contain a zero byte, so they
	// are built with an explicit length of two bytes.
	const std::string utf8_space(" ");
	const std::string utf16be_space("\x00\x20", 2);
	const std::string utf16le_space("\x20\x00", 2);

	EXPECT_EQ(utf8_space, from_utf16le.Convert(utf16le_space));
	EXPECT_EQ(utf8_space, from_utf16be.Convert(utf16be_space));
	EXPECT_EQ(utf16le_space, to_utf16le.Convert(utf8_space));
}
// Basic overflow tests
//
// Converts one source byte (the terminator of "") from UTF-8 to UTF-16LE into
// output buffers of increasing size. Sizes 0 and 1 must throw BufferTooSmall
// and leave the buffer untouched; size 2 must succeed and write exactly two
// zero bytes.
TEST(lagi_iconv, Buffer) {
	IconvWrapper conv("UTF-8", "UTF-16LE", false);

	// Pre-fill the output with a sentinel so any write is detectable.
	char out[32];
	for (char &byte : out)
		byte = '\xFF';

	EXPECT_THROW(conv.Convert("", 1, out, 0), BufferTooSmall);
	EXPECT_EQ('\xFF', out[0]);

	EXPECT_THROW(conv.Convert("", 1, out, 1), BufferTooSmall);
	EXPECT_EQ('\xFF', out[0]);

	EXPECT_NO_THROW(conv.Convert("", 1, out, 2));
	EXPECT_EQ('\0', out[0]);
	EXPECT_EQ('\0', out[1]);
	EXPECT_EQ('\xFF', out[2]);
}
// An empty destination charset name must be accepted, and a space must
// survive conversion into it unchanged.
// NOTE(review): the empty name presumably selects the local/system charset,
// as the test name suggests - confirm against IconvWrapper.
TEST(lagi_iconv, LocalSupport) {
	const char *const source_charset = "UTF-8";
	const char *const local_charset = "";
	ASSERT_NO_THROW(IconvWrapper(source_charset, local_charset));

	IconvWrapper conv(source_charset, local_charset);
	const char space[] = " ";
	ASSERT_NO_THROW(conv.Convert(space));
	EXPECT_EQ(space, conv.Convert(space));
}
// The charset name "wchar_t" must be accepted as a conversion destination.
TEST(lagi_iconv, wchar_tSupport) {
	const char *const source_charset = "UTF-8";
	const char *const wide_charset = "wchar_t";
	EXPECT_NO_THROW(IconvWrapper(source_charset, wide_charset));
}
// For every encoding returned by GetEncodingsList, converters to and from
// UTF-8 must be constructible, and an ASCII pangram must come back unchanged
// after being converted into that encoding and back.
TEST(lagi_iconv, Roundtrip) {
	const char pangram[] = "Jackdaws love my big sphinx of quartz";

	for (auto const& encoding : GetEncodingsList<std::vector<std::string>>()) {
		const char *const charset = encoding.c_str();

		ASSERT_NO_THROW(IconvWrapper("utf-8", charset));
		ASSERT_NO_THROW(IconvWrapper(charset, "utf-8"));

		auto const encoded = IconvWrapper("utf-8", charset).Convert(pangram);
		auto const decoded = IconvWrapper(charset, "utf-8").Convert(encoded);
		EXPECT_EQ(pangram, decoded);
	}
}
// Checks UTF-8 -> ISO-6937-2 conversion: the 7-bit range, characters encoded
// as one or two bytes in the target, and a code point the target lacks
// (expected to become '?' by default and to throw BadInput when the third
// constructor argument is false).
TEST(lagi_iconv, Iso6937) {
	ASSERT_NO_THROW(IconvWrapper("UTF-8", "ISO-6937-2"));
	IconvWrapper substituting("UTF-8", "ISO-6937-2");
	IconvWrapper strict("UTF-8", "ISO-6937-2", false);

	// 7-bit is same as ISO-8859
	for (int code = 0; code != 128; ++code) {
		const char ascii[2] = { static_cast<char>(code), '\0' };
		std::string converted;
		EXPECT_NO_THROW(converted = substituting.Convert(ascii));
		EXPECT_STREQ(ascii, converted.c_str());
	}

	std::string converted;

	// LATIN CAPITAL LETTER D WITH CARON (U+010E) - multibyte char in main block
	const char d_with_caron[] = "\xC4\x8E";
	EXPECT_NO_THROW(converted = substituting.Convert(d_with_caron));
	EXPECT_STREQ("\xCF\x44", converted.c_str());

	// BREVE - multibyte char in extended ranges
	const char breve[] = "\xCB\x98";
	EXPECT_NO_THROW(converted = substituting.Convert(breve));
	EXPECT_STREQ("\xC6\x20", converted.c_str());

	// EM DASH - single byte char in extended ranges
	const char em_dash[] = "\xE2\x80\x94";
	EXPECT_NO_THROW(converted = substituting.Convert(em_dash));
	EXPECT_STREQ("\xD0", converted.c_str());

	// codepoint not in ISO-6937-2
	const char unmapped[] = "\xCB\x97";
	EXPECT_NO_THROW(converted = substituting.Convert(unmapped));
	EXPECT_STREQ("?", converted.c_str());
	EXPECT_THROW(strict.Convert(unmapped), BadInput);
}