extend utf8 unit test

This commit is contained in:
Arvid Norberg 2014-05-03 21:00:59 +00:00
parent 474e77039f
commit 253e827c28
1 changed file with 177 additions and 49 deletions

View File

@ -40,6 +40,133 @@ POSSIBILITY OF SUCH DAMAGE.
using namespace libtorrent;
void verify_transforms(char const* utf8_source, int utf8_source_len = -1)
{
if (utf8_source_len == -1)
utf8_source_len = strlen(utf8_source);
// utf8 -> utf16 -> utf32 -> utf8
{
std::vector<UTF16> utf16(utf8_source_len);
UTF8 const* in8 = (UTF8 const*)utf8_source;
UTF16* out16 = &utf16[0];
ConversionResult ret = ConvertUTF8toUTF16(&in8, in8 + utf8_source_len
, &out16, out16 + utf16.size(), strictConversion);
TEST_EQUAL(ret, conversionOK);
if (ret != conversionOK && utf8_source_len < 10)
{
for (char const* i = utf8_source; *i != 0; ++i)
fprintf(stderr, "%x ", UTF8(*i));
}
std::vector<UTF32> utf32(utf8_source_len);
UTF16 const* in16 = &utf16[0];
UTF32* out32 = &utf32[0];
ret = ConvertUTF16toUTF32(&in16, out16
, &out32, out32 + utf32.size(), strictConversion);
TEST_EQUAL(ret, conversionOK);
if (ret != conversionOK && utf8_source_len < 10)
{
for (char const* i = utf8_source; *i != 0; ++i)
fprintf(stderr, "%x ", UTF8(*i));
}
std::vector<UTF8> utf8(utf8_source_len);
UTF32 const* in32 = &utf32[0];
UTF8* out8 = &utf8[0];
ret = ConvertUTF32toUTF8(&in32, out32
, &out8, out8 + utf8.size(), strictConversion);
TEST_EQUAL(ret, conversionOK);
if (ret != conversionOK && utf8_source_len < 10)
{
for (char const* i = utf8_source; *i != 0; ++i)
fprintf(stderr, "%x ", UTF8(*i));
}
TEST_EQUAL(out8 - &utf8[0], utf8_source_len);
TEST_CHECK(std::equal(&utf8[0], out8, (UTF8 const*)utf8_source));
}
// utf8 -> utf32 -> utf16 -> utf8
{
std::vector<UTF32> utf32(utf8_source_len);
UTF8 const* in8 = (UTF8 const*)utf8_source;
UTF32* out32 = &utf32[0];
ConversionResult ret = ConvertUTF8toUTF32(&in8, in8 + utf8_source_len
, &out32, out32 + utf32.size(), strictConversion);
TEST_EQUAL(ret, conversionOK);
if (ret != conversionOK && utf8_source_len < 10)
{
for (char const* i = utf8_source; *i != 0; ++i)
fprintf(stderr, "%x ", UTF8(*i));
}
std::vector<UTF16> utf16(utf8_source_len);
UTF32 const* in32 = &utf32[0];
UTF16* out16 = &utf16[0];
ret = ConvertUTF32toUTF16(&in32, out32
, &out16, out16 + utf16.size(), strictConversion);
TEST_EQUAL(ret, conversionOK);
if (ret != conversionOK && utf8_source_len < 10)
{
for (char const* i = utf8_source; *i != 0; ++i)
fprintf(stderr, "%x ", UTF8(*i));
}
std::vector<UTF8> utf8(utf8_source_len);
UTF16 const* in16 = &utf16[0];
UTF8* out8 = &utf8[0];
ret = ConvertUTF16toUTF8(&in16, out16
, &out8, out8 + utf8.size(), strictConversion);
TEST_EQUAL(ret, conversionOK);
if (ret != conversionOK && utf8_source_len < 10)
{
for (char const* i = utf8_source; *i != 0; ++i)
fprintf(stderr, "%x ", UTF8(*i));
}
TEST_EQUAL(out8 - &utf8[0], utf8_source_len);
TEST_CHECK(std::equal(&utf8[0], out8, (UTF8 const*)utf8_source));
}
}
void expect_error(char const* utf8, ConversionResult expect)
{
UTF8 const* in8 = (UTF8 const*)utf8;
std::vector<UTF32> utf32(strlen(utf8));
UTF32* out32 = &utf32[0];
ConversionResult ret = ConvertUTF8toUTF32(&in8, in8 + strlen(utf8)
, &out32, out32 + utf32.size(), strictConversion);
TEST_EQUAL(ret, expect);
if (ret != expect)
{
fprintf(stderr, "%d expected %d\n", ret, expect);
for (char const* i = utf8; *i != 0; ++i)
fprintf(stderr, "%x ", UTF8(*i));
}
in8 = (UTF8 const*)utf8;
std::vector<UTF16> utf16(strlen(utf8));
UTF16* out16 = &utf16[0];
ret = ConvertUTF8toUTF16(&in8, in8 + strlen(utf8)
, &out16, out16 + utf16.size(), strictConversion);
TEST_EQUAL(ret, expect);
if (ret != expect)
{
fprintf(stderr, "%d expected %d\n", ret, expect);
for (char const* i = utf8; *i != 0; ++i)
fprintf(stderr, "%x ", UTF8(*i));
}
}
int test_main()
{
std::vector<char> utf8_source;
@ -51,63 +178,64 @@ int test_main()
// test lower level conversions
// utf8 -> utf16 -> utf32 -> utf8
{
std::vector<UTF16> utf16(utf8_source.size());
UTF8 const* in8 = (UTF8 const*)&utf8_source[0];
UTF16* out16 = &utf16[0];
ConversionResult ret = ConvertUTF8toUTF16(&in8, in8 + utf8_source.size()
, &out16, out16 + utf16.size(), strictConversion);
verify_transforms(&utf8_source[0], utf8_source.size());
TEST_EQUAL(ret, conversionOK);
verify_transforms("\xc3\xb0");
verify_transforms("\xed\x9f\xbf");
verify_transforms("\xee\x80\x80");
verify_transforms("\xef\xbf\xbd");
verify_transforms("\xf4\x8f\xbf\xbf");
verify_transforms("\xf0\x91\x80\x80\x30");
std::vector<UTF32> utf32(utf8_source.size());
UTF16 const* in16 = &utf16[0];
UTF32* out32 = &utf32[0];
ret = ConvertUTF16toUTF32(&in16, out16
, &out32, out32 + utf32.size(), strictConversion);
// Unexpected continuation bytes
expect_error("\x80", sourceIllegal);
expect_error("\xbf", sourceIllegal);
TEST_EQUAL(ret, conversionOK);
// Impossible bytes
// The following two bytes cannot appear in a correct UTF-8 string
expect_error("\xff", sourceExhausted);
expect_error("\xfe", sourceExhausted);
expect_error("\xff\xff\xfe\xfe", sourceExhausted);
std::vector<UTF8> utf8(utf8_source.size());
UTF32 const* in32 = &utf32[0];
UTF8* out8 = &utf8[0];
ret = ConvertUTF32toUTF8(&in32, out32
, &out8, out8 + utf8.size(), strictConversion);
// Examples of an overlong ASCII character
expect_error("\xc0\xaf", sourceIllegal);
expect_error("\xe0\x80\xaf", sourceIllegal);
expect_error("\xf0\x80\x80\xaf", sourceIllegal);
expect_error("\xf8\x80\x80\x80\xaf ", sourceIllegal);
expect_error("\xfc\x80\x80\x80\x80\xaf", sourceIllegal);
TEST_EQUAL(ret, conversionOK);
TEST_EQUAL(out8 - &utf8[0], utf8_source.size());
TEST_CHECK(std::equal(&utf8[0], out8, (UTF8 const*)&utf8_source[0]));
}
// Maximum overlong sequences
expect_error("\xc1\xbf", sourceIllegal);
expect_error("\xe0\x9f\xbf", sourceIllegal);
expect_error("\xf0\x8f\xbf\xbf", sourceIllegal);
expect_error("\xf8\x87\xbf\xbf\xbf", sourceIllegal);
expect_error("\xfc\x83\xbf\xbf\xbf\xbf", sourceIllegal);
// utf8 -> utf32 -> utf16 -> utf8
{
std::vector<UTF32> utf32(utf8_source.size());
UTF8 const* in8 = (UTF8 const*)&utf8_source[0];
UTF32* out32 = &utf32[0];
ConversionResult ret = ConvertUTF8toUTF32(&in8, in8 + utf8_source.size()
, &out32, out32 + utf32.size(), strictConversion);
// Overlong representation of the NUL character
expect_error("\xc0\x80", sourceIllegal);
expect_error("\xe0\x80\x80", sourceIllegal);
expect_error("\xf0\x80\x80\x80", sourceIllegal);
expect_error("\xf8\x80\x80\x80\x80", sourceIllegal);
expect_error("\xfc\x80\x80\x80\x80\x80", sourceIllegal);
TEST_EQUAL(ret, conversionOK);
// Single UTF-16 surrogates
expect_error("\xed\xa0\x80", sourceIllegal);
expect_error("\xed\xad\xbf", sourceIllegal);
expect_error("\xed\xae\x80", sourceIllegal);
expect_error("\xed\xaf\xbf", sourceIllegal);
expect_error("\xed\xb0\x80", sourceIllegal);
expect_error("\xed\xbe\x80", sourceIllegal);
expect_error("\xed\xbf\xbf", sourceIllegal);
std::vector<UTF16> utf16(utf8_source.size());
UTF32 const* in32 = &utf32[0];
UTF16* out16 = &utf16[0];
ret = ConvertUTF32toUTF16(&in32, out32
, &out16, out16 + utf16.size(), strictConversion);
TEST_EQUAL(ret, conversionOK);
std::vector<UTF8> utf8(utf8_source.size());
UTF16 const* in16 = &utf16[0];
UTF8* out8 = &utf8[0];
ret = ConvertUTF16toUTF8(&in16, out16
, &out8, out8 + utf8.size(), strictConversion);
TEST_EQUAL(ret, conversionOK);
TEST_EQUAL(out8 - &utf8[0], utf8_source.size());
TEST_CHECK(std::equal(&utf8[0], out8, (UTF8 const*)&utf8_source[0]));
}
// Paired UTF-16 surrogates
expect_error("\xed\xa0\x80\xed\xb0\x80", sourceIllegal);
expect_error("\xed\xa0\x80\xed\xbf\xbf", sourceIllegal);
expect_error("\xed\xad\xbf\xed\xb0\x80", sourceIllegal);
expect_error("\xed\xad\xbf\xed\xbf\xbf", sourceIllegal);
expect_error("\xed\xae\x80\xed\xb0\x80", sourceIllegal);
expect_error("\xed\xae\x80\xed\xbf\xbf", sourceIllegal);
expect_error("\xed\xaf\xbf\xed\xb0\x80", sourceIllegal);
expect_error("\xed\xaf\xbf\xed\xbf\xbf", sourceIllegal);
// test higher level conversions