merge more lenient utf8 conversion from RC_1_0

This commit is contained in:
arvidn 2015-07-27 08:59:38 -07:00
parent dfd4e13450
commit f34f09ca58
3 changed files with 36 additions and 0 deletions

View File

@ -74,6 +74,7 @@
1.0.6 release
* make utf8 conversions more lenient
* fix loading of piece priorities from resume data
* improved seed-mode handling (seed-mode will now automatically be left when
performing operations implying it's not a seed)

View File

@ -72,6 +72,15 @@ namespace libtorrent
, reinterpret_cast<UTF32**>(&dst_start)
, reinterpret_cast<UTF32*>(dst_start + wide.size())
, lenientConversion);
if (ret == sourceIllegal)
{
// assume Latin-1
wide.clear();
std::copy((boost::uint8_t const*)*src_start
, (boost::uint8_t const*)src_end
, std::back_inserter(wide));
return static_cast<utf8_conv_result_t>(ret);
}
wide.resize(dst_start - wide.c_str());
return static_cast<utf8_conv_result_t>(ret);
}
@ -92,6 +101,15 @@ namespace libtorrent
, reinterpret_cast<UTF16**>(&dst_start)
, reinterpret_cast<UTF16*>(dst_start + wide.size())
, lenientConversion);
if (ret == sourceIllegal)
{
// assume Latin-1
wide.clear();
std::copy((boost::uint8_t const*)*src_start
, (boost::uint8_t const*)src_end
, std::back_inserter(wide));
return static_cast<utf8_conv_result_t>(ret);
}
wide.resize(dst_start - wide.c_str());
return static_cast<utf8_conv_result_t>(ret);
}

View File

@ -253,3 +253,20 @@ TORRENT_TEST(utf8)
TEST_EQUAL(utf8, identity);
}
TORRENT_TEST(invalid_encoding)
{
	// Feed utf8_wchar() a zero-terminated byte sequence that is not valid
	// UTF-8 and verify that the result equals the input with every byte
	// widened to one wide character, i.e. the bytes treated as "Latin-1".
	boost::uint8_t const test_string[] = {
		0xd2, 0xe5, 0xf0, 0xea, 0xf1, 0x20, 0xe8, 0x20, 0xca, 0xe0, 0xe9, 0xea,
		0xee, 0xf1, 0x2e, 0x32, 0x30, 0x31, 0x34, 0x2e, 0x42, 0x44, 0x52, 0x69,
		0x70, 0x2e, 0x31, 0x30, 0x38, 0x30, 0x70, 0x2e, 0x6d, 0x6b, 0x76, 0x00
	};

	// expected output: one wide character per input byte, leaving out the
	// terminating zero at the end of the array
	boost::uint8_t const* const first = test_string;
	boost::uint8_t const* const last = test_string + sizeof(test_string) - 1;
	std::wstring const cmp_wide(first, last);

	// actual output of the conversion under test
	std::wstring wide;
	utf8_wchar(reinterpret_cast<char const*>(test_string), wide);

	TEST_CHECK(wide == cmp_wide);
}