merge more lenient utf8 conversion from RC_1_0

2015-07-27 08:59:38 -07:00 · 2015-07-27 08:59:38 -07:00 · f34f09ca58
parent dfd4e13450
commit f34f09ca58
3 changed files with 36 additions and 0 deletions
--- a/1
+++ b/1
@ -74,6 +74,7 @@

 1.0.6 release

+	* make utf8 conversions more lenient
 	* fix loading of piece priorities from resume data
 	* improved seed-mode handling (seed-mode will now automatically be left when
 	  performing operations implying it's not a seed)
--- a/src/utf8.cpp
+++ b/src/utf8.cpp
@ -72,6 +72,15 @@ namespace libtorrent
 					, reinterpret_cast<UTF32**>(&dst_start)
 					, reinterpret_cast<UTF32*>(dst_start + wide.size())
 					, lenientConversion);
+				if (ret == sourceIllegal)
+				{
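+					// not valid UTF-8: assume Latin-1 and widen each byte to one wide character (NOTE(review): the copy starts at *src_start - confirm the converter has not advanced it past a valid prefix)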
+					wide.clear();
+					std::copy((boost::uint8_t const*)*src_start
+						, (boost::uint8_t const*)src_end
+						, std::back_inserter(wide));
+					return static_cast<utf8_conv_result_t>(ret);
+				}
 				wide.resize(dst_start - wide.c_str());
 				return static_cast<utf8_conv_result_t>(ret);
 			}
@ -92,6 +101,15 @@ namespace libtorrent
 					, reinterpret_cast<UTF16**>(&dst_start)
 					, reinterpret_cast<UTF16*>(dst_start + wide.size())
 					, lenientConversion);
+				if (ret == sourceIllegal)
+				{
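+					// not valid UTF-8: assume Latin-1 and widen each byte to one wide character (NOTE(review): the copy starts at *src_start - confirm the converter has not advanced it past a valid prefix)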
+					wide.clear();
+					std::copy((boost::uint8_t const*)*src_start
+						, (boost::uint8_t const*)src_end
+						, std::back_inserter(wide));
+					return static_cast<utf8_conv_result_t>(ret);
+				}
 				wide.resize(dst_start - wide.c_str());
 				return static_cast<utf8_conv_result_t>(ret);
 			}
--- a/test/test_utf8.cpp
+++ b/test/test_utf8.cpp
@ -253,3 +253,20 @@ TORRENT_TEST(utf8)
 	TEST_EQUAL(utf8, identity);
 }

+TORRENT_TEST(invalid_encoding)
+{
+	// test invalid utf8 encodings: the input below is not well-formed UTF-8 (0xd2 is not followed by a continuation byte), so the result is expected to be every byte before the terminating 0 widened unchanged, i.e. treated as "Latin-1"
+	boost::uint8_t const test_string[] = {
+		0xd2, 0xe5, 0xf0, 0xea, 0xf1, 0x20, 0xe8, 0x20, 0xca, 0xe0, 0xe9, 0xea,
+		0xee, 0xf1, 0x2e, 0x32, 0x30, 0x31, 0x34, 0x2e, 0x42, 0x44, 0x52, 0x69,
+		0x70, 0x2e, 0x31, 0x30, 0x38, 0x30, 0x70, 0x2e, 0x6d, 0x6b, 0x76, 0x00
+	};
+	std::wstring wide;
+	utf8_wchar((const char*)test_string, wide);
+
+	std::wstring cmp_wide;
+	std::copy(test_string, test_string + sizeof(test_string) - 1,
+		std::back_inserter(cmp_wide));
+	TEST_CHECK(wide == cmp_wide);
+}
+