From da8af033cc5cf8afe0ed6678379030359eb6c5a3 Mon Sep 17 00:00:00 2001 From: Arvid Norberg Date: Sat, 24 Sep 2016 22:16:10 -0400 Subject: [PATCH] modernize utf8 conversion functions to throw and move (#1145) modernize utf8 conversion functions to throw and move --- include/libtorrent/string_view.hpp | 2 + include/libtorrent/utf8.hpp | 42 ++++++---- src/create_torrent.cpp | 12 +-- src/escape_string.cpp | 29 +++---- src/file_storage.cpp | 12 +-- src/torrent_handle.cpp | 8 +- src/torrent_info.cpp | 8 +- src/utf8.cpp | 125 +++++++++++++++++++++++------ test/test_utf8.cpp | 14 ++-- 9 files changed, 156 insertions(+), 96 deletions(-) diff --git a/include/libtorrent/string_view.hpp b/include/libtorrent/string_view.hpp index fb9064595..37e17438d 100644 --- a/include/libtorrent/string_view.hpp +++ b/include/libtorrent/string_view.hpp @@ -41,11 +41,13 @@ POSSIBILITY OF SUCH DAMAGE. #include namespace libtorrent { using string_view = boost::string_ref; +using wstring_view = boost::wstring_ref; } #else #include namespace libtorrent { using string_view = boost::string_view; +using wstring_view = boost::wstring_view; } #endif diff --git a/include/libtorrent/utf8.hpp b/include/libtorrent/utf8.hpp index 50856f334..07b9eaca7 100644 --- a/include/libtorrent/utf8.hpp +++ b/include/libtorrent/utf8.hpp @@ -42,35 +42,43 @@ POSSIBILITY OF SUCH DAMAGE. #include #include +#include "libtorrent/string_view.hpp" +#include "libtorrent/error_code.hpp" + namespace libtorrent { - // internal - // results from UTF-8 conversion functions utf8_wchar and - // wchar_utf8 - enum utf8_conv_result_t + namespace utf8_errors { - // conversion successful - conversion_ok, + enum error_code_enum + { + // conversion successful + conversion_ok, - // partial character in source, but hit end - source_exhausted, + // partial character in source, but hit end + source_exhausted, - // insuff. room in target for conversion - target_exhausted, + // insuff. 
room in target for conversion + target_exhausted, - // source sequence is illegal/malformed - source_illegal - }; + // source sequence is illegal/malformed + source_illegal + }; + + // hidden + TORRENT_EXPORT error_code make_error_code(error_code_enum e); + } + + TORRENT_EXPORT boost::system::error_category const& utf8_category(); // ``utf8_wchar`` converts a UTF-8 string (``utf8``) to a wide character // string (``wide``). ``wchar_utf8`` converts a wide character string // (``wide``) to a UTF-8 string (``utf8``). The return value is one of // the enumeration values from utf8_conv_result_t. - TORRENT_EXTRA_EXPORT utf8_conv_result_t utf8_wchar( - const std::string &utf8, std::wstring &wide); - TORRENT_EXTRA_EXPORT utf8_conv_result_t wchar_utf8( - const std::wstring &wide, std::string &utf8); + TORRENT_EXTRA_EXPORT std::wstring utf8_wchar(string_view utf8, error_code& ec); + TORRENT_EXTRA_EXPORT std::wstring utf8_wchar(string_view utf8); + TORRENT_EXTRA_EXPORT std::string wchar_utf8(wstring_view wide, error_code& ec); + TORRENT_EXTRA_EXPORT std::string wchar_utf8(wstring_view wide); } #endif // !BOOST_NO_STD_WSTRING diff --git a/src/create_torrent.cpp b/src/create_torrent.cpp index 61dd54fdb..b46ca2e90 100644 --- a/src/create_torrent.cpp +++ b/src/create_torrent.cpp @@ -197,8 +197,7 @@ namespace libtorrent void add_files(file_storage& fs, std::wstring const& wfile , std::function p, std::uint32_t flags) { - std::string utf8; - wchar_utf8(wfile, utf8); + std::string utf8 = wchar_utf8(wfile); add_files_impl(fs, parent_path(complete(utf8)) , filename(utf8), p, flags); } @@ -206,8 +205,7 @@ namespace libtorrent void add_files(file_storage& fs , std::wstring const& wfile, std::uint32_t flags) { - std::string utf8; - wchar_utf8(wfile, utf8); + std::string utf8 = wchar_utf8(wfile); add_files_impl(fs, parent_path(complete(utf8)) , filename(utf8), default_pred, flags); } @@ -215,16 +213,14 @@ namespace libtorrent void set_piece_hashes(create_torrent& t, std::wstring const& p 
, std::function f, error_code& ec) { - std::string utf8; - wchar_utf8(p, utf8); + std::string utf8 = wchar_utf8(p); set_piece_hashes(t, utf8, f, ec); } void set_piece_hashes_deprecated(create_torrent& t, std::wstring const& p , std::function f, error_code& ec) { - std::string utf8; - wchar_utf8(p, utf8); + std::string utf8 = wchar_utf8(p); set_piece_hashes(t, utf8, f, ec); } #endif diff --git a/src/escape_string.cpp b/src/escape_string.cpp index ade676629..42f3879c0 100644 --- a/src/escape_string.cpp +++ b/src/escape_string.cpp @@ -464,16 +464,16 @@ namespace libtorrent #if defined TORRENT_WINDOWS && TORRENT_USE_WSTRING std::wstring convert_to_wstring(std::string const& s) { - std::wstring ret; - int result = libtorrent::utf8_wchar(s, ret); - if (result == 0) return ret; + error_code ec; + std::wstring ret = libtorrent::utf8_wchar(s, ec); + if (!ec) return ret; ret.clear(); const char* end = &s[0] + s.size(); for (const char* i = &s[0]; i < end;) { wchar_t c = '.'; - result = std::mbtowc(&c, i, end - i); + int const result = std::mbtowc(&c, i, end - i); if (result > 0) i += result; else ++i; ret += c; @@ -483,9 +483,9 @@ namespace libtorrent std::string convert_from_wstring(std::wstring const& s) { - std::string ret; - int result = libtorrent::wchar_utf8(s, ret); - if (result == 0) return ret; + error_code ec; + std::string ret = libtorrent::wchar_utf8(s, ec); + if (!ec) return ret; ret.clear(); const wchar_t* end = &s[0] + s.size(); @@ -493,7 +493,7 @@ namespace libtorrent { char c[10]; TORRENT_ASSERT(sizeof(c) >= MB_CUR_MAX); - result = std::wctomb(c, *i); + int const result = std::wctomb(c, *i); if (result > 0) { i += result; @@ -562,8 +562,7 @@ namespace libtorrent std::string convert_to_native(std::string const& s) { - std::wstring ws; - libtorrent::utf8_wchar(s, ws); + std::wstring ws = libtorrent::utf8_wchar(s); std::string ret; ret.resize(ws.size() * 4 + 1); std::size_t size = WideCharToMultiByte(CP_ACP, 0, ws.c_str(), -1, &ret[0], int(ret.size()), nullptr, 
nullptr); @@ -581,17 +580,14 @@ namespace libtorrent if (size == std::size_t(-1)) return s; if (size != 0 && ws[size - 1] == '\0') --size; ws.resize(size); - std::string ret; - libtorrent::wchar_utf8(ws, ret); - return ret; + return libtorrent::wchar_utf8(ws); } #elif TORRENT_USE_LOCALE std::string convert_to_native(std::string const& s) { - std::wstring ws; - libtorrent::utf8_wchar(s, ws); + std::wstring ws = libtorrent::utf8_wchar(s); std::size_t size = wcstombs(0, ws.c_str(), 0); if (size == std::size_t(-1)) return s; std::string ret; @@ -609,8 +605,7 @@ namespace libtorrent std::size_t size = mbstowcs(&ws[0], s.c_str(), s.size()); if (size == std::size_t(-1)) return s; std::string ret; - libtorrent::wchar_utf8(ws, ret); - return ret; + return libtorrent::wchar_utf8(ws); } #endif diff --git a/src/file_storage.cpp b/src/file_storage.cpp index 3a3e11b99..456986b6a 100644 --- a/src/file_storage.cpp +++ b/src/file_storage.cpp @@ -353,25 +353,19 @@ namespace libtorrent #if TORRENT_USE_WSTRING void file_storage::set_name(std::wstring const& n) { - std::string utf8; - wchar_utf8(n, utf8); - m_name = utf8; + m_name = wchar_utf8(n); } void file_storage::rename_file_deprecated(int index, std::wstring const& new_filename) { TORRENT_ASSERT_PRECOND(index >= 0 && index < int(m_files.size())); - std::string utf8; - wchar_utf8(new_filename, utf8); - update_path_index(m_files[index], utf8); + update_path_index(m_files[index], wchar_utf8(new_filename)); } void file_storage::add_file(std::wstring const& file, std::int64_t file_size , int file_flags, std::time_t mtime, string_view symlink_path) { - std::string utf8; - wchar_utf8(file, utf8); - add_file(utf8, file_size, file_flags, mtime, symlink_path); + add_file(wchar_utf8(file), file_size, file_flags, mtime, symlink_path); } void file_storage::rename_file(int index, std::wstring const& new_filename) diff --git a/src/torrent_handle.cpp b/src/torrent_handle.cpp index a0b6ed358..75183f112 100644 --- a/src/torrent_handle.cpp +++ 
b/src/torrent_handle.cpp @@ -186,16 +186,12 @@ namespace libtorrent void torrent_handle::move_storage( std::wstring const& save_path, int flags) const { - std::string utf8; - wchar_utf8(save_path, utf8); - async_call(&torrent::move_storage, utf8, flags); + async_call(&torrent::move_storage, wchar_utf8(save_path), flags); } void torrent_handle::rename_file(int index, std::wstring const& new_name) const { - std::string utf8; - wchar_utf8(new_name, utf8); - async_call(&torrent::rename_file, index, utf8); + async_call(&torrent::rename_file, index, wchar_utf8(new_name)); } #endif // TORRENT_NO_DEPRECATE #endif // TORRENT_USE_WSTRING diff --git a/src/torrent_info.cpp b/src/torrent_info.cpp index cd59a6702..ae7c173be 100644 --- a/src/torrent_info.cpp +++ b/src/torrent_info.cpp @@ -891,10 +891,8 @@ namespace libtorrent , int const flags) { std::vector buf; - std::string utf8; - wchar_utf8(filename, utf8); error_code ec; - int ret = load_file(utf8, buf, ec); + int ret = load_file(wchar_utf8(filename), buf, ec); if (ret < 0) throw system_error(ec); bdecode_node e; @@ -961,9 +959,7 @@ namespace libtorrent , int const flags) { std::vector buf; - std::string utf8; - wchar_utf8(filename, utf8); - int ret = load_file(utf8, buf, ec); + int ret = load_file(wchar_utf8(filename), buf, ec); if (ret < 0) return; bdecode_node e; diff --git a/src/utf8.cpp b/src/utf8.cpp index 2cb6ff241..9254570a2 100644 --- a/src/utf8.cpp +++ b/src/utf8.cpp @@ -38,6 +38,8 @@ POSSIBILITY OF SUCH DAMAGE. 
#include #include "libtorrent/utf8.hpp" +#include "libtorrent/assert.hpp" +#include "libtorrent/error_code.hpp" #include "libtorrent/ConvertUTF.h" @@ -54,14 +56,14 @@ namespace libtorrent template struct convert_to_wide { - static utf8_conv_result_t convert(UTF8 const** src_start + static utf8_errors::error_code_enum convert(UTF8 const** src_start , UTF8 const* src_end , std::wstring& wide) { TORRENT_UNUSED(src_start); TORRENT_UNUSED(src_end); TORRENT_UNUSED(wide); - return source_illegal; + return utf8_errors::error_code_enum::source_illegal; } }; @@ -69,7 +71,7 @@ namespace libtorrent template<> struct convert_to_wide<4> { - static utf8_conv_result_t convert(char const** src_start + static utf8_errors::error_code_enum convert(char const** src_start , char const* src_end , std::wstring& wide) { @@ -87,10 +89,10 @@ namespace libtorrent std::copy(reinterpret_cast(*src_start) ,reinterpret_cast(src_end) , std::back_inserter(wide)); - return static_cast(ret); + return static_cast(ret); } wide.resize(dst_start - wide.c_str()); - return static_cast(ret); + return static_cast(ret); } }; @@ -98,7 +100,7 @@ namespace libtorrent template<> struct convert_to_wide<2> { - static utf8_conv_result_t convert(char const** src_start + static utf8_errors::error_code_enum convert(char const** src_start , char const* src_end , std::wstring& wide) { @@ -116,10 +118,10 @@ namespace libtorrent std::copy(reinterpret_cast(*src_start) , reinterpret_cast(src_end) , std::back_inserter(wide)); - return static_cast(ret); + return static_cast(ret); } wide.resize(dst_start - wide.c_str()); - return static_cast(ret); + return static_cast(ret); } }; @@ -127,14 +129,14 @@ namespace libtorrent template struct convert_from_wide { - static utf8_conv_result_t convert(wchar_t const** src_start + static utf8_errors::error_code_enum convert(wchar_t const** src_start , wchar_t const* src_end , std::string& utf8) { TORRENT_UNUSED(src_start); TORRENT_UNUSED(src_end); TORRENT_UNUSED(utf8); - return 
source_illegal; + return utf8_errors::error_code_enum::source_illegal; } }; @@ -142,7 +144,7 @@ namespace libtorrent template<> struct convert_from_wide<4> { - static utf8_conv_result_t convert(wchar_t const** src_start + static utf8_errors::error_code_enum convert(wchar_t const** src_start , wchar_t const* src_end , std::string& utf8) { @@ -154,7 +156,7 @@ namespace libtorrent , reinterpret_cast(dst_start + utf8.size()) , lenientConversion); utf8.resize(dst_start - &utf8[0]); - return static_cast(ret); + return static_cast(ret); } }; @@ -162,7 +164,7 @@ namespace libtorrent template<> struct convert_from_wide<2> { - static utf8_conv_result_t convert(wchar_t const** src_start + static utf8_errors::error_code_enum convert(wchar_t const** src_start , wchar_t const* src_end , std::string& utf8) { @@ -174,28 +176,103 @@ namespace libtorrent , reinterpret_cast(dst_start + utf8.size()) , lenientConversion); utf8.resize(dst_start - &utf8[0]); - return static_cast(ret); + return static_cast(ret); + } + }; + + struct utf8_error_category : boost::system::error_category + { + const char* name() const BOOST_SYSTEM_NOEXCEPT override + { + return "UTF error"; + } + + std::string message(int ev) const BOOST_SYSTEM_NOEXCEPT override + { + char const* error_messages[] = { + "ok", + "source exhausted", + "target exhausted", + "source illegal" + }; + + TORRENT_ASSERT(ev >= 0); + TORRENT_ASSERT(ev < sizeof(error_messages)/sizeof(error_messages[0])); + return error_messages[ev]; + } + + boost::system::error_condition default_error_condition( + int ev) const BOOST_SYSTEM_NOEXCEPT override + { + return boost::system::error_condition(ev, *this); } }; } // anonymous namespace - utf8_conv_result_t utf8_wchar(std::string const& utf8, std::wstring &wide) + namespace utf8_errors { - // allocate space for worst-case - wide.resize(utf8.size()); - char const* src_start = utf8.c_str(); - return convert_to_wide::convert( - &src_start, src_start + utf8.size(), wide); + boost::system::error_code 
make_error_code(utf8_errors::error_code_enum e) + { + return error_code(e, utf8_category()); + } + } // utf8_errors namespace + + boost::system::error_category const& utf8_category() + { + static utf8_error_category cat; + return cat; } - utf8_conv_result_t wchar_utf8(std::wstring const& wide, std::string &utf8) + std::wstring utf8_wchar(string_view utf8, error_code& ec) { // allocate space for worst-case + std::wstring wide; + wide.resize(utf8.size()); + char const* src_start = utf8.data(); + utf8_errors::error_code_enum const ret = convert_to_wide::convert( + &src_start, src_start + utf8.size(), wide); + if (ret != utf8_errors::error_code_enum::conversion_ok) + ec = make_error_code(ret); + return wide; + } + + std::wstring utf8_wchar(string_view wide) + { + error_code ec; + std::wstring ret = utf8_wchar(wide, ec); +#ifndef BOOST_NO_EXCEPTIONS + if (ec) throw system_error(ec); +#else + if (ec) std::terminate(); +#endif + return ret; + } + + std::string wchar_utf8(wstring_view wide, error_code& ec) + { + // allocate space for worst-case + std::string utf8; utf8.resize(wide.size() * 6); - if (wide.empty()) return conversion_ok; - wchar_t const* src_start = wide.c_str(); - return convert_from_wide::convert( + if (wide.empty()) return {}; + + wchar_t const* src_start = wide.data(); + utf8_errors::error_code_enum const ret = convert_from_wide::convert( &src_start, src_start + wide.size(), utf8); + if (ret != utf8_errors::error_code_enum::conversion_ok) + ec = make_error_code(ret); + return utf8; + } + + std::string wchar_utf8(wstring_view wide) + { + error_code ec; + std::string ret = wchar_utf8(wide, ec); +#ifndef BOOST_NO_EXCEPTIONS + if (ec) throw system_error(ec); +#else + if (ec) std::terminate(); +#endif + return ret; } } diff --git a/test/test_utf8.cpp b/test/test_utf8.cpp index e0152aa4f..8e9b27319 100644 --- a/test/test_utf8.cpp +++ b/test/test_utf8.cpp @@ -242,13 +242,8 @@ TORRENT_TEST(utf8) std::string utf8; std::copy(utf8_source.begin(), utf8_source.end(),
std::back_inserter(utf8)); - std::wstring wide; - utf8_conv_result_t ret = utf8_wchar(utf8, wide); - TEST_EQUAL(ret, conversion_ok); - - std::string identity; - ret = wchar_utf8(wide, identity); - TEST_EQUAL(ret, conversion_ok); + std::wstring const wide = utf8_wchar(utf8); + std::string const identity = wchar_utf8(wide); TEST_EQUAL(utf8, identity); } @@ -261,8 +256,9 @@ TORRENT_TEST(invalid_encoding) 0xee, 0xf1, 0x2e, 0x32, 0x30, 0x31, 0x34, 0x2e, 0x42, 0x44, 0x52, 0x69, 0x70, 0x2e, 0x31, 0x30, 0x38, 0x30, 0x70, 0x2e, 0x6d, 0x6b, 0x76, 0x00 }; - std::wstring wide; - utf8_wchar((const char*)test_string, wide); + error_code ec; + std::wstring wide = utf8_wchar((char const*)test_string, ec); + TEST_CHECK(ec); std::wstring cmp_wide; std::copy(test_string, test_string + sizeof(test_string) - 1,