modernize utf8 conversion functions to throw and move (#1145)

modernize utf8 conversion functions to throw and move
This commit is contained in:
Arvid Norberg 2016-09-24 22:16:10 -04:00 committed by GitHub
parent 372d992d8e
commit da8af033cc
9 changed files with 156 additions and 96 deletions

View File

@ -41,11 +41,13 @@ POSSIBILITY OF SUCH DAMAGE.
#include <boost/utility/string_ref.hpp>
namespace libtorrent {
using string_view = boost::string_ref;
using wstring_view = boost::wstring_ref;
}
#else
#include <boost/utility/string_view.hpp>
namespace libtorrent {
using string_view = boost::string_view;
using wstring_view = boost::wstring_view;
}
#endif

View File

@ -42,35 +42,43 @@ POSSIBILITY OF SUCH DAMAGE.
#include <string>
#include <cwchar>
#include "libtorrent/string_view.hpp"
#include "libtorrent/error_code.hpp"
namespace libtorrent
{
// internal
// results from UTF-8 conversion functions utf8_wchar and
// wchar_utf8
enum utf8_conv_result_t
namespace utf8_errors
{
// conversion successful
conversion_ok,
enum error_code_enum
{
// conversion successful
conversion_ok,
// partial character in source, but hit end
source_exhausted,
// partial character in source, but hit end
source_exhausted,
// insuff. room in target for conversion
target_exhausted,
// insuff. room in target for conversion
target_exhausted,
// source sequence is illegal/malformed
source_illegal
};
// source sequence is illegal/malformed
source_illegal
};
// hidden
TORRENT_EXPORT error_code make_error_code(error_code_enum e);
}
TORRENT_EXPORT boost::system::error_category const& utf8_category();
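// ``utf8_wchar`` converts a UTF-8 string (``utf8``) to a wide character
// string and returns it. ``wchar_utf8`` converts a wide character string
// (``wide``) to a UTF-8 string and returns it. The overloads taking an
// ``error_code`` report a failed conversion by setting ``ec`` to one of
// the values in utf8_errors::error_code_enum. The overloads without an
// ``error_code`` throw ``system_error`` on failure instead.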
TORRENT_EXTRA_EXPORT utf8_conv_result_t utf8_wchar(
const std::string &utf8, std::wstring &wide);
TORRENT_EXTRA_EXPORT utf8_conv_result_t wchar_utf8(
const std::wstring &wide, std::string &utf8);
TORRENT_EXTRA_EXPORT std::wstring utf8_wchar(string_view utf8, error_code& ec);
TORRENT_EXTRA_EXPORT std::wstring utf8_wchar(string_view utf8);
TORRENT_EXTRA_EXPORT std::string wchar_utf8(wstring_view wide, error_code& ec);
TORRENT_EXTRA_EXPORT std::string wchar_utf8(wstring_view wide);
}
#endif // !BOOST_NO_STD_WSTRING

View File

@ -197,8 +197,7 @@ namespace libtorrent
// Wide character overload of add_files(). ``wfile`` is converted to UTF-8
// with wchar_utf8(), and parent_path(complete(utf8)) and filename(utf8)
// are passed on to add_files_impl() together with the predicate ``p``
// and ``flags``.
void add_files(file_storage& fs, std::wstring const& wfile
, std::function<bool(std::string)> p, std::uint32_t flags)
{
std::string utf8;
wchar_utf8(wfile, utf8);
std::string utf8 = wchar_utf8(wfile);
add_files_impl(fs, parent_path(complete(utf8))
, filename(utf8), p, flags);
}
@ -206,8 +205,7 @@ namespace libtorrent
// Wide character overload of add_files() without a caller supplied
// predicate. ``wfile`` is converted to UTF-8 with wchar_utf8() and
// forwarded to add_files_impl() with ``default_pred`` as the predicate.
void add_files(file_storage& fs
, std::wstring const& wfile, std::uint32_t flags)
{
std::string utf8;
wchar_utf8(wfile, utf8);
std::string utf8 = wchar_utf8(wfile);
add_files_impl(fs, parent_path(complete(utf8))
, filename(utf8), default_pred, flags);
}
@ -215,16 +213,14 @@ namespace libtorrent
// Wide character overload of set_piece_hashes(). The path ``p`` is
// converted to UTF-8 with wchar_utf8() and the call is forwarded to
// set_piece_hashes() with the converted path, passing ``f`` and ``ec``
// through unchanged.
void set_piece_hashes(create_torrent& t, std::wstring const& p
, std::function<void(int)> f, error_code& ec)
{
std::string utf8;
wchar_utf8(p, utf8);
std::string utf8 = wchar_utf8(p);
set_piece_hashes(t, utf8, f, ec);
}
// Deprecated wide character entry point. The path ``p`` is converted to
// UTF-8 with wchar_utf8() and the call is forwarded to set_piece_hashes()
// with the converted path, passing ``f`` and ``ec`` through unchanged.
void set_piece_hashes_deprecated(create_torrent& t, std::wstring const& p
, std::function<void(int)> f, error_code& ec)
{
std::string utf8;
wchar_utf8(p, utf8);
std::string utf8 = wchar_utf8(p);
set_piece_hashes(t, utf8, f, ec);
}
#endif

View File

@ -464,16 +464,16 @@ namespace libtorrent
#if defined TORRENT_WINDOWS && TORRENT_USE_WSTRING
std::wstring convert_to_wstring(std::string const& s)
{
std::wstring ret;
int result = libtorrent::utf8_wchar(s, ret);
if (result == 0) return ret;
error_code ec;
std::wstring ret = libtorrent::utf8_wchar(s, ec);
if (!ec) return ret;
ret.clear();
const char* end = &s[0] + s.size();
for (const char* i = &s[0]; i < end;)
{
wchar_t c = '.';
result = std::mbtowc(&c, i, end - i);
int const result = std::mbtowc(&c, i, end - i);
if (result > 0) i += result;
else ++i;
ret += c;
@ -483,9 +483,9 @@ namespace libtorrent
std::string convert_from_wstring(std::wstring const& s)
{
std::string ret;
int result = libtorrent::wchar_utf8(s, ret);
if (result == 0) return ret;
error_code ec;
std::string ret = libtorrent::wchar_utf8(s, ec);
if (!ec) return ret;
ret.clear();
const wchar_t* end = &s[0] + s.size();
@ -493,7 +493,7 @@ namespace libtorrent
{
char c[10];
TORRENT_ASSERT(sizeof(c) >= MB_CUR_MAX);
result = std::wctomb(c, *i);
int const result = std::wctomb(c, *i);
if (result > 0)
{
i += result;
@ -562,8 +562,7 @@ namespace libtorrent
std::string convert_to_native(std::string const& s)
{
std::wstring ws;
libtorrent::utf8_wchar(s, ws);
std::wstring ws = libtorrent::utf8_wchar(s);
std::string ret;
ret.resize(ws.size() * 4 + 1);
std::size_t size = WideCharToMultiByte(CP_ACP, 0, ws.c_str(), -1, &ret[0], int(ret.size()), nullptr, nullptr);
@ -581,17 +580,14 @@ namespace libtorrent
if (size == std::size_t(-1)) return s;
if (size != 0 && ws[size - 1] == '\0') --size;
ws.resize(size);
std::string ret;
libtorrent::wchar_utf8(ws, ret);
return ret;
return libtorrent::wchar_utf8(ws);
}
#elif TORRENT_USE_LOCALE
std::string convert_to_native(std::string const& s)
{
std::wstring ws;
libtorrent::utf8_wchar(s, ws);
std::wstring ws = libtorrent::utf8_wchar(s);
std::size_t size = wcstombs(0, ws.c_str(), 0);
if (size == std::size_t(-1)) return s;
std::string ret;
@ -609,8 +605,7 @@ namespace libtorrent
std::size_t size = mbstowcs(&ws[0], s.c_str(), s.size());
if (size == std::size_t(-1)) return s;
std::string ret;
libtorrent::wchar_utf8(ws, ret);
return ret;
return libtorrent::wchar_utf8(ws);
}
#endif

View File

@ -353,25 +353,19 @@ namespace libtorrent
#if TORRENT_USE_WSTRING
// Wide character overload: stores the UTF-8 conversion of ``n``
// (produced by wchar_utf8()) in m_name.
void file_storage::set_name(std::wstring const& n)
{
std::string utf8;
wchar_utf8(n, utf8);
m_name = utf8;
m_name = wchar_utf8(n);
}
// Deprecated wide character rename. ``index`` must refer to an existing
// entry in m_files (checked as a precondition). The new filename is
// converted to UTF-8 with wchar_utf8() and handed to update_path_index()
// for that entry.
void file_storage::rename_file_deprecated(int index, std::wstring const& new_filename)
{
TORRENT_ASSERT_PRECOND(index >= 0 && index < int(m_files.size()));
std::string utf8;
wchar_utf8(new_filename, utf8);
update_path_index(m_files[index], utf8);
update_path_index(m_files[index], wchar_utf8(new_filename));
}
// Wide character overload of add_file(). ``file`` is converted to UTF-8
// with wchar_utf8() and all remaining arguments are forwarded unchanged
// to add_file() with the converted path.
void file_storage::add_file(std::wstring const& file, std::int64_t file_size
, int file_flags, std::time_t mtime, string_view symlink_path)
{
std::string utf8;
wchar_utf8(file, utf8);
add_file(utf8, file_size, file_flags, mtime, symlink_path);
add_file(wchar_utf8(file), file_size, file_flags, mtime, symlink_path);
}
void file_storage::rename_file(int index, std::wstring const& new_filename)

View File

@ -186,16 +186,12 @@ namespace libtorrent
// Wide character overload of move_storage(). ``save_path`` is converted
// to UTF-8 with wchar_utf8() and torrent::move_storage is invoked through
// async_call() with the converted path and ``flags``.
void torrent_handle::move_storage(
std::wstring const& save_path, int flags) const
{
std::string utf8;
wchar_utf8(save_path, utf8);
async_call(&torrent::move_storage, utf8, flags);
async_call(&torrent::move_storage, wchar_utf8(save_path), flags);
}
// Wide character overload of rename_file(). ``new_name`` is converted to
// UTF-8 with wchar_utf8() and torrent::rename_file is invoked through
// async_call() with ``index`` and the converted name.
void torrent_handle::rename_file(int index, std::wstring const& new_name) const
{
std::string utf8;
wchar_utf8(new_name, utf8);
async_call(&torrent::rename_file, index, utf8);
async_call(&torrent::rename_file, index, wchar_utf8(new_name));
}
#endif // TORRENT_NO_DEPRECATE
#endif // TORRENT_USE_WSTRING

View File

@ -891,10 +891,8 @@ namespace libtorrent
, int const flags)
{
std::vector<char> buf;
std::string utf8;
wchar_utf8(filename, utf8);
error_code ec;
int ret = load_file(utf8, buf, ec);
int ret = load_file(wchar_utf8(filename), buf, ec);
if (ret < 0) throw system_error(ec);
bdecode_node e;
@ -961,9 +959,7 @@ namespace libtorrent
, int const flags)
{
std::vector<char> buf;
std::string utf8;
wchar_utf8(filename, utf8);
int ret = load_file(utf8, buf, ec);
int ret = load_file(wchar_utf8(filename), buf, ec);
if (ret < 0) return;
bdecode_node e;

View File

@ -38,6 +38,8 @@ POSSIBILITY OF SUCH DAMAGE.
#include <iterator>
#include "libtorrent/utf8.hpp"
#include "libtorrent/assert.hpp"
#include "libtorrent/error_code.hpp"
#include "libtorrent/ConvertUTF.h"
@ -54,14 +56,14 @@ namespace libtorrent
template<int width>
struct convert_to_wide
{
static utf8_conv_result_t convert(UTF8 const** src_start
static utf8_errors::error_code_enum convert(UTF8 const** src_start
, UTF8 const* src_end
, std::wstring& wide)
{
TORRENT_UNUSED(src_start);
TORRENT_UNUSED(src_end);
TORRENT_UNUSED(wide);
return source_illegal;
return utf8_errors::error_code_enum::source_illegal;
}
};
@ -69,7 +71,7 @@ namespace libtorrent
template<>
struct convert_to_wide<4>
{
static utf8_conv_result_t convert(char const** src_start
static utf8_errors::error_code_enum convert(char const** src_start
, char const* src_end
, std::wstring& wide)
{
@ -87,10 +89,10 @@ namespace libtorrent
std::copy(reinterpret_cast<std::uint8_t const*>(*src_start)
,reinterpret_cast<std::uint8_t const*>(src_end)
, std::back_inserter(wide));
return static_cast<utf8_conv_result_t>(ret);
return static_cast<utf8_errors::error_code_enum>(ret);
}
wide.resize(dst_start - wide.c_str());
return static_cast<utf8_conv_result_t>(ret);
return static_cast<utf8_errors::error_code_enum>(ret);
}
};
@ -98,7 +100,7 @@ namespace libtorrent
template<>
struct convert_to_wide<2>
{
static utf8_conv_result_t convert(char const** src_start
static utf8_errors::error_code_enum convert(char const** src_start
, char const* src_end
, std::wstring& wide)
{
@ -116,10 +118,10 @@ namespace libtorrent
std::copy(reinterpret_cast<std::uint8_t const*>(*src_start)
, reinterpret_cast<std::uint8_t const*>(src_end)
, std::back_inserter(wide));
return static_cast<utf8_conv_result_t>(ret);
return static_cast<utf8_errors::error_code_enum>(ret);
}
wide.resize(dst_start - wide.c_str());
return static_cast<utf8_conv_result_t>(ret);
return static_cast<utf8_errors::error_code_enum>(ret);
}
};
@ -127,14 +129,14 @@ namespace libtorrent
template<int width>
struct convert_from_wide
{
static utf8_conv_result_t convert(wchar_t const** src_start
static utf8_errors::error_code_enum convert(wchar_t const** src_start
, wchar_t const* src_end
, std::string& utf8)
{
TORRENT_UNUSED(src_start);
TORRENT_UNUSED(src_end);
TORRENT_UNUSED(utf8);
return source_illegal;
return utf8_errors::error_code_enum::source_illegal;
}
};
@ -142,7 +144,7 @@ namespace libtorrent
template<>
struct convert_from_wide<4>
{
static utf8_conv_result_t convert(wchar_t const** src_start
static utf8_errors::error_code_enum convert(wchar_t const** src_start
, wchar_t const* src_end
, std::string& utf8)
{
@ -154,7 +156,7 @@ namespace libtorrent
, reinterpret_cast<UTF8*>(dst_start + utf8.size())
, lenientConversion);
utf8.resize(dst_start - &utf8[0]);
return static_cast<utf8_conv_result_t>(ret);
return static_cast<utf8_errors::error_code_enum>(ret);
}
};
@ -162,7 +164,7 @@ namespace libtorrent
template<>
struct convert_from_wide<2>
{
static utf8_conv_result_t convert(wchar_t const** src_start
static utf8_errors::error_code_enum convert(wchar_t const** src_start
, wchar_t const* src_end
, std::string& utf8)
{
@ -174,28 +176,103 @@ namespace libtorrent
, reinterpret_cast<UTF8*>(dst_start + utf8.size())
, lenientConversion);
utf8.resize(dst_start - &utf8[0]);
return static_cast<utf8_conv_result_t>(ret);
return static_cast<utf8_errors::error_code_enum>(ret);
}
};
// Error category for the UTF-8 conversion results declared in
// utf8_errors::error_code_enum. It maps each enumerator value to a short
// human readable description.
struct utf8_error_category : boost::system::error_category
{
	// the name identifying this category
	const char* name() const BOOST_SYSTEM_NOEXCEPT override
	{
		return "UTF error";
	}

	// returns the description for the error value ``ev``. A value outside
	// of the table yields "unknown error" rather than indexing out of
	// bounds, in case TORRENT_ASSERT is compiled out.
	std::string message(int ev) const BOOST_SYSTEM_NOEXCEPT override
	{
		// indexed by the values of utf8_errors::error_code_enum
		static char const* const error_messages[] = {
			"ok",
			"source exhausted",
			"target exhausted",
			"source illegal"
		};
		// keep the bound as an int so the comparisons against ``ev`` don't
		// mix signed and unsigned operands
		int const num_messages = static_cast<int>(
			sizeof(error_messages) / sizeof(error_messages[0]));

		TORRENT_ASSERT(ev >= 0);
		TORRENT_ASSERT(ev < num_messages);
		if (ev < 0 || ev >= num_messages) return "unknown error";
		return error_messages[ev];
	}

	// the default condition is the same value within this category
	boost::system::error_condition default_error_condition(
		int ev) const BOOST_SYSTEM_NOEXCEPT override
	{
		return boost::system::error_condition(ev, *this);
	}
};
} // anonymous namespace
utf8_conv_result_t utf8_wchar(std::string const& utf8, std::wstring &wide)
namespace utf8_errors
{
// allocate space for worst-case
wide.resize(utf8.size());
char const* src_start = utf8.c_str();
return convert_to_wide<sizeof(wchar_t)>::convert(
&src_start, src_start + utf8.size(), wide);
// Builds an error_code holding the conversion result ``e`` in the
// category returned by utf8_category().
boost::system::error_code make_error_code(utf8_errors::error_code_enum e)
{
	boost::system::error_category const& category = utf8_category();
	return error_code(e, category);
}
} // utf8_errors namespace
// Returns a reference to the function-local static utf8_error_category
// instance; every call hands back the same object.
boost::system::error_category const& utf8_category()
{
	static utf8_error_category instance;
	return instance;
}
utf8_conv_result_t wchar_utf8(std::wstring const& wide, std::string &utf8)
// Converts the UTF-8 string ``utf8`` to a wide character string and
// returns it. If the conversion reports anything other than
// conversion_ok, ``ec`` is set to the corresponding utf8_errors error
// code; ``ec`` is left untouched on success. The string produced by the
// converter is returned in either case.
std::wstring utf8_wchar(string_view utf8, error_code& ec)
{
	// worst case: one wide character per byte of input
	std::wstring result(utf8.size(), L'\0');

	char const* cursor = utf8.data();
	char const* const last = cursor + utf8.size();
	utf8_errors::error_code_enum const status
		= convert_to_wide<sizeof(wchar_t)>::convert(&cursor, last, result);

	if (status == utf8_errors::error_code_enum::conversion_ok)
		return result;

	ec = make_error_code(status);
	return result;
}
// Throwing overload of utf8_wchar(). Converts the UTF-8 string ``utf8``
// to a wide character string via the error_code overload. If that
// overload reports an error, a system_error carrying the error code is
// thrown. When exceptions are disabled (BOOST_NO_EXCEPTIONS),
// std::terminate() is called instead.
std::wstring utf8_wchar(string_view utf8)
{
	error_code ec;
	std::wstring ret = utf8_wchar(utf8, ec);
#ifndef BOOST_NO_EXCEPTIONS
	if (ec) throw system_error(ec);
#else
	if (ec) std::terminate();
#endif
	return ret;
}
// Converts the wide character string ``wide`` to UTF-8 and returns the
// result. An empty input returns an empty string. If the conversion
// reports anything other than conversion_ok, ``ec`` is set to the
// corresponding utf8_errors error code; ``ec`` is not modified otherwise.
std::string wchar_utf8(wstring_view wide, error_code& ec)
{
// allocate space for the worst case: six bytes of output per wide
// character of input
std::string utf8;
utf8.resize(wide.size() * 6);
if (wide.empty()) return conversion_ok;
wchar_t const* src_start = wide.c_str();
return convert_from_wide<sizeof(wchar_t)>::convert(
if (wide.empty()) return {};
wchar_t const* src_start = wide.data();
// pick the converter matching the platform's wchar_t width
utf8_errors::error_code_enum const ret = convert_from_wide<sizeof(wchar_t)>::convert(
&src_start, src_start + wide.size(), utf8);
if (ret != utf8_errors::error_code_enum::conversion_ok)
ec = make_error_code(ret);
return utf8;
}
// Throwing overload of wchar_utf8(). Converts ``wide`` to UTF-8 via the
// error_code overload and returns the result. If that overload reports
// an error, a system_error carrying the error code is thrown, or
// std::terminate() is called when exceptions are disabled
// (BOOST_NO_EXCEPTIONS).
std::string wchar_utf8(wstring_view wide)
{
	error_code failure;
	std::string converted = wchar_utf8(wide, failure);
	if (!failure) return converted;

#ifndef BOOST_NO_EXCEPTIONS
	throw system_error(failure);
#else
	std::terminate();
#endif
}
}

View File

@ -242,13 +242,8 @@ TORRENT_TEST(utf8)
std::string utf8;
std::copy(utf8_source.begin(), utf8_source.end(), std::back_inserter(utf8));
std::wstring wide;
utf8_conv_result_t ret = utf8_wchar(utf8, wide);
TEST_EQUAL(ret, conversion_ok);
std::string identity;
ret = wchar_utf8(wide, identity);
TEST_EQUAL(ret, conversion_ok);
std::wstring const wide = utf8_wchar(utf8);
std::string const identity = wchar_utf8(wide);
TEST_EQUAL(utf8, identity);
}
@ -261,8 +256,9 @@ TORRENT_TEST(invalid_encoding)
0xee, 0xf1, 0x2e, 0x32, 0x30, 0x31, 0x34, 0x2e, 0x42, 0x44, 0x52, 0x69,
0x70, 0x2e, 0x31, 0x30, 0x38, 0x30, 0x70, 0x2e, 0x6d, 0x6b, 0x76, 0x00
};
std::wstring wide;
utf8_wchar((const char*)test_string, wide);
error_code ec;
std::wstring wide = utf8_wchar((char const*)test_string, ec);
TEST_CHECK(ec);
std::wstring cmp_wide;
std::copy(test_string, test_string + sizeof(test_string) - 1,