use string_view in parse_utf8_codepoint

This commit is contained in:
arvidn 2017-09-02 00:27:47 +02:00 committed by Arvid Norberg
parent 11f008e90e
commit f8ba4f480f
3 changed files with 7 additions and 8 deletions

View File

@ -78,7 +78,7 @@ namespace libtorrent {
// TODO: 3 take a string_view here
TORRENT_EXTRA_EXPORT std::pair<std::int32_t, int>
parse_utf8_codepoint(char const* str, int len);
parse_utf8_codepoint(string_view str);
}
#endif

View File

@ -236,8 +236,7 @@ namespace libtorrent {
for (std::size_t i = 0; i < element.size(); i += std::size_t(seq_len))
{
std::int32_t code_point;
std::tie(code_point, seq_len) = parse_utf8_codepoint(element.data() + i
, int(element.size() - i));
std::tie(code_point, seq_len) = parse_utf8_codepoint(element.substr(i));
if (code_point >= 0 && filter_path_character(code_point))
{

View File

@ -267,17 +267,17 @@ namespace {
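// Returns the Unicode code point and the number of bytes of the UTF-8
// sequence that was parsed. The code point is -1 if the sequence is invalid.
// If the input ends before the sequence announced by its first byte is
// complete, the code point is -1 and the length is the number of bytes left.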
std::pair<std::int32_t, int> parse_utf8_codepoint(char const* str, int const len)
std::pair<std::int32_t, int> parse_utf8_codepoint(string_view str)
{
int const sequence_len = trailingBytesForUTF8[static_cast<std::uint8_t>(*str)] + 1;
if (sequence_len > len) return std::make_pair(-1, len);
int const sequence_len = trailingBytesForUTF8[static_cast<std::uint8_t>(str[0])] + 1;
if (sequence_len > int(str.size())) return std::make_pair(-1, static_cast<int>(str.size()));
if (sequence_len > 4)
{
return std::make_pair(-1, sequence_len);
}
if (!isLegalUTF8(reinterpret_cast<UTF8 const*>(str), sequence_len))
if (!isLegalUTF8(reinterpret_cast<UTF8 const*>(str.data()), sequence_len))
{
return std::make_pair(-1, sequence_len);
}
@ -286,7 +286,7 @@ namespace {
for (int i = 0; i < sequence_len; ++i)
{
ch <<= 6;
ch += static_cast<std::uint8_t>(str[i]);
ch += static_cast<std::uint8_t>(str[static_cast<std::size_t>(i)]);
}
ch -= offsetsFromUTF8[sequence_len-1];