merge RC_1_1 into master
This commit is contained in:
commit
73a3e390b5
|
@ -73,6 +73,8 @@
|
||||||
* require C++11 to build libtorrent
|
* require C++11 to build libtorrent
|
||||||
|
|
||||||
|
|
||||||
|
* improve path sanitization (filter unicode text direction characters)
|
||||||
|
* deprecate partial_piece_info::piece_state
|
||||||
* bind upnp requests to correct local address
|
* bind upnp requests to correct local address
|
||||||
* save resume data when removing web seeds
|
* save resume data when removing web seeds
|
||||||
* fix proxying of https connections
|
* fix proxying of https connections
|
||||||
|
|
|
@ -134,6 +134,11 @@ TORRENT_EXTRA_EXPORT ConversionResult ConvertUTF32toUTF16 (
|
||||||
TORRENT_EXTRA_EXPORT Boolean isLegalUTF8Sequence(const UTF8 *source,
|
TORRENT_EXTRA_EXPORT Boolean isLegalUTF8Sequence(const UTF8 *source,
|
||||||
const UTF8 *sourceEnd);
|
const UTF8 *sourceEnd);
|
||||||
|
|
||||||
|
TORRENT_EXTRA_EXPORT Boolean isLegalUTF8(const UTF8 *source, int length);
|
||||||
|
|
||||||
|
extern const char trailingBytesForUTF8[256];
|
||||||
|
extern const UTF32 offsetsFromUTF8[6];
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -177,6 +177,7 @@ namespace libtorrent { namespace aux {
|
||||||
// get_download_queue() is called, it will be invalidated.
|
// get_download_queue() is called, it will be invalidated.
|
||||||
block_info* blocks;
|
block_info* blocks;
|
||||||
|
|
||||||
|
#ifndef TORRENT_NO_DEPRECATE
|
||||||
// the speed classes. These may be used by the piece picker to
|
// the speed classes. These may be used by the piece picker to
|
||||||
// coalesce requests of similar download rates
|
// coalesce requests of similar download rates
|
||||||
enum state_t { none, slow, medium, fast };
|
enum state_t { none, slow, medium, fast };
|
||||||
|
@ -193,7 +194,12 @@ namespace libtorrent { namespace aux {
|
||||||
// downloaded pieces down. Pieces set to ``none`` can be converted into
|
// downloaded pieces down. Pieces set to ``none`` can be converted into
|
||||||
// any of ``fast``, ``medium`` or ``slow`` as soon as a peer wants to
|
// any of ``fast``, ``medium`` or ``slow`` as soon as a peer wants to
|
||||||
// download from it.
|
// download from it.
|
||||||
state_t piece_state;
|
state_t TORRENT_DEPRECATED_MEMBER piece_state;
|
||||||
|
#else
|
||||||
|
// hidden
|
||||||
|
enum deprecated_state_t { none, slow, medium, fast };
|
||||||
|
deprecated_state_t deprecated_piece_state;
|
||||||
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
// for std::hash (and to support using this type in unordered_map etc.)
|
// for std::hash (and to support using this type in unordered_map etc.)
|
||||||
|
|
|
@ -39,6 +39,7 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||||
// convert_to_native and convert_from_native
|
// convert_to_native and convert_from_native
|
||||||
#if TORRENT_USE_WSTRING || defined TORRENT_WINDOWS
|
#if TORRENT_USE_WSTRING || defined TORRENT_WINDOWS
|
||||||
|
|
||||||
|
#include <cstdint>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <cwchar>
|
#include <cwchar>
|
||||||
|
|
||||||
|
@ -78,6 +79,10 @@ namespace libtorrent {
|
||||||
TORRENT_EXTRA_EXPORT std::wstring utf8_wchar(string_view utf8);
|
TORRENT_EXTRA_EXPORT std::wstring utf8_wchar(string_view utf8);
|
||||||
TORRENT_EXTRA_EXPORT std::string wchar_utf8(wstring_view wide, error_code& ec);
|
TORRENT_EXTRA_EXPORT std::string wchar_utf8(wstring_view wide, error_code& ec);
|
||||||
TORRENT_EXTRA_EXPORT std::string wchar_utf8(wstring_view wide);
|
TORRENT_EXTRA_EXPORT std::string wchar_utf8(wstring_view wide);
|
||||||
|
|
||||||
|
// TODO: 3 take a string_view here
|
||||||
|
TORRENT_EXTRA_EXPORT std::pair<std::int32_t, int>
|
||||||
|
parse_utf8_codepoint(char const* str, int len);
|
||||||
}
|
}
|
||||||
#endif // !BOOST_NO_STD_WSTRING
|
#endif // !BOOST_NO_STD_WSTRING
|
||||||
|
|
||||||
|
|
|
@ -171,7 +171,7 @@ if (result == sourceIllegal) {
|
||||||
* left as-is for anyone who may want to do such conversion, which was
|
* left as-is for anyone who may want to do such conversion, which was
|
||||||
* allowed in earlier algorithms.
|
* allowed in earlier algorithms.
|
||||||
*/
|
*/
|
||||||
static const char trailingBytesForUTF8[256] = {
|
const char trailingBytesForUTF8[256] = {
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
@ -187,7 +187,7 @@ static const char trailingBytesForUTF8[256] = {
|
||||||
* This table contains as many values as there might be trailing bytes
|
* This table contains as many values as there might be trailing bytes
|
||||||
* in a UTF-8 sequence.
|
* in a UTF-8 sequence.
|
||||||
*/
|
*/
|
||||||
static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL,
|
const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL,
|
||||||
0x03C82080UL, 0xFA082080UL, 0x82082080UL };
|
0x03C82080UL, 0xFA082080UL, 0x82082080UL };
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -292,7 +292,7 @@ ConversionResult ConvertUTF16toUTF8 (
|
||||||
* definition of UTF-8 goes up to 4-byte sequences.
|
* definition of UTF-8 goes up to 4-byte sequences.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static Boolean isLegalUTF8(const UTF8 *source, int length) {
|
Boolean isLegalUTF8(const UTF8 *source, int length) {
|
||||||
UTF8 a;
|
UTF8 a;
|
||||||
const UTF8 *srcptr = source+length;
|
const UTF8 *srcptr = source+length;
|
||||||
switch (length) {
|
switch (length) {
|
||||||
|
|
|
@ -6283,6 +6283,11 @@ namespace libtorrent {
|
||||||
pi.finished = int(i->finished);
|
pi.finished = int(i->finished);
|
||||||
pi.writing = int(i->writing);
|
pi.writing = int(i->writing);
|
||||||
pi.requested = int(i->requested);
|
pi.requested = int(i->requested);
|
||||||
|
#ifndef TORRENT_NO_DEPRECATE
|
||||||
|
pi.piece_state = partial_piece_info::none;
|
||||||
|
#else
|
||||||
|
pi.deprecated_piece_state = partial_piece_info::none;
|
||||||
|
#endif
|
||||||
TORRENT_ASSERT(counter * blocks_per_piece + pi.blocks_in_piece <= int(blk.size()));
|
TORRENT_ASSERT(counter * blocks_per_piece + pi.blocks_in_piece <= int(blk.size()));
|
||||||
pi.blocks = &blk[std::size_t(counter * blocks_per_piece)];
|
pi.blocks = &blk[std::size_t(counter * blocks_per_piece)];
|
||||||
int const piece_size = torrent_file().piece_size(i->index);
|
int const piece_size = torrent_file().piece_size(i->index);
|
||||||
|
|
|
@ -60,24 +60,44 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include <unordered_set>
|
#include <unordered_set>
|
||||||
|
#include <cstdint>
|
||||||
#include <iterator>
|
#include <iterator>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <set>
|
#include <set>
|
||||||
#include <ctime>
|
#include <ctime>
|
||||||
|
#include <array>
|
||||||
|
|
||||||
namespace libtorrent {
|
namespace libtorrent {
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
bool valid_path_character(char c)
|
bool valid_path_character(std::int32_t const c)
|
||||||
{
|
{
|
||||||
#ifdef TORRENT_WINDOWS
|
#ifdef TORRENT_WINDOWS
|
||||||
static const char invalid_chars[] = "?<>\"|\b*:";
|
static const char invalid_chars[] = "?<>\"|\b*:";
|
||||||
#else
|
#else
|
||||||
static const char invalid_chars[] = "";
|
static const char invalid_chars[] = "";
|
||||||
#endif
|
#endif
|
||||||
if (c >= 0 && c < 32) return false;
|
if (c < 32) return false;
|
||||||
return std::strchr(invalid_chars, c) == nullptr;
|
if (c > 127) return true;
|
||||||
|
return std::strchr(invalid_chars, static_cast<char>(c)) == nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool filter_path_character(std::int32_t const c)
|
||||||
|
{
|
||||||
|
// these unicode characters change the writing direction of the
|
||||||
|
// string and can be used for attacks:
|
||||||
|
// https://security.stackexchange.com/questions/158802/how-can-this-executable-have-an-avi-extension
|
||||||
|
static const std::array<std::int32_t, 7> bad_cp = {{0x202a, 0x202b, 0x202c, 0x202d, 0x202e, 0x200e, 0x200f}};
|
||||||
|
if (std::find(bad_cp.begin(), bad_cp.end(), c) != bad_cp.end()) return true;
|
||||||
|
|
||||||
|
#ifdef TORRENT_WINDOWS
|
||||||
|
static const char invalid_chars[] = "/\\:";
|
||||||
|
#else
|
||||||
|
static const char invalid_chars[] = "/\\";
|
||||||
|
#endif
|
||||||
|
if (c > 127) return false;
|
||||||
|
return std::strchr(invalid_chars, static_cast<char>(c)) != NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
} // anonymous namespace
|
} // anonymous namespace
|
||||||
|
@ -209,118 +229,36 @@ namespace libtorrent {
|
||||||
// the number of dots we've added
|
// the number of dots we've added
|
||||||
char num_dots = 0;
|
char num_dots = 0;
|
||||||
bool found_extension = false;
|
bool found_extension = false;
|
||||||
for (std::size_t i = 0; i < element.size(); ++i)
|
|
||||||
|
int seq_len = 0;
|
||||||
|
for (std::size_t i = 0; i < element.size(); i += std::size_t(seq_len))
|
||||||
{
|
{
|
||||||
if (element[i] == '/'
|
std::int32_t code_point;
|
||||||
|| element[i] == '\\'
|
std::tie(code_point, seq_len) = parse_utf8_codepoint(element.data() + i
|
||||||
#ifdef TORRENT_WINDOWS
|
, int(element.size() - i));
|
||||||
|| element[i] == ':'
|
|
||||||
#endif
|
if (code_point >= 0 && filter_path_character(code_point))
|
||||||
)
|
{
|
||||||
continue;
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
if (element[i] == '.') ++num_dots;
|
if (code_point < 0
|
||||||
|
|| !valid_path_character(code_point))
|
||||||
int last_len = 0;
|
|
||||||
|
|
||||||
if ((element[i] & 0x80) == 0)
|
|
||||||
{
|
|
||||||
// 1 byte
|
|
||||||
if (valid_path_character(element[i]))
|
|
||||||
{
|
|
||||||
path += element[i];
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
path += '_';
|
|
||||||
}
|
|
||||||
last_len = 1;
|
|
||||||
}
|
|
||||||
else if ((element[i] & 0xe0) == 0xc0)
|
|
||||||
{
|
|
||||||
// 2 bytes
|
|
||||||
if (element.size() - i < 2
|
|
||||||
|| (element[i + 1] & 0xc0) != 0x80)
|
|
||||||
{
|
|
||||||
path += '_';
|
|
||||||
last_len = 1;
|
|
||||||
}
|
|
||||||
else if ((element[i] & 0x1f) == 0)
|
|
||||||
{
|
|
||||||
// overlong sequences are invalid
|
|
||||||
path += '_';
|
|
||||||
last_len = 1;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
path += element[i];
|
|
||||||
path += element[i + 1];
|
|
||||||
last_len = 2;
|
|
||||||
}
|
|
||||||
i += 1;
|
|
||||||
}
|
|
||||||
else if ((element[i] & 0xf0) == 0xe0)
|
|
||||||
{
|
|
||||||
// 3 bytes
|
|
||||||
if (element.size() - i < 3
|
|
||||||
|| (element[i + 1] & 0xc0) != 0x80
|
|
||||||
|| (element[i + 2] & 0xc0) != 0x80
|
|
||||||
)
|
|
||||||
{
|
|
||||||
path += '_';
|
|
||||||
last_len = 1;
|
|
||||||
}
|
|
||||||
else if ((element[i] & 0x0f) == 0)
|
|
||||||
{
|
|
||||||
// overlong sequences are invalid
|
|
||||||
path += '_';
|
|
||||||
last_len = 1;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
path += element[i];
|
|
||||||
path += element[i + 1];
|
|
||||||
path += element[i + 2];
|
|
||||||
last_len = 3;
|
|
||||||
}
|
|
||||||
i += 2;
|
|
||||||
}
|
|
||||||
else if ((element[i] & 0xf8) == 0xf0)
|
|
||||||
{
|
|
||||||
// 4 bytes
|
|
||||||
if (element.size() - i < 4
|
|
||||||
|| (element[i + 1] & 0xc0) != 0x80
|
|
||||||
|| (element[i + 2] & 0xc0) != 0x80
|
|
||||||
|| (element[i + 3] & 0xc0) != 0x80
|
|
||||||
)
|
|
||||||
{
|
|
||||||
path += '_';
|
|
||||||
last_len = 1;
|
|
||||||
}
|
|
||||||
else if ((element[i] & 0x07) == 0
|
|
||||||
&& (element[i + 1] & 0x3f) == 0)
|
|
||||||
{
|
|
||||||
// overlong sequences are invalid
|
|
||||||
path += '_';
|
|
||||||
last_len = 1;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
path += element[i];
|
|
||||||
path += element[i + 1];
|
|
||||||
path += element[i + 2];
|
|
||||||
path += element[i + 3];
|
|
||||||
last_len = 4;
|
|
||||||
}
|
|
||||||
i += 3;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
{
|
||||||
|
// invalid utf8 sequence, replace with "_"
|
||||||
path += '_';
|
path += '_';
|
||||||
last_len = 1;
|
++added;
|
||||||
|
++unicode_chars;
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
added += last_len;
|
// validation passed, add it to the output string
|
||||||
|
for (std::size_t k = i; k < i + std::size_t(seq_len); ++k)
|
||||||
|
path.push_back(element[k]);
|
||||||
|
|
||||||
|
if (code_point == '.') ++num_dots;
|
||||||
|
|
||||||
|
added += seq_len;
|
||||||
++unicode_chars;
|
++unicode_chars;
|
||||||
|
|
||||||
// any given path element should not
|
// any given path element should not
|
||||||
|
|
33
src/utf8.cpp
33
src/utf8.cpp
|
@ -268,6 +268,39 @@ namespace {
|
||||||
if (ec) aux::throw_ex<system_error>(ec);
|
if (ec) aux::throw_ex<system_error>(ec);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// returns the unicode codepoint and the number of bytes of the utf8 sequence
|
||||||
|
// that was parsed. The codepoint is -1 if it's invalid
|
||||||
|
std::pair<std::int32_t, int> parse_utf8_codepoint(char const* str, int const len)
|
||||||
|
{
|
||||||
|
int const sequence_len = trailingBytesForUTF8[static_cast<std::uint8_t>(*str)] + 1;
|
||||||
|
if (sequence_len > len) return std::make_pair(-1, len);
|
||||||
|
|
||||||
|
if (sequence_len > 4)
|
||||||
|
{
|
||||||
|
return std::make_pair(-1, sequence_len);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!isLegalUTF8(reinterpret_cast<UTF8 const*>(str), sequence_len))
|
||||||
|
{
|
||||||
|
return std::make_pair(-1, sequence_len);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::uint32_t ch = 0;
|
||||||
|
for (int i = 0; i < sequence_len; ++i)
|
||||||
|
{
|
||||||
|
ch <<= 6;
|
||||||
|
ch += static_cast<std::uint8_t>(str[i]);
|
||||||
|
}
|
||||||
|
ch -= offsetsFromUTF8[sequence_len-1];
|
||||||
|
|
||||||
|
if (ch > 0x7fffffff)
|
||||||
|
{
|
||||||
|
return std::make_pair(-1, sequence_len);
|
||||||
|
}
|
||||||
|
|
||||||
|
return std::make_pair(static_cast<std::int32_t>(ch), sequence_len);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef __clang__
|
#ifdef __clang__
|
||||||
|
|
|
@ -493,7 +493,7 @@ TORRENT_TEST(sanitize_path)
|
||||||
// 5-byte utf-8 sequence (not allowed)
|
// 5-byte utf-8 sequence (not allowed)
|
||||||
path.clear();
|
path.clear();
|
||||||
sanitize_append_path_element(path, "filename\xf8\x9f\x9f\x9f\x9f" "foobar");
|
sanitize_append_path_element(path, "filename\xf8\x9f\x9f\x9f\x9f" "foobar");
|
||||||
TEST_EQUAL(path, "filename_____foobar");
|
TEST_EQUAL(path, "filename_foobar");
|
||||||
|
|
||||||
// redundant (overlong) 2-byte sequence
|
// redundant (overlong) 2-byte sequence
|
||||||
// ascii code 0x2e encoded with a leading 0
|
// ascii code 0x2e encoded with a leading 0
|
||||||
|
@ -512,6 +512,23 @@ TORRENT_TEST(sanitize_path)
|
||||||
path.clear();
|
path.clear();
|
||||||
sanitize_append_path_element(path, "filename\xf0\x80\x80\xae");
|
sanitize_append_path_element(path, "filename\xf0\x80\x80\xae");
|
||||||
TEST_EQUAL(path, "filename_");
|
TEST_EQUAL(path, "filename_");
|
||||||
|
|
||||||
|
// a filename where every character is filtered is not replaced by an underscore
|
||||||
|
path.clear();
|
||||||
|
sanitize_append_path_element(path, "//\\");
|
||||||
|
TEST_EQUAL(path, "");
|
||||||
|
|
||||||
|
// make sure suspicious unicode characters are filtered out
|
||||||
|
path.clear();
|
||||||
|
// that's utf-8 for U+200e LEFT-TO-RIGHT MARK
|
||||||
|
sanitize_append_path_element(path, "foo\xe2\x80\x8e" "bar");
|
||||||
|
TEST_EQUAL(path, "foobar");
|
||||||
|
|
||||||
|
// make sure suspicious unicode characters are filtered out
|
||||||
|
path.clear();
|
||||||
|
// that's utf-8 for U+202b RIGHT-TO-LEFT EMBEDDING
|
||||||
|
sanitize_append_path_element(path, "foo\xe2\x80\xab" "bar");
|
||||||
|
TEST_EQUAL(path, "foobar");
|
||||||
}
|
}
|
||||||
|
|
||||||
TORRENT_TEST(verify_encoding)
|
TORRENT_TEST(verify_encoding)
|
||||||
|
|
Loading…
Reference in New Issue