diff --git a/AUTHORS b/AUTHORS index 39b676e04..cbb4f02e9 100644 --- a/AUTHORS +++ b/AUTHORS @@ -15,6 +15,8 @@ Stas Khirman Ryan Norton Andrew Resch +Thanks to (github user) nervoir for bug reports + Building and maintainance of the autotools scripts: Michael Wojciechowski Peter Koeleman diff --git a/ChangeLog b/ChangeLog index bb23e9298..472d9f775 100644 --- a/ChangeLog +++ b/ChangeLog @@ -76,6 +76,9 @@ * resume data no longer has timestamps of files * require C++11 to build libtorrent + * fix utf-8 encoding check in torrent parser + * fix infinite loop when parsing maliciously crafted torrents + * fix invalid read in parse_int in bdecoder * fix issue with very long tracker- and web seed URLs * don't attempt to create empty files on startup, if they already exist * fix force-recheck issue (new files would not be picked up) diff --git a/include/libtorrent/string_util.hpp b/include/libtorrent/string_util.hpp index abd4e0073..7f44eb81d 100644 --- a/include/libtorrent/string_util.hpp +++ b/include/libtorrent/string_util.hpp @@ -119,6 +119,15 @@ namespace libtorrent { TORRENT_EXTRA_EXPORT bool is_i2p_url(std::string const& url); #endif + + // this can be used as the hash function in std::unordered_* + struct TORRENT_EXTRA_EXPORT string_hash_no_case + { size_t operator()(std::string const& s) const; }; + + // these can be used as the comparison functions in std::map and std::set + struct TORRENT_EXTRA_EXPORT string_eq_no_case + { bool operator()(std::string const& lhs, std::string const& rhs) const; }; + } #endif diff --git a/src/ConvertUTF.cpp b/src/ConvertUTF.cpp index 422e992ba..e5789a1f0 100644 --- a/src/ConvertUTF.cpp +++ b/src/ConvertUTF.cpp @@ -300,7 +300,7 @@ Boolean isLegalUTF8(const UTF8 *source, int length) { /* Everything else falls through when "true"... */ case 4: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false; case 3: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false; - case 2: if ((a = (*--srcptr)) > 0xBF) return false; + case 2: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false; switch (*source) { /* no fall-through in this inner switch */ diff --git a/src/bdecode.cpp b/src/bdecode.cpp index e0f424f90..446306bc6 100644 --- a/src/bdecode.cpp +++ b/src/bdecode.cpp @@ -121,10 +121,12 @@ namespace { } // anonymous namespace - // fills in 'val' with what the string between start and the - // first occurrence of the delimiter is interpreted as an int. - // return the pointer to the delimiter, or 0 if there is a - // parse error. val should be initialized to zero + // reads the string between start and end, or up to the first occurrance of + // 'delimiter', whichever comes first. This string is interpreted as an + // integer which is assigned to 'val'. If there's a non-delimiter and + // non-digit in the range, a parse error is reported in 'ec'. If the value + // cannot be represented by the variable 'val' and overflow error is reported + // by 'ec'. char const* parse_int(char const* start, char const* end, char delimiter , std::int64_t& val, bdecode_errors::error_code_enum& ec) { @@ -150,8 +152,6 @@ namespace { val += digit; ++start; } - if (*start != delimiter) - ec = bdecode_errors::expected_colon; return start; } @@ -663,9 +663,11 @@ namespace { bool negative = false; if (*ptr == '-') negative = true; bdecode_errors::error_code_enum ec = bdecode_errors::no_error; - parse_int(ptr + negative + char const* end = parse_int(ptr + negative , ptr + size, 'e', val, ec); if (ec) return 0; + TORRENT_UNUSED(end); + TORRENT_ASSERT(end < ptr + size); if (negative) val = -val; return val; } @@ -890,6 +892,8 @@ namespace { start = parse_int(start, end, ':', len, e); if (e) TORRENT_FAIL_BDECODE(e); + if (start == end) + TORRENT_FAIL_BDECODE(bdecode_errors::expected_colon); // remaining buffer size excluding ':' ptrdiff_t const buff_size = end - start - 1; diff --git a/src/lazy_bdecode.cpp b/src/lazy_bdecode.cpp index 904a4e8b6..71a6e80d3 100644 --- a/src/lazy_bdecode.cpp +++ b/src/lazy_bdecode.cpp @@ -131,6 +131,8 @@ namespace { start = parse_int(start, end, ':', len, e); if (e) TORRENT_FAIL_BDECODE(e); + if (start == end) + TORRENT_FAIL_BDECODE(bdecode_errors::expected_colon); // remaining buffer size excluding ':' const ptrdiff_t buff_size = end - start - 1; @@ -203,6 +205,8 @@ namespace { start = parse_int(start, end, ':', len, e); if (e) TORRENT_FAIL_BDECODE(e); + if (start == end) + TORRENT_FAIL_BDECODE(bdecode_errors::expected_colon); // remaining buffer size excluding ':' const ptrdiff_t buff_size = end - start - 1; diff --git a/src/string_util.cpp b/src/string_util.cpp index ce7e2bba2..41edc027b 100644 --- a/src/string_util.cpp +++ b/src/string_util.cpp @@ -401,4 +401,27 @@ namespace libtorrent { #endif + std::size_t string_hash_no_case::operator()(std::string const& s) const + { + std::size_t ret = 5381; + for (std::string::const_iterator i = s.begin(); i != s.end(); ++i) + ret = (ret * 33) ^ static_cast(to_lower(*i)); + return ret; + } + + bool string_eq_no_case::operator()(std::string const& lhs, std::string const& rhs) const + { + if (lhs.size() != rhs.size()) return false; + + std::string::const_iterator s1 = lhs.begin(); + std::string::const_iterator s2 = rhs.begin(); + + while (s1 != lhs.end() && s2 != rhs.end()) + { + if (to_lower(*s1) != to_lower(*s2)) return false; + ++s1; + ++s2; + } + return true; + } } diff --git a/src/torrent.cpp b/src/torrent.cpp index 0b700528a..b7ee5a04c 100644 --- a/src/torrent.cpp +++ b/src/torrent.cpp @@ -5897,6 +5897,7 @@ namespace libtorrent { if (is_paused()) return; if (m_ses.is_aborted()) return; + if (is_upload_only()) return; // this web seed may have redirected all files to other URLs, leaving it // having no file left, and there's no longer any point in connecting to diff --git a/src/torrent_info.cpp b/src/torrent_info.cpp index 2f7a72726..0508c06dc 100644 --- a/src/torrent_info.cpp +++ b/src/torrent_info.cpp @@ -244,8 +244,7 @@ namespace libtorrent { continue; } - if (code_point < 0 - || !valid_path_character(code_point)) + if (code_point < 0 || !valid_path_character(code_point)) { // invalid utf8 sequence, replace with "_" path += '_'; @@ -254,9 +253,14 @@ namespace libtorrent { continue; } + TORRENT_ASSERT(isLegalUTF8(reinterpret_cast(element.data() + i), seq_len)); + // validation passed, add it to the output string for (std::size_t k = i; k < i + std::size_t(seq_len); ++k) + { + TORRENT_ASSERT(element[k] != 0); path.push_back(element[k]); + } if (code_point == '.') ++num_dots; @@ -486,46 +490,6 @@ namespace { return true; } - struct string_hash_no_case - { - std::size_t operator()(std::string const& s) const - { - char const* s1 = s.c_str(); - std::size_t ret = 5381; - int c; - - for (;;) - { - c = *s1++; - if (c == 0) - break; - ret = (ret * 33) ^ std::size_t(to_lower(char(c))); - } - - return ret; - } - }; - - struct string_eq_no_case - { - bool operator()(std::string const& lhs, std::string const& rhs) const - { - char c1, c2; - char const* s1 = lhs.c_str(); - char const* s2 = rhs.c_str(); - - while (*s1 != 0 && *s2 != 0) - { - c1 = to_lower(*s1); - c2 = to_lower(*s2); - if (c1 != c2) return false; - ++s1; - ++s2; - } - return *s1 == *s2; - } - }; - // root_dir is the name of the torrent, unless this is a single file // torrent, in which case it's empty. bool extract_files(bdecode_node const& list, file_storage& target @@ -644,7 +608,7 @@ namespace { { // as long as this file already exists // increase the counter - std::uint32_t h = m_files.file_path_hash(i, empty_str); + std::uint32_t const h = m_files.file_path_hash(i, empty_str); if (!files.insert(h).second) { // This filename appears to already exist! diff --git a/src/web_connection_base.cpp b/src/web_connection_base.cpp index 96b7737a8..1b62de42e 100644 --- a/src/web_connection_base.cpp +++ b/src/web_connection_base.cpp @@ -62,6 +62,8 @@ namespace libtorrent { TORRENT_ASSERT(is_outgoing()); + TORRENT_ASSERT(!m_torrent.lock()->is_upload_only()); + // we only want left-over bandwidth // TODO: introduce a web-seed default class which has a low download priority @@ -102,9 +104,10 @@ namespace libtorrent { // which fails because the m_num_connecting count is not consistent until // after we call peer_connection::start m_upload_only = true; - disconnect_if_redundant(); - if (is_disconnecting()) return; peer_connection::start(); + // disconnect_if_redundant must be called after start to keep + // m_num_connecting consistent + disconnect_if_redundant(); } web_connection_base::~web_connection_base() = default; diff --git a/test/test_bdecode.cpp b/test/test_bdecode.cpp index 6484d93f6..1915f6271 100644 --- a/test/test_bdecode.cpp +++ b/test/test_bdecode.cpp @@ -857,8 +857,9 @@ TORRENT_TEST(parse_int) { char b[] = "1234567890e"; std::int64_t val = 0; - bdecode_errors::error_code_enum ec; + bdecode_errors::error_code_enum ec = bdecode_errors::no_error; char const* e = parse_int(b, b + sizeof(b)-1, 'e', val, ec); + TEST_EQUAL(ec, bdecode_errors::no_error); TEST_EQUAL(val, 1234567890); TEST_EQUAL(e, b + sizeof(b) - 2); } @@ -901,7 +902,7 @@ TORRENT_TEST(parse_length_overflow) bdecode_node e; int ret = bdecode(b[i], b[i] + strlen(b[i]), e, ec); TEST_EQUAL(ret, -1); - TEST_CHECK(ec == error_code(bdecode_errors::unexpected_eof)); + TEST_EQUAL(ec, error_code(bdecode_errors::unexpected_eof)); } } @@ -910,9 +911,9 @@ TORRENT_TEST(expected_colon_string) { char b[] = "928"; std::int64_t val = 0; - bdecode_errors::error_code_enum ec; + bdecode_errors::error_code_enum ec = bdecode_errors::no_error; char const* e = parse_int(b, b + sizeof(b)-1, ':', val, ec); - TEST_EQUAL(ec, bdecode_errors::expected_colon); + TEST_EQUAL(ec, bdecode_errors::no_error); TEST_EQUAL(e, b + 3); } @@ -1395,6 +1396,21 @@ TORRENT_TEST(partial_parse4) TEST_EQUAL(print_entry(e), "{ 'a': 1, 'b': 'foo', 'c': [ 1 ] }"); } +TORRENT_TEST(partial_parse_string) +{ + // it's important to not have a null terminator here + // to allow address sanitizer to trigger in case the decoder reads past the + // end + char b[] = { '5', '5'}; + + bdecode_node e; + error_code ec; + int pos; + int ret = bdecode(b, b + sizeof(b), e, ec, &pos); + TEST_EQUAL(ret, -1); + TEST_EQUAL(pos, 2); +} + // test switch_underlying_buffer TORRENT_TEST(switch_buffer) { diff --git a/test/test_bencoding.cpp b/test/test_bencoding.cpp index 1ef5edf13..ea26b8a4c 100644 --- a/test/test_bencoding.cpp +++ b/test/test_bencoding.cpp @@ -579,8 +579,9 @@ TORRENT_TEST(lazy_entry) { char b[] = "1234567890e"; std::int64_t val = 0; - bdecode_errors::error_code_enum ec; + bdecode_errors::error_code_enum ec = bdecode_errors::no_error; char const* e = parse_int(b, b + sizeof(b)-1, 'e', val, ec); + TEST_CHECK(ec == bdecode_errors::no_error); TEST_EQUAL(val, 1234567890); TEST_EQUAL(e, b + sizeof(b) - 2); } @@ -607,9 +608,9 @@ TORRENT_TEST(lazy_entry) { char b[] = "928"; std::int64_t val = 0; - bdecode_errors::error_code_enum ec; + bdecode_errors::error_code_enum ec = bdecode_errors::no_error; char const* e = parse_int(b, b + sizeof(b)-1, ':', val, ec); - TEST_CHECK(ec == bdecode_errors::expected_colon); + TEST_CHECK(ec == bdecode_errors::no_error); TEST_EQUAL(e, b + 3); } diff --git a/test/test_string.cpp b/test/test_string.cpp index ae8f10704..60e5bd031 100644 --- a/test/test_string.cpp +++ b/test/test_string.cpp @@ -412,3 +412,39 @@ TORRENT_TEST(i2p_url) TEST_CHECK(!is_i2p_url("http://i2p/foo bar")); } #endif + +TORRENT_TEST(string_hash_no_case) +{ + string_hash_no_case hsh; + + // make sure different strings yield different hashes + TEST_CHECK(hsh("ab") != hsh("ba")); + + // make sure case is ignored + TEST_EQUAL(hsh("Ab"), hsh("ab")); + TEST_EQUAL(hsh("Ab"), hsh("aB")); + + // make sure zeroes in strings are supported + TEST_CHECK(hsh(std::string("\0a", 2)) != hsh(std::string("\0b", 2))); + TEST_EQUAL(hsh(std::string("\0a", 2)), hsh(std::string("\0a", 2))); +} + +TORRENT_TEST(string_eq_no_case) +{ + string_eq_no_case cmp; + TEST_CHECK(cmp("ab", "ba") == false); + TEST_CHECK(cmp("", "")); + TEST_CHECK(cmp("abc", "abc")); + + // make sure different lengths are correctly treated as different + TEST_CHECK(cmp("abc", "ab") == false); + + // make sure case is ignored + TEST_CHECK(cmp("Ab", "ab")); + TEST_CHECK(cmp("Ab", "aB")); + + // make sure zeros are supported + TEST_CHECK(cmp(std::string("\0a", 2), std::string("\0b", 2)) == false); + TEST_CHECK(cmp(std::string("\0a", 2), std::string("\0a", 2))); +} + diff --git a/test/test_torrent_info.cpp b/test/test_torrent_info.cpp index 930b58258..bbe89dddf 100644 --- a/test/test_torrent_info.cpp +++ b/test/test_torrent_info.cpp @@ -350,6 +350,10 @@ TORRENT_TEST(sanitize_path_trailing_spaces) TORRENT_TEST(sanitize_path) { std::string path; + sanitize_append_path_element(path, "\0\0\xed\0\x80"); + TEST_EQUAL(path, "_"); + + path.clear(); sanitize_append_path_element(path, "/a/"); sanitize_append_path_element(path, "b"); sanitize_append_path_element(path, "c"); @@ -530,6 +534,17 @@ TORRENT_TEST(sanitize_path) TEST_EQUAL(path, "foobar"); } +TORRENT_TEST(sanitize_path_zeroes) +{ + std::string path; + sanitize_append_path_element(path, "\0foo"); + TEST_EQUAL(path, "_"); + + path.clear(); + sanitize_append_path_element(path, "\0\0\0\0"); + TEST_EQUAL(path, "_"); +} + TORRENT_TEST(verify_encoding) { // verify_encoding @@ -609,6 +624,12 @@ TORRENT_TEST(verify_encoding) TEST_CHECK(!verify_encoding(test)); std::printf("%s\n", test.c_str()); TEST_CHECK(test == "filename____"); + + // missing byte header + test = "filename\xed\0\x80"; + TEST_CHECK(!verify_encoding(test)); + fprintf(stdout, "%s\n", test.c_str()); + TEST_CHECK(test == "filename_"); } TORRENT_TEST(parse_torrents)