From 196588c7b8c9ee1c49c76dd1e126a69f9afa45b1 Mon Sep 17 00:00:00 2001 From: Arvid Norberg Date: Sun, 14 Jul 2019 11:33:46 -0400 Subject: [PATCH] add torrent_info constructor overloads to control torrent file limits --- ChangeLog | 1 + examples/dump_torrent.cpp | 31 +++++------ include/libtorrent/torrent_info.hpp | 27 ++++++++-- src/torrent_info.cpp | 82 +++++++++++++++++++++++++---- 4 files changed, 110 insertions(+), 31 deletions(-) diff --git a/ChangeLog b/ChangeLog index cda69295e..3aa1414db 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,4 @@ + * add torrent_info constructor overloads to control torrent file limits * feature to disable DHT, PEX and LSD per torrent * fix issue where trackers from magnet links were not included in create_torrent() * make peer_info::client a byte array in python binding diff --git a/examples/dump_torrent.cpp b/examples/dump_torrent.cpp index 3a18be424..831cd2d4c 100644 --- a/examples/dump_torrent.cpp +++ b/examples/dump_torrent.cpp @@ -58,40 +58,37 @@ std::vector load_file(std::string const& filename) int main(int argc, char* argv[]) try { - if (argc < 2 || argc > 4) { - std::cerr << "usage: dump_torrent torrent-file [total-items-limit] [recursion-limit]\n"; + if (argc < 2 || argc > 5) { + std::cerr << "usage: dump_torrent torrent-file [total-items-limit] [recursion-limit] [piece-count-limit]\n"; return 1; } - int item_limit = 1000000; - int depth_limit = 1000; + lt::load_torrent_limits cfg; - if (argc > 2) item_limit = atoi(argv[2]); - if (argc > 3) depth_limit = atoi(argv[3]); + if (argc > 2) cfg.max_decode_tokens = atoi(argv[2]); + if (argc > 3) cfg.max_decode_depth = atoi(argv[3]); + if (argc > 4) cfg.max_pieces = atoi(argv[4]); std::vector buf = load_file(argv[1]); - lt::bdecode_node e; int pos = -1; lt::error_code ec; - std::cout << "decoding. 
recursion limit: " << depth_limit - << " total item count limit: " << item_limit << "\n"; - int const ret = lt::bdecode(&buf[0], &buf[0] + buf.size(), e, ec, &pos - , depth_limit, item_limit); + std::cout << "decoding. recursion limit: " << cfg.max_decode_depth + << " total item count limit: " << cfg.max_decode_tokens << "\n"; + lt::bdecode_node const e = lt::bdecode(buf, ec, &pos, cfg.max_decode_depth + , cfg.max_decode_tokens); std::printf("\n\n----- raw info -----\n\n%s\n", print_entry(e).c_str()); - if (ret != 0) { + if (ec) { std::cerr << "failed to decode: '" << ec.message() << "' at character: " << pos<< "\n"; return 1; } - lt::torrent_info const t(e); - e.clear(); - std::vector().swap(buf); + lt::torrent_info const t(std::move(e), cfg); + buf.clear(); // print info about torrent - std::printf("\n\n----- torrent file info -----\n\n" - "nodes:\n"); + std::printf("\n\n----- torrent file info -----\n\nnodes:\n"); for (auto const& i : t.nodes()) std::printf("%s: %d\n", i.first.c_str(), i.second); diff --git a/include/libtorrent/torrent_info.hpp b/include/libtorrent/torrent_info.hpp index f3f2e1c19..2ec54382d 100644 --- a/include/libtorrent/torrent_info.hpp +++ b/include/libtorrent/torrent_info.hpp @@ -111,6 +111,20 @@ namespace libtorrent { // used to disambiguate a bencoded buffer and a filename extern TORRENT_EXPORT from_span_t from_span; + // this object holds configuration options for limits to use when loading + // torrents. They are meant to prevent loading potentially malicious torrents + // that cause excessive memory allocations. + struct load_torrent_limits + { + int max_buffer_size = 6000000; + // the max number of pieces allowed in the torrent + int max_pieces = 0x100000; + // the max recursion depth in the bdecoded structure + int max_decode_depth = 100; + // the max number of bdecode tokens + int max_decode_tokens = 2000000; + }; + // TODO: there may be some opportunities to optimize the size if torrent_info. 
// specifically to turn some std::string and std::vector into pointers class TORRENT_EXPORT torrent_info @@ -157,6 +171,9 @@ namespace libtorrent { : torrent_info(span{buffer, size}, from_span) {} explicit torrent_info(span buffer, from_span_t); explicit torrent_info(std::string const& filename); + torrent_info(std::string const& filename, load_torrent_limits const& cfg); + torrent_info(span buffer, load_torrent_limits const& cfg, from_span_t); + torrent_info(bdecode_node const& torrent_file, load_torrent_limits const& cfg); #endif // BOOST_NO_EXCEPTIONS torrent_info(torrent_info const& t); explicit torrent_info(sha1_hash const& info_hash); @@ -524,7 +541,11 @@ namespace libtorrent { // where we only have the info-dict. The bdecode_node ``e`` points to a // parsed info-dictionary. ``ec`` returns an error code if something // fails (typically if the info dictionary is malformed). + // the `piece_limit` parameter allows limiting the amount of memory + // dedicated to loading the torrent, and fails for torrents that exceed + // the limit bool parse_info_section(bdecode_node const& e, error_code& ec); + bool parse_info_section(bdecode_node const& e, error_code& ec, int piece_limit); // This function looks up keys from the info-dictionary of the loaded // torrent file. 
It can be used to access extension values put in the @@ -551,11 +572,11 @@ namespace libtorrent { // __ http://bittorrent.org/beps/bep_0030.html bool is_merkle_torrent() const { return !m_merkle_tree.empty(); } - bool parse_torrent_file(bdecode_node const& libtorrent, error_code& ec); - - // if we're logging member offsets, we need access to them private: + bool parse_torrent_file(bdecode_node const& libtorrent, error_code& ec); + bool parse_torrent_file(bdecode_node const& libtorrent, error_code& ec, int piece_limit); + void resolve_duplicate_filenames(); // the slow path, in case we detect/suspect a name collision diff --git a/src/torrent_info.cpp b/src/torrent_info.cpp index 9b3ba4c38..2070921e6 100644 --- a/src/torrent_info.cpp +++ b/src/torrent_info.cpp @@ -78,6 +78,14 @@ namespace libtorrent { namespace { + // this is an arbitrary limit to avoid malicious torrents causing + // unreasonably large allocations for the merkle hash tree + // the size of the tree would be max_pieces * sizeof(int) * 2 + // which is about 8 MB with this limit + // TODO: remove this limit and the overloads that imply it, in favour of + // using load_torrent_limits + constexpr int default_piece_limit = 0x100000; + bool valid_path_character(std::int32_t const c) { #ifdef TORRENT_WINDOWS @@ -539,16 +547,21 @@ namespace { } int load_file(std::string const& filename, std::vector& v - , error_code& ec) + , error_code& ec, int const max_buffer_size = 80000000) { ec.clear(); file f; if (!f.open(filename, open_mode::read_only, ec)) return -1; - std::int64_t s = f.get_size(ec); + std::int64_t const s = f.get_size(ec); if (ec) return -1; + if (s > max_buffer_size) + { + ec = errors::metadata_too_large; + return -1; + } v.resize(std::size_t(s)); if (s == 0) return 0; - std::int64_t read = f.readv(0, {v}, ec); + std::int64_t const read = f.readv(0, {v}, ec); if (read != s) return -3; if (ec) return -3; return 0; @@ -869,6 +882,48 @@ namespace { INVARIANT_CHECK; } + 
torrent_info::torrent_info(bdecode_node const& torrent_file + , load_torrent_limits const& cfg) + { + error_code ec; + if (!parse_torrent_file(torrent_file, ec, cfg.max_pieces)) + aux::throw_ex(ec); + + INVARIANT_CHECK; + } + + torrent_info::torrent_info(span buffer + , load_torrent_limits const& cfg, from_span_t) + { + error_code ec; + bdecode_node e = bdecode(buffer, ec, nullptr + , cfg.max_decode_depth, cfg.max_decode_tokens); + if (ec) aux::throw_ex(ec); + + if (!parse_torrent_file(e, ec, cfg.max_pieces)) + aux::throw_ex(ec); + + INVARIANT_CHECK; + } + + torrent_info::torrent_info(std::string const& filename + , load_torrent_limits const& cfg) + { + std::vector buf; + error_code ec; + int ret = load_file(filename, buf, ec, cfg.max_buffer_size); + if (ret < 0) aux::throw_ex(ec); + + bdecode_node e = bdecode(buf, ec, nullptr, cfg.max_decode_depth + , cfg.max_decode_tokens); + if (ec) aux::throw_ex(ec); + + if (!parse_torrent_file(e, ec, cfg.max_pieces)) + aux::throw_ex(ec); + + INVARIANT_CHECK; + } + #if TORRENT_ABI_VERSION == 1 torrent_info::torrent_info(std::wstring const& filename) { @@ -1004,8 +1059,13 @@ namespace { return m_info_dict.dict_find_string_value("ssl-cert"); } + bool torrent_info::parse_info_section(bdecode_node const& e, error_code& ec) + { + return parse_info_section(e, ec, default_piece_limit); + } + bool torrent_info::parse_info_section(bdecode_node const& info - , error_code& ec) + , error_code& ec, int const max_pieces) { if (info.type() != bdecode_node::dict_t) { @@ -1129,12 +1189,6 @@ namespace { return false; } - // this is an arbitrary limit to avoid malicious torrents causing - // unreasaonably large allocations for the merkle hash tree - // the size of the tree would be max_pieces * sizeof(int) * 2 - // which is about 6.3 MB with this limit - const int max_pieces = 0xC0000; - // we expect the piece hashes to be < 2 GB in size if (files.num_pieces() >= std::numeric_limits::max() / 20 || files.num_pieces() > max_pieces) @@ -1315,6 
+1369,12 @@ namespace { bool torrent_info::parse_torrent_file(bdecode_node const& torrent_file , error_code& ec) + { + return parse_torrent_file(torrent_file, ec, default_piece_limit); + } + + bool torrent_info::parse_torrent_file(bdecode_node const& torrent_file + , error_code& ec, int const piece_limit) { if (torrent_file.type() != bdecode_node::dict_t) { @@ -1342,7 +1402,7 @@ namespace { ec = errors::torrent_missing_info; return false; } - if (!parse_info_section(info, ec)) return false; + if (!parse_info_section(info, ec, piece_limit)) return false; resolve_duplicate_filenames(); #ifndef TORRENT_DISABLE_MUTABLE_TORRENTS