diff --git a/ChangeLog b/ChangeLog index d0656eca6..cc42e2387 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,4 @@ + * added support for merkle hash tree torrents (.merkle.torrent) * added 'seed mode', which assumes that all files are complete and checks hashes lazily, as blocks are requested * added new extension for file attributes (executable and hidden) diff --git a/docs/features.rst b/docs/features.rst index 589915ebf..ca42ed238 100644 --- a/docs/features.rst +++ b/docs/features.rst @@ -46,6 +46,8 @@ extensions * super seeding/initial seeding (`BEP 16`_). * private torrents (`BEP 27`_). * support for IPv6, including `BEP 7`_ and `BEP 24`_. +* support for merkle hash tree torrents. This makes the size of torrent files + scale well with the size of the content. .. _extensions: manual.html#extensions .. _`http seeding`: manual.html#http-seeding @@ -189,6 +191,38 @@ and hence decreasing the likelihood of slow peers blocking the completion of pie The piece picker can also be set to download pieces in sequential order. +merkle hash tree torrents +------------------------- + +Merkle hash tree torrents are an extension that lets a torrent file only contain the +root hash of the hash tree forming the piece hashes. The main benefit of this feature +is that regardless of how many pieces there are in a torrent, the .torrent file will +always be the same size. It will only grow with the number of files (since it still +has to contain the file names). + +With regular torrents, clients have to request multiple blocks for pieces, typically +from different peers, before the data can be verified against the piece hash. The +larger the pieces are, the longer it will take to download a complete piece and verify +it. Before the piece is verified, it cannot be shared with the swarm, which means the +larger the piece sizes, the slower the turnaround when data is downloaded by peers. 
+On average, the data has to sit around, waiting, in client buffers before it has +been verified and can be uploaded again. + +Another problem with large piece sizes is that it is harder for a client to pinpoint +the malicious or buggy peer when a piece fails, and it will take longer to re-download +it and take more tries before the piece succeeds the larger the pieces are. + +The piece size in regular torrents is a tradeoff between the size of the .torrent file +itself and the piece size. Often, for files that are 4 GB, the piece size is 2 or 4 MB, +just to avoid making the .torrent file too big. + +Merkle torrents solve these problems by removing the tradeoff between .torrent size and +piece size. With merkle torrents, the piece size can be the minimum block size (16 kB), +which lets peers verify every block of data received from peers, immediately. This +gives a minimum turnaround time and completely removes the problem of identifying malicious +peers. + + portability =========== diff --git a/docs/make_torrent.rst b/docs/make_torrent.rst index 63f16b446..f9991f68e 100644 --- a/docs/make_torrent.rst +++ b/docs/make_torrent.rst @@ -185,7 +185,8 @@ The ``create_torrent`` class has the following synopsis:: struct create_torrent { - create_torrent(file_storage& fs, int piece_size = 0, int pad_size_limit = -1); + enum { optimize = 1, merkle = 2 }; + create_torrent(file_storage& fs, int piece_size = 0, int pad_size_limit = -1, int flags = optimize); create_torrent(torrent_info const& ti); entry generate() const; @@ -211,7 +212,8 @@ create_torrent() :: - create_torrent(file_storage& fs, int piece_size = 0, int pad_size_limit = -1); + enum { optimize = 1, merkle = 2 }; + create_torrent(file_storage& fs, int piece_size = 0, int pad_size_limit = -1, int flags = optimize); create_torrent(torrent_info const& ti); The ``piece_size`` is the size of each piece in bytes. It must @@ -220,15 +222,30 @@ piece_size will becalculated such that the torrent file is roughly 40 kB. 
If a ``pad_size_limit`` is specified (other than -1), any file larger than the specified number of bytes will be preceeded by a pad file to align it -with the start od a piece. +with the start of a piece. The pad_file_limit is ignored unless the +``optimize`` flag is passed. -The overlad that takes a ``torrent_info`` object will make a verbatim +The overload that takes a ``torrent_info`` object will make a verbatim copy of its info dictionary (to preserve the info-hash). The copy of the info dictionary will be used by ``generate()``. This means that none of the member functions of create_torrent that affects the content of the info dictionary (such as ``set_hash()``), will have any affect. +The ``flags`` argument specifies options for the torrent creation. It can +be any combination of the following flags: + +optimize + This will insert pad files to align the files to piece boundaries, for + optimized disk-I/O. + +merkle + This will create a merkle hash tree torrent. A merkle torrent cannot + be opened in clients that don't specifically support merkle torrents. + The benefit is that the resulting torrent file will be much smaller and + not grow with more pieces. When this option is specified, it is + recommended to have a 16 kiB piece size. 
+ generate() ---------- diff --git a/examples/make_torrent.cpp b/examples/make_torrent.cpp index 21133a1fe..de763001f 100644 --- a/examples/make_torrent.cpp +++ b/examples/make_torrent.cpp @@ -72,6 +72,8 @@ void print_usage() "Generates a torrent file from the specified file\n" "or directory and writes it to standard out\n\n" "OPTIONS:\n" + "-m generate a merkle hash tree torrent.\n" + " merkle torrents require client support\n" "-w url adds a web seed to the torrent with\n" " the specified url\n" "-t url adds the specified tracker to the\n" @@ -103,6 +105,7 @@ int main(int argc, char* argv[]) std::vector trackers; int pad_file_limit = -1; int piece_size = 0; + int flags = 0; for (int i = 2; i < argc; ++i) { @@ -125,11 +128,15 @@ int main(int argc, char* argv[]) case 'p': ++i; pad_file_limit = atoi(argv[i]); + flags |= create_torrent::optimize; break; case 's': ++i; piece_size = atoi(argv[i]); break; + case 'm': + flags |= create_torrent::merkle; + break; default: print_usage(); return 1; @@ -142,7 +149,7 @@ int main(int argc, char* argv[]) add_files(fs, full_path, file_filter); - create_torrent t(fs, piece_size, pad_file_limit); + create_torrent t(fs, piece_size, pad_file_limit, flags); for (std::vector::iterator i = trackers.begin() , end(trackers.end()); i != end; ++i) t.add_tracker(*i); diff --git a/include/libtorrent/bt_peer_connection.hpp b/include/libtorrent/bt_peer_connection.hpp index dbc93fd0c..bddc89ba8 100644 --- a/include/libtorrent/bt_peer_connection.hpp +++ b/include/libtorrent/bt_peer_connection.hpp @@ -132,7 +132,7 @@ namespace libtorrent msg_have_none, msg_reject_request, msg_allowed_fast, - + // extension protocol message msg_extended = 20, diff --git a/include/libtorrent/create_torrent.hpp b/include/libtorrent/create_torrent.hpp index 93ec613d7..ae8a2c1f9 100644 --- a/include/libtorrent/create_torrent.hpp +++ b/include/libtorrent/create_torrent.hpp @@ -69,7 +69,7 @@ namespace libtorrent struct TORRENT_EXPORT create_torrent { - enum { optimize = 
1 }; + enum { optimize = 1, merkle = 2 }; create_torrent(file_storage& fs, int piece_size = 0 , int pad_file_limit = -1, int flags = optimize); @@ -134,11 +134,14 @@ namespace libtorrent // to know if it should be written as a multifile torrent // or not. e.g. test/test there's one file and one directory // and they have the same name. - bool m_multifile; + bool m_multifile:1; // this is true if the torrent is private. i.e., is should not // be announced on the dht - bool m_private; + bool m_private:1; + + // if set to one, a merkle torrent will be generated + bool m_merkle_torrent:1; }; namespace detail diff --git a/include/libtorrent/peer_connection.hpp b/include/libtorrent/peer_connection.hpp index 8b799bd68..1ba5001c1 100644 --- a/include/libtorrent/peer_connection.hpp +++ b/include/libtorrent/peer_connection.hpp @@ -538,6 +538,7 @@ namespace libtorrent bool has_disk_receive_buffer() const { return m_disk_recv_buffer; } void cut_receive_buffer(int size, int packet_size); void reset_recv_buffer(int packet_size); + void set_soft_packet_size(int size) { m_soft_packet_size = size; } void setup_receive(); @@ -736,6 +737,12 @@ namespace libtorrent // we're currently receiving int m_packet_size; + // some messages needs to be read from the socket + // buffer in multiple stages. This soft packet + // size limits the read size between message handler + // dispatch. 
Ignored when set to 0 + int m_soft_packet_size; + // the number of bytes of the bittorrent payload // we've received so far int m_recv_pos; diff --git a/include/libtorrent/peer_id.hpp b/include/libtorrent/peer_id.hpp index d737fb4fb..67d07cc7a 100644 --- a/include/libtorrent/peer_id.hpp +++ b/include/libtorrent/peer_id.hpp @@ -69,6 +69,11 @@ namespace libtorrent std::memcpy(m_number, &s[0], sl); } + void assign(char const* str) + { + std::memcpy(m_number, str, size); + } + void clear() { std::fill(m_number,m_number+number_size,0); diff --git a/include/libtorrent/torrent.hpp b/include/libtorrent/torrent.hpp index 7f9575d15..82198087c 100644 --- a/include/libtorrent/torrent.hpp +++ b/include/libtorrent/torrent.hpp @@ -692,6 +692,8 @@ namespace libtorrent m_verified.set_bit(piece); } + bool add_merkle_nodes(std::map const& n, int piece); + private: void on_files_deleted(int ret, disk_io_job const& j); diff --git a/include/libtorrent/torrent_info.hpp b/include/libtorrent/torrent_info.hpp index e9cd41256..4b53c1640 100644 --- a/include/libtorrent/torrent_info.hpp +++ b/include/libtorrent/torrent_info.hpp @@ -247,14 +247,27 @@ namespace libtorrent sha1_hash hash_for_piece(int index) const { return sha1_hash(hash_for_piece_ptr(index)); } + std::vector const& merkle_tree() const { return m_merkle_tree; } + void set_merkle_tree(std::vector& h) + { TORRENT_ASSERT(h.size() == m_merkle_tree.size() ); m_merkle_tree.swap(h); } + char const* hash_for_piece_ptr(int index) const { TORRENT_ASSERT(index >= 0); TORRENT_ASSERT(index < m_files.num_pieces()); - TORRENT_ASSERT(m_piece_hashes); - TORRENT_ASSERT(m_piece_hashes >= m_info_section.get()); - TORRENT_ASSERT(m_piece_hashes < m_info_section.get() + m_info_section_size); - return &m_piece_hashes[index*20]; + if (is_merkle_torrent()) + { + TORRENT_ASSERT(index < m_merkle_tree.size() - m_merkle_first_leaf); + return (const char*)&m_merkle_tree[m_merkle_first_leaf + index][0]; + } + else + { + TORRENT_ASSERT(m_piece_hashes); + 
TORRENT_ASSERT(m_piece_hashes >= m_info_section.get()); + TORRENT_ASSERT(m_piece_hashes < m_info_section.get() + m_info_section_size); + TORRENT_ASSERT(index < m_info_section_size / 20); + return &m_piece_hashes[index*20]; + } } boost::optional creation_date() const; @@ -290,6 +303,11 @@ namespace libtorrent int metadata_size() const { return m_info_section_size; } + bool add_merkle_nodes(std::map const& subtree + , int piece); + std::map build_merkle_list(int piece) const; + bool is_merkle_torrent() const { return !m_merkle_tree.empty(); } + private: void copy_on_write(); @@ -345,6 +363,14 @@ namespace libtorrent // pointing to the first byte of the first sha-1 hash char const* m_piece_hashes; + // if this is a merkle torrent, this is the merkle + // tree. It has space for merkle_num_nodes(merkle_num_leafs(num_pieces)) + // hashes + std::vector m_merkle_tree; + // the index to the first leaf. This is where the hash for the + // first piece is stored + int m_merkle_first_leaf; + // the info section parsed. 
points into m_info_section // parsed lazily mutable lazy_entry m_info_dict; diff --git a/src/bt_peer_connection.cpp b/src/bt_peer_connection.cpp index 1e7cdc288..6a87b70c9 100644 --- a/src/bt_peer_connection.cpp +++ b/src/bt_peer_connection.cpp @@ -788,9 +788,22 @@ namespace libtorrent *(i.begin + 5) |= 0x10; #endif + // we support merkle torrents + *(i.begin + 5) |= 0x08; + // we support FAST extension *(i.begin + 7) |= 0x04; +#ifdef TORRENT_VERBOSE_LOGGING + for (int k = 0; k < 8; ++k) + { + for (int j = 0; j < 8; ++j) + { + if (i.begin[k] & (0x80 >> j)) (*m_logger) << "1"; + else (*m_logger) << "0"; + } + } +#endif i.begin += 8; // info hash @@ -1060,17 +1073,57 @@ namespace libtorrent buffer::const_interval recv_buffer = receive_buffer(); int recv_pos = recv_buffer.end - recv_buffer.begin; - if (recv_pos == 1) + boost::shared_ptr t = associated_torrent().lock(); + TORRENT_ASSERT(t); + bool merkle = (unsigned char)recv_buffer.begin[0] == 250; + if (merkle) { - TORRENT_ASSERT(!has_disk_receive_buffer()); - if (!allocate_disk_receive_buffer(packet_size() - 9)) + if (recv_pos == 1) + { + set_soft_packet_size(13); + m_statistics.received_bytes(0, received); + return; + } + if (recv_pos < 13) { m_statistics.received_bytes(0, received); return; } + if (recv_pos == 13) + { + const char* ptr = recv_buffer.begin + 9; + int list_size = detail::read_int32(ptr); + // now we know how long the bencoded hash list is + // and we can allocate the disk buffer and receive + // into it + + if (list_size > packet_size() - 13) + { + disconnect("invalid hash list in hash piece message"); + return; + } + + TORRENT_ASSERT(!has_disk_receive_buffer()); + if (!allocate_disk_receive_buffer(packet_size() - 13 - list_size)) + { + m_statistics.received_bytes(0, received); + return; + } + } + } + else + { + if (recv_pos == 1) + { + TORRENT_ASSERT(!has_disk_receive_buffer()); + if (!allocate_disk_receive_buffer(packet_size() - 9)) + { + m_statistics.received_bytes(0, received); + return; + } + } 
} TORRENT_ASSERT(has_disk_receive_buffer() || packet_size() == 9); - // classify the received data as protocol chatter // or data payload for the statistics if (recv_pos <= 9) @@ -1090,15 +1143,86 @@ namespace libtorrent , 9 - (recv_pos - received)); } - incoming_piece_fragment(); - if (is_disconnecting()) return; - if (!packet_finished()) return; - const char* ptr = recv_buffer.begin + 1; peer_request p; p.piece = detail::read_int32(ptr); p.start = detail::read_int32(ptr); - p.length = packet_size() - 9; + + const int header_size = merkle?13:9; + int list_size = 0; + + if (merkle) + { + list_size = detail::read_int32(ptr); + p.length = packet_size() - list_size - header_size; + } + else + { + p.length = packet_size() - header_size; + } + +#ifdef TORRENT_VERBOSE_LOGGING +// (*m_logger) << time_now_string() << " <== PIECE_FRAGMENT p: " << p.piece +// << " start: " << p.start << " length: " << p.length << "\n"; +#endif + + if (recv_pos - received < header_size && recv_pos >= header_size) + { +// begin_receive_piece(p) + } + + TORRENT_ASSERT(has_disk_receive_buffer() || packet_size() == header_size); + + incoming_piece_fragment(); + if (is_disconnecting()) return; + if (!packet_finished()) return; + + if (merkle && list_size > 0) + { +#ifdef TORRENT_VERBOSE_LOGGING + (*m_logger) << time_now_string() << " <== HASHPIECE " << p.piece << " list: " << list_size << " "; +#endif + lazy_entry hash_list; + if (lazy_bdecode(recv_buffer.begin + 13, recv_buffer.end + 13 + list_size, hash_list) != 0) + { + disconnect("invalid bencoding in hashpiece message"); + return; + } + + // the list has this format: + // [ [node-index, hash], [node-index, hash], ... 
] + if (hash_list.type() != lazy_entry::list_t) + { + disconnect("invalid hash-list in hashpiece message"); + return; + } + + std::map nodes; + for (int i = 0; i < hash_list.list_size(); ++i) + { + lazy_entry const* e = hash_list.list_at(i); + if (e->type() != lazy_entry::list_t + || e->list_size() != 2 + || e->list_at(0)->type() != lazy_entry::int_t + || e->list_at(1)->type() != lazy_entry::string_t + || e->list_at(1)->string_length() != 20) continue; + +#ifdef TORRENT_VERBOSE_LOGGING + (*m_logger) << " " << e->list_int_value_at(0) << ": " + << sha1_hash(e->list_at(1)->string_ptr()); +#endif + nodes.insert(nodes.begin(), std::make_pair(int(e->list_int_value_at(0)) + , sha1_hash(e->list_at(1)->string_ptr()))); + } +#ifdef TORRENT_VERBOSE_LOGGING + (*m_logger) << "\n"; +#endif + if (!nodes.empty() && !t->add_merkle_nodes(nodes, p.piece)) + { + disconnect("invalid hashes in hashpiece message"); + return; + } + } disk_buffer_holder holder(m_ses, release_disk_receive_buffer()); incoming_piece(p, holder); @@ -1404,7 +1528,8 @@ namespace libtorrent buffer::const_interval recv_buffer = receive_buffer(); TORRENT_ASSERT(recv_buffer.left() >= 1); - int packet_type = recv_buffer[0]; + int packet_type = (unsigned char)recv_buffer[0]; + if (packet_type == 250) packet_type = msg_piece; if (packet_type < 0 || packet_type >= num_supported_messages || m_message_handler[packet_type] == 0) @@ -1780,14 +1905,53 @@ namespace libtorrent boost::shared_ptr t = associated_torrent().lock(); TORRENT_ASSERT(t); - char msg[4 + 1 + 4 + 4]; + bool merkle = t->torrent_file().is_merkle_torrent() && r.start == 0; + // the hash piece looks like this: + // uint8_t msg + // uint32_t piece index + // uint32_t start + // uint32_t list len + // var bencoded list + // var piece data + char msg[4 + 1 + 4 + 4 + 4]; char* ptr = msg; TORRENT_ASSERT(r.length <= 16 * 1024); detail::write_int32(r.length + 1 + 4 + 4, ptr); - detail::write_uint8(msg_piece, ptr); + if (merkle) + detail::write_uint8(250, ptr); + 
else + detail::write_uint8(msg_piece, ptr); detail::write_int32(r.piece, ptr); detail::write_int32(r.start, ptr); - send_buffer(msg, sizeof(msg)); + + // if this is a merkle torrent and the start offset + // is 0, we need to include the merkle node hashes + if (merkle) + { + std::vector piece_list_buf; + entry piece_list; + entry::list_type& l = piece_list.list(); + std::map merkle_node_list = t->torrent_file().build_merkle_list(r.piece); + for (std::map::iterator i = merkle_node_list.begin() + , end(merkle_node_list.end()); i != end; ++i) + { + l.push_back(entry(entry::list_t)); + l.back().list().push_back(i->first); + l.back().list().push_back(i->second.to_string()); + } + bencode(std::back_inserter(piece_list_buf), piece_list); + detail::write_int32(piece_list_buf.size(), ptr); + + char* ptr = msg; + detail::write_int32(r.length + 1 + 4 + 4 + 4 + piece_list_buf.size(), ptr); + + send_buffer(msg, 17); + send_buffer(&piece_list_buf[0], piece_list_buf.size()); + } + else + { + send_buffer(msg, 13); + } append_send_buffer(buffer.get(), r.length , boost::bind(&session_impl::free_disk_buffer diff --git a/src/create_torrent.cpp b/src/create_torrent.cpp index ce71599b4..e9f037bf3 100644 --- a/src/create_torrent.cpp +++ b/src/create_torrent.cpp @@ -90,6 +90,7 @@ namespace libtorrent , m_creation_date(pt::second_clock::universal_time()) , m_multifile(fs.num_files() > 1) , m_private(false) + , m_merkle_torrent(flags & merkle) { TORRENT_ASSERT(fs.num_files() > 0); #if BOOST_VERSION < 103600 @@ -99,7 +100,7 @@ namespace libtorrent #endif // a piece_size of 0 means automatic - if (piece_size == 0) + if (piece_size == 0 && !m_merkle_torrent) { const int target_size = 40 * 1024; piece_size = fs.total_size() / (target_size / 20); @@ -112,6 +113,10 @@ namespace libtorrent } piece_size = i; } + else if (piece_size == 0 && m_merkle_torrent) + { + piece_size = 16*1024; + } // make sure the size is an even power of 2 #ifndef NDEBUG @@ -137,6 +142,7 @@ namespace libtorrent , 
m_creation_date(pt::second_clock::universal_time()) , m_multifile(ti.num_files() > 1) , m_private(ti.priv()) + , m_merkle_torrent(ti.is_merkle_torrent()) { TORRENT_ASSERT(ti.is_valid()); if (ti.creation_date()) m_creation_date = *ti.creation_date(); @@ -290,14 +296,57 @@ namespace libtorrent } info["piece length"] = m_files.piece_length(); - entry& pieces = info["pieces"]; - - std::string& p = pieces.string(); - - for (std::vector::const_iterator i = m_piece_hash.begin(); - i != m_piece_hash.end(); ++i) + if (m_merkle_torrent) { - p.append((char*)i->begin(), (char*)i->end()); + std::vector merkle_tree; + + // defined in torrent_info.cpp + int merkle_num_leafs(int); + int merkle_num_nodes(int); + int merkle_get_parent(int); + int merkle_get_sibling(int); + + int num_leafs = merkle_num_leafs(m_files.num_pieces()); + int num_nodes = merkle_num_nodes(num_leafs); + int first_leaf = num_nodes - num_leafs; + merkle_tree.resize(num_nodes); + int num_pieces = m_piece_hash.size(); + for (int i = 0; i < num_pieces; ++i) + merkle_tree[first_leaf + i] = m_piece_hash[i]; + sha1_hash filler(0); + for (int i = num_pieces; i < num_leafs; ++i) + merkle_tree[first_leaf + i] = filler; + + // now that we have initialized all leaves, build + // each level bottom-up + int level_start = first_leaf; + int level_size = num_leafs; + while (level_start > 0) + { + int parent = merkle_get_parent(level_start); + for (int i = level_start; i < level_start + level_size; i += 2, ++parent) + { + hasher h; + h.update((char const*)&merkle_tree[i][0], 20); + h.update((char const*)&merkle_tree[i+1][0], 20); + merkle_tree[parent] = h.final(); + } + level_start = merkle_get_parent(level_start); + level_size /= 2; + } + TORRENT_ASSERT(level_size == 1); + std::string& p = info["root hash"].string(); + p.assign((char const*)&merkle_tree[0][0], 20); + } + else + { + std::string& p = info["pieces"].string(); + + for (std::vector::const_iterator i = m_piece_hash.begin(); + i != m_piece_hash.end(); ++i) + { + 
p.append((char*)i->begin(), (char*)i->end()); + } } std::vector buf; diff --git a/src/peer_connection.cpp b/src/peer_connection.cpp index c92871d4b..44c8a7e43 100644 --- a/src/peer_connection.cpp +++ b/src/peer_connection.cpp @@ -98,6 +98,7 @@ namespace libtorrent , m_num_pieces(0) , m_timeout(m_ses.settings().peer_timeout) , m_packet_size(0) + , m_soft_packet_size(0) , m_recv_pos(0) , m_disk_recv_buffer_size(0) , m_reading_bytes(0) @@ -210,6 +211,7 @@ namespace libtorrent , m_num_pieces(0) , m_timeout(m_ses.settings().peer_timeout) , m_packet_size(0) + , m_soft_packet_size(0) , m_recv_pos(0) , m_disk_recv_buffer_size(0) , m_reading_bytes(0) @@ -3512,6 +3514,9 @@ namespace libtorrent TORRENT_ASSERT(m_packet_size > 0); int max_receive = m_packet_size - m_recv_pos; + if (m_recv_pos >= m_soft_packet_size) m_soft_packet_size = 0; + if (m_soft_packet_size && max_receive > m_soft_packet_size - m_recv_pos) + max_receive = m_soft_packet_size - m_recv_pos; int quota_left = m_bandwidth_limit[download_channel].quota_left(); if (!m_ignore_bandwidth_limits && max_receive > quota_left) max_receive = quota_left; @@ -3775,7 +3780,10 @@ namespace libtorrent buffer(m_packet_size).swap(m_recv_buffer); } + if (m_recv_pos >= m_soft_packet_size) m_soft_packet_size = 0; max_receive = m_packet_size - m_recv_pos; + if (m_soft_packet_size && max_receive > m_soft_packet_size - m_recv_pos) + max_receive = m_soft_packet_size - m_recv_pos; int quota_left = m_bandwidth_limit[download_channel].quota_left(); if (!m_ignore_bandwidth_limits && max_receive > quota_left) max_receive = quota_left; diff --git a/src/torrent.cpp b/src/torrent.cpp index 2132ecf43..585c44600 100644 --- a/src/torrent.cpp +++ b/src/torrent.cpp @@ -439,6 +439,11 @@ namespace libtorrent } } + bool torrent::add_merkle_nodes(std::map const& nodes, int piece) + { + return m_torrent_file->add_merkle_nodes(nodes, piece); + } + peer_request torrent::to_req(piece_block const& p) { int block_offset = p.block_index * m_block_size; @@ 
-3001,6 +3006,29 @@ namespace libtorrent add_web_seed(url, web_seed_entry::http_seed); } } + + if (m_torrent_file->is_merkle_torrent()) + { + lazy_entry const* mt = rd.dict_find_string("merkle tree"); + if (mt) + { + std::vector tree; + tree.resize(m_torrent_file->merkle_tree().size()); + std::memcpy(&tree[0], mt->string_ptr() + , (std::min)(mt->string_length(), int(tree.size()) * 20)); + if (mt->string_length() < tree.size() * 20) + std::memset(&tree[0] + mt->string_length() / 20, 0 + , tree.size() - mt->string_length() / 20); + m_torrent_file->set_merkle_tree(tree); + } + else + { + // TODO: if this is a merkle torrent and we can't + // restore the tree, we need to wipe all the + // bits in the have array + TORRENT_ASSERT(false); + } + } } void torrent::write_resume_data(entry& ret) const @@ -3034,6 +3062,16 @@ namespace libtorrent static_cast(torrent_file().piece_length()) / block_size(); ret["blocks per piece"] = num_blocks_per_piece; + if (m_torrent_file->is_merkle_torrent()) + { + // we need to save the whole merkle hash tree + // in order to resume + std::string& tree_str = ret["merkle tree"].string(); + std::vector const& tree = m_torrent_file->merkle_tree(); + tree_str.resize(tree.size() * 20); + std::memcpy(&tree_str[0], &tree[0], tree.size() * 20); + } + // if this torrent is a seed, we won't have a piece picker // and there will be no half-finished pieces. if (!is_seed()) diff --git a/src/torrent_info.cpp b/src/torrent_info.cpp index 0701821e3..5bc45546a 100644 --- a/src/torrent_info.cpp +++ b/src/torrent_info.cpp @@ -61,9 +61,7 @@ POSSIBILITY OF SUCH DAMAGE. 
namespace gr = boost::gregorian; -using namespace libtorrent; - -namespace +namespace libtorrent { namespace fs = boost::filesystem; @@ -255,10 +253,36 @@ namespace } return true; } -} -namespace libtorrent -{ + int merkle_get_parent(int tree_node) + { + // node 0 doesn't have a parent + TORRENT_ASSERT(tree_node > 0); + return (tree_node - 1) / 2; + } + + int merkle_get_sibling(int tree_node) + { + // node 0 doesn't have a sibling + TORRENT_ASSERT(tree_node > 0); + // even numbers have their sibling to the left + // odd numbers have their sibling to the right + return tree_node + (tree_node&1?1:-1); + } + + int merkle_num_nodes(int leafs) + { + TORRENT_ASSERT(leafs > 0); + return (leafs << 1) - 1; + } + + int merkle_num_leafs(int pieces) + { + // round up to nearest 2 exponent + int i; + for (i = 0; pieces > 0; pieces >>= 1, ++i); + return 1 << i; + } int load_file(fs::path const& filename, std::vector& v) { @@ -285,6 +309,7 @@ namespace libtorrent , m_private(false) , m_info_section_size(0) , m_piece_hashes(0) + , m_merkle_first_leaf(0) { std::vector tmp; std::back_insert_iterator > out(tmp); @@ -316,6 +341,7 @@ namespace libtorrent , m_private(false) , m_info_section_size(0) , m_piece_hashes(0) + , m_merkle_first_leaf(0) { error_code ec; if (!parse_torrent_file(torrent_file, ec)) @@ -328,6 +354,7 @@ namespace libtorrent , m_private(false) , m_info_section_size(0) , m_piece_hashes(0) + , m_merkle_first_leaf(0) { error_code ec; lazy_entry e; @@ -365,6 +392,7 @@ namespace libtorrent , m_private(false) , m_info_section_size(0) , m_piece_hashes(0) + , m_merkle_first_leaf(0) { std::vector buf; std::string utf8; @@ -399,6 +427,7 @@ namespace libtorrent , m_private(false) , m_info_section_size(0) , m_piece_hashes(0) + , m_merkle_first_leaf(0) { lazy_entry e; if (lazy_bdecode(buffer, buffer + size, e) != 0) @@ -608,26 +637,116 @@ namespace libtorrent / m_files.piece_length())); lazy_entry const* pieces = info.dict_find("pieces"); - if (pieces == 0 || pieces->type() != 
lazy_entry::string_t) + lazy_entry const* root_hash = info.dict_find("root hash"); + if ((pieces == 0 || pieces->type() != lazy_entry::string_t) + && (root_hash == 0 || root_hash->type() != lazy_entry::string_t)) { ec = error_code(errors::torrent_missing_pieces, libtorrent_category); return false; } - if (pieces->string_length() != m_files.num_pieces() * 20) + if (pieces) { - ec = error_code(errors::torrent_invalid_hashes, libtorrent_category); - return false; - } + if (pieces->string_length() != m_files.num_pieces() * 20) + { + ec = error_code(errors::torrent_invalid_hashes, libtorrent_category); + return false; + } - m_piece_hashes = m_info_section.get() + (pieces->string_ptr() - section.first); - TORRENT_ASSERT(m_piece_hashes >= m_info_section.get()); - TORRENT_ASSERT(m_piece_hashes < m_info_section.get() + m_info_section_size); + m_piece_hashes = m_info_section.get() + (pieces->string_ptr() - section.first); + TORRENT_ASSERT(m_piece_hashes >= m_info_section.get()); + TORRENT_ASSERT(m_piece_hashes < m_info_section.get() + m_info_section_size); + } + else + { + TORRENT_ASSERT(root_hash); + if (root_hash->string_length() != 20) + { + ec = error_code(errors::torrent_invalid_hashes, libtorrent_category); + return false; + } + int num_leafs = merkle_num_leafs(m_files.num_pieces()); + int num_nodes = merkle_num_nodes(num_leafs); + m_merkle_first_leaf = num_nodes - num_leafs; + m_merkle_tree.resize(num_nodes); + std::memset(&m_merkle_tree[0], 0, num_nodes * 20); + m_merkle_tree[0].assign(root_hash->string_ptr()); + } m_private = info.dict_find_int_value("private", 0); return true; } + bool torrent_info::add_merkle_nodes(std::map const& subtree + , int piece) + { + int n = m_merkle_first_leaf + piece; + typedef std::map::const_iterator iter; + iter i = subtree.find(n); + if (i == subtree.end()) return false; + sha1_hash h = i->second; + + // if the verification passes, these are the + // nodes to add to our tree + std::map to_add; + + while (n > 0) + { + int sibling = 
merkle_get_sibling(n); + int parent = merkle_get_parent(n); + iter sibling_hash = subtree.find(sibling); + if (sibling_hash == subtree.end()) + return false; + to_add[n] = h; + to_add[sibling] = sibling_hash->second; + hasher hs; + if (sibling < n) + { + hs.update((char const*)&sibling_hash->second[0], 20); + hs.update((char const*)&h[0], 20); + } + else + { + hs.update((char const*)&h[0], 20); + hs.update((char const*)&sibling_hash->second[0], 20); + } + h = hs.final(); + n = parent; + } + if (h != m_merkle_tree[0]) return false; + + // the nodes and piece hash matched the root-hash + // insert them into our tree + + for (std::map::iterator i = to_add.begin() + , end(to_add.end()); i != end; ++i) + { + m_merkle_tree[i->first] = i->second; + } + return true; + } + + // builds a list of nodes that are required to verify + // the given piece + std::map torrent_info::build_merkle_list(int piece) const + { + std::map ret; + int n = m_merkle_first_leaf + piece; + ret[n] = m_merkle_tree[n]; + ret[0] = m_merkle_tree[0]; + while (n > 0) + { + int sibling = merkle_get_sibling(n); + int parent = merkle_get_parent(n); + ret[sibling] = m_merkle_tree[sibling]; + // we cannot build the tree path if one + // of the nodes in the tree is missing + TORRENT_ASSERT(m_merkle_tree[sibling] != sha1_hash(0)); + n = parent; + } + return ret; + } + bool torrent_info::parse_torrent_file(lazy_entry const& torrent_file, error_code& ec) { if (torrent_file.type() != lazy_entry::dict_t)