From b4abe6677d0337f936fe2dedb8bc8d83df056360 Mon Sep 17 00:00:00 2001 From: Arvid Norberg Date: Sat, 27 Mar 2010 15:51:30 +0000 Subject: [PATCH] support for sha1 file-hashes --- ChangeLog | 1 + docs/make_torrent.rst | 20 ++++++++++++++ docs/manual.rst | 13 +++++++++ examples/dump_torrent.cpp | 6 +++-- examples/make_torrent.cpp | 6 +++++ include/libtorrent/create_torrent.hpp | 39 +++++++++++++++++++++++++++ include/libtorrent/file_storage.hpp | 5 +++- src/create_torrent.cpp | 18 +++++++++++++ src/torrent_info.cpp | 16 ++++++++++- 9 files changed, 120 insertions(+), 4 deletions(-) diff --git a/ChangeLog b/ChangeLog index 35111d5bc..cb43b844c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,4 @@ + * supports calculating sha1 file-hashes when creating torrents * made the send_buffer_watermark performance warning more meaningful * supports complete_ago extension * dropped zlib as a dependency and builds using puff.c instead diff --git a/docs/make_torrent.rst b/docs/make_torrent.rst index a633679c2..bf97f3160 100644 --- a/docs/make_torrent.rst +++ b/docs/make_torrent.rst @@ -222,6 +222,7 @@ The ``create_torrent`` class has the following synopsis:: , merkle = 2 , modification_time = 4 , symlink = 8 + , calculate_file_hashes = 16 }; create_torrent(file_storage& fs, int piece_size = 0, int pad_size_limit = -1, int flags = optimize); create_torrent(torrent_info const& ti); @@ -233,6 +234,7 @@ The ``create_torrent`` class has the following synopsis:: void set_comment(char const* str); void set_creator(char const* str); void set_hash(int index, sha1_hash const& h); + void set_file_hash(int index, sha1_hash const& h); void add_url_seed(std::string const& url); void add_node(std::pair const& node); void add_tracker(std::string const& url, int tier = 0); @@ -254,6 +256,7 @@ create_torrent() , merkle = 2 , modification_time = 4 , symlink = 8 + , calculate_file_hashes = 16 }; create_torrent(file_storage& fs, int piece_size = 0, int pad_size_limit = -1, int flags = optimize); 
create_torrent(torrent_info const& ti); @@ -303,6 +306,12 @@ symlink of the symlink so that the original directory structure can be reproduced on the downloading side. +calculate_file_hashes + If this is set, the `set_piece_hashes()`_ function will, as it calculates + the piece hashes, also calculate the file hashes and add those associated + with each file. Note that unless you use the `set_piece_hashes()`_ function, + this flag will have no effect. + generate() ---------- @@ -366,6 +375,17 @@ to set the hash for every piece in the torrent before generating it. If you have the files on disk, you can use the high level convenience function to do this. See `set_piece_hashes()`_. +set_file_hash() +--------------- + + :: + + void set_file_hash(int index, sha1_hash const& h); + +This sets the sha1 hash for this file. This hash will end up under the key ``sha1`` +associated with this file (for multi-file torrents) or in the root info dictionary +for single-file torrents. + add_url_seed() -------------- diff --git a/docs/manual.rst b/docs/manual.rst index 6027ae2ff..6bbf36ede 100644 --- a/docs/manual.rst +++ b/docs/manual.rst @@ -1653,9 +1653,12 @@ iterators with the type ``file_entry``. size_type offset; size_type size; size_type file_base; + std::string symlink_path; + boost::shared_ptr filehash; bool pad_file:1; bool hidden_attribute:1; bool executable_attribute:1; + bool symlink_attribute:1; }; The ``path`` is the full (relative) path of each file. i.e. if it is a multi-file @@ -1678,6 +1681,16 @@ They are just there to make sure the next file is aligned to a particular byte o or piece boundry. These files should typically be hidden from an end user. They are not written to disk. +``hidden_attribute`` is true if the file was marked as hidden (on windows). + +``executable_attribute`` is true if the file was marked as executable (posix) + +``symlink_attribute`` is true if the file was a symlink. 
If this is the case +the ``symlink_path`` specifies the original location where the data for this file +was found. + +``filehash`` is a pointer that is set in case the torrent file included a sha1 hash +for this file. This may be used to look up more sources for this file on other networks. num_files() file_at() --------------------- diff --git a/examples/dump_torrent.cpp b/examples/dump_torrent.cpp index c273a23c5..413bc2aa7 100644 --- a/examples/dump_torrent.cpp +++ b/examples/dump_torrent.cpp @@ -119,13 +119,15 @@ int main(int argc, char* argv[]) { int first = t.map_file(index, 0, 0).piece; int last = t.map_file(index, (std::max)(i->size-1, size_type(0)), 0).piece; - printf(" %11"PRId64" %c%c%c%c [ %4d, %4d ] %s %s%s\n" + printf(" %11"PRId64" %c%c%c%c [ %4d, %4d ] %s %s %s%s\n" , i->size , (i->pad_file?'p':'-') , (i->executable_attribute?'x':'-') , (i->hidden_attribute?'h':'-') , (i->symlink_attribute?'l':'-') - , first, last, i->path.c_str() + , first, last + , i->filehash ? to_hex(i->filehash->to_string()).c_str() : "" + , i->path.c_str() , i->symlink_attribute ? "-> ": "" , i->symlink_attribute ? 
i->symlink_path.c_str() : ""); } diff --git a/examples/make_torrent.cpp b/examples/make_torrent.cpp index e957912b2..d2bf67704 100644 --- a/examples/make_torrent.cpp +++ b/examples/make_torrent.cpp @@ -66,6 +66,9 @@ void print_usage() "OPTIONS:\n" "-m generate a merkle hash tree torrent.\n" " merkle torrents require client support\n" + "-f include sha-1 file hashes in the torrent\n" + " this helps supporting mixing sources from\n" + " other networks\n" "-w url adds a web seed to the torrent with\n" " the specified url\n" "-t url adds the specified tracker to the\n" @@ -144,6 +147,9 @@ int main(int argc, char* argv[]) ++i; outfile = argv[i]; break; + case 'f': + flags |= create_torrent::calculate_file_hashes; + break; default: print_usage(); return 1; diff --git a/include/libtorrent/create_torrent.hpp b/include/libtorrent/create_torrent.hpp index 5fab27577..32bf1743b 100644 --- a/include/libtorrent/create_torrent.hpp +++ b/include/libtorrent/create_torrent.hpp @@ -71,6 +71,7 @@ namespace libtorrent , merkle = 2 , modification_time = 4 , symlinks = 8 + , calculate_file_hashes = 16 }; create_torrent(file_storage& fs, int piece_size = 0 @@ -83,6 +84,7 @@ namespace libtorrent void set_comment(char const* str); void set_creator(char const* str); void set_hash(int index, sha1_hash const& h); + void set_file_hash(int index, sha1_hash const& h); void add_url_seed(std::string const& url); void add_node(std::pair const& node); void add_tracker(std::string const& url, int tier = 0); @@ -93,6 +95,8 @@ namespace libtorrent int piece_size(int i) const { return m_files.piece_size(i); } bool priv() const { return m_private; } + bool should_add_file_hashes() const { return m_calculate_file_hashes; } + private: file_storage& m_files; @@ -109,6 +113,8 @@ namespace libtorrent std::vector m_piece_hash; + std::vector m_filehashes; + // dht nodes to add to the routing table/bootstrap from typedef std::vector > nodes_t; nodes_t m_nodes; @@ -153,6 +159,11 @@ namespace libtorrent // the 
torrent file. The full data of the pointed-to // file is still included bool m_include_symlinks:1; + + // this is only used by set_piece_hashes(). It will + // calculate sha1 hashes for each file and add it + // to the file list + bool m_calculate_file_hashes:1; }; namespace detail @@ -234,6 +245,11 @@ namespace libtorrent boost::scoped_ptr st( default_storage_constructor(const_cast(t.files()), 0, p, fp)); + // if we're calculating file hashes as well, use this hasher + hasher filehash; + int file_idx = 0; + size_type left_in_file = t.files().at(0).size; + // calculate the hash for all pieces int num = t.num_pieces(); piece_holder buf(t.piece_length()); @@ -247,6 +263,29 @@ namespace libtorrent ec = st->error(); return; } + + if (t.should_add_file_hashes()) + { + int left_in_piece = t.piece_size(i); + // the number of bytes from this file we just read + while (left_in_piece > 0) + { + int to_hash_for_file = (std::min)(size_type(left_in_piece), left_in_file); + filehash.update(buf.bytes(), to_hash_for_file); + left_in_file -= to_hash_for_file; + left_in_piece -= to_hash_for_file; + if (left_in_file == 0) + { + if (!t.files().at(file_idx).pad_file) + t.set_file_hash(file_idx, filehash.final()); + filehash.reset(); + file_idx++; + if (file_idx >= t.files().num_files()) break; + left_in_file = t.files().at(file_idx).size; + } + } + } + hasher h(buf.bytes(), t.piece_size(i)); t.set_hash(i, h.final()); f(i); diff --git a/include/libtorrent/file_storage.hpp b/include/libtorrent/file_storage.hpp index 7f749436d..a3d8a2223 100644 --- a/include/libtorrent/file_storage.hpp +++ b/include/libtorrent/file_storage.hpp @@ -36,10 +36,12 @@ POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include #include "libtorrent/size_type.hpp" #include "libtorrent/assert.hpp" #include "libtorrent/peer_request.hpp" +#include "libtorrent/peer_id.hpp" namespace libtorrent { @@ -59,11 +61,12 @@ namespace libtorrent // compressed into a single file, such as a so-called part file. 
size_type file_base; std::time_t mtime; + std::string symlink_path; + boost::shared_ptr filehash; bool pad_file:1; bool hidden_attribute:1; bool executable_attribute:1; bool symlink_attribute:1; - std::string symlink_path; }; struct TORRENT_EXPORT file_slice diff --git a/src/create_torrent.cpp b/src/create_torrent.cpp index 767c06b86..9226e6b4d 100644 --- a/src/create_torrent.cpp +++ b/src/create_torrent.cpp @@ -115,6 +115,7 @@ namespace libtorrent , m_merkle_torrent(flags & merkle) , m_include_mtime(flags & modification_time) , m_include_symlinks(flags & symlinks) + , m_calculate_file_hashes(flags & calculate_file_hashes) { TORRENT_ASSERT(fs.num_files() > 0); @@ -303,6 +304,10 @@ namespace libtorrent for (char const* e = split.c_str(); e != 0; e = next_path_element(e)) sympath_e.list().push_back(entry(e)); } + if (!m_filehashes.empty()) + { + info["sha1"] = m_filehashes[0].to_string(); + } } else { @@ -347,6 +352,11 @@ namespace libtorrent for (char const* e = split.c_str(); e != 0; e = next_path_element(e)) sympath_e.list().push_back(entry(e)); } + int file_index = i - m_files.begin(); + if (!m_filehashes.empty() && m_filehashes[file_index] != sha1_hash()) + { + file_e["sha1"] = m_filehashes[file_index].to_string(); + } } } } @@ -423,6 +433,14 @@ namespace libtorrent m_piece_hash[index] = h; } + void create_torrent::set_file_hash(int index, sha1_hash const& h) + { + TORRENT_ASSERT(index >= 0); + TORRENT_ASSERT(index < (int)m_files.num_files()); + if (m_filehashes.empty()) m_filehashes.resize(m_files.num_files()); + m_filehashes[index] = h; + } + void create_torrent::add_node(std::pair const& node) { m_nodes.push_back(node); diff --git a/src/torrent_info.cpp b/src/torrent_info.cpp index 1fc686878..5e5c4f375 100644 --- a/src/torrent_info.cpp +++ b/src/torrent_info.cpp @@ -291,6 +291,13 @@ namespace libtorrent } } + lazy_entry const* fh = dict.dict_find_string("sha1"); + if (fh && fh->string_length() == 20) + { + target.filehash.reset(new sha1_hash); + 
std::memcpy(&(*target.filehash)[0], fh->string_ptr(), 20); + } + lazy_entry const* s_p = dict.dict_find("symlink path"); if (s_p != 0 && s_p->type() == lazy_entry::list_t) { @@ -339,7 +346,7 @@ namespace libtorrent int cnt = 0; std::set files; - // as long as we this file already exists + // as long as this file already exists // increase the counter while (!files.insert(e.path).second) { @@ -793,6 +800,13 @@ namespace libtorrent e.symlink_path = combine_path(e.symlink_path, path_element); } } + lazy_entry const* fh = info.dict_find_string("sha1"); + if (fh && fh->string_length() == 20) + { + e.filehash.reset(new sha1_hash); + std::memcpy(&(*e.filehash)[0], fh->string_ptr(), 20); + } + // bitcomet pad file if (e.path.find("_____padding_file_") != std::string::npos) e.pad_file = true;