From 8bb7aa62ae4c04763d983a629c164ba6f657e2f7 Mon Sep 17 00:00:00 2001 From: Arvid Norberg Date: Sun, 2 Oct 2016 23:08:03 -0700 Subject: [PATCH] generalize and improve http redirect support for web seeds (#1060) generalize and improve http redirect support for web seeds --- ChangeLog | 1 + docs/manual.rst | 4 + include/libtorrent/create_torrent.hpp | 4 +- include/libtorrent/file_storage.hpp | 17 +- include/libtorrent/http_seed_connection.hpp | 2 + include/libtorrent/parse_url.hpp | 3 + include/libtorrent/peer_connection.hpp | 1 + include/libtorrent/torrent.hpp | 36 ++- include/libtorrent/torrent_info.hpp | 14 +- simulation/create_torrent.cpp | 2 +- simulation/libsimulator | 2 +- simulation/test_web_seed.cpp | 340 ++++++++++++++++++-- src/file_storage.cpp | 30 ++ src/http_seed_connection.cpp | 7 + src/parse_url.cpp | 35 +- src/peer_connection.cpp | 31 +- src/piece_picker.cpp | 6 +- src/torrent.cpp | 35 +- src/torrent_handle.cpp | 4 +- src/web_connection_base.cpp | 3 - src/web_peer_connection.cpp | 143 ++++++-- test/test_file_storage.cpp | 63 +++- test/test_http_parser.cpp | 46 +++ 23 files changed, 709 insertions(+), 120 deletions(-) diff --git a/ChangeLog b/ChangeLog index 03c9aeca8..fdfd720c8 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,4 @@ + * improve support for HTTP redirects for web seeds * use string_view in entry interface * deprecate "send_stats" property on trackers (since lt_tracker extension has been removed) diff --git a/docs/manual.rst b/docs/manual.rst index 6b04cbd49..e17498919 100644 --- a/docs/manual.rst +++ b/docs/manual.rst @@ -557,6 +557,10 @@ torrent, only that filename is appended. If the torrent is a multi-file torrent, the torrent's name '/' the file name is appended. This is the same directory structure that libtorrent will download torrents into. +There is limited support for HTTP redirects. In case some files are redirected +to *different hosts*, the files must be piece aligned or padded to be piece +aligned. + .. 
_`BEP 17`: http://bittorrent.org/beps/bep_0017.html .. _`BEP 19`: http://bittorrent.org/beps/bep_0019.html diff --git a/include/libtorrent/create_torrent.hpp b/include/libtorrent/create_torrent.hpp index e81a3b698..d319fdb19 100644 --- a/include/libtorrent/create_torrent.hpp +++ b/include/libtorrent/create_torrent.hpp @@ -149,7 +149,7 @@ namespace libtorrent // be a multiple of 16 kiB. If a piece size of 0 is specified, a // piece_size will be calculated such that the torrent file is roughly 40 kB. // - // If a ``pad_size_limit`` is specified (other than -1), any file larger than + // If a ``pad_file_limit`` is specified (other than -1), any file larger than // the specified number of bytes will be preceded by a pad file to align it // with the start of a piece. The pad_file_limit is ignored unless the // ``optimize_alignment`` flag is passed. Typically it doesn't make sense @@ -168,7 +168,7 @@ namespace libtorrent // ``alignment`` is used when pad files are enabled. This is the size // eligible files are aligned to. The default is -1, which means the // piece size of the torrent. - create_torrent(file_storage& fs, int piece_size = 0 + explicit create_torrent(file_storage& fs, int piece_size = 0 , int pad_file_limit = -1, int flags = optimize_alignment , int alignment = -1); create_torrent(torrent_info const& ti); diff --git a/include/libtorrent/file_storage.hpp b/include/libtorrent/file_storage.hpp index 5070c17bd..ceb2c1f22 100644 --- a/include/libtorrent/file_storage.hpp +++ b/include/libtorrent/file_storage.hpp @@ -629,6 +629,21 @@ namespace libtorrent // the torrent is unloaded int m_num_files; }; -} + + namespace aux { + + // returns the piece range that entirely falls within the specified file. the + // end piece is one-past the last piece that entirely falls within the file. + // i.e. They can conveniently be used as loop boundaries. No edge partial + // pieces will be included. 
+ TORRENT_EXTRA_EXPORT std::tuple file_piece_range_exclusive(file_storage const& fs, int file); + + // returns the piece range of pieces that overlaps with the specified file. + // the end piece is one-past the last piece. i.e. They can conveniently be + // used as loop boundaries. + TORRENT_EXTRA_EXPORT std::tuple file_piece_range_inclusive(file_storage const& fs, int file); + +} // namespace aux +} // namespace libtorrent #endif // TORRENT_FILE_STORAGE_HPP_INCLUDED diff --git a/include/libtorrent/http_seed_connection.hpp b/include/libtorrent/http_seed_connection.hpp index 48f7c6ab4..471f644ed 100644 --- a/include/libtorrent/http_seed_connection.hpp +++ b/include/libtorrent/http_seed_connection.hpp @@ -67,6 +67,8 @@ namespace libtorrent virtual void on_receive(error_code const& error , std::size_t bytes_transferred) override; + virtual void on_connected() override; + std::string const& url() const override { return m_url; } virtual void get_specific_peer_info(peer_info& p) const override; diff --git a/include/libtorrent/parse_url.hpp b/include/libtorrent/parse_url.hpp index efac49966..492e73e02 100644 --- a/include/libtorrent/parse_url.hpp +++ b/include/libtorrent/parse_url.hpp @@ -48,6 +48,9 @@ namespace libtorrent , std::string, int, std::string> parse_url_components(std::string url, error_code& ec); + // split a URL in its base and path parts + TORRENT_EXTRA_EXPORT std::tuple + split_url(std::string url, error_code& ec); } #endif diff --git a/include/libtorrent/peer_connection.hpp b/include/libtorrent/peer_connection.hpp index b2e10d13e..d034fc449 100644 --- a/include/libtorrent/peer_connection.hpp +++ b/include/libtorrent/peer_connection.hpp @@ -409,6 +409,7 @@ namespace libtorrent std::vector const& upload_queue() const; void clear_request_queue(); + void clear_download_queue(); // estimate of how long it will take until we have // received all piece requests that we have sent diff --git a/include/libtorrent/torrent.hpp b/include/libtorrent/torrent.hpp index 
b443032a7..fdcce5444 100644 --- a/include/libtorrent/torrent.hpp +++ b/include/libtorrent/torrent.hpp @@ -59,6 +59,7 @@ POSSIBILITY OF SUCH DAMAGE. #include "libtorrent/assert.hpp" #include "libtorrent/bitfield.hpp" #include "libtorrent/aux_/session_interface.hpp" +#include "libtorrent/aux_/time.hpp" #include "libtorrent/deadline_timer.hpp" #include "libtorrent/peer_class_set.hpp" #include "libtorrent/link.hpp" @@ -131,7 +132,7 @@ namespace libtorrent , web_seed_entry::headers_t const& extra_headers_ = web_seed_entry::headers_t()); // if this is > now, we can't reconnect yet - time_point retry; + time_point retry = aux::time_now(); // if the hostname of the web seed has been resolved, // these are its IP addresses @@ -141,28 +142,42 @@ namespace libtorrent // connection, just to count hash failures // it's also used to hold the peer_connection // pointer, when the web seed is connected - ipv4_peer peer_info; + ipv4_peer peer_info{tcp::endpoint(), true, 0}; // this is initialized to true, but if we discover the // server not to support it, it's set to false, and we // make larger requests. - bool supports_keepalive; + bool supports_keepalive = true; // this indicates whether or not we're resolving the // hostname of this URL - bool resolving; + bool resolving = false; // if the user wanted to remove this while // we were resolving it. In this case, we set // the removed flag to true, to make the resolver // callback remove it - bool removed; + bool removed = false; + + // if this is true, this URL was created by a redirect and should not be + // saved in the resume data + bool ephemeral = false; // if the web server doesn't support keepalive or a block request was // interrupted, the block received so far is kept here for the next // connection to pick up - peer_request restart_request; + peer_request restart_request = { -1, -1, -1}; std::vector restart_piece; + + // this maps file index to a URL it has been redirected to. 
If an entry is + // missing, it means it has not been redirected and the full path should + // be constructed normally based on the filename. All redirections are + // relative to the web seed hostname root. + std::map redirects; + + // if this bitfield is non-empty, it represents the files this web server + // has. + bitfield have_files; }; struct TORRENT_EXTRA_EXPORT torrent_hot_members @@ -564,12 +579,13 @@ namespace libtorrent // -------------------------------------------- // PEER MANAGEMENT - // add or remove a url that will be attempted for - // finding the file(s) in this torrent. - void add_web_seed(std::string const& url + // add_web_seed won't add duplicates. If we have already added an entry + // with this URL, we'll get back the existing entry + web_seed_t* add_web_seed(std::string const& url , web_seed_t::type_t type , std::string const& auth = std::string() - , web_seed_t::headers_t const& extra_headers = web_seed_entry::headers_t()); + , web_seed_t::headers_t const& extra_headers = web_seed_entry::headers_t() + , bool ephemeral = false); void remove_web_seed(std::string const& url, web_seed_t::type_t type); void disconnect_web_seed(peer_connection* p); diff --git a/include/libtorrent/torrent_info.hpp b/include/libtorrent/torrent_info.hpp index a4c94fb8a..a3dd31b7d 100644 --- a/include/libtorrent/torrent_info.hpp +++ b/include/libtorrent/torrent_info.hpp @@ -147,18 +147,18 @@ namespace libtorrent // // The ``flags`` argument is currently unused. 
#ifndef BOOST_NO_EXCEPTIONS - torrent_info(bdecode_node const& torrent_file, int flags = 0); - torrent_info(char const* buffer, int size, int flags = 0); - torrent_info(std::string const& filename, int flags = 0); + explicit torrent_info(bdecode_node const& torrent_file, int flags = 0); + explicit torrent_info(char const* buffer, int size, int flags = 0); + explicit torrent_info(std::string const& filename, int flags = 0); #endif // BOOST_NO_EXCEPTIONS - torrent_info(torrent_info const& t); - torrent_info(sha1_hash const& info_hash, int flags = 0); + explicit torrent_info(torrent_info const& t); + explicit torrent_info(sha1_hash const& info_hash, int flags = 0); torrent_info(bdecode_node const& torrent_file, error_code& ec, int flags = 0); torrent_info(char const* buffer, int size, error_code& ec, int flags = 0); torrent_info(std::string const& filename, error_code& ec, int flags = 0); #ifndef TORRENT_NO_DEPRECATE TORRENT_DEPRECATED - torrent_info(lazy_entry const& torrent_file, int flags = 0); + explicit torrent_info(lazy_entry const& torrent_file, int flags = 0); TORRENT_DEPRECATED torrent_info(lazy_entry const& torrent_file, error_code& ec , int flags = 0); @@ -169,7 +169,7 @@ namespace libtorrent torrent_info(std::wstring const& filename, error_code& ec , int flags = 0); TORRENT_DEPRECATED - torrent_info(std::wstring const& filename, int flags = 0); + explicit torrent_info(std::wstring const& filename, int flags = 0); #endif // TORRENT_USE_WSTRING #endif // TORRENT_NO_DEPRECATE diff --git a/simulation/create_torrent.cpp b/simulation/create_torrent.cpp index 2e3887c01..815423e46 100644 --- a/simulation/create_torrent.cpp +++ b/simulation/create_torrent.cpp @@ -39,7 +39,7 @@ namespace lt = libtorrent; std::string save_path(int idx) { - int swarm_id = test_counter(); + int const swarm_id = test_counter(); char path[200]; std::snprintf(path, sizeof(path), "swarm-%04d-peer-%02d" , swarm_id, idx); diff --git a/simulation/libsimulator b/simulation/libsimulator index 
c19b35e05..3eae1da7d 160000 --- a/simulation/libsimulator +++ b/simulation/libsimulator @@ -1 +1 @@ -Subproject commit c19b35e05831ad46cefd1a9e9f8b022c799bc8d0 +Subproject commit 3eae1da7de7d13ab26472c4a44ee4477ce24fa26 diff --git a/simulation/test_web_seed.cpp b/simulation/test_web_seed.cpp index d03c11cc3..f07aa70c8 100644 --- a/simulation/test_web_seed.cpp +++ b/simulation/test_web_seed.cpp @@ -34,6 +34,7 @@ POSSIBILITY OF SUCH DAMAGE. #include "libtorrent/settings_pack.hpp" #include "libtorrent/deadline_timer.hpp" #include "libtorrent/torrent_info.hpp" +#include "libtorrent/alert_types.hpp" #include "simulator/http_server.hpp" #include "settings.hpp" #include "libtorrent/create_torrent.hpp" @@ -48,20 +49,59 @@ using namespace libtorrent; namespace lt = libtorrent; -std::shared_ptr create_torrent(file_storage& fs) +add_torrent_params create_torrent(file_storage& fs, bool const pad_files = false) { int const piece_size = 0x4000; - libtorrent::create_torrent t(fs, piece_size); + libtorrent::create_torrent t(fs, piece_size + , pad_files ? piece_size : -1 + , pad_files ? create_torrent::optimize_alignment : 0); std::vector piece(piece_size); - for (int i = 0; i < int(piece.size()); ++i) - piece[i] = (i % 26) + 'A'; - - // calculate the hash for all pieces int const num = t.num_pieces(); - sha1_hash ph = hasher(&piece[0], int(piece.size())).final(); - for (int i = 0; i < num; ++i) - t.set_hash(i, ph); + if (pad_files) + { + for (int i = 0; i < num; ++i) + { + std::vector files = fs.map_block(i, 0, fs.piece_size(i)); + int k = 0; + for (auto& f : files) + { + if (fs.pad_file_at(f.file_index)) + { + for (int j = 0; j < f.size; ++j) + piece[k++] = 0; + } + else + { + for (int j = 0; j < f.size; ++j, ++k) + piece[k] = (k % 26) + 'A'; + } + } + // the last piece may be shorter. 
pad with zeroes + for (; k < int(piece.size()); ++k) + piece[k++] = 0; + + sha1_hash ph = hasher(&piece[0], int(piece.size())).final(); + t.set_hash(i, ph); + } + } + else + { + for (int i = 0; i < int(piece.size()); ++i) + piece[i] = (i % 26) + 'A'; + + // calculate the hash for all pieces + sha1_hash ph = hasher(&piece[0], int(piece.size())).final(); + for (int i = 0; i < num; ++i) + t.set_hash(i, ph); + } + + // the last piece may have a different size + if ((fs.total_size() % piece_size) > 0) + { + piece.resize(fs.total_size() % piece_size); + t.set_hash(num-1, hasher(&piece[0], int(piece.size())).final()); + } std::vector tmp; std::back_insert_iterator > out(tmp); @@ -70,8 +110,13 @@ std::shared_ptr create_torrent(file_storage& fs) bencode(out, tor); error_code ec; - return std::make_shared( + add_torrent_params ret; + ret.ti = std::make_shared( &tmp[0], int(tmp.size()), std::ref(ec), 0); + ret.flags &= ~lt::add_torrent_params::flag_auto_managed; + ret.flags &= ~lt::add_torrent_params::flag_paused; + ret.save_path = "."; + return ret; } // this is the general template for these tests. 
create the session with custom // settings (Settings), set up the test, by adding torrents with certain @@ -112,25 +157,22 @@ void run_test(Setup const& setup test(sim, *ses); } -TORRENT_TEST(single_file_torrent) +TORRENT_TEST(single_file) { using namespace libtorrent; + + file_storage fs; + fs.add_file("abc'abc", 0x8000); // this filename will have to be escaped + lt::add_torrent_params params = ::create_torrent(fs); + params.url_seeds.push_back("http://2.2.2.2:8080/"); + bool expected = false; run_test( - [](lt::session& ses) + [&params](lt::session& ses) { - file_storage fs; - fs.add_file("abc'abc", 0x8000); // this filename will have to be escaped - lt::add_torrent_params params; - params.ti = ::create_torrent(fs); - params.url_seeds.push_back("http://2.2.2.2:8080/"); - params.flags &= ~lt::add_torrent_params::flag_auto_managed; - params.flags &= ~lt::add_torrent_params::flag_paused; - params.save_path = "."; ses.async_add_torrent(params); }, - [](lt::session& ses, lt::alert const* alert) { - }, + [](lt::session& ses, lt::alert const* alert) {}, [&expected](sim::simulation& sim, lt::session& ses) { sim::asio::io_service web_server(sim, address_v4::from_string("2.2.2.2")); @@ -153,3 +195,257 @@ TORRENT_TEST(single_file_torrent) TEST_CHECK(expected); } +TORRENT_TEST(multi_file) +{ + using namespace libtorrent; + file_storage fs; + fs.add_file(combine_path("foo", "abc'abc"), 0x8000); // this filename will have to be escaped + fs.add_file(combine_path("foo", "bar"), 0x3000); + lt::add_torrent_params params = ::create_torrent(fs); + params.url_seeds.push_back("http://2.2.2.2:8080/"); + + std::array expected{{ false, false }}; + run_test( + [&params](lt::session& ses) + { + ses.async_add_torrent(params); + }, + [](lt::session& ses, lt::alert const* alert) {}, + [&expected](sim::simulation& sim, lt::session& ses) + { + sim::asio::io_service web_server(sim, address_v4::from_string("2.2.2.2")); + // listen on port 8080 + sim::http_server http(web_server, 8080); + + // make sure 
the requested file is correctly escaped + http.register_handler("/foo/abc%27abc" + , [&expected](std::string, std::string, std::map&) + { + expected[0] = true; + return sim::send_response(404, "not found", 0); + }); + http.register_handler("/foo/bar" + , [&expected](std::string, std::string, std::map&) + { + expected[1] = true; + return sim::send_response(404, "not found", 0); + }); + + sim.run(); + } + ); + + TEST_CHECK(expected[0]); + TEST_CHECK(expected[1]); +} + +std::string generate_content(lt::file_storage const& fs, int file + , std::int64_t offset, std::int64_t len) +{ + std::string ret; + ret.reserve(len); + std::int64_t const file_offset = fs.file_offset(file); + int const piece_size = fs.piece_length(); + for (std::int64_t i = offset; i < offset + len; ++i) + ret.push_back((((i + file_offset) % piece_size) % 26) + 'A'); + return ret; +} + +void serve_content_for(sim::http_server& http, std::string const& path + , lt::file_storage const& fs, int const file) +{ + http.register_content(path, fs.file_size(file) + , [&fs,file](std::int64_t offset, std::int64_t len) + { return generate_content(fs, file, offset, len); }); +} + +// test redirecting *unaligned* files to the same server still working. i.e. 
the +// second redirect is added to the same web-seed entry as the first one +TORRENT_TEST(unaligned_file_redirect) +{ + using namespace libtorrent; + file_storage fs; + fs.add_file(combine_path("foo", "1"), 0xc030); + fs.add_file(combine_path("foo", "2"), 0xc030); + lt::add_torrent_params params = ::create_torrent(fs); + params.url_seeds.push_back("http://2.2.2.2:8080/"); + + bool seeding = false; + + run_test( + [&params](lt::session& ses) + { + ses.async_add_torrent(params); + }, + [&](lt::session& ses, lt::alert const* alert) { + if (lt::alert_cast(alert)) + seeding = true; + }, + [&fs](sim::simulation& sim, lt::session& ses) + { + // http1 is the root web server that will just redirect requests to + // other servers + sim::asio::io_service web_server1(sim, address_v4::from_string("2.2.2.2")); + sim::http_server http1(web_server1, 8080); + // redirect file 1 and file 2 to the same server + http1.register_redirect("/foo/1", "http://3.3.3.3:4444/bla/file1"); + http1.register_redirect("/foo/2", "http://3.3.3.3:4444/bar/file2"); + + // server for serving the content + sim::asio::io_service web_server2(sim, address_v4::from_string("3.3.3.3")); + sim::http_server http2(web_server2, 4444); + serve_content_for(http2, "/bla/file1", fs, 0); + serve_content_for(http2, "/bar/file2", fs, 1); + + sim.run(); + } + ); + + TEST_EQUAL(seeding, true); +} + +// test redirecting *unaligned* but padded files to separate servers +TORRENT_TEST(multi_file_redirect_pad_files) +{ + using namespace libtorrent; + file_storage fs_; + fs_.add_file(combine_path("foo", "1"), 0xc030); + fs_.add_file(combine_path("foo", "2"), 0xc030); + // true means use padfiles + lt::add_torrent_params params = ::create_torrent(fs_, true); + params.url_seeds.push_back("http://2.2.2.2:8080/"); + + // since the final torrent is different than what we built (because of pad + // files), ask about it. 
+ file_storage const& fs = params.ti->files(); + + bool seeding = false; + + run_test( + [&params](lt::session& ses) + { + ses.async_add_torrent(params); + }, + [&](lt::session& ses, lt::alert const* alert) { + if (lt::alert_cast(alert)) + seeding = true; + }, + [&fs](sim::simulation& sim, lt::session& ses) + { + // http1 is the root web server that will just redirect requests to + // other servers + sim::asio::io_service web_server1(sim, address_v4::from_string("2.2.2.2")); + sim::http_server http1(web_server1, 8080); + // redirect file 1 and file 2 to different servers + http1.register_redirect("/foo/1", "http://3.3.3.3:4444/bla/file1"); + http1.register_redirect("/foo/2", "http://4.4.4.4:9999/bar/file2"); + + // server for file 1 + sim::asio::io_service web_server2(sim, address_v4::from_string("3.3.3.3")); + sim::http_server http2(web_server2, 4444); + serve_content_for(http2, "/bla/file1", fs, 0); + + // server for file 2 + sim::asio::io_service web_server3(sim, address_v4::from_string("4.4.4.4")); + sim::http_server http3(web_server3, 9999); + serve_content_for(http3, "/bar/file2", fs, 2); + + sim.run(); + } + ); + + TEST_EQUAL(seeding, true); +} +// test that a web seed can redirect files to separate web servers (as long as +// they are piece aligned) +TORRENT_TEST(multi_file_redirect) +{ + using namespace libtorrent; + file_storage fs; + fs.add_file(combine_path("foo", "1"), 0xc000); + fs.add_file(combine_path("foo", "2"), 0xc030); + lt::add_torrent_params params = ::create_torrent(fs); + params.url_seeds.push_back("http://2.2.2.2:8080/"); + + bool seeding = false; + + run_test( + [&params](lt::session& ses) + { + ses.async_add_torrent(params); + }, + [&](lt::session& ses, lt::alert const* alert) { + if (lt::alert_cast(alert)) + seeding = true; + }, + [&fs](sim::simulation& sim, lt::session& ses) + { + // http1 is the root web server that will just redirect requests to + // other servers + sim::asio::io_service web_server1(sim, address_v4::from_string("2.2.2.2")); + 
sim::http_server http1(web_server1, 8080); + // redirect file 1 and file 2 to different servers + http1.register_redirect("/foo/1", "http://3.3.3.3:4444/bla/file1"); + http1.register_redirect("/foo/2", "http://4.4.4.4:9999/bar/file2"); + + // server for file 1 + sim::asio::io_service web_server2(sim, address_v4::from_string("3.3.3.3")); + sim::http_server http2(web_server2, 4444); + serve_content_for(http2, "/bla/file1", fs, 0); + + // server for file 2 + sim::asio::io_service web_server3(sim, address_v4::from_string("4.4.4.4")); + sim::http_server http3(web_server3, 9999); + serve_content_for(http3, "/bar/file2", fs, 1); + + sim.run(); + } + ); + + TEST_EQUAL(seeding, true); +} + +// this is expected to fail, since the files are not aligned and redirected to +// separate servers, without pad files +TORRENT_TEST(multi_file_unaligned_redirect) +{ + using namespace libtorrent; + file_storage fs; + fs.add_file(combine_path("foo", "1"), 0xc030); + fs.add_file(combine_path("foo", "2"), 0xc030); + lt::add_torrent_params params = ::create_torrent(fs); + params.url_seeds.push_back("http://2.2.2.2:8080/"); + + run_test( + [&params](lt::session& ses) + { + ses.async_add_torrent(params); + }, + [&](lt::session& ses, lt::alert const* alert) { + // We don't expect to get this alert + TEST_CHECK(lt::alert_cast(alert) == nullptr); + }, + [&fs](sim::simulation& sim, lt::session& ses) + { + // http1 is the root web server that will just redirect requests to + // other servers + sim::asio::io_service web_server1(sim, address_v4::from_string("2.2.2.2")); + sim::http_server http1(web_server1, 8080); + // redirect file 1 and file 2 to different servers + http1.register_redirect("/foo/1", "http://3.3.3.3:4444/bla/file1"); + http1.register_redirect("/foo/2", "http://4.4.4.4:9999/bar/file2"); + + // server for file 1 + sim::asio::io_service web_server2(sim, address_v4::from_string("3.3.3.3")); + sim::http_server http2(web_server2, 4444); + serve_content_for(http2, "/bla/file1", fs, 0); + + 
// server for file 2 + sim::asio::io_service web_server3(sim, address_v4::from_string("4.4.4.4")); + sim::http_server http3(web_server3, 9999); + serve_content_for(http3, "/bar/file2", fs, 1); + + sim.run(); + } + ); +} diff --git a/src/file_storage.cpp b/src/file_storage.cpp index 456986b6a..c50680627 100644 --- a/src/file_storage.cpp +++ b/src/file_storage.cpp @@ -1119,4 +1119,34 @@ namespace libtorrent #endif std::vector().swap(m_paths); } + + namespace aux + { + + std::tuple file_piece_range_exclusive(file_storage const& fs, int file) + { + peer_request const range = fs.map_file(file, 0, 1); + std::int64_t const file_size = fs.file_size(file); + std::int64_t const piece_size = fs.piece_length(); + int const begin_piece = range.start == 0 ? range.piece : range.piece + 1; + // the last piece is potentially smaller than the other pieces, so the + // generic logic doesn't really work. If this file is the last file, the + // last piece doesn't overlap with any other file and it's entirely + // contained within the last file. + int const end_piece = (file == fs.num_files() - 1) + ? 
fs.num_pieces() + : (range.piece * piece_size + range.start + file_size + 1) / piece_size; + return std::make_tuple(begin_piece, end_piece); + } + + std::tuple file_piece_range_inclusive(file_storage const& fs, int file) + { + peer_request const range = fs.map_file(file, 0, 1); + std::int64_t const file_size = fs.file_size(file); + std::int64_t const piece_size = fs.piece_length(); + int const end_piece = (range.piece * piece_size + range.start + file_size - 1) / piece_size + 1; + return std::make_tuple(range.piece, end_piece); + } + + } // namespace aux } diff --git a/src/http_seed_connection.cpp b/src/http_seed_connection.cpp index 7fc79fd1a..d145d5741 100644 --- a/src/http_seed_connection.cpp +++ b/src/http_seed_connection.cpp @@ -71,6 +71,13 @@ namespace libtorrent #endif } + void http_seed_connection::on_connected() + { + // this is always a seed + incoming_have_all(); + web_connection_base::on_connected(); + } + void http_seed_connection::disconnect(error_code const& ec , operation_t op, int error) { diff --git a/src/parse_url.cpp b/src/parse_url.cpp index 8d911404d..0c8509844 100644 --- a/src/parse_url.cpp +++ b/src/parse_url.cpp @@ -128,8 +128,41 @@ namespace libtorrent start = end; exit: - return std::make_tuple(protocol, auth, hostname, port + return std::make_tuple(std::move(protocol) + , std::move(auth) + , std::move(hostname) + , port , std::string(start, url.end())); } + // splits a url into the base url and the path + std::tuple + split_url(std::string url, error_code& ec) + { + std::string base; + std::string path; + + // PARSE URL + std::string::iterator pos + = std::find(url.begin(), url.end(), ':'); + + if (pos == url.end() || url.end() - pos < 3 + || *(pos + 1) != '/' || *(pos + 2) != '/') + { + ec = errors::unsupported_url_protocol; + return std::make_tuple(url, path); + } + pos += 3; // skip "://" + + pos = std::find(pos, url.end(), '/'); + if (pos == url.end()) + { + return std::make_tuple(std::move(url), std::move(path)); + } + + 
base.assign(url.begin(), pos); + path.assign(pos, url.end()); + return std::make_tuple(std::move(base), std::move(path)); + } + } diff --git a/src/peer_connection.cpp b/src/peer_connection.cpp index e29078c6a..6108f9340 100644 --- a/src/peer_connection.cpp +++ b/src/peer_connection.cpp @@ -1381,6 +1381,22 @@ namespace libtorrent } } + void peer_connection::clear_download_queue() + { + std::shared_ptr t = m_torrent.lock(); + piece_picker& picker = t->picker(); + torrent_peer* self_peer = peer_info_struct(); + while (!m_download_queue.empty()) + { + pending_block& qe = m_download_queue.back(); + if (!qe.timed_out && !qe.not_wanted) + picker.abort_download(qe.block, self_peer); + m_outstanding_bytes -= t->to_req(qe.block).length; + if (m_outstanding_bytes < 0) m_outstanding_bytes = 0; + m_download_queue.pop_back(); + } + } + namespace { bool match_request(peer_request const& r, piece_block const& b, int const block_size) @@ -3595,8 +3611,8 @@ namespace libtorrent return; } - int block_offset = block.block_index * t->block_size(); - int block_size + int const block_offset = block.block_index * t->block_size(); + int const block_size = (std::min)(t->torrent_file().piece_size(block.piece_index)-block_offset, t->block_size()); TORRENT_ASSERT(block_size > 0); @@ -4262,17 +4278,8 @@ namespace libtorrent if (t->has_picker()) { + clear_download_queue(); piece_picker& picker = t->picker(); - - while (!m_download_queue.empty()) - { - pending_block& qe = m_download_queue.back(); - if (!qe.timed_out && !qe.not_wanted) - picker.abort_download(qe.block, self_peer); - m_outstanding_bytes -= t->to_req(qe.block).length; - if (m_outstanding_bytes < 0) m_outstanding_bytes = 0; - m_download_queue.pop_back(); - } while (!m_request_queue.empty()) { pending_block& qe = m_request_queue.back(); diff --git a/src/piece_picker.cpp b/src/piece_picker.cpp index 2c329928d..2cd428bcf 100644 --- a/src/piece_picker.cpp +++ b/src/piece_picker.cpp @@ -587,10 +587,7 @@ namespace libtorrent 
TORRENT_ASSERT(m_priority_boundaries.back() == int(m_pieces.size())); } -#ifndef TORRENT_EXPENSIVE_INVARIANT_CHECKS - return; -#endif - +#ifdef TORRENT_EXPENSIVE_INVARIANT_CHECKS { int index = 0; for (std::vector::const_iterator i = m_piece_map.begin() @@ -736,6 +733,7 @@ namespace libtorrent TORRENT_ASSERT(m_piece_map[*i].priority(this) >= 0); } } +#endif // TORRENT_EXPENSIVE_INVARIANT_CHECKS } #endif diff --git a/src/torrent.cpp b/src/torrent.cpp index 70d02f210..a36b0e425 100644 --- a/src/torrent.cpp +++ b/src/torrent.cpp @@ -127,32 +127,16 @@ namespace libtorrent web_seed_t::web_seed_t(web_seed_entry const& wse) : web_seed_entry(wse) - , retry(aux::time_now()) - , peer_info(tcp::endpoint(), true, 0) - , supports_keepalive(true) - , resolving(false) - , removed(false) { peer_info.web_seed = true; - restart_request.piece = -1; - restart_request.start = -1; - restart_request.length = -1; } web_seed_t::web_seed_t(std::string const& url_, web_seed_entry::type_t type_ , std::string const& auth_ , web_seed_entry::headers_t const& extra_headers_) : web_seed_entry(url_, type_, auth_, extra_headers_) - , retry(aux::time_now()) - , peer_info(tcp::endpoint(), true, 0) - , supports_keepalive(true) - , resolving(false) - , removed(false) { peer_info.web_seed = true; - restart_request.piece = -1; - restart_request.start = -1; - restart_request.length = -1; } torrent_hot_members::torrent_hot_members(aux::session_interface& ses @@ -6153,6 +6137,12 @@ namespace libtorrent if (is_paused()) return; if (m_ses.is_aborted()) return; + // this web seed may have redirected all files to other URLs, leaving it + // having no file left, and there's no longer any point in connecting to + // it. 
+ if (!web->have_files.empty() + && web->have_files.none_set()) return; + std::shared_ptr s = std::make_shared(m_ses.get_io_service()); if (!s) return; @@ -6450,7 +6440,7 @@ namespace libtorrent entry::list_type& httpseed_list = ret["httpseeds"].list(); for (web_seed_t const& ws : m_web_seeds) { - if (ws.removed) continue; + if (ws.removed || ws.ephemeral) continue; if (ws.type == web_seed_entry::url_seed) url_list.push_back(ws.url); else if (ws.type == web_seed_entry::http_seed) @@ -9008,16 +8998,21 @@ namespace libtorrent // add or remove a url that will be attempted for // finding the file(s) in this torrent. - void torrent::add_web_seed(std::string const& url + web_seed_t* torrent::add_web_seed(std::string const& url , web_seed_entry::type_t type , std::string const& auth - , web_seed_entry::headers_t const& extra_headers) + , web_seed_entry::headers_t const& extra_headers + , bool const ephemeral) { web_seed_t ent(url, type, auth, extra_headers); + ent.ephemeral = ephemeral; + // don't add duplicates - if (std::find(m_web_seeds.begin(), m_web_seeds.end(), ent) != m_web_seeds.end()) return; + auto it = std::find(m_web_seeds.begin(), m_web_seeds.end(), ent); + if (it != m_web_seeds.end()) return &*it; m_web_seeds.push_back(ent); set_need_save_resume(); + return &m_web_seeds.back(); } void torrent::set_session_paused(bool const b) diff --git a/src/torrent_handle.cpp b/src/torrent_handle.cpp index 75183f112..11f52fd4c 100644 --- a/src/torrent_handle.cpp +++ b/src/torrent_handle.cpp @@ -518,7 +518,7 @@ namespace libtorrent void torrent_handle::add_url_seed(std::string const& url) const { async_call(&torrent::add_web_seed, url, web_seed_entry::url_seed - , std::string(), web_seed_entry::headers_t()); + , std::string(), web_seed_entry::headers_t(), false); } void torrent_handle::remove_url_seed(std::string const& url) const @@ -535,7 +535,7 @@ namespace libtorrent void torrent_handle::add_http_seed(std::string const& url) const { async_call(&torrent::add_web_seed, 
url, web_seed_entry::http_seed - , std::string(), web_seed_entry::headers_t()); + , std::string(), web_seed_entry::headers_t(), false); } void torrent_handle::remove_http_seed(std::string const& url) const diff --git a/src/web_connection_base.cpp b/src/web_connection_base.cpp index d92a65003..7c448e124 100644 --- a/src/web_connection_base.cpp +++ b/src/web_connection_base.cpp @@ -110,9 +110,6 @@ namespace libtorrent std::shared_ptr t = associated_torrent().lock(); TORRENT_ASSERT(t); - // this is always a seed - incoming_have_all(); - // it is always possible to request pieces incoming_unchoke(); diff --git a/src/web_peer_connection.cpp b/src/web_peer_connection.cpp index 517d8e3f7..ec85021cf 100644 --- a/src/web_peer_connection.cpp +++ b/src/web_peer_connection.cpp @@ -131,7 +131,36 @@ web_peer_connection::web_peer_connection(peer_connection_args const& pack void web_peer_connection::on_connected() { - incoming_have_all(); + if (m_web->have_files.empty()) + { + incoming_have_all(); + } + else + { + std::shared_ptr t = associated_torrent().lock(); + + // only advertise pieces that are contained within the files we have as + // indicated by m_web->have_files AND padfiles! + // it's important to include pieces that may overlap many files, as long + // as we have all those files, so instead of starting with a clear bitfield + // and setting the pieces corresponding to files we have, we do it the + // other way around. Start with assuming we have all files, and clear + // pieces overlapping with files we *don't* have. 
+ bitfield have; + file_storage const& fs = t->torrent_file().files(); + have.resize(fs.num_pieces(), true); + int const num_files = m_web->have_files.size(); + for (int i = 0; i < num_files; ++i) + { + // if we have the file, no need to do anything + if (m_web->have_files.get_bit(i) || fs.pad_file_at(i)) continue; + + std::tuple const range = aux::file_piece_range_inclusive(fs, i); + for (int k = std::get<0>(range); k < std::get<1>(range); ++k) + have.clear_bit(k); + } + incoming_bitfield(have); + } if (m_web->restart_request.piece != -1) { // increase the chances of requesting the block @@ -320,7 +349,7 @@ void web_peer_connection::write_request(peer_request const& r) // pretend read callback where we can deliver the zeroes for the partfile int num_pad_files = 0; - // TODO: 2 do we really need a special case here? wouldn't the multi-file + // TODO: 3 do we really need a special case here? wouldn't the multi-file // case handle single file torrents correctly too? if (single_file_request) { @@ -373,23 +402,30 @@ void web_peer_connection::write_request(peer_request const& r) continue; } + TORRENT_ASSERT(m_web->have_files.empty() + || m_web->have_files.get_bit(f.file_index)); + request += "GET "; if (using_proxy) { // m_url is already a properly escaped URL // with the correct slashes. Don't encode it again request += m_url; - std::string path = info.orig_files().file_path(f.file_index); -#ifdef TORRENT_WINDOWS - convert_path_to_posix(path); -#endif - request += escape_path(path); + } + + auto redirection = m_web->redirects.find(f.file_index); + if (redirection != m_web->redirects.end()) + { + request += redirection->second; } else { - // m_path is already a properly escaped URL - // with the correct slashes. Don't encode it again - request += m_path; + if (!using_proxy) + { + // m_path is already a properly escaped URL + // with the correct slashes. 
Don't encode it again + request += m_path; + } std::string path = info.orig_files().file_path(f.file_index); #ifdef TORRENT_WINDOWS @@ -537,7 +573,7 @@ void web_peer_connection::on_receive_padfile() handle_padfile(); } -void web_peer_connection::handle_error(int bytes_left) +void web_peer_connection::handle_error(int const bytes_left) { std::shared_ptr t = associated_torrent().lock(); TORRENT_ASSERT(t); @@ -545,6 +581,7 @@ void web_peer_connection::handle_error(int bytes_left) // TODO: 2 just make this peer not have the pieces // associated with the file we just requested. Only // when it doesn't have any of the file do the following + // pad files will make it complicated int retry_time = atoi(m_parser.header("retry-after").c_str()); if (retry_time <= 0) retry_time = m_settings.get_int(settings_pack::urlseed_wait_retry); // temporarily unavailable, retry later @@ -561,7 +598,7 @@ void web_peer_connection::handle_error(int bytes_left) return; } -void web_peer_connection::handle_redirect(int bytes_left) +void web_peer_connection::handle_redirect(int const bytes_left) { // this means we got a redirection request // look for the location header @@ -594,37 +631,74 @@ void web_peer_connection::handle_redirect(int bytes_left) disconnect(errors::torrent_aborted, op_bittorrent); return; } - // TODO: 2 create a mapping of file-index to redirection URLs. Use that to form - // URLs instead. 
Support to reconnect to a new server without destructing this - // peer_connection - torrent_info const& info = t->torrent_file(); - std::string path = info.orig_files().file_path(file_index); -#ifdef TORRENT_WINDOWS - convert_path_to_posix(path); + + location = resolve_redirect_location(m_url, location); +#ifndef TORRENT_DISABLE_LOGGING + peer_log(peer_log_alert::info, "LOCATION", "%s", location.c_str()); #endif - path = escape_path(path); - size_t i = location.rfind(path); - if (i == std::string::npos) + // TODO: 3 this could be made more efficient for the case when we use an + // HTTP proxy. Then we wouldn't need to add new web seeds to the torrent, + // we could just make the redirect table contain full URLs. + std::string redirect_base; + std::string redirect_path; + error_code ec; + std::tie(redirect_base, redirect_path) = split_url(location, ec); + + if (ec) { - t->remove_web_seed_conn(this, errors::invalid_redirection, op_bittorrent, 2); - m_web = nullptr; - TORRENT_ASSERT(is_disconnecting()); + // we should not try this server again. + disconnect(errors::missing_location, op_bittorrent, 1); return; } - location.resize(i); + + // add_web_seed won't add duplicates. If we have already added an entry + // with this URL, we'll get back the existing entry + web_seed_t* web = t->add_web_seed(redirect_base, web_seed_entry::url_seed, m_external_auth, m_extra_headers); + web->have_files.resize(t->torrent_file().num_files(), false); + + // the new web seed we're adding only has this file for now + // we may add more files later + web->redirects[file_index] = redirect_path; + if (web->have_files.get_bit(file_index) == false) + { + web->have_files.set_bit(file_index); + if (web->peer_info.connection) + { + peer_connection* pc = static_cast(web->peer_info.connection); + + // we just learned that this host has this file, and we're currently + // connected to it. 
Make it advertise that it has this file to the + // bittorrent engine + file_storage const& fs = t->torrent_file().files(); + std::tuple const range = aux::file_piece_range_exclusive(fs, file_index); + for (int i = std::get<0>(range); i < std::get<1>(range); ++i) + pc->incoming_have(i); + } + } + + // we don't have this file on this server. Don't ask for it again + m_web->have_files.resize(t->torrent_file().num_files(), true); + if (m_web->have_files.get_bit(file_index) == true) + { + m_web->have_files.clear_bit(file_index); + disconnect(errors::redirecting, op_bittorrent, 2); + } } else { location = resolve_redirect_location(m_url, location); - } - #ifndef TORRENT_DISABLE_LOGGING - peer_log(peer_log_alert::info, "LOCATION", "%s", location.c_str()); + peer_log(peer_log_alert::info, "LOCATION", "%s", location.c_str()); #endif - t->add_web_seed(location, web_seed_entry::url_seed, m_external_auth, m_extra_headers); - t->remove_web_seed_conn(this, errors::redirecting, op_bittorrent, 2); - m_web = nullptr; - TORRENT_ASSERT(is_disconnecting()); + t->add_web_seed(location, web_seed_entry::url_seed, m_external_auth, m_extra_headers); + + // this web seed doesn't have any files. 
Don't try to request from it + // again this session + m_web->have_files.resize(t->torrent_file().num_files(), false); + disconnect(errors::redirecting, op_bittorrent, 2); + m_web = nullptr; + TORRENT_ASSERT(is_disconnecting()); + } return; } @@ -728,6 +802,9 @@ void web_peer_connection::on_receive(error_code const& error // if the status code is not one of the accepted ones, abort if (!is_ok_status(m_parser.status_code())) { + file_request_t const& file_req = m_file_requests.front(); + m_web->have_files.resize(t->torrent_file().num_files(), true); + m_web->have_files.clear_bit(file_req.file_index); handle_error(int(recv_buffer.size())); return; } diff --git a/test/test_file_storage.cpp b/test/test_file_storage.cpp index 78e11baa6..cfe2743a1 100644 --- a/test/test_file_storage.cpp +++ b/test/test_file_storage.cpp @@ -303,7 +303,68 @@ TORRENT_TEST(optimize_pad_fillers) TEST_EQUAL(fs.pad_file_at(3), false); } -// TODO: add more optimize() tests +TORRENT_TEST(piece_range_exclusive) +{ + int const piece_size = 16; + file_storage fs; + fs.set_piece_length(piece_size); + fs.add_file(combine_path("temp_storage", "0"), piece_size); + fs.add_file(combine_path("temp_storage", "1"), piece_size * 4 + 1); + fs.add_file(combine_path("temp_storage", "2"), piece_size * 4 - 1); + fs.set_num_pieces(int((fs.total_size() + piece_size - 1) / piece_size)); + // +---+---+---+---+---+---+---+---+---+ + // pieces | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | + // +---+---+---+---+---+---+---+---+---+ + // files | 0 | 1 | 2 | + // +---+----------------+--------------+ + + TEST_CHECK(aux::file_piece_range_exclusive(fs, 0) == std::make_tuple(0, 1)); + TEST_CHECK(aux::file_piece_range_exclusive(fs, 1) == std::make_tuple(1, 5)); + TEST_CHECK(aux::file_piece_range_exclusive(fs, 2) == std::make_tuple(6, 9)); +} + +TORRENT_TEST(piece_range_inclusive) +{ + int const piece_size = 16; + file_storage fs; + fs.set_piece_length(piece_size); + fs.add_file(combine_path("temp_storage", "0"), piece_size); + 
fs.add_file(combine_path("temp_storage", "1"), piece_size * 4 + 1); + fs.add_file(combine_path("temp_storage", "2"), piece_size * 4 - 1); + fs.set_num_pieces(int((fs.total_size() + piece_size - 1) / piece_size)); + // +---+---+---+---+---+---+---+---+---+ + // pieces | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | + // +---+---+---+---+---+---+---+---+---+ + // files | 0 | 1 | 2 | + // +---+----------------+--------------+ + + TEST_CHECK(aux::file_piece_range_inclusive(fs, 0) == std::make_tuple(0, 1)); + TEST_CHECK(aux::file_piece_range_inclusive(fs, 1) == std::make_tuple(1, 6)); + TEST_CHECK(aux::file_piece_range_inclusive(fs, 2) == std::make_tuple(5, 9)); +} + +TORRENT_TEST(piece_range) +{ + int const piece_size = 0x4000; + file_storage fs; + fs.set_piece_length(piece_size); + fs.add_file(combine_path("temp_storage", "0"), piece_size * 3); + fs.add_file(combine_path("temp_storage", "1"), piece_size * 3 + 0x30); + fs.set_num_pieces(int((fs.total_size() + piece_size - 1) / piece_size)); + // +---+---+---+---+---+---+---+ + // pieces | 0 | 1 | 2 | 3 | 4 | 5 | 6 | + // +---+---+---+---+---+---+---+ + // files | 0 | 1 | + // +---+-------+------------+ + + TEST_CHECK(aux::file_piece_range_inclusive(fs, 0) == std::make_tuple(0, 3)); + TEST_CHECK(aux::file_piece_range_inclusive(fs, 1) == std::make_tuple(3, 7)); + + TEST_CHECK(aux::file_piece_range_exclusive(fs, 0) == std::make_tuple(0, 3)); + TEST_CHECK(aux::file_piece_range_exclusive(fs, 1) == std::make_tuple(3, 7)); +} + +// TODO: test file_storage::optimize // TODO: test map_block // TODO: test piece_size(int piece) // TODO: test file_index_at_offset diff --git a/test/test_http_parser.cpp b/test/test_http_parser.cpp index 3b9aea934..5a9651da9 100644 --- a/test/test_http_parser.cpp +++ b/test/test_http_parser.cpp @@ -395,6 +395,52 @@ TORRENT_TEST(http_parser) TEST_CHECK(ec == error_code(errors::unsupported_url_protocol)); ec.clear(); + // test split_url + + TEST_CHECK(split_url("http://foo:bar@host.com:80/path/to/file", ec) + == 
std::make_tuple("http://foo:bar@host.com:80", "/path/to/file")); + + TEST_CHECK(split_url("http://host.com/path/to/file", ec) + == std::make_tuple("http://host.com", "/path/to/file")); + + TEST_CHECK(split_url("ftp://host.com:21/path/to/file", ec) + == std::make_tuple("ftp://host.com:21", "/path/to/file")); + + TEST_CHECK(split_url("http://host.com/path?foo:bar@foo:", ec) + == std::make_tuple("http://host.com", "/path?foo:bar@foo:")); + + TEST_CHECK(split_url("http://192.168.0.1/path/to/file", ec) + == std::make_tuple("http://192.168.0.1", "/path/to/file")); + + TEST_CHECK(split_url("http://[2001:ff00::1]:42/path/to/file", ec) + == std::make_tuple("http://[2001:ff00::1]:42", "/path/to/file")); + + TEST_CHECK(split_url("http://[2001:ff00::1]:42", ec) + == std::make_tuple("http://[2001:ff00::1]:42", "")); + + TEST_CHECK(split_url("bla://[2001:ff00::1]:42/path/to/file", ec) + == std::make_tuple("bla://[2001:ff00::1]:42", "/path/to/file")); + + ec.clear(); + TEST_CHECK(split_url("foo:/[2001:ff00::1]:42/path/to/file", ec) + == std::make_tuple("foo:/[2001:ff00::1]:42/path/to/file", "")); + TEST_CHECK(ec == error_code(errors::unsupported_url_protocol)); + + ec.clear(); + TEST_CHECK(split_url("foo:/", ec) + == std::make_tuple("foo:/", "")); + TEST_CHECK(ec == error_code(errors::unsupported_url_protocol)); + + ec.clear(); + TEST_CHECK(split_url("//[2001:ff00::1]:42/path/to/file", ec) + == std::make_tuple("//[2001:ff00::1]:42/path/to/file", "")); + TEST_CHECK(ec == error_code(errors::unsupported_url_protocol)); + + ec.clear(); + TEST_CHECK(split_url("//host.com/path?foo:bar@foo:", ec) + == std::make_tuple("//host.com/path?foo:bar@foo:", "")); + TEST_CHECK(ec == error_code(errors::unsupported_url_protocol)); + // test resolve_redirect_location TEST_EQUAL(resolve_redirect_location("http://example.com/a/b", "a")