From 1a27ff7107f928cdfc2834ae02f57f4f160d1f23 Mon Sep 17 00:00:00 2001 From: Arvid Norberg Date: Fri, 5 Oct 2018 02:19:27 +0200 Subject: [PATCH] parse_magnet_uri simplification --- include/libtorrent/aux_/escape_string.hpp | 3 - src/escape_string.cpp | 23 -- src/magnet_uri.cpp | 285 ++++++++++------------ test/test_magnet.cpp | 35 ++- test/test_string.cpp | 12 - test/test_torrent.cpp | 6 +- 6 files changed, 150 insertions(+), 214 deletions(-) diff --git a/include/libtorrent/aux_/escape_string.hpp b/include/libtorrent/aux_/escape_string.hpp index 29f352d2b..da5dd7037 100644 --- a/include/libtorrent/aux_/escape_string.hpp +++ b/include/libtorrent/aux_/escape_string.hpp @@ -84,9 +84,6 @@ namespace libtorrent { #endif TORRENT_EXTRA_EXPORT std::string base32decode(string_view s); - TORRENT_EXTRA_EXPORT string_view url_has_argument( - string_view url, std::string argument, std::string::size_type* out_pos = nullptr); - // replaces \ with / TORRENT_EXTRA_EXPORT void convert_path_to_posix(std::string& path); #ifdef TORRENT_WINDOWS diff --git a/src/escape_string.cpp b/src/escape_string.cpp index 304585581..cd6552719 100644 --- a/src/escape_string.cpp +++ b/src/escape_string.cpp @@ -473,29 +473,6 @@ namespace libtorrent { return pos + p; } - string_view url_has_argument( - string_view url, std::string argument, std::string::size_type* out_pos) - { - auto i = url.find('?'); - if (i == std::string::npos) return {}; - ++i; - - argument += '='; - - if (url.substr(i, argument.size()) == argument) - { - auto const pos = i + argument.size(); - if (out_pos) *out_pos = pos; - return url.substr(pos, url.substr(pos).find('&')); - } - argument.insert(0, "&"); - i = find(url, argument, i); - if (i == std::string::npos) return {}; - auto const pos = i + argument.size(); - if (out_pos) *out_pos = pos; - return url.substr(pos, find(url, "&", pos) - pos); - } - #if defined TORRENT_WINDOWS std::wstring convert_to_wstring(std::string const& s) { diff --git a/src/magnet_uri.cpp b/src/magnet_uri.cpp index a264d9f97..b58422165 100644 --- a/src/magnet_uri.cpp +++ b/src/magnet_uri.cpp @@ -132,6 +132,8 @@ namespace libtorrent { , void* userdata) { add_torrent_params params(std::move(sc)); + error_code ec; + parse_magnet_uri(uri, params, ec); params.storage_mode = storage_mode; params.userdata = userdata; params.save_path = save_path; @@ -139,20 +141,6 @@ namespace libtorrent { if (paused) params.flags |= add_torrent_params::flag_paused; else params.flags &= ~add_torrent_params::flag_paused; - error_code ec; - string_view display_name = url_has_argument(uri, "dn"); - if (!display_name.empty()) params.name = unescape_string(display_name, ec); - string_view tracker_string = url_has_argument(uri, "tr"); - if (!tracker_string.empty()) params.trackers.push_back(unescape_string(tracker_string, ec)); - - string_view btih = url_has_argument(uri, "xt"); - if (btih.empty()) return torrent_handle(); - - if (btih.substr(0, 9) != "urn:btih:") return torrent_handle(); - - if (btih.size() == 40 + 9) aux::from_hex({&btih[9], 40}, params.info_hash.data()); - else params.info_hash.assign(base32decode(btih.substr(9)).c_str()); - return ses.add_torrent(std::move(params)); } @@ -177,185 +165,162 @@ namespace libtorrent { void parse_magnet_uri(string_view uri, add_torrent_params& p, error_code& ec) { ec.clear(); - std::string name; + std::string display_name; + string_view sv(uri); + if (sv.substr(0, 8) != "magnet:?"_sv) { - error_code e; - string_view display_name = url_has_argument(uri, "dn"); - if (!display_name.empty()) name = unescape_string(display_name, e); + ec = errors::unsupported_url_protocol; + return; } + sv = sv.substr(8); - // parse trackers out of the magnet link - auto pos = std::string::npos; - string_view url = url_has_argument(uri, "tr", &pos); int tier = 0; - while (pos != std::string::npos) + bool has_ih = false; + while (!sv.empty()) { - // since we're about to assign tiers to the trackers, make sure the two - // vectors are aligned - if (p.tracker_tiers.size() != p.trackers.size()) - p.tracker_tiers.resize(p.trackers.size(), 0); + string_view name; + std::tie(name, sv) = split_string(sv, '='); + string_view value; + std::tie(value, sv) = split_string(sv, '&'); - error_code e; - std::string tracker = unescape_string(url, e); - if (!e) + if (name == "dn"_sv) // display name { - p.trackers.push_back(std::move(tracker)); - p.tracker_tiers.push_back(tier++); + error_code e; + display_name = unescape_string(value, e); } - pos = find(uri, "&tr=", pos); - if (pos == std::string::npos) break; - pos += 4; - url = uri.substr(pos, find(uri, "&", pos) - pos); - } + else if (name == "tr"_sv) // tracker + { + // since we're about to assign tiers to the trackers, make sure the two + // vectors are aligned + if (p.tracker_tiers.size() != p.trackers.size()) + p.tracker_tiers.resize(p.trackers.size(), 0); + error_code e; + std::string tracker = unescape_string(value, e); + if (!e) + { + p.trackers.push_back(std::move(tracker)); + p.tracker_tiers.push_back(tier++); + } + } + else if (name == "ws"_sv) // web seed + { + error_code e; + std::string webseed = unescape_string(value, e); + if (!e) p.url_seeds.push_back(std::move(webseed)); + } + else if (name == "xt"_sv) + { + std::string unescaped_btih; + if (value.find('%') != string_view::npos) + { + unescaped_btih = unescape_string(value, ec); + if (ec) return; + value = unescaped_btih; + } - // parse web seeds out of the magnet link - pos = std::string::npos; - url = url_has_argument(uri, "ws", &pos); - while (pos != std::string::npos) - { - error_code e; - std::string webseed = unescape_string(url, e); - if (!e) p.url_seeds.push_back(std::move(webseed)); - pos = find(uri, "&ws=", pos); - if (pos == std::string::npos) break; - pos += 4; - url = uri.substr(pos, find(uri, "&", pos) - pos); - } + if (value.substr(0, 9) != "urn:btih:") continue; + value = value.substr(9); - string_view btih = url_has_argument(uri, "xt"); - std::string unescaped_btih; - if (btih.empty()) - { - ec = errors::missing_info_hash_in_uri; - return; - } - if (btih.find('%') != string_view::npos) - { - unescaped_btih = unescape_string(btih, ec); - if (ec) return; - btih = unescaped_btih; - } - if (btih.substr(0, 9) != "urn:btih:") - { - ec = errors::missing_info_hash_in_uri; - return; - } - - auto select_pos = std::string::npos; - string_view select = url_has_argument(uri, "so", &select_pos); - while (!select.empty()) - { - // parse the ranges or indices - do + sha1_hash info_hash; + if (value.size() == 40) aux::from_hex({value.data(), 40}, info_hash.data()); + else if (value.size() == 32) + { + std::string const ih = base32decode(value); + if (ih.size() != 20) + { + ec = errors::invalid_info_hash; + return; + } + info_hash.assign(ih); + } + else + { + ec = errors::invalid_info_hash; + return; + } + p.info_hash = info_hash; + has_ih = true; + } + else if (name == "so"_sv) // select-only (files) { // accept only digits, '-' and ',' - if (std::any_of(select.begin(), select.end(), [](char c) + if (std::any_of(value.begin(), value.end(), [](char c) { return !is_digit(c) && c != '-' && c != ','; })) - break; + continue; - string_view token; - std::tie(token, select) = split_string(select, ','); - - int idx1, idx2; - // TODO: what's the right number here? - constexpr int max_index = 10000; // can't risk out of memory - - auto const divider = token.find_first_of('-'); - if (divider != std::string::npos) // it's a range + do { - if (divider == 0) // no start index - continue; - if (divider == token.size() - 1) // no end index - continue; + string_view token; + std::tie(token, value) = split_string(value, ','); - idx1 = std::atoi(token.substr(0, divider).to_string().c_str()); - if (idx1 < 0 || idx1 > max_index) // invalid index - continue; - idx2 = std::atoi(token.substr(divider + 1).to_string().c_str()); - if (idx2 < 0 || idx2 > max_index) // invalid index - continue; + if (token.empty()) continue; - if (idx1 > idx2) // wrong range limits - continue; - } - else // it's an index - { - idx1 = std::atoi(token.to_string().c_str()); - if (idx1 < 0 || idx1 > max_index) // invalid index - continue; - idx2 = idx1; - } + int idx1, idx2; + // TODO: what's the right number here? + constexpr int max_index = 10000; // can't risk out of memory - if (int(p.file_priorities.size()) <= idx2) - p.file_priorities.resize(std::size_t(idx2 + 1), dont_download); + auto const divider = token.find_first_of('-'); + if (divider != std::string::npos) // it's a range + { + if (divider == 0) // no start index + continue; + if (divider == token.size() - 1) // no end index + continue; - for (int i = idx1; i <= idx2; i++) - p.file_priorities[std::size_t(i)] = default_priority; + idx1 = std::atoi(token.substr(0, divider).to_string().c_str()); + if (idx1 < 0 || idx1 > max_index) // invalid index + continue; + idx2 = std::atoi(token.substr(divider + 1).to_string().c_str()); + if (idx2 < 0 || idx2 > max_index) // invalid index + continue; - } while (!select.empty()); + if (idx1 > idx2) // wrong range limits + continue; + } + else // it's an index + { + idx1 = std::atoi(token.to_string().c_str()); + if (idx1 < 0 || idx1 > max_index) // invalid index + continue; + idx2 = idx1; + } - select_pos = find(uri, "&so=", select_pos); - if (select_pos == std::string::npos) break; - select_pos += 4; - select = uri.substr(select_pos, find(uri, "&", select_pos) - select_pos); - } + if (int(p.file_priorities.size()) <= idx2) + p.file_priorities.resize(std::size_t(idx2 + 1), dont_download); - std::string::size_type peer_pos = std::string::npos; - string_view peer = url_has_argument(uri, "x.pe", &peer_pos); - while (!peer.empty()) - { - error_code e; - tcp::endpoint endp = parse_endpoint(peer, e); - if (!e) - p.peers.push_back(std::move(endp)); - - peer_pos = find(uri, "&x.pe=", peer_pos); - if (peer_pos == std::string::npos) break; - peer_pos += 6; - peer = uri.substr(peer_pos, find(uri, "&", peer_pos) - peer_pos); - } + for (int i = idx1; i <= idx2; i++) + p.file_priorities[std::size_t(i)] = default_priority; + } while (!value.empty()); + } + else if (name == "x.pe") + { + error_code e; + tcp::endpoint endp = parse_endpoint(value, e); + if (!e) p.peers.push_back(std::move(endp)); + } #ifndef TORRENT_DISABLE_DHT - std::string::size_type node_pos = std::string::npos; - string_view node = url_has_argument(uri, "dht", &node_pos); - while (!node.empty()) - { - std::string::size_type const divider = node.find_last_of(':'); - if (divider != std::string::npos) + else if (name == "dht"_sv) { - int const port = std::atoi(node.substr(divider + 1).to_string().c_str()); - if (port > 0 && port < int(std::numeric_limits::max())) - p.dht_nodes.emplace_back(node.substr(0, divider).to_string(), port); + auto const divider = value.find_last_of(':'); + if (divider != std::string::npos) + { + int const port = std::atoi(value.substr(divider + 1).to_string().c_str()); + if (port > 0 && port < int(std::numeric_limits::max())) + p.dht_nodes.emplace_back(value.substr(0, divider).to_string(), port); + } } - - node_pos = find(uri, "&dht=", node_pos); - if (node_pos == std::string::npos) break; - node_pos += 5; - node = uri.substr(node_pos, find(uri, "&", node_pos) - node_pos); - } #endif - - sha1_hash info_hash; - if (btih.size() == 40 + 9) aux::from_hex({&btih[9], 40}, info_hash.data()); - else if (btih.size() == 32 + 9) - { - std::string const ih = base32decode(btih.substr(9)); - if (ih.size() != 20) - { - ec = errors::invalid_info_hash; - return; - } - info_hash.assign(ih); } - else + + if (!has_ih) { - ec = errors::invalid_info_hash; + ec = errors::missing_info_hash_in_uri; return; } - p.info_hash = info_hash; - if (!name.empty()) p.name = name; + if (!display_name.empty()) p.name = display_name; } add_torrent_params parse_magnet_uri(string_view uri) diff --git a/test/test_magnet.cpp b/test/test_magnet.cpp index a407725cc..93a9af558 100644 --- a/test/test_magnet.cpp +++ b/test/test_magnet.cpp @@ -145,6 +145,7 @@ TORRENT_TEST(magnet) "&tr=http://2" "&dn=foo" "&dht=127.0.0.1:43" + "&xt=urn:ed2k:a0a9277894123b27945224fbac8366c9" "&xt=urn:btih:c352cdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcd"); p.flags &= ~torrent_flags::paused; p.flags &= ~torrent_flags::auto_managed; @@ -481,49 +482,57 @@ TORRENT_TEST(parse_magnet_select_only_multiple) , {no, yes, yes, yes, yes}); } -TORRENT_TEST(parse_magnet_select_only_invalid_index_and_range) +TORRENT_TEST(parse_magnet_select_only_inverted_range) { test_select_only("magnet:?xt=urn:btih:cdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcd" - "&dn=foo&so=-4,3-,7-4,a,100000000&dht=10.0.0.1:1337&so=10" + "&dn=foo&so=7-4,100000000&dht=10.0.0.1:1337&so=10" + , {no, no, no, no, no, no, no, no, no, no, yes}); +} + +TORRENT_TEST(parse_magnet_select_only_index_bounds) +{ + test_select_only("magnet:?xt=urn:btih:cdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcd" + "&dn=foo&so=100000000&dht=10.0.0.1:1337&so=10" , {no, no, no, no, no, no, no, no, no, no, yes}); } TORRENT_TEST(parse_magnet_select_only_invalid_range1) { test_select_only("magnet:?xt=urn:btih:cdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcd" - "&dn=foo&so=-4", {}); + "&dn=foo&so=-4&so=1", {no, yes}); } TORRENT_TEST(parse_magnet_select_only_invalid_range2) { test_select_only("magnet:?xt=urn:btih:cdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcd" - "&dn=foo&so=3-", {}); -} - -TORRENT_TEST(parse_magnet_select_only_invalid_range3) -{ - test_select_only("magnet:?xt=urn:btih:cdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcd" - "&dn=foo&so=7-4", {}); + "&dn=foo&so=3-&so=1", {no, yes}); } TORRENT_TEST(parse_magnet_select_only_invalid_index_character) { test_select_only("magnet:?xt=urn:btih:cdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcd" - "&dn=foo&so=a", {}); + "&dn=foo&so=a&so=1", {no, yes}); } TORRENT_TEST(parse_magnet_select_only_invalid_index_value) { test_select_only("magnet:?xt=urn:btih:cdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcd" - "&dn=foo&so=100000000", {}); + "&dn=foo&so=100000000&so=1", {no, yes}); +} + +TORRENT_TEST(parse_magnet_select_only_invalid_no_value) +{ + test_select_only("magnet:?xt=urn:btih:cdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcd" + "&dn=foo&so=&dht=10.0.0.1:1337&so=", {}); } TORRENT_TEST(parse_magnet_select_only_invalid_no_values) { test_select_only("magnet:?xt=urn:btih:cdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcd" - "&dn=foo&so=&dht=10.0.0.1:1337&so=", {}); + "&dn=foo&so=&dht=10.0.0.1:1337&so=,,1", {no, yes}); } + TORRENT_TEST(parse_magnet_select_only_invalid_quotes) { test_select_only("magnet:?xt=urn:btih:cdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcd" diff --git a/test/test_string.cpp b/test/test_string.cpp index ab820138d..549327696 100644 --- a/test/test_string.cpp +++ b/test/test_string.cpp @@ -320,18 +320,6 @@ TORRENT_TEST(read_until) == "abcdesdf sdgf"); } -TORRENT_TEST(url_has_argument) -{ - TEST_CHECK(url_has_argument("http://127.0.0.1/test", "test") == ""); - TEST_CHECK(url_has_argument("http://127.0.0.1/test?foo=24", "bar") == ""); - TEST_CHECK(url_has_argument("http://127.0.0.1/test?foo=24", "foo") == "24"); - TEST_CHECK(url_has_argument("http://127.0.0.1/test?foo=24&bar=23", "foo") == "24"); - TEST_CHECK(url_has_argument("http://127.0.0.1/test?foo=24&bar=23", "bar") == "23"); - TEST_CHECK(url_has_argument("http://127.0.0.1/test?foo=24&bar=23&a=e", "bar") == "23"); - TEST_CHECK(url_has_argument("http://127.0.0.1/test?foo=24&bar=23&a=e", "a") == "e"); - TEST_CHECK(url_has_argument("http://127.0.0.1/test?foo=24&bar=23&a=e", "b") == ""); -} - TORRENT_TEST(path) { std::string path = "a\\b\\c"; diff --git a/test/test_torrent.cpp b/test/test_torrent.cpp index ef347d31a..809588085 100644 --- a/test/test_torrent.cpp +++ b/test/test_torrent.cpp @@ -609,7 +609,7 @@ TORRENT_TEST(test_move_storage_no_metadata) { lt::session ses(settings()); error_code ec; - add_torrent_params p = parse_magnet_uri("magnet?xt=urn:btih:abababababababababababababababababababab", ec); + add_torrent_params p = parse_magnet_uri("magnet:?xt=urn:btih:abababababababababababababababababababab", ec); p.save_path = "save_path"; torrent_handle h = ses.add_torrent(p); @@ -624,7 +624,7 @@ TORRENT_TEST(test_have_piece_no_metadata) { lt::session ses(settings()); error_code ec; - add_torrent_params p = parse_magnet_uri("magnet?xt=urn:btih:abababababababababababababababababababab", ec); + add_torrent_params p = parse_magnet_uri("magnet:?xt=urn:btih:abababababababababababababababababababab", ec); p.save_path = "save_path"; torrent_handle h = ses.add_torrent(p); @@ -655,7 +655,7 @@ TORRENT_TEST(test_read_piece_no_metadata) { lt::session ses(settings()); error_code ec; - add_torrent_params p = parse_magnet_uri("magnet?xt=urn:btih:abababababababababababababababababababab", ec); + add_torrent_params p = parse_magnet_uri("magnet:?xt=urn:btih:abababababababababababababababababababab", ec); p.save_path = "save_path"; torrent_handle h = ses.add_torrent(p);