diff --git a/ChangeLog b/ChangeLog index 6cf2256c8..3eb4f2c35 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,4 @@ + * make parse_magnet_uri take a string_view instead of std::string * deprecate add_torrent_params::url field. use parse_magnet_uri instead * optimize download queue management * deprecated (undocumented) file:// urls diff --git a/include/libtorrent/aux_/escape_string.hpp b/include/libtorrent/aux_/escape_string.hpp index abd380335..4dc34989e 100644 --- a/include/libtorrent/aux_/escape_string.hpp +++ b/include/libtorrent/aux_/escape_string.hpp @@ -54,8 +54,7 @@ namespace libtorrent } - // TODO: 3 this should probably take a string_ref - TORRENT_EXTRA_EXPORT std::string unescape_string(std::string const& s, error_code& ec); + TORRENT_EXTRA_EXPORT std::string unescape_string(string_view s, error_code& ec); // replaces all disallowed URL characters by their %-encoding TORRENT_EXTRA_EXPORT std::string escape_string(string_view str); // same as escape_string but does not encode '/' @@ -64,6 +63,9 @@ namespace libtorrent // it will be encoded TORRENT_EXTRA_EXPORT std::string maybe_url_encode(std::string const& url); + TORRENT_EXTRA_EXPORT string_view trim(string_view); + TORRENT_EXTRA_EXPORT string_view::size_type find(string_view haystack, string_view needle, string_view::size_type pos); + #ifndef TORRENT_NO_DEPRECATE // deprecated in 1.2 // convert a file://-URL to a proper path @@ -77,11 +79,11 @@ namespace libtorrent // encodes a string using the base64 scheme TORRENT_EXTRA_EXPORT std::string base64encode(std::string const& s); // encodes a string using the base32 scheme - TORRENT_EXTRA_EXPORT std::string base32encode(std::string const& s, int flags=0); - TORRENT_EXTRA_EXPORT std::string base32decode(std::string const& s); + TORRENT_EXTRA_EXPORT std::string base32encode(string_view s, int flags = 0); + TORRENT_EXTRA_EXPORT std::string base32decode(string_view s); - TORRENT_EXTRA_EXPORT std::string url_has_argument( - std::string const& url, std::string 
argument, std::string::size_type* out_pos = 0); + TORRENT_EXTRA_EXPORT string_view url_has_argument( + string_view url, std::string argument, std::string::size_type* out_pos = 0); // replaces \ with / TORRENT_EXTRA_EXPORT void convert_path_to_posix(std::string& path); diff --git a/include/libtorrent/magnet_uri.hpp b/include/libtorrent/magnet_uri.hpp index 308eff5c1..aebe4143e 100644 --- a/include/libtorrent/magnet_uri.hpp +++ b/include/libtorrent/magnet_uri.hpp @@ -37,6 +37,7 @@ POSSIBILITY OF SUCH DAMAGE. #include "libtorrent/config.hpp" #include "libtorrent/torrent_handle.hpp" #include "libtorrent/add_torrent_params.hpp" +#include "libtorrent/string_view.hpp" namespace libtorrent { @@ -77,7 +78,7 @@ namespace libtorrent // This function parses out information from the magnet link and populates the // add_torrent_params object. - TORRENT_EXPORT void parse_magnet_uri(std::string const& uri, add_torrent_params& p, error_code& ec); + TORRENT_EXPORT void parse_magnet_uri(string_view uri, add_torrent_params& p, error_code& ec); } #endif diff --git a/include/libtorrent/socket_io.hpp b/include/libtorrent/socket_io.hpp index cf7a17a42..d89f70a92 100644 --- a/include/libtorrent/socket_io.hpp +++ b/include/libtorrent/socket_io.hpp @@ -40,6 +40,7 @@ POSSIBILITY OF SUCH DAMAGE. 
#include "libtorrent/bdecode.hpp" #include "libtorrent/bencode.hpp" #include "libtorrent/sha1_hash.hpp" +#include "libtorrent/string_view.hpp" #include namespace libtorrent @@ -48,7 +49,7 @@ namespace libtorrent TORRENT_EXTRA_EXPORT std::string print_endpoint(address const& addr, int port); TORRENT_EXTRA_EXPORT std::string print_endpoint(tcp::endpoint const& ep); TORRENT_EXTRA_EXPORT std::string print_endpoint(udp::endpoint const& ep); - TORRENT_EXTRA_EXPORT tcp::endpoint parse_endpoint(std::string str, error_code& ec); + TORRENT_EXTRA_EXPORT tcp::endpoint parse_endpoint(string_view str, error_code& ec); TORRENT_EXTRA_EXPORT std::string address_to_bytes(address const& a); TORRENT_EXTRA_EXPORT std::string endpoint_to_bytes(udp::endpoint const& ep); diff --git a/src/escape_string.cpp b/src/escape_string.cpp index edc8315db..b1c5365db 100644 --- a/src/escape_string.cpp +++ b/src/escape_string.cpp @@ -66,12 +66,12 @@ namespace libtorrent extern const char hex_chars[]; } - std::string unescape_string(std::string const& s, error_code& ec) + std::string unescape_string(string_view s, error_code& ec) { std::string ret; - for (std::string::const_iterator i = s.begin(); i != s.end(); ++i) + for (auto i = s.begin(); i != s.end(); ++i) { - if(*i == '+') + if (*i == '+') { ret += ' '; } @@ -89,9 +89,9 @@ namespace libtorrent } int high; - if(*i >= '0' && *i <= '9') high = *i - '0'; - else if(*i >= 'A' && *i <= 'F') high = *i + 10 - 'A'; - else if(*i >= 'a' && *i <= 'f') high = *i + 10 - 'a'; + if (*i >= '0' && *i <= '9') high = *i - '0'; + else if (*i >= 'A' && *i <= 'F') high = *i + 10 - 'A'; + else if (*i >= 'a' && *i <= 'f') high = *i + 10 - 'a'; else { ec = errors::invalid_escaped_string; @@ -338,7 +338,7 @@ namespace libtorrent return ret; } - std::string base32encode(std::string const& s, int flags) + std::string base32encode(string_view s, int flags) { static char const base32_table_canonical[] = { @@ -362,7 +362,7 @@ namespace libtorrent aux::array outbuf; std::string 
ret; - for (std::string::const_iterator i = s.begin(); i != s.end();) + for (auto i = s.begin(); i != s.end();) { int available_input = std::min(int(inbuf.size()), int(s.end()-i)); @@ -402,13 +402,13 @@ namespace libtorrent return ret; } - std::string base32decode(std::string const& s) + std::string base32decode(string_view s) { aux::array inbuf; aux::array outbuf; std::string ret; - for (std::string::const_iterator i = s.begin(); i != s.end();) + for (auto i = s.begin(); i != s.end();) { int available_input = std::min(int(inbuf.size()), int(s.end() - i)); @@ -459,27 +459,41 @@ namespace libtorrent return ret; } - std::string url_has_argument( - std::string const& url, std::string argument, std::string::size_type* out_pos) + string_view trim(string_view str) { - size_t i = url.find('?'); - if (i == std::string::npos) return std::string(); + auto const first = str.find_first_not_of(" \t\n\r"); + auto const last = str.find_last_not_of(" \t\n\r"); + return str.substr(first, last - first + 1); + } + + string_view::size_type find(string_view haystack, string_view needle, string_view::size_type pos) + { + auto const p = haystack.substr(pos).find(needle); + if (p == string_view::npos) return p; + return pos + p; + } + + string_view url_has_argument( + string_view url, std::string argument, std::string::size_type* out_pos) + { + auto i = url.find('?'); + if (i == std::string::npos) return {}; ++i; argument += '='; - if (url.compare(i, argument.size(), argument) == 0) + if (url.substr(i, argument.size()) == argument) { - size_t pos = i + argument.size(); + auto pos = i + argument.size(); if (out_pos) *out_pos = pos; - return url.substr(pos, url.find('&', pos) - pos); + return url.substr(pos, url.substr(pos).find('&')); } argument.insert(0, "&"); - i = url.find(argument, i); - if (i == std::string::npos) return std::string(); - size_t pos = i + argument.size(); + i = find(url, argument, i); + if (i == std::string::npos) return {}; + auto pos = i + argument.size(); if 
(out_pos) *out_pos = pos; - return url.substr(pos, url.find('&', pos) - pos); + return url.substr(pos, find(url, "&", pos) - pos); } #if defined TORRENT_WINDOWS && TORRENT_USE_WSTRING diff --git a/src/magnet_uri.cpp b/src/magnet_uri.cpp index 1152664ed..f275fd88a 100644 --- a/src/magnet_uri.cpp +++ b/src/magnet_uri.cpp @@ -140,15 +140,15 @@ namespace libtorrent else params.flags &= ~add_torrent_params::flag_paused; error_code ec; - std::string display_name = url_has_argument(uri, "dn"); - if (!display_name.empty()) params.name = unescape_string(display_name.c_str(), ec); - std::string tracker_string = url_has_argument(uri, "tr"); - if (!tracker_string.empty()) params.trackers.push_back(unescape_string(tracker_string.c_str(), ec)); + string_view display_name = url_has_argument(uri, "dn"); + if (!display_name.empty()) params.name = unescape_string(display_name, ec); + string_view tracker_string = url_has_argument(uri, "tr"); + if (!tracker_string.empty()) params.trackers.push_back(unescape_string(tracker_string, ec)); - std::string btih = url_has_argument(uri, "xt"); + string_view btih = url_has_argument(uri, "xt"); if (btih.empty()) return torrent_handle(); - if (btih.compare(0, 9, "urn:btih:") != 0) return torrent_handle(); + if (btih.substr(0, 9) != "urn:btih:") return torrent_handle(); if (btih.size() == 40 + 9) aux::from_hex({&btih[9], 40}, params.info_hash.data()); else params.info_hash.assign(base32decode(btih.substr(9)).c_str()); @@ -167,21 +167,20 @@ namespace libtorrent #endif // BOOST_NO_EXCEPTIONS #endif // TORRENT_NO_DEPRECATE - // TODO: 3 take string_view here instead - void parse_magnet_uri(std::string const& uri, add_torrent_params& p, error_code& ec) + void parse_magnet_uri(string_view uri, add_torrent_params& p, error_code& ec) { ec.clear(); std::string name; { error_code e; - std::string display_name = url_has_argument(uri, "dn"); - if (!display_name.empty()) name = unescape_string(display_name.c_str(), e); + string_view display_name = 
url_has_argument(uri, "dn"); + if (!display_name.empty()) name = unescape_string(display_name, e); } // parse trackers out of the magnet link - std::string::size_type pos = std::string::npos; - std::string url = url_has_argument(uri, "tr", &pos); + auto pos = std::string::npos; + string_view url = url_has_argument(uri, "tr", &pos); int tier = 0; while (pos != std::string::npos) { @@ -191,14 +190,14 @@ namespace libtorrent p.tracker_tiers.resize(p.trackers.size(), 0); error_code e; - url = unescape_string(url, e); + std::string tracker = unescape_string(url, e); if (e) continue; - p.trackers.push_back(url); + p.trackers.push_back(std::move(tracker)); p.tracker_tiers.push_back(tier++); - pos = uri.find("&tr=", pos); + pos = find(uri, "&tr=", pos); if (pos == std::string::npos) break; pos += 4; - url = uri.substr(pos, uri.find('&', pos) - pos); + url = uri.substr(pos, find(uri, "&", pos) - pos); } // parse web seeds out of the magnet link @@ -207,30 +206,30 @@ namespace libtorrent while (pos != std::string::npos) { error_code e; - url = unescape_string(url, e); + std::string webseed = unescape_string(url, e); if (e) continue; - p.url_seeds.push_back(url); - pos = uri.find("&ws=", pos); + p.url_seeds.push_back(std::move(webseed)); + pos = find(uri, "&ws=", pos); if (pos == std::string::npos) break; pos += 4; - url = uri.substr(pos, uri.find('&', pos) - pos); + url = uri.substr(pos, find(uri, "&", pos) - pos); } - std::string btih = url_has_argument(uri, "xt"); + string_view btih = url_has_argument(uri, "xt"); if (btih.empty()) { ec = errors::missing_info_hash_in_uri; return; } - if (btih.compare(0, 9, "urn:btih:") != 0) + if (btih.substr(0, 9) != "urn:btih:") { ec = errors::missing_info_hash_in_uri; return; } std::string::size_type peer_pos = std::string::npos; - std::string peer = url_has_argument(uri, "x.pe", &peer_pos); + string_view peer = url_has_argument(uri, "x.pe", &peer_pos); while (!peer.empty()) { error_code e; @@ -238,29 +237,29 @@ namespace libtorrent if 
(!e) p.peers.push_back(endp); - peer_pos = uri.find("&x.pe=", peer_pos); + peer_pos = find(uri, "&x.pe=", peer_pos); if (peer_pos == std::string::npos) break; peer_pos += 6; - peer = uri.substr(peer_pos, uri.find('&', peer_pos) - peer_pos); + peer = uri.substr(peer_pos, find(uri, "&", peer_pos) - peer_pos); } #ifndef TORRENT_DISABLE_DHT std::string::size_type node_pos = std::string::npos; - std::string node = url_has_argument(uri, "dht", &node_pos); + string_view node = url_has_argument(uri, "dht", &node_pos); while (!node.empty()) { std::string::size_type divider = node.find_last_of(':'); if (divider != std::string::npos) { - int port = atoi(node.c_str() + divider + 1); + int port = atoi(node.substr(divider + 1).to_string().c_str()); if (port != 0) - p.dht_nodes.push_back(std::make_pair(node.substr(0, divider), port)); + p.dht_nodes.push_back(std::make_pair(node.substr(0, divider).to_string(), port)); } - node_pos = uri.find("&dht=", node_pos); + node_pos = find(uri, "&dht=", node_pos); if (node_pos == std::string::npos) break; node_pos += 5; - node = uri.substr(node_pos, uri.find('&', node_pos) - node_pos); + node = uri.substr(node_pos, find(uri, "&", node_pos) - node_pos); } #endif diff --git a/src/socket_io.cpp b/src/socket_io.cpp index 9e962f052..1c638be15 100644 --- a/src/socket_io.cpp +++ b/src/socket_io.cpp @@ -38,6 +38,7 @@ POSSIBILITY OF SUCH DAMAGE. 
#include "libtorrent/address.hpp" #include "libtorrent/io.hpp" // for write_uint16 #include "libtorrent/hasher.hpp" // for hasher +#include "libtorrent/aux_/escape_string.hpp" // for trim namespace libtorrent { @@ -87,35 +88,41 @@ namespace libtorrent return print_endpoint(ep.address(), ep.port()); } - tcp::endpoint parse_endpoint(std::string str, error_code& ec) + tcp::endpoint parse_endpoint(string_view str, error_code& ec) { tcp::endpoint ret; - std::string::iterator start = str.begin(); - std::string::iterator port_pos; - // remove white spaces in front of the string - while (start != str.end() && is_space(*start)) - ++start; + str = trim(str); + + string_view addr; + string_view port; + + if (str.empty()) + { + ec = errors::invalid_port; + return ret; + } // this is for IPv6 addresses - if (start != str.end() && *start == '[') + if (str.front() == '[') { - ++start; - port_pos = std::find(start, str.end(), ']'); - if (port_pos == str.end()) + auto const close_bracket = str.find_first_of(']'); + if (close_bracket == string_view::npos) { ec = errors::expected_close_bracket_in_address; return ret; } - *port_pos = '\0'; - ++port_pos; - if (port_pos == str.end() || *port_pos != ':') + addr = str.substr(1, close_bracket - 1); + port = str.substr(close_bracket + 1); + if (port.empty() || port.front() != ':') { ec = errors::invalid_port; return ret; } + // shave off the ':' + port = port.substr(1); #if TORRENT_USE_IPV6 - ret.address(address_v6::from_string(&*start, ec)); + ret.address(address_v6::from_string(addr.to_string(), ec)); #else ec = boost::asio::error::address_family_not_supported; #endif @@ -123,25 +130,31 @@ namespace libtorrent } else { - port_pos = std::find(start, str.end(), ':'); - if (port_pos == str.end()) + auto const port_pos = str.find_first_of(':'); + if (port_pos == string_view::npos) { ec = errors::invalid_port; return ret; } - *port_pos = '\0'; - ret.address(address_v4::from_string(&*start, ec)); + addr = str.substr(0, port_pos); + port = 
str.substr(port_pos + 1); + ret.address(address_v4::from_string(addr.to_string(), ec)); if (ec) return ret; } - ++port_pos; - if (port_pos == str.end()) + if (port.empty()) { ec = errors::invalid_port; return ret; } - ret.port(std::uint16_t(std::atoi(&*port_pos))); + int const port_num = std::atoi(port.to_string().c_str()); + if (port_num <= 0 || port_num > std::numeric_limits<std::uint16_t>::max()) + { + ec = errors::invalid_port; + return ret; + } + ret.port(static_cast<std::uint16_t>(port_num)); return ret; } diff --git a/test/test_primitives.cpp b/test/test_primitives.cpp index fdafd047f..a3140f74e 100644 --- a/test/test_primitives.cpp +++ b/test/test_primitives.cpp @@ -106,29 +106,15 @@ TORRENT_TEST(primitives) // test network functions - // test print_endpoint, parse_endpoint and print_address + // test print_endpoint, print_address TEST_EQUAL(print_endpoint(ep("127.0.0.1", 23)), "127.0.0.1:23"); + TEST_EQUAL(print_address(addr4("241.124.23.5")), "241.124.23.5"); + #if TORRENT_USE_IPV6 TEST_EQUAL(print_endpoint(ep("ff::1", 1214)), "[ff::1]:1214"); -#endif - ec.clear(); - TEST_EQUAL(parse_endpoint("127.0.0.1:23", ec), ep("127.0.0.1", 23)); - TEST_CHECK(!ec); - ec.clear(); -#if TORRENT_USE_IPV6 - TEST_EQUAL(parse_endpoint(" \t[ff::1]:1214 \r", ec), ep("ff::1", 1214)); - TEST_CHECK(!ec); -#endif - TEST_EQUAL(print_address(addr4("241.124.23.5")), "241.124.23.5"); -#if TORRENT_USE_IPV6 TEST_EQUAL(print_address(addr6("2001:ff::1")), "2001:ff::1"); - parse_endpoint("[ff::1]", ec); - TEST_EQUAL(ec, error_code(errors::invalid_port)); #endif - parse_endpoint("[ff::1:5", ec); - TEST_EQUAL(ec, error_code(errors::expected_close_bracket_in_address)); - // test address_to_bytes TEST_EQUAL(address_to_bytes(addr4("10.11.12.13")), "\x0a\x0b\x0c\x0d"); TEST_EQUAL(address_to_bytes(addr4("16.5.127.1")), "\x10\x05\x7f\x01"); diff --git a/test/test_socket_io.cpp b/test/test_socket_io.cpp index 6b7599bb0..aae76fb3f 100644 --- a/test/test_socket_io.cpp +++ b/test/test_socket_io.cpp @@ -34,6 +34,7 @@ POSSIBILITY
OF SUCH DAMAGE. #include "setup_transfer.hpp" #include "libtorrent/socket_io.hpp" #include "libtorrent/socket.hpp" +#include "libtorrent/aux_/escape_string.hpp" // for trim #include @@ -115,10 +116,29 @@ TORRENT_TEST(parse_invalid_ipv4_endpoint) TEST_CHECK(ec); ec.clear(); + endp = parse_endpoint("127.0.0.1:-4", ec); + TEST_CHECK(ec); + ec.clear(); + + endp = parse_endpoint("127.0.0.1:66000", ec); + TEST_CHECK(ec); + ec.clear(); + + endp = parse_endpoint("127.0.0.1:abc", ec); + TEST_CHECK(ec); + ec.clear(); + endp = parse_endpoint("127.0.0.1", ec); TEST_CHECK(ec); ec.clear(); +#ifndef TORRENT_WINDOWS + // it appears windows silently accepts truncated IP addresses + endp = parse_endpoint("127.0.0:123", ec); + TEST_CHECK(ec); + ec.clear(); +#endif + endp = parse_endpoint("127.0.0.1:", ec); TEST_CHECK(ec); ec.clear(); @@ -126,10 +146,21 @@ TORRENT_TEST(parse_invalid_ipv4_endpoint) endp = parse_endpoint("127.0.0.1X", ec); TEST_CHECK(ec); ec.clear(); +} - endp = parse_endpoint("127.0.0.1:4", ec); +TORRENT_TEST(parse_valid_ip4_endpoint) +{ + error_code ec; + TEST_EQUAL(parse_endpoint("127.0.0.1:4", ec), ep("127.0.0.1", 4)); + TEST_CHECK(!ec); + ec.clear(); + + TEST_EQUAL(parse_endpoint("\t 127.0.0.1:4 \n", ec), ep("127.0.0.1", 4)); + TEST_CHECK(!ec); + ec.clear(); + + TEST_EQUAL(parse_endpoint("127.0.0.1:23", ec), ep("127.0.0.1", 23)); TEST_CHECK(!ec); - TEST_EQUAL(endp, ep("127.0.0.1", 4)); ec.clear(); } @@ -155,9 +186,44 @@ TORRENT_TEST(parse_invalid_ipv6_endpoint) TEST_CHECK(ec); ec.clear(); - endp = parse_endpoint("[::1]:4", ec); + endp = parse_endpoint("[::1", ec); + TEST_CHECK(ec == errors::expected_close_bracket_in_address); + ec.clear(); + + parse_endpoint("[ff::1:5", ec); + TEST_EQUAL(ec, error_code(errors::expected_close_bracket_in_address)); + ec.clear(); + + endp = parse_endpoint("[abcd]:123", ec); + TEST_CHECK(ec); + ec.clear(); + + endp = parse_endpoint("[ff::1]", ec); + TEST_EQUAL(ec, error_code(errors::invalid_port)); + ec.clear(); +} +
+TORRENT_TEST(parse_valid_ipv6_endpoint) +{ + error_code ec; + TEST_EQUAL(parse_endpoint("[::1]:4", ec), ep("::1", 4)); + TEST_CHECK(!ec); + ec.clear(); + + TEST_EQUAL(parse_endpoint(" \t[ff::1]:1214 \r", ec), ep("ff::1", 1214)); TEST_CHECK(!ec); - TEST_EQUAL(endp, ep("::1", 4)); ec.clear(); } #endif + +TORRENT_TEST(trim) +{ + TEST_EQUAL(trim(" a"), "a"); + TEST_EQUAL(trim(" a "), "a"); + TEST_EQUAL(trim("\t \na \t\r"), "a"); + TEST_EQUAL(trim(" \t \ta"), "a"); + TEST_EQUAL(trim("a "), "a"); + TEST_EQUAL(trim("a \t"), "a"); + TEST_EQUAL(trim("a \t\n \tb"), "a \t\n \tb"); +} +