diff --git a/ChangeLog b/ChangeLog index 65a7f827d..693b5c272 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,4 @@ + * encodes urls in torrent files that need to be encoded * fixed not passing &supportcrypto=1 when encryption is disabled * introduced an upload mode, which torrents are switched into when it hits a disk write error, instead of stopping the torrent. diff --git a/include/libtorrent/escape_string.hpp b/include/libtorrent/escape_string.hpp index 199e09db9..48a8c70e3 100644 --- a/include/libtorrent/escape_string.hpp +++ b/include/libtorrent/escape_string.hpp @@ -51,8 +51,13 @@ namespace libtorrent bool TORRENT_EXPORT string_begins_no_case(char const* s1, char const* s2); std::string TORRENT_EXPORT unescape_string(std::string const& s, error_code& ec); + // replaces all disallowed URL characters by their %-encoding std::string TORRENT_EXPORT escape_string(const char* str, int len); + // same as escape_string but does not encode '/' std::string TORRENT_EXPORT escape_path(const char* str, int len); + // if the url does not appear to be encoded, and it contains illegal url characters + // it will be encoded + std::string TORRENT_EXPORT maybe_url_encode(std::string const& url); // encodes a string using the base64 scheme TORRENT_EXPORT std::string base64encode(std::string const& s); diff --git a/src/escape_string.cpp b/src/escape_string.cpp index 44e0a538a..dadf5f7b3 100644 --- a/src/escape_string.cpp +++ b/src/escape_string.cpp @@ -40,9 +40,11 @@ POSSIBILITY OF SUCH DAMAGE. 
#include #include +#include #include "libtorrent/assert.hpp" #include "libtorrent/escape_string.hpp" +#include "libtorrent/parse_url.hpp" #if TORRENT_USE_WPATH @@ -164,22 +166,24 @@ namespace libtorrent // http://www.ietf.org/rfc/rfc2396.txt // section 2.3 // some trackers seems to require that ' is escaped - //static const char unreserved_chars[] = "-_.!~*'()"; - static const char unreserved_chars[] = "/-_.!~*()" + static const char unreserved_chars[] = "%'/-_.!~*()" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" "0123456789"; static const char hex_chars[] = "0123456789abcdef"; - std::string escape_string(const char* str, int len) + // the offset is used to ignore the first characters in the unreserved_chars table. + static std::string escape_string_impl(const char* str, int len, int offset) { TORRENT_ASSERT(str != 0); TORRENT_ASSERT(len >= 0); + TORRENT_ASSERT(offset >= 0); + TORRENT_ASSERT(offset < sizeof(unreserved_chars)-1); std::string ret; for (int i = 0; i < len; ++i) { if (std::count( - unreserved_chars + unreserved_chars+offset , unreserved_chars+sizeof(unreserved_chars)-1 , *str)) { @@ -196,30 +200,60 @@ namespace libtorrent return ret; } + std::string escape_string(const char* str, int len) + { + return escape_string_impl(str, len, 3); + } + std::string escape_path(const char* str, int len) { - TORRENT_ASSERT(str != 0); - TORRENT_ASSERT(len >= 0); + return escape_string_impl(str, len, 2); + } - std::string ret; + static bool need_encoding(char const* str, int len) + { for (int i = 0; i < len; ++i) { if (std::count( unreserved_chars , unreserved_chars+sizeof(unreserved_chars)-1 - , *str)) + , *str) == 0) { - ret += *str; - } - else - { - ret += '%'; - ret += hex_chars[((unsigned char)*str) >> 4]; - ret += hex_chars[((unsigned char)*str) & 15]; + return true; } ++str; } - return ret; + return false; + } + + // returns url unchanged if it fails to parse, or if its path only contains + // characters found in unreserved_chars. otherwise the url is rebuilt as + // protocol://[auth@]host:port followed by the path run through escape_path(). + // note that a rebuilt url always carries an explicit port. + std::string maybe_url_encode(std::string const& url) + { + std::string protocol, host, auth, path; + int port; + error_code ec; 
boost::tie(protocol, auth, host, port, path) = parse_url_components(url, ec); + if (ec) return url; + + // first figure out if this url contains unencoded characters + if (!need_encoding(path.c_str(), path.size())) + return url; + + // assemble the result in a std::string rather than a fixed-size + // buffer, so that long urls are not silently truncated + char port_str[16]; + snprintf(port_str, sizeof(port_str), "%d", port); + + std::string ret = protocol + "://" + auth; + if (!auth.empty()) ret += '@'; + ret += host; + ret += ':'; + ret += port_str; + ret += escape_path(path.c_str(), path.size()); + return ret; } std::string base64encode(const std::string& s) diff --git a/src/torrent_info.cpp b/src/torrent_info.cpp index 4b914a006..dc1f56fa4 100644 --- a/src/torrent_info.cpp +++ b/src/torrent_info.cpp @@ -837,7 +837,7 @@ namespace libtorrent lazy_entry const* url_seeds = torrent_file.dict_find("url-list"); if (url_seeds && url_seeds->type() == lazy_entry::string_t) { - m_url_seeds.push_back(url_seeds->string_value()); + m_url_seeds.push_back(maybe_url_encode(url_seeds->string_value())); } else if (url_seeds && url_seeds->type() == lazy_entry::list_t) { @@ -845,7 +845,7 @@ namespace libtorrent { lazy_entry const* url = url_seeds->list_at(i); if (url->type() != lazy_entry::string_t) continue; - m_url_seeds.push_back(url->string_value()); + m_url_seeds.push_back(maybe_url_encode(url->string_value())); } } @@ -853,7 +853,7 @@ namespace libtorrent lazy_entry const* http_seeds = torrent_file.dict_find("httpseeds"); if (http_seeds && http_seeds->type() == lazy_entry::string_t) { - m_http_seeds.push_back(http_seeds->string_value()); + m_http_seeds.push_back(maybe_url_encode(http_seeds->string_value())); } else if (http_seeds && http_seeds->type() == lazy_entry::list_t) { @@ -861,7 +861,7 @@ namespace libtorrent { lazy_entry const* url = http_seeds->list_at(i); if (url->type() != lazy_entry::string_t) continue; - m_http_seeds.push_back(url->string_value()); + m_http_seeds.push_back(maybe_url_encode(url->string_value())); } } diff --git a/test/test_primitives.cpp b/test/test_primitives.cpp index cca90dc46..a4bdffaaa 100644 --- 
a/test/test_primitives.cpp +++ b/test/test_primitives.cpp @@ -359,6 +359,15 @@ int test_main() { using namespace libtorrent; + // test maybe_url_encode + + TEST_CHECK(maybe_url_encode("http://test:test@abc.com/abc<>abc") == "http://test:test@abc.com:80/abc%3c%3eabc"); + TEST_CHECK(maybe_url_encode("http://abc.com/foo bar") == "http://abc.com:80/foo%20bar"); + TEST_CHECK(maybe_url_encode("abc") == "abc"); + TEST_CHECK(maybe_url_encode("http://abc.com/abc") == "http://abc.com/abc"); + + // test sanitize_path + TEST_CHECK(sanitize_path("/a/b/c").string() == "a/b/c"); TEST_CHECK(sanitize_path("a/../c").string() == "a/c"); TEST_CHECK(sanitize_path("/.././c").string() == "c");