/*

Copyright (c) 2003-2016, Arvid Norberg
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:

    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in
      the documentation and/or other materials provided with the distribution.
    * Neither the name of the author nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

*/

#include "libtorrent/config.hpp"

// NOTE(review): the names of the angle-bracket headers were missing from
// the text this file was restored from. The list below is reconstructed
// from the symbols used in this file -- confirm against the build.
#include <string>
#include <cctype> // for std::toupper
#include <algorithm> // for std::min, std::fill, std::copy
#include <mutex>
#include <cstring> // for std::strchr
#include <cstdlib> // for mbtowc, wctomb, wcstombs, mbstowcs
#include <iterator> // for std::back_inserter

#include <boost/tuple/tuple.hpp> // for boost::tie

#ifdef TORRENT_WINDOWS
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#endif
#include <windows.h>
#endif

#if TORRENT_USE_ICONV
#include <iconv.h>
#include <locale.h>
#endif

#include "libtorrent/assert.hpp"
#include "libtorrent/parse_url.hpp"
#include "libtorrent/random.hpp"
#include "libtorrent/utf8.hpp"
#include "libtorrent/aux_/escape_string.hpp"
#include "libtorrent/aux_/max_path.hpp" // for TORRENT_MAX_PATH
#include "libtorrent/string_util.hpp" // for to_string

namespace libtorrent
{

	// defined in hex.cpp
	extern const char hex_chars[];

	// returns the numeric value (0-15) of the hexadecimal digit ``c``, or -1
	// if ``c`` is not a hexadecimal digit. Both upper and lower case letters
	// are accepted.
	static int hex_digit_value(char const c)
	{
		if (c >= '0' && c <= '9') return c - '0';
		if (c >= 'A' && c <= 'F') return c + 10 - 'A';
		if (c >= 'a' && c <= 'f') return c + 10 - 'a';
		return -1;
	}

	// decodes a URL-escaped string. '+' is turned into a space and every
	// "%XX" sequence is replaced by the byte with hexadecimal value XX. If
	// an escape sequence is truncated or contains a non-hex character, ``ec``
	// is set to errors::invalid_escaped_string and whatever was decoded up
	// to that point is returned. ``ec`` is not touched on success.
	std::string unescape_string(std::string const& s, error_code& ec)
	{
		std::string ret;
		for (std::string::const_iterator i = s.begin(); i != s.end(); ++i)
		{
			if (*i == '+')
			{
				ret += ' ';
				continue;
			}

			if (*i != '%')
			{
				ret += *i;
				continue;
			}

			// a '%' must be followed by exactly two hex digits
			++i;
			if (i == s.end())
			{
				ec = errors::invalid_escaped_string;
				return ret;
			}

			int const high = hex_digit_value(*i);
			if (high < 0)
			{
				ec = errors::invalid_escaped_string;
				return ret;
			}

			++i;
			if (i == s.end())
			{
				ec = errors::invalid_escaped_string;
				return ret;
			}

			int const low = hex_digit_value(*i);
			if (low < 0)
			{
				ec = errors::invalid_escaped_string;
				return ret;
			}

			ret += char(high * 16 + low);
		}
		return ret;
	}

	// http://www.ietf.org/rfc/rfc2396.txt
	// section 2.3
	static const char unreserved_chars[] =
		// when determining if a url needs encoding
		// % should be ok
		"%+"
		// reserved
		";?:@=&,$/"
		// unreserved (special characters) ' excluded,
		// since some buggy trackers fail with those
		"-_!.~*()"
		// unreserved (alphanumerics)
		"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
		"0123456789";

	// percent-encodes the ``len`` bytes starting at ``str``. Every byte that
	// is not found in the unreserved_chars table (and every 0 byte) is
	// replaced by '%' followed by its two digit hexadecimal value.
	// the offset is used to ignore the first characters in the
	// unreserved_chars table.
	static std::string escape_string_impl(const char* str, int len, int offset)
	{
		TORRENT_ASSERT(str != 0);
		TORRENT_ASSERT(len >= 0);
		TORRENT_ASSERT(offset >= 0);
		TORRENT_ASSERT(offset < int(sizeof(unreserved_chars))-1);

		std::string ret;
		for (int i = 0; i < len; ++i)
		{
			// strchr() also matches the terminating 0 of the table, so a
			// 0 byte in the input has to be excluded explicitly
			if (std::strchr(unreserved_chars+offset, *str) && *str != 0)
			{
				ret += *str;
			}
			else
			{
				unsigned char const byte = static_cast<unsigned char>(*str);
				ret += '%';
				ret += hex_chars[byte >> 4];
				ret += hex_chars[byte & 15];
			}
			++str;
		}
		return ret;
	}

	// percent-encodes ``str``. The offset 11 skips "%+" and all of the
	// reserved characters in the table, so those are escaped too.
	std::string escape_string(const char* str, int len)
	{
		return escape_string_impl(str, len, 11);
	}

	// percent-encodes ``str`` as a path. The offset 10 skips the same
	// characters as escape_string() except for the trailing '/', which is
	// left as is.
	std::string escape_path(const char* str, int len)
	{
		return escape_string_impl(str, len, 10);
	}

	// returns true if any of the ``len`` bytes at ``str`` is a 0 byte or is
	// not found in the (full) unreserved_chars table
	bool need_encoding(char const* str, int len)
	{
		for (int i = 0; i < len; ++i)
		{
			if (std::strchr(unreserved_chars, *str) == 0 || *str == 0)
				return true;
			++str;
		}
		return false;
	}

	// replaces every backslash in ``path`` with a forward slash, in place
	void convert_path_to_posix(std::string& path)
	{
		for (std::string::iterator i = path.begin()
			, end(path.end()); i != end; ++i)
			if (*i == '\\') *i = '/';
	}

#ifdef TORRENT_WINDOWS
	// replaces every forward slash in ``path`` with a backslash, in place
	void convert_path_to_windows(std::string& path)
	{
		for (std::string::iterator i = path.begin()
			, end(path.end()); i != end; ++i)
			if (*i == '/') *i = '\\';
	}
#endif

	// TODO: 2 this should probably be moved into string_util.cpp
	// returns the characters from ``str`` up to (not including) the first
	// occurrence of ``delim`` or ``end``, whichever comes first. ``str`` is
	// advanced past the returned characters and past any run of consecutive
	// delimiters that follows.
	std::string read_until(char const*& str, char delim, char const* end)
	{
		TORRENT_ASSERT(str <= end);

		std::string ret;
		while (str != end && *str != delim)
		{
			ret += *str;
			++str;
		}
		// skip the delimiter as well
		while (str != end && *str == delim) ++str;
		return ret;
	}

	// if the path component of ``url`` contains characters that need
	// encoding, returns the url with its path percent-encoded. If the url
	// fails to parse, or its path does not need encoding, ``url`` is
	// returned unmodified.
	std::string maybe_url_encode(std::string const& url)
	{
		std::string protocol, host, auth, path;
		int port;
		error_code ec;
		boost::tie(protocol, auth, host, port, path) = parse_url_components(url, ec);
		if (ec) return url;

		// first figure out if this url contains unencoded characters
		if (!need_encoding(path.c_str(), int(path.size())))
			return url;

		std::string const escaped_path
			= escape_path(path.c_str(), int(path.size()));

		// the url is assembled in a std::string rather than a fixed size
		// buffer, so that long urls are not silently truncated
		std::string ret;
		ret.reserve(protocol.size() + 3 + auth.size() + 1 + host.size()
			+ 1 + 20 + escaped_path.size());
		ret += protocol;
		ret += "://";
		ret += auth;
		if (!auth.empty()) ret += '@';
		ret += host;
		if (port != -1)
		{
			ret += ':';
			ret += to_string(port).data();
		}
		ret += escaped_path;
		return ret;
	}

	// turns a "file://" url into a filesystem path. The scheme prefix is
	// stripped and the rest is URL-decoded. If decoding fails, the
	// still-encoded remainder is used instead.
	std::string resolve_file_url(std::string const& url)
	{
		TORRENT_ASSERT(url.substr(0, 7) == "file://");
		// first, strip the file:// part.
		// On windows, we have
		// to strip the first / as well
		int num_to_strip = 7;
#ifdef TORRENT_WINDOWS
		if (url[7] == '/' || url[7] == '\\') ++num_to_strip;
#endif
		std::string ret = url.substr(num_to_strip);

		// we also need to URL-decode it
		error_code ec;
		std::string unescaped = unescape_string(ret, ec);
		if (ec) unescaped = ret;

		// on windows, we need to convert forward slashes
		// to backslashes
#ifdef TORRENT_WINDOWS
		convert_path_to_windows(unescaped);
#endif

		return unescaped;
	}

	// returns the base64 encoding of ``s``, padded with '=' to a multiple of
	// 4 characters
	std::string base64encode(const std::string& s)
	{
		// only indices 0-63 are ever used
		static const char base64_table[] =
			"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
			"abcdefghijklmnopqrstuvwxyz"
			"0123456789+/";

		unsigned char inbuf[3];
		unsigned char outbuf[4];

		std::string ret;
		for (std::string::const_iterator i = s.begin(); i != s.end();)
		{
			// available input is 1,2 or 3 bytes
			// since we read 3 bytes at a time at most
			int available_input = (std::min)(3, int(s.end()-i));

			// clear input buffer
			std::fill(inbuf, inbuf+3, 0);

			// read a chunk of input into inbuf
			std::copy(i, i + available_input, inbuf);
			i += available_input;

			// encode inbuf to outbuf
			outbuf[0] = (inbuf[0] & 0xfc) >> 2;
			outbuf[1] = ((inbuf[0] & 0x03) << 4) | ((inbuf [1] & 0xf0) >> 4);
			outbuf[2] = ((inbuf[1] & 0x0f) << 2) | ((inbuf [2] & 0xc0) >> 6);
			outbuf[3] = inbuf[2] & 0x3f;

			// write output
			for (int j = 0; j < available_input+1; ++j)
			{
				ret += base64_table[outbuf[j]];
			}

			// write pad
			for (int j = 0; j < 3 - available_input; ++j)
			{
				ret += '=';
			}
		}
		return ret;
	}

	// returns the base32 encoding of ``s``. ``flags`` is a bitmask: with
	// string::lowercase set the lower case alphabet is used, and with
	// string::no_padding set the trailing '=' padding is left out.
	std::string base32encode(std::string const& s, int flags)
	{
		// only indices 0-31 are ever used
		static const char base32_table_canonical[] =
			"ABCDEFGHIJKLMNOPQRSTUVWXYZ234567";
		static const char base32_table_lowercase[] =
			"abcdefghijklmnopqrstuvwxyz234567";

		const char *base32_table = 0 != (flags & string::lowercase)
			? base32_table_lowercase : base32_table_canonical;

		// maps the number of input bytes in a chunk to the number of
		// output characters that carry data
		int input_output_mapping[] = {0, 2, 4, 5, 7, 8};

		unsigned char inbuf[5];
		unsigned char outbuf[8];

		std::string ret;
		for (std::string::const_iterator i = s.begin(); i != s.end();)
		{
			int available_input = (std::min)(5, int(s.end()-i));

			// clear input buffer
			std::fill(inbuf, inbuf+5, 0);

			// read a chunk of input into inbuf
			std::copy(i, i + available_input, inbuf);
			i += available_input;

			// encode inbuf to outbuf
			outbuf[0] = (inbuf[0] & 0xf8) >> 3;
			outbuf[1] = ((inbuf[0] & 0x07) << 2) | ((inbuf[1] & 0xc0) >> 6);
			outbuf[2] = ((inbuf[1] & 0x3e) >> 1);
			outbuf[3] = ((inbuf[1] & 0x01) << 4) | ((inbuf[2] & 0xf0) >> 4);
			outbuf[4] = ((inbuf[2] & 0x0f) << 1) | ((inbuf[3] & 0x80) >> 7);
			outbuf[5] = ((inbuf[3] & 0x7c) >> 2);
			outbuf[6] = ((inbuf[3] & 0x03) << 3) | ((inbuf[4] & 0xe0) >> 5);
			outbuf[7] = inbuf[4] & 0x1f;

			// write output
			int num_out = input_output_mapping[available_input];
			for (int j = 0; j < num_out; ++j)
			{
				ret += base32_table[outbuf[j]];
			}

			if (0 == (flags & string::no_padding))
			{
				// write pad
				for (int j = 0; j < 8 - num_out; ++j)
				{
					ret += '=';
				}
			}
		}
		return ret;
	}

	// decodes the base32 string ``s``. Letters are accepted in either case
	// and '1' is treated as 'I'. If a character outside of the alphabet is
	// encountered, an empty string is returned.
	std::string base32decode(std::string const& s)
	{
		unsigned char inbuf[8];
		unsigned char outbuf[5];

		std::string ret;
		for (std::string::const_iterator i = s.begin(); i != s.end();)
		{
			int available_input = (std::min)(8, int(s.end()-i));

			// index of the first padding character in this chunk. 0 means
			// no padding has been seen
			int pad_start = 0;
			if (available_input < 8) pad_start = available_input;

			// clear input buffer
			std::fill(inbuf, inbuf+8, 0);
			for (int j = 0; j < available_input; ++j)
			{
				// toupper() must not be passed a negative value, so the
				// character is converted to unsigned char first
				char const in = char(std::toupper(static_cast<unsigned char>(*i++)));
				if (in >= 'A' && in <= 'Z')
					inbuf[j] = in - 'A';
				else if (in >= '2' && in <= '7')
					inbuf[j] = in - '2' + ('Z' - 'A') + 1;
				else if (in == '=')
				{
					inbuf[j] = 0;
					if (pad_start == 0) pad_start = j;
				}
				else if (in == '1')
					inbuf[j] = 'I' - 'A';
				else
					return std::string();
				TORRENT_ASSERT(inbuf[j] == (inbuf[j] & 0x1f));
			}

			// decode inbuf to outbuf
			outbuf[0] = inbuf[0] << 3;
			outbuf[0] |= inbuf[1] >> 2;
			outbuf[1] = (inbuf[1] & 0x3) << 6;
			outbuf[1] |= inbuf[2] << 1;
			outbuf[1] |= (inbuf[3] & 0x10) >> 4;
			outbuf[2] = (inbuf[3] & 0x0f) << 4;
			outbuf[2] |= (inbuf[4] & 0x1e) >> 1;
			outbuf[3] = (inbuf[4] & 0x01) << 7;
			outbuf[3] |= (inbuf[5] & 0x1f) << 2;
			outbuf[3] |= (inbuf[6] & 0x18) >> 3;
			outbuf[4] = (inbuf[6] & 0x07) << 5;
			outbuf[4] |= inbuf[7];

			// maps pad_start to the number of output bytes to keep.
			// Index 0 (no padding) yields the full 5 bytes
			int input_output_mapping[] = {5, 1, 1, 2, 2, 3, 4, 4, 5};
			int num_out = input_output_mapping[pad_start];

			// write output
			std::copy(outbuf, outbuf + num_out, std::back_inserter(ret));
		}
		return ret;
	}

	// looks for the query string argument ``argument`` in ``url`` and
	// returns its value (everything up to the next '&' or the end of the
	// url). If the url has no '?' or the argument is not found, an empty
	// string is returned. If ``out_pos`` is not null and the argument is
	// found, it is set to the offset in ``url`` where the value starts.
	std::string url_has_argument(
		std::string const& url, std::string argument, std::string::size_type* out_pos)
	{
		size_t i = url.find('?');
		if (i == std::string::npos) return std::string();
		++i;

		argument += '=';

		// is it the first argument, right after the '?'
		if (url.compare(i, argument.size(), argument) == 0)
		{
			size_t pos = i + argument.size();
			if (out_pos) *out_pos = pos;
			return url.substr(pos, url.find('&', pos) - pos);
		}

		// otherwise it has to be preceded by a '&'
		argument.insert(0, "&");
		i = url.find(argument, i);
		if (i == std::string::npos) return std::string();
		size_t pos = i + argument.size();
		if (out_pos) *out_pos = pos;
		return url.substr(pos, url.find('&', pos) - pos);
	}

#if defined TORRENT_WINDOWS && TORRENT_USE_WSTRING
	// converts ``s`` to a wide string. utf8_wchar() is tried first. If it
	// does not return 0, the string is converted one character at a time
	// with mbtowc(), where characters that fail to convert become '.'.
	std::wstring convert_to_wstring(std::string const& s)
	{
		std::wstring ret;
		int result = libtorrent::utf8_wchar(s, ret);
		if (result == 0) return ret;

		ret.clear();
		const char* end = &s[0] + s.size();
		for (const char* i = &s[0]; i < end;)
		{
			wchar_t c = '.';
			result = std::mbtowc(&c, i, end - i);
			if (result > 0) i += result;
			else ++i;
			ret += c;
		}
		return ret;
	}

	// converts the wide string ``s`` to a narrow string. wchar_utf8() is
	// tried first. If it does not return 0, the string is converted one
	// character at a time with wctomb(), where characters that fail to
	// convert become ".".
	std::string convert_from_wstring(std::wstring const& s)
	{
		std::string ret;
		int result = libtorrent::wchar_utf8(s, ret);
		if (result == 0) return ret;

		ret.clear();
		const wchar_t* end = &s[0] + s.size();
		for (const wchar_t* i = &s[0]; i < end;)
		{
			char c[10];
			TORRENT_ASSERT(sizeof(c) >= MB_CUR_MAX);
			result = std::wctomb(c, *i);
			if (result > 0)
			{
				// wctomb() consumes exactly one wide character. Its
				// return value is the number of bytes written to c, and
				// must not be used to advance the input
				ret.append(c, result);
			}
			else
			{
				ret += ".";
			}
			++i;
		}
		return ret;
	}
#endif

#if TORRENT_USE_ICONV
	// runs ``s`` through the iconv conversion descriptor ``h`` and returns
	// the result. If the conversion fails or does not consume all of the
	// input, ``s`` is returned unmodified.
	std::string iconv_convert_impl(std::string const& s, iconv_t h)
	{
		std::string ret;
		size_t insize = s.size();
		size_t outsize = insize * 4;
		ret.resize(outsize);
		char const* in = s.c_str();
		char* out = &ret[0];
		// posix has a weird iconv signature. implementations
		// differ on what this signature should be, so we use
		// a macro to let config.hpp determine it
		size_t retval = iconv(h, TORRENT_ICONV_ARG &in, &insize,
			&out, &outsize);
		if (retval == (size_t)-1) return s;
		// if this string has an invalid utf-8 sequence in it, don't touch it
		if (insize != 0) return s;
		// not sure why this would happen, but it seems to be possible
		if (outsize > s.size() * 4) return s;
		// outsize is the number of bytes unused of the out-buffer
		TORRENT_ASSERT(ret.size() >= outsize);
		ret.resize(ret.size() - outsize);
		return ret;
	}

	// converts ``s`` from UTF-8 to the locale dependent encoding. If the
	// iconv handle could not be opened, ``s`` is returned unmodified.
	std::string convert_to_native(std::string const& s)
	{
		static std::mutex iconv_mutex;
		// only one thread can use this handle at a time
		std::lock_guard<std::mutex> l(iconv_mutex);

		// the empty string represents the local dependent encoding
		static iconv_t iconv_handle = iconv_open("", "UTF-8");
		if (iconv_handle == iconv_t(-1)) return s;
		return iconv_convert_impl(s, iconv_handle);
	}

	// converts ``s`` from the locale dependent encoding to UTF-8. If the
	// iconv handle could not be opened, ``s`` is returned unmodified.
	std::string convert_from_native(std::string const& s)
	{
		static std::mutex iconv_mutex;
		// only one thread can use this handle at a time
		std::lock_guard<std::mutex> l(iconv_mutex);

		// the empty string represents the local dependent encoding
		static iconv_t iconv_handle = iconv_open("UTF-8", "");
		if (iconv_handle == iconv_t(-1)) return s;
		return iconv_convert_impl(s, iconv_handle);
	}

#elif defined TORRENT_WINDOWS

	// converts ``s`` from UTF-8 to the ANSI code page (CP_ACP), going via a
	// wide string
	std::string convert_to_native(std::string const& s)
	{
		std::wstring ws;
		libtorrent::utf8_wchar(s, ws);
		std::string ret;
		ret.resize(ws.size() * 4 + 1);
		// NOTE(review): WideCharToMultiByte presumably signals failure by
		// returning 0 rather than -1 -- confirm whether the check below
		// can ever trigger
		std::size_t size = WideCharToMultiByte(CP_ACP, 0, ws.c_str(), -1
			, &ret[0], int(ret.size()), NULL, NULL);
		if (size == std::size_t(-1)) return s;
		// the terminating 0 is not part of the string
		if (size != 0 && ret[size - 1] == '\0') --size;
		ret.resize(size);
		return ret;
	}

	// converts ``s`` from the ANSI code page (CP_ACP) to UTF-8, going via a
	// wide string
	std::string convert_from_native(std::string const& s)
	{
		std::wstring ws;
		ws.resize(s.size() + 1);
		// NOTE(review): MultiByteToWideChar presumably signals failure by
		// returning 0 rather than -1 -- confirm whether the check below
		// can ever trigger
		std::size_t size = MultiByteToWideChar(CP_ACP, 0, s.c_str(), -1
			, &ws[0], int(ws.size()));
		if (size == std::size_t(-1)) return s;
		// the terminating 0 is not part of the string
		if (size != 0 && ws[size - 1] == '\0') --size;
		ws.resize(size);
		std::string ret;
		libtorrent::wchar_utf8(ws, ret);
		return ret;
	}

#elif TORRENT_USE_LOCALE

	// converts ``s`` from UTF-8 to the multibyte encoding of the current
	// locale, going via a wide string. If wcstombs() fails, ``s`` is
	// returned unmodified.
	std::string convert_to_native(std::string const& s)
	{
		std::wstring ws;
		libtorrent::utf8_wchar(s, ws);
		// the first call only computes the required size
		std::size_t size = wcstombs(0, ws.c_str(), 0);
		if (size == std::size_t(-1)) return s;
		std::string ret;
		ret.resize(size);
		size = wcstombs(&ret[0], ws.c_str(), size + 1);
		if (size == std::size_t(-1)) return s;
		ret.resize(size);
		return ret;
	}

	// converts ``s`` from the multibyte encoding of the current locale to
	// UTF-8, going via a wide string. If mbstowcs() fails, ``s`` is returned
	// unmodified.
	std::string convert_from_native(std::string const& s)
	{
		std::wstring ws;
		ws.resize(s.size());
		std::size_t size = mbstowcs(&ws[0], s.c_str(), s.size());
		if (size == std::size_t(-1)) return s;
		// multibyte sequences produce fewer wide characters than input
		// bytes. Drop the unused tail so it does not end up in the result
		ws.resize(size);
		std::string ret;
		libtorrent::wchar_utf8(ws, ret);
		return ret;
	}

#endif

}