/*

Copyright (c) 2003-2008, Arvid Norberg
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:

    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in
      the documentation and/or other materials provided with the distribution.
    * Neither the name of the author nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

*/

#include "libtorrent/pch.hpp"

// the original header names were stripped from this copy; the headers below
// cover what this translation unit uses directly
#include <cstring>
#include <climits>
#include <iterator>
#include <algorithm>
#include <vector>
#include <string>
#include <ostream>
#include <iomanip>

#ifdef _MSC_VER
#pragma warning(push, 1)
#endif

#include <boost/bind.hpp>
#include <boost/filesystem/path.hpp>
#include <boost/filesystem/convenience.hpp>
#include <boost/optional.hpp>
#include <boost/date_time/posix_time/posix_time.hpp>
#include <boost/date_time/gregorian/gregorian.hpp>

#ifdef _MSC_VER
#pragma warning(pop)
#endif

#include "libtorrent/torrent_info.hpp"
#include "libtorrent/bencode.hpp"
#include "libtorrent/hasher.hpp"
#include "libtorrent/entry.hpp"
#include "libtorrent/file.hpp"
#include "libtorrent/utf8.hpp"

namespace gr = boost::gregorian;
namespace pt = boost::posix_time;

using namespace libtorrent;

namespace
{
	namespace fs = boost::filesystem;

	void convert_to_utf8(std::string& str, unsigned char chr)
	{
		str += 0xc0 | ((chr & 0xff) >> 6);
		str += 0x80 | (chr & 0x3f);
	}

	bool verify_encoding(std::string& target)
	{
		std::string tmp_path;
		bool valid_encoding = true;
		for (std::string::iterator i = target.begin()
			, end(target.end()); i != end; ++i)
		{
			// valid ascii-character
			if ((*i & 0x80) == 0)
			{
				tmp_path += *i;
				continue;
			}

			if (std::distance(i, end) < 2)
			{
				convert_to_utf8(tmp_path, *i);
				valid_encoding = false;
				continue;
			}

			// valid 2-byte utf-8 character
			if ((i[0] & 0xe0) == 0xc0
				&& (i[1] & 0xc0) == 0x80)
			{
				tmp_path += i[0];
				tmp_path += i[1];
				i += 1;
				continue;
			}

			if (std::distance(i, end) < 3)
			{
				convert_to_utf8(tmp_path, *i);
				valid_encoding = false;
				continue;
			}

			// valid 3-byte utf-8 character
			if ((i[0] & 0xf0) == 0xe0
				&& (i[1] & 0xc0) == 0x80
				&& (i[2] & 0xc0) == 0x80)
			{
				tmp_path += i[0];
				tmp_path += i[1];
				tmp_path += i[2];
				i += 2;
				continue;
			}

			if (std::distance(i, end) < 4)
			{
				convert_to_utf8(tmp_path, *i);
				valid_encoding = false;
				continue;
			}

			// valid 4-byte utf-8 character
			if ((i[0] & 0xf8) == 0xf0
				&& (i[1] & 0xc0) == 0x80
				&& (i[2] & 0xc0) == 0x80
				&& (i[3] & 0xc0) == 0x80)
			{
				tmp_path += i[0];
				tmp_path += i[1];
				tmp_path += i[2];
				tmp_path += i[3];
				i += 3;
				continue;
			}

			convert_to_utf8(tmp_path, *i);
			valid_encoding = false;
		}
		// the encoding was not valid utf-8
		// save the original encoding and replace the
		// commonly used path with the correctly
		// encoded string
		if (!valid_encoding) target = tmp_path;
		return valid_encoding;
	}
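	// Example (illustrative): when the input is not valid utf-8, each
	// offending byte is re-encoded by convert_to_utf8() as if it were
	// latin-1. The lone byte 0xe5 ('å' in latin-1) becomes the two-byte
	// utf-8 sequence 0xc3 0xa5:
	//
	//   std::string s("\xe5");
	//   bool ok = verify_encoding(s); // ok == false, s now holds "\xc3\xa5"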
	void verify_encoding(file_entry& target)
	{
		std::string p = target.path.string();
		if (!verify_encoding(p)) target.path = p;
	}

	void trim_path_element(std::string& path_element)
	{
		// on windows, NAME_MAX refers to Unicode characters
		// on linux it refers to bytes (utf-8 encoded)
		// TODO: Make this count Unicode characters instead of bytes on windows
		if (path_element.size() > NAME_MAX)
		{
			// truncate filenames that are too long. But keep extensions!
			std::string ext = fs::extension(path_element);
			if (ext.size() > 15)
			{
				path_element.resize(NAME_MAX);
			}
			else
			{
				path_element.resize(NAME_MAX - ext.size());
				path_element += ext;
			}
		}
	}

	bool extract_single_file(lazy_entry const& dict, file_entry& target
		, std::string const& root_dir)
	{
		lazy_entry const* length = dict.dict_find("length");
		if (length == 0 || length->type() != lazy_entry::int_t)
			return false;
		target.size = length->int_value();
		target.path = root_dir;
		target.file_base = 0;

		// prefer the name.utf-8
		// because if it exists, it is more
		// likely to be correctly encoded
		lazy_entry const* p = dict.dict_find("path.utf-8");
		if (p == 0 || p->type() != lazy_entry::list_t)
			p = dict.dict_find("path");
		if (p == 0 || p->type() != lazy_entry::list_t)
			return false;

		for (int i = 0, end(p->list_size()); i < end; ++i)
		{
			if (p->list_at(i)->type() != lazy_entry::string_t)
				return false;
			std::string path_element = p->list_at(i)->string_value();
			trim_path_element(path_element);
			if (path_element != "..")
				target.path /= path_element;
		}
		verify_encoding(target);
		if (target.path.is_complete())
			return false;

		// bitcomet pad file
		if (target.path.string().find("_____padding_file_") != std::string::npos)
			target.pad_file = true;

		lazy_entry const* attr = dict.dict_find_string("attr");
		if (attr)
		{
			for (int i = 0; i < attr->string_length(); ++i)
			{
				switch (attr->string_ptr()[i])
				{
					case 'x': target.executable_attribute = true; break;
					case 'h': target.hidden_attribute = true; break;
					case 'p': target.pad_file = true; break;
				}
			}
		}
		return true;
	}

	bool extract_files(lazy_entry const& list, file_storage& target
		, std::string const& root_dir)
	{
		if (list.type() != lazy_entry::list_t)
			return false;

		for (int i = 0, end(list.list_size()); i < end; ++i)
		{
			file_entry e;
			if (!extract_single_file(*list.list_at(i), e, root_dir))
				return false;
			target.add_file(e);
		}
		return true;
	}
}

namespace libtorrent
{
	int load_file(fs::path const& filename, std::vector<char>& v)
	{
		file f;
		error_code ec;
		if (!f.open(filename, file::read_only, ec)) return -1;
		size_type s = f.get_size(ec);
		if (ec) return -1;
		if (s > 5000000) return -2;
		v.resize(s);
		if (s == 0) return 0;
		file::iovec_t b = {&v[0], s};
		size_type read = f.readv(0, &b, 1, ec);
		if (read != s) return -3;
		if (ec) return -3;
		return 0;
	}

#ifndef TORRENT_NO_DEPRECATE
	// standard constructor that parses a torrent file
	torrent_info::torrent_info(entry const& torrent_file)
		: m_creation_date(pt::ptime(pt::not_a_date_time))
		, m_multifile(false)
		, m_private(false)
		, m_info_section_size(0)
		, m_piece_hashes(0)
	{
		std::vector<char> tmp;
		std::back_insert_iterator<std::vector<char> > out(tmp);
		bencode(out, torrent_file);

		lazy_entry e;
		lazy_bdecode(&tmp[0], &tmp[0] + tmp.size(), e);
		std::string error;
#ifndef BOOST_NO_EXCEPTIONS
		if (!parse_torrent_file(e, error))
			throw invalid_torrent_file();
#else
		parse_torrent_file(e, error);
#endif
	}
#endif
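	// Usage sketch (illustrative only; "buf" is a hypothetical buffer holding
	// the raw contents of a .torrent file): the buffer constructor below
	// bdecodes the data and parses it in one step.
	//
	//   std::vector<char> buf = /* read the .torrent file into memory */;
	//   torrent_info ti(&buf[0], int(buf.size()));
	//   std::cout << ti.name() << ": " << ti.num_pieces() << " pieces\n";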
	torrent_info::torrent_info(lazy_entry const& torrent_file)
		: m_creation_date(pt::ptime(pt::not_a_date_time))
		, m_multifile(false)
		, m_private(false)
		, m_info_section_size(0)
		, m_piece_hashes(0)
	{
		std::string error;
#ifndef BOOST_NO_EXCEPTIONS
		if (!parse_torrent_file(torrent_file, error))
			throw invalid_torrent_file();
#else
		parse_torrent_file(torrent_file, error);
#endif
	}

	torrent_info::torrent_info(char const* buffer, int size)
		: m_creation_date(pt::ptime(pt::not_a_date_time))
		, m_multifile(false)
		, m_private(false)
		, m_info_section_size(0)
		, m_piece_hashes(0)
	{
		std::string error;
		lazy_entry e;
		lazy_bdecode(buffer, buffer + size, e);
#ifndef BOOST_NO_EXCEPTIONS
		if (!parse_torrent_file(e, error))
			throw invalid_torrent_file();
#else
		parse_torrent_file(e, error);
#endif
	}

	// constructor used for creating new torrents
	// will not contain any hashes, comments, creation date
	// just the necessary to use it with piece manager
	// used for torrents with no metadata
	torrent_info::torrent_info(sha1_hash const& info_hash)
		: m_info_hash(info_hash)
		, m_creation_date(pt::second_clock::universal_time())
		, m_multifile(false)
		, m_private(false)
		, m_info_section_size(0)
		, m_piece_hashes(0)
	{}

	torrent_info::torrent_info(fs::path const& filename)
		: m_creation_date(pt::ptime(pt::not_a_date_time))
		, m_multifile(false)
		, m_private(false)
		, m_info_section_size(0)
		, m_piece_hashes(0)
	{
		std::vector<char> buf;
		int ret = load_file(filename, buf);
		if (ret < 0) return;

		if (buf.empty())
#ifndef BOOST_NO_EXCEPTIONS
			throw invalid_torrent_file();
#else
			return;
#endif

		lazy_entry e;
		lazy_bdecode(&buf[0], &buf[0] + buf.size(), e);
		std::string error;
#ifndef BOOST_NO_EXCEPTIONS
		if (!parse_torrent_file(e, error))
			throw invalid_torrent_file();
#else
		parse_torrent_file(e, error);
#endif
	}

	torrent_info::torrent_info(fs::wpath const& filename)
		: m_creation_date(pt::ptime(pt::not_a_date_time))
		, m_multifile(false)
		, m_private(false)
		, m_info_section_size(0)
		, m_piece_hashes(0)
	{
		std::vector<char> buf;
		std::string utf8;
		wchar_utf8(filename.string(), utf8);
		int ret = load_file(utf8, buf);
		if (ret < 0) return;

		if (buf.empty())
#ifndef BOOST_NO_EXCEPTIONS
			throw invalid_torrent_file();
#else
			return;
#endif

		lazy_entry e;
		lazy_bdecode(&buf[0], &buf[0] + buf.size(), e);
		std::string error;
#ifndef BOOST_NO_EXCEPTIONS
		if (!parse_torrent_file(e, error))
			throw invalid_torrent_file();
#else
		parse_torrent_file(e, error);
#endif
	}

	torrent_info::~torrent_info()
	{}

	void torrent_info::copy_on_write()
	{
		if (m_orig_files) return;
		m_orig_files.reset(new file_storage(m_files));
	}

	void torrent_info::swap(torrent_info& ti)
	{
		using std::swap;
		m_urls.swap(ti.m_urls);
		m_url_seeds.swap(ti.m_url_seeds);
		m_http_seeds.swap(ti.m_http_seeds);
		m_files.swap(ti.m_files);
		m_orig_files.swap(ti.m_orig_files);
		m_nodes.swap(ti.m_nodes);
		swap(m_info_hash, ti.m_info_hash);
		swap(m_creation_date, ti.m_creation_date);
		m_comment.swap(ti.m_comment);
		m_created_by.swap(ti.m_created_by);
		swap(m_multifile, ti.m_multifile);
		swap(m_private, ti.m_private);
		swap(m_info_section, ti.m_info_section);
		swap(m_info_section_size, ti.m_info_section_size);
		swap(m_piece_hashes, ti.m_piece_hashes);
		swap(m_info_dict, ti.m_info_dict);
	}
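	// Note on parse_info_section() below: the info-hash is the sha-1 digest
	// of the raw bencoded "info" dictionary exactly as it appeared in the
	// .torrent file. Re-encoding the dictionary could change key order and
	// therefore the hash, so the verbatim byte range returned by
	// data_section() is hashed and kept in m_info_section:
	//
	//   std::pair<char const*, int> section = info.data_section();
	//   hasher h;
	//   h.update(section.first, section.second); // the raw "d...e" bytes
	//   sha1_hash info_hash = h.final();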
	bool torrent_info::parse_info_section(lazy_entry const& info, std::string& error)
	{
		if (info.type() != lazy_entry::dict_t)
		{
			error = "'info' entry is not a dictionary";
			return false;
		}

		// hash the info-field to calculate info-hash
		hasher h;
		std::pair<char const*, int> section = info.data_section();
		h.update(section.first, section.second);
		m_info_hash = h.final();

		// copy the info section
		m_info_section_size = section.second;
		m_info_section.reset(new char[m_info_section_size]);
		memcpy(m_info_section.get(), section.first, m_info_section_size);
		TORRENT_ASSERT(section.first[0] == 'd');
		TORRENT_ASSERT(section.first[m_info_section_size-1] == 'e');

		// extract piece length
		int piece_length = info.dict_find_int_value("piece length", -1);
		if (piece_length <= 0)
		{
			error = "invalid or missing 'piece length' entry in torrent file";
			return false;
		}
		m_files.set_piece_length(piece_length);

		// extract file name (or the directory name if it's a multifile libtorrent)
		std::string name = info.dict_find_string_value("name.utf-8");
		if (name.empty()) name = info.dict_find_string_value("name");
		if (name.empty())
		{
			error = "missing name in torrent file";
			return false;
		}

		fs::path tmp = name;
		if (tmp.is_complete())
		{
			name = tmp.leaf();
			trim_path_element(name);
		}
#if BOOST_VERSION < 103600
		else if (tmp.has_branch_path())
#else
		else if (tmp.has_parent_path())
#endif
		{
			fs::path p;
			for (fs::path::iterator i = tmp.begin()
				, end(tmp.end()); i != end; ++i)
			{
				if (*i == "." || *i == "..") continue;
				std::string path_element = *i;
				trim_path_element(path_element);
				p /= path_element;
			}
			name = p.string();
		}
		else
		{
			trim_path_element(name);
		}
		if (name == ".." || name == ".")
		{
			error = "invalid 'name' of torrent (possible exploit attempt)";
			return false;
		}

		// correct utf-8 encoding errors
		verify_encoding(name);

		// extract file list
		lazy_entry const* i = info.dict_find_list("files");
		if (i == 0)
		{
			// if there's no list of files, there has to be a length
			// field.
			file_entry e;
			e.path = name;
			e.offset = 0;
			e.size = info.dict_find_int_value("length", -1);
			// bitcomet pad file
			if (e.path.string().find("_____padding_file_") != std::string::npos)
				e.pad_file = true;
			if (e.size < 0)
			{
				error = "invalid length of torrent";
				return false;
			}
			m_files.add_file(e);
			m_multifile = false;
		}
		else
		{
			if (!extract_files(*i, m_files, name))
			{
				error = "failed to parse files from torrent file";
				return false;
			}
			m_multifile = true;
		}
		m_files.set_name(name);

		// extract sha-1 hashes for all pieces
		// we want this division to round upwards, that's why we have the
		// extra addition
		m_files.set_num_pieces(int((m_files.total_size() + m_files.piece_length() - 1)
			/ m_files.piece_length()));

		lazy_entry const* pieces = info.dict_find("pieces");
		if (pieces == 0 || pieces->type() != lazy_entry::string_t)
		{
			error = "invalid or missing 'pieces' entry in torrent file";
			return false;
		}

		if (pieces->string_length() != m_files.num_pieces() * 20)
		{
			error = "incorrect number of piece hashes in torrent file";
			return false;
		}

		m_piece_hashes = m_info_section.get() + (pieces->string_ptr() - section.first);
		TORRENT_ASSERT(m_piece_hashes >= m_info_section.get());
		TORRENT_ASSERT(m_piece_hashes < m_info_section.get() + m_info_section_size);

		m_private = info.dict_find_int_value("private", 0);
		return true;
	}
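	// parse_torrent_file() below handles the multitracker extension. In the
	// torrent, "announce-list" is a list of tiers, each tier a list of
	// tracker URLs, conceptually (the URLs here are placeholders):
	//
	//   "announce-list" -> [ ["http://a.example/ann", "http://b.example/ann"]
	//                      , ["http://backup.example/ann"] ]
	//
	// The URLs within each tier are shuffled (std::random_shuffle) so clients
	// spread load across trackers of equal priority, while the tier index
	// itself is preserved in announce_entry::tier.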
	bool torrent_info::parse_torrent_file(lazy_entry const& torrent_file, std::string& error)
	{
		if (torrent_file.type() != lazy_entry::dict_t)
		{
			error = "torrent file is not a dictionary";
			return false;
		}

		// extract the url of the tracker
		lazy_entry const* i = torrent_file.dict_find_list("announce-list");
		if (i)
		{
			m_urls.reserve(i->list_size());
			for (int j = 0, end(i->list_size()); j < end; ++j)
			{
				lazy_entry const* tier = i->list_at(j);
				if (tier->type() != lazy_entry::list_t) continue;
				for (int k = 0, end(tier->list_size()); k < end; ++k)
				{
					announce_entry e(tier->list_string_value_at(k));
					if (e.url.empty()) continue;
					e.tier = j;
					e.fail_limit = 0;
					e.source = announce_entry::source_torrent;
					m_urls.push_back(e);
				}
			}

			if (!m_urls.empty())
			{
				// shuffle each tier
				std::vector<announce_entry>::iterator start = m_urls.begin();
				std::vector<announce_entry>::iterator stop;
				int current_tier = m_urls.front().tier;
				for (stop = m_urls.begin(); stop != m_urls.end(); ++stop)
				{
					if (stop->tier != current_tier)
					{
						std::random_shuffle(start, stop);
						start = stop;
						current_tier = stop->tier;
					}
				}
				std::random_shuffle(start, stop);
			}
		}

		if (m_urls.empty())
		{
			announce_entry e(torrent_file.dict_find_string_value("announce"));
			e.fail_limit = 0;
			e.source = announce_entry::source_torrent;
			if (!e.url.empty()) m_urls.push_back(e);
		}

		lazy_entry const* nodes = torrent_file.dict_find_list("nodes");
		if (nodes)
		{
			for (int i = 0, end(nodes->list_size()); i < end; ++i)
			{
				lazy_entry const* n = nodes->list_at(i);
				if (n->type() != lazy_entry::list_t
					|| n->list_size() < 2
					|| n->list_at(0)->type() != lazy_entry::string_t
					|| n->list_at(1)->type() != lazy_entry::int_t)
					continue;
				m_nodes.push_back(std::make_pair(
					n->list_at(0)->string_value()
					, int(n->list_at(1)->int_value())));
			}
		}

		// extract creation date
		size_type cd = torrent_file.dict_find_int_value("creation date", -1);
		if (cd >= 0)
		{
			m_creation_date = pt::ptime(gr::date(1970, gr::Jan, 1))
				+ pt::seconds(long(cd));
		}

		// if there are any url-seeds, extract them
		lazy_entry const* url_seeds = torrent_file.dict_find("url-list");
		if (url_seeds && url_seeds->type() == lazy_entry::string_t)
		{
			m_url_seeds.push_back(url_seeds->string_value());
		}
		else if (url_seeds && url_seeds->type() == lazy_entry::list_t)
		{
			for (int i = 0, end(url_seeds->list_size()); i < end; ++i)
			{
				lazy_entry const* url = url_seeds->list_at(i);
				if (url->type() != lazy_entry::string_t) continue;
				m_url_seeds.push_back(url->string_value());
			}
		}

		// if there are any http-seeds, extract them
		lazy_entry const* http_seeds = torrent_file.dict_find("httpseeds");
		if (http_seeds && http_seeds->type() == lazy_entry::string_t)
		{
			m_http_seeds.push_back(http_seeds->string_value());
		}
		else if (http_seeds && http_seeds->type() == lazy_entry::list_t)
		{
			for (int i = 0, end(http_seeds->list_size()); i < end; ++i)
			{
				lazy_entry const* url = http_seeds->list_at(i);
				if (url->type() != lazy_entry::string_t) continue;
				m_http_seeds.push_back(url->string_value());
			}
		}

		m_comment = torrent_file.dict_find_string_value("comment.utf-8");
		if (m_comment.empty()) m_comment = torrent_file.dict_find_string_value("comment");
		verify_encoding(m_comment);

		m_created_by = torrent_file.dict_find_string_value("created by.utf-8");
		if (m_created_by.empty()) m_created_by = torrent_file.dict_find_string_value("created by");
		verify_encoding(m_created_by);

		lazy_entry const* info = torrent_file.dict_find_dict("info");
		if (info == 0)
		{
			error = "missing or invalid 'info' section in torrent file";
			return false;
		}
		return parse_info_section(*info, error);
	}

	boost::optional<pt::ptime> torrent_info::creation_date() const
	{
		if (m_creation_date != pt::ptime(gr::date(pt::not_a_date_time)))
		{
			return boost::optional<pt::ptime>(m_creation_date);
		}
		return boost::optional<pt::ptime>();
	}

	void torrent_info::add_tracker(std::string const& url, int tier)
	{
		announce_entry e(url);
		e.tier = tier;
		e.source = announce_entry::source_client;
		m_urls.push_back(e);

		using boost::bind;
		std::sort(m_urls.begin(), m_urls.end()
			, bind(&announce_entry::tier, _1) < bind(&announce_entry::tier, _2));
	}
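	// Usage sketch (illustrative; the URLs are placeholders): add_tracker()
	// appends a tracker and re-sorts the list by tier, so a backup tracker
	// can be added at a lower priority:
	//
	//   ti.add_tracker("http://tracker.example.com/announce", 0); // tier 0
	//   ti.add_tracker("http://backup.example.com/announce", 1);  // tier 1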
#ifndef TORRENT_NO_DEPRECATE
// ------- start deprecation -------

	void torrent_info::print(std::ostream& os) const
	{
		os << "trackers:\n";
		for (std::vector<announce_entry>::const_iterator i = trackers().begin();
			i != trackers().end(); ++i)
		{
			os << i->tier << ": " << i->url << "\n";
		}
		if (!m_comment.empty())
			os << "comment: " << m_comment << "\n";
//		if (m_creation_date != pt::ptime(gr::date(pt::not_a_date_time)))
//			os << "creation date: " << to_simple_string(m_creation_date) << "\n";
		os << "private: " << (m_private?"yes":"no") << "\n";
		os << "number of pieces: " << num_pieces() << "\n";
		os << "piece length: " << piece_length() << "\n";
		os << "files:\n";
		for (file_storage::iterator i = m_files.begin();
			i != m_files.end(); ++i)
			os << "  " << std::setw(11) << i->size
				<< "  " << i->path.string() << "\n";
	}

// ------- end deprecation -------
#endif

}
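// Usage sketch (illustrative; "example.torrent" is a placeholder path,
// assuming the file_iterator interface declared in torrent_info.hpp):
// iterating the parsed file list through the public API rather than the
// deprecated print() above.
//
//   boost::filesystem::path p("example.torrent");
//   libtorrent::torrent_info ti(p);
//   for (libtorrent::torrent_info::file_iterator i = ti.begin_files()
//       , end(ti.end_files()); i != end; ++i)
//       std::cout << i->path.string() << " (" << i->size << " bytes)\n";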