/* Copyright (c) 2003-2018, Arvid Norberg All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the author nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "libtorrent/file_storage.hpp" #include "libtorrent/string_util.hpp" // for allocate_string_copy #include "libtorrent/utf8.hpp" #include "libtorrent/index_range.hpp" #include "libtorrent/aux_/path.hpp" #include "libtorrent/aux_/numeric_cast.hpp" #include "libtorrent/aux_/disable_warnings_push.hpp" #include #include "libtorrent/aux_/disable_warnings_pop.hpp" #include #include #include #include #include #if defined(TORRENT_WINDOWS) || defined(TORRENT_OS2) #define TORRENT_SEPARATOR '\\' #else #define TORRENT_SEPARATOR '/' #endif using namespace std::placeholders; namespace libtorrent { constexpr file_flags_t file_storage::flag_pad_file; constexpr file_flags_t file_storage::flag_hidden; constexpr file_flags_t file_storage::flag_executable; constexpr file_flags_t file_storage::flag_symlink; #if TORRENT_ABI_VERSION == 1 constexpr file_flags_t file_storage::pad_file; constexpr file_flags_t file_storage::attribute_hidden; constexpr file_flags_t file_storage::attribute_executable; constexpr file_flags_t file_storage::attribute_symlink; #endif file_storage::file_storage() : m_piece_length(0) , m_num_pieces(0) , m_total_size(0) {} file_storage::~file_storage() = default; // even though this copy constructor and the copy assignment // operator are identical to what the compiler would have // generated, they are put here to explicitly make them part // of libtorrent and properly exported by the .dll. 
file_storage::file_storage(file_storage const&) = default; file_storage& file_storage::operator=(file_storage const&) = default; file_storage::file_storage(file_storage&&) noexcept = default; void file_storage::reserve(int num_files) { m_files.reserve(num_files); } int file_storage::piece_size(piece_index_t const index) const { TORRENT_ASSERT_PRECOND(index >= piece_index_t(0) && index < end_piece()); if (index == last_piece()) { std::int64_t const size_except_last = (num_pieces() - 1) * std::int64_t(piece_length()); std::int64_t const size = total_size() - size_except_last; TORRENT_ASSERT(size > 0); TORRENT_ASSERT(size <= piece_length()); return int(size); } else return piece_length(); } namespace { bool compare_file_offset(internal_file_entry const& lhs , internal_file_entry const& rhs) { return lhs.offset < rhs.offset; } } // path is supposed to include the name of the torrent itself. // or an absolute path, to move a file outside of the download directory void file_storage::update_path_index(internal_file_entry& e , std::string const& path, bool const set_name) { if (is_complete(path)) { TORRENT_ASSERT(set_name); e.set_name(path); e.path_index = -2; return; } TORRENT_ASSERT(path[0] != '/'); // split the string into the leaf filename // and the branch path string_view leaf; string_view branch_path; std::tie(branch_path, leaf) = rsplit_path(path); if (branch_path.empty()) { if (set_name) e.set_name(leaf); e.path_index = -1; return; } // if the path *does* contain the name of the torrent (as we expect) // strip it before adding it to m_paths if (lsplit_path(branch_path).first == m_name) { branch_path = lsplit_path(branch_path).second; // strip duplicate separators while (!branch_path.empty() && (branch_path.front() == TORRENT_SEPARATOR #if defined(TORRENT_WINDOWS) || defined(TORRENT_OS2) || branch_path.front() == '/' #endif )) branch_path.remove_prefix(1); e.no_root_dir = false; } else { e.no_root_dir = true; } e.path_index = get_or_add_path(branch_path); if 
(set_name) e.set_name(leaf); } int file_storage::get_or_add_path(string_view const path) { // do we already have this path in the path list? auto const p = std::find(m_paths.rbegin(), m_paths.rend(), path); if (p == m_paths.rend()) { // no, we don't. add it int const ret = int(m_paths.size()); TORRENT_ASSERT(path.size() == 0 || path[0] != '/'); m_paths.emplace_back(path.data(), path.size()); return ret; } else { // yes we do. use it return int(p.base() - m_paths.begin() - 1); } } #if TORRENT_ABI_VERSION == 1 file_entry::file_entry(): offset(0), size(0) , mtime(0), pad_file(false), hidden_attribute(false) , executable_attribute(false) , symlink_attribute(false) {} file_entry::~file_entry() = default; #endif // TORRENT_ABI_VERSION internal_file_entry::internal_file_entry() : offset(0) , symlink_index(not_a_symlink) , no_root_dir(false) , size(0) , name_len(name_is_owned) , pad_file(false) , hidden_attribute(false) , executable_attribute(false) , symlink_attribute(false) , name(nullptr) , path_index(-1) {} internal_file_entry::~internal_file_entry() { if (name_len == name_is_owned) delete[] name; } internal_file_entry::internal_file_entry(internal_file_entry const& fe) : offset(fe.offset) , symlink_index(fe.symlink_index) , no_root_dir(fe.no_root_dir) , size(fe.size) , name_len(fe.name_len) , pad_file(fe.pad_file) , hidden_attribute(fe.hidden_attribute) , executable_attribute(fe.executable_attribute) , symlink_attribute(fe.symlink_attribute) , name(nullptr) , path_index(fe.path_index) { bool const borrow = fe.name_len != name_is_owned; set_name(fe.filename(), borrow); } internal_file_entry& internal_file_entry::operator=(internal_file_entry const& fe) & { if (&fe == this) return *this; offset = fe.offset; size = fe.size; path_index = fe.path_index; symlink_index = fe.symlink_index; pad_file = fe.pad_file; hidden_attribute = fe.hidden_attribute; executable_attribute = fe.executable_attribute; symlink_attribute = fe.symlink_attribute; no_root_dir = fe.no_root_dir; // if 
the name is not owned, don't allocate memory, we can point into the // same metadata buffer bool const borrow = fe.name_len != name_is_owned; set_name(fe.filename(), borrow); return *this; } internal_file_entry::internal_file_entry(internal_file_entry&& fe) noexcept : offset(fe.offset) , symlink_index(fe.symlink_index) , no_root_dir(fe.no_root_dir) , size(fe.size) , name_len(fe.name_len) , pad_file(fe.pad_file) , hidden_attribute(fe.hidden_attribute) , executable_attribute(fe.executable_attribute) , symlink_attribute(fe.symlink_attribute) , name(fe.name) , path_index(fe.path_index) { fe.name_len = 0; fe.name = nullptr; } internal_file_entry& internal_file_entry::operator=(internal_file_entry&& fe) & noexcept { if (&fe == this) return *this; offset = fe.offset; size = fe.size; path_index = fe.path_index; symlink_index = fe.symlink_index; pad_file = fe.pad_file; hidden_attribute = fe.hidden_attribute; executable_attribute = fe.executable_attribute; symlink_attribute = fe.symlink_attribute; no_root_dir = fe.no_root_dir; name = fe.name; name_len = fe.name_len; fe.name_len = 0; fe.name = nullptr; return *this; } // if borrow_string is true, don't take ownership over n, just // point to it. // if borrow_string is false, n will be copied and owned by the // internal_file_entry. void internal_file_entry::set_name(string_view n, bool const borrow_string) { // free the current string, before assigning the new one if (name_len == name_is_owned) delete[] name; if (n.empty()) { TORRENT_ASSERT(borrow_string == false); name = nullptr; } else if (borrow_string) { // we have limited space in the length field. truncate string // if it's too long if (n.size() >= name_is_owned) n = n.substr(name_is_owned - 1); name = n.data(); name_len = aux::numeric_cast(n.size()); } else { name = allocate_string_copy(n); name_len = name_is_owned; } } string_view internal_file_entry::filename() const { if (name_len != name_is_owned) return {name, std::size_t(name_len)}; return name ? 
string_view(name) : string_view(); } void file_storage::apply_pointer_offset(std::ptrdiff_t const off) { for (auto& f : m_files) { if (f.name_len == internal_file_entry::name_is_owned) continue; f.name += off; } for (auto& h : m_file_hashes) { if (h == nullptr) continue; h += off; } } #if TORRENT_ABI_VERSION == 1 void file_storage::add_file_borrow(char const* filename, int filename_len , std::string const& path, std::int64_t file_size, file_flags_t file_flags , char const* filehash, std::int64_t mtime, string_view symlink_path) { TORRENT_ASSERT(filename_len >= 0); add_file_borrow({filename, std::size_t(filename_len)}, path, file_size , file_flags, filehash, mtime, symlink_path); } void file_storage::add_file(file_entry const& fe, char const* filehash) { file_flags_t flags = {}; if (fe.pad_file) flags |= file_storage::flag_pad_file; if (fe.hidden_attribute) flags |= file_storage::flag_hidden; if (fe.executable_attribute) flags |= file_storage::flag_executable; if (fe.symlink_attribute) flags |= file_storage::flag_symlink; add_file_borrow({}, fe.path, fe.size, flags, filehash, fe.mtime , fe.symlink_path); } void file_storage::set_name(std::wstring const& n) { m_name = wchar_utf8(n); } void file_storage::rename_file_deprecated(file_index_t index, std::wstring const& new_filename) { TORRENT_ASSERT_PRECOND(index >= file_index_t(0) && index < end_file()); update_path_index(m_files[index], wchar_utf8(new_filename)); } void file_storage::add_file(std::wstring const& file, std::int64_t file_size , file_flags_t const file_flags, std::time_t mtime, string_view symlink_path) { add_file(wchar_utf8(file), file_size, file_flags, mtime, symlink_path); } void file_storage::rename_file(file_index_t index, std::wstring const& new_filename) { rename_file_deprecated(index, new_filename); } #endif // TORRENT_ABI_VERSION void file_storage::rename_file(file_index_t const index , std::string const& new_filename) { TORRENT_ASSERT_PRECOND(index >= file_index_t(0) && index < end_file()); 
update_path_index(m_files[index], new_filename); } #if TORRENT_ABI_VERSION == 1 file_storage::iterator file_storage::file_at_offset_deprecated(std::int64_t offset) const { // find the file iterator and file offset internal_file_entry target; target.offset = aux::numeric_cast(offset); TORRENT_ASSERT(!compare_file_offset(target, m_files.front())); auto file_iter = std::upper_bound( begin_deprecated(), end_deprecated(), target, compare_file_offset); TORRENT_ASSERT(file_iter != begin_deprecated()); --file_iter; return file_iter; } file_storage::iterator file_storage::file_at_offset(std::int64_t offset) const { return file_at_offset_deprecated(offset); } #endif file_index_t file_storage::file_index_at_offset(std::int64_t const offset) const { TORRENT_ASSERT_PRECOND(offset >= 0); TORRENT_ASSERT_PRECOND(offset < m_total_size); // find the file iterator and file offset internal_file_entry target; target.offset = aux::numeric_cast(offset); TORRENT_ASSERT(!compare_file_offset(target, m_files.front())); auto file_iter = std::upper_bound( m_files.begin(), m_files.end(), target, compare_file_offset); TORRENT_ASSERT(file_iter != m_files.begin()); --file_iter; return file_index_t(int(file_iter - m_files.begin())); } char const* file_storage::file_name_ptr(file_index_t const index) const { return m_files[index].name; } int file_storage::file_name_len(file_index_t const index) const { if (m_files[index].name_len == internal_file_entry::name_is_owned) return -1; return m_files[index].name_len; } std::vector file_storage::map_block(piece_index_t const piece , std::int64_t const offset, int size) const { TORRENT_ASSERT_PRECOND(piece >= piece_index_t{0}); TORRENT_ASSERT_PRECOND(piece < end_piece()); TORRENT_ASSERT_PRECOND(num_files() > 0); std::vector ret; if (m_files.empty()) return ret; // find the file iterator and file offset internal_file_entry target; target.offset = aux::numeric_cast(static_cast(piece) * std::int64_t(m_piece_length) + offset); 
TORRENT_ASSERT_PRECOND(std::int64_t(target.offset) + size <= m_total_size); TORRENT_ASSERT(!compare_file_offset(target, m_files.front())); // in case the size is past the end, fix it up if (std::int64_t(target.offset) + size > m_total_size) size = aux::numeric_cast(m_total_size - std::int64_t(target.offset)); auto file_iter = std::upper_bound( m_files.begin(), m_files.end(), target, compare_file_offset); TORRENT_ASSERT(file_iter != m_files.begin()); --file_iter; std::int64_t file_offset = target.offset - file_iter->offset; for (; size > 0; file_offset -= file_iter->size, ++file_iter) { TORRENT_ASSERT(file_iter != m_files.end()); if (file_offset < std::int64_t(file_iter->size)) { file_slice f{}; f.file_index = file_index_t(int(file_iter - m_files.begin())); f.offset = file_offset; f.size = std::min(std::int64_t(file_iter->size) - file_offset, std::int64_t(size)); TORRENT_ASSERT(f.size <= size); size -= int(f.size); file_offset += f.size; ret.push_back(f); } TORRENT_ASSERT(size >= 0); } return ret; } #if TORRENT_ABI_VERSION == 1 file_entry file_storage::at(int index) const { return at_deprecated(index); } internal_file_entry const& file_storage::internal_at(int const index) const { TORRENT_ASSERT(index >= 0); TORRENT_ASSERT(index < int(m_files.size())); return m_files[file_index_t(index)]; } file_entry file_storage::at_deprecated(int index) const { TORRENT_ASSERT_PRECOND(index >= 0 && index < int(m_files.size())); file_entry ret; internal_file_entry const& ife = m_files[index]; ret.path = file_path(index); ret.offset = ife.offset; ret.size = ife.size; ret.mtime = mtime(index); ret.pad_file = ife.pad_file; ret.hidden_attribute = ife.hidden_attribute; ret.executable_attribute = ife.executable_attribute; ret.symlink_attribute = ife.symlink_attribute; if (ife.symlink_index != internal_file_entry::not_a_symlink) ret.symlink_path = symlink(index); ret.filehash = hash(index); return ret; } #endif // TORRENT_ABI_VERSION int file_storage::num_files() const noexcept { return 
int(m_files.size()); } // returns the index of the one-past-end file in the file storage file_index_t file_storage::end_file() const noexcept { return m_files.end_index(); } file_index_t file_storage::last_file() const noexcept { return --m_files.end_index(); } index_range file_storage::file_range() const noexcept { return m_files.range(); } index_range file_storage::piece_range() const noexcept { return {piece_index_t{0}, end_piece()}; } peer_request file_storage::map_file(file_index_t const file_index , std::int64_t const file_offset, int const size) const { TORRENT_ASSERT_PRECOND(file_index < end_file()); TORRENT_ASSERT(m_num_pieces >= 0); peer_request ret{}; if (file_index >= end_file()) { ret.piece = end_piece(); ret.start = 0; ret.length = 0; return ret; } std::int64_t const offset = file_offset + this->file_offset(file_index); if (offset >= total_size()) { ret.piece = end_piece(); ret.start = 0; ret.length = 0; } else { ret.piece = piece_index_t(int(offset / piece_length())); ret.start = int(offset % piece_length()); ret.length = size; if (offset + size > total_size()) ret.length = int(total_size() - offset); } return ret; } void file_storage::add_file(std::string const& path, std::int64_t file_size , file_flags_t const file_flags, std::time_t mtime, string_view symlink_path) { add_file_borrow({}, path, file_size, file_flags, nullptr, mtime , symlink_path); } void file_storage::add_file_borrow(string_view filename , std::string const& path, std::int64_t const file_size , file_flags_t const file_flags, char const* filehash , std::int64_t const mtime, string_view symlink_path) { TORRENT_ASSERT_PRECOND(file_size >= 0); TORRENT_ASSERT_PRECOND(!is_complete(filename)); if (!has_parent_path(path)) { // you have already added at least one file with a // path to the file (branch_path), which means that // all the other files need to be in the same top // directory as the first file. 
TORRENT_ASSERT_PRECOND(m_files.empty()); m_name = path; } else { if (m_files.empty()) m_name = lsplit_path(path).first.to_string(); } // this is poor-man's emplace_back() m_files.resize(m_files.size() + 1); internal_file_entry& e = m_files.back(); // the last argument specified whether the function should also set // the filename. If it does, it will copy the leaf filename from path. // if filename is empty, we should copy it. If it isn't, we're borrowing // it and we can save the copy by setting it after this call to // update_path_index(). update_path_index(e, path, filename.empty()); // filename is allowed to be empty, in which case we just use path if (!filename.empty()) e.set_name(filename, true); e.size = aux::numeric_cast(file_size); e.offset = aux::numeric_cast(m_total_size); e.pad_file = bool(file_flags & file_storage::flag_pad_file); e.hidden_attribute = bool(file_flags & file_storage::flag_hidden); e.executable_attribute = bool(file_flags & file_storage::flag_executable); e.symlink_attribute = bool(file_flags & file_storage::flag_symlink); if (filehash) { if (m_file_hashes.size() < m_files.size()) m_file_hashes.resize(m_files.size()); m_file_hashes[last_file()] = filehash; } if (!symlink_path.empty() && m_symlinks.size() < internal_file_entry::not_a_symlink - 1) { e.symlink_index = m_symlinks.size(); m_symlinks.emplace_back(symlink_path.to_string()); } else { e.symlink_attribute = false; } if (mtime) { if (m_mtime.size() < m_files.size()) m_mtime.resize(m_files.size()); m_mtime[last_file()] = std::time_t(mtime); } m_total_size += e.size; } sha1_hash file_storage::hash(file_index_t const index) const { if (index >= m_file_hashes.end_index()) return sha1_hash(); return sha1_hash(m_file_hashes[index]); } std::string const& file_storage::symlink(file_index_t const index) const { TORRENT_ASSERT_PRECOND(index >= file_index_t(0) && index < end_file()); internal_file_entry const& fe = m_files[index]; TORRENT_ASSERT(fe.symlink_index < int(m_symlinks.size())); 
auto const& link = m_symlinks[fe.symlink_index]; // TODO: 3 this is a hack to retain ABI compatibility with 1.2.1 // in next major release, make this return by value static std::string storage[4]; static std::atomic counter{0}; std::string& ret = storage[(counter++) % 4]; ret.reserve(m_name.size() + link.size() + 1); ret.assign(m_name); append_path(ret, link); return ret; } std::time_t file_storage::mtime(file_index_t const index) const { if (index >= m_mtime.end_index()) return 0; return m_mtime[index]; } namespace { template void process_string_lowercase(CRC& crc, string_view str) { for (char const c : str) crc.process_byte(to_lower(c) & 0xff); } template void process_path_lowercase( std::unordered_set& table , CRC crc, string_view str) { if (str.empty()) return; for (char const c : str) { if (c == TORRENT_SEPARATOR) table.insert(crc.checksum()); crc.process_byte(to_lower(c) & 0xff); } table.insert(crc.checksum()); } } void file_storage::all_path_hashes( std::unordered_set& table) const { boost::crc_optimal<32, 0x1EDC6F41, 0xFFFFFFFF, 0xFFFFFFFF, true, true> crc; if (!m_name.empty()) { process_string_lowercase(crc, m_name); TORRENT_ASSERT(m_name[m_name.size() - 1] != TORRENT_SEPARATOR); crc.process_byte(TORRENT_SEPARATOR); } for (auto const& p : m_paths) process_path_lowercase(table, crc, p); } std::uint32_t file_storage::file_path_hash(file_index_t const index , std::string const& save_path) const { TORRENT_ASSERT_PRECOND(index >= file_index_t(0) && index < end_file()); internal_file_entry const& fe = m_files[index]; boost::crc_optimal<32, 0x1EDC6F41, 0xFFFFFFFF, 0xFFFFFFFF, true, true> crc; if (fe.path_index == -2) { // -2 means this is an absolute path filename process_string_lowercase(crc, fe.filename()); } else if (fe.path_index == -1) { // -1 means no path if (!save_path.empty()) { process_string_lowercase(crc, save_path); TORRENT_ASSERT(save_path[save_path.size() - 1] != TORRENT_SEPARATOR); crc.process_byte(TORRENT_SEPARATOR); } 
process_string_lowercase(crc, fe.filename()); } else if (fe.no_root_dir) { if (!save_path.empty()) { process_string_lowercase(crc, save_path); TORRENT_ASSERT(save_path[save_path.size() - 1] != TORRENT_SEPARATOR); crc.process_byte(TORRENT_SEPARATOR); } std::string const& p = m_paths[fe.path_index]; if (!p.empty()) { process_string_lowercase(crc, p); TORRENT_ASSERT(p[p.size() - 1] != TORRENT_SEPARATOR); crc.process_byte(TORRENT_SEPARATOR); } process_string_lowercase(crc, fe.filename()); } else { if (!save_path.empty()) { process_string_lowercase(crc, save_path); TORRENT_ASSERT(save_path[save_path.size() - 1] != TORRENT_SEPARATOR); crc.process_byte(TORRENT_SEPARATOR); } process_string_lowercase(crc, m_name); TORRENT_ASSERT(m_name.size() > 0); TORRENT_ASSERT(m_name[m_name.size() - 1] != TORRENT_SEPARATOR); crc.process_byte(TORRENT_SEPARATOR); std::string const& p = m_paths[fe.path_index]; if (!p.empty()) { process_string_lowercase(crc, p); TORRENT_ASSERT(p.size() > 0); TORRENT_ASSERT(p[p.size() - 1] != TORRENT_SEPARATOR); crc.process_byte(TORRENT_SEPARATOR); } process_string_lowercase(crc, fe.filename()); } return crc.checksum(); } std::string file_storage::file_path(file_index_t const index, std::string const& save_path) const { TORRENT_ASSERT_PRECOND(index >= file_index_t(0) && index < end_file()); internal_file_entry const& fe = m_files[index]; std::string ret; // -2 means this is an absolute path filename if (fe.path_index == -2) { ret = fe.filename().to_string(); } else if (fe.path_index == -1) { // -1 means no path ret.reserve(save_path.size() + fe.filename().size() + 1); ret.assign(save_path); append_path(ret, fe.filename()); } else if (fe.no_root_dir) { std::string const& p = m_paths[fe.path_index]; ret.reserve(save_path.size() + p.size() + fe.filename().size() + 2); ret.assign(save_path); append_path(ret, p); append_path(ret, fe.filename()); } else { std::string const& p = m_paths[fe.path_index]; ret.reserve(save_path.size() + m_name.size() + p.size() + 
fe.filename().size() + 3); ret.assign(save_path); append_path(ret, m_name); append_path(ret, p); append_path(ret, fe.filename()); } // a single return statement, just to make NRVO more likely to kick in return ret; } std::string file_storage::internal_file_path(file_index_t const index) const { TORRENT_ASSERT_PRECOND(index >= file_index_t(0) && index < end_file()); internal_file_entry const& fe = m_files[index]; if (fe.path_index >= 0) { std::string ret; std::string const& p = m_paths[fe.path_index]; ret.reserve(p.size() + fe.filename().size() + 2); append_path(ret, p); append_path(ret, fe.filename()); return ret; } else { return fe.filename().to_string(); } } string_view file_storage::file_name(file_index_t const index) const { TORRENT_ASSERT_PRECOND(index >= file_index_t(0) && index < end_file()); internal_file_entry const& fe = m_files[index]; return fe.filename(); } std::int64_t file_storage::file_size(file_index_t const index) const { TORRENT_ASSERT_PRECOND(index >= file_index_t(0) && index < end_file()); return m_files[index].size; } bool file_storage::pad_file_at(file_index_t const index) const { TORRENT_ASSERT_PRECOND(index >= file_index_t(0) && index < end_file()); return m_files[index].pad_file; } std::int64_t file_storage::file_offset(file_index_t const index) const { TORRENT_ASSERT_PRECOND(index >= file_index_t(0) && index < end_file()); return m_files[index].offset; } file_flags_t file_storage::file_flags(file_index_t const index) const { TORRENT_ASSERT_PRECOND(index >= file_index_t(0) && index < end_file()); internal_file_entry const& fe = m_files[index]; return (fe.pad_file ? file_storage::flag_pad_file : file_flags_t{}) | (fe.hidden_attribute ? file_storage::flag_hidden : file_flags_t{}) | (fe.executable_attribute ? file_storage::flag_executable : file_flags_t{}) | (fe.symlink_attribute ? 
file_storage::flag_symlink : file_flags_t{}); } bool file_storage::file_absolute_path(file_index_t const index) const { TORRENT_ASSERT_PRECOND(index >= file_index_t(0) && index < end_file()); internal_file_entry const& fe = m_files[index]; return fe.path_index == -2; } #if TORRENT_ABI_VERSION == 1 sha1_hash file_storage::hash(internal_file_entry const& fe) const { int index = int(&fe - &m_files[0]); if (index >= int(m_file_hashes.size())) return sha1_hash(nullptr); return sha1_hash(m_file_hashes[index]); } std::string const& file_storage::symlink(internal_file_entry const& fe) const { TORRENT_ASSERT_PRECOND(fe.symlink_index < int(m_symlinks.size())); return m_symlinks[fe.symlink_index]; } std::time_t file_storage::mtime(internal_file_entry const& fe) const { int index = int(&fe - &m_files[0]); if (index >= int(m_mtime.size())) return 0; return m_mtime[index]; } int file_storage::file_index(internal_file_entry const& fe) const { int index = int(&fe - &m_files[0]); TORRENT_ASSERT_PRECOND(index >= 0 && index < int(m_files.size())); return index; } std::string file_storage::file_path(internal_file_entry const& fe , std::string const& save_path) const { int const index = int(&fe - &m_files[0]); return file_path(index, save_path); } std::string file_storage::file_name(internal_file_entry const& fe) const { return fe.filename().to_string(); } std::int64_t file_storage::file_size(internal_file_entry const& fe) const { return fe.size; } bool file_storage::pad_file_at(internal_file_entry const& fe) const { return fe.pad_file; } std::int64_t file_storage::file_offset(internal_file_entry const& fe) const { return fe.offset; } file_entry file_storage::at(file_storage::iterator i) const { return at_deprecated(int(i - m_files.begin())); } #endif // TORRENT_ABI_VERSION void file_storage::reorder_file(int const index, int const dst) { TORRENT_ASSERT(index < int(m_files.size())); TORRENT_ASSERT(dst < int(m_files.size())); TORRENT_ASSERT(dst < index); std::iter_swap(m_files.begin() + 
index, m_files.begin() + dst); if (!m_mtime.empty()) { TORRENT_ASSERT(m_mtime.size() == m_files.size()); if (int(m_mtime.size()) < index) m_mtime.resize(index + 1, 0); std::iter_swap(m_mtime.begin() + dst, m_mtime.begin() + index); } if (!m_file_hashes.empty()) { TORRENT_ASSERT(m_file_hashes.size() == m_files.size()); if (int(m_file_hashes.size()) < index) m_file_hashes.resize(index + 1, nullptr); std::iter_swap(m_file_hashes.begin() + dst, m_file_hashes.begin() + index); } } void file_storage::swap(file_storage& ti) noexcept { using std::swap; swap(ti.m_files, m_files); swap(ti.m_file_hashes, m_file_hashes); swap(ti.m_symlinks, m_symlinks); swap(ti.m_mtime, m_mtime); swap(ti.m_paths, m_paths); swap(ti.m_name, m_name); swap(ti.m_total_size, m_total_size); swap(ti.m_num_pieces, m_num_pieces); swap(ti.m_piece_length, m_piece_length); } void file_storage::optimize(int const pad_file_limit, int alignment , bool const tail_padding) { if (alignment == -1) alignment = m_piece_length; // TODO: padfiles should be removed std::int64_t off = 0; int padding_file = 0; for (auto i = m_files.begin(); i != m_files.end(); ++i) { if ((off % alignment) == 0) { // this file position is aligned, pick the largest // available file to put here. If we encounter a file whose size is // divisible by `alignment`, we pick that immediately, since that // will not affect whether we're at an aligned position and will // improve packing of files auto best_match = i; for (auto k = i; k != m_files.end(); ++k) { // a file whose size fits the alignment always takes priority, // since it will let us keep placing aligned files if ((k->size % aux::numeric_cast(alignment)) == 0) { best_match = k; break; } // otherwise, pick the largest file, to have as many bytes be // aligned. 
if (best_match->size < k->size) best_match = k; } if (best_match != i) { int const index = int(best_match - m_files.begin()); int const cur_index = int(i - m_files.begin()); reorder_file(index, cur_index); i = m_files.begin() + cur_index; } } else if (pad_file_limit >= 0 && i->size > std::uint32_t(pad_file_limit) && i->pad_file == false) { // if we have pad files enabled, and this file is // not piece-aligned and the file size exceeds the // limit, and it's not a padding file itself. // so add a padding file in front of it int const pad_size = alignment - (off % alignment); // find the largest file that fits in pad_size auto best_match = m_files.end(); // if pad_file_limit is 0, it means all files are padded, there's // no point in trying to find smaller files to use as filling if (pad_file_limit > 0) { for (auto j = i + 1; j < m_files.end(); ++j) { if (j->size > std::uint32_t(pad_size)) continue; if (best_match == m_files.end() || j->size > best_match->size) best_match = j; } if (best_match != m_files.end()) { // we found one // We cannot have found i, because i->size > pad_file_limit // which is forced to be no less than alignment. We only // look for files <= pad_size, which never is greater than // alignment TORRENT_ASSERT(best_match != i); int index = int(best_match - m_files.begin()); int cur_index = int(i - m_files.begin()); reorder_file(index, cur_index); i = m_files.begin() + cur_index; i->offset = aux::numeric_cast(off); off += i->size; continue; } } // we could not find a file that fits in pad_size // add a padding file // note that i will be set to point to the // new pad file. Once we're done adding it, we need // to increment i to point to the current file again // first add the pad file to the end of the file list // then swap it in place. 
This minimizes the amount // of copying of internal_file_entry, which is somewhat
				// expensive (until we have move semantics)
				add_pad_file(pad_size, i, off, padding_file);

				TORRENT_ASSERT((off % alignment) == 0);
				continue;
			}
			i->offset = aux::numeric_cast(off);
			off += i->size;

			if (tail_padding
				&& i->size > std::uint32_t(pad_file_limit)
				&& (off % alignment) != 0)
			{
				// skip the file we just put in place, so we put the pad
				// file after it
				++i;

				// tail-padding is enabled, and the offset after this file is not
				// aligned. The last file must be padded too, in order to match an
				// equivalent tail-padded file.
				add_pad_file(alignment - (off % alignment), i, off, padding_file);

				TORRENT_ASSERT((off % alignment) == 0);

				if (i == m_files.end()) break;
			}
		}
		m_total_size = off;
	}

	// NOTE(review): several template argument lists in this part of the file
	// (e.g. on std::vector::iterator, aux::numeric_cast, static_cast,
	// std::unordered_map, std::tuple) appear to be missing, presumably lost
	// when the text was extracted. Confirm against the upstream file before
	// building.

	// Appends a new pad file entry to m_files and moves it into the position
	// ``i`` currently refers to.
	//
	// size: the size given to the new pad file entry.
	// i: iterator into m_files marking where the pad file should end up. It
	//   is re-derived from its index after the push_back(), so it stays
	//   usable by the caller even if the vector reallocated.
	// offset: in/out. Its value on entry becomes the pad file's offset, and
	//   it is advanced by ``size`` before returning.
	// pad_file_counter: in/out. Its decimal representation is used as the
	//   pad file's name (under the ".pad" path), and it is then incremented.
	void file_storage::add_pad_file(int const size
		, std::vector::iterator& i
		, std::int64_t& offset
		, int& pad_file_counter)
	{
		// remember where ``i`` points as an index; the iterator itself may
		// not survive the push_back() below
		int const cur_index = int(i - m_files.begin());
		// the index the new entry gets when appended at the end
		int const index = int(m_files.size());
		m_files.push_back(internal_file_entry());
		internal_file_entry& e = m_files.back();
		// i may have been invalidated, refresh it
		i = m_files.begin() + cur_index;
		e.size = aux::numeric_cast(size);
		e.offset = aux::numeric_cast(offset);
		e.path_index = get_or_add_path(".pad");
		// the name is the counter printed as a decimal number
		char name[15];
		std::snprintf(name, sizeof(name), "%d", pad_file_counter);
		e.set_name(name);
		e.pad_file = true;
		offset += size;
		++pad_file_counter;

		// the per-file side arrays are only grown if they are in use
		// (non-empty); the new slot gets a zero / null value
		if (!m_mtime.empty()) m_mtime.resize(index + 1, 0);
		if (!m_file_hashes.empty()) m_file_hashes.resize(index + 1, nullptr);

		// presumably moves the entry just appended (at ``index``) to
		// ``cur_index`` - verify against reorder_file()
		if (index != cur_index) reorder_file(index, cur_index);
	}

	// Validates the target of every file flagged as a symlink and rewrites
	// the ones that are not acceptable.
	//
	// First pass, per symlink:
	//  * an absolute target (is_complete()) is rejected; the link is made to
	//    point to itself
	//  * a target equal to the path of a file in this torrent, or to a
	//    directory (any prefix of an entry in m_paths), is accepted
	//  * otherwise the target is retried relative to the link's own
	//    directory (m_paths[path_index]) and accepted on the same terms
	//  * anything still unresolved is queued for the second pass
	//
	// Second pass: each queued target is walked one path element at a time,
	// substituting directory symlinks recorded in dir_links. Each such link
	// may be traversed only once per target. If the walk fails, or the
	// resolved target is neither a known file nor a known directory, the link
	// is made to point to itself.
	void file_storage::sanitize_symlinks()
	{
		// symlinks are unusual, this function is optimized assuming there are no
		// symbolic links in the torrent. If we find one symbolic link, we'll
		// build the hash table of files it's allowed to refer to, but don't pay
		// that price up-front.
		std::unordered_map file_map;
		bool file_map_initialized = false;

		// lazily instantiated set of all valid directories a symlink may point to
		// TODO: in C++17 this could be string_view
		std::unordered_set dir_map;
		bool dir_map_initialized = false;

		// symbolic links that point to directories
		// (keyed by the link's own path, mapped to its target)
		std::unordered_map dir_links;

		// we validate symlinks in (potentially) 2 passes over the files.
		// remaining symlinks to validate after the first pass
		std::vector symlinks_to_validate;

		for (auto const i : file_range())
		{
			if (!(file_flags(i) & file_storage::flag_symlink)) continue;

			// first symlink encountered: index every file by its path
			if (!file_map_initialized)
			{
				for (auto const j : file_range())
					file_map.insert({internal_file_path(j), j});
				file_map_initialized = true;
			}

			internal_file_entry const& fe = m_files[i];
			TORRENT_ASSERT(fe.symlink_index < int(m_symlinks.size()));

			// symlink targets are only allowed to point to files or directories in
			// this torrent.
			{
				std::string target = m_symlinks[fe.symlink_index];

				if (is_complete(target))
				{
					// a symlink target is not allowed to be an absolute path, ever
					// this symlink is invalid, make it point to itself
					m_symlinks[fe.symlink_index] = internal_file_path(i);
					continue;
				}

				auto const iter = file_map.find(target);
				if (iter != file_map.end())
				{
					m_symlinks[fe.symlink_index] = target;
					if (file_flags(iter->second) & file_storage::flag_symlink)
					{
						// we don't know whether this symlink is a file or a
						// directory, so make the conservative assumption that it's a
						// directory
						dir_links[internal_file_path(i)] = target;
					}
					continue;
				}

				// it may point to a directory that doesn't have any files (but only
				// other directories), in which case it won't show up in m_paths
				if (!dir_map_initialized)
				{
					// insert every entry of m_paths along with each of its
					// parent prefixes, as produced by rsplit_path()
					for (auto const& p : m_paths)
						for (string_view pv = p; !pv.empty(); pv = rsplit_path(pv).first)
							dir_map.insert(pv.to_string());
					dir_map_initialized = true;
				}

				if (dir_map.count(target))
				{
					// it points to a sub directory within the torrent, that's OK
					m_symlinks[fe.symlink_index] = target;
					dir_links[internal_file_path(i)] = target;
					continue;
				}
			}

			// for backwards compatibility, allow paths relative to the link as
			// well
			// (dir_map and file_map are both initialized by the time we get
			// here, since every earlier exit above is a ``continue``)
			if (fe.path_index >= 0)
			{
				std::string target = m_paths[fe.path_index];
				append_path(target, m_symlinks[fe.symlink_index]);
				// if it points to a directory, that's OK
				auto const it = std::find(m_paths.begin(), m_paths.end(), target);
				if (it != m_paths.end())
				{
					m_symlinks[fe.symlink_index] = *it;
					dir_links[internal_file_path(i)] = *it;
					continue;
				}

				if (dir_map.count(target))
				{
					// it points to a sub directory within the torrent, that's OK
					m_symlinks[fe.symlink_index] = target;
					dir_links[internal_file_path(i)] = target;
					continue;
				}

				auto const iter = file_map.find(target);
				if (iter != file_map.end())
				{
					m_symlinks[fe.symlink_index] = target;
					if (file_flags(iter->second) & file_storage::flag_symlink)
					{
						// we don't know whether this symlink is a file or a
						// directory, so make the conservative assumption that it's a
						// directory
						dir_links[internal_file_path(i)] = target;
					}
					continue;
				}
			}

			// we don't know whether this symlink is a file or a
			// directory, so make the conservative assumption that it's a
			// directory
			dir_links[internal_file_path(i)] = m_symlinks[fe.symlink_index];
			symlinks_to_validate.push_back(i);
		}

		// in case there were some "complex" symlinks, we need a second pass to
		// validate those. For example, symlinks whose targets rely on other
		// symlinks
		for (auto const i : symlinks_to_validate)
		{
			internal_file_entry const& fe = m_files[i];
			TORRENT_ASSERT(fe.symlink_index < int(m_symlinks.size()));

			// working copy; rewritten below every time a directory symlink
			// is substituted
			std::string target = m_symlinks[fe.symlink_index];

			// to avoid getting stuck in an infinite loop, we only allow traversing
			// a symlink once
			std::set traversed;

			// this is where we check every path element for existence. If it's not
			// among the concrete paths, it may be a symlink, which is also OK
			// note that we won't iterate through this for the last step, where the
			// filename is included. The filename is validated after the loop
			for (string_view branch = lsplit_path(target).first;
				branch.size() < target.size();
				branch = lsplit_path(target, branch.size() + 1).first)
			{
				// this is a concrete directory
				if (dir_map.count(branch.to_string())) continue;

				auto const iter = dir_links.find(branch.to_string());
				if (iter == dir_links.end()) goto failed;
				if (traversed.count(branch.to_string())) goto failed;
				traversed.insert(branch.to_string());

				// this path element is a symlink. substitute the branch so far by
				// the link target
				target = combine_path(iter->second, target.substr(branch.size() + 1));

				// start over with the new (concrete) path
				// (presumably ``branch`` was a view into the old ``target``, so
				// it is reset here; the loop's increment expression then
				// re-derives it from the rewritten ``target``)
				branch = {};
			}

			// the final (resolved) target must be a valid file
			// or directory
			if (file_map.count(target) == 0
				&& dir_map.count(target) == 0) goto failed;

			// this is OK
			continue;

failed:

			// this symlink is invalid, make it point to itself
			m_symlinks[fe.symlink_index] = internal_file_path(i);
		}
	}

namespace aux {

	// Returns a (begin_piece, end_piece) tuple for ``file``; judging by the
	// name, presumably the half-open range of pieces that belong exclusively
	// to this file - confirm against callers.
	//
	// begin_piece is the piece fs.map_file(file, 0, 1) reports for the start
	// of the file if the file starts at offset 0 within that piece, otherwise
	// the piece after it.
	// end_piece is fs.num_pieces() for the last file in ``fs``, otherwise it
	// is derived from the file's end position divided by the piece length.
	std::tuple
	file_piece_range_exclusive(file_storage const& fs, file_index_t const file)
	{
		// position (piece + offset within the piece) of the file's first byte
		peer_request const range = fs.map_file(file, 0, 1);
		std::int64_t const file_size = fs.file_size(file);
		std::int64_t const piece_size = fs.piece_length();
		// if the file doesn't start on a piece boundary, skip the piece it
		// starts in
		piece_index_t const begin_piece = range.start == 0 ? range.piece : piece_index_t(static_cast(range.piece) + 1);
		// the last piece is potentially smaller than the other pieces, so the
		// generic logic doesn't really work. If this file is the last file, the
		// last piece doesn't overlap with any other file and it's entirely
		// contained within the last file.
		// NOTE(review): assuming range.piece * piece_size + range.start is the
		// file's start offset, the "+ 1" below rounds up when the file ends
		// exactly one byte short of a piece boundary, which would include a
		// piece whose last byte is not in this file - confirm this is intended.
		piece_index_t const end_piece = (file == file_index_t(fs.num_files() - 1))
			? piece_index_t(fs.num_pieces())
			: piece_index_t(int((static_cast(range.piece) * piece_size + range.start + file_size + 1) / piece_size));
		return std::make_tuple(begin_piece, end_piece);
	}

	// Returns a (begin_piece, end_piece) tuple for ``file``; judging by the
	// name, presumably the half-open range of all pieces that overlap this
	// file - confirm against callers.
	//
	// begin_piece is the piece fs.map_file(file, 0, 1) reports for the start
	// of the file. end_piece is one past the piece index computed for the
	// position ``file_size - 1`` bytes after that start.
	std::tuple
	file_piece_range_inclusive(file_storage const& fs, file_index_t const file)
	{
		// position (piece + offset within the piece) of the file's first byte
		peer_request const range = fs.map_file(file, 0, 1);
		std::int64_t const file_size = fs.file_size(file);
		std::int64_t const piece_size = fs.piece_length();
		piece_index_t const end_piece = piece_index_t(int((static_cast(range.piece) * piece_size + range.start + file_size - 1) / piece_size + 1));
		return std::make_tuple(range.piece, end_piece);
	}

} // namespace aux

}