diff --git a/include/libtorrent/file_storage.hpp b/include/libtorrent/file_storage.hpp index c2ba05391..49e1f808d 100644 --- a/include/libtorrent/file_storage.hpp +++ b/include/libtorrent/file_storage.hpp @@ -136,6 +136,8 @@ namespace libtorrent void set_name(char const* n, bool borrow_string = false, int string_len = 0); std::string filename() const; + char const* filename_ptr() const { return name; } + int filename_len() const { return name_len == name_is_owned?strlen(name):name_len; } enum { name_is_owned = (1<<12)-1, @@ -457,6 +459,13 @@ namespace libtorrent bool pad_file_at(int index) const; boost::int64_t file_offset(int index) const; + // returns the crc32 hash of file_path(index) + boost::uint32_t file_path_hash(int index, std::string const& save_path) const; + + // returns the crc32 hash of the path at index. Note, this index does not + // refer to a file, but to a path in the vector returned by paths(). + boost::uint32_t path_hash(int index, std::string const& save_path) const; + // flags indicating various attributes for files in // a file_storage. enum file_flags_t @@ -493,6 +502,7 @@ namespace libtorrent // filesystem by making them all point to the same filename, but with // different file bases, so that they don't overlap. // torrent_info::remap_files() can be used to use a new file layout. 
+ // TODO: 3 deprecate the file_base feature boost::int64_t file_base(int index) const; void set_file_base(int index, boost::int64_t off); @@ -544,7 +554,8 @@ namespace libtorrent // the number of pieces in the torrent int m_num_pieces; - void update_path_index(internal_file_entry& e); + void update_path_index(internal_file_entry& e, std::string const& path + , bool set_name = true); void reorder_file(int index, int dst); // the list of files that this torrent consists of diff --git a/include/libtorrent/torrent_info.hpp b/include/libtorrent/torrent_info.hpp index 683277e46..4e046e885 100644 --- a/include/libtorrent/torrent_info.hpp +++ b/include/libtorrent/torrent_info.hpp @@ -677,6 +677,9 @@ namespace libtorrent void resolve_duplicate_filenames(); + // the slow path, in case we detect/suspect a name collision + void resolve_duplicate_filenames_slow(); + #if TORRENT_USE_INVARIANT_CHECKS friend class invariant_access; void check_invariant() const; diff --git a/src/file_storage.cpp b/src/file_storage.cpp index 5d5f933fe..369cd88fd 100644 --- a/src/file_storage.cpp +++ b/src/file_storage.cpp @@ -35,9 +35,16 @@ POSSIBILITY OF SUCH DAMAGE. #include "libtorrent/file.hpp" #include "libtorrent/utf8.hpp" #include +#include #include #include +#if defined(TORRENT_WINDOWS) || defined(TORRENT_OS2) +#define TORRENT_SEPARATOR '\\' +#else +#define TORRENT_SEPARATOR '/' +#endif + namespace libtorrent { file_storage::file_storage() @@ -70,43 +77,56 @@ namespace libtorrent return piece_length(); } - void file_storage::update_path_index(internal_file_entry& e) + namespace { - std::string fn = e.filename(); - if (is_complete(fn)) + bool compare_string(char const* str, int len, std::string const& str2) { + if (str2.size() != len) return false; + return memcmp(str2.c_str(), str, len) == 0; + } + } + + // path is not supposed to include the name of the torrent itself. 
+ void file_storage::update_path_index(internal_file_entry& e + , std::string const& path, bool set_name) + { + if (is_complete(path)) + { + TORRENT_ASSERT(set_name); + e.set_name(path.c_str()); e.path_index = -2; return; } + + TORRENT_ASSERT(path[0] != '/'); + // sorry about this messy string handling, but I did // profile it, and it was expensive - char const* leaf = filename_cstr(fn.c_str()); + char const* leaf = filename_cstr(path.c_str()); char const* branch_path = ""; - if (leaf > fn.c_str()) + int branch_len = 0; + if (leaf > path.c_str()) { // split the string into the leaf filename // and the branch path - branch_path = fn.c_str(); - ((char*)leaf)[-1] = 0; + branch_path = path.c_str(); + branch_len = leaf - path.c_str(); } - if (branch_path[0] == 0) + if (branch_len <= 0) { + if (set_name) e.set_name(leaf); e.path_index = -1; return; } - int branch_len = strlen(branch_path); if (branch_len >= m_name.size() - && std::memcmp(branch_path, m_name.c_str(), m_name.size()) == 0 - && (branch_len == m_name.size() -#ifdef TORRENT_WINDOWS - || branch_path[m_name.size()] == '\\' -#endif - || branch_path[m_name.size()] == '/' - )) + && std::memcmp(branch_path, m_name.c_str(), m_name.size()) == 0) { - branch_path += m_name.size() + // the +1 is to skip the trailing '/' (or '\') + int offset = m_name.size() + (m_name.size() == branch_len?0:1); + branch_path += offset; + branch_len -= offset; e.no_root_dir = false; } else @@ -116,20 +136,29 @@ namespace libtorrent // do we already have this path in the path list? std::vector::reverse_iterator p - = std::find(m_paths.rbegin(), m_paths.rend(), branch_path); + = std::find_if(m_paths.rbegin(), m_paths.rend() + , boost::bind(&compare_string, branch_path, branch_len, _1)); if (p == m_paths.rend()) { // no, we don't. 
add it e.path_index = m_paths.size(); - m_paths.push_back(branch_path); + TORRENT_ASSERT(branch_path[0] != '/'); + + // trim trailing slashes + if (branch_len > 0 && branch_path[branch_len-1] == TORRENT_SEPARATOR) + --branch_len; + + // poor man's emplace back + m_paths.resize(m_paths.size() + 1); + m_paths.back().assign(branch_path, branch_len); } else { // yes we do. use it e.path_index = p.base() - m_paths.begin() - 1; } - e.set_name(leaf); + if (set_name) e.set_name(leaf); } #ifndef TORRENT_NO_DEPRECATE @@ -260,8 +289,7 @@ namespace libtorrent TORRENT_ASSERT_PRECOND(index >= 0 && index < int(m_files.size())); std::string utf8; wchar_utf8(new_filename, utf8); - m_files[index].set_name(utf8.c_str()); - update_path_index(m_files[index]); + update_path_index(m_files[index], utf8); } void file_storage::add_file(std::wstring const& file, boost::int64_t file_size @@ -282,8 +310,7 @@ namespace libtorrent void file_storage::rename_file(int index, std::string const& new_filename) { TORRENT_ASSERT_PRECOND(index >= 0 && index < int(m_files.size())); - m_files[index].set_name(new_filename.c_str()); - update_path_index(m_files[index]); + update_path_index(m_files[index], new_filename); } namespace @@ -468,15 +495,17 @@ namespace libtorrent if (m_files.empty()) m_name = split_path(path).c_str(); } - internal_file_entry ife; - m_files.push_back(ife); + + // this is poor-man's emplace_back() + m_files.resize(m_files.size() + 1); internal_file_entry& e = m_files.back(); - // first set the filename to the full path so update_path_index() - // can do its thing, then rename it to point to the borrowed filename - // pointer - e.set_name(path.c_str()); - update_path_index(e); + // the last argument specifies whether the function should also set + // the filename. If it does, it will copy the leaf filename from path. + // if filename is NULL, we should copy it. If it isn't, we're borrowing + // it and we can save the copy by setting it after this call to + // update_path_index(). 
+ update_path_index(e, path, filename == NULL); // filename is allowed to be NULL, in which case we just use path if (filename) @@ -547,6 +576,104 @@ namespace libtorrent return m_file_base[index]; } + namespace + { + template + void process_string_lowercase(CRC& crc, char const* str, int len) + { + for (int i = 0; i < len; ++i, ++str) + crc.process_byte(to_lower(*str)); + } + } + + boost::uint32_t file_storage::path_hash(int index + , std::string const& save_path) const + { + TORRENT_ASSERT_PRECOND(index >= 0 && index < int(m_paths.size())); + + boost::crc_optimal<32, 0x1EDC6F41, 0xFFFFFFFF, 0xFFFFFFFF, true, true> crc; + + if (!save_path.empty()) + { + process_string_lowercase(crc, save_path.c_str(), save_path.size()); + TORRENT_ASSERT(save_path[save_path.size()-1] != TORRENT_SEPARATOR); + crc.process_byte(TORRENT_SEPARATOR); + } + + process_string_lowercase(crc, m_name.c_str(), m_name.size()); + crc.process_byte(TORRENT_SEPARATOR); + process_string_lowercase(crc, m_paths[index].c_str(), m_paths[index].size()); + return crc.checksum(); + } + + boost::uint32_t file_storage::file_path_hash(int index + , std::string const& save_path) const + { + TORRENT_ASSERT_PRECOND(index >= 0 && index < int(m_files.size())); + internal_file_entry const& fe = m_files[index]; + + boost::crc_optimal<32, 0x1EDC6F41, 0xFFFFFFFF, 0xFFFFFFFF, true, true> crc; + + if (fe.path_index == -2) + { + // -2 means this is an absolute path filename + process_string_lowercase(crc, fe.filename_ptr(), fe.filename_len()); + } + else if (fe.path_index == -1) + { + // -1 means no path + if (!save_path.empty()) + { + process_string_lowercase(crc, save_path.c_str(), save_path.size()); + TORRENT_ASSERT(save_path[save_path.size()-1] != TORRENT_SEPARATOR); + crc.process_byte(TORRENT_SEPARATOR); + } + process_string_lowercase(crc, fe.filename_ptr(), fe.filename_len()); + } + else if (fe.no_root_dir) + { + if (!save_path.empty()) + { + process_string_lowercase(crc, save_path.c_str(), save_path.size()); + 
TORRENT_ASSERT(save_path[save_path.size()-1] != TORRENT_SEPARATOR); + crc.process_byte(TORRENT_SEPARATOR); + } + std::string const& p = m_paths[fe.path_index]; + if (!p.empty()) + { + process_string_lowercase(crc, p.c_str(), p.size()); + TORRENT_ASSERT(p[p.size()-1] != TORRENT_SEPARATOR); + crc.process_byte(TORRENT_SEPARATOR); + } + process_string_lowercase(crc, fe.filename_ptr(), fe.filename_len()); + } + else + { + if (!save_path.empty()) + { + process_string_lowercase(crc, save_path.c_str(), save_path.size()); + TORRENT_ASSERT(save_path[save_path.size()-1] != TORRENT_SEPARATOR); + crc.process_byte(TORRENT_SEPARATOR); + } + process_string_lowercase(crc, m_name.c_str(), m_name.size()); + TORRENT_ASSERT(m_name.size() > 0); + TORRENT_ASSERT(m_name[m_name.size()-1] != TORRENT_SEPARATOR); + crc.process_byte(TORRENT_SEPARATOR); + + std::string const& p = m_paths[fe.path_index]; + if (!p.empty()) + { + process_string_lowercase(crc, p.c_str(), p.size()); + TORRENT_ASSERT(p.size() > 0); + TORRENT_ASSERT(p[p.size()-1] != TORRENT_SEPARATOR); + crc.process_byte(TORRENT_SEPARATOR); + } + process_string_lowercase(crc, fe.filename_ptr(), fe.filename_len()); + } + + return crc.checksum(); + } + std::string file_storage::file_path(int index, std::string const& save_path) const { TORRENT_ASSERT_PRECOND(index >= 0 && index < int(m_files.size())); diff --git a/src/torrent_info.cpp b/src/torrent_info.cpp index 8a1900280..7cfbac555 100644 --- a/src/torrent_info.cpp +++ b/src/torrent_info.cpp @@ -357,9 +357,11 @@ namespace libtorrent if (path.empty()) path = "_"; } - // top level is extracting the file for a single-file torrent. The + // 'top_level' is extracting the file for a single-file torrent. The // distinction is that the filename is found in "name" rather than // "path" + // root_dir is the name of the torrent, unless this is a single file + // torrent, in which case it's empty. 
bool extract_single_file(lazy_entry const& dict, file_storage& files , std::string const& root_dir, ptrdiff_t info_ptr_diff, bool top_level , error_code& ec) @@ -374,8 +376,6 @@ namespace libtorrent boost::int64_t mtime = dict.dict_find_int_value("mtime", 0); - // prefer the name.utf-8 because if it exists, it is more likely to be - // correctly encoded std::string path = root_dir; std::string path_element; char const* filename = NULL; @@ -383,6 +383,8 @@ namespace libtorrent if (top_level) { + // prefer the name.utf-8 because if it exists, it is more likely to be + // correctly encoded lazy_entry const* p = dict.dict_find_string("name.utf-8"); if (p == 0) p = dict.dict_find_string("name"); if (p == 0 || p->string_length() == 0) @@ -407,6 +409,7 @@ namespace libtorrent return false; } + int preallocate = path.size(); for (int i = 0, end(p->list_size()); i < end; ++i) { lazy_entry const* e = p->list_at(i); @@ -415,6 +418,13 @@ namespace libtorrent ec = errors::torrent_missing_name; return false; } + preallocate += e->string_length() + 1; + } + path.reserve(preallocate); + + for (int i = 0, end(p->list_size()); i < end; ++i) + { + lazy_entry const* e = p->list_at(i); if (i == end - 1) { filename = e->string_ptr() + info_ptr_diff; @@ -538,6 +548,8 @@ namespace libtorrent }; #endif + // root_dir is the name of the torrent, unless this is a single file + // torrent, in which case it's empty. 
bool extract_files(lazy_entry const& list, file_storage& target , std::string const& root_dir, ptrdiff_t info_ptr_diff, error_code& ec) { @@ -752,6 +764,42 @@ namespace libtorrent void torrent_info::resolve_duplicate_filenames() { INVARIANT_CHECK; + +#if TORRENT_HAS_BOOST_UNORDERED + boost::unordered_set files; +#else + std::set files; +#endif + + std::string empty_str; + + // insert all directories first, to make sure no files + // are allowed to collide with them + std::vector const& paths = m_files.paths(); + for (int i = 0; i != int(paths.size()); ++i) + { + files.insert(m_files.path_hash(i, empty_str)); + } + + for (int i = 0; i < m_files.num_files(); ++i) + { + // insert the hash of this file's full path. If the hash is already + // present, the name may collide with an earlier file or directory + boost::uint32_t h = m_files.file_path_hash(i, empty_str); + if (!files.insert(h).second) + { + // This filename appears to already exist! + // If this happens, just start over and do it the slow way, + // comparing full file names and come up with new names + resolve_duplicate_filenames_slow(); + return; + } + } + } + + void torrent_info::resolve_duplicate_filenames_slow() + { + INVARIANT_CHECK; int cnt = 0; #if TORRENT_HAS_BOOST_UNORDERED @@ -759,7 +807,9 @@ namespace libtorrent #else std::set files; #endif + std::vector const& paths = m_files.paths(); + files.reserve(paths.size() + m_files.num_files()); // insert all directories first, to make sure no files // are allowed to collied with them diff --git a/src/ut_metadata.cpp b/src/ut_metadata.cpp index e44be8e7f..f13a7e2e3 100644 --- a/src/ut_metadata.cpp +++ b/src/ut_metadata.cpp @@ -117,6 +117,8 @@ namespace libtorrent { namespace virtual void on_files_checked() { + // TODO: 2 if we were to initialize m_metadata_size lazily instead, + // we would probably be more efficient // initialize m_metadata_size metadata(); } diff --git a/test/test_file_storage.cpp b/test/test_file_storage.cpp index e4c7a1e79..8f4885ccc 100644 --- a/test/test_file_storage.cpp +++ b/test/test_file_storage.cpp 
@@ -166,6 +166,7 @@ int test_main() } { + // test map_file file_storage fs; fs.set_piece_length(512); fs.add_file(combine_path("temp_storage", "test1.tmp"), 17); @@ -191,6 +192,29 @@ int test_main() TEST_EQUAL(rq.length, 841); } + { + // test file_path_hash and path_hash. Make sure we can detect a path + // whose name collides with a file name + file_storage fs; + fs.set_piece_length(512); + fs.add_file(combine_path("temp_storage", combine_path("foo", "test1")), 17); + fs.add_file(combine_path("temp_storage", "foo"), 612); + + fprintf(stderr, "path: %s\n", fs.paths()[0].c_str()); + fprintf(stderr, "file: %s\n", fs.file_path(1).c_str()); + boost::uint32_t file_hash = fs.file_path_hash(1, "a"); + boost::uint32_t path_hash = fs.path_hash(0, "a"); + TEST_EQUAL(file_hash, path_hash); + } + + // TODO: test map_block + // TODO: test piece_size(int piece) + // TODO: test file_index_at_offset + // TODO: test file attributes + // TODO: test symlinks + // TODO: test pad_files + // TODO: test reorder_file (make sure internal_file_entry::swap() is used) + return 0; } diff --git a/test/test_peer_list.cpp b/test/test_peer_list.cpp index 5ea5abd0d..3b50d6e65 100644 --- a/test/test_peer_list.cpp +++ b/test/test_peer_list.cpp @@ -84,7 +84,7 @@ struct mock_peer_connection : peer_connection_interface virtual tcp::endpoint const& remote() const { return m_remote; } virtual tcp::endpoint local_endpoint() const { return ep("127.0.0.1", 8080); } virtual void disconnect(error_code const& ec - , peer_connection_interface::operation_t op, int error = 0) + , operation_t op, int error = 0) { /* remove from mock_torrent list */ m_tp = 0; } virtual peer_id const& pid() const { return m_id; } virtual void set_holepunch_mode() {} diff --git a/test/test_torrent_info.cpp b/test/test_torrent_info.cpp index d5834f0ea..41c25b6bb 100644 --- a/test/test_torrent_info.cpp +++ b/test/test_torrent_info.cpp @@ -533,7 +533,7 @@ int test_torrent_parse() return 0; } -void test_storage() +void test_resolve_duplicates() { 
file_storage fs; @@ -547,6 +547,8 @@ void test_storage() fs.add_file("test/B.exe", 0x4000); fs.add_file("test/test/TEMPORARY.TXT", 0x4000); fs.add_file("test/A", 0x4000); + fs.add_file("test/long/path/name/that/collides", 0x4000); + fs.add_file("test/long/path", 0x4000); libtorrent::create_torrent t(fs, 0x4000); @@ -564,7 +566,7 @@ void test_storage() torrent_info ti(&tmp[0], tmp.size()); - char const* filenames[10] = + char const* filenames[] = { "test/temporary.txt", "test/A/tmp", @@ -576,6 +578,8 @@ void test_storage() "test/B.2.exe", // duplicate of b.exe "test/test/TEMPORARY.TXT", // a file with the same name in a seprate directory is fine "test/A.2", // duplicate of directory a + "test/long/path/name/that/collides", // a subset of this path collides with the next filename + "test/long/path.1" // so this file needs to be renamed, to not collide with the path name }; for (int i = 0; i < ti.num_files(); ++i) @@ -645,9 +649,10 @@ void test_copy() int test_main() { - test_storage(); + test_resolve_duplicates(); test_copy(); test_torrent_parse(); + return 0; }