From 9af3066b5666693a7035c37c0b71516a77c034a3 Mon Sep 17 00:00:00 2001 From: Arvid Norberg Date: Sat, 21 Mar 2015 16:55:23 +0000 Subject: [PATCH] fix file collision logic in torrent_info --- include/libtorrent/file_storage.hpp | 11 +++- src/file.cpp | 4 -- src/file_storage.cpp | 37 ++++++++---- src/torrent_info.cpp | 24 ++++---- test/test_rss.cpp | 1 + test/test_torrent_info.cpp | 90 +++++++++++++++++++---------- 6 files changed, 107 insertions(+), 60 deletions(-) diff --git a/include/libtorrent/file_storage.hpp b/include/libtorrent/file_storage.hpp index 3c11b1428..dc6155605 100644 --- a/include/libtorrent/file_storage.hpp +++ b/include/libtorrent/file_storage.hpp @@ -37,6 +37,7 @@ POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include #include "libtorrent/assert.hpp" #include "libtorrent/peer_request.hpp" @@ -468,9 +469,13 @@ namespace libtorrent // returns the crc32 hash of file_path(index) boost::uint32_t file_path_hash(int index, std::string const& save_path) const; - // returns the crc32 hash of the path at index. Note, this index does not - // refer to a file, but to a path in the vector returned by paths(). - boost::uint32_t path_hash(int index, std::string const& save_path) const; + // this will add the CRC32 hash of all directory entries to the table. No + // filename will be included, just directories. Every depth of directories + // are added separately to allow test for collisions with files at all + // levels. i.e. if one path in the torrent is ``foo/bar/baz``, the CRC32 + // hashes for ``foo``, ``foo/bar`` and ``foo/bar/baz`` will be added to + // the set. + void all_path_hashes(boost::unordered_set& table) const; // flags indicating various attributes for files in // a file_storage. diff --git a/src/file.cpp b/src/file.cpp index 1f6a362e6..cede93394 100644 --- a/src/file.cpp +++ b/src/file.cpp @@ -30,10 +30,6 @@ POSSIBILITY OF SUCH DAMAGE. */ -/* - Physical file offset patch by Morten Husveit -*/ - #define _FILE_OFFSET_BITS 64 #define _LARGE_FILES 1 diff --git a/src/file_storage.cpp b/src/file_storage.cpp index f8a69cab8..a53c5e89c 100644 --- a/src/file_storage.cpp +++ b/src/file_storage.cpp @@ -575,26 +575,41 @@ namespace libtorrent for (int i = 0; i < len; ++i, ++str) crc.process_byte(to_lower(*str)); } + + template + void process_path_lowercase( + boost::unordered_set& table + , CRC crc + , char const* str, int len) + { + if (len == 0) return; + for (int i = 0; i < len; ++i, ++str) + { + if (*str == TORRENT_SEPARATOR) + table.insert(crc.checksum()); + crc.process_byte(to_lower(*str)); + } + table.insert(crc.checksum()); + } } - boost::uint32_t file_storage::path_hash(int index - , std::string const& save_path) const + void file_storage::all_path_hashes( + boost::unordered_set& table) const { - TORRENT_ASSERT_PRECOND(index >= 0 && index < int(m_paths.size())); - boost::crc_optimal<32, 0x1EDC6F41, 0xFFFFFFFF, 0xFFFFFFFF, true, true> crc; - if (!save_path.empty()) + if (!m_name.empty()) { - process_string_lowercase(crc, save_path.c_str(), save_path.size()); - TORRENT_ASSERT(save_path[save_path.size()-1] != TORRENT_SEPARATOR); + process_string_lowercase(crc, m_name.c_str(), m_name.size()); + TORRENT_ASSERT(m_name[m_name.size()-1] != TORRENT_SEPARATOR); crc.process_byte(TORRENT_SEPARATOR); } - process_string_lowercase(crc, m_name.c_str(), m_name.size()); - crc.process_byte(TORRENT_SEPARATOR); - process_string_lowercase(crc, m_paths[index].c_str(), m_paths[index].size()); - return crc.checksum(); + for (int i = 0; i != int(m_paths.size()); ++i) + { + std::string const& p = m_paths[i]; + process_path_lowercase(table, crc, p.c_str(), p.size()); + } } boost::uint32_t file_storage::file_path_hash(int index diff --git a/src/torrent_info.cpp b/src/torrent_info.cpp index cb183f145..1e879c141 100644 --- a/src/torrent_info.cpp +++ b/src/torrent_info.cpp @@ -65,9 +65,7 @@ POSSIBILITY OF SUCH DAMAGE. #include #include -#if TORRENT_HAS_BOOST_UNORDERED #include -#endif #include @@ -776,22 +774,13 @@ namespace libtorrent { INVARIANT_CHECK; -#if TORRENT_HAS_BOOST_UNORDERED boost::unordered_set files; -#else - std::set files; -#endif std::string empty_str; // insert all directories first, to make sure no files // are allowed to collied with them - std::vector const& paths = m_files.paths(); - for (int i = 0; i != int(paths.size()); ++i) - { - files.insert(m_files.path_hash(i, empty_str)); - } - + m_files.all_path_hashes(files); for (int i = 0; i < m_files.num_files(); ++i) { // as long as this file already exists @@ -827,7 +816,16 @@ namespace libtorrent for (std::vector::const_iterator i = paths.begin() , end(paths.end()); i != end; ++i) { - files.insert(combine_path(m_files.name(), *i)); + std::string p = combine_path(m_files.name(), *i); + files.insert(p); + while (has_parent_path(p)) + { + p = parent_path(p); + // we don't want trailing slashes here + TORRENT_ASSERT(p.back() == *TORRENT_SEPARATOR); + p.pop_back(); + files.insert(p); + } } for (int i = 0; i < m_files.num_files(); ++i) diff --git a/test/test_rss.cpp b/test/test_rss.cpp index 7e224fd27..52605e69d 100644 --- a/test/test_rss.cpp +++ b/test/test_rss.cpp @@ -97,6 +97,7 @@ void test_feed(std::string const& filename, rss_expect const& expect) settings_pack pack; pack.set_int(settings_pack::max_retry_port_bind, 100); pack.set_str(settings_pack::listen_interfaces, "0.0.0.0:100"); + // TODO: 4 we can't use session_impl here boost::shared_ptr s = boost::make_shared(); settings_pack p; s->start_session(p); diff --git a/test/test_torrent_info.cpp b/test/test_torrent_info.cpp index db7df98a8..85a2b1971 100644 --- a/test/test_torrent_info.cpp +++ b/test/test_torrent_info.cpp @@ -574,22 +574,37 @@ int test_torrent_parse() return 0; } -void test_resolve_duplicates() +void test_resolve_duplicates(int test_case) { file_storage fs; - fs.add_file("test/temporary.txt", 0x4000); - fs.add_file("test/A/tmp", 0x4000); - fs.add_file("test/Temporary.txt", 0x4000); - fs.add_file("test/TeMPorArY.txT", 0x4000); - fs.add_file("test/a", 0x4000); - fs.add_file("test/b.exe", 0x4000); - fs.add_file("test/B.ExE", 0x4000); - fs.add_file("test/B.exe", 0x4000); - fs.add_file("test/test/TEMPORARY.TXT", 0x4000); - fs.add_file("test/A", 0x4000); - fs.add_file("test/long/path/name/that/collides", 0x4000); - fs.add_file("test/long/path", 0x4000); + switch (test_case) + { + case 0: + fs.add_file("test/temporary.txt", 0x4000); + fs.add_file("test/Temporary.txt", 0x4000); + fs.add_file("test/TeMPorArY.txT", 0x4000); + fs.add_file("test/test/TEMPORARY.TXT", 0x4000); + break; + case 1: + fs.add_file("test/b.exe", 0x4000); + fs.add_file("test/B.ExE", 0x4000); + fs.add_file("test/B.exe", 0x4000); + fs.add_file("test/filler", 0x4000); + break; + case 2: + fs.add_file("test/A/tmp", 0x4000); + fs.add_file("test/a", 0x4000); + fs.add_file("test/A", 0x4000); + fs.add_file("test/filler", 0x4000); + break; + case 3: + fs.add_file("test/long/path/name/that/collides", 0x4000); + fs.add_file("test/long/path", 0x4000); + fs.add_file("test/filler-1", 0x4000); + fs.add_file("test/filler-2", 0x4000); + break; + } libtorrent::create_torrent t(fs, 0x4000); @@ -607,30 +622,45 @@ void test_resolve_duplicates() torrent_info ti(&tmp[0], tmp.size()); - char const* filenames[] = + char const* filenames[4][4] = { - "test/temporary.txt", - "test/A/tmp", - "test/Temporary.1.txt", // duplicate of temporary.txt - "test/TeMPorArY.2.txT", // duplicate of temporary.txt - "test/a.1", // a file may not have the same name as a directory - "test/b.exe", - "test/B.1.ExE", // duplicate of b.exe - "test/B.2.exe", // duplicate of b.exe - "test/test/TEMPORARY.TXT", // a file with the same name in a seprate directory is fine - "test/A.2", // duplicate of directory a - "test/long/path/name/that/collides", // a subset of this path collides with the next filename - "test/long/path.1" // so this file needs to be renamed, to not collide with the path name + { // case 0 + "test/temporary.txt", + "test/Temporary.1.txt", // duplicate of temporary.txt + "test/TeMPorArY.2.txT", // duplicate of temporary.txt + // a file with the same name in a seprate directory is fine + "test/test/TEMPORARY.TXT", + }, + { // case 1 + "test/b.exe", + "test/B.1.ExE", // duplicate of b.exe + "test/B.2.exe", // duplicate of b.exe + "test/filler", + }, + { // case 2 + "test/A/tmp", + "test/a.1", // a file may not have the same name as a directory + "test/A.2", // duplicate of directory a + "test/filler", + }, + { // case 3 + // a subset of this path collides with the next filename + "test/long/path/name/that/collides", + // so this file needs to be renamed, to not collide with the path name + "test/long/path.1", + "test/filler-1", + "test/filler-2", + } }; for (int i = 0; i < ti.num_files(); ++i) { std::string p = ti.files().file_path(i); convert_path_to_posix(p); - fprintf(stderr, "%s == %s\n", p.c_str(), filenames[i]); + fprintf(stderr, "%s == %s\n", p.c_str(), filenames[test_case][i]); // TODO: 3 fix duplicate name detection to make this test pass - TEST_EQUAL(p, filenames[i]); + TEST_EQUAL(p, filenames[test_case][i]); } } @@ -692,7 +722,9 @@ void test_copy() int test_main() { - test_resolve_duplicates(); + for (int i = 0; i < 4; ++i) + test_resolve_duplicates(i); + test_copy(); #ifndef TORRENT_DISABLE_MUTABLE_TORRENTS test_mutable_torrents();