improve file_storage::sanitize_symlinks

This commit is contained in:
arvidn 2019-07-21 15:50:57 -07:00 committed by Arvid Norberg
parent 3995ffeafd
commit 8e23f9cc92
12 changed files with 401 additions and 22 deletions

View File

@ -1,3 +1,4 @@
* improve sanitization of symlinks, to support more complex link targets
* add DHT routing table affinity for BEP 42 nodes
* add torrent_info constructor overloads to control torrent file limits
* feature to disable DHT, PEX and LSD per torrent

View File

@ -139,6 +139,7 @@ namespace libtorrent {
// split out a path segment from the left side or right side
TORRENT_EXTRA_EXPORT std::pair<string_view, string_view> rsplit_path(string_view p);
TORRENT_EXTRA_EXPORT std::pair<string_view, string_view> lsplit_path(string_view p);
TORRENT_EXTRA_EXPORT std::pair<string_view, string_view> lsplit_path(string_view p, std::size_t pos);
TORRENT_EXTRA_EXPORT std::string extension(std::string const& f);
TORRENT_EXTRA_EXPORT std::string remove_extension(std::string const& f);

View File

@ -532,6 +532,7 @@ namespace libtorrent {
private:
std::string internal_file_path(file_index_t index) const;
file_index_t last_file() const noexcept;
int get_or_add_path(string_view path);

View File

@ -39,10 +39,33 @@ POSSIBILITY OF SUCH DAMAGE.
#if BOOST_VERSION < 106100
#include <boost/utility/string_ref.hpp>
#include <cstring> // for strchr
namespace libtorrent {
using string_view = boost::string_ref;
using wstring_view = boost::wstring_ref;
// returns the index of the first character in v, at or after pos, that is
// equal to c. If there is no such character (which includes pos being at or
// past the end of v), string_view::npos is returned.
inline string_view::size_type find_first_of(string_view const v, char const c
	, string_view::size_type pos)
{
	for (auto idx = pos; idx < v.size(); ++idx)
		if (v[idx] == c) return idx;
	return string_view::npos;
}
// returns the index of the first character in v, at or after pos, that is
// one of the characters in the null terminated string c. If there is no such
// character (which includes pos being at or past the end of v),
// string_view::npos is returned.
inline string_view::size_type find_first_of(string_view const v, char const* c
	, string_view::size_type pos)
{
	for (; pos < v.size(); ++pos)
	{
		char const ch = v[pos];
		// std::strchr() considers the terminating NUL to be part of the
		// string it searches, so a '\0' inside v would always be reported
		// as a hit. The NUL is not one of the characters in the set, so it
		// must never match. This keeps the result identical to the overload
		// that forwards to string_view::find_first_of()
		if (ch != '\0' && std::strchr(c, ch) != nullptr) return pos;
	}
	return string_view::npos;
}
}
#else
#include <boost/utility/string_view.hpp>
@ -50,6 +73,18 @@ namespace libtorrent {
using string_view = boost::string_view;
using wstring_view = boost::wstring_view;
// free function form of string_view::find_first_of() for a single
// character. The search starts at pos and the result is whatever the member
// function returns
inline string_view::size_type find_first_of(string_view const v, char const c
	, string_view::size_type pos)
{
	string_view::size_type const hit = v.find_first_of(c, pos);
	return hit;
}
// free function form of string_view::find_first_of() for a set of
// characters, passed as a null terminated string. The search starts at pos
// and the result is whatever the member function returns
inline string_view::size_type find_first_of(string_view const v, char const* c
	, string_view::size_type pos)
{
	string_view::size_type const hit = v.find_first_of(c, pos);
	return hit;
}
}
#endif

View File

@ -44,6 +44,7 @@ POSSIBILITY OF SUCH DAMAGE.
#include <cstdio>
#include <algorithm>
#include <functional>
#include <set>
#if defined(TORRENT_WINDOWS) || defined(TORRENT_OS2)
#define TORRENT_SEPARATOR '\\'
@ -657,7 +658,16 @@ namespace {
TORRENT_ASSERT_PRECOND(index >= file_index_t(0) && index < end_file());
internal_file_entry const& fe = m_files[index];
TORRENT_ASSERT(fe.symlink_index < int(m_symlinks.size()));
return m_symlinks[fe.symlink_index];
auto const& link = m_symlinks[fe.symlink_index];
// TODO: 3 this is a hack to retain ABI compatibility with 1.2.1
// in next major release, make this return by value
static std::string ret;
ret.reserve(m_name.size() + link.size() + 1);
ret.assign(m_name);
append_path(ret, link);
return ret;
}
std::time_t file_storage::mtime(file_index_t const index) const
@ -818,6 +828,26 @@ namespace {
return ret;
}
// builds the path of the file at `index` from its entry in m_paths (if it
// has one) followed by its filename. When the entry's path_index is
// negative, there is no m_paths entry to prepend and just the filename is
// returned.
std::string file_storage::internal_file_path(file_index_t const index) const
{
	TORRENT_ASSERT_PRECOND(index >= file_index_t(0) && index < end_file());
	internal_file_entry const& entry = m_files[index];

	// no directory component, the filename is the whole path
	if (entry.path_index < 0) return entry.filename().to_string();

	std::string const& dir = m_paths[entry.path_index];
	auto const leaf = entry.filename();

	std::string result;
	// room for both components plus separators
	result.reserve(dir.size() + leaf.size() + 2);
	append_path(result, dir);
	append_path(result, leaf);
	return result;
}
string_view file_storage::file_name(file_index_t const index) const
{
TORRENT_ASSERT_PRECOND(index >= file_index_t(0) && index < end_file());
@ -1112,13 +1142,26 @@ namespace {
std::unordered_map<std::string, file_index_t> file_map;
bool file_map_initialized = false;
// lazily instantiated set of all valid directories a symlink may point to
// TODO: in C++17 this could be string_view
std::unordered_set<std::string> dir_map;
bool dir_map_initialized = false;
// symbolic links that point to directories
std::unordered_map<std::string, std::string> dir_links;
// we validate symlinks in (potentially) 2 passes over the files.
// remaining symlinks to validate after the first pass
std::vector<file_index_t> symlinks_to_validate;
for (auto const i : file_range())
{
if (!(file_flags(i) & file_storage::flag_symlink)) continue;
if (!file_map_initialized)
{
for (auto const j : file_range()) file_map[file_path(j)] = j;
for (auto const j : file_range())
file_map.insert({internal_file_path(j), j});
file_map_initialized = true;
}
@ -1128,54 +1171,145 @@ namespace {
// symlink targets are only allowed to point to files or directories in
// this torrent.
{
std::string target = symlink(i);
std::string target = m_symlinks[fe.symlink_index];
// if it points to a directory, that's OK
auto it = std::find(m_paths.begin(), m_paths.end(), target);
if (it != m_paths.end())
if (is_complete(target))
{
m_symlinks[fe.symlink_index] = combine_path(name(), *it);
// a symlink target is not allowed to be an absolute path, ever
// this symlink is invalid, make it point to itself
m_symlinks[fe.symlink_index] = internal_file_path(i);
continue;
}
target = combine_path(name(), target);
auto const idx = file_map.find(target);
if (idx != file_map.end())
auto const iter = file_map.find(target);
if (iter != file_map.end())
{
m_symlinks[fe.symlink_index] = target;
if (file_flags(iter->second) & file_storage::flag_symlink)
{
// we don't know whether this symlink is a file or a
// directory, so make the conservative assumption that it's a
// directory
dir_links[internal_file_path(i)] = target;
}
continue;
}
}
// this symlink target points to a file that's not part of this torrent
// file structure. That's not allowed by the spec.
// it may point to a directory that doesn't have any files (but only
// other directories), in which case it won't show up in m_paths
if (!dir_map_initialized)
{
for (auto const& p : m_paths)
for (string_view pv = p; !pv.empty(); pv = rsplit_path(pv).first)
dir_map.insert(pv.to_string());
dir_map_initialized = true;
}
if (dir_map.count(target))
{
// it points to a sub directory within the torrent, that's OK
m_symlinks[fe.symlink_index] = target;
dir_links[internal_file_path(i)] = target;
continue;
}
}
// for backwards compatibility, allow paths relative to the link as
// well
if (fe.path_index >= 0)
{
std::string target = m_paths[fe.path_index];
append_path(target, symlink(i));
append_path(target, m_symlinks[fe.symlink_index]);
// if it points to a directory, that's OK
auto it = std::find(m_paths.begin(), m_paths.end(), target);
auto const it = std::find(m_paths.begin(), m_paths.end(), target);
if (it != m_paths.end())
{
m_symlinks[fe.symlink_index] = combine_path(name(), *it);
m_symlinks[fe.symlink_index] = *it;
dir_links[internal_file_path(i)] = *it;
continue;
}
target = combine_path(name(), target);
auto const idx = file_map.find(target);
if (idx != file_map.end())
if (dir_map.count(target))
{
// it points to a sub directory within the torrent, that's OK
m_symlinks[fe.symlink_index] = target;
dir_links[internal_file_path(i)] = target;
continue;
}
auto const iter = file_map.find(target);
if (iter != file_map.end())
{
m_symlinks[fe.symlink_index] = target;
if (file_flags(iter->second) & file_storage::flag_symlink)
{
// we don't know whether this symlink is a file or a
// directory, so make the conservative assumption that it's a
// directory
dir_links[internal_file_path(i)] = target;
}
continue;
}
}
// we don't know whether this symlink is a file or a
// directory, so make the conservative assumption that it's a
// directory
dir_links[internal_file_path(i)] = m_symlinks[fe.symlink_index];
symlinks_to_validate.push_back(i);
}
// in case there were some "complex" symlinks, we need a second pass to
// validate those. For example, symlinks whose target rely on other
// symlinks
for (auto const i : symlinks_to_validate)
{
internal_file_entry const& fe = m_files[i];
TORRENT_ASSERT(fe.symlink_index < int(m_symlinks.size()));
std::string target = m_symlinks[fe.symlink_index];
// to avoid getting stuck in an infinite loop, we only allow traversing
// a symlink once
std::set<std::string> traversed;
// this is where we check every path element for existence. If it's not
// among the concrete paths, it may be a symlink, which is also OK
// note that we won't iterate through this for the last step, where the
// filename is included. The filename is validated after the loop
for (string_view branch = lsplit_path(target).first;
branch.size() < target.size();
branch = lsplit_path(target, branch.size() + 1).first)
{
// this is a concrete directory
if (dir_map.count(branch.to_string())) continue;
auto const iter = dir_links.find(branch.to_string());
if (iter == dir_links.end()) goto failed;
if (traversed.count(branch.to_string())) goto failed;
traversed.insert(branch.to_string());
// this path element is a symlink. substitute the branch so far by
// the link target
target = combine_path(iter->second, target.substr(branch.size() + 1));
// start over with the new (concrete) path
branch = {};
}
// the final (resolved) target must be a valid file
// or directory
if (file_map.count(target) == 0
&& dir_map.count(target) == 0) goto failed;
// this is OK
continue;
failed:
// this symlink is invalid, make it point to itself
m_symlinks[fe.symlink_index] = file_path(i);
m_symlinks[fe.symlink_index] = internal_file_path(i);
}
}

View File

@ -910,6 +910,25 @@ namespace {
return { p.substr(0, sep), p.substr(sep + 1) };
}
// splits p at the first path separator found at or after offset pos. The
// first element of the returned pair is everything left of that separator
// and the second element is everything right of it. A single leading
// separator is stripped from p first, and pos is decremented to compensate
// (unless it's already 0). If no separator is found, the whole (stripped)
// path is returned as the first element and the second one is empty.
std::pair<string_view, string_view> lsplit_path(string_view p, std::size_t pos)
{
	if (p.empty()) return {{}, {}};

	// for absolute paths, skip the initial "/"
#if defined(TORRENT_WINDOWS) || defined(TORRENT_OS2)
	bool const leading_separator = p.front() == TORRENT_SEPARATOR_CHAR
		|| p.front() == '/';
#else
	bool const leading_separator = p.front() == TORRENT_SEPARATOR_CHAR;
#endif
	if (leading_separator)
	{
		p.remove_prefix(1);
		if (pos > 0) --pos;
	}

#if defined(TORRENT_WINDOWS) || defined(TORRENT_OS2)
	auto const split = find_first_of(p, "/\\", std::string::size_type(pos));
#else
	auto const split = find_first_of(p, TORRENT_SEPARATOR_CHAR, std::string::size_type(pos));
#endif

	if (split != string_view::npos)
		return { p.substr(0, split), p.substr(split + 1) };
	return {p, {}};
}
std::string complete(string_view f)
{
if (is_complete(f)) return f.to_string();

View File

@ -120,7 +120,8 @@ TEST_TORRENTS = \
url_seed_multi_space.torrent \
url_seed_multi_space_nolist.torrent \
url_seed_multi_single_file.torrent \
whitespace_url.torrent
whitespace_url.torrent \
overlapping_symlinks.torrent
MUTABLE_TEST_TORRENTS = \
test1.torrent \

View File

@ -317,6 +317,42 @@ TORRENT_TEST(split_path)
TEST_CHECK(rsplit_path("") == r("", ""));
}
// exercises the lsplit_path() overload that takes a start offset. Every
// offset from 0 up to and past the last separator is tried, both for paths
// with and without a leading separator. The expected results show that a
// leading separator is never part of the returned strings and that the split
// happens at the first separator at or after the offset.
TORRENT_TEST(split_path_pos)
{
using r = std::pair<string_view, string_view>;
#ifdef TORRENT_WINDOWS
// backslash separated paths are only checked on windows
TEST_CHECK(lsplit_path("\\b\\c\\d", 0) == r("b", "c\\d"));
TEST_CHECK(lsplit_path("\\b\\c\\d", 1) == r("b", "c\\d"));
TEST_CHECK(lsplit_path("\\b\\c\\d", 2) == r("b", "c\\d"));
TEST_CHECK(lsplit_path("\\b\\c\\d", 3) == r("b\\c", "d"));
TEST_CHECK(lsplit_path("\\b\\c\\d", 4) == r("b\\c", "d"));
TEST_CHECK(lsplit_path("\\b\\c\\d", 5) == r("b\\c\\d", ""));
TEST_CHECK(lsplit_path("\\b\\c\\d", 6) == r("b\\c\\d", ""));
TEST_CHECK(lsplit_path("b\\c\\d", 0) == r("b", "c\\d"));
TEST_CHECK(lsplit_path("b\\c\\d", 1) == r("b", "c\\d"));
TEST_CHECK(lsplit_path("b\\c\\d", 2) == r("b\\c", "d"));
TEST_CHECK(lsplit_path("b\\c\\d", 3) == r("b\\c", "d"));
TEST_CHECK(lsplit_path("b\\c\\d", 4) == r("b\\c\\d", ""));
TEST_CHECK(lsplit_path("b\\c\\d", 5) == r("b\\c\\d", ""));
#endif
// forward slash separated paths are checked on all platforms
TEST_CHECK(lsplit_path("/b/c/d", 0) == r("b", "c/d"));
TEST_CHECK(lsplit_path("/b/c/d", 1) == r("b", "c/d"));
TEST_CHECK(lsplit_path("/b/c/d", 2) == r("b", "c/d"));
TEST_CHECK(lsplit_path("/b/c/d", 3) == r("b/c", "d"));
TEST_CHECK(lsplit_path("/b/c/d", 4) == r("b/c", "d"));
// offsets past the last separator yield the whole path and an empty tail
TEST_CHECK(lsplit_path("/b/c/d", 5) == r("b/c/d", ""));
TEST_CHECK(lsplit_path("/b/c/d", 6) == r("b/c/d", ""));
TEST_CHECK(lsplit_path("b/c/d", 0) == r("b", "c/d"));
TEST_CHECK(lsplit_path("b/c/d", 1) == r("b", "c/d"));
TEST_CHECK(lsplit_path("b/c/d", 2) == r("b/c", "d"));
TEST_CHECK(lsplit_path("b/c/d", 3) == r("b/c", "d"));
TEST_CHECK(lsplit_path("b/c/d", 4) == r("b/c/d", ""));
TEST_CHECK(lsplit_path("b/c/d", 5) == r("b/c/d", ""));
}
// file class
TORRENT_TEST(file)
{

View File

@ -624,6 +624,135 @@ TORRENT_TEST(map_block_mid)
}
}
// the path separator as a string literal, so that it can be concatenated
// with adjacent string literals when spelling out paths in the tests
#ifdef TORRENT_WINDOWS
#define SEP "\\"
#else
#define SEP "/"
#endif
// builds a file_storage with a number of symlinks (some with valid targets,
// some without) and one regular file, runs sanitize_symlinks() and checks
// what every symlink points to afterwards. Symlinks with an invalid target
// are expected to end up pointing to themselves.
TORRENT_TEST(sanitize_symlinks)
{
file_storage fs;
fs.set_piece_length(1024);
// invalid
// the target is an absolute path
#if defined(TORRENT_WINDOWS) || defined(TORRENT_OS2)
fs.add_file("test/0", 0, file_storage::flag_symlink, 0, "C:\\invalid\\target\\path");
#else
fs.add_file("test/0", 0, file_storage::flag_symlink, 0, "/invalid/target/path");
#endif
// there is no file with this name, so this is invalid
fs.add_file("test/1", 0, file_storage::flag_symlink, 0, "ZZ");
// there is no file with this name, so this is invalid
fs.add_file("test/2", 0, file_storage::flag_symlink, 0, "B" SEP "B" SEP "ZZ");
// this should be OK
fs.add_file("test/3", 0, file_storage::flag_symlink, 0, "0");
// this should be OK
fs.add_file("test/4", 0, file_storage::flag_symlink, 0, "A");
// this is advanced, but OK
// (the first element of the target is itself the symlink "4")
fs.add_file("test/5", 0, file_storage::flag_symlink, 0, "4" SEP "B");
// this is advanced, but OK
// (the first element of the target is itself the symlink "5")
fs.add_file("test/6", 0, file_storage::flag_symlink, 0, "5" SEP "C");
// this is not OK
fs.add_file("test/7", 0, file_storage::flag_symlink, 0, "4" SEP "B" SEP "C" SEP "ZZ");
// this is the only actual content
fs.add_file("test/A" SEP "B" SEP "C", 10000);
// number of pieces is the total size divided by the piece length, rounded up
fs.set_num_pieces(int((fs.total_size() + 1023) / 1024));
fs.sanitize_symlinks();
// these were all invalid symlinks, so they're made to point to themselves
TEST_EQUAL(fs.symlink(file_index_t{0}), "test" SEP "0");
TEST_EQUAL(fs.symlink(file_index_t{1}), "test" SEP "1");
TEST_EQUAL(fs.symlink(file_index_t{2}), "test" SEP "2");
// ok
TEST_EQUAL(fs.symlink(file_index_t{3}), "test" SEP "0");
TEST_EQUAL(fs.symlink(file_index_t{4}), "test" SEP "A");
TEST_EQUAL(fs.symlink(file_index_t{5}), "test" SEP "4" SEP "B");
TEST_EQUAL(fs.symlink(file_index_t{6}), "test" SEP "5" SEP "C");
// does not point to a valid file
TEST_EQUAL(fs.symlink(file_index_t{7}), "test" SEP "7");
}
// a storage holding just one regular file and no symlinks. After running
// sanitize_symlinks() the path of the file is expected to still be "test"
TORRENT_TEST(sanitize_symlinks_single_file)
{
file_storage fs;
fs.set_piece_length(1024);
fs.add_file("test", 1);
fs.set_num_pieces(int((fs.total_size() + 1023) / 1024));
fs.sanitize_symlinks();
TEST_EQUAL(fs.file_path(file_index_t{0}), "test");
}
// symlinks "1" through "7" form a chain where each one targets the next, and
// the last one targets the directory "A". Two more symlinks have targets
// whose first element is the start of that chain. The one ending in "ZZ"
// (which exists under "A") is expected to be kept, and the one ending in
// "ZZZ" (which doesn't exist) is expected to be made to point to itself.
TORRENT_TEST(sanitize_symlinks_cascade)
{
file_storage fs;
fs.set_piece_length(1024);
fs.add_file("test/0", 0, file_storage::flag_symlink, 0, "1" SEP "ZZ");
fs.add_file("test/1", 0, file_storage::flag_symlink, 0, "2");
fs.add_file("test/2", 0, file_storage::flag_symlink, 0, "3");
fs.add_file("test/3", 0, file_storage::flag_symlink, 0, "4");
fs.add_file("test/4", 0, file_storage::flag_symlink, 0, "5");
fs.add_file("test/5", 0, file_storage::flag_symlink, 0, "6");
fs.add_file("test/6", 0, file_storage::flag_symlink, 0, "7");
fs.add_file("test/7", 0, file_storage::flag_symlink, 0, "A");
fs.add_file("test/no-exist", 0, file_storage::flag_symlink, 0, "1" SEP "ZZZ");
// this is the only actual content
fs.add_file("test/A" SEP "ZZ", 10000);
// number of pieces is the total size divided by the piece length, rounded up
fs.set_num_pieces(int((fs.total_size() + 1023) / 1024));
fs.sanitize_symlinks();
// all links along the chain keep their targets
TEST_EQUAL(fs.symlink(file_index_t{0}), "test" SEP "1" SEP "ZZ");
TEST_EQUAL(fs.symlink(file_index_t{1}), "test" SEP "2");
TEST_EQUAL(fs.symlink(file_index_t{2}), "test" SEP "3");
TEST_EQUAL(fs.symlink(file_index_t{3}), "test" SEP "4");
TEST_EQUAL(fs.symlink(file_index_t{4}), "test" SEP "5");
TEST_EQUAL(fs.symlink(file_index_t{5}), "test" SEP "6");
TEST_EQUAL(fs.symlink(file_index_t{6}), "test" SEP "7");
TEST_EQUAL(fs.symlink(file_index_t{7}), "test" SEP "A");
// this one was invalid, so it points to itself
TEST_EQUAL(fs.symlink(file_index_t{8}), "test" SEP "no-exist");
}
// symlinks "0" and "1" target each other. A third symlink has a target that
// goes through "0". sanitize_symlinks() is expected to terminate, keep the
// two links that target each other as they are, and make the third one point
// to itself.
TORRENT_TEST(sanitize_symlinks_circular)
{
file_storage fs;
fs.set_piece_length(1024);
fs.add_file("test/0", 0, file_storage::flag_symlink, 0, "1");
fs.add_file("test/1", 0, file_storage::flag_symlink, 0, "0");
// when this is resolved, we end up in an infinite loop. Make sure we can
// handle that
fs.add_file("test/2", 0, file_storage::flag_symlink, 0, "0/ZZ");
// this is the only actual content
fs.add_file("test/A" SEP "ZZ", 10000);
// number of pieces is the total size divided by the piece length, rounded up
fs.set_num_pieces(int((fs.total_size() + 1023) / 1024));
fs.sanitize_symlinks();
TEST_EQUAL(fs.symlink(file_index_t{0}), "test" SEP "1");
TEST_EQUAL(fs.symlink(file_index_t{1}), "test" SEP "0");
// this was invalid, so it points to itself
TEST_EQUAL(fs.symlink(file_index_t{2}), "test" SEP "2");
}
// TODO: test file attributes
// TODO: test symlinks
// TODO: test reorder_file (make sure internal_file_entry::swap() is used)

View File

@ -494,3 +494,17 @@ TORRENT_TEST(string_ptr_move_assign)
TEST_CHECK(*p2 == nullptr);
}
// checks both overloads of the find_first_of() free function, the one
// looking for a single character and the one looking for any character out
// of a set, with different start positions. The test string has '1' at
// index 1 and 10, and '6' at index 6.
TORRENT_TEST(find_first_of)
{
string_view test("01234567891");
// single character
TEST_EQUAL(find_first_of(test, '1', 0), 1);
TEST_EQUAL(find_first_of(test, '1', 1), 1);
TEST_EQUAL(find_first_of(test, '1', 2), 10);
TEST_EQUAL(find_first_of(test, '1', 3), 10);
// set of characters
TEST_EQUAL(find_first_of(test, "61", 0), 1);
TEST_EQUAL(find_first_of(test, "61", 1), 1);
TEST_EQUAL(find_first_of(test, "61", 2), 6);
TEST_EQUAL(find_first_of(test, "61", 3), 6);
TEST_EQUAL(find_first_of(test, "61", 4), 6);
}

View File

@ -134,6 +134,7 @@ static test_torrent_t test_torrents[] =
{ "absolute_filename.torrent" },
{ "invalid_filename.torrent" },
{ "invalid_filename2.torrent" },
{ "overlapping_symlinks.torrent" },
};
struct test_failing_torrent_t
@ -846,6 +847,13 @@ TORRENT_TEST(parse_torrents)
{
TEST_EQUAL(ti->num_files(), 3);
}
else if (t.file == "overlapping_symlinks.torrent"_sv)
{
TEST_CHECK(ti->num_files() > 3);
TEST_EQUAL(ti->files().symlink(file_index_t{0}), "SDL2.framework" SEPARATOR "Versions" SEPARATOR "Current" SEPARATOR "Headers");
TEST_EQUAL(ti->files().symlink(file_index_t{1}), "SDL2.framework" SEPARATOR "Versions" SEPARATOR "Current" SEPARATOR "Resources");
TEST_EQUAL(ti->files().symlink(file_index_t{2}), "SDL2.framework" SEPARATOR "Versions" SEPARATOR "Current" SEPARATOR "SDL2");
}
file_storage const& fs = ti->files();
for (file_index_t idx{0}; idx != file_index_t(fs.num_files()); ++idx)

Binary file not shown.