forked from premiere/premiere-libtorrent
extend testing of sanitize_append_path_element
This commit is contained in:
parent
5c44bf1b36
commit
3460f203f3
|
@ -64,8 +64,7 @@ namespace libtorrent
|
|||
// internal, exposed for the unit test
|
||||
TORRENT_EXTRA_EXPORT void sanitize_append_path_element(std::string& path
|
||||
, char const* element, int element_len);
|
||||
TORRENT_EXTRA_EXPORT bool verify_encoding(std::string& target
|
||||
, bool fix_paths = false);
|
||||
TORRENT_EXTRA_EXPORT bool verify_encoding(std::string& target);
|
||||
|
||||
// the web_seed_entry holds information about a web seed (also known
|
||||
// as URL seed or HTTP seed). It is essentially a URL with some state
|
||||
|
|
|
@ -94,11 +94,8 @@ namespace libtorrent
|
|||
|
||||
} // anonymous namespace
|
||||
|
||||
// fixes invalid UTF-8 sequences and
|
||||
// replaces characters that are invalid
|
||||
// in paths
|
||||
TORRENT_EXTRA_EXPORT bool verify_encoding(std::string& target
|
||||
, bool fix_paths)
|
||||
// fixes invalid UTF-8 sequences
|
||||
TORRENT_EXTRA_EXPORT bool verify_encoding(std::string& target)
|
||||
{
|
||||
if (target.empty()) return true;
|
||||
|
||||
|
@ -145,14 +142,6 @@ namespace libtorrent
|
|||
valid_encoding = false;
|
||||
}
|
||||
|
||||
// if fix paths is true, also replace characters that are invalid
|
||||
// in filenames
|
||||
if (fix_paths && codepoint < 0x7f && !valid_path_character(codepoint))
|
||||
{
|
||||
codepoint = '_';
|
||||
valid_encoding = false;
|
||||
}
|
||||
|
||||
// encode codepoint into utf-8
|
||||
cp = &codepoint;
|
||||
UTF8 sequence[5];
|
||||
|
@ -250,7 +239,14 @@ namespace libtorrent
|
|||
if ((element[i] & 0x80) == 0)
|
||||
{
|
||||
// 1 byte
|
||||
path += element[i];
|
||||
if (valid_path_character(element[i]))
|
||||
{
|
||||
path += element[i];
|
||||
}
|
||||
else
|
||||
{
|
||||
path += '_';
|
||||
}
|
||||
last_len = 1;
|
||||
}
|
||||
else if ((element[i] & 0xe0) == 0xc0)
|
||||
|
@ -259,7 +255,13 @@ namespace libtorrent
|
|||
if (element_len - i < 2
|
||||
|| (element[i+1] & 0xc0) != 0x80)
|
||||
{
|
||||
path += '?';
|
||||
path += '_';
|
||||
last_len = 1;
|
||||
}
|
||||
else if ((element[i] & 0x1f) == 0)
|
||||
{
|
||||
// overlong sequences are invalid
|
||||
path += '_';
|
||||
last_len = 1;
|
||||
}
|
||||
else
|
||||
|
@ -278,7 +280,13 @@ namespace libtorrent
|
|||
|| (element[i+2] & 0xc0) != 0x80
|
||||
)
|
||||
{
|
||||
path += '?';
|
||||
path += '_';
|
||||
last_len = 1;
|
||||
}
|
||||
else if ((element[i] & 0x0f) == 0)
|
||||
{
|
||||
// overlong sequences are invalid
|
||||
path += '_';
|
||||
last_len = 1;
|
||||
}
|
||||
else
|
||||
|
@ -299,7 +307,14 @@ namespace libtorrent
|
|||
|| (element[i+3] & 0xc0) != 0x80
|
||||
)
|
||||
{
|
||||
path += '?';
|
||||
path += '_';
|
||||
last_len = 1;
|
||||
}
|
||||
else if ((element[i] & 0x07) == 0
|
||||
&& (element[i+1] & 0x3f) == 0)
|
||||
{
|
||||
// overlong sequences are invalid
|
||||
path += '_';
|
||||
last_len = 1;
|
||||
}
|
||||
else
|
||||
|
@ -312,6 +327,11 @@ namespace libtorrent
|
|||
}
|
||||
i += 3;
|
||||
}
|
||||
else
|
||||
{
|
||||
path += '_';
|
||||
last_len = 1;
|
||||
}
|
||||
|
||||
added += last_len;
|
||||
++unicode_chars;
|
||||
|
@ -347,18 +367,20 @@ namespace libtorrent
|
|||
return;
|
||||
}
|
||||
|
||||
if (added == 0 && added_separator)
|
||||
{
|
||||
// remove the separator added at the beginning
|
||||
path.erase(path.end()-1);
|
||||
return;
|
||||
}
|
||||
|
||||
// remove trailing spaces and dots. These aren't allowed in filenames on windows
|
||||
for (int i = path.size() - 1; i >= 0; --i)
|
||||
{
|
||||
if (path[i] != ' ' && path[i] != '.') break;
|
||||
path.resize(i);
|
||||
--added;
|
||||
TORRENT_ASSERT(added >= 0);
|
||||
}
|
||||
|
||||
if (added == 0 && added_separator)
|
||||
{
|
||||
// remove the separator added at the beginning
|
||||
path.erase(path.end()-1);
|
||||
return;
|
||||
}
|
||||
|
||||
if (path.empty()) path = "_";
|
||||
|
@ -405,8 +427,6 @@ namespace libtorrent
|
|||
filename = p.string_ptr() + info_ptr_diff;
|
||||
filename_len = p.string_length();
|
||||
sanitize_append_path_element(path, p.string_ptr(), p.string_length());
|
||||
|
||||
// if (path.empty()) path = to_hex(files.info_hash().to_string());
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -451,7 +471,7 @@ namespace libtorrent
|
|||
bdecode_node attr = dict.dict_find_string("attr");
|
||||
if (attr)
|
||||
{
|
||||
for (int i = 0; i < attr.string_length(); ++i)
|
||||
for (int i = 0; i < attr.string_length(); ++i)
|
||||
{
|
||||
switch (attr.string_ptr()[i])
|
||||
{
|
||||
|
@ -1604,7 +1624,7 @@ namespace libtorrent
|
|||
m_comment = torrent_file.dict_find_string_value("comment.utf-8");
|
||||
if (m_comment.empty()) m_comment = torrent_file.dict_find_string_value("comment");
|
||||
verify_encoding(m_comment);
|
||||
|
||||
|
||||
m_created_by = torrent_file.dict_find_string_value("created by.utf-8");
|
||||
if (m_created_by.empty()) m_created_by = torrent_file.dict_find_string_value("created by");
|
||||
verify_encoding(m_created_by);
|
||||
|
|
|
@ -161,7 +161,6 @@ test_failing_torrent_t test_error_torrents[] =
|
|||
// TODO: torrent with 'l' (symlink) attribute
|
||||
// TODO: creating a merkle torrent (torrent_info::build_merkle_list)
|
||||
// TODO: torrent with multiple trackers in multiple tiers, making sure we shuffle them (how do you test shuffling?, load it multiple times and make sure it's in different order at least once)
|
||||
// TODO: sanitize_append_path_element with all kinds of UTF-8 sequences, including invalid ones
|
||||
// TODO: torrents with a missing name
|
||||
// TODO: torrents with a zero-length name
|
||||
// TODO: torrents with a merkle tree and add_merkle_nodes
|
||||
|
@ -285,6 +284,11 @@ TORRENT_TEST(sanitize_path)
|
|||
TEST_EQUAL(path, "a/c");
|
||||
#endif
|
||||
|
||||
path.clear();
|
||||
sanitize_append_path_element(path, "a", 1);
|
||||
sanitize_append_path_element(path, "..", 2);
|
||||
TEST_EQUAL(path, "a");
|
||||
|
||||
path.clear();
|
||||
sanitize_append_path_element(path, "/..", 3);
|
||||
sanitize_append_path_element(path, ".", 1);
|
||||
|
@ -328,6 +332,201 @@ TORRENT_TEST(sanitize_path)
|
|||
TEST_EQUAL(path, "c/c");
|
||||
#endif
|
||||
|
||||
path.clear();
|
||||
sanitize_append_path_element(path, "\b", 1);
|
||||
TEST_EQUAL(path, "_");
|
||||
|
||||
path.clear();
|
||||
sanitize_append_path_element(path, "\b", 1);
|
||||
sanitize_append_path_element(path, "filename", 8);
|
||||
#ifdef TORRENT_WINDOWS
|
||||
TEST_EQUAL(path, "_\\filename");
|
||||
#else
|
||||
TEST_EQUAL(path, "_/filename");
|
||||
#endif
|
||||
|
||||
path.clear();
|
||||
sanitize_append_path_element(path, "filename", 8);
|
||||
sanitize_append_path_element(path, "\b", 1);
|
||||
#ifdef TORRENT_WINDOWS
|
||||
TEST_EQUAL(path, "filename\\_");
|
||||
#else
|
||||
TEST_EQUAL(path, "filename/_");
|
||||
#endif
|
||||
|
||||
path.clear();
|
||||
sanitize_append_path_element(path, "abc", 3);
|
||||
sanitize_append_path_element(path, "", 0);
|
||||
#ifdef TORRENT_WINDOWS
|
||||
TEST_EQUAL(path, "abc\\_");
|
||||
#else
|
||||
TEST_EQUAL(path, "abc/_");
|
||||
#endif
|
||||
|
||||
path.clear();
|
||||
sanitize_append_path_element(path, "abc", 3);
|
||||
sanitize_append_path_element(path, " ", 3);
|
||||
TEST_EQUAL(path, "abc");
|
||||
|
||||
path.clear();
|
||||
sanitize_append_path_element(path, "", 0);
|
||||
sanitize_append_path_element(path, "abc", 3);
|
||||
#ifdef TORRENT_WINDOWS
|
||||
TEST_EQUAL(path, "_\\abc");
|
||||
#else
|
||||
TEST_EQUAL(path, "_/abc");
|
||||
#endif
|
||||
|
||||
path.clear();
|
||||
sanitize_append_path_element(path, "\b?filename=4", 12);
|
||||
#ifdef TORRENT_WINDOWS
|
||||
TEST_EQUAL(path, "__filename=4");
|
||||
#else
|
||||
TEST_EQUAL(path, "_?filename=4");
|
||||
#endif
|
||||
|
||||
path.clear();
|
||||
sanitize_append_path_element(path, "filename=4", 10);
|
||||
TEST_EQUAL(path, "filename=4");
|
||||
|
||||
// valid 2-byte sequence
|
||||
path.clear();
|
||||
sanitize_append_path_element(path, "filename\xc2\xa1", 10);
|
||||
TEST_EQUAL(path, "filename\xc2\xa1");
|
||||
|
||||
// truncated 2-byte sequence
|
||||
path.clear();
|
||||
sanitize_append_path_element(path, "filename\xc2", 9);
|
||||
TEST_EQUAL(path, "filename_");
|
||||
|
||||
// valid 3-byte sequence
|
||||
path.clear();
|
||||
sanitize_append_path_element(path, "filename\xe2\x9f\xb9", 11);
|
||||
TEST_EQUAL(path, "filename\xe2\x9f\xb9");
|
||||
|
||||
// truncated 3-byte sequence
|
||||
path.clear();
|
||||
sanitize_append_path_element(path, "filename\xe2\x9f", 10);
|
||||
TEST_EQUAL(path, "filename_");
|
||||
|
||||
// truncated 3-byte sequence
|
||||
path.clear();
|
||||
sanitize_append_path_element(path, "filename\xe2", 9);
|
||||
TEST_EQUAL(path, "filename_");
|
||||
|
||||
// valid 4-byte sequence
|
||||
path.clear();
|
||||
sanitize_append_path_element(path, "filename\xf0\x9f\x92\x88", 12);
|
||||
TEST_EQUAL(path, "filename\xf0\x9f\x92\x88");
|
||||
|
||||
// truncated 4-byte sequence
|
||||
path.clear();
|
||||
sanitize_append_path_element(path, "filename\xf0\x9f\x92", 11);
|
||||
TEST_EQUAL(path, "filename_");
|
||||
|
||||
// 5-byte utf-8 sequence (not allowed)
|
||||
path.clear();
|
||||
sanitize_append_path_element(path, "filename\xf8\x9f\x9f\x9f\x9f" "foobar", 19);
|
||||
TEST_EQUAL(path, "filename_____foobar");
|
||||
|
||||
// redundant (overlong) 2-byte sequence
|
||||
// ascii code 0x2e encoded with a leading 0
|
||||
path.clear();
|
||||
sanitize_append_path_element(path, "filename\xc0\xae", 10);
|
||||
TEST_EQUAL(path, "filename_");
|
||||
|
||||
// redundant (overlong) 3-byte sequence
|
||||
// ascii code 0x2e encoded with two leading 0s
|
||||
path.clear();
|
||||
sanitize_append_path_element(path, "filename\xe0\x80\xae", 11);
|
||||
TEST_EQUAL(path, "filename_");
|
||||
|
||||
// redundant (overlong) 4-byte sequence
|
||||
// ascii code 0x2e encoded with three leading 0s
|
||||
path.clear();
|
||||
sanitize_append_path_element(path, "filename\xf0\x80\x80\xae", 12);
|
||||
TEST_EQUAL(path, "filename_");
|
||||
}
|
||||
|
||||
// Runs verify_encoding() over a table of inputs: plain ASCII, valid 2-, 3- and
// 4-byte UTF-8 sequences, truncated sequences, a 5-byte sequence and overlong
// encodings of 0x2e. For every row the return value has to match `valid` and
// the string, after the call, has to equal `expected`.
TORRENT_TEST(verify_encoding)
{
	struct encoding_case
	{
		char const* input;    // bytes handed to verify_encoding()
		bool valid;           // expected return value
		char const* expected; // expected content of the string after the call
		bool print;           // echo the resulting string to stderr
	};

	static encoding_case const cases[] =
	{
		// the control character and '?' are expected to be left untouched
		{ "\b?filename=4", true, "\b?filename=4", false },
		{ "filename=4", true, "filename=4", false },

		// valid 2-byte sequence
		{ "filename\xc2\xa1", true, "filename\xc2\xa1", true },

		// truncated 2-byte sequence
		{ "filename\xc2", false, "filename_", true },

		// valid 3-byte sequence
		{ "filename\xe2\x9f\xb9", true, "filename\xe2\x9f\xb9", true },

		// truncated 3-byte sequence (one continuation byte missing)
		{ "filename\xe2\x9f", false, "filename_", true },

		// truncated 3-byte sequence (both continuation bytes missing)
		{ "filename\xe2", false, "filename_", true },

		// valid 4-byte sequence
		{ "filename\xf0\x9f\x92\x88", true, "filename\xf0\x9f\x92\x88", true },

		// truncated 4-byte sequence
		{ "filename\xf0\x9f\x92", false, "filename_", true },

		// 5-byte utf-8 sequence (not allowed). The literal is split so the
		// last hex escape does not swallow the 'f' of "foobar"
		{ "filename\xf8\x9f\x9f\x9f\x9f" "foobar", false, "filename_____foobar", true },

		// redundant (overlong) 2-byte sequence
		// ascii code 0x2e encoded with a leading 0
		{ "filename\xc0\xae", false, "filename__", true },

		// redundant (overlong) 3-byte sequence
		// ascii code 0x2e encoded with two leading 0s
		{ "filename\xe0\x80\xae", false, "filename___", true },

		// redundant (overlong) 4-byte sequence
		// ascii code 0x2e encoded with three leading 0s
		{ "filename\xf0\x80\x80\xae", false, "filename____", true }
	};

	int const num_cases = int(sizeof(cases) / sizeof(cases[0]));
	for (int idx = 0; idx < num_cases; ++idx)
	{
		encoding_case const& c = cases[idx];

		std::string subject = c.input;
		TEST_CHECK(verify_encoding(subject) == c.valid);
		if (c.print) fprintf(stderr, "%s\n", subject.c_str());
		TEST_CHECK(subject == c.expected);
	}
}
|
||||
|
||||
TORRENT_TEST(parse)
|
||||
|
@ -375,88 +574,6 @@ TORRENT_TEST(parse)
|
|||
std::cerr << ti3.name() << std::endl;
|
||||
TEST_EQUAL(ti3.name(), "test2..test3.......test4");
|
||||
|
||||
// verify_encoding
|
||||
std::string test = "\b?filename=4";
|
||||
TEST_CHECK(!verify_encoding(test, true));
|
||||
#ifdef TORRENT_WINDOWS
|
||||
TEST_CHECK(test == "__filename=4");
|
||||
#else
|
||||
TEST_CHECK(test == "_?filename=4");
|
||||
#endif
|
||||
|
||||
test = "filename=4";
|
||||
TEST_CHECK(verify_encoding(test, true));
|
||||
TEST_CHECK(test == "filename=4");
|
||||
|
||||
// valid 2-byte sequence
|
||||
test = "filename\xc2\xa1";
|
||||
TEST_CHECK(verify_encoding(test, true));
|
||||
fprintf(stderr, "%s\n", test.c_str());
|
||||
TEST_CHECK(test == "filename\xc2\xa1");
|
||||
|
||||
// truncated 2-byte sequence
|
||||
test = "filename\xc2";
|
||||
TEST_CHECK(!verify_encoding(test, true));
|
||||
fprintf(stderr, "%s\n", test.c_str());
|
||||
TEST_CHECK(test == "filename_");
|
||||
|
||||
// valid 3-byte sequence
|
||||
test = "filename\xe2\x9f\xb9";
|
||||
TEST_CHECK(verify_encoding(test, true));
|
||||
fprintf(stderr, "%s\n", test.c_str());
|
||||
TEST_CHECK(test == "filename\xe2\x9f\xb9");
|
||||
|
||||
// truncated 3-byte sequence
|
||||
test = "filename\xe2\x9f";
|
||||
TEST_CHECK(!verify_encoding(test, true));
|
||||
fprintf(stderr, "%s\n", test.c_str());
|
||||
TEST_CHECK(test == "filename_");
|
||||
|
||||
// truncated 3-byte sequence
|
||||
test = "filename\xe2";
|
||||
TEST_CHECK(!verify_encoding(test, true));
|
||||
fprintf(stderr, "%s\n", test.c_str());
|
||||
TEST_CHECK(test == "filename_");
|
||||
|
||||
// valid 4-byte sequence
|
||||
test = "filename\xf0\x9f\x92\x88";
|
||||
TEST_CHECK(verify_encoding(test, true));
|
||||
fprintf(stderr, "%s\n", test.c_str());
|
||||
TEST_CHECK(test == "filename\xf0\x9f\x92\x88");
|
||||
|
||||
// truncated 4-byte sequence
|
||||
test = "filename\xf0\x9f\x92";
|
||||
TEST_CHECK(!verify_encoding(test, true));
|
||||
fprintf(stderr, "%s\n", test.c_str());
|
||||
TEST_CHECK(test == "filename_");
|
||||
|
||||
// 5-byte utf-8 sequence (not allowed)
|
||||
test = "filename\xf8\x9f\x9f\x9f\x9f""foobar";
|
||||
TEST_CHECK(!verify_encoding(test, true));
|
||||
fprintf(stderr, "%s\n", test.c_str());
|
||||
TEST_CHECK(test == "filename_____foobar");
|
||||
|
||||
// redundant (overlong) 2-byte sequence
|
||||
// ascii code 0x2e encoded with a leading 0
|
||||
test = "filename\xc0\xae";
|
||||
TEST_CHECK(!verify_encoding(test, true));
|
||||
fprintf(stderr, "%s\n", test.c_str());
|
||||
TEST_CHECK(test == "filename__");
|
||||
|
||||
// redundant (overlong) 3-byte sequence
|
||||
// ascii code 0x2e encoded with two leading 0s
|
||||
test = "filename\xe0\x80\xae";
|
||||
TEST_CHECK(!verify_encoding(test, true));
|
||||
fprintf(stderr, "%s\n", test.c_str());
|
||||
TEST_CHECK(test == "filename___");
|
||||
|
||||
// redundant (overlong) 4-byte sequence
|
||||
// ascii code 0x2e encoded with three leading 0s
|
||||
test = "filename\xf0\x80\x80\xae";
|
||||
TEST_CHECK(!verify_encoding(test, true));
|
||||
fprintf(stderr, "%s\n", test.c_str());
|
||||
TEST_CHECK(test == "filename____");
|
||||
|
||||
std::string root_dir = parent_path(current_working_directory());
|
||||
for (int i = 0; i < int(sizeof(test_torrents)/sizeof(test_torrents[0])); ++i)
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue