From b5fe0f95a2a5b335416f0602896886ca57472489 Mon Sep 17 00:00:00 2001 From: arvidn Date: Tue, 15 Aug 2017 13:37:12 +0200 Subject: [PATCH] fix issue in UTF-8 encoding validation --- ChangeLog | 1 + src/ConvertUTF.cpp | 2 +- src/torrent_info.cpp | 10 +++++++--- test/test_torrent_info.cpp | 22 ++++++++++++++++++++++ 4 files changed, 31 insertions(+), 4 deletions(-) diff --git a/ChangeLog b/ChangeLog index 938c6eef8..15e648238 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,4 @@ + * fix utf-8 encoding check in torrent parser * fix infinite loop when parsing maliciously crafted torrents * fix invalid read in parse_int in bdecoder * fix issue with very long tracker- and web seed URLs diff --git a/src/ConvertUTF.cpp b/src/ConvertUTF.cpp index 60ba17df1..49bcc3dc3 100644 --- a/src/ConvertUTF.cpp +++ b/src/ConvertUTF.cpp @@ -300,7 +300,7 @@ Boolean isLegalUTF8(const UTF8 *source, int length) { /* Everything else falls through when "true"... */ case 4: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false; case 3: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false; - case 2: if ((a = (*--srcptr)) > 0xBF) return false; + case 2: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false; switch (*source) { /* no fall-through in this inner switch */ diff --git a/src/torrent_info.cpp b/src/torrent_info.cpp index a7124226a..b3f521057 100644 --- a/src/torrent_info.cpp +++ b/src/torrent_info.cpp @@ -255,8 +255,7 @@ namespace libtorrent continue; } - if (code_point < 0 - || !valid_path_character(code_point)) + if (code_point < 0 || !valid_path_character(code_point)) { // invalid utf8 sequence, replace with "_" path += '_'; @@ -265,9 +264,14 @@ namespace libtorrent continue; } + TORRENT_ASSERT(isLegalUTF8(reinterpret_cast(element + i), seq_len)); + // validation passed, add it to the output string for (int k = i; k < i + seq_len; ++k) + { + TORRENT_ASSERT(element[k] != 0); path.push_back(element[k]); + } if (code_point == '.') ++num_dots; @@ -618,7 +622,7 @@ namespace libtorrent { // as long as this file already exists // increase the counter - boost::uint32_t h = m_files.file_path_hash(i, empty_str); + boost::uint32_t const h = m_files.file_path_hash(i, empty_str); if (!files.insert(h).second) { // This filename appears to already exist! diff --git a/test/test_torrent_info.cpp b/test/test_torrent_info.cpp index 5cb081cf3..2311750b2 100644 --- a/test/test_torrent_info.cpp +++ b/test/test_torrent_info.cpp @@ -326,6 +326,11 @@ TORRENT_TEST(sanitize_path_trailing_spaces) TORRENT_TEST(sanitize_path) { std::string path; + + sanitize_append_path_element(path, "\0\0\xed\0\x80", 5); + TEST_EQUAL(path, "_"); + + path.clear(); sanitize_append_path_element(path, "/a/", 3); sanitize_append_path_element(path, "b", 1); sanitize_append_path_element(path, "c", 1); @@ -506,6 +511,17 @@ TORRENT_TEST(sanitize_path) TEST_EQUAL(path, "foobar"); } +TORRENT_TEST(sanitize_path_zeroes) +{ + std::string path; + sanitize_append_path_element(path, "\0foo", 4); + TEST_EQUAL(path, "foo"); + + path.clear(); + sanitize_append_path_element(path, "\0\0\0\0", 4); + TEST_EQUAL(path, ""); +} + TORRENT_TEST(verify_encoding) { // verify_encoding @@ -585,6 +601,12 @@ TORRENT_TEST(verify_encoding) TEST_CHECK(!verify_encoding(test)); fprintf(stdout, "%s\n", test.c_str()); TEST_CHECK(test == "filename____"); + + // missing byte header + test = "filename\xed\0\x80"; + TEST_CHECK(!verify_encoding(test)); + fprintf(stdout, "%s\n", test.c_str()); + TEST_CHECK(test == "filename_"); } TORRENT_TEST(parse_torrents)