reject put messages with incorrect bencoding (#2118)

add function to check for soft bdecode errors. reject put messages with incorrect bencoding
2017-07-02 15:30:32 -07:00 · 2017-07-02 15:30:32 -07:00 · ec2fb91aa5
parent fab8af6be2
commit ec2fb91aa5
4 changed files with 219 additions and 2 deletions
--- a/include/libtorrent/bdecode.hpp
+++ b/include/libtorrent/bdecode.hpp
@ -360,6 +360,10 @@ struct TORRENT_EXPORT bdecode_node
 	// decode().
 	void switch_underlying_buffer(char const* buf);

+	// returns true if there is a non-fatal error in the bencoding of this node
+	// or its children
+	bool has_soft_error(span<char> error) const;
+
 private:
 	bdecode_node(detail::bdecode_token const* tokens, char const* buf
 		, int len, int idx);
--- a/src/bdecode.cpp
+++ b/src/bdecode.cpp
@ -289,6 +289,95 @@ namespace {
 		m_buffer = buf;
 	}

+	bool bdecode_node::has_soft_error(span<char> error) const
+	{
+		if (type() == none_t) return false;
+
+		bdecode_token const* tokens = m_root_tokens;
+		int token = m_token_idx;
+
+		// we don't know what the original depth_limit was
+		// so this has to go on the heap
+		std::vector<int> stack;
+		// make the initial allocation the default depth_limit
+		stack.reserve(100);
+
+		do
+		{
+			switch (tokens[token].type)
+			{
+			case bdecode_token::integer:
+				if (m_buffer[tokens[token].offset + 1] == '0'
+					&& m_buffer[tokens[token].offset + 2] != 'e')
+				{
+					std::snprintf(error.data(), error.size(), "leading zero in integer");
+					return true;
+				}
+				break;
+			case bdecode_token::string:
+				if (m_buffer[tokens[token].offset] == '0'
+					&& m_buffer[tokens[token].offset + 1] != ':')
+				{
+					std::snprintf(error.data(), error.size(), "leading zero in string length");
+					return true;
+				}
+				break;
+			case bdecode_token::dict:
+			case bdecode_token::list:
+				stack.push_back(token);
+				break;
+			case bdecode_token::end:
+				auto const parent = stack.back();
+				stack.pop_back();
+				if (tokens[parent].type == bdecode_token::dict
+					&& token != parent + 1)
+				{
+					// this is the end of a non-empty dict
+					// check the sort order of the keys
+					int k1 = parent + 1;
+					for (;;)
+					{
+						// skip to the first key's value
+						int const v1 = k1 + tokens[k1].next_item;
+						// then to the next key
+						int const k2 = v1 + tokens[v1].next_item;
+
+						// check if k1 was the last key in the dict
+						if (k2 == token)
+							break;
+
+						int const v2 = k2 + tokens[k2].next_item;
+
+						int const k1_start = tokens[k1].offset + tokens[k1].start_offset();
+						int const k1_len = tokens[v1].offset - k1_start;
+						int const k2_start = tokens[k2].offset + tokens[k2].start_offset();
+						int const k2_len = tokens[v2].offset - k2_start;
+
+						int const min_len = std::min(k1_len, k2_len);
+
+						int cmp = std::memcmp(m_buffer + k1_start, m_buffer + k2_start, std::size_t(min_len));
+						if (cmp > 0 || (cmp == 0 && k1_len > k2_len))
+						{
+							std::snprintf(error.data(), error.size(), "unsorted dictionary key");
+							return true;
+						}
+						else if (cmp == 0 && k1_len == k2_len)
+						{
+							std::snprintf(error.data(), error.size(), "duplicate dictionary key");
+							return true;
+						}
+
+						k1 = k2;
+					}
+				}
+				break;
+			}
+
+			++token;
+		} while (!stack.empty());
+		return false;
+	}
+
 	bdecode_node::type_t bdecode_node::type() const
 	{
 		if (m_token_idx == -1) return none_t;
@ -811,7 +900,9 @@ namespace {

 					// skip ':'
 					++start;
-					if (start >= end) TORRENT_FAIL_BDECODE(bdecode_errors::unexpected_eof);
+					// no need to range check start here
+					// the check above ensures that the buffer is long enough to hold
+					// the string's length which guarantees that start <= end

 					// the bdecode_token only has 8 bits to keep the header size
 					// in. If it overflows, fail!
--- a/src/kademlia/node.cpp
+++ b/src/kademlia/node.cpp
@ -966,8 +966,12 @@ void node::incoming_request(msg const& m, entry& e)
 		};

 		// attempt to parse the message
+		// also reject the message if it has any non-fatal encoding errors
+		// because put messages contain a signed value they must have correct bencoding
+		// otherwise the value will not round-trip without breaking the signature
 		bdecode_node msg_keys[7];
-		if (!verify_message(arg_ent, msg_desc, msg_keys, error_string))
+		if (!verify_message(arg_ent, msg_desc, msg_keys, error_string)
+			|| arg_ent.has_soft_error(error_string))
 		{
 			m_counters.inc_stats_counter(counters::dht_invalid_put);
 			incoming_error(e, error_string);
--- a/test/test_bdecode.cpp
+++ b/test/test_bdecode.cpp
@ -162,6 +162,9 @@ TORRENT_TEST(dict)
 	TEST_EQUAL(e.dict_find("c").string_value(), std::string("bbb"));
 	TEST_EQUAL(e.dict_find("c").string_length(), 3);
 	TEST_EQUAL(e.dict_find_string_value("X"), "0123456789");
+	char error_string[200];
+	TEST_CHECK(e.has_soft_error(error_string));
+	TEST_EQUAL(std::string(error_string), std::string("unsorted dictionary key"));
 }

 // test dictionary with a key without a value
@ -206,6 +209,69 @@ TORRENT_TEST(dict_null_key)
 	TEST_EQUAL(d.int_value(), 1);
 }

+// soft error reported for dictionary with unordered keys
+TORRENT_TEST(dict_unordered_keys)
+{
+	char error_string[200];
+	{
+		char b[] = "d2:abi1e2:aai2ee";
+		bdecode_node e;
+		error_code ec;
+		int ret = bdecode(b, b + sizeof(b) - 1, e, ec);
+		TEST_EQUAL(ret, 0);
+		TEST_CHECK(e.has_soft_error(error_string));
+		TEST_EQUAL(std::string(error_string), std::string("unsorted dictionary key"));
+	}
+	{
+		char b[] = "d2:bai1e2:aai2ee";
+		bdecode_node e;
+		error_code ec;
+		int ret = bdecode(b, b + sizeof(b) - 1, e, ec);
+		TEST_EQUAL(ret, 0);
+		TEST_CHECK(e.has_soft_error(error_string));
+		TEST_EQUAL(std::string(error_string), std::string("unsorted dictionary key"));
+	}
+	{
+		char b[] = "d2:aai1e1:ai2ee";
+		bdecode_node e;
+		error_code ec;
+		int ret = bdecode(b, b + sizeof(b) - 1, e, ec);
+		TEST_EQUAL(ret, 0);
+		TEST_CHECK(e.has_soft_error(error_string));
+		TEST_EQUAL(std::string(error_string), std::string("unsorted dictionary key"));
+	}
+	{
+		char b[] = "d1:ai1e2:aai2ee";
+		bdecode_node e;
+		error_code ec;
+		int ret = bdecode(b, b + sizeof(b) - 1, e, ec);
+		TEST_EQUAL(ret, 0);
+		TEST_CHECK(!e.has_soft_error(error_string));
+		TEST_EQUAL(std::string(error_string), std::string("unsorted dictionary key"));
+	}
+	{
+		char b[] = "d2:aai1e1:bi2ee";
+		bdecode_node e;
+		error_code ec;
+		int ret = bdecode(b, b + sizeof(b) - 1, e, ec);
+		TEST_EQUAL(ret, 0);
+		TEST_CHECK(!e.has_soft_error(error_string));
+		TEST_EQUAL(std::string(error_string), std::string("unsorted dictionary key"));
+	}
+}
+
+TORRENT_TEST(dict_duplicate_key)
+{
+	char b[] = "d2:aai1e2:aai2ee";
+	bdecode_node e;
+	error_code ec;
+	int ret = bdecode(b, b + sizeof(b) - 1, e, ec);
+	TEST_EQUAL(ret, 0);
+	char error_string[200];
+	TEST_CHECK(e.has_soft_error(error_string));
+	TEST_EQUAL(std::string(error_string), std::string("duplicate dictionary key"));
+}
+
 // premature e
 TORRENT_TEST(premature_e)
 {
@ -275,6 +341,33 @@ TORRENT_TEST(overflow_length_prefix2)
 	std::printf("%s\n", print_entry(e).c_str());
 }

+TORRENT_TEST(leading_zero_length_prefix)
+{
+	{
+		char b[] = "06:foobar";
+		bdecode_node e;
+		error_code ec;
+		int pos;
+		int ret = bdecode(b, b + sizeof(b) - 1, e, ec, &pos);
+		TEST_EQUAL(ret, 0);
+		char error_string[200];
+		TEST_CHECK(e.has_soft_error(error_string));
+		TEST_EQUAL(std::string(error_string), std::string("leading zero in string length"));
+		std::printf("%s\n", print_entry(e).c_str());
+	}
+	{
+		char b[] = "0:";
+		bdecode_node e;
+		error_code ec;
+		int pos;
+		int ret = bdecode(b, b + sizeof(b) - 1, e, ec, &pos);
+		TEST_EQUAL(ret, 0);
+		char error_string[200];
+		TEST_CHECK(!e.has_soft_error(error_string));
+		std::printf("%s\n", print_entry(e).c_str());
+	}
+}
+
 // test integer without any digits
 TORRENT_TEST(nodigit_int)
 {
@ -359,6 +452,31 @@ TORRENT_TEST(int_truncated)
 	std::printf("%s\n", print_entry(e).c_str());
 }

+TORRENT_TEST(int_leading_zero)
+{
+	{
+		char b[] = "i01e";
+		bdecode_node e;
+		error_code ec;
+		int ret = bdecode(b, b + sizeof(b) - 1, e, ec);
+		TEST_EQUAL(ret, 0);
+		char error_string[200];
+		TEST_CHECK(e.has_soft_error(error_string));
+		TEST_EQUAL(std::string(error_string), std::string("leading zero in integer"));
+		std::printf("%s\n", print_entry(e).c_str());
+	}
+	{
+		char b[] = "i0e";
+		bdecode_node e;
+		error_code ec;
+		int ret = bdecode(b, b + sizeof(b) - 1, e, ec);
+		TEST_EQUAL(ret, 0);
+		char error_string[200];
+		TEST_CHECK(!e.has_soft_error(error_string));
+		std::printf("%s\n", print_entry(e).c_str());
+	}
+}
+
 // bdecode_error
 TORRENT_TEST(bdecode_error)
 {