added high performance bdecoder code

2008-04-11 03:41:09 +00:00 · 2008-04-11 03:41:09 +00:00 · b300c7f835
parent 093d912e9a
commit b300c7f835
8 changed files with 476 additions and 1 deletions
--- a/1
+++ b/1
@ -246,6 +246,7 @@ SOURCES =
 	connection_queue
 	disk_buffer_holder
 	entry
+	lazy_bdecode
 	escape_string
 	gzip
 	http_connection
--- a/include/Makefile.am
+++ b/include/Makefile.am
@ -33,6 +33,7 @@ libtorrent/io.hpp \
 libtorrent/ip_filter.hpp \
 libtorrent/chained_buffer.hpp \
 libtorrent/lsd.hpp \
+libtorrent/lazy_entry.hpp \
 libtorrent/peer.hpp \
 libtorrent/peer_connection.hpp \
 libtorrent/bt_peer_connection.hpp \
--- a/include/libtorrent/lazy_entry.hpp
+++ b/include/libtorrent/lazy_entry.hpp
@ -0,0 +1,156 @@
+/*
+
+Copyright (c) 2003, Arvid Norberg
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in
+      the documentation and/or other materials provided with the distribution.
+    * Neither the name of the author nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+
+*/
+
+#ifndef TORRENT_LAZY_ENTRY_HPP_INCLUDED
+#define TORRENT_LAZY_ENTRY_HPP_INCLUDED
+
+#include <utility>
+#include <vector>
+#include "libtorrent/assert.hpp"
+#include <boost/cstdint.hpp>
+
+namespace libtorrent
+{
+	struct lazy_entry;
+
+	char* parse_int(char* start, char* end, char delimiter, boost::int64_t& val);
+	// return 0 = success
+	int lazy_bdecode(char* start, char* end, lazy_entry& ret, int depth_limit = 1000);
+
+	// a single node in the tree produced by lazy_bdecode(). An entry is
+	// either empty (none_t), an integer, a string, a list or a dictionary.
+	// Integers, strings and dictionary keys are not copied, the entry only
+	// stores the pointer it was constructed with. Lists and dictionaries
+	// own a heap allocated array of child entries, which is freed by
+	// clear() and by the destructor.
+	//
+	// NOTE: no copy constructor or assignment operator is defined. The
+	// implicitly generated ones copy the array pointer as is, so copying
+	// a list or dictionary entry makes both copies free the same array.
+	// Pass entries around by reference or pointer.
+	struct lazy_entry
+	{
+		enum entry_type_t
+		{
+			none_t, dict_t, list_t, string_t, int_t
+		};
+
+		// m_size and m_capacity only carry meaning for lists and
+		// dictionaries, but they are zeroed for every entry so that no
+		// member is ever left uninitialized
+		lazy_entry() : m_type(none_t), m_size(0), m_capacity(0)
+		{ m_data.start = 0; }
+
+		entry_type_t type() const { return m_type; }
+
+		// start is a null terminated string (decimal number)
+		void construct_int(char* start)
+		{
+			TORRENT_ASSERT(m_type == none_t);
+			m_type = int_t;
+			m_data.start = start;
+		}
+
+		// parses and returns the number this entry was constructed with
+		boost::int64_t int_value() const;
+
+		// string functions
+		// ================
+
+		// start is a null terminated string
+		void construct_string(char* start)
+		{
+			TORRENT_ASSERT(m_type == none_t);
+			m_type = string_t;
+			m_data.start = start;
+		}
+
+		// returns the pointer passed to construct_string()
+		char const* string_value() const
+		{
+			TORRENT_ASSERT(m_type == string_t);
+			return m_data.start;
+		}
+
+		// dictionary functions
+		// ====================
+
+		// turns an empty entry into a dictionary without any items
+		void construct_dict()
+		{
+			TORRENT_ASSERT(m_type == none_t);
+			m_type = dict_t;
+			// the item array is not allocated until the first call to
+			// dict_append(). Reset the pointer, it may still hold the
+			// address of an array that was freed by an earlier clear(),
+			// and clear() would free it a second time
+			m_data.dict = 0;
+			m_size = 0;
+			m_capacity = 0;
+		}
+
+		// appends an item with the key 'name' and returns its (empty)
+		// value, or 0 if memory could not be allocated
+		lazy_entry* dict_append(char* name);
+		// returns the value of the first item whose key equals 'name',
+		// or 0 if there is no such item
+		lazy_entry* dict_find(char const* name);
+		lazy_entry const* dict_find(char const* name) const
+		{ return const_cast<lazy_entry*>(this)->dict_find(name); }
+
+		// the number of items in the dictionary
+		int dict_size() const
+		{
+			TORRENT_ASSERT(m_type == dict_t);
+			return m_size;
+		}
+
+		// list functions
+		// ==============
+
+		// turns an empty entry into a list without any items
+		void construct_list()
+		{
+			TORRENT_ASSERT(m_type == none_t);
+			m_type = list_t;
+			// see construct_dict(), a stale pointer must not survive
+			// until the next clear()
+			m_data.list = 0;
+			m_size = 0;
+			m_capacity = 0;
+		}
+
+		// appends an empty item and returns it, or 0 if memory could
+		// not be allocated
+		lazy_entry* list_append();
+		// returns item number 'i', which must be in the
+		// range [0, list_size())
+		lazy_entry* list_at(int i)
+		{
+			TORRENT_ASSERT(m_type == list_t);
+			TORRENT_ASSERT(i >= 0 && i < m_size);
+			return &m_data.list[i];
+		}
+		lazy_entry const* list_at(int i) const
+		{ return const_cast<lazy_entry*>(this)->list_at(i); }
+
+		// the number of items in the list
+		int list_size() const
+		{
+			TORRENT_ASSERT(m_type == list_t);
+			return m_size;
+		}
+
+		// frees the item array, if there is one, and turns this
+		// entry back into an empty one
+		void clear();
+
+		~lazy_entry()
+		{ clear(); }
+
+	private:
+
+		entry_type_t m_type;
+		// which member is valid is determined by m_type
+		union data_t
+		{
+			std::pair<char*, lazy_entry>* dict;
+			lazy_entry* list;
+			char* start;
+		} m_data;
+		int m_size; // if list or dictionary, the number of items
+		int m_capacity; // if list or dictionary, allocated number of items
+	};
+
+};
+
+
+#endif
+
--- a/src/Makefile.am
+++ b/src/Makefile.am
@ -13,7 +13,7 @@ kademlia/traversal_algorithm.cpp
 endif

 libtorrent_la_SOURCES = entry.cpp escape_string.cpp \
-assert.cpp enum_net.cpp broadcast_socket.cpp \
+lazy_bdecode.cpp assert.cpp enum_net.cpp broadcast_socket.cpp \
 peer_connection.cpp bt_peer_connection.cpp web_peer_connection.cpp \
 natpmp.cpp piece_picker.cpp policy.cpp session.cpp session_impl.cpp sha1.cpp \
 stat.cpp storage.cpp mapped_storage.cpp torrent.cpp torrent_handle.cpp pe_crypto.cpp \
--- a/src/lazy_bdecode.cpp
+++ b/src/lazy_bdecode.cpp
@ -0,0 +1,243 @@
+/*
+
+Copyright (c) 2008, Arvid Norberg
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in
+      the documentation and/or other materials provided with the distribution.
+    * Neither the name of the author nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+
+*/
+
+#include "libtorrent/lazy_entry.hpp"
+
+#include <cctype>
+#include <cstring>
+#include <new>
+
+namespace libtorrent
+{
+	// the value reported for every decoding failure. All error paths
+	// go through this function (presumably so that there is a single
+	// place to put a breakpoint -- TODO confirm)
+	int fail_bdecode()
+	{
+		return -1;
+	}
+
+	// interprets the characters from 'start' up to the first occurrence
+	// of 'delimiter' (or up to 'end', if the delimiter is not found) as
+	// a decimal number. Every digit is appended to what 'val' holds on
+	// entry (val = val * 10 + digit), so 'val' should be initialized to
+	// zero, or to the value of leading digits that were consumed already.
+	// returns a pointer to the delimiter, or 'end' if there was none.
+	// returns 0 on a parse error, which is any character that is not a
+	// decimal digit or a number that does not fit in 64 bits. In that
+	// case 'val' holds the digits accumulated up to the error
+	char* parse_int(char* start, char* end, char delimiter, boost::int64_t& val)
+	{
+		// the largest value a signed 64 bit integer can hold
+		boost::int64_t const int64_max
+			= boost::int64_t(~boost::uint64_t(0) >> 1);
+
+		for (; start < end && *start != delimiter; ++start)
+		{
+			// test against the digit range directly. Passing a plain
+			// char, which may be negative, to std::isdigit() is undefined
+			char const c = *start;
+			if (c < '0' || c > '9') { fail_bdecode(); return 0; }
+			int const digit = c - '0';
+
+			// the input is not trusted, and overflowing a signed integer
+			// is undefined. Reject numbers that are too large instead
+			if (val > (int64_max - digit) / 10) { fail_bdecode(); return 0; }
+			val = val * 10 + digit;
+		}
+		return start;
+	}
+
+	// looks for the first character equal to 'delimiter' in the
+	// range [start, end). returns a pointer to it, or 'end' if the
+	// range does not contain the delimiter
+	char* find_char(char* start, char* end, char delimiter)
+	{
+		char* pos = start;
+		for (; pos < end; ++pos)
+		{
+			if (*pos == delimiter) break;
+		}
+		return pos;
+	}
+
+	// decodes the bencoded message in the range [start, end) into 'ret'.
+	// Whatever 'ret' held before is cleared first.
+	//
+	// Nothing is copied out of the buffer. The entries point straight
+	// into it, and the buffer itself is modified: the character following
+	// each integer, string and dictionary key is overwritten with 0 to
+	// null terminate it. The buffer has to be writable and has to outlive
+	// 'ret'. The decoder never writes to *end, so a string that ends
+	// exactly at 'end' is only null terminated if the caller made sure
+	// there is a 0 at that position.
+	//
+	// depth_limit is the highest number of entries that may be nested
+	// inside each other.
+	//
+	// returns 0 on success and the value of fail_bdecode() on failure.
+	// Failures are malformed or truncated input, lists and dictionaries
+	// that are not terminated, nesting deeper than depth_limit and
+	// running out of memory. On failure 'ret' may hold a partial tree.
+	int lazy_bdecode(char* start, char* end, lazy_entry& ret, int depth_limit)
+	{
+		ret.clear();
+		if (start == end) return 0;
+
+		// the entries that are currently being decoded, outermost first
+		std::vector<lazy_entry*> stack;
+
+		stack.push_back(&ret);
+		while (start < end)
+		{
+			if (stack.empty()) break; // done!
+
+			lazy_entry* top = stack.back();
+
+			if (int(stack.size()) > depth_limit) return fail_bdecode();
+			char t = *start;
+			*start = 0; // null terminate any previous string
+			++start;
+			if (start == end && t != 'e') return fail_bdecode();
+
+			switch (top->type())
+			{
+				case lazy_entry::dict_t:
+				{
+					if (t == 'e')
+					{
+						stack.pop_back();
+						continue;
+					}
+					// a key is a string, which starts with its length. Any
+					// other character would yield a bogus length, possibly
+					// a negative one that moves 'start' backwards
+					if (t < '0' || t > '9') return fail_bdecode();
+					boost::int64_t len = t - '0';
+					start = parse_int(start, end, ':', len);
+					// the ':', the key and at least two characters for
+					// the value have to fit in what is left of the buffer.
+					// Compare lengths, start + len may overflow the pointer
+					if (start == 0 || len < 0 || len > end - start - 3 || *start != ':')
+						return fail_bdecode();
+					++start;
+					lazy_entry* ent = top->dict_append(start);
+					if (ent == 0) return fail_bdecode(); // out of memory
+					start += len;
+					stack.push_back(ent);
+					// the value follows directly after the key
+					t = *start;
+					*start = 0; // null terminate the key
+					++start;
+					break;
+				}
+				case lazy_entry::list_t:
+				{
+					if (t == 'e')
+					{
+						stack.pop_back();
+						continue;
+					}
+					lazy_entry* ent = top->list_append();
+					if (ent == 0) return fail_bdecode(); // out of memory
+					stack.push_back(ent);
+					break;
+				}
+				default: break;
+			}
+
+			// 't' is the first character of the value to decode into
+			// the entry on top of the stack
+			top = stack.back();
+			switch (t)
+			{
+				case 'd':
+					top->construct_dict();
+					continue;
+				case 'l':
+					top->construct_list();
+					continue;
+				case 'i':
+					// the digits are not parsed here, that is
+					// deferred until int_value() is called
+					top->construct_int(start);
+					start = find_char(start, end, 'e');
+					if (start == end) return fail_bdecode();
+					TORRENT_ASSERT(*start == 'e');
+					*start++ = 0;
+					stack.pop_back();
+					continue;
+				default:
+				{
+					// std::isdigit() is undefined for negative char values
+					if (t < '0' || t > '9') return fail_bdecode();
+
+					boost::int64_t len = t - '0';
+					start = parse_int(start, end, ':', len);
+					// the ':' and the string itself have to fit
+					if (start == 0 || len < 0 || len > end - start - 1 || *start != ':')
+						return fail_bdecode();
+					++start;
+					top->construct_string(start);
+					stack.pop_back();
+					start += len;
+					continue;
+				}
+			}
+		}
+
+		// entries left on the stack are lists or dictionaries that
+		// are missing their terminating 'e'
+		if (!stack.empty()) return fail_bdecode();
+		return 0;
+	}
+
+	// parses the decimal number this entry points to and returns
+	// its value. A leading '-' negates the result. The return value
+	// of parse_int() is not checked, for malformed input whatever
+	// was accumulated up to the first invalid character is returned
+	boost::int64_t lazy_entry::int_value() const
+	{
+		TORRENT_ASSERT(m_type == int_t);
+
+		char* digits = m_data.start;
+		bool const negative = (*digits == '-');
+		if (negative) ++digits;
+
+		// with 0 as the delimiter the scan stops at the null terminator,
+		// the end pointer merely caps the number of characters looked at
+		boost::int64_t val = 0;
+		parse_int(digits, m_data.start + 100, 0, val);
+		return negative ? -val : val;
+	}
+
+	// appends an item with the key 'name' to this dictionary and returns
+	// a pointer to its value, which is an empty entry. 'name' is stored
+	// as is, the string is not copied. The item array starts out with
+	// room for 10 items and doubles whenever it is full, which
+	// invalidates pointers returned by earlier calls.
+	// returns 0 if memory could not be allocated, in which case the
+	// items already in the dictionary are left alone
+	lazy_entry* lazy_entry::dict_append(char* name)
+	{
+		typedef std::pair<char*, lazy_entry> item_t;
+
+		TORRENT_ASSERT(m_type == dict_t);
+		TORRENT_ASSERT(m_size <= m_capacity);
+		if (m_capacity == 0)
+		{
+			int capacity = 10;
+			m_data.dict = new (std::nothrow) item_t[capacity];
+			if (m_data.dict == 0) return 0;
+			m_capacity = capacity;
+		}
+		else if (m_size == m_capacity)
+		{
+			int capacity = m_capacity * 2;
+			item_t* tmp = new (std::nothrow) item_t[capacity];
+			if (tmp == 0) return 0;
+			std::memcpy(tmp, m_data.dict, sizeof(item_t) * m_size);
+			// the copies in tmp now own the child arrays. Turn the old
+			// entries into empty ones, otherwise their destructors, run
+			// by delete[], free the arrays the copies still point to
+			for (int i = 0; i < m_size; ++i)
+				m_data.dict[i].second.m_type = none_t;
+			delete[] m_data.dict;
+			m_data.dict = tmp;
+			m_capacity = capacity;
+		}
+
+		TORRENT_ASSERT(m_size < m_capacity);
+		item_t& ret = m_data.dict[m_size++];
+		ret.first = name;
+		return &ret.second;
+	}
+
+	// linear search through the items of this dictionary, in the order
+	// they were appended. returns the value of the first item whose key
+	// compares equal to 'name', or 0 if no key matches
+	lazy_entry* lazy_entry::dict_find(char const* name)
+	{
+		TORRENT_ASSERT(m_type == dict_t);
+
+		int index = 0;
+		while (index < m_size)
+		{
+			std::pair<char*, lazy_entry>& item = m_data.dict[index];
+			if (strcmp(name, item.first) == 0) return &item.second;
+			++index;
+		}
+		return 0;
+	}
+
+	// appends an empty entry to this list and returns a pointer to it.
+	// The item array starts out with room for 10 items and doubles
+	// whenever it is full, which invalidates pointers returned by
+	// earlier calls.
+	// returns 0 if memory could not be allocated, in which case the
+	// items already in the list are left alone
+	lazy_entry* lazy_entry::list_append()
+	{
+		TORRENT_ASSERT(m_type == list_t);
+		TORRENT_ASSERT(m_size <= m_capacity);
+		if (m_capacity == 0)
+		{
+			int capacity = 10;
+			m_data.list = new (std::nothrow) lazy_entry[capacity];
+			if (m_data.list == 0) return 0;
+			m_capacity = capacity;
+		}
+		else if (m_size == m_capacity)
+		{
+			int capacity = m_capacity * 2;
+			lazy_entry* tmp = new (std::nothrow) lazy_entry[capacity];
+			if (tmp == 0) return 0;
+			std::memcpy(tmp, m_data.list, sizeof(lazy_entry) * m_size);
+			// the copies in tmp now own the child arrays. Turn the old
+			// entries into empty ones, otherwise their destructors, run
+			// by delete[], free the arrays the copies still point to
+			for (int i = 0; i < m_size; ++i)
+				m_data.list[i].m_type = none_t;
+			delete[] m_data.list;
+			m_data.list = tmp;
+			m_capacity = capacity;
+		}
+
+		TORRENT_ASSERT(m_size < m_capacity);
+		return m_data.list + (m_size++);
+	}
+
+	// frees the item array of a list or dictionary, which destroys
+	// all of its children as well, and turns this entry into an empty
+	// (none_t) one. For the other types there is nothing to free
+	void lazy_entry::clear()
+	{
+		switch (m_type)
+		{
+			case list_t: delete[] m_data.list; break;
+			case dict_t: delete[] m_data.dict; break;
+			default: break;
+		}
+		// don't hold on to the address of the freed array. construct_list()
+		// and construct_dict() leave the pointer alone, so clearing a
+		// container that was constructed again, but never appended to,
+		// would otherwise free the same array twice
+		m_data.start = 0;
+		m_size = 0;
+		m_capacity = 0;
+		m_type = none_t;
+	}
+};
+
--- a/test/Jamfile
+++ b/test/Jamfile
@ -37,6 +37,7 @@ test-suite libtorrent :
 	[ run test_fast_extension.cpp ]
 	[ run test_pe_crypto.cpp ]
 	[ run test_bencoding.cpp ]
+	[ run test_bdecode_performance.cpp ]
 	[ run test_primitives.cpp ]
 	[ run test_ip_filter.cpp ]
 	[ run test_hasher.cpp ]
--- a/test/test_bdecode_performance.cpp
+++ b/test/test_bdecode_performance.cpp
@ -0,0 +1,27 @@
+#include "libtorrent/lazy_entry.hpp"
+#include <boost/lexical_cast.hpp>
+#include <iostream>
+
+#include "test.hpp"
+#include "libtorrent/time.hpp"
+
+using namespace libtorrent;
+
+// measures how long it takes to decode one small dictionary a
+// million times and prints the result. The return value of every
+// decode is checked, to make sure that it is not the failure path
+// that is being timed
+int test_main()
+{
+	ptime start(time_now());
+
+	int failures = 0;
+	for (int i = 0; i < 1000000; ++i)
+	{
+		// lazy_bdecode() writes null terminators into the buffer it is
+		// given, every iteration needs a pristine copy of the message
+		char b[] = "d1:ai12453e1:b3:aaa1:c3:bbbe";
+		lazy_entry e;
+		int ret = lazy_bdecode(b, b + sizeof(b)-1, e);
+		if (ret != 0) ++failures;
+	}
+	ptime stop(time_now());
+
+	TEST_CHECK(failures == 0);
+
+	std::cout << "done in " << total_milliseconds(stop - start) / 1000. << " seconds per million message" << std::endl;
+	return 0;
+}
+
--- a/test/test_bencoding.cpp
+++ b/test/test_bencoding.cpp
@ -1,4 +1,5 @@
 #include "libtorrent/bencode.hpp"
+#include "libtorrent/lazy_entry.hpp"
 #include <boost/lexical_cast.hpp>

 #include "test.hpp"
@ -68,7 +69,52 @@ int test_main()
 		TEST_CHECK(encode(e) == "d3:cow3:moo4:spam4:eggse");
 		TEST_CHECK(decode(encode(e)) == e);
 	}
+
+	{
+		char b[] = "i12453e";
+		lazy_entry e;
+		int ret = lazy_bdecode(b, b + sizeof(b)-1, e);
+		TORRENT_ASSERT(ret == 0);
+		TORRENT_ASSERT(e.type() == lazy_entry::int_t);
+		TORRENT_ASSERT(e.int_value() == 12453);
+	}
 	
+	{
+		char b[] = "26:abcdefghijklmnopqrstuvwxyz";
+		lazy_entry e;
+		int ret = lazy_bdecode(b, b + sizeof(b)-1, e);
+		TORRENT_ASSERT(ret == 0);
+		TORRENT_ASSERT(e.type() == lazy_entry::string_t);
+		TORRENT_ASSERT(e.string_value() == std::string("abcdefghijklmnopqrstuvwxyz"));
+	}
+
+	{
+		char b[] = "li12453e3:aaae";
+		lazy_entry e;
+		int ret = lazy_bdecode(b, b + sizeof(b)-1, e);
+		TORRENT_ASSERT(ret == 0);
+		TORRENT_ASSERT(e.type() == lazy_entry::list_t);
+		TORRENT_ASSERT(e.list_size() == 2);
+		TORRENT_ASSERT(e.list_at(0)->type() == lazy_entry::int_t);
+		TORRENT_ASSERT(e.list_at(1)->type() == lazy_entry::string_t);
+		TORRENT_ASSERT(e.list_at(0)->int_value() == 12453);
+		TORRENT_ASSERT(e.list_at(1)->string_value() == std::string("aaa"));
+	}
+
+	{
+		char b[] = "d1:ai12453e1:b3:aaa1:c3:bbbe";
+		lazy_entry e;
+		int ret = lazy_bdecode(b, b + sizeof(b)-1, e);
+		TORRENT_ASSERT(ret == 0);
+		TORRENT_ASSERT(e.type() == lazy_entry::dict_t);
+		TORRENT_ASSERT(e.dict_size() == 3);
+		TORRENT_ASSERT(e.dict_find("a")->type() == lazy_entry::int_t);
+		TORRENT_ASSERT(e.dict_find("a")->int_value() == 12453);
+		TORRENT_ASSERT(e.dict_find("b")->type() == lazy_entry::string_t);
+		TORRENT_ASSERT(e.dict_find("b")->string_value() == std::string("aaa"));
+		TORRENT_ASSERT(e.dict_find("c")->type() == lazy_entry::string_t);
+		TORRENT_ASSERT(e.dict_find("c")->string_value() == std::string("bbb"));
+	}
 	return 0;
 }