diff --git a/Jamfile b/Jamfile
index b2031a51f..73dcb1bbb 100755
--- a/Jamfile
+++ b/Jamfile
@@ -246,6 +246,7 @@ SOURCES =
 	connection_queue
 	disk_buffer_holder
 	entry
+	lazy_bdecode
 	escape_string
 	gzip
 	http_connection
diff --git a/include/Makefile.am b/include/Makefile.am
index 2a789a2e3..5e0f7bd78 100644
--- a/include/Makefile.am
+++ b/include/Makefile.am
@@ -33,6 +33,7 @@ libtorrent/io.hpp \
 libtorrent/ip_filter.hpp \
 libtorrent/chained_buffer.hpp \
 libtorrent/lsd.hpp \
+libtorrent/lazy_entry.hpp \
 libtorrent/peer.hpp \
 libtorrent/peer_connection.hpp \
 libtorrent/bt_peer_connection.hpp \
diff --git a/include/libtorrent/lazy_entry.hpp b/include/libtorrent/lazy_entry.hpp
new file mode 100644
index 000000000..b40e85b44
--- /dev/null
+++ b/include/libtorrent/lazy_entry.hpp
@@ -0,0 +1,183 @@
+/*
+
+Copyright (c) 2003, Arvid Norberg
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in
+      the documentation and/or other materials provided with the distribution.
+    * Neither the name of the author nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+
+*/
+
+#ifndef TORRENT_LAZY_ENTRY_HPP_INCLUDED
+#define TORRENT_LAZY_ENTRY_HPP_INCLUDED
+
+#include <utility>
+#include <vector>
+#include "libtorrent/assert.hpp"
+#include <boost/cstdint.hpp>
+
+namespace libtorrent
+{
+	struct lazy_entry;
+
+	// parses the decimal digits in [start, end) up to the first
+	// occurrence of 'delimiter' and accumulates them into 'val',
+	// which the caller has to initialize. returns a pointer to the
+	// delimiter (or 'end' if it was not found), or 0 if a non-digit
+	// was encountered or the value does not fit in an int64
+	char* parse_int(char* start, char* end, char delimiter, boost::int64_t& val);
+
+	// decodes the bencoded buffer [start, end) in place into 'ret'.
+	// the buffer is modified (null terminators are written into it)
+	// and 'ret' keeps pointing into it, so the buffer has to outlive
+	// 'ret'. a string that ends exactly at 'end' is not terminated
+	// by the decoder, the caller has to make sure *end is a readable
+	// 0 for string_value() to be usable on it.
+	// return 0 = success
+	int lazy_bdecode(char* start, char* end, lazy_entry& ret, int depth_limit = 1000);
+
+	// a node in a decoded bencoded tree. it refers to string and
+	// integer data in the decoded buffer and only owns the arrays
+	// holding the children of lists and dictionaries.
+	// NOTE: copying a list or dictionary entry is not supported,
+	// both copies would free the same array of children
+	struct lazy_entry
+	{
+		enum entry_type_t
+		{
+			none_t, dict_t, list_t, string_t, int_t
+		};
+
+		lazy_entry() : m_type(none_t), m_size(0), m_capacity(0)
+		{ m_data.start = 0; }
+
+		entry_type_t type() const { return m_type; }
+
+		// start is a null terminated string (decimal number)
+		void construct_int(char* start)
+		{
+			TORRENT_ASSERT(m_type == none_t);
+			m_type = int_t;
+			m_data.start = start;
+		}
+
+		boost::int64_t int_value() const;
+
+		// string functions
+		// ================
+
+		// start is a null terminated string
+		void construct_string(char* start)
+		{
+			TORRENT_ASSERT(m_type == none_t);
+			m_type = string_t;
+			m_data.start = start;
+		}
+
+		char const* string_value() const
+		{
+			TORRENT_ASSERT(m_type == string_t);
+			return m_data.start;
+		}
+
+		// dictionary functions
+		// ====================
+
+		void construct_dict()
+		{
+			TORRENT_ASSERT(m_type == none_t);
+			m_type = dict_t;
+			m_data.dict = 0; // nothing is allocated yet, clear() deletes this
+			m_size = 0;
+			m_capacity = 0;
+		}
+
+		// appends an entry with the key 'name' (a null terminated
+		// string which is not copied). returns 0 if out of memory
+		lazy_entry* dict_append(char* name);
+		// returns 0 if there is no entry with the key 'name'
+		lazy_entry* dict_find(char const* name);
+		lazy_entry const* dict_find(char const* name) const
+		{ return const_cast<lazy_entry*>(this)->dict_find(name); }
+
+		int dict_size() const
+		{
+			TORRENT_ASSERT(m_type == dict_t);
+			return m_size;
+		}
+
+		// list functions
+		// ==============
+
+		void construct_list()
+		{
+			TORRENT_ASSERT(m_type == none_t);
+			m_type = list_t;
+			m_data.list = 0; // nothing is allocated yet, clear() deletes this
+			m_size = 0;
+			m_capacity = 0;
+		}
+
+		// appends an empty (none_t) entry. returns 0 if out of memory
+		lazy_entry* list_append();
+		lazy_entry* list_at(int i)
+		{
+			TORRENT_ASSERT(m_type == list_t);
+			TORRENT_ASSERT(i >= 0 && i < m_size);
+			return &m_data.list[i];
+		}
+		lazy_entry const* list_at(int i) const
+		{ return const_cast<lazy_entry*>(this)->list_at(i); }
+
+		int list_size() const
+		{
+			TORRENT_ASSERT(m_type == list_t);
+			return m_size;
+		}
+
+		// frees the children (if any) and resets the entry to none_t
+		void clear();
+
+		~lazy_entry()
+		{ clear(); }
+
+	private:
+
+		entry_type_t m_type;
+		union data_t
+		{
+			std::pair<char*, lazy_entry>* dict;
+			lazy_entry* list;
+			char* start;
+		} m_data;
+		int m_size; // if list or dictionary, the number of items
+		int m_capacity; // if list or dictionary, allocated number of items
+	};
+
+}
+
+
+#endif
+
diff --git a/src/Makefile.am b/src/Makefile.am
index 9efbc64c1..e1cf35b28 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -13,7 +13,7 @@ kademlia/traversal_algorithm.cpp
 endif
 
 libtorrent_la_SOURCES = entry.cpp escape_string.cpp \
-assert.cpp enum_net.cpp broadcast_socket.cpp \
+lazy_bdecode.cpp assert.cpp enum_net.cpp broadcast_socket.cpp \
 peer_connection.cpp bt_peer_connection.cpp web_peer_connection.cpp \
 natpmp.cpp piece_picker.cpp policy.cpp session.cpp session_impl.cpp sha1.cpp \
 stat.cpp storage.cpp mapped_storage.cpp torrent.cpp torrent_handle.cpp pe_crypto.cpp \
diff --git a/src/lazy_bdecode.cpp b/src/lazy_bdecode.cpp
new file mode 100644
index 000000000..8681f452e
--- /dev/null
+++ b/src/lazy_bdecode.cpp
@@ -0,0 +1,286 @@
+/*
+
+Copyright (c) 2008, Arvid Norberg
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in
+      the documentation and/or other materials provided with the distribution.
+    * Neither the name of the author nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+
+*/
+
+#include "libtorrent/lazy_entry.hpp"
+
+#include <cctype>
+#include <cstring>
+#include <new>
+#include <vector>
+
+namespace libtorrent
+{
+	int fail_bdecode() { return -1; }
+
+	// fills in 'val' with what the string between start and the
+	// first occurrence of the delimiter is interpreted as an int.
+	// return the pointer to the delimiter (or 'end' if there is no
+	// delimiter), or 0 if there is a parse error or the value does
+	// not fit in an int64. val is accumulated into, it should be
+	// initialized to zero or to the digits already consumed
+	char* parse_int(char* start, char* end, char delimiter, boost::int64_t& val)
+	{
+		// the largest value an int64 can hold
+		boost::int64_t const max_val = boost::int64_t(~boost::uint64_t(0) >> 1);
+
+		while (start < end && *start != delimiter)
+		{
+			// isdigit() is undefined for negative values, hence the cast
+			if (!std::isdigit(static_cast<unsigned char>(*start)))
+			{ fail_bdecode(); return 0; }
+			int const digit = *start - '0';
+			// signed overflow is undefined and could make a length
+			// negative, treat too large numbers as a parse error
+			if (val > (max_val - digit) / 10)
+			{ fail_bdecode(); return 0; }
+			val *= 10;
+			val += digit;
+			++start;
+		}
+		return start;
+	}
+
+	char* find_char(char* start, char* end, char delimiter)
+	{
+		while (start < end && *start != delimiter) ++start;
+		return start;
+	}
+
+	// return 0 = success
+	int lazy_bdecode(char* start, char* end, lazy_entry& ret, int depth_limit)
+	{
+		ret.clear();
+		if (start == end) return 0;
+
+		std::vector<lazy_entry*> stack;
+
+		stack.push_back(&ret);
+		while (start < end)
+		{
+			if (stack.empty()) break; // done!
+
+			lazy_entry* top = stack.back();
+
+			if (int(stack.size()) > depth_limit) return fail_bdecode();
+			char t = *start;
+			*start = 0; // null terminate any previous string
+			++start;
+			if (start == end && t != 'e') return fail_bdecode();
+
+			switch (top->type())
+			{
+				case lazy_entry::dict_t:
+				{
+					if (t == 'e')
+					{
+						stack.pop_back();
+						continue;
+					}
+					// dictionary keys have to be strings
+					if (!std::isdigit(static_cast<unsigned char>(t))) return fail_bdecode();
+					boost::int64_t len = t - '0';
+					start = parse_int(start, end, ':', len);
+					// the ':', the key and at least two bytes of value
+					// have to fit. len is untrusted, compare it against
+					// the space left instead of forming start + len
+					if (start == 0 || len > end - start - 3 || *start != ':') return fail_bdecode();
+					++start;
+					lazy_entry* ent = top->dict_append(start);
+					if (ent == 0) return fail_bdecode(); // out of memory
+					start += len;
+					stack.push_back(ent);
+					t = *start;
+					*start = 0; // null terminate the key
+					++start;
+					break;
+				}
+				case lazy_entry::list_t:
+				{
+					if (t == 'e')
+					{
+						stack.pop_back();
+						continue;
+					}
+					lazy_entry* ent = top->list_append();
+					if (ent == 0) return fail_bdecode(); // out of memory
+					stack.push_back(ent);
+					break;
+				}
+				default: break;
+			}
+
+			top = stack.back();
+			switch (t)
+			{
+				case 'd':
+					top->construct_dict();
+					continue;
+				case 'l':
+					top->construct_list();
+					continue;
+				case 'i':
+					top->construct_int(start);
+					start = find_char(start, end, 'e');
+					if (start == end) return fail_bdecode();
+					TORRENT_ASSERT(*start == 'e');
+					*start++ = 0;
+					stack.pop_back();
+					continue;
+				default:
+				{
+					if (!std::isdigit(static_cast<unsigned char>(t))) return fail_bdecode();
+
+					boost::int64_t len = t - '0';
+					start = parse_int(start, end, ':', len);
+					// the ':' and the whole string have to fit
+					if (start == 0 || len > end - start - 1 || *start != ':') return fail_bdecode();
+					++start;
+					top->construct_string(start);
+					stack.pop_back();
+					start += len;
+					continue;
+				}
+			}
+		}
+		// entries left on the stack means the buffer ended in the
+		// middle of a list or dictionary
+		if (!stack.empty()) return fail_bdecode();
+		return 0;
+	}
+
+	boost::int64_t lazy_entry::int_value() const
+	{
+		TORRENT_ASSERT(m_type == int_t);
+		boost::int64_t val = 0;
+		char* digits = m_data.start;
+		bool const negative = (*digits == '-');
+		if (negative) ++digits;
+		// the number is null terminated (see construct_int), parse up
+		// to the terminator. if it contains anything but digits, the
+		// value of the digits preceding the first invalid character
+		// is returned
+		parse_int(digits, digits + std::strlen(digits), 0, val);
+		if (negative) val = -val;
+		return val;
+	}
+
+	lazy_entry* lazy_entry::dict_append(char* name)
+	{
+		TORRENT_ASSERT(m_type == dict_t);
+		TORRENT_ASSERT(m_size <= m_capacity);
+		if (m_capacity == 0)
+		{
+			int capacity = 10;
+			m_data.dict = new (std::nothrow) std::pair<char*, lazy_entry>[capacity];
+			if (m_data.dict == 0) return 0;
+			m_capacity = capacity;
+		}
+		else if (m_size == m_capacity)
+		{
+			int capacity = m_capacity * 2;
+			std::pair<char*, lazy_entry>* tmp = new (std::nothrow) std::pair<char*, lazy_entry>[capacity];
+			if (tmp == 0) return 0;
+			std::memcpy(tmp, m_data.dict, sizeof(std::pair<char*, lazy_entry>) * m_size);
+			// the entries were moved bitwise into tmp. make the old
+			// ones forget their children, or delete[] would run their
+			// destructors and free arrays that tmp now refers to
+			for (int i = 0; i < m_size; ++i)
+				m_data.dict[i].second.m_type = none_t;
+			delete[] m_data.dict;
+			m_data.dict = tmp;
+			m_capacity = capacity;
+		}
+
+		TORRENT_ASSERT(m_size < m_capacity);
+		std::pair<char*, lazy_entry>& ret = m_data.dict[m_size++];
+		ret.first = name;
+		return &ret.second;
+	}
+
+	lazy_entry* lazy_entry::dict_find(char const* name)
+	{
+		TORRENT_ASSERT(m_type == dict_t);
+		for (int i = 0; i < m_size; ++i)
+		{
+			if (std::strcmp(name, m_data.dict[i].first) == 0)
+				return &m_data.dict[i].second;
+		}
+		return 0;
+	}
+
+	lazy_entry* lazy_entry::list_append()
+	{
+		TORRENT_ASSERT(m_type == list_t);
+		TORRENT_ASSERT(m_size <= m_capacity);
+		if (m_capacity == 0)
+		{
+			int capacity = 10;
+			m_data.list = new (std::nothrow) lazy_entry[capacity];
+			if (m_data.list == 0) return 0;
+			m_capacity = capacity;
+		}
+		else if (m_size == m_capacity)
+		{
+			int capacity = m_capacity * 2;
+			lazy_entry* tmp = new (std::nothrow) lazy_entry[capacity];
+			if (tmp == 0) return 0;
+			std::memcpy(tmp, m_data.list, sizeof(lazy_entry) * m_size);
+			// the entries were moved bitwise into tmp. make the old
+			// ones forget their children, or delete[] would run their
+			// destructors and free arrays that tmp now refers to
+			for (int i = 0; i < m_size; ++i)
+				m_data.list[i].m_type = none_t;
+			delete[] m_data.list;
+			m_data.list = tmp;
+			m_capacity = capacity;
+		}
+
+		TORRENT_ASSERT(m_size < m_capacity);
+		return m_data.list + (m_size++);
+	}
+
+	void lazy_entry::clear()
+	{
+		switch (m_type)
+		{
+			case list_t: delete[] m_data.list; break;
+			case dict_t: delete[] m_data.dict; break;
+			default: break;
+		}
+		// the entry may be reused, don't leave a dangling pointer
+		m_data.start = 0;
+		m_size = 0;
+		m_capacity = 0;
+		m_type = none_t;
+	}
+}
+
diff --git a/test/Jamfile b/test/Jamfile
index 410c8037a..6cca533b8 100644
--- a/test/Jamfile
+++ b/test/Jamfile
@@ -37,6 +37,7 @@ test-suite libtorrent :
 	[ run test_fast_extension.cpp ]
 	[ run test_pe_crypto.cpp ]
 	[ run test_bencoding.cpp ]
+	[ run test_bdecode_performance.cpp ]
 	[ run test_primitives.cpp ]
 	[ run test_ip_filter.cpp ]
 	[ run test_hasher.cpp ]
diff --git a/test/test_bdecode_performance.cpp b/test/test_bdecode_performance.cpp
new file mode 100644
index 000000000..b27799a6a
--- /dev/null
+++ b/test/test_bdecode_performance.cpp
@@ -0,0 +1,30 @@
+#include "libtorrent/lazy_entry.hpp"
+#include <boost/lexical_cast.hpp>
+#include <iostream>
+
+#include "test.hpp"
+#include "libtorrent/time.hpp"
+
+using namespace libtorrent;
+
+int test_main()
+{
+	using namespace libtorrent;
+
+	ptime start(time_now());
+
+	// count the failures instead of discarding the return value
+	int failures = 0;
+	for (int i = 0; i < 1000000; ++i)
+	{
+		char b[] = "d1:ai12453e1:b3:aaa1:c3:bbbe";
+		lazy_entry e;
+		if (lazy_bdecode(b, b + sizeof(b)-1, e) != 0) ++failures;
+	}
+	ptime stop(time_now());
+
+	std::cout << "done in " << total_milliseconds(stop - start) / 1000. << " seconds per million message" << std::endl;
+	TEST_CHECK(failures == 0);
+	return 0;
+}
+
diff --git a/test/test_bencoding.cpp b/test/test_bencoding.cpp
index 4c232be9d..b006c45d9 100644
--- a/test/test_bencoding.cpp
+++ b/test/test_bencoding.cpp
@@ -1,4 +1,5 @@
 #include "libtorrent/bencode.hpp"
+#include "libtorrent/lazy_entry.hpp"
 #include <boost/lexical_cast.hpp>
 
 #include "test.hpp"
@@ -68,7 +69,73 @@ int test_main()
 		TEST_CHECK(encode(e) == "d3:cow3:moo4:spam4:eggse");
 		TEST_CHECK(decode(encode(e)) == e);
 	}
+
+	{
+		char b[] = "i12453e";
+		lazy_entry e;
+		int ret = lazy_bdecode(b, b + sizeof(b)-1, e);
+		TEST_CHECK(ret == 0);
+		TEST_CHECK(e.type() == lazy_entry::int_t);
+		TEST_CHECK(e.int_value() == 12453);
+	}
 
+	{
+		char b[] = "26:abcdefghijklmnopqrstuvwxyz";
+		lazy_entry e;
+		int ret = lazy_bdecode(b, b + sizeof(b)-1, e);
+		TEST_CHECK(ret == 0);
+		TEST_CHECK(e.type() == lazy_entry::string_t);
+		TEST_CHECK(e.string_value() == std::string("abcdefghijklmnopqrstuvwxyz"));
+	}
+
+	{
+		char b[] = "li12453e3:aaae";
+		lazy_entry e;
+		int ret = lazy_bdecode(b, b + sizeof(b)-1, e);
+		TEST_CHECK(ret == 0);
+		TEST_CHECK(e.type() == lazy_entry::list_t);
+		TEST_CHECK(e.list_size() == 2);
+		TEST_CHECK(e.list_at(0)->type() == lazy_entry::int_t);
+		TEST_CHECK(e.list_at(1)->type() == lazy_entry::string_t);
+		TEST_CHECK(e.list_at(0)->int_value() == 12453);
+		TEST_CHECK(e.list_at(1)->string_value() == std::string("aaa"));
+	}
+
+	{
+		char b[] = "d1:ai12453e1:b3:aaa1:c3:bbbe";
+		lazy_entry e;
+		int ret = lazy_bdecode(b, b + sizeof(b)-1, e);
+		TEST_CHECK(ret == 0);
+		TEST_CHECK(e.type() == lazy_entry::dict_t);
+		TEST_CHECK(e.dict_size() == 3);
+		TEST_CHECK(e.dict_find("a")->type() == lazy_entry::int_t);
+		TEST_CHECK(e.dict_find("a")->int_value() == 12453);
+		TEST_CHECK(e.dict_find("b")->type() == lazy_entry::string_t);
+		TEST_CHECK(e.dict_find("b")->string_value() == std::string("aaa"));
+		TEST_CHECK(e.dict_find("c")->type() == lazy_entry::string_t);
+		TEST_CHECK(e.dict_find("c")->string_value() == std::string("bbb"));
+	}
+
+	{
+		// more than 10 items forces the list to grow, which must not
+		// free the children of the items that were already added
+		char b[] = "lli1eeli1eeli1eeli1eeli1eeli1eeli1eeli1eeli1eeli1eeli1eee";
+		lazy_entry e;
+		int ret = lazy_bdecode(b, b + sizeof(b)-1, e);
+		TEST_CHECK(ret == 0);
+		TEST_CHECK(e.type() == lazy_entry::list_t);
+		TEST_CHECK(e.list_size() == 11);
+		TEST_CHECK(e.list_at(0)->list_size() == 1);
+		TEST_CHECK(e.list_at(0)->list_at(0)->int_value() == 1);
+	}
+
+	{
+		// a list that is never closed is an error
+		char b[] = "li1e";
+		lazy_entry e;
+		int ret = lazy_bdecode(b, b + sizeof(b)-1, e);
+		TEST_CHECK(ret != 0);
+	}
 	return 0;
 }
 