added high performance bdecoder code

This commit is contained in:
Arvid Norberg 2008-04-11 03:41:09 +00:00
parent 093d912e9a
commit b300c7f835
8 changed files with 476 additions and 1 deletions

View File

@ -246,6 +246,7 @@ SOURCES =
connection_queue
disk_buffer_holder
entry
lazy_bdecode
escape_string
gzip
http_connection

View File

@ -33,6 +33,7 @@ libtorrent/io.hpp \
libtorrent/ip_filter.hpp \
libtorrent/chained_buffer.hpp \
libtorrent/lsd.hpp \
libtorrent/lazy_entry.hpp \
libtorrent/peer.hpp \
libtorrent/peer_connection.hpp \
libtorrent/bt_peer_connection.hpp \

View File

@ -0,0 +1,156 @@
/*
Copyright (c) 2003, Arvid Norberg
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the distribution.
* Neither the name of the author nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef TORRENT_LAZY_ENTRY_HPP_INCLUDED
#define TORRENT_LAZY_ENTRY_HPP_INCLUDED
#include <utility>
#include <vector>
#include "libtorrent/assert.hpp"
#include <boost/cstdint.hpp>
namespace libtorrent
{
// forward declaration, the definition follows further down in this header
struct lazy_entry;
// accumulates the decimal digits found between start and the first
// occurrence of delimiter (or end) into val. val is not reset first, so
// the caller is expected to initialize it. Returns a pointer to where
// scanning stopped, or 0 if a non-digit character was encountered
char* parse_int(char* start, char* end, char delimiter, boost::int64_t& val);
// decodes the bencoded buffer [start, end) into ret. The buffer is
// modified in place (null terminators are written into it) and ret
// keeps pointers into it. depth_limit bounds how deep lists and
// dictionaries may nest.
// return 0 = success
int lazy_bdecode(char* start, char* end, lazy_entry& ret, int depth_limit = 1000);
// One node of the tree built by lazy_bdecode(). A node is either empty
// (none_t), an integer, a string, a list or a dictionary. Integers and
// strings are not copied or converted up front; the node just stores a
// pointer to null terminated text. Lists and dictionaries own a heap
// allocated array of child nodes which is released by clear() and by
// the destructor.
struct lazy_entry
{
	enum entry_type_t
	{
		none_t, dict_t, list_t, string_t, int_t
	};

	// creates an empty (none_t) entry. The size and capacity are zeroed
	// as well so that no member is ever left uninitialized
	lazy_entry() : m_type(none_t), m_size(0), m_capacity(0)
	{ m_data.start = 0; }

	entry_type_t type() const { return m_type; }

	// integer functions
	// =================

	// turns this (empty) entry into an integer.
	// start is a null terminated string (decimal number)
	void construct_int(char* start)
	{
		TORRENT_ASSERT(m_type == none_t);
		m_type = int_t;
		m_data.start = start;
	}

	// parses and returns the number. Only valid for int_t entries
	boost::int64_t int_value() const;

	// string functions
	// ================

	// turns this (empty) entry into a string.
	// start is a null terminated string
	void construct_string(char* start)
	{
		TORRENT_ASSERT(m_type == none_t);
		m_type = string_t;
		m_data.start = start;
	}

	// returns the pointer passed to construct_string().
	// Only valid for string_t entries
	char const* string_value() const
	{
		TORRENT_ASSERT(m_type == string_t);
		return m_data.start;
	}

	// dictionary functions
	// ====================

	// turns this (empty) entry into a dictionary without any items
	void construct_dict()
	{
		TORRENT_ASSERT(m_type == none_t);
		m_type = dict_t;
		m_size = 0;
		m_capacity = 0;
	}

	// adds an item with the key 'name' and returns the (empty) entry
	// for its value, or 0 if memory could not be allocated
	lazy_entry* dict_append(char* name);

	// returns the value stored under the key 'name', or 0 if
	// there is no such key
	lazy_entry* dict_find(char const* name);
	lazy_entry const* dict_find(char const* name) const
	{ return const_cast<lazy_entry*>(this)->dict_find(name); }

	// the number of items in the dictionary
	int dict_size() const
	{
		TORRENT_ASSERT(m_type == dict_t);
		return m_size;
	}

	// list functions
	// ==============

	// turns this (empty) entry into a list without any items
	void construct_list()
	{
		TORRENT_ASSERT(m_type == none_t);
		m_type = list_t;
		m_size = 0;
		m_capacity = 0;
	}

	// adds an (empty) entry to the end of the list and returns it,
	// or 0 if memory could not be allocated
	lazy_entry* list_append();

	// returns the item at index i, which must be in
	// the range [0, list_size())
	lazy_entry* list_at(int i)
	{
		TORRENT_ASSERT(m_type == list_t);
		TORRENT_ASSERT(i >= 0);
		TORRENT_ASSERT(i < m_size);
		return &m_data.list[i];
	}
	lazy_entry const* list_at(int i) const
	{ return const_cast<lazy_entry*>(this)->list_at(i); }

	// the number of items in the list
	int list_size() const
	{
		TORRENT_ASSERT(m_type == list_t);
		return m_size;
	}

	// frees any child entries and turns this entry back into none_t
	void clear();

	~lazy_entry()
	{ clear(); }

private:

	// not copyable. A list or dictionary owns its array of children
	// through a raw pointer, so a member-wise copy would make two
	// entries delete the same array. These are declared but
	// intentionally left unimplemented
	lazy_entry(lazy_entry const&);
	lazy_entry& operator=(lazy_entry const&);

	entry_type_t m_type;
	union data_t
	{
		// dict_t: array of (key, value) pairs
		std::pair<char*, lazy_entry>* dict;
		// list_t: array of items
		lazy_entry* list;
		// int_t and string_t: the null terminated text
		char* start;
	} m_data;
	int m_size; // if list or dictionary, the number of items
	int m_capacity; // if list or dictionary, allocated number of items
};
};
#endif

View File

@ -13,7 +13,7 @@ kademlia/traversal_algorithm.cpp
endif
libtorrent_la_SOURCES = entry.cpp escape_string.cpp \
assert.cpp enum_net.cpp broadcast_socket.cpp \
lazy_bdecode.cpp assert.cpp enum_net.cpp broadcast_socket.cpp \
peer_connection.cpp bt_peer_connection.cpp web_peer_connection.cpp \
natpmp.cpp piece_picker.cpp policy.cpp session.cpp session_impl.cpp sha1.cpp \
stat.cpp storage.cpp mapped_storage.cpp torrent.cpp torrent_handle.cpp pe_crypto.cpp \

243
src/lazy_bdecode.cpp Normal file
View File

@ -0,0 +1,243 @@
/*
Copyright (c) 2008, Arvid Norberg
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the distribution.
* Neither the name of the author nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include "libtorrent/lazy_entry.hpp"

#include <cctype>
#include <cstring>
#include <limits>
#include <new>
namespace libtorrent
{
// Produces the error code (-1) that the decoder in this file returns
// whenever it rejects its input. Every failure path goes through here.
int fail_bdecode()
{
	int const error_code = -1;
	return error_code;
}
// fills in 'val' with what the string between start and the
// first occurance of the delimiter is interpreted as an int.
// return the pointer to the delimiter, or 0 if there is a
// parse error. val should be initialized to zero
char* parse_int(char* start, char* end, char delimiter, boost::int64_t& val)
{
while (start < end && *start != delimiter)
{
if (!std::isdigit(*start)) { fail_bdecode(); return 0; }
val *= 10;
val += *start - '0';
++start;
}
return start;
}
// Scans [start, end) for the first occurrence of 'delimiter'.
// Returns a pointer to it, or 'end' when the range does not contain
// the delimiter. If the range is empty, 'start' is returned unchanged.
char* find_char(char* start, char* end, char delimiter)
{
	char* cursor = start;
	for (; cursor < end; ++cursor)
	{
		if (*cursor == delimiter) break;
	}
	return cursor;
}
// Decodes the bencoded data in [start, end) into 'ret'.
//
// The buffer is modified in place: the byte following each string,
// dictionary key and integer is overwritten with a null terminator,
// and the resulting entries point straight into the buffer. The buffer
// therefore has to stay alive for as long as 'ret' is used. A string
// that ends exactly at 'end' is not terminated by this function, so the
// caller needs a null byte at 'end' for that case.
//
// depth_limit is the maximum number of nested lists/dictionaries that
// is accepted.
//
// return 0 = success, otherwise the value of fail_bdecode(). An empty
// buffer is a success and leaves 'ret' empty. Anything following the
// first complete top level value is ignored. On failure 'ret' may hold
// a partially built tree.
int lazy_bdecode(char* start, char* end, lazy_entry& ret, int depth_limit)
{
	ret.clear();
	if (start == end) return 0;

	// the entries that are currently being filled in. The back of the
	// stack is the innermost one
	std::vector<lazy_entry*> stack;
	stack.push_back(&ret);
	while (start < end)
	{
		if (stack.empty()) break; // done!
		if (int(stack.size()) > depth_limit) return fail_bdecode();

		lazy_entry* top = stack.back();

		char t = *start;
		*start = 0; // null terminate any previous string
		++start;
		// every token but the end marker needs at least one more byte
		if (start == end && t != 'e') return fail_bdecode();

		switch (top->type())
		{
			case lazy_entry::dict_t:
			{
				if (t == 'e')
				{
					stack.pop_back();
					continue;
				}
				// a dictionary item starts with its key, which is a
				// string and hence has to start with a digit. Without
				// this check 'len' could turn negative and move start
				// backwards, out of the buffer
				if (!std::isdigit(static_cast<unsigned char>(t))) return fail_bdecode();
				boost::int64_t len = t - '0';
				start = parse_int(start, end, ':', len);
				// the ':', the key and at least two bytes of value must
				// fit in what is left. Lengths are compared instead of
				// pointers so that a huge len cannot overflow a pointer
				if (start == 0 || len < 0 || len > (end - start) - 3 || *start != ':')
					return fail_bdecode();
				++start;
				lazy_entry* ent = top->dict_append(start);
				if (ent == 0) return fail_bdecode(); // out of memory
				start += len;
				stack.push_back(ent);
				// the value follows right after the key
				t = *start;
				*start = 0; // null terminate the key
				++start;
				break;
			}
			case lazy_entry::list_t:
			{
				if (t == 'e')
				{
					stack.pop_back();
					continue;
				}
				lazy_entry* ent = top->list_append();
				if (ent == 0) return fail_bdecode(); // out of memory
				stack.push_back(ent);
				break;
			}
			default: break;
		}

		// top is now the (still empty) entry that 't' describes
		top = stack.back();
		switch (t)
		{
			case 'd':
				top->construct_dict();
				continue;
			case 'l':
				top->construct_list();
				continue;
			case 'i':
				top->construct_int(start);
				start = find_char(start, end, 'e');
				if (start == end) return fail_bdecode();
				TORRENT_ASSERT(*start == 'e');
				*start++ = 0; // null terminate the number
				stack.pop_back();
				continue;
			default:
			{
				if (!std::isdigit(static_cast<unsigned char>(t))) return fail_bdecode();
				boost::int64_t len = t - '0';
				start = parse_int(start, end, ':', len);
				// the ':' and the string itself must fit in what is left
				if (start == 0 || len < 0 || len > (end - start) - 1 || *start != ':')
					return fail_bdecode();
				++start;
				top->construct_string(start);
				stack.pop_back();
				start += len;
				continue;
			}
		}
	}

	// running out of input while a list or dictionary is still open
	// means that the message was truncated
	if (!stack.empty()) return fail_bdecode();
	return 0;
}
// Converts the text of an integer entry to a number. An optional
// leading '-' negates the result. Parsing stops at the null terminator
// or at the first character that is not a digit, and the digits read
// up to that point make up the returned value.
boost::int64_t lazy_entry::int_value() const
{
	TORRENT_ASSERT(m_type == int_t);

	char* digits = m_data.start;
	bool const negative = (*digits == '-');
	if (negative) ++digits;

	boost::int64_t val = 0;
	// the text is null terminated, so 0 is used as the delimiter
	parse_int(digits, m_data.start + 100, 0, val);
	return negative ? -val : val;
}
// Appends an item with the key 'name' to this dictionary and returns a
// pointer to its (empty) value entry, which the caller is expected to
// fill in. 'name' is stored as is, it is not copied.
// Returns 0 if memory could not be allocated, in which case the
// dictionary is left unchanged.
// Growing the dictionary moves the items, so pointers returned by
// earlier calls are invalidated.
lazy_entry* lazy_entry::dict_append(char* name)
{
	TORRENT_ASSERT(m_type == dict_t);
	TORRENT_ASSERT(m_size <= m_capacity);
	if (m_capacity == 0)
	{
		int capacity = 10;
		m_data.dict = new (std::nothrow) std::pair<char*, lazy_entry>[capacity];
		if (m_data.dict == 0) return 0;
		m_capacity = capacity;
	}
	else if (m_size == m_capacity)
	{
		int capacity = m_capacity * 2;
		std::pair<char*, lazy_entry>* tmp = new (std::nothrow) std::pair<char*, lazy_entry>[capacity];
		if (tmp == 0) return 0;
		for (int i = 0; i < m_size; ++i)
		{
			lazy_entry& from = m_data.dict[i].second;
			lazy_entry& to = tmp[i].second;
			tmp[i].first = m_data.dict[i].first;
			to.m_type = from.m_type;
			to.m_data = from.m_data;
			if (from.m_type == dict_t || from.m_type == list_t)
			{
				to.m_size = from.m_size;
				to.m_capacity = from.m_capacity;
			}
			// the new item owns the children now. Without this, the
			// delete[] below would run clear() on the old item and free
			// the children that the new item still points to
			from.m_type = none_t;
		}
		delete[] m_data.dict;
		m_data.dict = tmp;
		m_capacity = capacity;
	}
	TORRENT_ASSERT(m_size < m_capacity);
	std::pair<char*, lazy_entry>& ret = m_data.dict[m_size++];
	ret.first = name;
	return &ret.second;
}
// Looks up 'name' among the keys of this dictionary with a linear
// search, comparing the keys as null terminated strings. Returns the
// value of the first matching item, or 0 if no key matches.
lazy_entry* lazy_entry::dict_find(char const* name)
{
	TORRENT_ASSERT(m_type == dict_t);
	int idx = 0;
	while (idx < m_size)
	{
		std::pair<char*, lazy_entry>& item = m_data.dict[idx];
		if (strcmp(name, item.first) == 0) return &item.second;
		++idx;
	}
	return 0;
}
// Appends an (empty) entry to the end of this list and returns a
// pointer to it, which the caller is expected to fill in.
// Returns 0 if memory could not be allocated, in which case the list
// is left unchanged.
// Growing the list moves the items, so pointers returned by earlier
// calls are invalidated.
lazy_entry* lazy_entry::list_append()
{
	TORRENT_ASSERT(m_type == list_t);
	TORRENT_ASSERT(m_size <= m_capacity);
	if (m_capacity == 0)
	{
		int capacity = 10;
		m_data.list = new (std::nothrow) lazy_entry[capacity];
		if (m_data.list == 0) return 0;
		m_capacity = capacity;
	}
	else if (m_size == m_capacity)
	{
		int capacity = m_capacity * 2;
		lazy_entry* tmp = new (std::nothrow) lazy_entry[capacity];
		if (tmp == 0) return 0;
		for (int i = 0; i < m_size; ++i)
		{
			lazy_entry& from = m_data.list[i];
			lazy_entry& to = tmp[i];
			to.m_type = from.m_type;
			to.m_data = from.m_data;
			if (from.m_type == dict_t || from.m_type == list_t)
			{
				to.m_size = from.m_size;
				to.m_capacity = from.m_capacity;
			}
			// the new item owns the children now. Without this, the
			// delete[] below would run clear() on the old item and free
			// the children that the new item still points to
			from.m_type = none_t;
		}
		delete[] m_data.list;
		m_data.list = tmp;
		m_capacity = capacity;
	}
	TORRENT_ASSERT(m_size < m_capacity);
	return m_data.list + (m_size++);
}
// Turns this entry back into an empty (none_t) one. For a list or a
// dictionary the array of items is deleted, which in turn destroys
// every child entry. Integers and strings own nothing and only have
// their type reset.
void lazy_entry::clear()
{
	if (m_type == list_t)
	{
		delete[] m_data.list;
	}
	else if (m_type == dict_t)
	{
		delete[] m_data.dict;
	}
	m_type = none_t;
	m_capacity = 0;
	m_size = 0;
}
};

View File

@ -37,6 +37,7 @@ test-suite libtorrent :
[ run test_fast_extension.cpp ]
[ run test_pe_crypto.cpp ]
[ run test_bencoding.cpp ]
[ run test_bdecode_performance.cpp ]
[ run test_primitives.cpp ]
[ run test_ip_filter.cpp ]
[ run test_hasher.cpp ]

View File

@ -0,0 +1,27 @@
#include "libtorrent/lazy_entry.hpp"
#include <boost/lexical_cast.hpp>
#include <iostream>
#include "test.hpp"
#include "libtorrent/time.hpp"
using namespace libtorrent;
int test_main()
{
using namespace libtorrent;
ptime start(time_now());
for (int i = 0; i < 1000000; ++i)
{
char b[] = "d1:ai12453e1:b3:aaa1:c3:bbbe";
lazy_entry e;
int ret = lazy_bdecode(b, b + sizeof(b)-1, e);
}
ptime stop(time_now());
std::cout << "done in " << total_milliseconds(stop - start) / 1000. << " seconds per million message" << std::endl;
return 0;
}

View File

@ -1,4 +1,5 @@
#include "libtorrent/bencode.hpp"
#include "libtorrent/lazy_entry.hpp"
#include <boost/lexical_cast.hpp>
#include "test.hpp"
@ -68,7 +69,52 @@ int test_main()
TEST_CHECK(encode(e) == "d3:cow3:moo4:spam4:eggse");
TEST_CHECK(decode(encode(e)) == e);
}
// lazy_bdecode() tests. The results are verified with TEST_CHECK, the
// same macro the checks above use, so that a failure is reported by
// the test itself. Checks that would dereference a missing item or
// read an entry of the wrong type are skipped once an earlier check
// has failed.

// a top level integer
{
	char b[] = "i12453e";
	lazy_entry e;
	int ret = lazy_bdecode(b, b + sizeof(b)-1, e);
	TEST_CHECK(ret == 0);
	TEST_CHECK(e.type() == lazy_entry::int_t);
	if (e.type() == lazy_entry::int_t)
	{
		TEST_CHECK(e.int_value() == 12453);
	}
}

// a top level string
{
	char b[] = "26:abcdefghijklmnopqrstuvwxyz";
	lazy_entry e;
	int ret = lazy_bdecode(b, b + sizeof(b)-1, e);
	TEST_CHECK(ret == 0);
	TEST_CHECK(e.type() == lazy_entry::string_t);
	if (e.type() == lazy_entry::string_t)
	{
		TEST_CHECK(e.string_value() == std::string("abcdefghijklmnopqrstuvwxyz"));
	}
}

// a list holding an integer and a string
{
	char b[] = "li12453e3:aaae";
	lazy_entry e;
	int ret = lazy_bdecode(b, b + sizeof(b)-1, e);
	TEST_CHECK(ret == 0);
	TEST_CHECK(e.type() == lazy_entry::list_t);
	if (e.type() == lazy_entry::list_t)
	{
		TEST_CHECK(e.list_size() == 2);
		if (e.list_size() == 2)
		{
			TEST_CHECK(e.list_at(0)->type() == lazy_entry::int_t);
			TEST_CHECK(e.list_at(1)->type() == lazy_entry::string_t);
			if (e.list_at(0)->type() == lazy_entry::int_t)
			{
				TEST_CHECK(e.list_at(0)->int_value() == 12453);
			}
			if (e.list_at(1)->type() == lazy_entry::string_t)
			{
				TEST_CHECK(e.list_at(1)->string_value() == std::string("aaa"));
			}
		}
	}
}

// a dictionary holding an integer and two strings
{
	char b[] = "d1:ai12453e1:b3:aaa1:c3:bbbe";
	lazy_entry e;
	int ret = lazy_bdecode(b, b + sizeof(b)-1, e);
	TEST_CHECK(ret == 0);
	TEST_CHECK(e.type() == lazy_entry::dict_t);
	if (e.type() == lazy_entry::dict_t)
	{
		TEST_CHECK(e.dict_size() == 3);

		lazy_entry* a = e.dict_find("a");
		TEST_CHECK(a != 0 && a->type() == lazy_entry::int_t);
		if (a != 0 && a->type() == lazy_entry::int_t)
		{
			TEST_CHECK(a->int_value() == 12453);
		}

		lazy_entry* b_item = e.dict_find("b");
		TEST_CHECK(b_item != 0 && b_item->type() == lazy_entry::string_t);
		if (b_item != 0 && b_item->type() == lazy_entry::string_t)
		{
			TEST_CHECK(b_item->string_value() == std::string("aaa"));
		}

		lazy_entry* c = e.dict_find("c");
		TEST_CHECK(c != 0 && c->type() == lazy_entry::string_t);
		if (c != 0 && c->type() == lazy_entry::string_t)
		{
			TEST_CHECK(c->string_value() == std::string("bbb"));
		}
	}
}
return 0;
}