improvements to lazy_bdecode

This commit is contained in:
Arvid Norberg 2008-04-13 22:34:04 +00:00
parent 644d3aa66c
commit 748a66c343
3 changed files with 140 additions and 38 deletions

View File

@ -43,9 +43,9 @@ namespace libtorrent
{
struct lazy_entry;
char* parse_int(char* start, char* end, char delimiter, boost::int64_t& val);
char const* parse_int(char const* start, char const* end, char delimiter, boost::int64_t& val);
// return 0 = success
int lazy_bdecode(char* start, char* end, lazy_entry& ret, int depth_limit = 1000);
int lazy_bdecode(char const* start, char const* end, lazy_entry& ret, int depth_limit = 1000);
struct lazy_entry
{
@ -54,16 +54,21 @@ namespace libtorrent
none_t, dict_t, list_t, string_t, int_t
};
lazy_entry() : m_type(none_t) { m_data.start = 0; }
// constructs an empty entry (none_t) that does not refer to any
// range of a source buffer yet.
// note that m_size and m_capacity are not initialized here.
// construct_dict() and construct_list() set both of them,
// construct_int() and construct_string() set m_size
lazy_entry() : m_type(none_t), m_begin(0), m_end(0)
{ m_data.start = 0; }
entry_type_t type() const { return m_type; }
// start is a null terminated string (decimal number)
void construct_int(char* start)
// turns this (so far untyped) entry into an integer entry.
// start points to the first character of the number in the
// source buffer, i.e. the byte right after the 'i' (a decimal
// digit, or a leading '-'). The characters are not copied.
// length is the number of characters of the number, not
// counting the terminating 'e'
void construct_int(char const* start, int length)
{
TORRENT_ASSERT(m_type == none_t);
m_type = int_t;
m_data.start = start;
m_size = length;
m_begin = start - 1; // include 'i'
m_end = start + length + 1; // include 'e'
}
boost::int64_t int_value() const;
@ -71,36 +76,46 @@ namespace libtorrent
// string functions
// ================
// start points to the first character of the string in the source buffer (the string is not null terminated)
void construct_string(char* start, int length)
{
TORRENT_ASSERT(m_type == none_t);
m_type = string_t;
m_data.start = start;
m_size =length;
}
void construct_string(char const* start, int length);
char const* string_value() const
// returns a pointer to the first character of the string.
// the string is not null-terminated! use string_length()
// to find out how many characters it has
char const* string_ptr() const
{
TORRENT_ASSERT(m_type == string_t);
return m_data.start;
}
// this will return a null terminated string
// it will write to the source buffer!
// the terminator is stored at index m_size, i.e. in the byte right
// after the last character of the string, which overwrites whatever
// follows the string in the buffer. Casting away const is only valid
// if the buffer that was decoded is really writable.
// NOTE(review): if the string is the last thing in the buffer, that
// byte appears to lie past the decoded range - confirm that callers
// always have a spare, writable byte there
char const* string_cstr() const
{
TORRENT_ASSERT(m_type == string_t);
const_cast<char*>(m_data.start)[m_size] = 0;
return m_data.start;
}
// returns a copy of the string, i.e. the m_size characters
// starting at m_data.start, as a std::string. The source
// buffer is not modified
std::string string_value() const
{
TORRENT_ASSERT(m_type == string_t);
return std::string(m_data.start, m_size);
}
// the number of characters in the string.
// unlike the other string functions this does not assert
// that the entry is a string, it returns m_size as is
int string_length() const
{ return m_size; }
// dictionary functions
// ====================
void construct_dict()
// turns this (so far untyped) entry into an empty dictionary
// with no storage allocated for items.
// begin is where the dictionary starts in the source buffer
// (lazy_bdecode passes the position of the 'd').
// the end of the range is not set here, see set_end()
void construct_dict(char const* begin)
{
TORRENT_ASSERT(m_type == none_t);
m_type = dict_t;
m_size = 0;
m_capacity = 0;
m_begin = begin;
}
lazy_entry* dict_append(char* name);
lazy_entry* dict_append(char const* name);
lazy_entry* dict_find(char const* name);
// const overload. It forwards to the non-const dict_find()
// and returns its result as a pointer to const
lazy_entry const* dict_find(char const* name) const
{ return const_cast<lazy_entry*>(this)->dict_find(name); }
@ -120,12 +135,13 @@ namespace libtorrent
// list functions
// ==============
void construct_list()
// turns this (so far untyped) entry into an empty list
// with no storage allocated for items.
// begin is where the list starts in the source buffer
// (lazy_bdecode passes the position of the 'l').
// the end of the range is not set here, see set_end()
void construct_list(char const* begin)
{
TORRENT_ASSERT(m_type == none_t);
m_type = list_t;
m_size = 0;
m_capacity = 0;
m_begin = begin;
}
lazy_entry* list_append();
@ -144,22 +160,37 @@ namespace libtorrent
return m_size;
}
// records where this entry ends in the source buffer.
// end points one byte past the last byte of the entry
// and has to be greater than the recorded beginning
void set_end(char const* end)
{
TORRENT_ASSERT(end > m_begin);
m_end = end;
}
void clear();
~lazy_entry()
{ clear(); }
// returns pointers into the source buffer where
// this entry has its bencoded data
std::pair<char const*, int> data_section();
private:
entry_type_t m_type;
union data_t
{
std::pair<char*, lazy_entry>* dict;
std::pair<char const*, lazy_entry>* dict;
lazy_entry* list;
char* start;
char const* start;
} m_data;
int m_size; // if list or dictionary, the number of items
int m_capacity; // if list or dictionary, allocated number of items
// the range in the original buffer holding the bencoded form of
// this entry. It is recorded for every entry type (see data_section())
char const* m_begin;
char const* m_end;
};
std::ostream& operator<<(std::ostream& os, lazy_entry const& e);

View File

@ -42,7 +42,7 @@ namespace libtorrent
// first occurrence of the delimiter is interpreted as an int.
// return the pointer to the delimiter, or 0 if there is a
// parse error. val should be initialized to zero
char* parse_int(char* start, char* end, char delimiter, boost::int64_t& val)
char const* parse_int(char const* start, char const* end, char delimiter, boost::int64_t& val)
{
while (start < end && *start != delimiter)
{
@ -55,14 +55,14 @@ namespace libtorrent
return start;
}
char* find_char(char* start, char* end, char delimiter)
// scans [start, end) for the first byte equal to delimiter and
// returns a pointer to it. If there is no such byte, the position
// where the scan stopped is returned (end, for a non-empty range)
char const* find_char(char const* start, char const* end, char delimiter)
{
	for (; start < end; ++start)
	{
		if (*start == delimiter) break;
	}
	return start;
}
// return 0 = success
int lazy_bdecode(char* start, char* end, lazy_entry& ret, int depth_limit)
int lazy_bdecode(char const* start, char const* end, lazy_entry& ret, int depth_limit)
{
ret.clear();
if (start == end) return 0;
@ -76,10 +76,10 @@ namespace libtorrent
lazy_entry* top = stack.back();
if (stack.size() > depth_limit) return fail_bdecode();
if (int(stack.size()) > depth_limit) return fail_bdecode();
if (start == end) return fail_bdecode();
char t = *start;
*start++ = 0; // null terminate any previous string
++start;
if (start == end && t != 'e') return fail_bdecode();
switch (top->type())
@ -88,6 +88,7 @@ namespace libtorrent
{
if (t == 'e')
{
top->set_end(start);
stack.pop_back();
continue;
}
@ -99,13 +100,14 @@ namespace libtorrent
start += len;
stack.push_back(ent);
t = *start;
*start++ = 0; // null terminate any previous string
++start;
break;
}
case lazy_entry::list_t:
{
if (t == 'e')
{
top->set_end(start);
stack.pop_back();
continue;
}
@ -120,19 +122,22 @@ namespace libtorrent
switch (t)
{
case 'd':
top->construct_dict();
top->construct_dict(start - 1);
continue;
case 'l':
top->construct_list();
top->construct_list(start - 1);
continue;
case 'i':
top->construct_int(start);
{
char const* int_start = start;
start = find_char(start, end, 'e');
top->construct_int(int_start, start - int_start);
if (start == end) return fail_bdecode();
TORRENT_ASSERT(*start == 'e');
*start++ = 0;
++start;
stack.pop_back();
continue;
}
default:
{
using namespace std;
@ -159,45 +164,90 @@ namespace libtorrent
boost::int64_t val = 0;
bool negative = false;
if (*m_data.start == '-') negative = true;
parse_int(negative?m_data.start+1:m_data.start, m_data.start + 100, 0, val);
parse_int(negative?m_data.start+1:m_data.start, m_data.start + m_size, 'e', val);
if (negative) val = -val;
return val;
}
lazy_entry* lazy_entry::dict_append(char* name)
// appends a new item to this dictionary and returns a pointer to
// its (default constructed) value, or 0 if allocating storage failed.
// name is stored as a pointer, the characters of the key are not copied.
// storage starts out with room for 10 items and is doubled every
// time it is full
lazy_entry* lazy_entry::dict_append(char const* name)
{
TORRENT_ASSERT(m_type == dict_t);
TORRENT_ASSERT(m_size <= m_capacity);
if (m_capacity == 0)
{
// first item, allocate the initial storage
int capacity = 10;
m_data.dict = new (std::nothrow) std::pair<char*, lazy_entry>[capacity];
m_data.dict = new (std::nothrow) std::pair<char const*, lazy_entry>[capacity];
if (m_data.dict == 0) return 0;
m_capacity = capacity;
}
else if (m_size == m_capacity)
{
// the storage is full, move the items to an array twice as large
int capacity = m_capacity * 2;
std::pair<char*, lazy_entry>* tmp = new (std::nothrow) std::pair<char*, lazy_entry>[capacity];
std::pair<char const*, lazy_entry>* tmp = new (std::nothrow) std::pair<char const*, lazy_entry>[capacity];
if (tmp == 0) return 0;
// NOTE(review): the items are moved with a raw memcpy and the old
// array is then delete[]'d, which runs ~lazy_entry() (and thereby
// clear()) on every old item. If clear() releases storage owned by
// an item, the copies in tmp are left pointing at freed memory.
// Confirm, and detach the old items before deleting them if so
std::memcpy(tmp, m_data.dict, sizeof(std::pair<char*, lazy_entry>) * m_size);
std::memcpy(tmp, m_data.dict, sizeof(std::pair<char const*, lazy_entry>) * m_size);
delete[] m_data.dict;
m_data.dict = tmp;
m_capacity = capacity;
}
TORRENT_ASSERT(m_size < m_capacity);
std::pair<char*, lazy_entry>& ret = m_data.dict[m_size++];
std::pair<char const*, lazy_entry>& ret = m_data.dict[m_size++];
ret.first = name;
return &ret.second;
}
namespace
{
	// returns the number of decimal digits needed to represent
	// the given value. Values below 10 (including 0 and negative
	// values) yield 1.
	// this is used to find out how many characters the length
	// prefix of a bencoded string occupies
	int num_digits(int val)
	{
		int ret = 1;
		// >= and not >, every power of ten needs one more
		// digit than the value just below it (10 is two digits)
		while (val >= 10)
		{
			++ret;
			val /= 10;
		}
		return ret;
	}
}
// turns this (so far untyped) entry into a string entry referring
// to length bytes at start in the source buffer. Nothing is copied
// and the bytes are not null terminated
void lazy_entry::construct_string(char const* start, int length)
{
	TORRENT_ASSERT(m_type == none_t);

	// the bencoded form is "<length>:<bytes>", so the entry begins
	// before the ':' and the digits of the length prefix.
	// NOTE(review): this derives the size of the prefix from the
	// value of length, which presumes the prefix was written
	// without any leading zeros
	int const prefix_size = num_digits(length) + 1;
	m_begin = start - prefix_size;
	m_end = start + length;

	m_data.start = start;
	m_size = length;
	m_type = string_t;
}
namespace
{
	// compares a null-terminated string against a string
	// that is given as a pointer and a length.
	// str1 is null-terminated
	// str2 is not, str2 is len2 chars
	// returns true only if str1 is exactly len2 characters
	// long and all of them match str2
	bool string_equal(char const* str1, char const* str2, int len2)
	{
		while (len2 > 0)
		{
			if (*str1 != *str2) return false;
			// str1 ended before str2 did
			if (*str1 == 0) return false;
			++str1;
			++str2;
			--len2;
		}
		// all of str2 matched. The strings are only equal if str1
		// ends here as well, otherwise str2 is merely a prefix of str1
		return *str1 == 0;
	}
}
lazy_entry* lazy_entry::dict_find(char const* name)
{
TORRENT_ASSERT(m_type == dict_t);
for (int i = 0; i < m_size; ++i)
{
if (strcmp(name, m_data.dict[i].first) == 0)
std::pair<char const*, lazy_entry> const& e = m_data.dict[i];
if (string_equal(name, e.first, e.second.m_begin - e.first))
return &m_data.dict[i].second;
}
return 0;
@ -242,6 +292,12 @@ namespace libtorrent
m_type = none_t;
}
// returns the part of the source buffer holding the bencoded form
// of this entry, as a pointer to its first byte and its size in bytes
std::pair<char const*, int> lazy_entry::data_section()
{
	int const size = m_end - m_begin;
	return std::pair<char const*, int>(m_begin, size);
}
std::ostream& operator<<(std::ostream& os, lazy_entry const& e)
{
switch (e.type())
@ -251,7 +307,7 @@ namespace libtorrent
case lazy_entry::string_t:
{
bool printable = true;
char const* str = e.string_value();
char const* str = e.string_ptr();
for (int i = 0; i < e.string_length(); ++i)
{
using namespace std;
@ -259,7 +315,7 @@ namespace libtorrent
printable = false;
break;
}
if (printable) return os << str;
if (printable) return os << e.string_value();
for (int i = 0; i < e.string_length(); ++i)
os << std::hex << int((unsigned char)(str[i]));
return os;

View File

@ -77,6 +77,9 @@ int test_main()
int ret = lazy_bdecode(b, b + sizeof(b)-1, e);
TORRENT_ASSERT(ret == 0);
std::cout << e << std::endl;
std::pair<const char*, int> section = e.data_section();
TORRENT_ASSERT(memcmp(b, section.first, section.second) == 0);
TORRENT_ASSERT(section.second == sizeof(b) - 1);
TORRENT_ASSERT(e.type() == lazy_entry::int_t);
TORRENT_ASSERT(e.int_value() == 12453);
}
@ -87,6 +90,9 @@ int test_main()
int ret = lazy_bdecode(b, b + sizeof(b)-1, e);
TORRENT_ASSERT(ret == 0);
std::cout << e << std::endl;
std::pair<const char*, int> section = e.data_section();
TORRENT_ASSERT(memcmp(b, section.first, section.second) == 0);
TORRENT_ASSERT(section.second == sizeof(b) - 1);
TORRENT_ASSERT(e.type() == lazy_entry::string_t);
TORRENT_ASSERT(e.string_value() == std::string("abcdefghijklmnopqrstuvwxyz"));
TORRENT_ASSERT(e.string_length() == 26);
@ -98,6 +104,9 @@ int test_main()
int ret = lazy_bdecode(b, b + sizeof(b)-1, e);
TORRENT_ASSERT(ret == 0);
std::cout << e << std::endl;
std::pair<const char*, int> section = e.data_section();
TORRENT_ASSERT(memcmp(b, section.first, section.second) == 0);
TORRENT_ASSERT(section.second == sizeof(b) - 1);
TORRENT_ASSERT(e.type() == lazy_entry::list_t);
TORRENT_ASSERT(e.list_size() == 2);
TORRENT_ASSERT(e.list_at(0)->type() == lazy_entry::int_t);
@ -105,6 +114,9 @@ int test_main()
TORRENT_ASSERT(e.list_at(0)->int_value() == 12453);
TORRENT_ASSERT(e.list_at(1)->string_value() == std::string("aaa"));
TORRENT_ASSERT(e.list_at(1)->string_length() == 3);
section = e.list_at(1)->data_section();
TORRENT_ASSERT(memcmp("3:aaa", section.first, section.second) == 0);
TORRENT_ASSERT(section.second == 5);
}
{
@ -113,6 +125,9 @@ int test_main()
int ret = lazy_bdecode(b, b + sizeof(b)-1, e);
TORRENT_ASSERT(ret == 0);
std::cout << e << std::endl;
std::pair<const char*, int> section = e.data_section();
TORRENT_ASSERT(memcmp(b, section.first, section.second) == 0);
TORRENT_ASSERT(section.second == sizeof(b) - 1);
TORRENT_ASSERT(e.type() == lazy_entry::dict_t);
TORRENT_ASSERT(e.dict_size() == 3);
TORRENT_ASSERT(e.dict_find("a")->type() == lazy_entry::int_t);