improvements to lazy_bdecode

This commit is contained in:
Arvid Norberg 2008-04-13 22:34:04 +00:00
parent 644d3aa66c
commit 748a66c343
3 changed files with 140 additions and 38 deletions

View File

@ -43,9 +43,9 @@ namespace libtorrent
{ {
struct lazy_entry; struct lazy_entry;
char* parse_int(char* start, char* end, char delimiter, boost::int64_t& val); char const* parse_int(char const* start, char const* end, char delimiter, boost::int64_t& val);
// return 0 = success // return 0 = success
int lazy_bdecode(char* start, char* end, lazy_entry& ret, int depth_limit = 1000); int lazy_bdecode(char const* start, char const* end, lazy_entry& ret, int depth_limit = 1000);
struct lazy_entry struct lazy_entry
{ {
@ -54,16 +54,21 @@ namespace libtorrent
none_t, dict_t, list_t, string_t, int_t none_t, dict_t, list_t, string_t, int_t
}; };
lazy_entry() : m_type(none_t) { m_data.start = 0; } lazy_entry() : m_type(none_t), m_begin(0), m_end(0)
{ m_data.start = 0; }
entry_type_t type() const { return m_type; } entry_type_t type() const { return m_type; }
// start is a null terminated string (decimal number) // start points to the first decimal digit
void construct_int(char* start) // length is the number of digits
void construct_int(char const* start, int length)
{ {
TORRENT_ASSERT(m_type == none_t); TORRENT_ASSERT(m_type == none_t);
m_type = int_t; m_type = int_t;
m_data.start = start; m_data.start = start;
m_size = length;
m_begin = start - 1; // include 'i'
m_end = start + length + 1; // include 'e'
} }
boost::int64_t int_value() const; boost::int64_t int_value() const;
@ -71,36 +76,46 @@ namespace libtorrent
// string functions // string functions
// ================ // ================
// start is a null terminated string void construct_string(char const* start, int length);
void construct_string(char* start, int length)
{
TORRENT_ASSERT(m_type == none_t);
m_type = string_t;
m_data.start = start;
m_size =length;
}
char const* string_value() const // the string is not null-terminated!
char const* string_ptr() const
{ {
TORRENT_ASSERT(m_type == string_t); TORRENT_ASSERT(m_type == string_t);
return m_data.start; return m_data.start;
} }
// returns the string as a null-terminated C string.
// to do so it writes a 0 byte into the source buffer, directly
// after the last character of the string (the buffer is modified
// even though this member is const).
// NOTE(review): presumably the caller must guarantee that the byte
// following the string is writable -- confirm for strings that end
// exactly at the end of the buffer
char const* string_cstr() const
{
	TORRENT_ASSERT(m_type == string_t);
	char* const writable = const_cast<char*>(m_data.start);
	writable[m_size] = 0;
	return m_data.start;
}
// returns a copy of the string's m_size characters, starting
// at m_data.start, as a std::string. the source buffer is
// left untouched
std::string string_value() const
{
	TORRENT_ASSERT(m_type == string_t);
	std::string ret(m_data.start, m_size);
	return ret;
}
int string_length() const int string_length() const
{ return m_size; } { return m_size; }
// dictionary functions // dictionary functions
// ==================== // ====================
void construct_dict() void construct_dict(char const* begin)
{ {
TORRENT_ASSERT(m_type == none_t); TORRENT_ASSERT(m_type == none_t);
m_type = dict_t; m_type = dict_t;
m_size = 0; m_size = 0;
m_capacity = 0; m_capacity = 0;
m_begin = begin;
} }
lazy_entry* dict_append(char* name); lazy_entry* dict_append(char const* name);
lazy_entry* dict_find(char const* name); lazy_entry* dict_find(char const* name);
lazy_entry const* dict_find(char const* name) const lazy_entry const* dict_find(char const* name) const
{ return const_cast<lazy_entry*>(this)->dict_find(name); } { return const_cast<lazy_entry*>(this)->dict_find(name); }
@ -120,12 +135,13 @@ namespace libtorrent
// list functions // list functions
// ============== // ==============
void construct_list() void construct_list(char const* begin)
{ {
TORRENT_ASSERT(m_type == none_t); TORRENT_ASSERT(m_type == none_t);
m_type = list_t; m_type = list_t;
m_size = 0; m_size = 0;
m_capacity = 0; m_capacity = 0;
m_begin = begin;
} }
lazy_entry* list_append(); lazy_entry* list_append();
@ -144,22 +160,37 @@ namespace libtorrent
return m_size; return m_size;
} }
// records where the bencoded data of this entry ends in the
// source buffer. end points one byte past the last byte of
// the entry, and is asserted to lie after the recorded beginning
void set_end(char const* end)
{
	TORRENT_ASSERT(m_begin < end);
	m_end = end;
}
void clear(); void clear();
~lazy_entry() ~lazy_entry()
{ clear(); } { clear(); }
// returns pointers into the source buffer where
// this entry has its bencoded data
std::pair<char const*, int> data_section();
private: private:
entry_type_t m_type; entry_type_t m_type;
union data_t union data_t
{ {
std::pair<char*, lazy_entry>* dict; std::pair<char const*, lazy_entry>* dict;
lazy_entry* list; lazy_entry* list;
char* start; char const* start;
} m_data; } m_data;
int m_size; // if list or dictionary, the number of items int m_size; // if list or dictionary, the number of items
int m_capacity; // if list or dictionary, allocated number of items int m_capacity; // if list or dictionary, allocated number of items
// used for dictionaries and lists to record the range
// in the original buffer they are based on
char const* m_begin;
char const* m_end;
}; };
std::ostream& operator<<(std::ostream& os, lazy_entry const& e); std::ostream& operator<<(std::ostream& os, lazy_entry const& e);

View File

@ -42,7 +42,7 @@ namespace libtorrent
// first occurrence of the delimiter is interpreted as an int. // first occurrence of the delimiter is interpreted as an int.
// return the pointer to the delimiter, or 0 if there is a // return the pointer to the delimiter, or 0 if there is a
// parse error. val should be initialized to zero // parse error. val should be initialized to zero
char* parse_int(char* start, char* end, char delimiter, boost::int64_t& val) char const* parse_int(char const* start, char const* end, char delimiter, boost::int64_t& val)
{ {
while (start < end && *start != delimiter) while (start < end && *start != delimiter)
{ {
@ -55,14 +55,14 @@ namespace libtorrent
return start; return start;
} }
char* find_char(char* start, char* end, char delimiter) char const* find_char(char const* start, char const* end, char delimiter)
{ {
while (start < end && *start != delimiter) ++start; while (start < end && *start != delimiter) ++start;
return start; return start;
} }
// return 0 = success // return 0 = success
int lazy_bdecode(char* start, char* end, lazy_entry& ret, int depth_limit) int lazy_bdecode(char const* start, char const* end, lazy_entry& ret, int depth_limit)
{ {
ret.clear(); ret.clear();
if (start == end) return 0; if (start == end) return 0;
@ -76,10 +76,10 @@ namespace libtorrent
lazy_entry* top = stack.back(); lazy_entry* top = stack.back();
if (stack.size() > depth_limit) return fail_bdecode(); if (int(stack.size()) > depth_limit) return fail_bdecode();
if (start == end) return fail_bdecode(); if (start == end) return fail_bdecode();
char t = *start; char t = *start;
*start++ = 0; // null terminate any previous string ++start;
if (start == end && t != 'e') return fail_bdecode(); if (start == end && t != 'e') return fail_bdecode();
switch (top->type()) switch (top->type())
@ -88,6 +88,7 @@ namespace libtorrent
{ {
if (t == 'e') if (t == 'e')
{ {
top->set_end(start);
stack.pop_back(); stack.pop_back();
continue; continue;
} }
@ -99,13 +100,14 @@ namespace libtorrent
start += len; start += len;
stack.push_back(ent); stack.push_back(ent);
t = *start; t = *start;
*start++ = 0; // null terminate any previous string ++start;
break; break;
} }
case lazy_entry::list_t: case lazy_entry::list_t:
{ {
if (t == 'e') if (t == 'e')
{ {
top->set_end(start);
stack.pop_back(); stack.pop_back();
continue; continue;
} }
@ -120,19 +122,22 @@ namespace libtorrent
switch (t) switch (t)
{ {
case 'd': case 'd':
top->construct_dict(); top->construct_dict(start - 1);
continue; continue;
case 'l': case 'l':
top->construct_list(); top->construct_list(start - 1);
continue; continue;
case 'i': case 'i':
top->construct_int(start); {
char const* int_start = start;
start = find_char(start, end, 'e'); start = find_char(start, end, 'e');
top->construct_int(int_start, start - int_start);
if (start == end) return fail_bdecode(); if (start == end) return fail_bdecode();
TORRENT_ASSERT(*start == 'e'); TORRENT_ASSERT(*start == 'e');
*start++ = 0; ++start;
stack.pop_back(); stack.pop_back();
continue; continue;
}
default: default:
{ {
using namespace std; using namespace std;
@ -159,45 +164,90 @@ namespace libtorrent
boost::int64_t val = 0; boost::int64_t val = 0;
bool negative = false; bool negative = false;
if (*m_data.start == '-') negative = true; if (*m_data.start == '-') negative = true;
parse_int(negative?m_data.start+1:m_data.start, m_data.start + 100, 0, val); parse_int(negative?m_data.start+1:m_data.start, m_data.start + m_size, 'e', val);
if (negative) val = -val; if (negative) val = -val;
return val; return val;
} }
lazy_entry* lazy_entry::dict_append(char* name) lazy_entry* lazy_entry::dict_append(char const* name)
{ {
TORRENT_ASSERT(m_type == dict_t); TORRENT_ASSERT(m_type == dict_t);
TORRENT_ASSERT(m_size <= m_capacity); TORRENT_ASSERT(m_size <= m_capacity);
if (m_capacity == 0) if (m_capacity == 0)
{ {
int capacity = 10; int capacity = 10;
m_data.dict = new (std::nothrow) std::pair<char*, lazy_entry>[capacity]; m_data.dict = new (std::nothrow) std::pair<char const*, lazy_entry>[capacity];
if (m_data.dict == 0) return 0; if (m_data.dict == 0) return 0;
m_capacity = capacity; m_capacity = capacity;
} }
else if (m_size == m_capacity) else if (m_size == m_capacity)
{ {
int capacity = m_capacity * 2; int capacity = m_capacity * 2;
std::pair<char*, lazy_entry>* tmp = new (std::nothrow) std::pair<char*, lazy_entry>[capacity]; std::pair<char const*, lazy_entry>* tmp = new (std::nothrow) std::pair<char const*, lazy_entry>[capacity];
if (tmp == 0) return 0; if (tmp == 0) return 0;
std::memcpy(tmp, m_data.dict, sizeof(std::pair<char*, lazy_entry>) * m_size); std::memcpy(tmp, m_data.dict, sizeof(std::pair<char const*, lazy_entry>) * m_size);
delete[] m_data.dict; delete[] m_data.dict;
m_data.dict = tmp; m_data.dict = tmp;
m_capacity = capacity; m_capacity = capacity;
} }
TORRENT_ASSERT(m_size < m_capacity); TORRENT_ASSERT(m_size < m_capacity);
std::pair<char*, lazy_entry>& ret = m_data.dict[m_size++]; std::pair<char const*, lazy_entry>& ret = m_data.dict[m_size++];
ret.first = name; ret.first = name;
return &ret.second; return &ret.second;
} }
namespace
{
	// returns the number of decimal digits needed to represent
	// the given value (values below 10 yield 1).
	// construct_string() uses this to step back over the decimal
	// length prefix of a bencoded string
	int num_digits(int val)
	{
		int ret = 1;
		// the comparison has to be >= (not >), otherwise exact
		// powers of ten (10, 100, ...) are reported with one
		// digit too few
		while (val >= 10)
		{
			++ret;
			val /= 10;
		}
		return ret;
	}
}
// turns this entry (which must still be of type none_t) into a
// string entry. start points to the first character of the string
// in the source buffer and length is its number of characters.
// the recorded data section [m_begin, m_end) also covers the
// decimal length and the separator character preceding the string.
// NOTE(review): this assumes the length prefix in the buffer has
// exactly num_digits(length) characters (e.g. no leading zeros) --
// confirm against the parser
void lazy_entry::construct_string(char const* start, int length)
{
	TORRENT_ASSERT(m_type == none_t);

	// decimal length digits plus the one separator character
	int const prefix_len = num_digits(length) + 1;

	m_begin = start - prefix_len;
	m_end = start + length;
	m_size = length;
	m_data.start = start;
	m_type = string_t;
}
namespace
{
	// compares a null-terminated string against a
	// length-delimited one.
	// str1 is null-terminated
	// str2 is not, str2 is len2 chars
	// returns true only if str1 is exactly len2 characters
	// long and every character equals the one in str2
	bool string_equal(char const* str1, char const* str2, int len2)
	{
		while (len2 > 0)
		{
			if (*str1 != *str2) return false;
			// both characters are 0 here: str2 contains an embedded
			// null, which a null-terminated str1 can never match
			if (*str1 == 0) return false;
			++str1;
			++str2;
			--len2;
		}
		// str1 has to end here as well. otherwise str2 is merely
		// a prefix of str1 and the strings are not equal
		return *str1 == 0;
	}
}
lazy_entry* lazy_entry::dict_find(char const* name) lazy_entry* lazy_entry::dict_find(char const* name)
{ {
TORRENT_ASSERT(m_type == dict_t); TORRENT_ASSERT(m_type == dict_t);
for (int i = 0; i < m_size; ++i) for (int i = 0; i < m_size; ++i)
{ {
if (strcmp(name, m_data.dict[i].first) == 0) std::pair<char const*, lazy_entry> const& e = m_data.dict[i];
if (string_equal(name, e.first, e.second.m_begin - e.first))
return &m_data.dict[i].second; return &m_data.dict[i].second;
} }
return 0; return 0;
@ -242,6 +292,12 @@ namespace libtorrent
m_type = none_t; m_type = none_t;
} }
// returns the recorded range of this entry in the source buffer
// as a (pointer, length) pair: first is m_begin and second is the
// number of bytes between m_begin and m_end
std::pair<char const*, int> lazy_entry::data_section()
{
	std::pair<char const*, int> section(m_begin, m_end - m_begin);
	return section;
}
std::ostream& operator<<(std::ostream& os, lazy_entry const& e) std::ostream& operator<<(std::ostream& os, lazy_entry const& e)
{ {
switch (e.type()) switch (e.type())
@ -251,7 +307,7 @@ namespace libtorrent
case lazy_entry::string_t: case lazy_entry::string_t:
{ {
bool printable = true; bool printable = true;
char const* str = e.string_value(); char const* str = e.string_ptr();
for (int i = 0; i < e.string_length(); ++i) for (int i = 0; i < e.string_length(); ++i)
{ {
using namespace std; using namespace std;
@ -259,7 +315,7 @@ namespace libtorrent
printable = false; printable = false;
break; break;
} }
if (printable) return os << str; if (printable) return os << e.string_value();
for (int i = 0; i < e.string_length(); ++i) for (int i = 0; i < e.string_length(); ++i)
os << std::hex << int((unsigned char)(str[i])); os << std::hex << int((unsigned char)(str[i]));
return os; return os;

View File

@ -77,6 +77,9 @@ int test_main()
int ret = lazy_bdecode(b, b + sizeof(b)-1, e); int ret = lazy_bdecode(b, b + sizeof(b)-1, e);
TORRENT_ASSERT(ret == 0); TORRENT_ASSERT(ret == 0);
std::cout << e << std::endl; std::cout << e << std::endl;
std::pair<const char*, int> section = e.data_section();
TORRENT_ASSERT(memcmp(b, section.first, section.second) == 0);
TORRENT_ASSERT(section.second == sizeof(b) - 1);
TORRENT_ASSERT(e.type() == lazy_entry::int_t); TORRENT_ASSERT(e.type() == lazy_entry::int_t);
TORRENT_ASSERT(e.int_value() == 12453); TORRENT_ASSERT(e.int_value() == 12453);
} }
@ -87,6 +90,9 @@ int test_main()
int ret = lazy_bdecode(b, b + sizeof(b)-1, e); int ret = lazy_bdecode(b, b + sizeof(b)-1, e);
TORRENT_ASSERT(ret == 0); TORRENT_ASSERT(ret == 0);
std::cout << e << std::endl; std::cout << e << std::endl;
std::pair<const char*, int> section = e.data_section();
TORRENT_ASSERT(memcmp(b, section.first, section.second) == 0);
TORRENT_ASSERT(section.second == sizeof(b) - 1);
TORRENT_ASSERT(e.type() == lazy_entry::string_t); TORRENT_ASSERT(e.type() == lazy_entry::string_t);
TORRENT_ASSERT(e.string_value() == std::string("abcdefghijklmnopqrstuvwxyz")); TORRENT_ASSERT(e.string_value() == std::string("abcdefghijklmnopqrstuvwxyz"));
TORRENT_ASSERT(e.string_length() == 26); TORRENT_ASSERT(e.string_length() == 26);
@ -98,6 +104,9 @@ int test_main()
int ret = lazy_bdecode(b, b + sizeof(b)-1, e); int ret = lazy_bdecode(b, b + sizeof(b)-1, e);
TORRENT_ASSERT(ret == 0); TORRENT_ASSERT(ret == 0);
std::cout << e << std::endl; std::cout << e << std::endl;
std::pair<const char*, int> section = e.data_section();
TORRENT_ASSERT(memcmp(b, section.first, section.second) == 0);
TORRENT_ASSERT(section.second == sizeof(b) - 1);
TORRENT_ASSERT(e.type() == lazy_entry::list_t); TORRENT_ASSERT(e.type() == lazy_entry::list_t);
TORRENT_ASSERT(e.list_size() == 2); TORRENT_ASSERT(e.list_size() == 2);
TORRENT_ASSERT(e.list_at(0)->type() == lazy_entry::int_t); TORRENT_ASSERT(e.list_at(0)->type() == lazy_entry::int_t);
@ -105,6 +114,9 @@ int test_main()
TORRENT_ASSERT(e.list_at(0)->int_value() == 12453); TORRENT_ASSERT(e.list_at(0)->int_value() == 12453);
TORRENT_ASSERT(e.list_at(1)->string_value() == std::string("aaa")); TORRENT_ASSERT(e.list_at(1)->string_value() == std::string("aaa"));
TORRENT_ASSERT(e.list_at(1)->string_length() == 3); TORRENT_ASSERT(e.list_at(1)->string_length() == 3);
section = e.list_at(1)->data_section();
TORRENT_ASSERT(memcmp("3:aaa", section.first, section.second) == 0);
TORRENT_ASSERT(section.second == 5);
} }
{ {
@ -113,6 +125,9 @@ int test_main()
int ret = lazy_bdecode(b, b + sizeof(b)-1, e); int ret = lazy_bdecode(b, b + sizeof(b)-1, e);
TORRENT_ASSERT(ret == 0); TORRENT_ASSERT(ret == 0);
std::cout << e << std::endl; std::cout << e << std::endl;
std::pair<const char*, int> section = e.data_section();
TORRENT_ASSERT(memcmp(b, section.first, section.second) == 0);
TORRENT_ASSERT(section.second == sizeof(b) - 1);
TORRENT_ASSERT(e.type() == lazy_entry::dict_t); TORRENT_ASSERT(e.type() == lazy_entry::dict_t);
TORRENT_ASSERT(e.dict_size() == 3); TORRENT_ASSERT(e.dict_size() == 3);
TORRENT_ASSERT(e.dict_find("a")->type() == lazy_entry::int_t); TORRENT_ASSERT(e.dict_find("a")->type() == lazy_entry::int_t);