diff --git a/include/libtorrent/lazy_entry.hpp b/include/libtorrent/lazy_entry.hpp index 19294c2c1..4c47b4a43 100644 --- a/include/libtorrent/lazy_entry.hpp +++ b/include/libtorrent/lazy_entry.hpp @@ -43,9 +43,9 @@ namespace libtorrent { struct lazy_entry; - char* parse_int(char* start, char* end, char delimiter, boost::int64_t& val); + char const* parse_int(char const* start, char const* end, char delimiter, boost::int64_t& val); // return 0 = success - int lazy_bdecode(char* start, char* end, lazy_entry& ret, int depth_limit = 1000); + int lazy_bdecode(char const* start, char const* end, lazy_entry& ret, int depth_limit = 1000); struct lazy_entry { @@ -54,16 +54,21 @@ namespace libtorrent none_t, dict_t, list_t, string_t, int_t }; - lazy_entry() : m_type(none_t) { m_data.start = 0; } + lazy_entry() : m_type(none_t), m_begin(0), m_end(0) + { m_data.start = 0; } entry_type_t type() const { return m_type; } - // start is a null terminated string (decimal number) - void construct_int(char* start) + // start points to the first character of the number (a digit or '-') + // length is the number of characters before the terminating 'e' + void construct_int(char const* start, int length) { TORRENT_ASSERT(m_type == none_t); m_type = int_t; m_data.start = start; + m_size = length; + m_begin = start - 1; // include 'i' + m_end = start + length + 1; // include 'e' } boost::int64_t int_value() const; @@ -71,36 +76,46 @@ namespace libtorrent // string functions // ================ - // start is a null terminated string - void construct_string(char* start, int length) - { - TORRENT_ASSERT(m_type == none_t); - m_type = string_t; - m_data.start = start; - m_size =length; - } + void construct_string(char const* start, int length); - char const* string_value() const + // the string is not null-terminated! + char const* string_ptr() const { TORRENT_ASSERT(m_type == string_t); return m_data.start; } + // this will return a null terminated string + // it writes a 0 just past the string, into the source buffer! 
+ char const* string_cstr() const + { + TORRENT_ASSERT(m_type == string_t); + const_cast<char*>(m_data.start)[m_size] = 0; + return m_data.start; + } + + std::string string_value() const + { + TORRENT_ASSERT(m_type == string_t); + return std::string(m_data.start, m_size); + } + int string_length() const { return m_size; } // dictionary functions // ==================== - void construct_dict() + void construct_dict(char const* begin) { TORRENT_ASSERT(m_type == none_t); m_type = dict_t; m_size = 0; m_capacity = 0; + m_begin = begin; } - lazy_entry* dict_append(char* name); + lazy_entry* dict_append(char const* name); lazy_entry* dict_find(char const* name); lazy_entry const* dict_find(char const* name) const { return const_cast<lazy_entry*>(this)->dict_find(name); } @@ -120,12 +135,13 @@ namespace libtorrent // list functions // ============== - void construct_list() + void construct_list(char const* begin) { TORRENT_ASSERT(m_type == none_t); m_type = list_t; m_size = 0; m_capacity = 0; + m_begin = begin; } lazy_entry* list_append(); @@ -144,22 +160,37 @@ namespace libtorrent return m_size; } + // end points one byte past the end + void set_end(char const* end) + { + TORRENT_ASSERT(end > m_begin); + m_end = end; + } + void clear(); ~lazy_entry() { clear(); } + // returns a pointer into the source buffer and the number + // of bytes covered by this entry's bencoded data + std::pair<char const*, int> data_section(); + private: entry_type_t m_type; union data_t { - std::pair<char*, lazy_entry>* dict; + std::pair<char const*, lazy_entry>* dict; lazy_entry* list; - char* start; + char const* start; } m_data; int m_size; // if list or dictionary, the number of items int m_capacity; // if list or dictionary, allocated number of items + // the range in the original buffer that this entry's bencoded + // data occupies (ints and strings as well as dicts and lists) + char const* m_begin; + char const* m_end; }; std::ostream& operator<<(std::ostream& os, lazy_entry const& e); diff --git a/src/lazy_bdecode.cpp b/src/lazy_bdecode.cpp index 6fc6e221b..2e2662585 100644 --- a/src/lazy_bdecode.cpp +++ 
b/src/lazy_bdecode.cpp @@ -42,7 +42,7 @@ namespace libtorrent // first occurance of the delimiter is interpreted as an int. // return the pointer to the delimiter, or 0 if there is a // parse error. val should be initialized to zero - char* parse_int(char* start, char* end, char delimiter, boost::int64_t& val) + char const* parse_int(char const* start, char const* end, char delimiter, boost::int64_t& val) { while (start < end && *start != delimiter) { @@ -55,14 +55,14 @@ namespace libtorrent return start; } - char* find_char(char* start, char* end, char delimiter) + char const* find_char(char const* start, char const* end, char delimiter) { while (start < end && *start != delimiter) ++start; return start; } // return 0 = success - int lazy_bdecode(char* start, char* end, lazy_entry& ret, int depth_limit) + int lazy_bdecode(char const* start, char const* end, lazy_entry& ret, int depth_limit) { ret.clear(); if (start == end) return 0; @@ -76,10 +76,10 @@ namespace libtorrent lazy_entry* top = stack.back(); - if (stack.size() > depth_limit) return fail_bdecode(); + if (int(stack.size()) > depth_limit) return fail_bdecode(); if (start == end) return fail_bdecode(); char t = *start; - *start++ = 0; // null terminate any previous string + ++start; if (start == end && t != 'e') return fail_bdecode(); switch (top->type()) @@ -88,6 +88,7 @@ namespace libtorrent { if (t == 'e') { + top->set_end(start); stack.pop_back(); continue; } @@ -99,13 +100,14 @@ namespace libtorrent start += len; stack.push_back(ent); t = *start; - *start++ = 0; // null terminate any previous string + ++start; break; } case lazy_entry::list_t: { if (t == 'e') { + top->set_end(start); stack.pop_back(); continue; } @@ -120,19 +122,22 @@ namespace libtorrent switch (t) { case 'd': - top->construct_dict(); + top->construct_dict(start - 1); continue; case 'l': - top->construct_list(); + top->construct_list(start - 1); continue; case 'i': - top->construct_int(start); + { + char const* int_start = start; 
start = find_char(start, end, 'e'); + top->construct_int(int_start, start - int_start); if (start == end) return fail_bdecode(); TORRENT_ASSERT(*start == 'e'); - *start++ = 0; + ++start; stack.pop_back(); continue; + } default: { using namespace std; @@ -159,45 +164,90 @@ namespace libtorrent boost::int64_t val = 0; bool negative = false; if (*m_data.start == '-') negative = true; - parse_int(negative?m_data.start+1:m_data.start, m_data.start + 100, 0, val); + parse_int(negative?m_data.start+1:m_data.start, m_data.start + m_size, 'e', val); if (negative) val = -val; return val; } - lazy_entry* lazy_entry::dict_append(char* name) + lazy_entry* lazy_entry::dict_append(char const* name) { TORRENT_ASSERT(m_type == dict_t); TORRENT_ASSERT(m_size <= m_capacity); if (m_capacity == 0) { int capacity = 10; - m_data.dict = new (std::nothrow) std::pair<char*, lazy_entry>[capacity]; + m_data.dict = new (std::nothrow) std::pair<char const*, lazy_entry>[capacity]; if (m_data.dict == 0) return 0; m_capacity = capacity; } else if (m_size == m_capacity) { int capacity = m_capacity * 2; - std::pair<char*, lazy_entry>* tmp = new (std::nothrow) std::pair<char*, lazy_entry>[capacity]; + std::pair<char const*, lazy_entry>* tmp = new (std::nothrow) std::pair<char const*, lazy_entry>[capacity]; if (tmp == 0) return 0; - std::memcpy(tmp, m_data.dict, sizeof(std::pair<char*, lazy_entry>) * m_size); + std::memcpy(tmp, m_data.dict, sizeof(std::pair<char const*, lazy_entry>) * m_size); delete[] m_data.dict; m_data.dict = tmp; m_capacity = capacity; } TORRENT_ASSERT(m_size < m_capacity); - std::pair<char*, lazy_entry>& ret = m_data.dict[m_size++]; + std::pair<char const*, lazy_entry>& ret = m_data.dict[m_size++]; ret.first = name; return &ret.second; } + namespace + { + // the number of decimal digits needed to represent + // the given non-negative value (e.g. 9 -> 1, 10 -> 2) + int num_digits(int val) + { + int ret = 1; + while (val >= 10) + { + ++ret; + val /= 10; + } + return ret; + } + } + + void lazy_entry::construct_string(char const* start, int length) + { + TORRENT_ASSERT(m_type == none_t); + m_type = string_t; + m_data.start = start; + m_size = length; + m_begin = start - 1 - num_digits(length); + m_end = start + length; + } + + 
namespace + { + // str1 is null-terminated; str2 is not, it is len2 chars + // returns true only if str1 is exactly those len2 chars + bool string_equal(char const* str1, char const* str2, int len2) + { + while (len2 > 0) + { + if (*str1 != *str2) return false; + if (*str1 == 0) return false; + ++str1; + ++str2; + --len2; + } + return *str1 == 0; + } + } + lazy_entry* lazy_entry::dict_find(char const* name) { TORRENT_ASSERT(m_type == dict_t); for (int i = 0; i < m_size; ++i) { - if (strcmp(name, m_data.dict[i].first) == 0) + std::pair<char const*, lazy_entry> const& e = m_data.dict[i]; + if (string_equal(name, e.first, e.second.m_begin - e.first)) return &m_data.dict[i].second; } return 0; @@ -242,6 +292,12 @@ namespace libtorrent m_type = none_t; } + std::pair<char const*, int> lazy_entry::data_section() + { + typedef std::pair<char const*, int> return_t; + return return_t(m_begin, m_end - m_begin); + } + std::ostream& operator<<(std::ostream& os, lazy_entry const& e) { switch (e.type()) @@ -251,7 +307,7 @@ namespace libtorrent case lazy_entry::string_t: { bool printable = true; - char const* str = e.string_value(); + char const* str = e.string_ptr(); for (int i = 0; i < e.string_length(); ++i) { using namespace std; @@ -259,7 +315,7 @@ namespace libtorrent printable = false; break; } - if (printable) return os << str; + if (printable) return os << e.string_value(); for (int i = 0; i < e.string_length(); ++i) os << std::hex << int((unsigned char)(str[i])); return os; diff --git a/test/test_bencoding.cpp b/test/test_bencoding.cpp index b15ec92d7..ec4e45771 100644 --- a/test/test_bencoding.cpp +++ b/test/test_bencoding.cpp @@ -77,6 +77,9 @@ int test_main() int ret = lazy_bdecode(b, b + sizeof(b)-1, e); TORRENT_ASSERT(ret == 0); std::cout << e << std::endl; + std::pair<char const*, int> section = e.data_section(); + TORRENT_ASSERT(memcmp(b, section.first, section.second) == 0); + TORRENT_ASSERT(section.second == sizeof(b) - 1); TORRENT_ASSERT(e.type() == lazy_entry::int_t); TORRENT_ASSERT(e.int_value() == 12453); } @@ -87,6 +90,9 @@ int test_main() int ret = lazy_bdecode(b, b + sizeof(b)-1, e); 
TORRENT_ASSERT(ret == 0); std::cout << e << std::endl; + std::pair<char const*, int> section = e.data_section(); + TORRENT_ASSERT(memcmp(b, section.first, section.second) == 0); + TORRENT_ASSERT(section.second == sizeof(b) - 1); TORRENT_ASSERT(e.type() == lazy_entry::string_t); TORRENT_ASSERT(e.string_value() == std::string("abcdefghijklmnopqrstuvwxyz")); TORRENT_ASSERT(e.string_length() == 26); @@ -98,6 +104,9 @@ int test_main() int ret = lazy_bdecode(b, b + sizeof(b)-1, e); TORRENT_ASSERT(ret == 0); std::cout << e << std::endl; + std::pair<char const*, int> section = e.data_section(); + TORRENT_ASSERT(memcmp(b, section.first, section.second) == 0); + TORRENT_ASSERT(section.second == sizeof(b) - 1); TORRENT_ASSERT(e.type() == lazy_entry::list_t); TORRENT_ASSERT(e.list_size() == 2); TORRENT_ASSERT(e.list_at(0)->type() == lazy_entry::int_t); @@ -105,6 +114,9 @@ int test_main() TORRENT_ASSERT(e.list_at(0)->int_value() == 12453); TORRENT_ASSERT(e.list_at(1)->string_value() == std::string("aaa")); TORRENT_ASSERT(e.list_at(1)->string_length() == 3); + section = e.list_at(1)->data_section(); + TORRENT_ASSERT(memcmp("3:aaa", section.first, section.second) == 0); + TORRENT_ASSERT(section.second == 5); } { @@ -113,6 +125,9 @@ int test_main() int ret = lazy_bdecode(b, b + sizeof(b)-1, e); TORRENT_ASSERT(ret == 0); std::cout << e << std::endl; + std::pair<char const*, int> section = e.data_section(); + TORRENT_ASSERT(memcmp(b, section.first, section.second) == 0); + TORRENT_ASSERT(section.second == sizeof(b) - 1); TORRENT_ASSERT(e.type() == lazy_entry::dict_t); TORRENT_ASSERT(e.dict_size() == 3); TORRENT_ASSERT(e.dict_find("a")->type() == lazy_entry::int_t);