/*

Copyright (c) 2015, Arvid Norberg
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:

    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in
      the documentation and/or other materials provided with the distribution.
    * Neither the name of the author nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

*/
#include "libtorrent/bdecode.hpp"
#include "libtorrent/alloca.hpp"
#include <boost/system/error_code.hpp>
#include <limits>
#include <cstring> // for memset

#ifndef BOOST_SYSTEM_NOEXCEPT
#define BOOST_SYSTEM_NOEXCEPT throw()
#endif
namespace libtorrent
{
	using detail::bdecode_token;

	namespace
	{
	bool numeric(char c) { return c >= '0' && c <= '9'; }

	// finds the end of an integer and verifies that it looks valid. This
	// does not detect all overflows, just the ones that are an order of
	// magnitude beyond. Exact overflow checking is done when the integer
	// value is queried from a bdecode_node.
	char const* check_integer(char const* start, char const* end
		, bdecode_errors::error_code_enum& e)
	{
		if (start == end)
		{
			e = bdecode_errors::unexpected_eof;
			return start;
		}

		if (*start == '-')
		{
			++start;
			if (start == end)
			{
				e = bdecode_errors::unexpected_eof;
				return start;
			}
		}

		int digits = 0;
		do
		{
			if (!numeric(*start))
			{
				e = bdecode_errors::expected_digit;
				break;
			}
			++start;
			++digits;

			if (start == end)
			{
				e = bdecode_errors::unexpected_eof;
				break;
			}
		}
		while (*start != 'e');

		if (digits > 20)
		{
			e = bdecode_errors::overflow;
		}

		return start;
	}

	struct stack_frame
	{
		stack_frame(int t): token(t), state(0) {}
		// this is an index into m_tokens
		boost::uint32_t token:31;
		// this is used for dictionaries to indicate whether we're
		// reading a key or a value. 0 means key, 1 means value
		boost::uint32_t state:1;
	};

	// str1 is null-terminated
	// str2 is not, str2 is len2 chars
	bool string_equal(char const* str1, char const* str2, int len2)
	{
		while (len2 > 0)
		{
			if (*str1 != *str2) return false;
			if (*str1 == 0) return false;
			++str1;
			++str2;
			--len2;
		}
		return *str1 == 0;
	}

	} // anonymous namespace

	// fills in 'val' with what the string between start and the
	// first occurrence of the delimiter is interpreted as an int.
	// returns a pointer to the delimiter, or to the position where
	// parsing failed (in which case ec is set). val should be
	// initialized to zero
	char const* parse_int(char const* start, char const* end, char delimiter
		, boost::int64_t& val, bdecode_errors::error_code_enum& ec)
	{
		while (start < end && *start != delimiter)
		{
			if (!numeric(*start))
			{
				ec = bdecode_errors::expected_digit;
				return start;
			}
			if (val > (std::numeric_limits<boost::int64_t>::max)() / 10)
			{
				ec = bdecode_errors::overflow;
				return start;
			}
			val *= 10;
			int digit = *start - '0';
			if (val > (std::numeric_limits<boost::int64_t>::max)() - digit)
			{
				ec = bdecode_errors::overflow;
				return start;
			}
			val += digit;
			++start;
		}
		// don't dereference 'start' if we ran off the end of the buffer
		if (start == end || *start != delimiter)
			ec = bdecode_errors::expected_colon;
		return start;
	}

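	// boost.system error category for bdecode errors. It provides the
	// human readable messages for bdecode_errors::error_code_enum values
	// (see message() below).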
	struct bdecode_error_category : boost::system::error_category
	{
		virtual const char* name() const BOOST_SYSTEM_NOEXCEPT;
		virtual std::string message(int ev) const BOOST_SYSTEM_NOEXCEPT;
		virtual boost::system::error_condition default_error_condition(
			int ev) const BOOST_SYSTEM_NOEXCEPT
		{ return boost::system::error_condition(ev, *this); }
	};

	const char* bdecode_error_category::name() const BOOST_SYSTEM_NOEXCEPT
	{
		return "bdecode error";
	}

	std::string bdecode_error_category::message(int ev) const BOOST_SYSTEM_NOEXCEPT
	{
		static char const* msgs[] =
		{
			"no error",
			"expected digit in bencoded string",
			"expected colon in bencoded string",
			"unexpected end of file in bencoded string",
			"expected value (list, dict, int or string) in bencoded string",
			"bencoded nesting depth exceeded",
			"bencoded item count limit exceeded",
			"integer overflow",
		};
		if (ev < 0 || ev >= int(sizeof(msgs)/sizeof(msgs[0])))
			return "Unknown error";
		return msgs[ev];
	}

	boost::system::error_category& get_bdecode_category()
	{
		static bdecode_error_category bdecode_category;
		return bdecode_category;
	}

	namespace bdecode_errors
	{
		boost::system::error_code make_error_code(error_code_enum e)
		{
			return boost::system::error_code(e, get_bdecode_category());
		}
	}

	bdecode_node::bdecode_node()
		: m_root_tokens(0)
		, m_buffer(NULL)
		, m_buffer_size(0)
		, m_token_idx(-1)
		, m_last_index(-1)
		, m_last_token(-1)
		, m_size(-1)
	{}

	bdecode_node::bdecode_node(bdecode_node const& n)
		: m_tokens(n.m_tokens)
		, m_root_tokens(n.m_root_tokens)
		, m_buffer(n.m_buffer)
		, m_buffer_size(n.m_buffer_size)
		, m_token_idx(n.m_token_idx)
		, m_last_index(n.m_last_index)
		, m_last_token(n.m_last_token)
		, m_size(n.m_size)
	{
		(*this) = n;
	}

	bdecode_node& bdecode_node::operator=(bdecode_node const& n)
	{
		m_tokens = n.m_tokens;
		m_root_tokens = n.m_root_tokens;
		m_buffer = n.m_buffer;
		m_buffer_size = n.m_buffer_size;
		m_token_idx = n.m_token_idx;
		m_last_index = n.m_last_index;
		m_last_token = n.m_last_token;
		m_size = n.m_size;
		if (!m_tokens.empty())
		{
			// if this is a root, make the token pointer
			// point to our storage
			m_root_tokens = &m_tokens[0];
		}
		return *this;
	}

	bdecode_node::bdecode_node(bdecode_token const* tokens, char const* buf
		, int len, int idx)
		: m_root_tokens(tokens)
		, m_buffer(buf)
		, m_buffer_size(len)
		, m_token_idx(idx)
		, m_last_index(-1)
		, m_last_token(-1)
		, m_size(-1)
	{
		TORRENT_ASSERT(tokens != NULL);
		TORRENT_ASSERT(idx >= 0);
	}

	bdecode_node bdecode_node::non_owning() const
	{
		// if we're not a root, just return a copy of ourself
		if (m_tokens.empty()) return *this;

		// otherwise, return a reference to this node, but without
		// being an owning root node
		return bdecode_node(&m_tokens[0], m_buffer, m_buffer_size, m_token_idx);
	}

	void bdecode_node::clear()
	{
		m_tokens.clear();
		m_root_tokens = NULL;
		m_token_idx = -1;
		m_size = -1;
		m_last_index = -1;
		m_last_token = -1;
	}

	void bdecode_node::switch_underlying_buffer(char const* buf)
	{
		TORRENT_ASSERT(!m_tokens.empty());
		if (m_tokens.empty()) return;

		m_buffer = buf;
	}

	bdecode_node::type_t bdecode_node::type() const
	{
		if (m_token_idx == -1) return none_t;
		return (bdecode_node::type_t)m_root_tokens[m_token_idx].type;
	}

	bdecode_node::operator bool() const
	{ return m_token_idx != -1; }

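	// returns a pointer to, and the length of, the raw bencoded bytes this
	// node spans in the original buffer. The length is computed from the
	// offset of the token following this subtree.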
	std::pair<char const*, int> bdecode_node::data_section() const
	{
		if (m_token_idx == -1) return std::make_pair(m_buffer, 0);

		TORRENT_ASSERT(m_token_idx != -1);
		bdecode_token const& t = m_root_tokens[m_token_idx];
		bdecode_token const& next = m_root_tokens[m_token_idx + t.next_item];
		return std::make_pair(m_buffer + t.offset, next.offset - t.offset);
	}

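	// list items are stored as a linked sequence of tokens, so access by
	// index is a linear walk. m_last_token / m_last_index cache the most
	// recently visited position, which makes sequential iteration
	// effectively constant time per item.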
	bdecode_node bdecode_node::list_at(int i) const
	{
		TORRENT_ASSERT(type() == list_t);
		TORRENT_ASSERT(i >= 0);

		// make sure this is a list.
		bdecode_token const* tokens = m_root_tokens;

		// this is the first item
		int token = m_token_idx + 1;
		int item = 0;

		// do we have a lookup cached?
		if (m_last_index <= i && m_last_index != -1)
		{
			token = m_last_token;
			item = m_last_index;
		}

		while (item < i)
		{
			token += tokens[token].next_item;
			++item;

			// index 'i' out of range
			TORRENT_ASSERT(tokens[token].type != bdecode_token::end);
		}

		m_last_token = token;
		m_last_index = i;

		return bdecode_node(tokens, m_buffer, m_buffer_size, token);
	}

	std::string bdecode_node::list_string_value_at(int i
		, char const* default_val)
	{
		bdecode_node n = list_at(i);
		if (n.type() != bdecode_node::string_t) return default_val;
		return n.string_value();
	}

	boost::int64_t bdecode_node::list_int_value_at(int i
		, boost::int64_t default_val)
	{
		bdecode_node n = list_at(i);
		if (n.type() != bdecode_node::int_t) return default_val;
		return n.int_value();
	}

	int bdecode_node::list_size() const
	{
		TORRENT_ASSERT(type() == list_t);

		if (m_size != -1) return m_size;

		// make sure this is a list.
		bdecode_token const* tokens = m_root_tokens;
		TORRENT_ASSERT(tokens[m_token_idx].type == bdecode_token::list);

		// this is the first item
		int token = m_token_idx + 1;
		int ret = 0;

		// do we have a lookup cached?
		if (m_last_index != -1)
		{
			token = m_last_token;
			ret = m_last_index;
		}
		while (tokens[token].type != bdecode_token::end)
		{
			token += tokens[token].next_item;
			++ret;
		}

		m_size = ret;

		return ret;
	}

	std::pair<std::string, bdecode_node> bdecode_node::dict_at(int i) const
	{
		TORRENT_ASSERT(type() == dict_t);
		TORRENT_ASSERT(m_token_idx != -1);

		bdecode_token const* tokens = m_root_tokens;
		TORRENT_ASSERT(tokens[m_token_idx].type == bdecode_token::dict);

		int token = m_token_idx + 1;
		int item = 0;

		// do we have a lookup cached?
		if (m_last_index <= i && m_last_index != -1)
		{
			token = m_last_token;
			item = m_last_index;
		}

		while (item < i)
		{
			TORRENT_ASSERT(tokens[token].type == bdecode_token::string);

			// skip the key
			token += tokens[token].next_item;
			TORRENT_ASSERT(tokens[token].type != bdecode_token::end);

			// skip the value
			token += tokens[token].next_item;

			++item;

			// index 'i' out of range
			TORRENT_ASSERT(tokens[token].type != bdecode_token::end);
		}

		// there's no point in caching the first item
		if (i > 0)
		{
			m_last_token = token;
			m_last_index = i;
		}

		int value_token = token + tokens[token].next_item;
		TORRENT_ASSERT(tokens[token].type != bdecode_token::end);

		return std::make_pair(
			bdecode_node(tokens, m_buffer, m_buffer_size, token).string_value()
			, bdecode_node(tokens, m_buffer, m_buffer_size, value_token));
	}

	int bdecode_node::dict_size() const
	{
		TORRENT_ASSERT(type() == dict_t);
		TORRENT_ASSERT(m_token_idx != -1);

		if (m_size != -1) return m_size;

		bdecode_token const* tokens = m_root_tokens;
		TORRENT_ASSERT(tokens[m_token_idx].type == bdecode_token::dict);

		// this is the first item
		int token = m_token_idx + 1;
		int ret = 0;

		if (m_last_index != -1)
		{
			ret = m_last_index * 2;
			token = m_last_token;
		}

		while (tokens[token].type != bdecode_token::end)
		{
			token += tokens[token].next_item;
			++ret;
		}

		// a dictionary must contain full key-value pairs, which means
		// the number of entries is divisible by 2
		TORRENT_ASSERT((ret % 2) == 0);

		// each item is one key and one value, so divide by 2
		ret /= 2;

		m_size = ret;

		return ret;
	}

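	// looks up 'key' by a linear scan over the dictionary's key-value
	// pairs. Returns a default-constructed (none_t) node if the key is
	// not present.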
	bdecode_node bdecode_node::dict_find(std::string key) const
	{
		TORRENT_ASSERT(type() == dict_t);

		bdecode_token const* tokens = m_root_tokens;

		// this is the first item
		int token = m_token_idx + 1;

		while (tokens[token].type != bdecode_token::end)
		{
			bdecode_token const& t = tokens[token];
			TORRENT_ASSERT(t.type == bdecode_token::string);
			int size = m_root_tokens[token + 1].offset - t.offset - t.start_offset();
			if (int(key.size()) == size
				&& std::equal(key.c_str(), key.c_str() + size, m_buffer
					+ t.offset + t.start_offset()))
			{
				// skip key
				token += t.next_item;
				TORRENT_ASSERT(tokens[token].type != bdecode_token::end);

				return bdecode_node(tokens, m_buffer, m_buffer_size, token);
			}

			// skip key
			token += t.next_item;
			TORRENT_ASSERT(tokens[token].type != bdecode_token::end);

			// skip value
			token += tokens[token].next_item;
		}

		return bdecode_node();
	}

	bdecode_node bdecode_node::dict_find_list(char const* key) const
	{
		bdecode_node ret = dict_find(key);
		if (ret.type() == bdecode_node::list_t)
			return ret;
		return bdecode_node();
	}

	bdecode_node bdecode_node::dict_find_dict(std::string key) const
	{
		bdecode_node ret = dict_find(key);
		if (ret.type() == bdecode_node::dict_t)
			return ret;
		return bdecode_node();
	}

	bdecode_node bdecode_node::dict_find_dict(char const* key) const
	{
		bdecode_node ret = dict_find(key);
		if (ret.type() == bdecode_node::dict_t)
			return ret;
		return bdecode_node();
	}

	bdecode_node bdecode_node::dict_find_string(char const* key) const
	{
		bdecode_node ret = dict_find(key);
		if (ret.type() == bdecode_node::string_t)
			return ret;
		return bdecode_node();
	}

	bdecode_node bdecode_node::dict_find_int(char const* key) const
	{
		bdecode_node ret = dict_find(key);
		if (ret.type() == bdecode_node::int_t)
			return ret;
		return bdecode_node();
	}

	bdecode_node bdecode_node::dict_find(char const* key) const
	{
		TORRENT_ASSERT(type() == dict_t);

		bdecode_token const* tokens = m_root_tokens;

		// this is the first item
		int token = m_token_idx + 1;

		while (tokens[token].type != bdecode_token::end)
		{
			bdecode_token const& t = tokens[token];
			TORRENT_ASSERT(t.type == bdecode_token::string);
			int size = m_root_tokens[token + 1].offset - t.offset - t.start_offset();
			if (string_equal(key, m_buffer + t.offset + t.start_offset(), size))
			{
				// skip key
				token += t.next_item;
				TORRENT_ASSERT(tokens[token].type != bdecode_token::end);

				return bdecode_node(tokens, m_buffer, m_buffer_size, token);
			}

			// skip key
			token += t.next_item;
			TORRENT_ASSERT(tokens[token].type != bdecode_token::end);

			// skip value
			token += tokens[token].next_item;
		}

		return bdecode_node();
	}

	std::string bdecode_node::dict_find_string_value(char const* key
		, char const* default_value) const
	{
		bdecode_node n = dict_find(key);
		if (n.type() != bdecode_node::string_t) return default_value;
		return n.string_value();
	}

	boost::int64_t bdecode_node::dict_find_int_value(char const* key
		, boost::int64_t default_val) const
	{
		bdecode_node n = dict_find(key);
		if (n.type() != bdecode_node::int_t) return default_val;
		return n.int_value();
	}

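	// the integer is re-parsed from the buffer on every call. This is also
	// where the exact overflow check deferred by check_integer() happens;
	// on overflow (or any other parse error) 0 is returned.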
	boost::int64_t bdecode_node::int_value() const
	{
		TORRENT_ASSERT(type() == int_t);
		bdecode_token const& t = m_root_tokens[m_token_idx];
		int size = m_root_tokens[m_token_idx + 1].offset - t.offset;
		TORRENT_ASSERT(t.type == bdecode_token::integer);

		// +1 is to skip the 'i'
		char const* ptr = m_buffer + t.offset + 1;
		boost::int64_t val = 0;
		bool negative = false;
		if (*ptr == '-') negative = true;
		bdecode_errors::error_code_enum ec = bdecode_errors::no_error;
		parse_int(ptr + negative
			, ptr + size, 'e', val, ec);
		if (ec) return 0;
		if (negative) val = -val;
		return val;
	}

	std::string bdecode_node::string_value() const
	{
		TORRENT_ASSERT(type() == string_t);
		bdecode_token const& t = m_root_tokens[m_token_idx];
		int size = m_root_tokens[m_token_idx + 1].offset - t.offset - t.start_offset();
		TORRENT_ASSERT(t.type == bdecode_token::string);

		return std::string(m_buffer + t.offset + t.start_offset(), size);
	}

	char const* bdecode_node::string_ptr() const
	{
		TORRENT_ASSERT(type() == string_t);
		bdecode_token const& t = m_root_tokens[m_token_idx];
		TORRENT_ASSERT(t.type == bdecode_token::string);
		return m_buffer + t.offset + t.start_offset();
	}

	int bdecode_node::string_length() const
	{
		TORRENT_ASSERT(type() == string_t);
		bdecode_token const& t = m_root_tokens[m_token_idx];
		TORRENT_ASSERT(t.type == bdecode_token::string);
		return m_root_tokens[m_token_idx + 1].offset - t.offset - t.start_offset();
	}

	void bdecode_node::reserve(int tokens)
	{ m_tokens.reserve(tokens); }

	void bdecode_node::swap(bdecode_node& n)
	{
		/*
		bool lhs_is_root = (m_root_tokens == &m_tokens);
		bool rhs_is_root = (n.m_root_tokens == &n.m_tokens);

		// swap is only defined between non-root nodes
		// and between root-nodes. They may not be mixed!
		// note that when swapping root nodes, all bdecode_node
		// entries that exist in those subtrees are invalidated!
		TORRENT_ASSERT(lhs_is_root == rhs_is_root);

		// if both are roots, m_root_tokens always point to
		// its own vector, and should not get swapped (the
		// underlying vectors are swapped already)
		if (!lhs_is_root && !rhs_is_root)
		{
			// if neither is a root, we just swap the pointers
			// to the token vectors, switching their roots
			std::swap(m_root_tokens, n.m_root_tokens);
		}
		*/
		m_tokens.swap(n.m_tokens);
		std::swap(m_root_tokens, n.m_root_tokens);
		std::swap(m_buffer, n.m_buffer);
		std::swap(m_buffer_size, n.m_buffer_size);
		std::swap(m_token_idx, n.m_token_idx);
		std::swap(m_last_index, n.m_last_index);
		std::swap(m_last_token, n.m_last_token);
		std::swap(m_size, n.m_size);
	}

#define TORRENT_FAIL_BDECODE(code) do { \
	ec = make_error_code(code); \
	if (error_pos) *error_pos = start - orig_start; \
	goto done; \
	} TORRENT_WHILE_0

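	// bdecode() parses the buffer [start, end) into a flat vector of
	// bdecode_token entries owned by 'ret', rather than building a node
	// tree on the heap. Each container token stores a relative next_item
	// offset, which is back-patched once its matching 'e' is reached.
	// An illustrative call (the local variable names are just for the
	// example, and the default depth/token limits are assumed to be
	// declared in bdecode.hpp):
	//
	//   bdecode_node e;
	//   error_code ec;
	//   int pos = 0;
	//   if (bdecode(buf, buf + len, e, ec, &pos) != 0)
	//   { /* parse error at offset 'pos', described by 'ec' */ }
	//
	// on failure the partially built tree is still terminated, so 'ret'
	// remains safe to traverse.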
	int bdecode(char const* start, char const* end, bdecode_node& ret
		, error_code& ec, int* error_pos, int depth_limit, int token_limit)
	{
		ec.clear();
		ret.clear();

		if (end - start > bdecode_token::max_offset)
		{
			if (error_pos) *error_pos = 0;
			ec = make_error_code(bdecode_errors::limit_exceeded);
			return -1;
		}

		// this is the stack of bdecode_token indices, into m_tokens.
		// sp is the stack pointer, an index into the 'stack' array
		int sp = 0;
		stack_frame* stack = TORRENT_ALLOCA(stack_frame, depth_limit);

		char const* const orig_start = start;
		if (start == end) return 0;

		while (start <= end)
		{
			if (start >= end) TORRENT_FAIL_BDECODE(bdecode_errors::unexpected_eof);

			if (sp >= depth_limit)
				TORRENT_FAIL_BDECODE(bdecode_errors::depth_exceeded);

			--token_limit;
			if (token_limit < 0)
				TORRENT_FAIL_BDECODE(bdecode_errors::limit_exceeded);

			// look for a new token
			const char t = *start;

			const int current_frame = sp;

			// if we're currently parsing a dictionary, assert that
			// every other node is a string.
			if (current_frame > 0
				&& ret.m_tokens[stack[current_frame-1].token].type == bdecode_token::dict)
			{
				if (stack[current_frame-1].state == 0)
				{
					// the current parent is a dict and we are parsing a key.
					// only allow a digit (for a string) or 'e' to terminate
					if (!numeric(t) && t != 'e')
						TORRENT_FAIL_BDECODE(bdecode_errors::expected_digit);
				}
			}

			switch (t)
			{
			case 'd':
				stack[sp++] = ret.m_tokens.size();
				// we push it into the stack so that we know where to fill
				// in the next_item field once we pop this node off the stack.
				// i.e. get to the node following the dictionary in the buffer
				ret.m_tokens.push_back(bdecode_token(start - orig_start
					, bdecode_token::dict));
				++start;
				break;
			case 'l':
				stack[sp++] = ret.m_tokens.size();
				// we push it into the stack so that we know where to fill
				// in the next_item field once we pop this node off the stack.
				// i.e. get to the node following the list in the buffer
				ret.m_tokens.push_back(bdecode_token(start - orig_start
					, bdecode_token::list));
				++start;
				break;
			case 'i':
			{
				char const* int_start = start;
				bdecode_errors::error_code_enum e = bdecode_errors::no_error;
				// +1 here to point to the first digit, rather than 'i'
				start = check_integer(start + 1, end, e);
				if (e)
				{
					// in order to gracefully terminate the tree,
					// make sure the end of the previous token is set correctly
					if (error_pos) *error_pos = start - orig_start;
					error_pos = NULL;
					start = int_start;
					TORRENT_FAIL_BDECODE(e);
				}
				ret.m_tokens.push_back(bdecode_token(int_start - orig_start
					, 1, bdecode_token::integer, 1));
				TORRENT_ASSERT(*start == 'e');

				// skip 'e'
				++start;
				break;
			}
			case 'e':
			{
				// this is the end of a list or dict
				if (sp == 0)
					TORRENT_FAIL_BDECODE(bdecode_errors::unexpected_eof);

				if (sp > 0
					&& ret.m_tokens[stack[sp-1].token].type == bdecode_token::dict
					&& stack[sp-1].state == 1)
				{
					// this means we're parsing a dictionary and about to parse a
					// value associated with a key. Instead, we got a termination
					TORRENT_FAIL_BDECODE(bdecode_errors::expected_value);
				}

				// insert the end-of-sequence token
				ret.m_tokens.push_back(bdecode_token(start - orig_start, 1
					, bdecode_token::end));

				// and back-patch the start of this sequence with the offset
				// to the next token we'll insert
				int top = stack[sp-1].token;
				// subtract the token's own index, since this is a relative
				// offset
				if (ret.m_tokens.size() - top > bdecode_token::max_next_item)
					TORRENT_FAIL_BDECODE(bdecode_errors::limit_exceeded);

				ret.m_tokens[top].next_item = ret.m_tokens.size() - top;

				// and pop it from the stack.
				--sp;
				++start;
				break;
			}
			default:
			{
				// this is the case for strings. The start character is any
				// numeric digit
				if (!numeric(t))
					TORRENT_FAIL_BDECODE(bdecode_errors::expected_value);

				boost::int64_t len = t - '0';
				char const* str_start = start;
				++start;
				bdecode_errors::error_code_enum e = bdecode_errors::no_error;
				start = parse_int(start, end, ':', len, e);
				if (e)
					TORRENT_FAIL_BDECODE(e);

				// remaining buffer size excluding ':'
				const ptrdiff_t buff_size = end - start - 1;
				if (len > buff_size)
					TORRENT_FAIL_BDECODE(bdecode_errors::unexpected_eof);
				if (len < 0)
					TORRENT_FAIL_BDECODE(bdecode_errors::overflow);

				// skip ':'
				++start;
				if (start >= end) TORRENT_FAIL_BDECODE(bdecode_errors::unexpected_eof);

				// the bdecode_token only has 8 bits to keep the header size
				// in. If it overflows, fail!
				if (start - str_start - 2 > detail::bdecode_token::max_header)
					TORRENT_FAIL_BDECODE(bdecode_errors::limit_exceeded);

				ret.m_tokens.push_back(bdecode_token(str_start - orig_start
					, 1, bdecode_token::string, start - str_start));
				start += len;
				break;
			}
			}
			if (current_frame > 0
				&& ret.m_tokens[stack[current_frame-1].token].type == bdecode_token::dict)
			{
				// the next item we parse is the opposite
				stack[current_frame-1].state = ~stack[current_frame-1].state;
			}

			// this terminates the top level node, we're done!
			if (sp == 0) break;
		}

done:

		// if parsing failed, sp may still be greater than 0.
		// unwind the stack by inserting terminators to make whatever we
		// have so far valid
		while (sp > 0) {
			TORRENT_ASSERT(ec);
			--sp;

			// we may need to insert a dummy token to properly terminate the tree,
			// in case we just parsed a key to a dict and failed in the value
			if (ret.m_tokens[stack[sp].token].type == bdecode_token::dict
				&& stack[sp].state == 1)
			{
				// insert an empty dictionary as the value
				ret.m_tokens.push_back(bdecode_token(start - orig_start
					, 2, bdecode_token::dict));
				ret.m_tokens.push_back(bdecode_token(start - orig_start
					, bdecode_token::end));
			}

			int top = stack[sp].token;
			TORRENT_ASSERT(ret.m_tokens.size() - top <= bdecode_token::max_next_item);
			ret.m_tokens[top].next_item = ret.m_tokens.size() - top;
			ret.m_tokens.push_back(bdecode_token(start - orig_start, 1, bdecode_token::end));
		}

		ret.m_tokens.push_back(bdecode_token(start - orig_start, 0
			, bdecode_token::end));

		ret.m_token_idx = 0;
		ret.m_buffer = orig_start;
		ret.m_buffer_size = start - orig_start;
		ret.m_root_tokens = &ret.m_tokens[0];

		return ec ? -1 : 0;
	}

namespace {

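	// returns the number of characters the single-line representation of
	// 'e' would occupy, or -1 if it would exceed 'limit'. print_entry()
	// uses this to decide whether a list or dict fits on one line.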
	int line_longer_than(bdecode_node const& e, int limit)
	{
		int line_len = 0;
		switch (e.type())
		{
		case bdecode_node::list_t:
			line_len += 4;
			if (line_len > limit) return -1;
			for (int i = 0; i < e.list_size(); ++i)
			{
				int ret = line_longer_than(e.list_at(i), limit - line_len);
				if (ret == -1) return -1;
				line_len += ret + 2;
			}
			break;
		case bdecode_node::dict_t:
			line_len += 4;
			if (line_len > limit) return -1;
			for (int i = 0; i < e.dict_size(); ++i)
			{
				line_len += 4 + e.dict_at(i).first.size();
				if (line_len > limit) return -1;
				int ret = line_longer_than(e.dict_at(i).second, limit - line_len);
				if (ret == -1) return -1;
				line_len += ret + 1;
			}
			break;
		case bdecode_node::string_t:
			line_len += 3 + e.string_length();
			break;
		case bdecode_node::int_t:
		{
			boost::int64_t val = e.int_value();
			while (val > 0)
			{
				++line_len;
				val /= 10;
			}
			line_len += 2;
		}
		break;
		case bdecode_node::none_t:
			line_len += 4;
			break;
		}

		if (line_len > limit) return -1;
		return line_len;
	}

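	// append 'str' (len bytes) to 'ret', replacing every non-printable
	// byte with a "\xNN" hex escape.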
	void escape_string(std::string& ret, char const* str, int len)
	{
		for (int i = 0; i < len; ++i)
		{
			if (str[i] >= 32 && str[i] < 127)
			{
				ret += str[i];
			}
			else
			{
				char tmp[5];
				snprintf(tmp, sizeof(tmp), "\\x%02x", (unsigned char)str[i]);
				ret += tmp;
			}
		}
	}

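	// append a quoted, printable rendering of 'str' to 'ret'. Long strings
	// are truncated with "..." in the middle when single_line is set, and
	// strings containing non-printable bytes are hex-escaped.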
	void print_string(std::string& ret, char const* str, int len, bool single_line)
	{
		bool printable = true;
		for (int i = 0; i < len; ++i)
		{
			char c = str[i];
			if (c >= 32 && c < 127) continue;
			printable = false;
			break;
		}
		ret += "'";
		if (printable)
		{
			if (single_line && len > 30)
			{
				ret.append(str, 14);
				ret += "...";
				ret.append(str + len-14, 14);
			}
			else
				ret.append(str, len);
			ret += "'";
			return;
		}
		if (single_line && len > 20)
		{
			escape_string(ret, str, 9);
			ret += "...";
			escape_string(ret, str + len - 9, 9);
		}
		else
		{
			escape_string(ret, str, len);
		}
		ret += "'";
	}

}

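	// produce a human readable dump of the tree rooted at 'e'. When
	// single_line is false, nested lists and dictionaries that don't fit
	// within roughly 200 characters are broken across indented lines.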
	std::string print_entry(bdecode_node const& e
		, bool single_line, int indent)
	{
		char indent_str[200];
		using std::memset;
		memset(indent_str, ' ', 200);
		indent_str[0] = ',';
		indent_str[1] = '\n';
		indent_str[199] = 0;
		if (indent < 197 && indent >= 0) indent_str[indent+2] = 0;
		std::string ret;
		switch (e.type())
		{
			case bdecode_node::none_t: return "none";
			case bdecode_node::int_t:
			{
				char str[100];
				snprintf(str, sizeof(str), "%" PRId64, e.int_value());
				return str;
			}
			case bdecode_node::string_t:
			{
				print_string(ret, e.string_ptr(), e.string_length(), single_line);
				return ret;
			}
			case bdecode_node::list_t:
			{
				ret += '[';
				bool one_liner = line_longer_than(e, 200) != -1 || single_line;

				if (!one_liner) ret += indent_str + 1;
				for (int i = 0; i < e.list_size(); ++i)
				{
					if (i == 0 && one_liner) ret += " ";
					ret += print_entry(e.list_at(i), single_line, indent + 2);
					if (i < e.list_size() - 1) ret += (one_liner?", ":indent_str);
					else ret += (one_liner?" ":indent_str+1);
				}
				ret += "]";
				return ret;
			}
			case bdecode_node::dict_t:
			{
				ret += "{";
				bool one_liner = line_longer_than(e, 200) != -1 || single_line;

				if (!one_liner) ret += indent_str+1;
				for (int i = 0; i < e.dict_size(); ++i)
				{
					if (i == 0 && one_liner) ret += " ";
					std::pair<std::string, bdecode_node> ent = e.dict_at(i);
					print_string(ret, ent.first.c_str(), ent.first.size(), true);
					ret += ": ";
					ret += print_entry(ent.second, single_line, indent + 2);
					if (i < e.dict_size() - 1) ret += (one_liner?", ":indent_str);
					else ret += (one_liner?" ":indent_str+1);
				}
				ret += "}";
				return ret;
			}
		}
		return ret;
	}
}