premiere-libtorrent/include/libtorrent/lazy_entry.hpp

429 lines
14 KiB
C++
Raw Normal View History

2008-04-11 05:41:09 +02:00
/*
Copyright (c) 2003-2012, Arvid Norberg
2008-04-11 05:41:09 +02:00
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the distribution.
* Neither the name of the author nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef TORRENT_LAZY_ENTRY_HPP_INCLUDED
#define TORRENT_LAZY_ENTRY_HPP_INCLUDED
#include <utility>
#include <vector>
#include <string>
2009-11-27 08:17:10 +01:00
#include <cstring>
#include "libtorrent/config.hpp"
#include "libtorrent/assert.hpp"
#include "libtorrent/error_code.hpp"
2008-04-11 05:41:09 +02:00
namespace libtorrent
{
struct lazy_entry;
2013-08-08 03:03:54 +02:00
// This function decodes bencoded_ data.
//
// .. _bencoded: http://wiki.theory.org/index.php/BitTorrentSpecification
//
// Whenever possible, ``lazy_bdecode()`` should be preferred over ``bdecode()``.
// It is more efficient and more secure. It supports placing constraints on the
// amount of memory that is consumed by the parser.
//
// *lazy* refers to the fact that it doesn't copy any actual data out of the
// bencoded buffer. It builds a tree of ``lazy_entry`` which has pointers into
// the bencoded buffer. This makes it very fast and efficient. On top of that,
// it is not recursive, which saves a lot of stack space when parsing deeply
// nested trees.
//
// In order to protect against potential attacks, ``depth_limit`` controls how
// many levels deep the tree is allowed to get, and ``item_limit`` controls how
// many items the tree is allowed to contain. With a recursive parser, a few
// thousand levels would be enough to exhaust the thread's stack and terminate
// the process. The ``item_limit`` protects against very large structures that
// are not necessarily deep. Each bencoded item in the structure causes the
// parser to allocate some amount of memory; this amount is constant regardless
// of how much data actually is stored in the item. One potential attack is to
// create a bencoded list of hundreds of thousands of empty strings, which would
// cause the parser to allocate a significant amount of memory, perhaps more
// than is available on the machine, and effectively cause a denial of service.
// The default item limit is set as a reasonable upper limit for desktop
// computers. Very few torrents have more items in them. The limit corresponds
// to about 25 MB, which might be a bit much for embedded systems.
//
// ``start`` and ``end`` define the bencoded buffer to be decoded. ``ret`` is
// the ``lazy_entry`` which is filled in with the whole decoded tree. ``ec``
// is a reference to an ``error_code`` which is set to describe the error
// encountered in case the function fails. ``error_pos`` is an optional pointer
// to an int, which will be set to the byte offset into the buffer where an
// error occurred, in case the function fails.
//
// NOTE(review): the meaning of the ``int`` return value is not visible in this
// header; confirm it against the implementation before relying on it.
TORRENT_EXPORT int lazy_bdecode(char const* start, char const* end
, lazy_entry& ret, error_code& ec, int* error_pos = 0
, int depth_limit = 1000, int item_limit = 1000000);
2008-04-11 05:41:09 +02:00
#ifndef TORRENT_NO_DEPRECATE
// for backwards compatibility, does not report error code.
// This overload takes neither an ``error_code&`` nor an ``error_pos``
// pointer; prefer the overload that does. It is only declared when
// TORRENT_NO_DEPRECATE is not defined.
// deprecated in 0.16
TORRENT_DEPRECATED_PREFIX
TORRENT_EXPORT int lazy_bdecode(char const* start, char const* end
, lazy_entry& ret, int depth_limit = 1000, int item_limit = 1000000) TORRENT_DEPRECATED;
#endif
2013-08-16 07:07:09 +02:00
// this is a string that is not NULL-terminated. Instead it
// comes with a length, specified in bytes. This is particularly
// useful when parsing bencoded structures, because strings are
// not NULL-terminated internally, and requiring NULL termination
// would require copying the string.
//
// see lazy_entry::string_pstr().
struct TORRENT_EXPORT pascal_string
{
	// construct a string pointing to the characters at ``p``
	// of length ``l`` characters. No NULL termination is required.
	// The characters are not copied; ``p`` must stay valid for as
	// long as this object is used.
	pascal_string(char const* p, int l): len(l), ptr(p) {}

	// the number of characters in the string.
	int len;

	// the pointer to the first character in the string. This is
	// not NULL terminated, but instead consult the ``len`` field
	// to know how many characters follow.
	char const* ptr;

	// lexicographical comparison of strings. Order is consistent
	// with memcmp: the common prefix is compared byte-wise and only
	// if it is equal does the shorter string order first. This is a
	// strict weak ordering, so it is safe to use with ordered
	// containers and sorting algorithms.
	bool operator<(pascal_string const& rhs) const
	{
		int const common = len < rhs.len ? len : rhs.len;

		// skip memcmp for an empty prefix, since an empty
		// pascal_string may carry a null ``ptr``
		int const cmp = common > 0 ? std::memcmp(ptr, rhs.ptr, common) : 0;

		// the length only breaks ties. Letting it override a
		// "greater than" prefix would make both a < b and b < a
		// true for e.g. "b" and "ab".
		if (cmp != 0) return cmp < 0;
		return len < rhs.len;
	}
};
2009-11-27 08:08:47 +01:00
struct lazy_dict_entry;
2013-08-16 07:07:09 +02:00
// this object represent a node in a bencoded structure. It is a variant
// type whose concrete type is one of:
//
// 1. dictionary (maps strings -> lazy_entry)
// 2. list (sequence of lazy_entry, i.e. heterogenous)
// 3. integer
// 4. string
//
// There is also a ``none`` type, which is used for uninitialized
// lazy_entries.
struct TORRENT_EXPORT lazy_entry
2008-04-11 05:41:09 +02:00
{
2013-08-16 07:07:09 +02:00
// The different types a lazy_entry can have
2008-04-11 05:41:09 +02:00
enum entry_type_t
{
none_t, dict_t, list_t, string_t, int_t
};
2013-11-27 21:09:44 +01:00
// internal
lazy_entry() : m_begin(0), m_len(0), m_size(0), m_capacity(0), m_type(none_t)
2008-04-14 00:34:04 +02:00
{ m_data.start = 0; }
2008-04-11 05:41:09 +02:00
2013-08-16 07:07:09 +02:00
// tells you which specific type this lazy entry has.
// See entry_type_t. The type determines which subset of
// member functions are valid to use.
entry_type_t type() const { return (entry_type_t)m_type; }
2008-04-11 05:41:09 +02:00
2008-04-14 00:34:04 +02:00
// start points to the first decimal digit
// length is the number of digits
void construct_int(char const* start, int length)
2008-04-11 05:41:09 +02:00
{
TORRENT_ASSERT(m_type == none_t);
m_type = int_t;
m_data.start = start;
2008-04-14 00:34:04 +02:00
m_size = length;
m_begin = start - 1; // include 'i'
m_len = length + 2; // include 'e'
2008-04-11 05:41:09 +02:00
}
2013-11-27 21:57:19 +01:00
// requires the type to be an integer. return the integer value
2013-10-23 00:41:58 +02:00
boost::int64_t int_value() const;
2008-04-11 05:41:09 +02:00
2013-08-16 07:07:09 +02:00
// internal
2008-04-14 00:34:04 +02:00
void construct_string(char const* start, int length);
// the string is not null-terminated!
2013-08-16 07:07:09 +02:00
// use string_length() to determine how many bytes
// are part of the string.
2008-04-14 00:34:04 +02:00
char const* string_ptr() const
2008-04-11 05:41:09 +02:00
{
2008-04-14 00:34:04 +02:00
TORRENT_ASSERT(m_type == string_t);
return m_data.start;
2008-04-11 05:41:09 +02:00
}
2008-04-14 00:34:04 +02:00
// this will return a null terminated string
// it will write to the source buffer!
char const* string_cstr() const
2008-04-11 05:41:09 +02:00
{
TORRENT_ASSERT(m_type == string_t);
2008-04-14 00:34:04 +02:00
const_cast<char*>(m_data.start)[m_size] = 0;
2008-04-11 05:41:09 +02:00
return m_data.start;
}
2013-08-16 07:07:09 +02:00
// if this is a string, returns a pascal_string
// representing the string value.
pascal_string string_pstr() const
{
TORRENT_ASSERT(m_type == string_t);
return pascal_string(m_data.start, m_size);
}
2013-08-16 07:07:09 +02:00
// if this is a string, returns the string as a std::string.
// (which requires a copy)
2008-04-14 00:34:04 +02:00
std::string string_value() const
{
TORRENT_ASSERT(m_type == string_t);
return std::string(m_data.start, m_size);
}
2013-08-16 07:07:09 +02:00
// if the lazy_entry is a string, returns the
// length of the string, in bytes.
int string_length() const
{ return m_size; }
2013-08-16 07:07:09 +02:00
// internal
2008-04-14 00:34:04 +02:00
void construct_dict(char const* begin)
2008-04-11 05:41:09 +02:00
{
TORRENT_ASSERT(m_type == none_t);
m_type = dict_t;
m_size = 0;
m_capacity = 0;
2008-04-14 00:34:04 +02:00
m_begin = begin;
2008-04-11 05:41:09 +02:00
}
2013-08-16 07:07:09 +02:00
// internal
2008-04-14 00:34:04 +02:00
lazy_entry* dict_append(char const* name);
2013-08-16 07:07:09 +02:00
// internal
void pop();
2013-08-16 07:07:09 +02:00
// if this is a dictionary, look for a key ``name``, and return
// a pointer to its value, or NULL if there is none.
2008-04-11 05:41:09 +02:00
lazy_entry* dict_find(char const* name);
lazy_entry const* dict_find(char const* name) const
{ return const_cast<lazy_entry*>(this)->dict_find(name); }
2013-08-16 07:07:09 +02:00
lazy_entry const* dict_find_string(char const* name) const;
2008-04-19 05:00:07 +02:00
2013-08-16 07:07:09 +02:00
// if this is a dictionary, look for a key ``name`` whose value
// is a string. If such key exist, return a pointer to
// its value, otherwise NULL.
2008-04-19 05:00:07 +02:00
std::string dict_find_string_value(char const* name) const;
pascal_string dict_find_pstr(char const* name) const;
2013-08-16 07:07:09 +02:00
// if this is a dictionary, look for a key ``name`` whose value
// is an int. If such key exist, return a pointer to its value,
// otherwise NULL.
2013-10-23 00:41:58 +02:00
boost::int64_t dict_find_int_value(char const* name, boost::int64_t default_val = 0) const;
2013-08-16 07:07:09 +02:00
lazy_entry const* dict_find_int(char const* name) const;
2013-11-27 21:09:44 +01:00
// these functions require that ``this`` is a dictionary.
// (this->type() == dict_t). They look for an element with the
// specified name in the dictionary. ``dict_find_dict`` only
// finds dictionaries and ``dict_find_list`` only finds lists.
// if no key with the corresponding value of the right type is
// found, NULL is returned.
2008-04-19 05:00:07 +02:00
lazy_entry const* dict_find_dict(char const* name) const;
lazy_entry const* dict_find_list(char const* name) const;
2013-08-16 07:07:09 +02:00
// if this is a dictionary, return the key value pair at
// position ``i`` from the dictionary.
2009-11-27 08:08:47 +01:00
std::pair<std::string, lazy_entry const*> dict_at(int i) const;
2008-04-11 05:41:09 +02:00
2013-11-27 21:09:44 +01:00
// requires that ``this`` is a dictionary. return the
// number of items in it
2008-04-11 05:41:09 +02:00
int dict_size() const
{
TORRENT_ASSERT(m_type == dict_t);
return m_size;
}
2013-08-16 07:07:09 +02:00
// internal
2008-04-14 00:34:04 +02:00
void construct_list(char const* begin)
2008-04-11 05:41:09 +02:00
{
TORRENT_ASSERT(m_type == none_t);
m_type = list_t;
m_size = 0;
m_capacity = 0;
2008-04-14 00:34:04 +02:00
m_begin = begin;
2008-04-11 05:41:09 +02:00
}
2013-08-16 07:07:09 +02:00
// internal
2008-04-11 05:41:09 +02:00
lazy_entry* list_append();
2013-08-16 07:07:09 +02:00
2013-11-27 21:09:44 +01:00
// requires that ``this`` is a list. return
// the item at index ``i``.
2008-04-11 05:41:09 +02:00
lazy_entry* list_at(int i)
{
TORRENT_ASSERT(m_type == list_t);
TORRENT_ASSERT(i < int(m_size));
2008-04-11 05:41:09 +02:00
return &m_data.list[i];
}
lazy_entry const* list_at(int i) const
{ return const_cast<lazy_entry*>(this)->list_at(i); }
2008-04-11 05:41:09 +02:00
2013-11-27 21:09:44 +01:00
// these functions require ``this`` to have the type list.
// (this->type() == list_t). ``list_string_value_at`` returns
// the string at index ``i``. ``list_pstr_at``
// returns a pascal_string of the string value at index ``i``.
// if the element at ``i`` is not a string, an empty string
// is returned.
2008-04-19 05:00:07 +02:00
std::string list_string_value_at(int i) const;
pascal_string list_pstr_at(int i) const;
2013-11-27 21:09:44 +01:00
// this function require ``this`` to have the type list.
// (this->type() == list_t). returns the integer value at
// index ``i``. If the element at ``i`` is not an integer
// ``default_val`` is returned, which defaults to 0.
2013-10-23 00:41:58 +02:00
boost::int64_t list_int_value_at(int i, boost::int64_t default_val = 0) const;
2008-04-19 05:00:07 +02:00
2013-08-16 07:07:09 +02:00
// if this is a list, return the number of items in it.
2008-04-11 05:41:09 +02:00
int list_size() const
{
TORRENT_ASSERT(m_type == list_t);
return int(m_size);
2008-04-11 05:41:09 +02:00
}
2013-11-27 21:57:19 +01:00
// internal: end points one byte passed last byte in the source
2013-08-16 07:07:09 +02:00
// buffer backing the bencoded structure.
2008-04-14 00:34:04 +02:00
void set_end(char const* end)
{
TORRENT_ASSERT(end > m_begin);
m_len = end - m_begin;
2008-04-14 00:34:04 +02:00
}
2013-08-16 07:07:09 +02:00
// internal
2008-04-11 05:41:09 +02:00
void clear();
2013-11-27 21:57:19 +01:00
// internal: releases ownership of any memory allocated
2008-05-15 10:23:32 +02:00
void release()
{
m_data.start = 0;
m_size = 0;
m_capacity = 0;
m_type = none_t;
}
2013-08-16 07:07:09 +02:00
// internal
2008-04-11 05:41:09 +02:00
~lazy_entry()
{ clear(); }
2008-04-14 00:34:04 +02:00
// returns pointers into the source buffer where
// this entry has its bencoded data
2008-04-20 05:18:49 +02:00
std::pair<char const*, int> data_section() const;
2008-04-14 00:34:04 +02:00
2013-08-16 07:07:09 +02:00
// swap values of ``this`` and ``e``.
2008-05-14 07:16:40 +02:00
void swap(lazy_entry& e)
{
using std::swap;
boost::uint32_t tmp = e.m_type;
e.m_type = m_type;
m_type = tmp;
tmp = e.m_capacity;
e.m_capacity = m_capacity;
m_capacity = tmp;
2008-05-14 07:16:40 +02:00
swap(m_data.start, e.m_data.start);
swap(m_size, e.m_size);
swap(m_begin, e.m_begin);
swap(m_len, e.m_len);
2008-05-14 07:16:40 +02:00
}
2008-04-11 05:41:09 +02:00
private:
union data_t
{
2009-11-27 08:08:47 +01:00
lazy_dict_entry* dict;
2008-04-11 05:41:09 +02:00
lazy_entry* list;
2008-04-14 00:34:04 +02:00
char const* start;
2008-04-11 05:41:09 +02:00
} m_data;
2009-11-27 08:08:47 +01:00
2008-04-14 00:34:04 +02:00
// used for dictionaries and lists to record the range
// in the original buffer they are based on
char const* m_begin;
// the number of bytes this entry extends in the
// bencoded byffer
boost::uint32_t m_len;
// if list or dictionary, the number of items
boost::uint32_t m_size;
// if list or dictionary, allocated number of items
boost::uint32_t m_capacity:29;
// element type (dict, list, int, string)
boost::uint32_t m_type:3;
// non-copyable
lazy_entry(lazy_entry const&);
lazy_entry const& operator=(lazy_entry const&);
2008-04-11 05:41:09 +02:00
};
2009-11-27 08:08:47 +01:00
// one key/value pair of a dictionary ``lazy_entry``. This is the element
// type that ``lazy_entry``'s internal ``dict`` pointer refers to.
struct lazy_dict_entry
{
// pointer to the key of this item.
// NOTE(review): presumably this is the ``name`` pointer handed to
// lazy_entry::dict_append() and points into the bencoded source
// buffer - confirm against the implementation.
char const* name;
// the value stored under the key
lazy_entry val;
};
2013-07-20 00:26:07 +02:00
// internal: returns a ``std::string`` built from the entry ``e``.
// NOTE(review): presumably a human readable rendering of the tree, with
// ``single_line`` and ``indent`` controlling its layout - the
// implementation is not part of this header, confirm there.
TORRENT_EXTRA_EXPORT std::string print_entry(lazy_entry const& e
	, bool single_line = false, int indent = 0);
2008-04-11 05:41:09 +02:00
2013-11-09 03:01:26 +01:00
// get the ``error_category`` for bdecode errors. This is the category
// that ``bdecode_errors::make_error_code()`` attaches to the error codes
// it creates.
TORRENT_EXPORT boost::system::error_category& get_bdecode_category();
namespace bdecode_errors
{
	// libtorrent uses boost.system's ``error_code`` class to represent
	// errors. libtorrent has its own error category,
	// get_bdecode_category(), with the error codes defined by
	// error_code_enum.
	enum error_code_enum
	{
		// Not an error
		no_error = 0,

		// a string was expected in the bencoded data
		expected_string,

		// a colon was expected in the bencoded data
		expected_colon,

		// the bencoded data ended unexpectedly
		unexpected_eof,

		// a value (list, dict, int or string) was expected in the
		// bencoded data
		expected_value,

		// the bencoded recursion depth limit was exceeded
		depth_exceeded,

		// the bencoded item count limit was exceeded
		limit_exceeded,

		// the number of error codes
		error_code_max
	};

	// hidden
	inline boost::system::error_code make_error_code(error_code_enum e)
	{
		boost::system::error_category& category = get_bdecode_category();
		return boost::system::error_code(static_cast<int>(e), category);
	}
}
}
2008-04-11 05:41:09 +02:00
#endif