premiere-libtorrent/include/libtorrent/lazy_entry.hpp

/*

Copyright (c) 2003-2014, Arvid Norberg
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:

    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in
      the documentation and/or other materials provided with the distribution.
    * Neither the name of the author nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

*/

#ifndef TORRENT_LAZY_ENTRY_HPP_INCLUDED
#define TORRENT_LAZY_ENTRY_HPP_INCLUDED

#include <utility>
#include <vector>
#include <string>
#include <cstring>
#include <algorithm>
#include "libtorrent/config.hpp"
#include "libtorrent/assert.hpp"
#include "libtorrent/error_code.hpp"

namespace libtorrent
{
	struct lazy_entry;

	// This function decodes bencoded_ data.
	// 
	// .. _bencoded: http://wiki.theory.org/index.php/BitTorrentSpecification
	// 
	// Whenever possible, ``lazy_bdecode()`` should be preferred over ``bdecode()``.
	// It is more efficient and more secure. It supports putting constraints on
	// the amount of memory consumed by the parser.
	// 
	// *lazy* refers to the fact that it doesn't copy any actual data out of the
	// bencoded buffer. It builds a tree of ``lazy_entry`` which has pointers into
	// the bencoded buffer. This makes it very fast and efficient. Because the
	// tree points into the buffer, the buffer has to stay alive (and unmodified)
	// for as long as the resulting ``lazy_entry`` tree is used.
	// 
	// On top of that, the parser is not recursive, which saves a lot of stack
	// space when parsing deeply nested trees. However, in order to protect
	// against potential attacks, two limits are enforced:
	// 
	// * ``depth_limit`` controls how many levels deep the tree is allowed to
	//   get. With a recursive parser, a few thousand levels would be enough to
	//   exhaust the thread's stack and terminate the process.
	// * ``item_limit`` protects against very large structures, not necessarily
	//   deep. Each bencoded item in the structure causes the parser to allocate
	//   some amount of memory; this memory is constant regardless of how much
	//   data actually is stored in the item. One potential attack is to create
	//   a bencoded list of hundreds of thousands of empty strings, which would
	//   cause the parser to allocate a significant amount of memory, perhaps
	//   more than is available on the machine, and effectively provide a denial
	//   of service. The default item limit is set as a reasonable upper limit
	//   for desktop computers. Very few torrents have more items in them.
	//   The limit corresponds to about 25 MB, which might be a bit much for
	//   embedded systems.
	// 
	// ``start`` and ``end`` define the bencoded buffer to be decoded. ``ret`` is
	// the ``lazy_entry`` which is filled in with the whole decoded tree. ``ec``
	// is a reference to an ``error_code`` which is set to describe the error
	// encountered in case the function fails (see ``bdecode_errors``).
	// ``error_pos`` is an optional pointer to an int, which will be set to the
	// byte offset into the buffer where an error occurred, in case the function
	// fails. Pass 0 (the default) if the position is not of interest.
	// 
	// NOTE(review): the meaning of the ``int`` return value is not visible in
	// this header; presumably 0 on success and non-zero on failure. Confirm
	// against the implementation before relying on it, and prefer checking ``ec``.
	TORRENT_EXPORT int lazy_bdecode(char const* start, char const* end
		, lazy_entry& ret, error_code& ec, int* error_pos = 0
		, int depth_limit = 1000, int item_limit = 1000000);

#ifndef TORRENT_NO_DEPRECATE
	// for backwards compatibility, does not report error code
	// deprecated in 0.16
	// This overload has neither the ``error_code`` nor the ``error_pos``
	// parameter, so a caller cannot find out what went wrong or where. New
	// code should call the overload taking an ``error_code&`` instead.
	// ``depth_limit`` and ``item_limit`` have the same defaults as there.
	TORRENT_DEPRECATED_PREFIX
	TORRENT_EXPORT int lazy_bdecode(char const* start, char const* end
		, lazy_entry& ret, int depth_limit = 1000, int item_limit = 1000000) TORRENT_DEPRECATED;
#endif

	// this is a string that is not NULL-terminated. Instead it
	// comes with a length, specified in bytes. This is particularly
	// useful when parsing bencoded structures, because strings are
	// not NULL-terminated internally, and requiring NULL termination
	// would require copying the string.
	//
	// A pascal_string does not own or copy the characters it refers to;
	// it only stores the pointer and the length it was constructed with.
	//
	// see lazy_entry::string_pstr().
	struct TORRENT_EXPORT pascal_string
	{
		// construct a string pointing to the characters at ``p``
		// of length ``l`` characters. No NULL termination is required.
		// The characters are not copied, so ``p`` has to remain valid
		// for as long as this object is used.
		pascal_string(char const* p, int l): len(l), ptr(p) {}

		// the number of characters in the string.
		int len;

		// the pointer to the first character in the string. This is
		// not NULL terminated, but instead consult the ``len`` field
		// to know how many characters follow.
		char const* ptr;

		// lexicographical comparison of strings. Order is consistent
		// with memcmp: the bytes the two strings have in common are
		// compared first, and only if they are all equal does the
		// length decide (a proper prefix sorts before the longer
		// string). Returns true if ``*this`` sorts strictly before
		// ``rhs``. This is a strict weak ordering, so it is safe to use
		// with std::sort, std::map and friends.
		bool operator<(pascal_string const& rhs) const
		{
			int const common = (std::min)(len, rhs.len);

			// skip memcmp when there is nothing to compare, so that an
			// empty string with a NULL ``ptr`` is never passed to it
			int const cmp = common > 0
				? std::memcmp(ptr, rhs.ptr, std::size_t(common))
				: 0;

			// the common prefix differs: that alone decides the order.
			// Falling back on the length here would make both a < b and
			// b < a true for e.g. "b" and "aa".
			if (cmp != 0) return cmp < 0;

			// one string is a prefix of the other (or they are equal)
			return len < rhs.len;
		}
	};

	struct lazy_dict_entry;

	// this object represents a node in a bencoded structure. It is a variant
	// type whose concrete type is one of:
	//
	// 1. dictionary (maps strings -> lazy_entry)
	// 2. list (sequence of lazy_entry, i.e. heterogeneous)
	// 3. integer
	// 4. string
	//
	// There is also a ``none`` type, which is used for uninitialized
	// lazy_entries.
	//
	// Strings and integers are not copied out of the bencoded buffer; the
	// entry stores pointers into it (see string_ptr() and data_section()).
	// The buffer therefore has to outlive every lazy_entry that refers to it.
	//
	// The type is non-copyable; use swap() to move content between entries.
	struct TORRENT_EXPORT lazy_entry
	{
		// The different types a lazy_entry can have
		enum entry_type_t
		{
			none_t, dict_t, list_t, string_t, int_t
		};

		// internal: creates an empty entry of type ``none_t`` with all
		// pointers and sizes set to zero.
		lazy_entry() : m_begin(0), m_len(0), m_size(0), m_type(none_t)
		{ m_data.start = NULL; }

		// tells you which specific type this lazy entry has.
		// See entry_type_t. The type determines which subset of
		// member functions are valid to use.
		entry_type_t type() const { return (entry_type_t)m_type; }

		// internal: turns an uninitialized (``none_t``) entry into an
		// integer entry.
		// start points to the first decimal digit
		// length is the number of digits
		void construct_int(char const* start, int length)
		{
			TORRENT_ASSERT(m_type == none_t);
			m_type = int_t;
			m_data.start = start;
			m_size = length;
			// the recorded data section is one byte wider than the digits
			// on either side, to cover the surrounding "i" and "e"
			m_begin = start - 1; // include 'i'
			m_len = length + 2; // include 'e'
		}

		// requires the type to be an integer. return the integer value
		boost::int64_t int_value() const;

		// internal
		// NOTE(review): defined out of line; presumably the string
		// counterpart of construct_int(), with ``start`` pointing at the
		// first character and ``length`` being the number of bytes. Confirm
		// against the implementation.
		void construct_string(char const* start, int length);

		// requires the type to be a string. Returns a pointer to the
		// first character, inside the source buffer.
		// the string is not null-terminated!
		// use string_length() to determine how many bytes
		// are part of the string.
		char const* string_ptr() const
		{
			TORRENT_ASSERT(m_type == string_t);
			return m_data.start;
		}

		// this will return a null terminated string
		// it will write to the source buffer!
		// A 0 byte is stored right after the last character of the string,
		// overwriting whatever byte follows it in the buffer. This happens
		// even though the function is const, so the source buffer must be
		// writable and at least one byte must follow the string.
		char const* string_cstr() const
		{
			TORRENT_ASSERT(m_type == string_t);
			const_cast<char*>(m_data.start)[m_size] = 0;
			return m_data.start;
		}

		// if this is a string, returns a pascal_string
		// representing the string value. No data is copied; the
		// pascal_string points into the source buffer.
		pascal_string string_pstr() const
		{
			TORRENT_ASSERT(m_type == string_t);
			return pascal_string(m_data.start, m_size);
		}

		// if this is a string, returns the string as a std::string.
		// (which requires a copy)
		std::string string_value() const
		{
			TORRENT_ASSERT(m_type == string_t);
			return std::string(m_data.start, m_size);
		}

		// if the lazy_entry is a string, returns the
		// length of the string, in bytes.
		// Unlike the other string accessors, this one does not assert that
		// the entry is a string; it returns the stored size as is.
		int string_length() const
		{ return m_size; }

		// internal: turns an uninitialized (``none_t``) entry into an empty
		// dictionary whose data section starts at ``begin``. The length of
		// the section is not known yet; it is filled in by set_end().
		void construct_dict(char const* begin)
		{
			TORRENT_ASSERT(m_type == none_t);
			m_type = dict_t;
			m_size = 0;
			m_begin = begin;
		}

		// internal
		lazy_entry* dict_append(char const* name);
		// internal
		void pop();

		// if this is a dictionary, look for a key ``name``, and return
		// a pointer to its value, or NULL if there is none.
		lazy_entry* dict_find(char const* name);
		// const overload, forwards to the non-const dict_find() above.
		lazy_entry const* dict_find(char const* name) const
		{ return const_cast<lazy_entry*>(this)->dict_find(name); }
		// NOTE(review): judging by the name and by dict_find_dict() and
		// dict_find_list() below, this presumably only returns values that
		// are strings, and NULL otherwise. Confirm against the
		// implementation.
		lazy_entry const* dict_find_string(char const* name) const;

		// if this is a dictionary, look for a key ``name`` whose value
		// is a string. These two return the string value itself (as a
		// copy in a std::string, or as a pascal_string) rather than a
		// pointer to the entry.
		// NOTE(review): what is returned when the key is missing or its
		// value is not a string is not visible in this header (presumably an
		// empty string). Confirm against the implementation.
		std::string dict_find_string_value(char const* name) const;
		pascal_string dict_find_pstr(char const* name) const;

		// if this is a dictionary, look for a key ``name`` whose value
		// is an int. ``dict_find_int`` returns a pointer to the entry.
		// ``dict_find_int_value`` returns the integer itself.
		// NOTE(review): presumably ``dict_find_int`` returns NULL and
		// ``dict_find_int_value`` returns ``default_val`` (0 unless
		// specified) when there is no such key. Confirm against the
		// implementation.
		boost::int64_t dict_find_int_value(char const* name, boost::int64_t default_val = 0) const;
		lazy_entry const* dict_find_int(char const* name) const;

		// these functions require that ``this`` is a dictionary.
		// (this->type() == dict_t). They look for an element with the
		// specified name in the dictionary. ``dict_find_dict`` only
		// finds dictionaries and ``dict_find_list`` only finds lists.
		// if no key with the corresponding value of the right type is
		// found, NULL is returned.
		lazy_entry const* dict_find_dict(char const* name) const;
		lazy_entry const* dict_find_list(char const* name) const;

		// if this is a dictionary, return the key value pair at
		// position ``i`` from the dictionary. The key is returned as a
		// copy, the value as a pointer to the entry.
		std::pair<std::string, lazy_entry const*> dict_at(int i) const;

		// requires that ``this`` is a dictionary. return the
		// number of items in it
		int dict_size() const
		{
			TORRENT_ASSERT(m_type == dict_t);
			return m_size;
		}

		// internal: turns an uninitialized (``none_t``) entry into an empty
		// list whose data section starts at ``begin``. The length of the
		// section is not known yet; it is filled in by set_end().
		void construct_list(char const* begin)
		{
			TORRENT_ASSERT(m_type == none_t);
			m_type = list_t;
			m_size = 0;
			m_begin = begin;
		}

		// internal
		lazy_entry* list_append();

		// requires that ``this`` is a list. return
		// the item at index ``i``.
		// ``i`` has to be in the range [0, list_size()). Only the upper
		// bound is asserted.
		lazy_entry* list_at(int i)
		{
			TORRENT_ASSERT(m_type == list_t);
			TORRENT_ASSERT(i < int(m_size));
			// the first element of the array is not a list item (it
			// records the capacity of the allocation), hence the + 1
			return &m_data.list[i+1];
		}
		// const overload, forwards to the non-const list_at() above.
		lazy_entry const* list_at(int i) const
		{ return const_cast<lazy_entry*>(this)->list_at(i); }

		// these functions require ``this`` to have the type list.
		// (this->type() == list_t). ``list_string_value_at`` returns
		// the string at index ``i``. ``list_pstr_at``
		// returns a pascal_string of the string value at index ``i``.
		// if the element at ``i`` is not a string, an empty string
		// is returned.
		std::string list_string_value_at(int i) const;
		pascal_string list_pstr_at(int i) const;

		// this function require ``this`` to have the type list.
		// (this->type() == list_t). returns the integer value at
		// index ``i``. If the element at ``i`` is not an integer
		// ``default_val`` is returned, which defaults to 0.
		boost::int64_t list_int_value_at(int i, boost::int64_t default_val = 0) const;

		// if this is a list, return the number of items in it.
		int list_size() const
		{
			TORRENT_ASSERT(m_type == list_t);
			return int(m_size);
		}

		// internal: end points one byte past the last byte of this entry in
		// the source buffer backing the bencoded structure. Records the
		// length of the entry's data section, counted from the position
		// given to construct_dict() or construct_list().
		void set_end(char const* end)
		{
			TORRENT_ASSERT(end > m_begin);
			m_len = end - m_begin;
		}

		// internal
		void clear();

		// internal: releases ownership of any memory allocated
		// The entry is reset to ``none_t`` with no data pointer and a size
		// of zero. Nothing is freed here, and the recorded position in the
		// source buffer (m_begin, m_len) is left untouched.
		void release()
		{
			m_data.start = NULL;
			m_size = 0;
			m_type = none_t;
		}

		// internal: calls clear().
		~lazy_entry()
		{ clear(); }

		// returns pointers into the source buffer where
		// this entry has its bencoded data
		std::pair<char const*, int> data_section() const;

		// swap values of ``this`` and ``e``.
		void swap(lazy_entry& e)
		{
			using std::swap;
			// m_type and m_size are bit-fields, which cannot be bound to
			// the references swap() takes, so they are exchanged by hand
			// through a temporary
			boost::uint32_t tmp = e.m_type;
			e.m_type = m_type;
			m_type = tmp;
			tmp = e.m_size;
			e.m_size = m_size;
			m_size = tmp;
			swap(m_data.start, e.m_data.start);
			swap(m_begin, e.m_begin);
			swap(m_len, e.m_len);
		}

	private:

		// NOTE(review): defined out of line; presumably the capacity of the
		// dict or list allocation described on data_t below. Confirm
		// against the implementation.
		int capacity() const;

		// the payload of the entry. Which member is active depends on
		// m_type.
		union data_t
		{
			// for the dict and list arrays, the first item is not part
			// of the array. Instead its m_len member indicates the capacity
			// of the allocation
			lazy_dict_entry* dict;
			lazy_entry* list;
			// for strings and integers: the first character, or first
			// digit, in the source buffer
			char const* start;
		} m_data;

		// used for dictionaries and lists to record the range
		// in the original buffer they are based on. construct_int() sets
		// it for integers as well (pointing at the leading 'i').
		char const* m_begin;

		// the number of bytes this entry extends in the
		// bencoded buffer
		boost::uint32_t m_len;

		// if list or dictionary, the number of items. For strings the
		// length in bytes, and for integers the number of digits.
		// 29 bits wide, sharing a 32 bit word with m_type.
		boost::uint32_t m_size:29;
		// element type (dict, list, int, string), one of entry_type_t
		boost::uint32_t m_type:3;

		// non-copyable: declared private and not defined in this header
		lazy_entry(lazy_entry const&);
		lazy_entry const& operator=(lazy_entry const&);
	};

	// one key/value pair of a dictionary lazy_entry. Dictionaries keep
	// their items in an array of these (see lazy_entry::data_t).
	struct lazy_dict_entry
	{
		// the key of the item.
		// NOTE(review): presumably this points at the key's characters
		// inside the bencoded source buffer and is not NULL-terminated.
		// Confirm against lazy_entry::dict_append() before treating it as a
		// C string.
		char const* name;
		// the value associated with the key
		lazy_entry val;
	};

	// print the bencoded structure in a human-readable format to a string
	// that's returned. ``e`` is the root of the structure to print.
	// NOTE(review): the effect of the optional arguments is not visible in
	// this header. Presumably ``single_line`` keeps the output on one line
	// and ``indent`` is the indentation level to start at. Confirm against
	// the implementation.
	TORRENT_EXPORT std::string print_entry(lazy_entry const& e
		, bool single_line = false, int indent = 0);

	// get the ``error_category`` for bdecode errors. This is the category
	// that bdecode_errors::make_error_code() attaches to the values of
	// bdecode_errors::error_code_enum.
	TORRENT_EXPORT boost::system::error_category& get_bdecode_category();

	namespace bdecode_errors
	{
		// libtorrent reports errors using boost.system's ``error_code``
		// class. Errors from the bdecoder belong to libtorrent's own error
		// category, get_bdecode_category(), and carry one of the values of
		// error_code_enum defined here.
		enum error_code_enum
		{
			// Not an error
			no_error = 0,
			// expected string in bencoded string
			expected_string = 1,
			// expected colon in bencoded string
			expected_colon = 2,
			// unexpected end of file in bencoded string
			unexpected_eof = 3,
			// expected value (list, dict, int or string) in bencoded string
			expected_value = 4,
			// bencoded recursion depth limit exceeded
			depth_exceeded = 5,
			// bencoded item count limit exceeded
			limit_exceeded = 6,
			// integer overflow
			overflow = 7,

			// the number of error codes (one past the last real code)
			error_code_max
		};

		// hidden
		// builds an ``error_code`` in the bdecode category from one of the
		// enumerators above.
		inline boost::system::error_code make_error_code(error_code_enum e)
		{
			boost::system::error_category& category = get_bdecode_category();
			return boost::system::error_code(static_cast<int>(e), category);
		}
	}

	// internal helper for parsing an integer out of a bencoded buffer.
	// NOTE(review): only the signature is visible in this header; the
	// following is inferred from it and needs to be confirmed against the
	// implementation. Presumably decimal digits are read from ``start``,
	// never reading at or past ``end``, until ``delimiter`` is found. The
	// parsed number is stored in ``val`` and ``ec`` is set to something
	// other than bdecode_errors::no_error on failure (e.g. ``overflow``).
	// The return value is presumably the position where parsing stopped.
	TORRENT_EXTRA_EXPORT char const* parse_int(char const* start
		, char const* end, char delimiter, boost::int64_t& val
		, bdecode_errors::error_code_enum& ec);

}

#endif