diff --git a/Jamfile b/Jamfile index fb163b1ff..4be76d575 100755 --- a/Jamfile +++ b/Jamfile @@ -223,6 +223,7 @@ SOURCES = escape_string http_connection http_stream + http_parser identify_client ip_filter peer_connection diff --git a/include/Makefile.am b/include/Makefile.am index 839a7920c..6a3461dab 100644 --- a/include/Makefile.am +++ b/include/Makefile.am @@ -21,6 +21,7 @@ libtorrent/fingerprint.hpp \ libtorrent/hasher.hpp \ libtorrent/http_connection.hpp \ libtorrent/http_stream.hpp \ +libtorrent/http_parser.hpp \ libtorrent/http_tracker_connection.hpp \ libtorrent/identify_client.hpp \ libtorrent/instantiate_connection.hpp \ diff --git a/include/libtorrent/http_parser.hpp b/include/libtorrent/http_parser.hpp new file mode 100755 index 000000000..7d308ca36 --- /dev/null +++ b/include/libtorrent/http_parser.hpp @@ -0,0 +1,111 @@ +/* + +Copyright (c) 2008, Arvid Norberg +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the distribution. + * Neither the name of the author nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. + +*/ + +#ifndef TORRENT_HTTP_PARSER_HPP_INCLUDED +#define TORRENT_HTTP_PARSER_HPP_INCLUDED + +#include +#include +#include + +#ifdef _MSC_VER +#pragma warning(push, 1) +#endif + +#include +#include +#include + +#ifdef _MSC_VER +#pragma warning(pop) +#endif + +#include "libtorrent/config.hpp" +#include "libtorrent/buffer.hpp" +#include "libtorrent/size_type.hpp" + +namespace libtorrent +{ + + class http_parser + { + public: + http_parser(); + std::string const& header(char const* key) const + { + static std::string empty; + std::map::const_iterator i + = m_header.find(key); + if (i == m_header.end()) return empty; + return i->second; + } + + std::string const& protocol() const { return m_protocol; } + int status_code() const { return m_status_code; } + std::string const& method() const { return m_method; } + std::string const& path() const { return m_path; } + std::string const& message() const { return m_server_message; } + buffer::const_interval get_body() const; + bool header_finished() const { return m_state == read_body; } + bool finished() const { return m_finished; } + boost::tuple incoming(buffer::const_interval recv_buffer + , bool& error); + int body_start() const { return m_body_start_pos; } + size_type content_length() const { return m_content_length; } + + void reset(); + + std::map const& headers() const { return m_header; } + + private: + int m_recv_pos; + int m_status_code; + std::string m_method; + std::string m_path; + std::string 
m_protocol; + std::string m_server_message; + + size_type m_content_length; + + enum { read_status, read_header, read_body, error_state } m_state; + + std::map m_header; + buffer::const_interval m_recv_buffer; + int m_body_start_pos; + + bool m_finished; + }; + +} + +#endif // TORRENT_HTTP_PARSER_HPP_INCLUDED + diff --git a/include/libtorrent/http_tracker_connection.hpp b/include/libtorrent/http_tracker_connection.hpp index 337bbb658..a9521e886 100755 --- a/include/libtorrent/http_tracker_connection.hpp +++ b/include/libtorrent/http_tracker_connection.hpp @@ -61,59 +61,11 @@ POSSIBILITY OF SUCH DAMAGE. #include "libtorrent/buffer.hpp" #include "libtorrent/socket_type.hpp" #include "libtorrent/connection_queue.hpp" +#include "libtorrent/http_parser.hpp" namespace libtorrent { - class http_parser - { - public: - http_parser(); - std::string const& header(char const* key) const - { - static std::string empty; - std::map::const_iterator i - = m_header.find(key); - if (i == m_header.end()) return empty; - return i->second; - } - - std::string const& protocol() const { return m_protocol; } - int status_code() const { return m_status_code; } - std::string const& method() const { return m_method; } - std::string const& path() const { return m_path; } - std::string const& message() const { return m_server_message; } - buffer::const_interval get_body() const; - bool header_finished() const { return m_state == read_body; } - bool finished() const { return m_finished; } - boost::tuple incoming(buffer::const_interval recv_buffer - , bool& error); - int body_start() const { return m_body_start_pos; } - int content_length() const { return m_content_length; } - - void reset(); - - std::map const& headers() const { return m_header; } - - private: - int m_recv_pos; - int m_status_code; - std::string m_method; - std::string m_path; - std::string m_protocol; - std::string m_server_message; - - int m_content_length; - - enum { read_status, read_header, read_body, error_state } m_state; 
- - std::map m_header; - buffer::const_interval m_recv_buffer; - int m_body_start_pos; - - bool m_finished; - }; - class TORRENT_EXPORT http_tracker_connection : public tracker_connection { diff --git a/src/Makefile.am b/src/Makefile.am index 9c9301d3b..26255c870 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -23,7 +23,7 @@ alert.cpp identify_client.cpp ip_filter.cpp file.cpp metadata_transfer.cpp \ logger.cpp file_pool.cpp ut_pex.cpp lsd.cpp upnp.cpp instantiate_connection.cpp \ socks5_stream.cpp socks4_stream.cpp http_stream.cpp connection_queue.cpp \ disk_io_thread.cpp ut_metadata.cpp magnet_uri.cpp udp_socket.cpp smart_ban.cpp \ -$(kademlia_sources) +http_parser.cpp $(kademlia_sources) noinst_HEADERS = \ $(top_srcdir)/include/libtorrent/alert.hpp \ @@ -52,6 +52,7 @@ $(top_srcdir)/include/libtorrent/fingerprint.hpp \ $(top_srcdir)/include/libtorrent/hasher.hpp \ $(top_srcdir)/include/libtorrent/http_connection.hpp \ $(top_srcdir)/include/libtorrent/http_stream.hpp \ +$(top_srcdir)/include/libtorrent/http_parser.hpp \ $(top_srcdir)/include/libtorrent/session_settings.hpp \ $(top_srcdir)/include/libtorrent/http_tracker_connection.hpp \ $(top_srcdir)/include/libtorrent/identify_client.hpp \ diff --git a/src/http_parser.cpp b/src/http_parser.cpp new file mode 100755 index 000000000..3e73e89d2 --- /dev/null +++ b/src/http_parser.cpp @@ -0,0 +1,234 @@ +/* + +Copyright (c) 2008, Arvid Norberg +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the distribution. 
+ * Neither the name of the author nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include "libtorrent/pch.hpp" + +#include +#include + +#include "libtorrent/config.hpp" +#include "libtorrent/http_parser.hpp" +#include "libtorrent/assert.hpp" + +using namespace libtorrent; + +namespace +{ + char to_lower(char c) { return std::tolower(c); } +} + +namespace libtorrent +{ + http_parser::http_parser() + : m_recv_pos(0) + , m_status_code(-1) + , m_content_length(-1) + , m_state(read_status) + , m_recv_buffer(0, 0) + , m_body_start_pos(0) + , m_finished(false) + {} + + boost::tuple http_parser::incoming( + buffer::const_interval recv_buffer, bool& error) + { + TORRENT_ASSERT(recv_buffer.left() >= m_recv_buffer.left()); + boost::tuple ret(0, 0); + + // early exit if there's nothing new in the receive buffer + if (recv_buffer.left() == m_recv_buffer.left()) return ret; + m_recv_buffer = recv_buffer; + + if (m_state == error_state) + { + error = true; + return ret; + } + + char const* pos = recv_buffer.begin + m_recv_pos; + if (m_state == read_status) + { + TORRENT_ASSERT(!m_finished); + char const* newline = 
std::find(pos, recv_buffer.end, '\n'); + // if we don't have a full line yet, wait. + if (newline == recv_buffer.end) return ret; + + if (newline == pos) + { + m_state = error_state; + error = true; + return ret; + } + + char const* line_end = newline; + if (pos != line_end && *(line_end - 1) == '\r') --line_end; + + std::istringstream line(std::string(pos, line_end)); + ++newline; + int incoming = (int)std::distance(pos, newline); + m_recv_pos += incoming; + boost::get<1>(ret) += incoming; + pos = newline; + + line >> m_protocol; + if (m_protocol.substr(0, 5) == "HTTP/") + { + line >> m_status_code; + std::getline(line, m_server_message); + } + else + { + m_method = m_protocol; + std::transform(m_method.begin(), m_method.end(), m_method.begin(), &to_lower); + m_protocol.clear(); + line >> m_path >> m_protocol; + m_status_code = 0; + } + m_state = read_header; + } + + if (m_state == read_header) + { + TORRENT_ASSERT(!m_finished); + char const* newline = std::find(pos, recv_buffer.end, '\n'); + std::string line; + + while (newline != recv_buffer.end && m_state == read_header) + { + // if the LF character is preceded by a CR + // character, don't copy it into the line string. + char const* line_end = newline; + if (pos != line_end && *(line_end - 1) == '\r') --line_end; + line.assign(pos, line_end); + ++newline; + m_recv_pos += newline - pos; + boost::get<1>(ret) += newline - pos; + pos = newline; + + std::string::size_type separator = line.find(':'); + if (separator == std::string::npos) + { + // no ':' in this line, so it is taken as the blank line, + // the header is finished and the body + // starts. 
+ m_state = read_body; + m_body_start_pos = m_recv_pos; + break; + } + + std::string name = line.substr(0, separator); + std::transform(name.begin(), name.end(), name.begin(), &to_lower); + ++separator; + // skip whitespace + while (separator < line.size() + && (line[separator] == ' ' || line[separator] == '\t')) + ++separator; + std::string value = line.substr(separator, std::string::npos); + m_header.insert(std::make_pair(name, value)); + + if (name == "content-length") + { + m_content_length = atoll(value.c_str()); + } + else if (name == "content-range") + { + std::stringstream range_str(value); + char dummy; + std::string bytes; + size_type range_start, range_end; + // apparently some web servers do not send the "bytes" + // in their content-range + if (value.find(' ') != std::string::npos) + range_str >> bytes; + range_str >> range_start >> dummy >> range_end; + if (!range_str || range_end < range_start) + { + m_state = error_state; + error = true; + return ret; + } + // the http range is inclusive + m_content_length = range_end - range_start + 1; + } + + TORRENT_ASSERT(m_recv_pos <= (int)recv_buffer.left()); + newline = std::find(pos, recv_buffer.end, '\n'); + } + } + + if (m_state == read_body) + { + int incoming = recv_buffer.end - pos; + if (m_recv_pos - m_body_start_pos + incoming > m_content_length + && m_content_length >= 0) + incoming = m_content_length - m_recv_pos + m_body_start_pos; + + TORRENT_ASSERT(incoming >= 0); + m_recv_pos += incoming; + boost::get<0>(ret) += incoming; + + if (m_content_length >= 0 + && m_recv_pos - m_body_start_pos >= m_content_length) + { + m_finished = true; + } + } + return ret; + } + + buffer::const_interval http_parser::get_body() const + { + TORRENT_ASSERT(m_state == read_body); + if (m_content_length >= 0) + return buffer::const_interval(m_recv_buffer.begin + m_body_start_pos + , m_recv_buffer.begin + (std::min)(size_type(m_recv_pos) + , m_body_start_pos + m_content_length)); + else + return 
buffer::const_interval(m_recv_buffer.begin + m_body_start_pos + , m_recv_buffer.begin + m_recv_pos); + } + + void http_parser::reset() + { + m_recv_pos = 0; + m_body_start_pos = 0; + m_status_code = -1; + m_content_length = -1; + m_finished = false; + m_state = read_status; + m_recv_buffer.begin = 0; + m_recv_buffer.end = 0; + m_header.clear(); + } + +} + diff --git a/src/http_tracker_connection.cpp b/src/http_tracker_connection.cpp index 5d2038bda..5af1ca040 100755 --- a/src/http_tracker_connection.cpp +++ b/src/http_tracker_connection.cpp @@ -94,187 +94,6 @@ namespace namespace libtorrent { - http_parser::http_parser() - : m_recv_pos(0) - , m_status_code(-1) - , m_content_length(-1) - , m_state(read_status) - , m_recv_buffer(0, 0) - , m_body_start_pos(0) - , m_finished(false) - {} - - boost::tuple http_parser::incoming( - buffer::const_interval recv_buffer, bool& error) - { - TORRENT_ASSERT(recv_buffer.left() >= m_recv_buffer.left()); - boost::tuple ret(0, 0); - - // early exit if there's nothing new in the receive buffer - if (recv_buffer.left() == m_recv_buffer.left()) return ret; - m_recv_buffer = recv_buffer; - - if (m_state == error_state) - { - error = true; - return ret; - } - - char const* pos = recv_buffer.begin + m_recv_pos; - if (m_state == read_status) - { - TORRENT_ASSERT(!m_finished); - char const* newline = std::find(pos, recv_buffer.end, '\n'); - // if we don't have a full line yet, wait. 
- if (newline == recv_buffer.end) return ret; - - if (newline == pos) - { - m_state = error_state; - error = true; - return ret; - } - - char const* line_end = newline; - if (pos != line_end && *(line_end - 1) == '\r') --line_end; - - std::istringstream line(std::string(pos, line_end)); - ++newline; - int incoming = (int)std::distance(pos, newline); - m_recv_pos += incoming; - boost::get<1>(ret) += incoming; - pos = newline; - - line >> m_protocol; - if (m_protocol.substr(0, 5) == "HTTP/") - { - line >> m_status_code; - std::getline(line, m_server_message); - } - else - { - m_method = m_protocol; - std::transform(m_method.begin(), m_method.end(), m_method.begin(), &to_lower); - m_protocol.clear(); - line >> m_path >> m_protocol; - m_status_code = 0; - } - m_state = read_header; - } - - if (m_state == read_header) - { - TORRENT_ASSERT(!m_finished); - char const* newline = std::find(pos, recv_buffer.end, '\n'); - std::string line; - - while (newline != recv_buffer.end && m_state == read_header) - { - // if the LF character is preceeded by a CR - // charachter, don't copy it into the line string. - char const* line_end = newline; - if (pos != line_end && *(line_end - 1) == '\r') --line_end; - line.assign(pos, line_end); - ++newline; - m_recv_pos += newline - pos; - boost::get<1>(ret) += newline - pos; - pos = newline; - - std::string::size_type separator = line.find(':'); - if (separator == std::string::npos) - { - // this means we got a blank line, - // the header is finished and the body - // starts. 
- m_state = read_body; - m_body_start_pos = m_recv_pos; - break; - } - - std::string name = line.substr(0, separator); - std::transform(name.begin(), name.end(), name.begin(), &to_lower); - ++separator; - // skip whitespace - while (separator < line.size() - && (line[separator] == ' ' || line[separator] == '\t')) - ++separator; - std::string value = line.substr(separator, std::string::npos); - m_header.insert(std::make_pair(name, value)); - - if (name == "content-length") - { - m_content_length = atoi(value.c_str()); - } - else if (name == "content-range") - { - std::stringstream range_str(value); - char dummy; - std::string bytes; - size_type range_start, range_end; - // apparently some web servers do not send the "bytes" - // in their content-range - if (value.find(' ') != std::string::npos) - range_str >> bytes; - range_str >> range_start >> dummy >> range_end; - if (!range_str || range_end < range_start) - { - m_state = error_state; - error = true; - return ret; - } - // the http range is inclusive - m_content_length = range_end - range_start + 1; - } - - TORRENT_ASSERT(m_recv_pos <= (int)recv_buffer.left()); - newline = std::find(pos, recv_buffer.end, '\n'); - } - } - - if (m_state == read_body) - { - int incoming = recv_buffer.end - pos; - if (m_recv_pos - m_body_start_pos + incoming > m_content_length - && m_content_length >= 0) - incoming = m_content_length - m_recv_pos + m_body_start_pos; - - TORRENT_ASSERT(incoming >= 0); - m_recv_pos += incoming; - boost::get<0>(ret) += incoming; - - if (m_content_length >= 0 - && m_recv_pos - m_body_start_pos >= m_content_length) - { - m_finished = true; - } - } - return ret; - } - - buffer::const_interval http_parser::get_body() const - { - TORRENT_ASSERT(m_state == read_body); - if (m_content_length >= 0) - return buffer::const_interval(m_recv_buffer.begin + m_body_start_pos - , m_recv_buffer.begin + (std::min)(m_recv_pos - , m_body_start_pos + m_content_length)); - else - return 
buffer::const_interval(m_recv_buffer.begin + m_body_start_pos - , m_recv_buffer.begin + m_recv_pos); - } - - void http_parser::reset() - { - m_recv_pos = 0; - m_body_start_pos = 0; - m_status_code = -1; - m_content_length = -1; - m_finished = false; - m_state = read_status; - m_recv_buffer.begin = 0; - m_recv_buffer.end = 0; - m_header.clear(); - } http_tracker_connection::http_tracker_connection( io_service& ios