From ee7c713af7de03e59971761f74c817252064a1e9 Mon Sep 17 00:00:00 2001 From: Arvid Norberg Date: Thu, 6 Jan 2011 04:08:57 +0000 Subject: [PATCH] support chunked encoding in http downloads (http_connection) --- ChangeLog | 1 + include/libtorrent/http_connection.hpp | 2 +- include/libtorrent/http_parser.hpp | 18 +++- src/http_connection.cpp | 29 ++++++- src/http_parser.cpp | 115 +++++++++++++++++++++---- test/test_http_connection.cpp | 11 ++- test/test_primitives.cpp | 32 +++++++ 7 files changed, 187 insertions(+), 21 deletions(-) diff --git a/ChangeLog b/ChangeLog index 771837616..47038f072 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,4 @@ + * support chunked encoding in http downloads (http_connection) * support adding torrents by url to the .torrent file * support CDATA tags in xml parser * use a python python dictionary for settings instead of session_settings object (in python bindings) diff --git a/include/libtorrent/http_connection.hpp b/include/libtorrent/http_connection.hpp index abd250d03..8d4fff1d7 100644 --- a/include/libtorrent/http_connection.hpp +++ b/include/libtorrent/http_connection.hpp @@ -158,7 +158,7 @@ private: , error_code const& e); void on_assign_bandwidth(error_code const& e); - void callback(error_code const& e, char const* data = 0, int size = 0); + void callback(error_code e, char const* data = 0, int size = 0); std::vector m_recvbuffer; socket_type m_sock; diff --git a/include/libtorrent/http_parser.hpp b/include/libtorrent/http_parser.hpp index 6ee18d3d5..49ac8e296 100644 --- a/include/libtorrent/http_parser.hpp +++ b/include/libtorrent/http_parser.hpp @@ -36,6 +36,7 @@ POSSIBILITY OF SUCH DAMAGE. 
#include #include #include +#include #ifdef _MSC_VER #pragma warning(push, 1) @@ -115,9 +116,10 @@ namespace libtorrent void reset(); std::map const& headers() const { return m_header; } + std::vector > const& chunks() const { return m_chunked_ranges; } private: - int m_recv_pos; + size_type m_recv_pos; int m_status_code; std::string m_method; std::string m_path; @@ -136,6 +138,20 @@ namespace libtorrent bool m_chunked_encoding; bool m_finished; + + // contains offsets of the first and one-past-end of + // each chunked range in the response + std::vector > m_chunked_ranges; + + // while reading a chunk, this is the offset where the + // current chunk will end (it refers to the first character + // in the chunk tail header or the next chunk header) + int m_cur_chunk_end; + + // the sum of all chunk headers read so far + int m_chunk_header_size; + + int m_partial_chunk_header; }; } diff --git a/src/http_connection.cpp b/src/http_connection.cpp index 7fb21b915..04b48d78c 100644 --- a/src/http_connection.cpp +++ b/src/http_connection.cpp @@ -528,13 +528,35 @@ void http_connection::on_connect(error_code const& e) } } -void http_connection::callback(error_code const& e, char const* data, int size) +void http_connection::callback(error_code e, char const* data, int size) { if (m_bottled && m_called) return; std::vector buf; if (m_bottled && m_parser.header_finished()) { + if (m_parser.chunked_encoding()) + { + // go through all chunks and compact them + // since we're bottled, and the buffer is ours after all + // it's OK to mutate it + char* write_ptr = (char*)data; + // the offsets in the array are from the start of the + // buffer, not start of the body, so subtract the size + // of the HTTP header from them + int offset = m_parser.body_start(); + std::vector > const& chunks = m_parser.chunks(); + for (std::vector >::const_iterator i = chunks.begin() + , end(chunks.end()); i != end; ++i) + { + int len = i->second - i->first; + if (i->first - offset + len > size) len = 
size - i->first + offset; + memmove(write_ptr, data + i->first - offset, len); + write_ptr += len; + } + size = write_ptr - data; + } + std::string const& encoding = m_parser.header("content-encoding"); if ((encoding == "gzip" || encoding == "x-gzip") && size > 0 && data) { @@ -548,6 +570,11 @@ void http_connection::callback(error_code const& e, char const* data, int size) size = int(buf.size()); data = size == 0 ? 0 : &buf[0]; } + + // if we completed the whole response, no need + // to tell the user that the connection was closed by + // the server or by us. Just clear any error + if (m_parser.finished()) e.clear(); } m_called = true; error_code ec; diff --git a/src/http_parser.cpp b/src/http_parser.cpp index 87f441635..ae890d440 100644 --- a/src/http_parser.cpp +++ b/src/http_parser.cpp @@ -71,6 +71,9 @@ namespace libtorrent , m_body_start_pos(0) , m_chunked_encoding(false) , m_finished(false) + , m_cur_chunk_end(-1) + , m_chunk_header_size(0) + , m_partial_chunk_header(0) {} boost::tuple http_parser::incoming( @@ -176,7 +179,7 @@ restart_response: m_state = read_body; // if this is a request (not a response) // we're done once we reach the end of the headers - if (!m_method.empty()) m_finished = true; +// if (!m_method.empty()) m_finished = true; m_body_start_pos = m_recv_pos; break; } @@ -230,7 +233,7 @@ restart_response: m_chunked_encoding = string_begins_no_case("chunked", value.c_str()); } - TORRENT_ASSERT(m_recv_pos <= (int)recv_buffer.left()); + TORRENT_ASSERT(m_recv_pos <= recv_buffer.left()); newline = std::find(pos, recv_buffer.end, '\n'); } boost::get<1>(ret) += newline - (m_recv_buffer.begin + start_pos); @@ -239,15 +242,84 @@ restart_response: if (m_state == read_body) { int incoming = recv_buffer.end - pos; - if (m_recv_pos - m_body_start_pos + incoming > m_content_length - && m_content_length >= 0) - incoming = m_content_length - m_recv_pos + m_body_start_pos; - TORRENT_ASSERT(incoming >= 0); - m_recv_pos += incoming; - boost::get<0>(ret) += 
incoming; + if (m_chunked_encoding) + { + if (m_cur_chunk_end == -1) + m_cur_chunk_end = m_body_start_pos; + + while (m_cur_chunk_end <= m_recv_pos + incoming && !m_finished && incoming > 0) + { + int payload = m_cur_chunk_end - m_recv_pos; + if (payload > 0) + { + m_recv_pos += payload; + boost::get<0>(ret) += payload; + incoming -= payload; + } + buffer::const_interval buf(recv_buffer.begin + m_cur_chunk_end, recv_buffer.end); + size_type chunk_size; + int header_size; + if (parse_chunk_header(buf, &chunk_size, &header_size)) + { + if (chunk_size > 0) + { + std::pair chunk_range(m_cur_chunk_end + header_size + , m_cur_chunk_end + header_size + chunk_size); + m_chunked_ranges.push_back(chunk_range); + } + m_cur_chunk_end += header_size + chunk_size; + if (chunk_size == 0) + { + m_finished = true; + TORRENT_ASSERT(m_content_length < 0 || m_recv_pos - m_body_start_pos + - m_chunk_header_size == m_content_length); + } + header_size -= m_partial_chunk_header; + m_partial_chunk_header = 0; +// fprintf(stderr, "parse_chunk_header(%d, -> %d, -> %d) -> %d\n" +// " incoming = %d\n m_recv_pos = %d\n m_cur_chunk_end = %d\n" +// " content-length = %d\n" +// , buf.left(), int(chunk_size), header_size, 1, incoming, int(m_recv_pos) +// , m_cur_chunk_end, int(m_content_length)); + } + else + { + m_partial_chunk_header += incoming; + header_size = incoming; + +// fprintf(stderr, "parse_chunk_header(%d, -> %d, -> %d) -> %d\n" +// " incoming = %d\n m_recv_pos = %d\n m_cur_chunk_end = %d\n" +// " content-length = %d\n" +// , buf.left(), int(chunk_size), header_size, 0, incoming, int(m_recv_pos) +// , m_cur_chunk_end, int(m_content_length)); + } + m_chunk_header_size += header_size; + m_recv_pos += header_size; + boost::get<1>(ret) += header_size; + incoming -= header_size; + } + if (incoming > 0) + { + m_recv_pos += incoming; + boost::get<0>(ret) += incoming; + incoming = 0; + } + } + else + { + int payload_received = m_recv_pos - m_body_start_pos + incoming; + if (payload_received > 
m_content_length + && m_content_length >= 0) + incoming = m_content_length - m_recv_pos + m_body_start_pos; + + TORRENT_ASSERT(incoming >= 0); + m_recv_pos += incoming; + boost::get<0>(ret) += incoming; + } if (m_content_length >= 0 + && !m_chunked_encoding && m_recv_pos - m_body_start_pos >= m_content_length) { m_finished = true; @@ -265,8 +337,9 @@ restart_response: // is terminated by a newline. we're likely to see one // before the actual header. - if (pos[0] == '\r' && pos[1] == '\n') pos += 2; - else if (pos[0] == '\n') pos += 1; + if (pos < buf.end && pos[0] == '\r') ++pos; + if (pos < buf.end && pos[0] == '\n') ++pos; + if (pos == buf.end) return false; char const* newline = std::find(pos, buf.end, '\n'); if (newline == buf.end) return false; @@ -333,6 +406,7 @@ restart_response: ++separator; std::string value = line.substr(separator, std::string::npos); tail_headers.insert(std::make_pair(name, value)); +// fprintf(stderr, "tail_header: %s: %s\n", name.c_str(), value.c_str()); newline = std::find(pos, buf.end, '\n'); } @@ -342,17 +416,19 @@ restart_response: buffer::const_interval http_parser::get_body() const { TORRENT_ASSERT(m_state == read_body); - if (m_content_length >= 0) - return buffer::const_interval(m_recv_buffer.begin + m_body_start_pos - , m_recv_buffer.begin + (std::min)(size_type(m_recv_pos) - , m_body_start_pos + m_content_length)); - else - return buffer::const_interval(m_recv_buffer.begin + m_body_start_pos - , m_recv_buffer.begin + m_recv_pos); + size_type last_byte = m_chunked_encoding && !m_chunked_ranges.empty() + ? (std::min)(m_chunked_ranges.back().second, m_recv_pos) + : m_content_length < 0 + ? 
m_recv_pos : (std::min)(m_body_start_pos + m_content_length, m_recv_pos); + + TORRENT_ASSERT(last_byte >= m_body_start_pos); + return buffer::const_interval(m_recv_buffer.begin + m_body_start_pos + , m_recv_buffer.begin + last_byte); } void http_parser::reset() { + m_method.clear(); m_recv_pos = 0; m_body_start_pos = 0; m_status_code = -1; @@ -364,6 +440,11 @@ restart_response: m_recv_buffer.begin = 0; m_recv_buffer.end = 0; m_header.clear(); + m_chunked_encoding = false; + m_chunked_ranges.clear(); + m_cur_chunk_end = -1; + m_chunk_header_size = 0; + m_partial_chunk_header = 0; } } diff --git a/test/test_http_connection.cpp b/test/test_http_connection.cpp index 40069cb4c..38317a8c2 100644 --- a/test/test_http_connection.cpp +++ b/test/test_http_connection.cpp @@ -81,6 +81,7 @@ void http_handler(error_code const& ec, http_parser const& parser ++handler_called; data_size = size; g_error_code = ec; + TORRENT_ASSERT(size == 0 || parser.finished()); if (parser.header_finished()) { @@ -196,8 +197,9 @@ int test_main() ps.port = 8034; ps.username = "testuser"; ps.password = "testpass"; + int port = 0; - int port = start_web_server(); + port = start_web_server(); for (int i = 0; i < 5; ++i) { ps.type = (proxy_settings::proxy_type)i; @@ -215,6 +217,13 @@ int test_main() stop_web_server(); #endif + // test chunked encoding + port = start_web_server(false, true); + ps.type = proxy_settings::none; + run_suite("http", ps, port); + + stop_web_server(); + std::remove("test_file"); return 0; } diff --git a/test/test_primitives.cpp b/test/test_primitives.cpp index f0a6cc133..6dd03bf6a 100644 --- a/test/test_primitives.cpp +++ b/test/test_primitives.cpp @@ -100,6 +100,7 @@ tuple feed_bytes(http_parser& parser, char const* str) TORRENT_ASSERT(payload + protocol == chunk_size); } TEST_CHECK(prev == make_tuple(0, 0, false) || ret == prev); + TEST_EQUAL(ret.get<0>() + ret.get<1>(), strlen(str)); prev = ret; } return ret; @@ -1083,6 +1084,37 @@ int test_main() parser.reset(); 
TEST_CHECK(!parser.finished()); + // test chunked encoding + char const* chunked_test = "HTTP/1.1 200 OK\r\n" + "Content-Length: 20\r\n" + "Content-Type: text/plain\r\n" + "Transfer-Encoding: chunked\r\n" + "\r\n" + "4\r\n" + "test\r\n" + "10\r\n" + "0123456789abcdef\r\n" + "0\r\n" + "Test-header: foobar\r\n" + "\r\n"; + + received = feed_bytes(parser, chunked_test); + + printf("payload: %d protocol: %d\n", received.get<0>(), received.get<1>()); + TEST_CHECK(received == make_tuple(20, strlen(chunked_test) - 20, false)); + TEST_CHECK(parser.finished()); + TEST_CHECK(std::equal(parser.get_body().begin, parser.get_body().end + , "4\r\ntest\r\n10\r\n0123456789abcdef")); + TEST_CHECK(parser.header("test-header") == "foobar"); + TEST_CHECK(parser.header("content-type") == "text/plain"); + TEST_CHECK(atoi(parser.header("content-length").c_str()) == 20); + TEST_CHECK(parser.chunked_encoding()); + typedef std::pair chunk_range; + std::vector cmp; + cmp.push_back(chunk_range(96, 100)); + cmp.push_back(chunk_range(106, 122)); + TEST_CHECK(cmp == parser.chunks()); + // make sure we support trackers with incorrect line endings char const* tracker_response = "HTTP/1.1 200 OK\n"