From f57e1dd6222e5279f037f2b0cfa9c0685ade08ca Mon Sep 17 00:00:00 2001 From: Arvid Norberg Date: Sun, 31 Oct 2010 03:05:11 +0000 Subject: [PATCH] chunked encoding fixes and added support for http seeds as well --- ChangeLog | 2 +- include/libtorrent/http_seed_connection.hpp | 12 +++ src/http_seed_connection.cpp | 91 +++++++++++++++++---- src/web_peer_connection.cpp | 48 +++++++---- test/test_web_seed.cpp | 5 +- 5 files changed, 119 insertions(+), 39 deletions(-) diff --git a/ChangeLog b/ChangeLog index 13c52afa1..810c1fccb 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,5 @@ * graceful peer disconnect mode which finishes transactions before disconnecting peers - * support chunked encoding for web seeds (only for BEP 19, web seeds) + * support chunked encoding for web seeds * optimized session startup time * support SSL for web seeds, through all proxies * support extending web seeds with custom authorization and extra headers diff --git a/include/libtorrent/http_seed_connection.hpp b/include/libtorrent/http_seed_connection.hpp index 8d34ad22f..518e7dc03 100644 --- a/include/libtorrent/http_seed_connection.hpp +++ b/include/libtorrent/http_seed_connection.hpp @@ -121,6 +121,18 @@ namespace libtorrent // the number of bytes left to receive of the response we're // currently parsing int m_response_left; + + // this is the offset inside the current receive + // buffer where the next chunk header will be. + // this is updated for each chunk header that's + // parsed. It does not necessarily point to a valid + // offset in the receive buffer, if we haven't received + // it yet. This offset never includes the HTTP header + int m_chunk_pos; + + // this is the number of bytes we've already received + // from the next chunk header we're waiting for + int m_partial_chunk_header; }; } diff --git a/src/http_seed_connection.cpp b/src/http_seed_connection.cpp index 2b4fb6408..39b5a4cca 100644 --- a/src/http_seed_connection.cpp +++ b/src/http_seed_connection.cpp @@ -65,6 +65,8 @@ namespace libtorrent , web_seed_entry::headers_t const& extra_headers) : web_connection_base(ses, t, s, remote, url, peerinfo, auth, extra_headers) , m_url(url) + , m_chunk_pos(0) + , m_partial_chunk_header(0) { INVARIANT_CHECK; @@ -114,7 +116,7 @@ namespace libtorrent else { int receive_buffer_size = receive_buffer().left() - m_parser.body_start(); - TORRENT_ASSERT(receive_buffer_size < t->block_size()); + TORRENT_ASSERT(receive_buffer_size <= t->block_size()); ret.bytes_downloaded = t->block_size() - receive_buffer_size; } // this is used to make sure that the block_index stays within @@ -232,12 +234,12 @@ namespace libtorrent peer_request front_request = m_requests.front(); - int payload = 0; - int protocol = 0; bool header_finished = m_parser.header_finished(); if (!header_finished) { bool error = false; + int protocol = 0; + int payload = 0; boost::tie(payload, protocol) = m_parser.incoming(recv_buffer, error); m_statistics.received_bytes(0, protocol); bytes_transferred -= protocol; @@ -341,23 +343,66 @@ namespace libtorrent disconnect(errors::invalid_range, 2); return; } - if (payload > m_response_left) payload = m_response_left; m_body_start = m_parser.body_start(); - m_response_left -= payload; - m_statistics.received_bytes(payload, 0); - incoming_piece_fragment(payload); - } - else - { - payload = bytes_transferred; - if (payload > m_response_left) payload = m_response_left; - if (payload > front_request.length) payload = front_request.length; - m_statistics.received_bytes(payload, 0); - incoming_piece_fragment(payload); - m_response_left -= payload; } + recv_buffer.begin += m_body_start; + // ========================= + // === CHUNKED ENCODING === + // ========================= + while (m_parser.chunked_encoding() + && m_chunk_pos >= 0 + && m_chunk_pos < recv_buffer.left()) + { + int header_size = 0; + size_type chunk_size = 0; + buffer::const_interval chunk_start = recv_buffer; + chunk_start.begin += m_chunk_pos; + TORRENT_ASSERT(chunk_start.begin[0] == '\r' || is_hex(chunk_start.begin, 1)); + bool ret = m_parser.parse_chunk_header(chunk_start, &chunk_size, &header_size); + if (!ret) + { + TORRENT_ASSERT(bytes_transferred >= chunk_start.left() - m_partial_chunk_header); + bytes_transferred -= chunk_start.left() - m_partial_chunk_header; + m_statistics.received_bytes(0, chunk_start.left() - m_partial_chunk_header); + m_partial_chunk_header = chunk_start.left(); + if (bytes_transferred == 0) return; + break; + } + else + { +#ifdef TORRENT_VERBOSE_LOGGING + (*m_logger) << time_now_string() << " *** parsed chunk: " << chunk_size + << " header_size: " << header_size << "\n"; +#endif + TORRENT_ASSERT(bytes_transferred >= header_size - m_partial_chunk_header); + bytes_transferred -= header_size - m_partial_chunk_header; + m_statistics.received_bytes(0, header_size - m_partial_chunk_header); + m_partial_chunk_header = 0; + TORRENT_ASSERT(chunk_size != 0 || chunk_start.left() <= header_size || chunk_start.begin[header_size] == 'H'); + // cut out the chunk header from the receive buffer + cut_receive_buffer(header_size, t->block_size() + 1024, m_chunk_pos + m_body_start); + recv_buffer = receive_buffer(); + recv_buffer.begin += m_body_start; + m_chunk_pos += chunk_size; + if (chunk_size == 0) + { + TORRENT_ASSERT(receive_buffer().left() < m_chunk_pos + m_body_start + 1 + || receive_buffer()[m_chunk_pos + m_body_start] == 'H' + || (m_parser.chunked_encoding() && receive_buffer()[m_chunk_pos + m_body_start] == '\r')); + m_chunk_pos = -1; + } + } + } + + int payload = bytes_transferred; + if (payload > m_response_left) payload = m_response_left; + if (payload > front_request.length) payload = front_request.length; + m_statistics.received_bytes(payload, 0); + incoming_piece_fragment(payload); + m_response_left -= payload; + if (m_parser.status_code() == 503) { if (!m_parser.finished()) return; @@ -381,10 +426,22 @@ namespace libtorrent if (recv_buffer.left() < front_request.length) break; + // if the response is chunked, we need to receive the last + // terminating chunk and the tail headers before we can proceed + if (m_parser.chunked_encoding() && m_chunk_pos >= 0) break; + m_requests.pop_front(); incoming_piece(front_request, recv_buffer.begin); if (associated_torrent().expired()) return; - cut_receive_buffer(m_body_start + front_request.length, t->block_size() + 1024); + + int size_to_cut = m_body_start + front_request.length; + TORRENT_ASSERT(receive_buffer().left() < size_to_cut + 1 + || receive_buffer()[size_to_cut] == 'H' + || (m_parser.chunked_encoding() && receive_buffer()[size_to_cut] == '\r')); + + cut_receive_buffer(size_to_cut, t->block_size() + 1024); + if (m_response_left == 0) m_chunk_pos = 0; + else m_chunk_pos -= front_request.length; bytes_transferred -= payload; m_body_start = 0; if (m_response_left > 0) continue; diff --git a/src/web_peer_connection.cpp b/src/web_peer_connection.cpp index b71e8bf9f..0e8ea0799 100644 --- a/src/web_peer_connection.cpp +++ b/src/web_peer_connection.cpp @@ -552,10 +552,7 @@ namespace libtorrent bytes_transferred -= chunk_start.left() - m_partial_chunk_header; m_statistics.received_bytes(0, chunk_start.left() - m_partial_chunk_header); m_partial_chunk_header = chunk_start.left(); - if (bytes_transferred == 0) - { - return; - } + if (bytes_transferred == 0) return; break; } else @@ -570,7 +567,7 @@ namespace libtorrent m_partial_chunk_header = 0; TORRENT_ASSERT(chunk_size != 0 || chunk_start.left() <= header_size || chunk_start.begin[header_size] == 'H'); // cut out the chunk header from the receive buffer - cut_receive_buffer(header_size, t->block_size() + 1024, m_chunk_pos); + cut_receive_buffer(header_size, t->block_size() + 1024, m_body_start + m_chunk_pos); recv_buffer = receive_buffer(); recv_buffer.begin += m_body_start; m_chunk_pos += chunk_size; @@ -661,16 +658,24 @@ namespace libtorrent int piece_size = int(m_piece.size()); int copy_size = (std::min)((std::min)(front_request.length - piece_size , recv_buffer.left()), int(range_end - range_start - m_received_body)); - m_piece.resize(piece_size + copy_size); - TORRENT_ASSERT(copy_size > 0); - std::memcpy(&m_piece[0] + piece_size, recv_buffer.begin, copy_size); - TORRENT_ASSERT(int(m_piece.size()) <= front_request.length); - recv_buffer.begin += copy_size; - m_received_body += copy_size; - m_body_start += copy_size; - TORRENT_ASSERT(m_received_body <= range_end - range_start); - TORRENT_ASSERT(int(m_piece.size()) <= front_request.length); - incoming_piece_fragment(copy_size); + if (copy_size > 0) + { + m_piece.resize(piece_size + copy_size); + std::memcpy(&m_piece[0] + piece_size, recv_buffer.begin, copy_size); + TORRENT_ASSERT(int(m_piece.size()) <= front_request.length); + recv_buffer.begin += copy_size; + m_received_body += copy_size; + m_body_start += copy_size; + if (m_chunk_pos > 0) + { + TORRENT_ASSERT(m_chunk_pos >= copy_size); + m_chunk_pos -= copy_size; + } + TORRENT_ASSERT(m_received_body <= range_end - range_start); + TORRENT_ASSERT(int(m_piece.size()) <= front_request.length); + incoming_piece_fragment(copy_size); + } + if (int(m_piece.size()) == front_request.length) { // each call to incoming_piece() may result in us becoming @@ -707,7 +712,7 @@ namespace libtorrent m_received_body += r.length; TORRENT_ASSERT(receive_buffer().begin + m_body_start == recv_buffer.begin); TORRENT_ASSERT(m_received_body <= range_end - range_start); - cut_receive_buffer(r.length + m_body_start, t->block_size() + 1024); + cut_receive_buffer(m_body_start + r.length, t->block_size() + 1024); if (m_chunk_pos > 0) { TORRENT_ASSERT(m_chunk_pos >= r.length); @@ -742,9 +747,18 @@ namespace libtorrent } TORRENT_ASSERT(m_received_body <= range_end - range_start); - if (m_received_body == range_end - range_start) + // if we're in chunked encoding mode, we have to wait for the complete + // tail header before we can consider have received the block, otherwise + // we'll get out of sync with the next http response. m_chunk_pos is set + // to -1 when the tail header has been received + if (m_received_body == range_end - range_start + && (!m_parser.chunked_encoding() || m_chunk_pos == -1)) { int size_to_cut = recv_buffer.begin - receive_buffer().begin; + + TORRENT_ASSERT(receive_buffer().left() < size_to_cut + 1 + || receive_buffer()[size_to_cut] == 'H'); + cut_receive_buffer(size_to_cut, t->block_size() + 1024); if (m_chunk_pos > 0) { diff --git a/test/test_web_seed.cpp b/test/test_web_seed.cpp index 05d5e5417..fd853b39b 100644 --- a/test/test_web_seed.cpp +++ b/test/test_web_seed.cpp @@ -281,10 +281,7 @@ int test_main() int ret = 0; for (int i = 0; i < 2; ++i) { - // we only support chunked encoding for - // URL seeds (not HTTP seeds). - // that's why the variable limit on this loop - for (int j = 0; j < (i==0?2:1); ++j) + for (int j = 0; j < 2; ++j) { #ifdef TORRENT_USE_OPENSSL run_suite("https", i, j);