From f57e1dd6222e5279f037f2b0cfa9c0685ade08ca Mon Sep 17 00:00:00 2001
From: Arvid Norberg <arvid@libtorrent.org>
Date: Sun, 31 Oct 2010 03:05:11 +0000
Subject: [PATCH] chunked encoding fixes and added support for http seeds as
 well

---
 ChangeLog                                   |  2 +-
 include/libtorrent/http_seed_connection.hpp | 12 +++
 src/http_seed_connection.cpp                | 91 +++++++++++++++++----
 src/web_peer_connection.cpp                 | 48 +++++++----
 test/test_web_seed.cpp                      |  5 +-
 5 files changed, 119 insertions(+), 39 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 13c52afa1..810c1fccb 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,5 @@
 	* graceful peer disconnect mode which finishes transactions before disconnecting peers
-	* support chunked encoding for web seeds (only for BEP 19, web seeds)
+	* support chunked encoding for web seeds
 	* optimized session startup time
 	* support SSL for web seeds, through all proxies
 	* support extending web seeds with custom authorization and extra headers
diff --git a/include/libtorrent/http_seed_connection.hpp b/include/libtorrent/http_seed_connection.hpp
index 8d34ad22f..518e7dc03 100644
--- a/include/libtorrent/http_seed_connection.hpp
+++ b/include/libtorrent/http_seed_connection.hpp
@@ -121,6 +121,18 @@ namespace libtorrent
 		// the number of bytes left to receive of the response we're
 		// currently parsing
 		int m_response_left;		
+
+		// this is the offset inside the current receive
+		// buffer where the next chunk header will be.
+		// this is updated for each chunk header that's
+		// parsed. It does not necessarily point to a valid
+		// offset in the receive buffer, if we haven't received
+		// it yet. This offset never includes the HTTP header
+		int m_chunk_pos;
+
+		// this is the number of bytes we've already received
+		// from the next chunk header we're waiting for
+		int m_partial_chunk_header;
 	};
 }
 
diff --git a/src/http_seed_connection.cpp b/src/http_seed_connection.cpp
index 2b4fb6408..39b5a4cca 100644
--- a/src/http_seed_connection.cpp
+++ b/src/http_seed_connection.cpp
@@ -65,6 +65,8 @@ namespace libtorrent
 		, web_seed_entry::headers_t const& extra_headers)
 		: web_connection_base(ses, t, s, remote, url, peerinfo, auth, extra_headers)
 		, m_url(url)
+		, m_chunk_pos(0)
+		, m_partial_chunk_header(0)
 	{
 		INVARIANT_CHECK;
 
@@ -114,7 +116,7 @@ namespace libtorrent
 		else
 		{
 			int receive_buffer_size = receive_buffer().left() - m_parser.body_start();
-			TORRENT_ASSERT(receive_buffer_size < t->block_size());
+			TORRENT_ASSERT(receive_buffer_size <= t->block_size());
 			ret.bytes_downloaded = t->block_size() - receive_buffer_size;
 		}
 		// this is used to make sure that the block_index stays within
@@ -232,12 +234,12 @@ namespace libtorrent
 
 			peer_request front_request = m_requests.front();
 
-			int payload = 0;
-			int protocol = 0;
 			bool header_finished = m_parser.header_finished();
 			if (!header_finished)
 			{
 				bool error = false;
+				int protocol = 0;
+				int payload = 0;
 				boost::tie(payload, protocol) = m_parser.incoming(recv_buffer, error);
 				m_statistics.received_bytes(0, protocol);
 				bytes_transferred -= protocol;
@@ -341,23 +343,66 @@ namespace libtorrent
 					disconnect(errors::invalid_range, 2);
 					return;
 				}
-				if (payload > m_response_left) payload = m_response_left;
 				m_body_start = m_parser.body_start();
-				m_response_left -= payload;
-				m_statistics.received_bytes(payload, 0);
-				incoming_piece_fragment(payload);
-			}
-			else
-			{
-				payload = bytes_transferred;
-				if (payload > m_response_left) payload = m_response_left;
-				if (payload > front_request.length) payload = front_request.length;
-				m_statistics.received_bytes(payload, 0);
-				incoming_piece_fragment(payload);
-				m_response_left -= payload;
 			}
+
 			recv_buffer.begin += m_body_start;
 
+			// =========================
+			// === CHUNKED ENCODING  ===
+			// =========================
+			while (m_parser.chunked_encoding()
+				&& m_chunk_pos >= 0
+				&& m_chunk_pos < recv_buffer.left())
+			{
+				int header_size = 0;
+				size_type chunk_size = 0;
+				buffer::const_interval chunk_start = recv_buffer;
+				chunk_start.begin += m_chunk_pos;
+				TORRENT_ASSERT(chunk_start.begin[0] == '\r' || is_hex(chunk_start.begin, 1));
+				bool ret = m_parser.parse_chunk_header(chunk_start, &chunk_size, &header_size);
+				if (!ret)
+				{
+					TORRENT_ASSERT(bytes_transferred >= chunk_start.left() - m_partial_chunk_header);
+					bytes_transferred -= chunk_start.left() - m_partial_chunk_header;
+					m_statistics.received_bytes(0, chunk_start.left() - m_partial_chunk_header);
+					m_partial_chunk_header = chunk_start.left();
+					if (bytes_transferred == 0) return;
+					break;
+				}
+				else
+				{
+#ifdef TORRENT_VERBOSE_LOGGING
+					(*m_logger) << time_now_string() << " *** parsed chunk: " << chunk_size
+						<< " header_size: " << header_size << "\n";
+#endif
+					TORRENT_ASSERT(bytes_transferred >= header_size - m_partial_chunk_header);
+					bytes_transferred -= header_size - m_partial_chunk_header;
+					m_statistics.received_bytes(0, header_size - m_partial_chunk_header);
+					m_partial_chunk_header = 0;
+					TORRENT_ASSERT(chunk_size != 0 || chunk_start.left() <= header_size || chunk_start.begin[header_size] == 'H');
+					// cut out the chunk header from the receive buffer
+					cut_receive_buffer(header_size, t->block_size() + 1024, m_chunk_pos + m_body_start);
+					recv_buffer = receive_buffer();
+					recv_buffer.begin += m_body_start;
+					m_chunk_pos += chunk_size;
+					if (chunk_size == 0)
+					{
+						TORRENT_ASSERT(receive_buffer().left() < m_chunk_pos + m_body_start + 1
+							|| receive_buffer()[m_chunk_pos + m_body_start] == 'H'
+							|| (m_parser.chunked_encoding() && receive_buffer()[m_chunk_pos + m_body_start] == '\r'));
+						m_chunk_pos = -1;
+					}
+				}
+			}
+
+			int payload = bytes_transferred;
+			if (payload > m_response_left) payload = m_response_left;
+			if (payload > front_request.length) payload = front_request.length;
+			m_statistics.received_bytes(payload, 0);
+			incoming_piece_fragment(payload);
+			m_response_left -= payload;
+
 			if (m_parser.status_code() == 503)
 			{
 				if (!m_parser.finished()) return;
@@ -381,10 +426,22 @@ namespace libtorrent
 
 			if (recv_buffer.left() < front_request.length) break;
 
+			// if the response is chunked, we need to receive the last
+			// terminating chunk and the tail headers before we can proceed
+			if (m_parser.chunked_encoding() && m_chunk_pos >= 0) break;
+
 			m_requests.pop_front();
 			incoming_piece(front_request, recv_buffer.begin);
 			if (associated_torrent().expired()) return;
-			cut_receive_buffer(m_body_start + front_request.length, t->block_size() + 1024);
+
+			int size_to_cut = m_body_start + front_request.length;
+			TORRENT_ASSERT(receive_buffer().left() < size_to_cut + 1
+				|| receive_buffer()[size_to_cut] == 'H'
+				|| (m_parser.chunked_encoding() && receive_buffer()[size_to_cut] == '\r'));
+
+			cut_receive_buffer(size_to_cut, t->block_size() + 1024);
+			if (m_response_left == 0) m_chunk_pos = 0;
+			else m_chunk_pos -= front_request.length;
 			bytes_transferred -= payload;
 			m_body_start = 0;
 			if (m_response_left > 0) continue;
diff --git a/src/web_peer_connection.cpp b/src/web_peer_connection.cpp
index b71e8bf9f..0e8ea0799 100644
--- a/src/web_peer_connection.cpp
+++ b/src/web_peer_connection.cpp
@@ -552,10 +552,7 @@ namespace libtorrent
 					bytes_transferred -= chunk_start.left() - m_partial_chunk_header;
 					m_statistics.received_bytes(0, chunk_start.left() - m_partial_chunk_header);
 					m_partial_chunk_header = chunk_start.left();
-					if (bytes_transferred == 0)
-					{
-						return;
-					}
+					if (bytes_transferred == 0) return;
 					break;
 				}
 				else
@@ -570,7 +567,7 @@ namespace libtorrent
 					m_partial_chunk_header = 0;
 					TORRENT_ASSERT(chunk_size != 0 || chunk_start.left() <= header_size || chunk_start.begin[header_size] == 'H');
 					// cut out the chunk header from the receive buffer
-					cut_receive_buffer(header_size, t->block_size() + 1024, m_chunk_pos);
+					cut_receive_buffer(header_size, t->block_size() + 1024, m_body_start + m_chunk_pos);
 					recv_buffer = receive_buffer();
 					recv_buffer.begin += m_body_start;
 					m_chunk_pos += chunk_size;
@@ -661,16 +658,24 @@ namespace libtorrent
 				int piece_size = int(m_piece.size());
 				int copy_size = (std::min)((std::min)(front_request.length - piece_size
 					, recv_buffer.left()), int(range_end - range_start - m_received_body));
-				m_piece.resize(piece_size + copy_size);
-				TORRENT_ASSERT(copy_size > 0);
-				std::memcpy(&m_piece[0] + piece_size, recv_buffer.begin, copy_size);
-				TORRENT_ASSERT(int(m_piece.size()) <= front_request.length);
-				recv_buffer.begin += copy_size;
-				m_received_body += copy_size;
-				m_body_start += copy_size;
-				TORRENT_ASSERT(m_received_body <= range_end - range_start);
-				TORRENT_ASSERT(int(m_piece.size()) <= front_request.length);
-				incoming_piece_fragment(copy_size);
+				if (copy_size > 0)
+				{
+					m_piece.resize(piece_size + copy_size);
+					std::memcpy(&m_piece[0] + piece_size, recv_buffer.begin, copy_size);
+					TORRENT_ASSERT(int(m_piece.size()) <= front_request.length);
+					recv_buffer.begin += copy_size;
+					m_received_body += copy_size;
+					m_body_start += copy_size;
+					if (m_chunk_pos > 0)
+					{
+						TORRENT_ASSERT(m_chunk_pos >= copy_size);
+						m_chunk_pos -= copy_size;
+					}
+					TORRENT_ASSERT(m_received_body <= range_end - range_start);
+					TORRENT_ASSERT(int(m_piece.size()) <= front_request.length);
+					incoming_piece_fragment(copy_size);
+				}
+
 				if (int(m_piece.size()) == front_request.length)
 				{
 					// each call to incoming_piece() may result in us becoming
@@ -707,7 +712,7 @@ namespace libtorrent
 				m_received_body += r.length;
 				TORRENT_ASSERT(receive_buffer().begin + m_body_start == recv_buffer.begin);
 				TORRENT_ASSERT(m_received_body <= range_end - range_start);
-				cut_receive_buffer(r.length + m_body_start, t->block_size() + 1024);
+				cut_receive_buffer(m_body_start + r.length, t->block_size() + 1024);
 				if (m_chunk_pos > 0)
 				{
 					TORRENT_ASSERT(m_chunk_pos >= r.length);
@@ -742,9 +747,18 @@ namespace libtorrent
 			}
 
 			TORRENT_ASSERT(m_received_body <= range_end - range_start);
-			if (m_received_body == range_end - range_start)
+			// if we're in chunked encoding mode, we have to wait for the complete
+			// tail header before we can consider have received the block, otherwise
+			// we'll get out of sync with the next http response. m_chunk_pos is set
+			// to -1 when the tail header has been received
+			if (m_received_body == range_end - range_start
+				&& (!m_parser.chunked_encoding() || m_chunk_pos == -1))
 			{
 				int size_to_cut = recv_buffer.begin - receive_buffer().begin;
+
+				TORRENT_ASSERT(receive_buffer().left() < size_to_cut + 1
+					|| receive_buffer()[size_to_cut] == 'H');
+
 				cut_receive_buffer(size_to_cut, t->block_size() + 1024);
 				if (m_chunk_pos > 0)
 				{
diff --git a/test/test_web_seed.cpp b/test/test_web_seed.cpp
index 05d5e5417..fd853b39b 100644
--- a/test/test_web_seed.cpp
+++ b/test/test_web_seed.cpp
@@ -281,10 +281,7 @@ int test_main()
 	int ret = 0;
 	for (int i = 0; i < 2; ++i)
 	{
-		// we only support chunked encoding for
-		// URL seeds (not HTTP seeds).
-		// that's why the variable limit on this loop
-		for (int j = 0; j < (i==0?2:1); ++j)
+		for (int j = 0; j < 2; ++j)
 		{
 #ifdef TORRENT_USE_OPENSSL
 			run_suite("https", i, j);