support chunked encoding in http downloads (http_connection)

This commit is contained in:
Arvid Norberg 2011-01-06 04:08:57 +00:00
parent 1cd1791375
commit ee7c713af7
7 changed files with 187 additions and 21 deletions

View File

@ -1,3 +1,4 @@
* support chunked encoding in http downloads (http_connection)
* support adding torrents by url to the .torrent file
* support CDATA tags in xml parser
* use a python dictionary for settings instead of session_settings object (in python bindings)

View File

@ -158,7 +158,7 @@ private:
, error_code const& e);
void on_assign_bandwidth(error_code const& e);
void callback(error_code const& e, char const* data = 0, int size = 0);
void callback(error_code e, char const* data = 0, int size = 0);
std::vector<char> m_recvbuffer;
socket_type m_sock;

View File

@ -36,6 +36,7 @@ POSSIBILITY OF SUCH DAMAGE.
#include <map>
#include <string>
#include <utility>
#include <vector>
#ifdef _MSC_VER
#pragma warning(push, 1)
@ -115,9 +116,10 @@ namespace libtorrent
void reset();
std::map<std::string, std::string> const& headers() const { return m_header; }
std::vector<std::pair<size_type, size_type> > const& chunks() const { return m_chunked_ranges; }
private:
int m_recv_pos;
size_type m_recv_pos;
int m_status_code;
std::string m_method;
std::string m_path;
@ -136,6 +138,20 @@ namespace libtorrent
bool m_chunked_encoding;
bool m_finished;
// contains offsets of the first and one-past-end of
// each chunked range in the response
std::vector<std::pair<size_type, size_type> > m_chunked_ranges;
// while reading a chunk, this is the offset where the
// current chunk will end (it refers to the first character
// in the chunk tail header or the next chunk header)
int m_cur_chunk_end;
// the sum of all chunk headers read so far
int m_chunk_header_size;
int m_partial_chunk_header;
};
}

View File

@ -528,13 +528,35 @@ void http_connection::on_connect(error_code const& e)
}
}
void http_connection::callback(error_code const& e, char const* data, int size)
void http_connection::callback(error_code e, char const* data, int size)
{
if (m_bottled && m_called) return;
std::vector<char> buf;
if (m_bottled && m_parser.header_finished())
{
if (m_parser.chunked_encoding())
{
// go through all chunks and compact them
// since we're bottled, and the buffer is ours after all
// it's OK to mutate it
char* write_ptr = (char*)data;
// the offsets in the array are from the start of the
// buffer, not start of the body, so subtract the size
// of the HTTP header from them
int offset = m_parser.body_start();
std::vector<std::pair<size_type, size_type> > const& chunks = m_parser.chunks();
for (std::vector<std::pair<size_type, size_type> >::const_iterator i = chunks.begin()
, end(chunks.end()); i != end; ++i)
{
int len = i->second - i->first;
if (i->first - offset + len > size) len = size - i->first + offset;
memmove(write_ptr, data + i->first - offset, len);
write_ptr += len;
}
size = write_ptr - data;
}
std::string const& encoding = m_parser.header("content-encoding");
if ((encoding == "gzip" || encoding == "x-gzip") && size > 0 && data)
{
@ -548,6 +570,11 @@ void http_connection::callback(error_code const& e, char const* data, int size)
size = int(buf.size());
data = size == 0 ? 0 : &buf[0];
}
// if we completed the whole response, no need
// to tell the user that the connection was closed by
// the server or by us. Just clear any error
if (m_parser.finished()) e.clear();
}
m_called = true;
error_code ec;

View File

@ -71,6 +71,9 @@ namespace libtorrent
, m_body_start_pos(0)
, m_chunked_encoding(false)
, m_finished(false)
, m_cur_chunk_end(-1)
, m_chunk_header_size(0)
, m_partial_chunk_header(0)
{}
boost::tuple<int, int> http_parser::incoming(
@ -176,7 +179,7 @@ restart_response:
m_state = read_body;
// if this is a request (not a response)
// we're done once we reach the end of the headers
if (!m_method.empty()) m_finished = true;
// if (!m_method.empty()) m_finished = true;
m_body_start_pos = m_recv_pos;
break;
}
@ -230,7 +233,7 @@ restart_response:
m_chunked_encoding = string_begins_no_case("chunked", value.c_str());
}
TORRENT_ASSERT(m_recv_pos <= (int)recv_buffer.left());
TORRENT_ASSERT(m_recv_pos <= recv_buffer.left());
newline = std::find(pos, recv_buffer.end, '\n');
}
boost::get<1>(ret) += newline - (m_recv_buffer.begin + start_pos);
@ -239,15 +242,84 @@ restart_response:
if (m_state == read_body)
{
int incoming = recv_buffer.end - pos;
if (m_recv_pos - m_body_start_pos + incoming > m_content_length
&& m_content_length >= 0)
incoming = m_content_length - m_recv_pos + m_body_start_pos;
TORRENT_ASSERT(incoming >= 0);
m_recv_pos += incoming;
boost::get<0>(ret) += incoming;
if (m_chunked_encoding)
{
if (m_cur_chunk_end == -1)
m_cur_chunk_end = m_body_start_pos;
while (m_cur_chunk_end <= m_recv_pos + incoming && !m_finished && incoming > 0)
{
int payload = m_cur_chunk_end - m_recv_pos;
if (payload > 0)
{
m_recv_pos += payload;
boost::get<0>(ret) += payload;
incoming -= payload;
}
buffer::const_interval buf(recv_buffer.begin + m_cur_chunk_end, recv_buffer.end);
size_type chunk_size;
int header_size;
if (parse_chunk_header(buf, &chunk_size, &header_size))
{
if (chunk_size > 0)
{
std::pair<int, int> chunk_range(m_cur_chunk_end + header_size
, m_cur_chunk_end + header_size + chunk_size);
m_chunked_ranges.push_back(chunk_range);
}
m_cur_chunk_end += header_size + chunk_size;
if (chunk_size == 0)
{
m_finished = true;
TORRENT_ASSERT(m_content_length < 0 || m_recv_pos - m_body_start_pos
- m_chunk_header_size == m_content_length);
}
header_size -= m_partial_chunk_header;
m_partial_chunk_header = 0;
// fprintf(stderr, "parse_chunk_header(%d, -> %d, -> %d) -> %d\n"
// " incoming = %d\n m_recv_pos = %d\n m_cur_chunk_end = %d\n"
// " content-length = %d\n"
// , buf.left(), int(chunk_size), header_size, 1, incoming, int(m_recv_pos)
// , m_cur_chunk_end, int(m_content_length));
}
else
{
m_partial_chunk_header += incoming;
header_size = incoming;
// fprintf(stderr, "parse_chunk_header(%d, -> %d, -> %d) -> %d\n"
// " incoming = %d\n m_recv_pos = %d\n m_cur_chunk_end = %d\n"
// " content-length = %d\n"
// , buf.left(), int(chunk_size), header_size, 0, incoming, int(m_recv_pos)
// , m_cur_chunk_end, int(m_content_length));
}
m_chunk_header_size += header_size;
m_recv_pos += header_size;
boost::get<1>(ret) += header_size;
incoming -= header_size;
}
if (incoming > 0)
{
m_recv_pos += incoming;
boost::get<0>(ret) += incoming;
incoming = 0;
}
}
else
{
int payload_received = m_recv_pos - m_body_start_pos + incoming;
if (payload_received > m_content_length
&& m_content_length >= 0)
incoming = m_content_length - m_recv_pos + m_body_start_pos;
TORRENT_ASSERT(incoming >= 0);
m_recv_pos += incoming;
boost::get<0>(ret) += incoming;
}
if (m_content_length >= 0
&& !m_chunked_encoding
&& m_recv_pos - m_body_start_pos >= m_content_length)
{
m_finished = true;
@ -265,8 +337,9 @@ restart_response:
// is terminated by a newline. we're likely to see one
// before the actual header.
if (pos[0] == '\r' && pos[1] == '\n') pos += 2;
else if (pos[0] == '\n') pos += 1;
if (pos < buf.end && pos[0] == '\r') ++pos;
if (pos < buf.end && pos[0] == '\n') ++pos;
if (pos == buf.end) return false;
char const* newline = std::find(pos, buf.end, '\n');
if (newline == buf.end) return false;
@ -333,6 +406,7 @@ restart_response:
++separator;
std::string value = line.substr(separator, std::string::npos);
tail_headers.insert(std::make_pair(name, value));
// fprintf(stderr, "tail_header: %s: %s\n", name.c_str(), value.c_str());
newline = std::find(pos, buf.end, '\n');
}
@ -342,17 +416,19 @@ restart_response:
buffer::const_interval http_parser::get_body() const
{
TORRENT_ASSERT(m_state == read_body);
if (m_content_length >= 0)
return buffer::const_interval(m_recv_buffer.begin + m_body_start_pos
, m_recv_buffer.begin + (std::min)(size_type(m_recv_pos)
, m_body_start_pos + m_content_length));
else
return buffer::const_interval(m_recv_buffer.begin + m_body_start_pos
, m_recv_buffer.begin + m_recv_pos);
size_type last_byte = m_chunked_encoding && !m_chunked_ranges.empty()
? (std::min)(m_chunked_ranges.back().second, m_recv_pos)
: m_content_length < 0
? m_recv_pos : (std::min)(m_body_start_pos + m_content_length, m_recv_pos);
TORRENT_ASSERT(last_byte >= m_body_start_pos);
return buffer::const_interval(m_recv_buffer.begin + m_body_start_pos
, m_recv_buffer.begin + last_byte);
}
void http_parser::reset()
{
m_method.clear();
m_recv_pos = 0;
m_body_start_pos = 0;
m_status_code = -1;
@ -364,6 +440,11 @@ restart_response:
m_recv_buffer.begin = 0;
m_recv_buffer.end = 0;
m_header.clear();
m_chunked_encoding = false;
m_chunked_ranges.clear();
m_cur_chunk_end = -1;
m_chunk_header_size = 0;
m_partial_chunk_header = 0;
}
}

View File

@ -81,6 +81,7 @@ void http_handler(error_code const& ec, http_parser const& parser
++handler_called;
data_size = size;
g_error_code = ec;
TORRENT_ASSERT(size == 0 || parser.finished());
if (parser.header_finished())
{
@ -196,8 +197,9 @@ int test_main()
ps.port = 8034;
ps.username = "testuser";
ps.password = "testpass";
int port = 0;
int port = start_web_server();
port = start_web_server();
for (int i = 0; i < 5; ++i)
{
ps.type = (proxy_settings::proxy_type)i;
@ -215,6 +217,13 @@ int test_main()
stop_web_server();
#endif
// test chunked encoding
port = start_web_server(false, true);
ps.type = proxy_settings::none;
run_suite("http", ps, port);
stop_web_server();
std::remove("test_file");
return 0;
}

View File

@ -100,6 +100,7 @@ tuple<int, int, bool> feed_bytes(http_parser& parser, char const* str)
TORRENT_ASSERT(payload + protocol == chunk_size);
}
TEST_CHECK(prev == make_tuple(0, 0, false) || ret == prev);
TEST_EQUAL(ret.get<0>() + ret.get<1>(), strlen(str));
prev = ret;
}
return ret;
@ -1083,6 +1084,37 @@ int test_main()
parser.reset();
TEST_CHECK(!parser.finished());
// test chunked encoding
char const* chunked_test = "HTTP/1.1 200 OK\r\n"
"Content-Length: 20\r\n"
"Content-Type: text/plain\r\n"
"Transfer-Encoding: chunked\r\n"
"\r\n"
"4\r\n"
"test\r\n"
"10\r\n"
"0123456789abcdef\r\n"
"0\r\n"
"Test-header: foobar\r\n"
"\r\n";
received = feed_bytes(parser, chunked_test);
printf("payload: %d protocol: %d\n", received.get<0>(), received.get<1>());
TEST_CHECK(received == make_tuple(20, strlen(chunked_test) - 20, false));
TEST_CHECK(parser.finished());
TEST_CHECK(std::equal(parser.get_body().begin, parser.get_body().end
, "4\r\ntest\r\n10\r\n0123456789abcdef"));
TEST_CHECK(parser.header("test-header") == "foobar");
TEST_CHECK(parser.header("content-type") == "text/plain");
TEST_CHECK(atoi(parser.header("content-length").c_str()) == 20);
TEST_CHECK(parser.chunked_encoding());
typedef std::pair<size_type, size_type> chunk_range;
std::vector<chunk_range> cmp;
cmp.push_back(chunk_range(96, 100));
cmp.push_back(chunk_range(106, 122));
TEST_CHECK(cmp == parser.chunks());
// make sure we support trackers with incorrect line endings
char const* tracker_response =
"HTTP/1.1 200 OK\n"