refactored gzip code and added gzip support to http_connection

This commit is contained in:
Arvid Norberg 2008-01-30 18:32:13 +00:00
parent ebde862341
commit 6caca17883
12 changed files with 315 additions and 209 deletions

View File

@ -228,6 +228,7 @@ SOURCES =
connection_queue
entry
escape_string
gzip
http_connection
http_stream
http_parser

View File

@ -18,6 +18,7 @@ libtorrent/extensions.hpp \
libtorrent/file.hpp \
libtorrent/file_pool.hpp \
libtorrent/fingerprint.hpp \
libtorrent/gzip.hpp \
libtorrent/hasher.hpp \
libtorrent/http_connection.hpp \
libtorrent/http_stream.hpp \

View File

@ -0,0 +1,43 @@
/*
Copyright (c) 2007, Arvid Norberg
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the distribution.
* Neither the name of the author nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef TORRENT_GZIP_HPP_INCLUDED
#define TORRENT_GZIP_HPP_INCLUDED

#include <string>
#include <vector>

// NOTE(review): TORRENT_EXPORT is not defined by this header. It is
// presumably provided by libtorrent/config.hpp, which the includer must
// have pulled in first -- confirm and consider including it here.

namespace libtorrent
{
	// Inflates a complete gzip stream.
	//
	// in, size:     the gzip data (header, deflate stream and trailer)
	// buffer:       receives the inflated bytes
	// maximum_size: upper limit, in bytes, for the inflated data
	// error:        set to a description of the problem on failure
	//
	// returns true on FAILURE and false on success
	TORRENT_EXPORT bool inflate_gzip(
		char const* in, int size
		, std::vector<char>& buffer
		, int maximum_size
		, std::string& error);
}

#endif // TORRENT_GZIP_HPP_INCLUDED

View File

@ -63,7 +63,6 @@ typedef boost::function<void(asio::error_code const&
typedef boost::function<void(http_connection&)> http_connect_handler;
// TODO: add bind interface
// TODO: add gzip support
// when bottled, the last two arguments to the handler
// will always be 0
@ -101,7 +100,8 @@ struct http_connection : boost::enable_shared_from_this<http_connection>, boost:
std::string sendbuffer;
void get(std::string const& url, time_duration timeout = seconds(30)
, proxy_settings const* ps = 0, int handle_redirects = 5);
, proxy_settings const* ps = 0, int handle_redirects = 5
, std::string const& user_agent = "");
void start(std::string const& hostname, std::string const& port
, time_duration timeout, proxy_settings const* ps = 0, bool ssl = false

View File

@ -143,12 +143,6 @@ namespace libtorrent
tracker_manager* m_manager;
};
TORRENT_EXPORT bool inflate_gzip(
std::vector<char>& buffer
, tracker_request const& req
, request_callback* requester
, int maximum_tracker_response_length);
struct TORRENT_EXPORT timeout_handler
: intrusive_ptr_base<timeout_handler>
, boost::noncopyable

View File

@ -23,7 +23,7 @@ alert.cpp identify_client.cpp ip_filter.cpp file.cpp metadata_transfer.cpp \
logger.cpp file_pool.cpp ut_pex.cpp lsd.cpp upnp.cpp instantiate_connection.cpp \
socks5_stream.cpp socks4_stream.cpp http_stream.cpp connection_queue.cpp \
disk_io_thread.cpp ut_metadata.cpp magnet_uri.cpp udp_socket.cpp smart_ban.cpp \
http_parser.cpp $(kademlia_sources)
http_parser.cpp gzip.cpp $(kademlia_sources)
noinst_HEADERS = \
$(top_srcdir)/include/libtorrent/alert.hpp \
@ -49,6 +49,7 @@ $(top_srcdir)/include/libtorrent/extensions/ut_pex.hpp \
$(top_srcdir)/include/libtorrent/file.hpp \
$(top_srcdir)/include/libtorrent/file_pool.hpp \
$(top_srcdir)/include/libtorrent/fingerprint.hpp \
$(top_srcdir)/include/libtorrent/gzip.hpp \
$(top_srcdir)/include/libtorrent/hasher.hpp \
$(top_srcdir)/include/libtorrent/http_connection.hpp \
$(top_srcdir)/include/libtorrent/http_stream.hpp \

212
src/gzip.cpp Normal file
View File

@ -0,0 +1,212 @@
/*
Copyright (c) 2007, Arvid Norberg
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the distribution.
* Neither the name of the author nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include "libtorrent/assert.hpp"
#include "zlib.h"
#include <string>
#include <vector>
namespace
{
	// Constants used by gzip_header() when parsing a gzip header.
	enum
	{
		// bits of the flags byte (byte 3 of the header)
		FTEXT = 1 << 0,     // not examined by gzip_header()
		FHCRC = 1 << 1,     // a 2 byte field follows the optional fields
		FEXTRA = 1 << 2,    // a length prefixed field follows the fixed header
		FNAME = 1 << 3,     // a zero terminated string follows
		FCOMMENT = 1 << 4,  // a zero terminated string follows
		// bits 5, 6 and 7; a header with any of them set is rejected
		FRESERVED = (1 << 5) | (1 << 6) | (1 << 7),

		// the first two bytes of every gzip header
		GZIP_MAGIC0 = 31,
		GZIP_MAGIC1 = 139
	};
}
namespace libtorrent
{
// Parses the header of a gzip stream.
//
// buf, size: the start of the gzip data and the number of bytes available
//
// returns the size of the header in bytes (i.e. the offset at which the
// compressed data starts), or -1 if the header is invalid or truncated
int gzip_header(const char* buf, int size)
{
	TORRENT_ASSERT(buf != 0);

	// The fixed part of a gzip header is 10 bytes. This check also rejects
	// empty input, which is something a remote peer can make us receive and
	// therefore must be a regular error, not an assertion failure
	if (buf == 0 || size < 10) return -1;

	const unsigned char* buffer = reinterpret_cast<const unsigned char*>(buf);
	const int total_size = size;

	// check the magic header of gzip
	if ((buffer[0] != GZIP_MAGIC0) || (buffer[1] != GZIP_MAGIC1)) return -1;

	const int method = buffer[2];
	const int flags = buffer[3];

	// make sure it's compressed with the correct method and that none of
	// the reserved flag bits are set
	if (method != Z_DEFLATED || (flags & FRESERVED) != 0) return -1;

	// skip the fixed part: magic, method, flags, time, xflags and OS code
	size -= 10;
	buffer += 10;

	if (flags & FEXTRA)
	{
		// a 2 byte little endian length, followed by that many bytes
		if (size < 2) return -1;
		const int extra_len = (buffer[1] << 8) | buffer[0];
		if (size < (extra_len + 2)) return -1;
		size -= (extra_len + 2);
		buffer += (extra_len + 2);
	}

	if (flags & FNAME)
	{
		// zero terminated string
		while (size && *buffer)
		{
			--size;
			++buffer;
		}
		// ran out of input before finding the terminator
		if (!size || *buffer) return -1;

		// skip the terminator
		--size;
		++buffer;
	}

	if (flags & FCOMMENT)
	{
		// zero terminated string
		while (size && *buffer)
		{
			--size;
			++buffer;
		}
		// ran out of input before finding the terminator
		if (!size || *buffer) return -1;

		// skip the terminator
		--size;
		++buffer;
	}

	if (flags & FHCRC)
	{
		// 2 bytes that are skipped without being verified
		if (size < 2) return -1;
		size -= 2;
		buffer += 2;
	}

	return total_size - size;
}
// Inflates a complete gzip stream.
//
// in, size:     the gzip data (header, deflate stream and the 8 byte
//               trailer)
// buffer:       receives the inflated bytes. Its previous content is
//               discarded and, if the call fails, it may be left holding
//               partial output
// maximum_size: upper limit, in bytes, for the inflated data
// error:        set to a description of the problem on failure
//
// returns true on FAILURE and false on success
bool inflate_gzip(
	char const* in
	, int size
	, std::vector<char>& buffer
	, int maximum_size
	, std::string& error)
{
	TORRENT_ASSERT(maximum_size > 0);
	if (maximum_size <= 0)
	{
		error = "response too large";
		return true;
	}

	// an empty body is something the remote end can send us, so it has to
	// be reported as an error rather than handed to gzip_header()
	if (in == 0 || size <= 0)
	{
		error = "invalid gzip header in tracker response";
		return true;
	}

	int const header_len = gzip_header(in, size);
	if (header_len < 0)
	{
		error = "invalid gzip header in tracker response";
		return true;
	}

	// the last 8 bytes are the CRC32 and the input size. Those belong to
	// the gzip file, not to the deflate stream. If they don't fit, the
	// input is truncated. This must be checked since a negative length
	// would wrap around when stored in the unsigned avail_in field and
	// make inflate() read past the end of the input
	int const deflate_len = size - header_len - 8;
	if (deflate_len < 0)
	{
		error = "gzip error";
		return true;
	}

	// start off with one kilobyte (or less, if the limit is lower than
	// that) and grow if needed
	buffer.resize(maximum_size < 1024 ? maximum_size : 1024);

	// initialize the zlib-stream
	z_stream str;
	str.avail_in = deflate_len;
	str.next_in = reinterpret_cast<Bytef*>(const_cast<char*>(in + header_len));
	str.next_out = reinterpret_cast<Bytef*>(&buffer[0]);
	str.avail_out = static_cast<int>(buffer.size());
	str.zalloc = Z_NULL;
	str.zfree = Z_NULL;
	str.opaque = 0;

	// -15 is really important. It will make inflate() not look for a zlib
	// header and just inflate the raw deflate data
	if (inflateInit2(&str, -15) != Z_OK)
	{
		error = "gzip out of memory";
		return true;
	}

	// inflate and grow buffer as needed
	int ret = inflate(&str, Z_SYNC_FLUSH);
	while (ret == Z_OK)
	{
		if (str.avail_out == 0)
		{
			int const old_size = static_cast<int>(buffer.size());
			if (old_size >= maximum_size)
			{
				inflateEnd(&str);
				error = "response too large";
				return true;
			}

			// double the buffer, but never beyond the limit. Comparing
			// against half the limit avoids overflowing the int
			int const new_size = old_size > maximum_size / 2
				? maximum_size : old_size * 2;
			buffer.resize(new_size);
			// resize() may have moved the storage
			str.next_out = reinterpret_cast<Bytef*>(&buffer[old_size]);
			str.avail_out = new_size - old_size;
		}

		ret = inflate(&str, Z_SYNC_FLUSH);
	}

	// drop the part of the buffer that was never written to
	buffer.resize(buffer.size() - str.avail_out);
	inflateEnd(&str);

	if (ret != Z_STREAM_END)
	{
		error = "gzip error";
		return true;
	}

	return false;
}
}

View File

@ -33,6 +33,7 @@ POSSIBILITY OF SUCH DAMAGE.
#include "libtorrent/http_connection.hpp"
#include "libtorrent/escape_string.hpp"
#include "libtorrent/instantiate_connection.hpp"
#include "libtorrent/gzip.hpp"
#include <boost/bind.hpp>
#include <boost/lexical_cast.hpp>
@ -41,13 +42,18 @@ POSSIBILITY OF SUCH DAMAGE.
using boost::bind;
namespace libtorrent
{
namespace libtorrent {
namespace
{
	// Lower-cases a single character.
	//
	// std::tolower() requires its argument to be representable as an
	// unsigned char (or to be EOF). Passing a plain char with a negative
	// value is undefined behaviour, so go through unsigned char first
	char to_lower(char c)
	{
		return static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
	}
}
enum { max_bottled_buffer = 1024 * 1024 };
enum { max_bottled_buffer = 1024 * 1024 };
void http_connection::get(std::string const& url, time_duration timeout
, proxy_settings const* ps, int handle_redirects)
, proxy_settings const* ps, int handle_redirects, std::string const& user_agent)
{
std::string protocol;
std::string auth;
@ -73,8 +79,7 @@ void http_connection::get(std::string const& url, time_duration timeout
{
// if we're using an http proxy and not an ssl
// connection, just do a regular http proxy request
headers << "GET " << url << " HTTP/1.0\r\n"
"Connection: close\r\n";
headers << "GET " << url << " HTTP/1.0\r\n";
if (ps->type == proxy_settings::http_pw)
headers << "Proxy-Authorization: Basic " << base64encode(
ps->username + ":" + ps->password) << "\r\n";
@ -85,13 +90,20 @@ void http_connection::get(std::string const& url, time_duration timeout
else
{
headers << "GET " << path << " HTTP/1.0\r\n"
"Host:" << hostname << "\r\n"
"Connection: close\r\n";
"Host:" << hostname << "\r\n";
}
if (!auth.empty())
headers << "Authorization: Basic " << base64encode(auth) << "\r\n";
headers << "\r\n";
if (!user_agent.empty())
headers << "User-Agent: " << user_agent << "\r\n";
headers <<
"Connection: close\r\n"
"Accept-Encoding: gzip\r\n"
"\r\n";
sendbuffer = headers.str();
start(hostname, boost::lexical_cast<std::string>(port), timeout, ps
, ssl, handle_redirects);
@ -250,6 +262,23 @@ void http_connection::callback(asio::error_code const& e, char const* data, int
{
if (!m_bottled || !m_called)
{
std::vector<char> buf;
if (m_bottled && m_parser.finished())
{
std::string const& encoding = m_parser.header("content-encoding");
if (encoding == "gzip" || encoding == "x-gzip")
{
std::string error;
if (inflate_gzip(data, size, buf, max_bottled_buffer, error))
{
callback(asio::error::fault, data, size);
close();
return;
}
data = &buf[0];
size = int(buf.size());
}
}
m_called = true;
m_timer.cancel();
if (m_handler) m_handler(e, m_parser, data, size);

View File

@ -40,7 +40,7 @@ POSSIBILITY OF SUCH DAMAGE.
#include <algorithm>
#include "libtorrent/config.hpp"
#include "zlib.h"
#include "libtorrent/gzip.hpp"
#ifdef _MSC_VER
#pragma warning(push, 1)
@ -70,21 +70,6 @@ namespace
minimum_tracker_response_length = 3,
http_buffer_size = 2048
};
// gzip header flag bits and magic bytes
// NOTE(review): presumably the same table gzip_header() uses -- confirm
enum
{
	FTEXT = 1 << 0,
	FHCRC = 1 << 1,
	FEXTRA = 1 << 2,
	FNAME = 1 << 3,
	FCOMMENT = 1 << 4,
	// bits 5, 6 and 7
	FRESERVED = (1 << 5) | (1 << 6) | (1 << 7),

	GZIP_MAGIC0 = 31,
	GZIP_MAGIC1 = 139
};
}
namespace
@ -637,13 +622,16 @@ namespace libtorrent
close();
return;
}
m_buffer.erase(m_buffer.begin(), m_buffer.begin() + m_parser.body_start());
if (inflate_gzip(m_buffer, tracker_req(), cb.get(),
m_settings.tracker_maximum_response_length))
std::vector<char> buffer;
std::string error;
if (inflate_gzip(&m_buffer[0] + m_parser.body_start(), m_buffer.size(), buffer
, m_settings.tracker_maximum_response_length, error))
{
cb->tracker_request_error(tracker_req(), 200, error);
close();
return;
}
m_buffer.swap(buffer);
buf.begin = &m_buffer[0];
buf.end = &m_buffer[0] + m_buffer.size();
}

View File

@ -38,8 +38,6 @@ POSSIBILITY OF SUCH DAMAGE.
#include <iomanip>
#include <sstream>
#include "zlib.h"
#include <boost/bind.hpp>
#include "libtorrent/tracker_manager.hpp"
@ -63,177 +61,10 @@ namespace
http_buffer_size = 2048
};
// Constants used by gzip_header() when parsing a gzip header.
enum
{
	// bits of the flags byte (byte 3 of the header)
	FTEXT = 1 << 0,     // not examined by gzip_header()
	FHCRC = 1 << 1,     // a 2 byte field follows the optional fields
	FEXTRA = 1 << 2,    // a length prefixed field follows the fixed header
	FNAME = 1 << 3,     // a zero terminated string follows
	FCOMMENT = 1 << 4,  // a zero terminated string follows
	// bits 5, 6 and 7; a header with any of them set is rejected
	FRESERVED = (1 << 5) | (1 << 6) | (1 << 7),

	// the first two bytes of every gzip header
	GZIP_MAGIC0 = 31,
	GZIP_MAGIC1 = 139
};
}
namespace libtorrent
{
// Parses the header of a gzip stream.
//
// buf, size: the start of the gzip data and the number of bytes available
//
// returns the size of the header in bytes (i.e. the offset at which the
// compressed data starts), or -1 if the header is invalid or truncated
int gzip_header(const char* buf, int size)
{
	TORRENT_ASSERT(buf != 0);

	// The fixed part of a gzip header is 10 bytes. This check also rejects
	// empty input, which is something a remote peer can make us receive and
	// therefore must be a regular error, not an assertion failure
	if (buf == 0 || size < 10) return -1;

	const unsigned char* buffer = reinterpret_cast<const unsigned char*>(buf);
	const int total_size = size;

	// check the magic header of gzip
	if ((buffer[0] != GZIP_MAGIC0) || (buffer[1] != GZIP_MAGIC1)) return -1;

	const int method = buffer[2];
	const int flags = buffer[3];

	// make sure it's compressed with the correct method and that none of
	// the reserved flag bits are set
	if (method != Z_DEFLATED || (flags & FRESERVED) != 0) return -1;

	// skip the fixed part: magic, method, flags, time, xflags and OS code
	size -= 10;
	buffer += 10;

	if (flags & FEXTRA)
	{
		// a 2 byte little endian length, followed by that many bytes
		if (size < 2) return -1;
		const int extra_len = (buffer[1] << 8) | buffer[0];
		if (size < (extra_len + 2)) return -1;
		size -= (extra_len + 2);
		buffer += (extra_len + 2);
	}

	if (flags & FNAME)
	{
		// zero terminated string
		while (size && *buffer)
		{
			--size;
			++buffer;
		}
		// ran out of input before finding the terminator
		if (!size || *buffer) return -1;

		// skip the terminator
		--size;
		++buffer;
	}

	if (flags & FCOMMENT)
	{
		// zero terminated string
		while (size && *buffer)
		{
			--size;
			++buffer;
		}
		// ran out of input before finding the terminator
		if (!size || *buffer) return -1;

		// skip the terminator
		--size;
		++buffer;
	}

	if (flags & FHCRC)
	{
		// 2 bytes that are skipped without being verified
		if (size < 2) return -1;
		size -= 2;
		buffer += 2;
	}

	return total_size - size;
}
// Replaces the gzip compressed tracker response in buffer with its
// inflated content.
//
// buffer:    in: the complete gzip data, out: the inflated data. It is
//            only modified when the call succeeds
// req:       the request this is the response to; passed on to requester
//            when reporting an error
// requester: receives a tracker_request_error() call on failure
// maximum_tracker_response_length: upper limit, in bytes, for the
//            inflated data
//
// returns true on FAILURE and false on success
bool inflate_gzip(
	std::vector<char>& buffer
	, tracker_request const& req
	, request_callback* requester
	, int maximum_tracker_response_length)
{
	TORRENT_ASSERT(maximum_tracker_response_length > 0);
	if (maximum_tracker_response_length <= 0)
	{
		requester->tracker_request_error(req, 200
			, "tracker response too large");
		return true;
	}

	int const total_size = static_cast<int>(buffer.size());

	// an empty response is something the tracker can send us, and taking
	// the address of the first element of an empty vector is not valid
	int const header_len = buffer.empty()
		? -1 : gzip_header(&buffer[0], total_size);
	if (header_len < 0)
	{
		requester->tracker_request_error(req, 200, "invalid gzip header in tracker response");
		return true;
	}

	// the last 8 bytes are the CRC32 and the input size. Those belong to
	// the gzip file, not to the deflate stream. If they don't fit, the
	// response is truncated. This must be checked since a negative length
	// would wrap around when stored in the unsigned avail_in field and
	// make inflate() read past the end of the buffer
	int const deflate_len = total_size - header_len - 8;
	if (deflate_len < 0)
	{
		requester->tracker_request_error(req, 200, "gzip error");
		return true;
	}

	// start off with one kilobyte (or less, if the limit is lower than
	// that) and grow if needed
	std::vector<char> inflate_buffer(maximum_tracker_response_length < 1024
		? maximum_tracker_response_length : 1024);

	// initialize the zlib-stream
	z_stream str;
	str.avail_in = deflate_len;
	str.next_in = reinterpret_cast<Bytef*>(&buffer[0] + header_len);
	str.next_out = reinterpret_cast<Bytef*>(&inflate_buffer[0]);
	str.avail_out = static_cast<int>(inflate_buffer.size());
	str.zalloc = Z_NULL;
	str.zfree = Z_NULL;
	str.opaque = 0;

	// -15 is really important. It will make inflate() not look for a zlib
	// header and just inflate the raw deflate data
	if (inflateInit2(&str, -15) != Z_OK)
	{
		requester->tracker_request_error(req, 200, "gzip out of memory");
		return true;
	}

	// inflate and grow inflate_buffer as needed
	int ret = inflate(&str, Z_SYNC_FLUSH);
	while (ret == Z_OK)
	{
		if (str.avail_out == 0)
		{
			int const old_size = static_cast<int>(inflate_buffer.size());
			if (old_size >= maximum_tracker_response_length)
			{
				inflateEnd(&str);
				requester->tracker_request_error(req, 200
					, "tracker response too large");
				return true;
			}

			// double the buffer, but never beyond the limit. Comparing
			// against half the limit avoids overflowing the int
			int const new_size = old_size > maximum_tracker_response_length / 2
				? maximum_tracker_response_length : old_size * 2;
			inflate_buffer.resize(new_size);
			// resize() may have moved the storage
			str.next_out = reinterpret_cast<Bytef*>(&inflate_buffer[old_size]);
			str.avail_out = new_size - old_size;
		}

		ret = inflate(&str, Z_SYNC_FLUSH);
	}

	// drop the part of the buffer that was never written to
	inflate_buffer.resize(inflate_buffer.size() - str.avail_out);
	inflateEnd(&str);

	if (ret != Z_STREAM_END)
	{
		requester->tracker_request_error(req, 200, "gzip error");
		return true;
	}

	// commit the resulting buffer
	std::swap(buffer, inflate_buffer);
	return false;
}
timeout_handler::timeout_handler(io_service& ios)
: m_start_time(time_now())
, m_read_time(time_now())

View File

@ -80,7 +80,7 @@ void start_web_server(int port, bool ssl)
}
std::ofstream f("lighty_config");
f << "server.modules = (\"mod_access\", \"mod_redirect\")\n"
f << "server.modules = (\"mod_access\", \"mod_redirect\", \"mod_setenv\")\n"
"server.document-root = \"" << boost::filesystem::initial_path().string() << "\"\n"
"server.range-requests = \"enable\"\n"
"server.port = " << port << "\n"
@ -88,7 +88,11 @@ void start_web_server(int port, bool ssl)
"url.redirect = (\"^/redirect$\" => \""
<< (ssl?"https":"http") << "://127.0.0.1:" << port << "/test_file\", "
"\"^/infinite_redirect$\" => \""
<< (ssl?"https":"http") << "://127.0.0.1:" << port << "/infinite_redirect\")\n";
<< (ssl?"https":"http") << "://127.0.0.1:" << port << "/infinite_redirect\")\n"
"$HTTP[\"url\"] == \"/test_file.gz\" {\n"
" setenv.add-response-header = ( \"Content-Encoding\" => \"gzip\" )\n"
" mimetype.assign = ()\n"
"}\n";
// this requires lighttpd to be built with ssl support.
// The port distribution for mac is not built with ssl
// support by default.

View File

@ -106,6 +106,7 @@ void run_suite(std::string const& protocol, proxy_settings const& ps)
run_test(protocol + "://127.0.0.1:8001/redirect", 3216, 200, 2, asio::error_code(), ps);
run_test(protocol + "://127.0.0.1:8001/infinite_redirect", 0, 301, 6, asio::error_code(), ps);
run_test(protocol + "://127.0.0.1:8001/test_file", 3216, 200, 1, asio::error_code(), ps);
run_test(protocol + "://127.0.0.1:8001/test_file.gz", 3216, 200, 1, asio::error_code(), ps);
run_test(protocol + "://127.0.0.1:8001/non-existing-file", -1, 404, 1, err(), ps);
// if we're going through an http proxy, we won't get the same error as if the hostname
// resolution failed
@ -123,7 +124,8 @@ int test_main()
std::srand(std::time(0));
std::generate(data_buffer, data_buffer + sizeof(data_buffer), &std::rand);
std::ofstream("test_file").write(data_buffer, 3216);
std::system("gzip -9 -c test_file > test_file.gz");
proxy_settings ps;
ps.hostname = "127.0.0.1";
ps.port = 8034;