From 6caca178838b52a03e50e180640a161e6227080c Mon Sep 17 00:00:00 2001 From: Arvid Norberg Date: Wed, 30 Jan 2008 18:32:13 +0000 Subject: [PATCH] refactored gzip code and added gzip support to http_connection --- Jamfile | 1 + include/Makefile.am | 1 + include/libtorrent/gzip.hpp | 43 +++++ include/libtorrent/http_connection.hpp | 4 +- include/libtorrent/tracker_manager.hpp | 6 - src/Makefile.am | 3 +- src/gzip.cpp | 212 +++++++++++++++++++++++++ src/http_connection.cpp | 47 ++++-- src/http_tracker_connection.cpp | 26 +-- src/tracker_manager.cpp | 169 -------------------- test/setup_transfer.cpp | 8 +- test/test_http_connection.cpp | 4 +- 12 files changed, 315 insertions(+), 209 deletions(-) create mode 100644 include/libtorrent/gzip.hpp create mode 100644 src/gzip.cpp diff --git a/Jamfile b/Jamfile index 3571264b3..a2623c854 100755 --- a/Jamfile +++ b/Jamfile @@ -228,6 +228,7 @@ SOURCES = connection_queue entry escape_string + gzip http_connection http_stream http_parser diff --git a/include/Makefile.am b/include/Makefile.am index 6a3461dab..e33f2acbd 100644 --- a/include/Makefile.am +++ b/include/Makefile.am @@ -18,6 +18,7 @@ libtorrent/extensions.hpp \ libtorrent/file.hpp \ libtorrent/file_pool.hpp \ libtorrent/fingerprint.hpp \ +libtorrent/gzip.hpp \ libtorrent/hasher.hpp \ libtorrent/http_connection.hpp \ libtorrent/http_stream.hpp \ diff --git a/include/libtorrent/gzip.hpp b/include/libtorrent/gzip.hpp new file mode 100644 index 000000000..0528b58f2 --- /dev/null +++ b/include/libtorrent/gzip.hpp @@ -0,0 +1,43 @@ +/* + +Copyright (c) 2007, Arvid Norberg +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the distribution. + * Neither the name of the author nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +namespace libtorrent +{ + + TORRENT_EXPORT bool inflate_gzip( + char const* in, int size + , std::vector& buffer + , int maximum_size + , std::string& error); + +} + diff --git a/include/libtorrent/http_connection.hpp b/include/libtorrent/http_connection.hpp index 4f0113e7b..5b1e4b1b9 100644 --- a/include/libtorrent/http_connection.hpp +++ b/include/libtorrent/http_connection.hpp @@ -63,7 +63,6 @@ typedef boost::function http_connect_handler; // TODO: add bind interface -// TODO: add gzip support // when bottled, the last two arguments to the handler // will always be 0 @@ -101,7 +100,8 @@ struct http_connection : boost::enable_shared_from_this, boost: std::string sendbuffer; void get(std::string const& url, time_duration timeout = seconds(30) - , proxy_settings const* ps = 0, int handle_redirects = 5); + , proxy_settings const* ps = 0, int handle_redirects = 5 + , std::string const& user_agent = ""); void start(std::string const& hostname, std::string const& port , time_duration timeout, proxy_settings const* ps = 0, bool ssl = false diff --git a/include/libtorrent/tracker_manager.hpp b/include/libtorrent/tracker_manager.hpp index ad2a4f80b..823ae3059 100755 --- a/include/libtorrent/tracker_manager.hpp +++ b/include/libtorrent/tracker_manager.hpp @@ -143,12 +143,6 @@ namespace libtorrent tracker_manager* m_manager; }; - TORRENT_EXPORT bool inflate_gzip( - std::vector& buffer - , tracker_request const& req - , request_callback* requester - , int maximum_tracker_response_length); - struct TORRENT_EXPORT timeout_handler : intrusive_ptr_base , boost::noncopyable diff --git a/src/Makefile.am b/src/Makefile.am index 26255c870..e47774d8b 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -23,7 +23,7 @@ alert.cpp identify_client.cpp ip_filter.cpp file.cpp metadata_transfer.cpp \ logger.cpp file_pool.cpp ut_pex.cpp lsd.cpp upnp.cpp instantiate_connection.cpp \ socks5_stream.cpp socks4_stream.cpp http_stream.cpp connection_queue.cpp \ disk_io_thread.cpp 
ut_metadata.cpp magnet_uri.cpp udp_socket.cpp smart_ban.cpp \ -http_parser.cpp $(kademlia_sources) +http_parser.cpp gzip.cpp $(kademlia_sources) noinst_HEADERS = \ $(top_srcdir)/include/libtorrent/alert.hpp \ @@ -49,6 +49,7 @@ $(top_srcdir)/include/libtorrent/extensions/ut_pex.hpp \ $(top_srcdir)/include/libtorrent/file.hpp \ $(top_srcdir)/include/libtorrent/file_pool.hpp \ $(top_srcdir)/include/libtorrent/fingerprint.hpp \ +$(top_srcdir)/include/libtorrent/gzip.hpp \ $(top_srcdir)/include/libtorrent/hasher.hpp \ $(top_srcdir)/include/libtorrent/http_connection.hpp \ $(top_srcdir)/include/libtorrent/http_stream.hpp \ diff --git a/src/gzip.cpp b/src/gzip.cpp new file mode 100644 index 000000000..929816240 --- /dev/null +++ b/src/gzip.cpp @@ -0,0 +1,212 @@ +/* + +Copyright (c) 2007, Arvid Norberg +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the distribution. + * Neither the name of the author nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include "libtorrent/assert.hpp" + +#include "zlib.h" + +#include + +namespace +{ + enum + { + FTEXT = 0x01, + FHCRC = 0x02, + FEXTRA = 0x04, + FNAME = 0x08, + FCOMMENT = 0x10, + FRESERVED = 0xe0, + + GZIP_MAGIC0 = 0x1f, + GZIP_MAGIC1 = 0x8b + }; + +} + +namespace libtorrent +{ + // returns -1 if gzip header is invalid or the header size in bytes + int gzip_header(const char* buf, int size) + { + TORRENT_ASSERT(buf != 0); + TORRENT_ASSERT(size > 0); + + const unsigned char* buffer = reinterpret_cast(buf); + const int total_size = size; + + // The gzip header cannot be shorter than 10 bytes + if (size < 10) return -1; + + // check the magic header of gzip + if ((buffer[0] != GZIP_MAGIC0) || (buffer[1] != GZIP_MAGIC1)) return -1; + + int method = buffer[2]; + int flags = buffer[3]; + + // check for reserved flag and make sure it's compressed with the correct method + if (method != Z_DEFLATED || (flags & FRESERVED) != 0) return -1; + + // skip time, xflags, OS code + size -= 10; + buffer += 10; + + if (flags & FEXTRA) + { + int extra_len; + + if (size < 2) return -1; + + extra_len = (buffer[1] << 8) | buffer[0]; + + if (size < (extra_len+2)) return -1; + size -= (extra_len + 2); + buffer += (extra_len + 2); + } + + if (flags & FNAME) + { + while (size && *buffer) + { + --size; + ++buffer; + } + if (!size || *buffer) return -1; + + --size; + ++buffer; + } + + if (flags & FCOMMENT) + { + while (size && *buffer) + { + --size; + ++buffer; + } + if (!size 
|| *buffer) return -1; + + --size; + ++buffer; + } + + if (flags & FHCRC) + { + if (size < 2) return -1; + + size -= 2; + buffer += 2; + } + + return total_size - size; + } + + bool inflate_gzip( + char const* in + , int size + , std::vector& buffer + , int maximum_size + , std::string& error) + { + TORRENT_ASSERT(maximum_size > 0); + + int header_len = gzip_header(in, size); + if (header_len < 0) + { + error = "invalid gzip header in tracker response"; + return true; + } + + // start off with one kilobyte and grow + // if needed + buffer.resize(1024); + + // initialize the zlib-stream + z_stream str; + + // subtract 8 from the end of the buffer since that's CRC32 and input size + // and those belong to the gzip file + str.avail_in = (int)size - header_len - 8; + str.next_in = reinterpret_cast(const_cast(in + header_len)); + str.next_out = reinterpret_cast(&buffer[0]); + str.avail_out = (int)buffer.size(); + str.zalloc = Z_NULL; + str.zfree = Z_NULL; + str.opaque = 0; + // -15 is really important. 
It will make inflate() not look for a zlib header + // and just deflate the buffer + if (inflateInit2(&str, -15) != Z_OK) + { + error = "gzip out of memory"; + return true; + } + + // inflate and grow inflate_buffer as needed + int ret = inflate(&str, Z_SYNC_FLUSH); + while (ret == Z_OK) + { + if (str.avail_out == 0) + { + if (buffer.size() >= (unsigned)maximum_size) + { + inflateEnd(&str); + error = "response too large"; + return true; + } + int new_size = (int)buffer.size() * 2; + if (new_size > maximum_size) + new_size = maximum_size; + int old_size = (int)buffer.size(); + + buffer.resize(new_size); + str.next_out = reinterpret_cast(&buffer[old_size]); + str.avail_out = new_size - old_size; + } + + ret = inflate(&str, Z_SYNC_FLUSH); + } + + buffer.resize(buffer.size() - str.avail_out); + inflateEnd(&str); + + if (ret != Z_STREAM_END) + { + error = "gzip error"; + return true; + } + + // commit the resulting buffer + return false; + } + +} + diff --git a/src/http_connection.cpp b/src/http_connection.cpp index 2a93e3e47..923e92e11 100644 --- a/src/http_connection.cpp +++ b/src/http_connection.cpp @@ -33,6 +33,7 @@ POSSIBILITY OF SUCH DAMAGE. #include "libtorrent/http_connection.hpp" #include "libtorrent/escape_string.hpp" #include "libtorrent/instantiate_connection.hpp" +#include "libtorrent/gzip.hpp" #include #include @@ -41,13 +42,18 @@ POSSIBILITY OF SUCH DAMAGE. 
using boost::bind; -namespace libtorrent -{ +namespace libtorrent { + +namespace +{ + char to_lower(char c) { return std::tolower(c); } +} + +enum { max_bottled_buffer = 1024 * 1024 }; - enum { max_bottled_buffer = 1024 * 1024 }; void http_connection::get(std::string const& url, time_duration timeout - , proxy_settings const* ps, int handle_redirects) + , proxy_settings const* ps, int handle_redirects, std::string const& user_agent) { std::string protocol; std::string auth; @@ -73,8 +79,7 @@ void http_connection::get(std::string const& url, time_duration timeout { // if we're using an http proxy and not an ssl // connection, just do a regular http proxy request - headers << "GET " << url << " HTTP/1.0\r\n" - "Connection: close\r\n"; + headers << "GET " << url << " HTTP/1.0\r\n"; if (ps->type == proxy_settings::http_pw) headers << "Proxy-Authorization: Basic " << base64encode( ps->username + ":" + ps->password) << "\r\n"; @@ -85,13 +90,20 @@ void http_connection::get(std::string const& url, time_duration timeout else { headers << "GET " << path << " HTTP/1.0\r\n" - "Host:" << hostname << "\r\n" - "Connection: close\r\n"; + "Host:" << hostname << "\r\n"; } if (!auth.empty()) headers << "Authorization: Basic " << base64encode(auth) << "\r\n"; - headers << "\r\n"; + + if (!user_agent.empty()) + headers << "User-Agent: " << user_agent << "\r\n"; + + headers << + "Connection: close\r\n" + "Accept-Encoding: gzip\r\n" + "\r\n"; + sendbuffer = headers.str(); start(hostname, boost::lexical_cast(port), timeout, ps , ssl, handle_redirects); @@ -250,6 +262,23 @@ void http_connection::callback(asio::error_code const& e, char const* data, int { if (!m_bottled || !m_called) { + std::vector buf; + if (m_bottled && m_parser.finished()) + { + std::string const& encoding = m_parser.header("content-encoding"); + if (encoding == "gzip" || encoding == "x-gzip") + { + std::string error; + if (inflate_gzip(data, size, buf, max_bottled_buffer, error)) + { + callback(asio::error::fault, 
data, size); + close(); + return; + } + data = &buf[0]; + size = int(buf.size()); + } + } m_called = true; m_timer.cancel(); if (m_handler) m_handler(e, m_parser, data, size); diff --git a/src/http_tracker_connection.cpp b/src/http_tracker_connection.cpp index 5c1879831..24b637447 100755 --- a/src/http_tracker_connection.cpp +++ b/src/http_tracker_connection.cpp @@ -40,7 +40,7 @@ POSSIBILITY OF SUCH DAMAGE. #include #include "libtorrent/config.hpp" -#include "zlib.h" +#include "libtorrent/gzip.hpp" #ifdef _MSC_VER #pragma warning(push, 1) @@ -70,21 +70,6 @@ namespace minimum_tracker_response_length = 3, http_buffer_size = 2048 }; - - - enum - { - FTEXT = 0x01, - FHCRC = 0x02, - FEXTRA = 0x04, - FNAME = 0x08, - FCOMMENT = 0x10, - FRESERVED = 0xe0, - - GZIP_MAGIC0 = 0x1f, - GZIP_MAGIC1 = 0x8b - }; - } namespace @@ -637,13 +622,16 @@ namespace libtorrent close(); return; } - m_buffer.erase(m_buffer.begin(), m_buffer.begin() + m_parser.body_start()); - if (inflate_gzip(m_buffer, tracker_req(), cb.get(), - m_settings.tracker_maximum_response_length)) + std::vector buffer; + std::string error; + if (inflate_gzip(&m_buffer[0] + m_parser.body_start(), m_buffer.size(), buffer + , m_settings.tracker_maximum_response_length, error)) { + cb->tracker_request_error(tracker_req(), 200, error); close(); return; } + m_buffer.swap(buffer); buf.begin = &m_buffer[0]; buf.end = &m_buffer[0] + m_buffer.size(); } diff --git a/src/tracker_manager.cpp b/src/tracker_manager.cpp index ec3f85dd6..e72c55eb0 100755 --- a/src/tracker_manager.cpp +++ b/src/tracker_manager.cpp @@ -38,8 +38,6 @@ POSSIBILITY OF SUCH DAMAGE. 
#include #include -#include "zlib.h" - #include #include "libtorrent/tracker_manager.hpp" @@ -63,177 +61,10 @@ namespace http_buffer_size = 2048 }; - - enum - { - FTEXT = 0x01, - FHCRC = 0x02, - FEXTRA = 0x04, - FNAME = 0x08, - FCOMMENT = 0x10, - FRESERVED = 0xe0, - - GZIP_MAGIC0 = 0x1f, - GZIP_MAGIC1 = 0x8b - }; - } namespace libtorrent { - // returns -1 if gzip header is invalid or the header size in bytes - int gzip_header(const char* buf, int size) - { - TORRENT_ASSERT(buf != 0); - TORRENT_ASSERT(size > 0); - - const unsigned char* buffer = reinterpret_cast(buf); - const int total_size = size; - - // The zip header cannot be shorter than 10 bytes - if (size < 10) return -1; - - // check the magic header of gzip - if ((buffer[0] != GZIP_MAGIC0) || (buffer[1] != GZIP_MAGIC1)) return -1; - - int method = buffer[2]; - int flags = buffer[3]; - - // check for reserved flag and make sure it's compressed with the correct metod - if (method != Z_DEFLATED || (flags & FRESERVED) != 0) return -1; - - // skip time, xflags, OS code - size -= 10; - buffer += 10; - - if (flags & FEXTRA) - { - int extra_len; - - if (size < 2) return -1; - - extra_len = (buffer[1] << 8) | buffer[0]; - - if (size < (extra_len+2)) return -1; - size -= (extra_len + 2); - buffer += (extra_len + 2); - } - - if (flags & FNAME) - { - while (size && *buffer) - { - --size; - ++buffer; - } - if (!size || *buffer) return -1; - - --size; - ++buffer; - } - - if (flags & FCOMMENT) - { - while (size && *buffer) - { - --size; - ++buffer; - } - if (!size || *buffer) return -1; - - --size; - ++buffer; - } - - if (flags & FHCRC) - { - if (size < 2) return -1; - - size -= 2; - buffer += 2; - } - - return total_size - size; - } - - bool inflate_gzip( - std::vector& buffer - , tracker_request const& req - , request_callback* requester - , int maximum_tracker_response_length) - { - TORRENT_ASSERT(maximum_tracker_response_length > 0); - - int header_len = gzip_header(&buffer[0], (int)buffer.size()); - if (header_len < 
0) - { - requester->tracker_request_error(req, 200, "invalid gzip header in tracker response"); - return true; - } - - // start off wth one kilobyte and grow - // if needed - std::vector inflate_buffer(1024); - - // initialize the zlib-stream - z_stream str; - - // subtract 8 from the end of the buffer since that's CRC32 and input size - // and those belong to the gzip file - str.avail_in = (int)buffer.size() - header_len - 8; - str.next_in = reinterpret_cast(&buffer[header_len]); - str.next_out = reinterpret_cast(&inflate_buffer[0]); - str.avail_out = (int)inflate_buffer.size(); - str.zalloc = Z_NULL; - str.zfree = Z_NULL; - str.opaque = 0; - // -15 is really important. It will make inflate() not look for a zlib header - // and just deflate the buffer - if (inflateInit2(&str, -15) != Z_OK) - { - requester->tracker_request_error(req, 200, "gzip out of memory"); - return true; - } - - // inflate and grow inflate_buffer as needed - int ret = inflate(&str, Z_SYNC_FLUSH); - while (ret == Z_OK) - { - if (str.avail_out == 0) - { - if (inflate_buffer.size() >= (unsigned)maximum_tracker_response_length) - { - inflateEnd(&str); - requester->tracker_request_error(req, 200 - , "tracker response too large"); - return true; - } - int new_size = (int)inflate_buffer.size() * 2; - if (new_size > maximum_tracker_response_length) new_size = maximum_tracker_response_length; - int old_size = (int)inflate_buffer.size(); - - inflate_buffer.resize(new_size); - str.next_out = reinterpret_cast(&inflate_buffer[old_size]); - str.avail_out = new_size - old_size; - } - - ret = inflate(&str, Z_SYNC_FLUSH); - } - - inflate_buffer.resize(inflate_buffer.size() - str.avail_out); - inflateEnd(&str); - - if (ret != Z_STREAM_END) - { - requester->tracker_request_error(req, 200, "gzip error"); - return true; - } - - // commit the resulting buffer - std::swap(buffer, inflate_buffer); - return false; - } - timeout_handler::timeout_handler(io_service& ios) : m_start_time(time_now()) , 
m_read_time(time_now()) diff --git a/test/setup_transfer.cpp b/test/setup_transfer.cpp index 3cd997e75..d4d696c6f 100644 --- a/test/setup_transfer.cpp +++ b/test/setup_transfer.cpp @@ -80,7 +80,7 @@ void start_web_server(int port, bool ssl) } std::ofstream f("lighty_config"); - f << "server.modules = (\"mod_access\", \"mod_redirect\")\n" + f << "server.modules = (\"mod_access\", \"mod_redirect\", \"mod_setenv\")\n" "server.document-root = \"" << boost::filesystem::initial_path().string() << "\"\n" "server.range-requests = \"enable\"\n" "server.port = " << port << "\n" @@ -88,7 +88,11 @@ void start_web_server(int port, bool ssl) "url.redirect = (\"^/redirect$\" => \"" << (ssl?"https":"http") << "://127.0.0.1:" << port << "/test_file\", " "\"^/infinite_redirect$\" => \"" - << (ssl?"https":"http") << "://127.0.0.1:" << port << "/infinite_redirect\")\n"; + << (ssl?"https":"http") << "://127.0.0.1:" << port << "/infinite_redirect\")\n" + "$HTTP[\"url\"] == \"/test_file.gz\" {\n" + " setenv.add-response-header = ( \"Content-Encoding\" => \"gzip\" )\n" + " mimetype.assign = ()\n" + "}\n"; // this requires lighttpd to be built with ssl support. // The port distribution for mac is not built with ssl // support by default. 
diff --git a/test/test_http_connection.cpp b/test/test_http_connection.cpp index c5005f37e..57dee078a 100644 --- a/test/test_http_connection.cpp +++ b/test/test_http_connection.cpp @@ -106,6 +106,7 @@ void run_suite(std::string const& protocol, proxy_settings const& ps) run_test(protocol + "://127.0.0.1:8001/redirect", 3216, 200, 2, asio::error_code(), ps); run_test(protocol + "://127.0.0.1:8001/infinite_redirect", 0, 301, 6, asio::error_code(), ps); run_test(protocol + "://127.0.0.1:8001/test_file", 3216, 200, 1, asio::error_code(), ps); + run_test(protocol + "://127.0.0.1:8001/test_file.gz", 3216, 200, 1, asio::error_code(), ps); run_test(protocol + "://127.0.0.1:8001/non-existing-file", -1, 404, 1, err(), ps); // if we're going through an http proxy, we won't get the same error as if the hostname // resolution failed @@ -123,7 +124,8 @@ int test_main() std::srand(std::time(0)); std::generate(data_buffer, data_buffer + sizeof(data_buffer), &std::rand); std::ofstream("test_file").write(data_buffer, 3216); - + std::system("gzip -9 -c test_file > test_file.gz"); + proxy_settings ps; ps.hostname = "127.0.0.1"; ps.port = 8034;