diff --git a/CMakeLists.txt b/CMakeLists.txt index 427f3fac0..605f63e18 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -31,6 +31,7 @@ set(sources http_seed_connection instantiate_connection natpmp + packet_buffer piece_picker policy puff @@ -206,6 +207,7 @@ endif(MSVC) add_definitions(-D_FILE_OFFSET_BITS=64) add_definitions(-DBOOST_DISABLE_EXCEPTION) +add_definitions(-DBOOST_ASIO_ENABLE_CANCELIO) if (tcmalloc) target_link_libraries(torrent-rasterbar tcmalloc) diff --git a/ChangeLog b/ChangeLog index df8085e53..d29dc7d16 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,6 +4,9 @@ * support trackerid tracker extension * graceful peer disconnect mode which finishes transactions before disconnecting peers * support chunked encoding for web seeds + * uTP protocol support + * resistance towards certain flood attacks + * support chunked encoding for web seeds (only for BEP 19, web seeds) * optimized session startup time * support SSL for web seeds, through all proxies * support extending web seeds with custom authorization and extra headers diff --git a/Jamfile b/Jamfile index ce6b967d2..78f792cd2 100755 --- a/Jamfile +++ b/Jamfile @@ -81,8 +81,9 @@ rule linking ( properties * ) { result += ws2_32 wsock32 + iphlpapi WIN32_LEAN_AND_MEAN - _WIN32_WINNT=0x0500 + _WIN32_WINNT=0x0600 __USE_W32_SOCKETS WIN32 _WIN32 @@ -353,6 +354,7 @@ lib GeoIP : : GeoIP shared ; # socket libraries on windows lib wsock32 : : wsock32 shared ; lib ws2_32 : : ws2_32 shared ; +lib iphlpapi : : iphlpapi shared ; SOURCES = alert @@ -384,6 +386,7 @@ SOURCES = i2p_stream instantiate_connection natpmp + packet_buffer piece_picker policy puff @@ -403,8 +406,11 @@ SOURCES = http_tracker_connection udp_tracker_connection sha1 + timestamp_history udp_socket upnp + utp_socket_manager + utp_stream logger file_pool lsd @@ -443,6 +449,8 @@ local usage-requirements = debug:TORRENT_DEBUG _FILE_OFFSET_BITS=64 BOOST_EXCEPTION_DISABLE +# enable cancel support in asio + BOOST_ASIO_ENABLE_CANCELIO @linking # these 
compiler settings just makes the compiler standard conforming msvc:/Zc:wchar_t diff --git a/configure.ac b/configure.ac index aa4a788e2..f09a60faf 100644 --- a/configure.ac +++ b/configure.ac @@ -593,6 +593,9 @@ COMPILETIME_OPTIONS+="-DBOOST_ASIO_HASH_MAP_BUCKETS=1021 " AC_DEFINE([BOOST_EXCEPTION_DISABLE],[1],[Define to disable the boost.exception features.]) COMPILETIME_OPTIONS+="-DBOOST_EXCEPTION_DISABLE " +AC_DEFINE([BOOST_ASIO_ENABLE_CANCELIO],[1],[Define to enable cancel support in asio on windows XP and older.]) +COMPILETIME_OPTIONS+="-DBOOST_ASIO_ENABLE_CANCELIO " + dnl Use possibly specific python install params AC_ARG_VAR([PYTHON_INSTALL_PARAMS], [Set specific install parameters for python bindings.]) AS_IF([test "x$PYTHON_INSTALL_PARAMS" = "x"], diff --git a/docs/cwnd.png b/docs/cwnd.png new file mode 100644 index 000000000..9f850caa7 Binary files /dev/null and b/docs/cwnd.png differ diff --git a/docs/cwnd_thumb.png b/docs/cwnd_thumb.png new file mode 100644 index 000000000..1c09122da Binary files /dev/null and b/docs/cwnd_thumb.png differ diff --git a/docs/delays.png b/docs/delays.png new file mode 100644 index 000000000..66db05e98 Binary files /dev/null and b/docs/delays.png differ diff --git a/docs/delays_thumb.png b/docs/delays_thumb.png new file mode 100644 index 000000000..84739446c Binary files /dev/null and b/docs/delays_thumb.png differ diff --git a/docs/features.html b/docs/features.html index 3c72e82a3..0f64ccdc8 100644 --- a/docs/features.html +++ b/docs/features.html @@ -56,12 +56,13 @@
  • high performance disk subsystem
  • network buffers
  • piece picker
  • -
  • merkle hash tree torrents
  • -
  • customizable file storage
  • -
  • easy to use API
  • +
  • share mode
  • +
  • merkle hash tree torrents
  • +
  • customizable file storage
  • +
  • easy to use API
  • -
  • portability
  • +
  • portability
  • @@ -97,9 +98,12 @@ uTorrent interpretation).
  • supports the compact=1 tracker parameter.
  • super seeding/initial seeding (BEP 16).
  • private torrents (BEP 27).
  • +
  • upload-only extension (BEP 21).
  • support for IPv6, including BEP 7 and BEP 24.
  • support for merkle hash tree torrents. This makes the size of torrent files scale well with the size of the content.
  • +
  • share-mode. This is a special mode torrents can be put in to optimize share +ratio rather than downloading the torrent.
  • @@ -124,6 +128,8 @@ piece's hash is verified the first time it is requested.

    network

      +
    • a high quality uTP implementation (BEP29_). A transport protocol with +delay based congestion control. See separate article.
    • adjusts the length of the request queue depending on download rate.
    • serves multiple torrents on a single port and in a single thread
    • piece picking on block-level (as opposed to piece-level). @@ -253,6 +259,15 @@ makes slow peers pick blocks from the same piece, and fast peers pick from the s and hence decreasing the likelihood of slow peers blocking the completion of pieces.

      The piece picker can also be set to download pieces in sequential order.

    +
    +

    share mode

    +

    The share mode feature in libtorrent is intended for users who are only interested in +helping out swarms, not downloading the torrents.

    +

    It works by predicting the demand for pieces, and only downloads pieces if there is enough +demand. New pieces will only be downloaded once the share ratio has hit a certain target.

    +

    This feature is especially useful when combined with RSS, so that a client can be set up +to provide additional bandwidth to an entire feed.

    +

    merkle hash tree torrents

    Merkle hash tree torrents is an extension that lets a torrent file only contain the @@ -363,6 +378,12 @@ epoll on linux and kqueue on MacOS X and BSD.

  • GCC 2.95.4
  • msvc6
  • +
    +
    +

    Docutils System Messages

    +
    +

    System Message: ERROR/3 (features.rst, line 82); backlink

    +Unknown target name: "bep29".
    @@ -4626,7 +4794,7 @@ public: }; -
    +

    ip_filter()

    @@ -6550,6 +6718,11 @@ potentially better peer
     invalid_lt_tracker_message
     The peer sent an invalid tracker exchange message
     
    +108
    +too_frequent_pex
    +The peer sent pex messages too often. This is a possible
    +attempt at an attack
    +
     
     
     

    NAT-PMP errors:

    @@ -6980,7 +7153,7 @@ int sparse_end(int start) const; region). The purpose of this is to skip parts of files that can be known to contain zeros when checking files.

    -
    +

    move_storage()

    @@ -7057,7 +7230,7 @@ should be moved to slot1This is only used in compact mode.

    Returning true indicates an error occurred.

    -
    +

    rename_file()

    diff --git a/docs/manual.rst b/docs/manual.rst
    index e10c7fd0d..b35afa729 100644
    --- a/docs/manual.rst
    +++ b/docs/manual.rst
    @@ -618,6 +618,15 @@ struct has the following members::
     		int branch_factor;
     	};
     
    +	struct utp_status
    +	{
    +		int num_idle;
    +		int num_syn_sent;
    +		int num_connected;
    +		int num_fin_sent;
    +		int num_close_wait;
    +	};
    +
     	struct session_status
     	{
     		bool has_incoming_connections;
    @@ -663,6 +672,8 @@ struct has the following members::
     		size_type dht_global_nodes;
     		std::vector active_requests;
     		int dht_total_allocations;
    +
    +		utp_status utp_stats;
     	};
     
     ``has_incoming_connections`` is false as long as no incoming connections have been
    @@ -730,6 +741,8 @@ network.
     particular DHT lookup. This represents roughly the amount of memory used
     by the DHT.
     
    +``utp_stats`` contains statistics on the uTP sockets.
    +
     get_cache_status()
     ------------------
     
    @@ -1074,7 +1087,6 @@ struct has the following members::
     	{
     		int max_peers_reply;
     		int search_branching;
    -		int service_port;
     		int max_fail_count;
     	};
     
    @@ -1085,19 +1097,18 @@ response to a ``get_peers`` message from another node.
     send when announcing and refreshing the routing table. This parameter is
     called alpha in the kademlia paper.
     
    -``service_port`` is the udp port the node will listen to. This will default
    -to 0, which means the udp listen port will be the same as the tcp listen
    -port. This is in general a good idea, since some NAT implementations
    -reserves the udp port for any mapped tcp port, and vice versa. NAT-PMP
    -guarantees this for example.
    -
     ``max_fail_count`` is the maximum number of failed tries to contact a node
     before it is removed from the routing table. If there are known working nodes
     that are ready to replace a failing node, it will be replaced immediately,
     this limit is only used to clear out nodes that don't have any node that can
     replace them.
     
    -``is_dht_running`` returns true if the DHT support has been started and false
    +The ``dht_settings`` struct used to contain a ``service_port`` member to control
    +which port the DHT would listen on and send messages from. This field is deprecated
    +and ignored. libtorrent always tries to open the UDP socket on the same port
    +as the TCP socket.
    +
    +``is_dht_running()`` returns true if the DHT support has been started and false
     otherwise.
     
     
    @@ -3387,6 +3398,7 @@ It contains the following fields::
     			optimistic_unchoke = 0x800,
     			snubbed = 0x1000,
     			upload_only = 0x2000,
    +			holepunched = 0x4000,
     			rc4_encrypted = 0x100000,
     			plaintext_encrypted = 0x200000
     		};
    @@ -3535,6 +3547,11 @@ any combination of the enums above. The following table describes each flag:
     |                         | will not downloading anything more, regardless of     |
     |                         | which pieces we have.                                 |
     +-------------------------+-------------------------------------------------------+
    +| ``holepunched``         | This flag is set if the peer was in holepunch mode    |
    +|                         | when the connection succeeded. This typically only    |
    +|                         | happens if both peers are behind a NAT and the peers  |
    +|                         | connect via the NAT holepunch mechanism.              |
    ++-------------------------+-------------------------------------------------------+
     
     __ extension_protocol.html
     
    @@ -3673,8 +3690,19 @@ that may give away something about which software is running in the other end.
     In the case of a web seed, the server type and version will be a part of this
     string.
     
    -``connection_type`` can currently be one of ``standard_bittorrent`` or
    -``web_seed``. These are currently the only implemented protocols.
    +``connection_type`` can currently be one of:
    +
    ++---------------------------------------+-------------------------------------------------------+
    +| type                                  | meaning                                               |
    ++=======================================+=======================================================+
    +| ``peer_info::standard_bittorrent``    | Regular bittorrent connection over TCP                |
    ++---------------------------------------+-------------------------------------------------------+
    +| ``peer_info::bittorrent_utp``         | Bittorrent connection over uTP                        |
    ++---------------------------------------+-------------------------------------------------------+
    +| ``peer_info::web_seed``               | HTTP connection using the `BEP 19`_ protocol          |
    ++---------------------------------------+-------------------------------------------------------+
    +| ``peer_info::http_seed``              | HTTP connection using the `BEP 17`_ protocol          |
    ++---------------------------------------+-------------------------------------------------------+
     
     ``remote_dl_rate`` is an estimate of the rate this peer is downloading at, in
     bytes per second.
    @@ -3927,10 +3955,17 @@ session_settings
     		int default_peer_upload_rate;
     		int default_peer_download_rate;
     		bool broadcast_lsd;
    +
    +		bool enable_outgoing_utp;
    +		bool enable_incoming_utp;
    +		bool enable_outgoing_tcp;
    +		bool enable_incoming_tcp;
    +		int max_pex_peers;
     		bool ignore_resume_timestamps;
     		bool anonymous_mode;
     		int tick_interval;
     		int share_mode_target;
    +
     		int upload_rate_limit;
     		int download_rate_limit;
     		int local_upload_rate_limit;
    @@ -3939,6 +3974,24 @@ session_settings
     		int half_open_limit;
     		int connections_limit;
     
    +		int utp_target_delay;
    +		int utp_gain_factor;
    +		int utp_min_timeout;
    +		int utp_syn_resends;
    +		int utp_num_resends;
    +		int utp_connect_timeout;
    +		int utp_delayed_ack;
    +		bool utp_dynamic_sock_buf;
    +
    +		enum bandwidth_mixed_algo_t
    +		{
    +			prefer_tcp = 0,
    +			peer_proportional = 1
    +
    +		};
    +		int mixed_mode_algorithm;
    +		bool rate_limit_utp;
    +
     		int listen_queue_size;
     	};
     
    @@ -4604,6 +4657,11 @@ if ``broadcast_lsd`` is set to true, the local peer discovery
     broadcast its messages. This can be useful when running on networks
     that don't support multicast. It's off by default since it's inefficient.
     
    +``enable_outgoing_utp``, ``enable_incoming_utp``, ``enable_outgoing_tcp``,
    +``enable_incoming_tcp`` all determine whether libtorrent should attempt to make
    +outgoing connections of the specific type, or allow incoming connections. By
    +default all of them are enabled.
    +
     ``ignore_resume_timestamps`` determines if the storage, when loading
     resume data files, should verify that the file modification time
     with the timestamps in the resume data. This defaults to false, which
    @@ -4673,6 +4731,58 @@ opened. The number of connections is set to a hard minimum of at least two per
     torrent, so if you set a too low connections limit, and open too many torrents,
     the limit will not be met.
     
    +``utp_target_delay`` is the target delay for uTP sockets in milliseconds. A high
    +value will make uTP connections more aggressive and cause longer queues in the upload
    +bottleneck. It cannot be too low, since the noise in the measurements would cause
    +it to send too slowly. The default is 50 milliseconds.
    +
    +``utp_gain_factor`` is the number of bytes the uTP congestion window can increase
    +at the most in one RTT. This defaults to 300 bytes. If this is set too high,
    +the congestion controller reacts too hard to noise and will not be stable, if it's
    +set too low, it will react slowly to congestion and not back off as fast.
    +
    +``utp_min_timeout`` is the shortest allowed uTP socket timeout, specified in milliseconds.
    +This defaults to 500 milliseconds. The timeout depends on the RTT of the connection, but
    +is never smaller than this value. A connection times out when every packet in a window
    +is lost, or when a packet is lost twice in a row (i.e. the resent packet is lost as well).
    +
    +The shorter the timeout is, the faster the connection will recover from this situation,
    +assuming the RTT is low enough.
    +
    +``utp_syn_resends`` is the number of SYN packets that are sent (and timed out) before
    +giving up and closing the socket.
    +
    +``utp_num_resends`` is the number of times a packet is sent (and lost or timed out)
    +before giving up and closing the connection.
    +
    +``utp_connect_timeout`` is the number of milliseconds of timeout for the initial SYN
    +packet for uTP connections. For each timed out packet (in a row), the timeout is doubled.
    +
    +``utp_delayed_ack`` is the number of milliseconds to delay ACKs the most. Delaying ACKs
    +significantly helps reduce the amount of protocol overhead in the reverse direction
    +from downloads. It defaults to 100 milliseconds. If set to 0, delayed ACKs are disabled
    +and every incoming payload packet is ACKed. The granularity of this timer is capped by
    +the tick interval (as specified by ``tick_interval``).
    +
    +``utp_dynamic_sock_buf`` controls if the uTP socket manager is allowed to increase
    +the socket buffer if a network interface with a large MTU is used (such as loopback
    +or ethernet jumbo frames). This defaults to true and might improve uTP throughput.
    +For RAM constrained systems, disabling this typically saves around 30kB in user space
    +and probably around 400kB in kernel socket buffers (it adjusts the send and receive
    +buffer size on the kernel socket, both for IPv4 and IPv6).
    +
    +The ``mixed_mode_algorithm`` determines how to treat TCP connections when there are
    +uTP connections. Since uTP is designed to yield to TCP, there's an inherent problem
    +when using swarms that have both TCP and uTP connections. If nothing is done, uTP
    +connections would often be starved out for bandwidth by the TCP connections. This mode
    +is ``prefer_tcp``. The ``peer_proportional`` mode simply looks at the current throughput
    +and rate limits all TCP connections to their proportional share based on how many of
    +the connections are TCP. This works best if uTP connections are not rate limited by
    +the global rate limiter (which they aren't by default).
    +
    +``rate_limit_utp`` determines if uTP connections should be throttled by the global rate
    +limiter or not. By default they are not, since uTP manages its own rate.
    +
     ``listen_queue_size`` is the value passed in to listen() for the listen socket.
     It is the number of outstanding incoming connections to queue up while we're not
     actively waiting for a connection to be accepted. The default is 5 which should
    @@ -6797,6 +6907,9 @@ code   symbol                                    description
     106    invalid_pex_message                       The peer sent an invalid peer exchange message
     ------ ----------------------------------------- -----------------------------------------------------------------
     107    invalid_lt_tracker_message                The peer sent an invalid tracker exchange message
    +------ ----------------------------------------- -----------------------------------------------------------------
    +108    too_frequent_pex                          The peer sent pex messages too often. This is a possible
    +                                                 attempt at an attack
     ====== ========================================= =================================================================
     
     NAT-PMP errors:
    diff --git a/docs/our_delay_base.png b/docs/our_delay_base.png
    new file mode 100644
    index 000000000..d14ad9ed8
    Binary files /dev/null and b/docs/our_delay_base.png differ
    diff --git a/docs/our_delay_base_thumb.png b/docs/our_delay_base_thumb.png
    new file mode 100644
    index 000000000..cb4c6f449
    Binary files /dev/null and b/docs/our_delay_base_thumb.png differ
    diff --git a/docs/tuning.html b/docs/tuning.html
    index e8cbe8770..dce4511d4 100644
    --- a/docs/tuning.html
    +++ b/docs/tuning.html
    @@ -203,6 +203,16 @@ of C++ exceptions. By disabling exceptions (-fno-exceptions on GCC), you can
     reduce the executable size with up to 45%. In order to build without exception
     support, you need to patch parts of boost.

    Also make sure to optimize for size when compiling.

    +

    Another way of reducing the executable size is to disable code that isn't used. +There are a number of TORRENT_* macros that control which features are included +in libtorrent. If these macros are used to strip down libtorrent, make sure the same +macros are defined when building libtorrent as when linking against it. If these +are different the structures will look different from the libtorrent side and from +the client side and memory corruption will follow.

    +

    One, probably, safe macro to define is TORRENT_NO_DEPRECATE which removes all +deprecated functions and struct members. As long as no deprecated functions are +relied upon, this should be a simple way to eliminate a little bit of code.

    +

    For all available options, see the building libtorrent section.

    reduce statistics

    diff --git a/docs/utp.html b/docs/utp.html new file mode 100644 index 000000000..4c6ae138c --- /dev/null +++ b/docs/utp.html @@ -0,0 +1,342 @@ + + + + + + +libtorrent manual + + + + + + + +
    +
    +
    + +
    + +
    +

    libtorrent manual

    + +++ + + + + + +
    Author:Arvid Norberg, arvid@rasterbar.com
    Version:0.16.0
    + +
    +

    uTP

    +

    uTP (uTorrent transport protocol) is a transport protocol which uses one-way +delay measurements for its congestion controller. This article is about uTP +in general and specifically about libtorrent's implementation of it.

    +
    +

    rationale

    +

    One of the most common problems users are experiencing using bittorrent is +that their internet "stops working". This can be caused by a number of things, +for example:

    +
      +
    1. a home router that crashes or slows down when its NAT pin-hole +table overflows, triggered by DHT or simply many TCP connections.
    2. +
    3. a home router that crashes or slows down by UDP traffic (caused by +the DHT)
    4. +
    5. a home DSL or cable modem having its send buffer filled up by outgoing +data, and the buffer fits seconds worth of bytes. This adds seconds +of delay on interactive traffic. For a web site that needs 10 round +trips to load this may mean 10s of seconds of delay to load compared +to without bittorrent. Skype or other delay sensitive applications +would be affected even more.
    6. +
    +

    This document will cover (3).

    +

    Typically this is solved by asking the user to enter a number of bytes +that the client is allowed to send per second (i.e. setting an upload +rate limit). The common recommendation is to set this limit to 80% of the +uplink's capacity. This is to leave some headroom for things like TCP +ACKs as well as the user's interactive use of the connection such as +browsing the web or checking email.

    +

    There are two major drawbacks with this technique:

    +
      +
    1. The user needs to actively make this setting (very few protocols +require the user to provide this sort of information). This also +means the user needs to figure out what their up-link capacity is. +This is unfortunately a number that many ISPs are not advertising +(because it's often much lower than the download capacity) which +might make it hard to find.
    2. +
    3. The 20% headroom is wasted most of the time. Whenever the user +is not using the internet connection for anything, those extra 20% +could have been used by bittorrent to upload, but they're already +allocated for interactive traffic. On top of that, 20% of the up-link +is often not enough to give a good and responsive browsing experience.
    4. +
    +

    The ideal bandwidth allocation would be to use 100% for bittorrent when +there is no interactive cross traffic, and 100% for interactive traffic +whenever there is any. This would not waste any bandwidth while the user +is idling, and it would make for a much better experience when the user +is using the internet connection for other things.

    +

    This is what uTP does.

    +
    +
    +

    TCP

    +

    The reason TCP will fill the send buffer, and cause the delay on all traffic, +is because its congestion control is only based on packet loss (and timeout).

    +

    Since the modem is buffering, packets won't get dropped until the entire queue +is full, and no more packets will fit. The packets will be dropped, TCP will +detect this within an RTT or so. When TCP notices a packet loss, it will slow +down its send rate and the queue will start to drain again. However, TCP will +immediately start to ramp up its send rate again until the buffer is full and +it detects packet loss again.

    +

    TCP is designed to fully utilize the link capacity, without causing congestion. +Whenever it senses congestion (through packet loss) it backs off. TCP is not +designed to keep delays low. When you get the first packet loss (assuming the +kind of queue described above, tail-queue) it is already too late. Your queue +is full and you have the maximum amount of delay your modem can provide.

    +

    TCP controls its send rate by limiting the number of bytes in-flight at any +given time. This limit is called congestion window (cwnd for short). During +steady state, the congestion window is constantly increasing linearly. Each +packet that is successfully transferred will increase cwnd.

    +
    +            cwnd
    +send_rate = ----
    +            RTT
    +
    +

    Send rate is proportional to cwnd divided by RTT. A smaller cwnd will cause +the send rate to be lower and a larger cwnd will cause the send rate to be +higher.

    +

    Using a congestion window instead of controlling the rate directly is simple +because it also introduces an upper bound for memory usage for packets that +haven't been ACKed yet and need to be kept around.

    +

    The behavior of TCP, where it bumps up against the ceiling, backs off and then +starts increasing again until it hits the ceiling again, forms a saw tooth shape. +If the modem wouldn't have any send buffer at all, a single TCP stream would +not be able to fully utilize the link because of this behavior, since it would +only fully utilize the link right before the packet loss and the back-off.

    +
    +
    +

    LEDBAT congestion controller

    +

    The congestion controller in uTP is called LEDBAT, which also is an IETF working +group attempting to standardize it. The congestion controller, on top of reacting +to packet loss the same way TCP does, also reacts to changes in delays.

    +

    For any uTP (or LEDBAT) implementation, there is a target delay. This is the +amount of delay that is acceptable, and is in fact targeted for the connection. +The target delay is defined to 25 ms in LEDBAT, uTorrent uses 100 ms and +libtorrent uses 75 ms. Whenever a delay measurement is lower than the target, +cwnd is increased proportional to (target_delay - delay). Whenever the measurement +is higher than the target, cwnd is decreased proportional to (delay - target_delay).

    +

    It can simply be expressed as:

    +
    +cwnd += gain * (target_delay - delay)
    +
    +cwnd_thumb.png +

    Similarly to TCP, this is scaled so that the increase is evened out over one RTT.

    +

    The linear controller will adjust the cwnd more for delays that are far off the +target, and less for delays that are close to the target. This makes it converge +at the target delay. Although, due to noise there is almost always some amount of +oscillation. This oscillation is typically smaller than the saw tooth TCP forms.

    +

    The figure to the right shows how (TCP) cross traffic causes uTP to essentially +entirely stop sending anything. Its delay measurements are mostly well above the target +during this time. The cross traffic is only a single TCP stream in this test.

    +

    As soon as the cross traffic ceases, uTP will pick up its original send rate within +a second.

    +

    Since uTP constantly measures the delay, with every single packet, the reaction time +to cross traffic causing delays is a single RTT (typically a fraction of a second).

    +
    +
    +

    one way delays

    +

    uTP measures the delay imposed on packets being sent to the other end +of the connection. This measurement only includes buffering delay along +the link, not propagation delay (the speed of light times distance) nor +the routing delay (the time routers spend figuring out where to forward +the packet). It does this by always comparing all measurements to a +baseline measurement, to cancel out any fixed delay. By focusing on the +variable delay along a link, it will specifically detect points where +there might be congestion, since those points will have buffers.

    +delays_thumb.png +

    Delay on the return link is explicitly not included in the delay measurement. +This is because in a peer-to-peer application, the other end is likely to also +be connected via a modem, with the same send buffer restrictions as we assume +for the sending side. The other end having its send queue full is not an indication +of congestion on the path going the other way.

    +

    In order to measure one way delays for packets, we cannot rely on clocks being +synchronized, especially not at the microsecond level. Instead, the actual time +it takes for a packet to arrive at the destination is not measured, only the changes +in the transit time are measured.

    +

    Each packet that is sent includes a time stamp of the current time, in microseconds, +of the sending machine. The receiving machine calculates the difference between its +own timestamp and the one in the packet and sends this back in the ACK. This difference, +since it is in microseconds, will essentially be a random 32 bit number. However, +the difference will stay somewhat similar over time. Any change in this difference +indicates that packets are either going through faster or slower.

    +

    In order to measure the one-way buffering delay, a base delay is established. The +base delay is the lowest ever seen value of the time stamp difference. Each delay +sample we receive back, is compared against the base delay and the delay is the +difference.

    +

    This is the delay that's fed into the congestion controller.

    +

    A histogram of typical delay measurements is shown to the right. This is from +a transfer between a cable modem connection and a DSL connection.

    +

    The details of the delay measurements are slightly more complicated since the +values need to be able to wrap (cross the 2^32 boundary and start over at 0).

    +
    +
    +

    Path MTU discovery

    +

    MTU is short for Maximum Transmission Unit and describes the largest packet size that +can be sent over a link. Any datagram whose size exceeds this limit will either +be fragmented or dropped. A fragmented datagram means that the payload is split up +in multiple packets, each with its own individual packet header.

    +

    There are several reasons to avoid sending datagrams that get fragmented:

    +
      +
    1. A fragmented datagram is more likely to be lost. If any fragment is lost, +the whole datagram is dropped.
    2. +
    3. Bandwidth is likely to be wasted. If the datagram size is not divisible +by the MTU the last packet will not contain as much payload as it could, and the +payload over protocol header ratio decreases.
    4. +
    5. It's expensive to fragment datagrams. Few routers are optimized to handle large +numbers of fragmented packets. Datagrams that have to fragment are likely to +be delayed significantly, and contribute to more CPU being used on routers. +Typically fragmentation (and other advanced IP features) are implemented in +software (slow) and not hardware (fast).
    6. +
    +

    The path MTU is the lowest MTU of any link along a path from two endpoints on the +internet. The MTU bottleneck isn't necessarily at one of the endpoints, but can +be anywhere in between.

    +

    The most common MTU is 1500 bytes, which is the largest packet size for ethernet +networks. Many home DSL connections, however, tunnel IP through PPPoE (Point to +Point Protocol over Ethernet. Yes, that is the old dial-up modem protocol). This +protocol uses up 8 bytes per packet for its own header.

    +

    If the user happens to be on an internet connection over a VPN, it will add another +layer, with its own packet headers.

    +

    In short, if you picked the largest possible UDP payload size on an ethernet network, +1472 bytes (the 1500 byte MTU minus a 20 byte IPv4 header and an 8 byte UDP header), and stuck with it, you would be quite likely to generate fragments for a lot +of connections. The trailing fragments created this way are very small, which especially +inflates the header overhead.

    +

    The other approach of picking a very conservative packet size, that would be very +unlikely to get fragmented has the following drawbacks:

    +
      +
    1. People on good, normal, networks will be penalized with a small packet size. +Both in terms of router load but also bandwidth waste.
    2. +
    3. Software routers are typically not limited by the number of bytes they can route, +but the number of packets. Small packets mean more of them, and more load on +software routers.
    4. +
    +

    The solution to the problem of finding the optimal packet size, is to dynamically +adjust the packet size and search for the largest size that can make it through +without being fragmented along the path.

    +

    To help do this, you can set the DF bit (Don't Fragment) in your datagrams. This +asks routers that otherwise would fragment packets to instead drop them, and send +back an ICMP message reporting the MTU of the link the packet couldn't fit through. With +this message, it's very simple to discover the path MTU. You simply mark your packets +not to be fragmented, and change your packet size whenever you receive the ICMP +packet-too-big message.

    +

    Unfortunately it's not quite that simple. There are a significant number of firewalls +in the wild blocking all ICMP messages. This means we can't rely on them; we also have +to guess that a packet was dropped because of its size. This is done by only marking +certain packets with DF, and if all other packets go through, except for the MTU probes, +we know that we need to lower our packet sizes.

    +

    If we set up bounds for the path MTU (say the minimum internet MTU, 576 and ethernet's 1500), +we can do a binary search for the MTU. This would let us find it in just a few round-trips.

    +

    On top of this, libtorrent has an optimization where it figures out which interface a +uTP connection will be sent over, and initializes the MTU ceiling to that interface's MTU. +This means that a VPN tunnel would advertise its MTU as lower, and the uTP connection would +immediately know to send smaller packets, no search required. It also has the side-effect +of being able to use much larger packet sizes for non-ethernet interfaces or ethernet links +with jumbo frames.

    +
    +
    +

    clock drift

    +our_delay_base_thumb.png +

    Clock drift is clocks progressing at different rates. It's different from clock +skew which means clocks set to different values (but which may progress at the same +rate).

    +

    Any clock drift between the two machines involved in a uTP transfer will result +in systematically inflated or deflated delay measurements.

    +

    This can be solved by letting the base delay be the lowest seen sample in the last +n minutes. This is a trade-off between seeing a single packet go straight through +the queue, with no delay, and the amount of clock drift one can assume on normal computers.

    +

    It turns out that it's fairly safe to assume that one of your packets will in fact go +straight through without any significant delay, once every 20 minutes or so. However, +the clock drift between normal computers can be as much as 17 ms in 10 minutes. 17 ms +is quite significant, especially if your target delay is 25 ms (as in the LEDBAT spec).

    +

    Clocks progress at different rates depending on temperature. This means computers +running hot are likely to have a clock drift compared to computers running cool.

    +

    So, by updating the delay base periodically based on the lowest seen sample, you'll either +end up changing it upwards (artificially making the delay samples appear small) without +the congestion or delay actually having changed, or you'll end up with a significant clock +drift and have artificially low samples because of that.

    +

    The solution to this problem is based on the fact that the clock drift is only a problem +for one of the sides of the connection. Only when your delay measurements keep increasing +is it a problem. If your delay measurements keep decreasing, the samples will simply push +down the delay base along with it. With this in mind, we can simply keep track of the +other end's delay measurements as well, applying the same logic to it. Whenever the +other end's base delay is adjusted downwards, we adjust our base delay upwards by the same +amount.

    +

    This will accurately keep the base delay updated with the clock drift and improve +the delay measurements. The figure on the right shows the absolute timestamp differences +along with the base delay. The slope of the measurements is caused by clock drift.

    +

    For more information on the clock drift compensation, see the slides from BitTorrent's +presentation at IPTPS10.

    +
    +
    +

    features

    +

    libtorrent's uTP implementation includes the following features:

    +
      +
    • Path MTU discovery, including jumbo frames and detecting restricted +MTU tunnels. Binary search packet sizes to find the largest non-fragmented size.
    • +
    • Selective ACK. The ability to acknowledge individual packets in the +event of packet loss
    • +
    • Fast resend. The first time a packet is lost, it's resent immediately. +Triggered by duplicate ACKs.
    • +
    • Nagle's algorithm. Minimize protocol overhead by attempting to lump +full packets of payload together before sending a packet.
    • +
    • Delayed ACKs to minimize protocol overhead.
    • +
    • Microsecond resolution timestamps.
    • +
    • Advertised receive window, to support download rate limiting.
    • +
    • Correct handling of wrapping sequence numbers.
    • +
    • Easy configuration of target-delay, gain-factor, timeouts, delayed-ack +and socket buffers.
    • +
    +
    +
    +
    + +
    + + +
    + + diff --git a/docs/utp.rst b/docs/utp.rst new file mode 100644 index 000000000..b744109b7 --- /dev/null +++ b/docs/utp.rst @@ -0,0 +1,347 @@ +================= +libtorrent manual +================= + +:Author: Arvid Norberg, arvid@rasterbar.com +:Version: 0.16.0 + +.. contents:: Table of contents + :depth: 2 + :backlinks: none + +uTP +=== + +uTP (uTorrent transport protocol) is a transport protocol which uses one-way +delay measurements for its congestion controller. This article is about uTP +in general and specifically about libtorrent's implementation of it. + +rationale +--------- + +One of the most common problems users are experiencing using bittorrent is +that their internet "stops working". This can be caused by a number of things, +for example: + +1. a home router that crashes or slows down when its NAT pin-hole + table overflows, triggered by DHT or simply many TCP connections. + +2. a home router that crashes or slows down by UDP traffic (caused by + the DHT) + +3. a home DSL or cable modem having its send buffer filled up by outgoing + data, and the buffer fits seconds worth of bytes. This adds seconds + of delay on interactive traffic. For a web site that needs 10 round + trips to load this may mean 10s of seconds of delay to load compared + to without bittorrent. Skype or other delay sensitive applications + would be affected even more. + +This document will cover (3). + +Typically this is solved by asking the user to enter a number of bytes +that the client is allowed to send per second (i.e. setting an upload +rate limit). The common recommendation is to set this limit to 80% of the +uplink's capacity. This is to leave some headroom for things like TCP +ACKs as well as the user's interactive use of the connection such as +browsing the web or checking email. + +There are two major drawbacks with this technique: + +1. The user needs to actively make this setting (very few protocols + require the user to provide this sort of information). 
This also + means the user needs to figure out what its up-link capacity is. + This is unfortunately a number that many ISPs are not advertizing + (because it's often much lower than the download capacity) which + might make it hard to find. + +2. The 20% headroom is wasted most of the time. Whenever the user + is not using the internet connection for anything, those extra 20% + could have been used by bittorrent to upload, but they're already + allocated for interactive traffic. On top of that, 20% of the up-link + is often not enough to give a good and responsive browsing experience. + +The ideal bandwidth allocation would be to use 100% for bittorrent when +there is no interactive cross traffic, and 100% for interactive traffic +whenever there is any. This would not waste any bandwidth while the user +is idling, and it would make for a much better experience when the user +is using the internet connection for other things. + +This is what uTP does. + +TCP +--- + +The reason TCP will fill the send buffer, and cause the delay on all traffic, +is because its congestion control is *only* based on packet loss (and timeout). + +Since the modem is buffering, packets won't get dropped until the entire queue +is full, and no more packets will fit. The packets will be dropped, TCP will +detect this within an RTT or so. When TCP notices a packet loss, it will slow +down its send rate and the queue will start to drain again. However, TCP will +immediately start to ramp up its send rate again until the buffer is full and +it detects packet loss again. + +TCP is designed to fully utilize the link capacity, without causing congestion. +Whenever it sense congestion (through packet loss) it backs off. TCP is not +designed to keep delays low. When you get the first packet loss (assuming the +kind of queue described above, tail-queue) it is already too late. Your queue +is full and you have the maximum amount of delay your modem can provide. 
+ +TCP controls its send rate by limiting the number of bytes in-flight at any +given time. This limit is called congestion window (*cwnd* for short). During +steady state, the congestion window is constantly increasing linearly. Each +packet that is successfully transferred will increase cwnd. + +:: + + cwnd + send_rate = ---- + RTT + + +Send rate is proportional to cwnd divided by RTT. A smaller cwnd will cause +the send rate to be lower and a larger cwnd will cause the send rate to be +higher. + +Using a congestion window instead of controlling the rate directly is simple +because it also introduces an upper bound for memory usage for packets that +haven't been ACKed yet and needs to be kept around. + +The behavior of TCP, where it bumps up against the ceiling, backs off and then +starts increasing again until it hits the ceiling again, forms a saw tooth shape. +If the modem wouldn't have any send buffer at all, a single TCP stream would +not be able to fully utilize the link because of this behavior, since it would +only fully utilize the link right before the packet loss and the back-off. + +LEDBAT congestion controller +---------------------------- + +The congestion controller in uTP is called LEDBAT_, which also is an IETF working +group attempting to standardize it. The congestion controller, on top of reacting +to packet loss the same way TCP does, also reacts to changes in delays. + +For any uTP (or LEDBAT_) implementation, there is a target delay. This is the +amount of delay that is acceptable, and is in fact targeted for the connection. +The target delay is defined to 25 ms in LEDBAT_, uTorrent uses 100 ms and +libtorrent uses 75 ms. Whenever a delay measurement is lower than the target, +cwnd is increased proportional to (target_delay - delay). Whenever the measurement +is higher than the target, cwnd is decreased proportional to (delay - target_delay). + +It can simply be expressed as:: + + cwnd += gain * (target_delay - delay) + +.. 
image:: cwnd_thumb.png + :target: cwnd.png + :align: right + +Similarly to TCP, this is scaled so that the increase is evened out over one RTT. + +The linear controller will adjust the cwnd more for delays that are far off the +target, and less for delays that are close to the target. This makes it converge +at the target delay. Although, due to noise there is almost always some amount of +oscillation. This oscillation is typically smaller than the saw tooth TCP forms. + +The figure to the right shows how (TCP) cross traffic causese uTP to essentially +entirely stop sending anything. Its delay measurements are mostly well above the target +during this time. The cross traffic is only a single TCP stream in this test. + +As soon as the cross traffic ceases, uTP will pick up its original send rate within +a second. + +Since uTP constantly measures the delay, with every single packet, the reaction time +to cross traffic causing delays is a single RTT (typically a fraction of a second). + +one way delays +-------------- + +uTP measures the delay imposed on packets being sent to the other end +of the connection. This measurement only includes buffering delay along +the link, not propagation delay (the speed of light times distance) nor +the routing delay (the time routers spend figuring out where to forward +the packet). It does this by always comparing all measurements to a +baseline measurement, to cancel out any fixed delay. By focusing on the +variable delay along a link, it will specifically detect points where +there might be congestion, since those points will have buffers. + +.. image:: delays_thumb.png + :target: delays.png + :align: right + +Delay on the return link is explicitly not included in the delay measurement. +This is because in a peer-to-peer application, the other end is likely to also +be connected via a modem, with the same send buffer restrictions as we assume +for the sending side. 
The other end having its send queue full is not an indication +of congestion on the path going the other way. + +In order to measure one way delays for packets, we cannot rely on clocks being +synchronized, especially not at the microsecond level. Instead, the actual time +it takes for a packet to arrive at the destination is not measured, only the changes +in the transit time is measured. + +Each packet that is sent includes a time stamp of the current time, in microseconds, +of the sending machine. The receiving machine calculates the difference between its +own timestamp and the one in the packet and sends this back in the ACK. This difference, +since it is in microseconds, will essentially be a random 32 bit number. However, +the difference will stay somewhat similar over time. Any changes in this difference +indicates that packets are either going through faster or slower. + +In order to measure the one-way buffering delay, a base delay is established. The +base delay is the lowest ever seen value of the time stamp difference. Each delay +sample we receive back, is compared against the base delay and the delay is the +difference. + +This is the delay that's fed into the congestion controller. + +A histogram of typical delay measurements is shown to the right. This is from +a transfer between a cable modem connection and a DSL connection. + +The details of the delay measurements are slightly more complicated since the +values needs to be able to wrap (cross the 2^32 boundry and start over at 0). + +Path MTU discovery +------------------ + +MTU is short for *Maximum Transfer Unit* and describes the largest packet size that +can be sent over a link. Any datagrams which size exceeds this limit will either +be *fragmented* or dropped. A fragmented datagram means that the payload is split up +in multiple packets, each with its own individual packet header. + +There are several reasons to avoid sending datagrams that get fragmented: + +1. 
A fragmented datagram is more likely to be lost. If any fragment is lost, + the whole datagram is dropped. + +2. Bandwidth is likely to be wasted. If the datagram size is not divisible + by the MTU the last packet will not contain as much payload as it could, and the + payload over protocol header ratio decreases. + +3. It's expensive to fragment datagrams. Few routers are optimized to handle large + numbers of fragmented packets. Datagrams that have to fragment are likely to + be delayed significantly, and contribute to more CPU being used on routers. + Typically fragmentation (and other advanced IP features) are implemented in + software (slow) and not hardware (fast). + +The path MTU is the lowest MTU of any link along a path from two endpoints on the +internet. The MTU bottleneck isn't necessarily at one of the endpoints, but can +be anywhere in between. + +The most common MTU is 1500 bytes, which is the largest packet size for ethernet +networks. Many home DSL connections, however, tunnel IP through PPPoE (Point to +Point Protocol over Ethernet. Yes, that is the old dial-up modem protocol). This +protocol uses up 8 bytes per packet for its own header. + +If the user happens to be on an internet connection over a VPN, it will add another +layer, with its own packet headers. + +In short; if you would pick the largest possible packet size on an ethernet network, +1472, and stick with it, you would be quite likely to generate fragments for a lot +of connections. The fragments that will be created will be very small and especially +inflate the overhead waste. + +The other approach of picking a very conservative packet size, that would be very +unlikely to get fragmented has the following drawbacks: + +1. People on good, normal, networks will be penalized with a small packet size. + Both in terms of router load but also bandwidth waste. + +2. Software routers are typically not limited by the number of bytes they can route, + but the number of packets. 
Small packets means more of them, and more load on + software routers. + +The solution to the problem of finding the optimal packet size, is to dynamically +adjust the packet size and search for the largest size that can make it through +without being fragmented along the path. + +To help do this, you can set the DF bit (Don't Fragment) in your Datagrams. This +asks routers that otherwise would fragment packets to instead drop them, and send +back an ICMP message reporting the MTU of the link the packet couldn't fit. With +this message, it's very simple to discover the path MTU. You simply mark your packets +not to be fragmented, and change your packet size whenever you receive the ICMP +packet-too-big message. + +Unfortunately it's not quite that simple. There are a significant number of firewalls +in the wild blocking all ICMP messages. This means we can't rely on them, we also have +to guess that a packet was dropped because of its size. This is done by only marking +certain packets with DF, and if all other packets go through, except for the MTU probes, +we know that we need to lower our packet sizes. + +If we set up bounds for the path MTU (say the minimum internet MTU, 576 and ethernet's 1500), +we can do a binary search for the MTU. This would let us find it in just a few round-trips. + +On top of this, libtorrent has an optimization where it figures out which interface a +uTP connection will be sent over, and initialize the MTU ceiling to that interface's MTU. +This means that a VPN tunnel would advertize its MTU as lower, and the uTP connection would +immediately know to send smaller packets, no search required. It also has the side-effect +of being able to use much larger packet sizes for non-ethernet interfaces or ethernet links +with jumbo frames. + +clock drift +----------- + +.. image:: our_delay_base_thumb.png + :target: our_delay_base.png + :align: right + +Clock drift is clocks progressing at different rates. 
It's different from clock +skew which means clocks set to different values (but which may progress at the same +rate). + +Any clock drift between the two machines involved in a uTP transfer will result +in systematically inflated or deflated delay measurements. + +This can be solved by letting the base delay be the lowest seen sample in the last +*n* minutes. This is a trade-off between seeing a single packet go straight through +the queue, with no delay, and the amount of clock drift one can assume on normal computers. + +It turns out that it's fairly safe to assume that one of your packets will in fact go +straight through without any significant delay, once every 20 minutes or so. However, +the clock drift between normal computers can be as much as 17 ms in 10 minutes. 17 ms +is quite significant, especially if your target delay is 25 ms (as in the LEDBAT_ spec). + +Clocks progresses at different rates depending on temperature. This means computers +running hot are likely to have a clock drift compared to computers running cool. + +So, by updating the delay base periodically based on the lowest seen sample, you'll either +end up changing it upwards (artificaially making the delay samples appear small) without +the congestion or delay actually having changed, or you'll end up with a significant clock +drift and have artificially low samples because of that. + +The solution to this problem is based on the fact that the clock drift is only a problem +for one of the sides of the connection. Only when your delay measurements keep increasing +is it a problem. If your delay measurements keep decreasing, the samples will simply push +down the delay base along with it. With this in mind, we can simply keep track of the +other end's delay measurements as well, applying the same logic to it. Whenever the +other end's base delay is adjusted downwards, we adjust our base delay upwards by the same +amount. 
+ +This will accurately keep the base delay updated with the clock drift and improve +the delay measurements. The figure on the right shows the absolute timestamp differences +along with the base delay. The slope of the measurements is caused by clock drift. + +For more information on the clock drift compensation, see the slides from BitTorrent's +presentation at IPTPS10_. + +.. _IPTPS10: http://www.usenix.org/event/iptps10/tech/slides/cohen.pdf +.. _LEDBAT: https://datatracker.ietf.org/doc/draft-ietf-ledbat-congestion/ + +features +-------- + +libtorrent's uTP implementation includes the following features: + +* Path MTU discovery, including jumbo frames and detecting restricted + MTU tunnels. Binary search packet sizes to find the largest non-fragmented. +* Selective ACK. The ability to acknowledge individual packets in the + event of packet loss +* Fast resend. The first time a packet is lost, it's resent immediately. + Triggered by duplicate ACKs. +* Nagle's algorithm. Minimize protocol overhead by attempting to lump + full packets of payload together before sending a packet. +* Delayed ACKs to minimize protocol overhead. +* Microsecond resolution timestamps. +* Advertised receive window, to support download rate limiting. +* Correct handling of wrapping sequence numbers. +* Easy configuration of target-delay, gain-factor, timeouts, delayed-ack + and socket buffers. 
+ diff --git a/examples/Makefile.am b/examples/Makefile.am index 73457a3ba..0c422e523 100644 --- a/examples/Makefile.am +++ b/examples/Makefile.am @@ -3,7 +3,8 @@ example_programs = \ dump_torrent \ enum_if \ make_torrent \ - simple_client + simple_client \ + utp_test if ENABLE_EXAMPLES bin_PROGRAMS = $(example_programs) diff --git a/examples/client_test.cpp b/examples/client_test.cpp index 6f1de8803..1355ec196 100644 --- a/examples/client_test.cpp +++ b/examples/client_test.cpp @@ -174,6 +174,7 @@ bool print_file_progress = false; bool show_pad_files = false; bool show_dht_status = false; bool sequential_download = false; +bool print_utp_stats = false; bool print_ip = true; bool print_as = false; @@ -384,7 +385,7 @@ int peer_index(libtorrent::tcp::endpoint addr, std::vector const& peers) { using namespace libtorrent; - if (print_ip) out += "IP "; + if (print_ip) out += "IP "; #ifndef TORRENT_DISABLE_GEO_IP if (print_as) out += "AS "; #endif @@ -409,8 +410,8 @@ void print_peer_info(std::string& out, std::vector const& if (print_ip) { - error_code ec; - snprintf(str, sizeof(str), "%-22s %22s ", print_endpoint(i->ip).c_str() + snprintf(str, sizeof(str), "%-30s %-22s", (print_endpoint(i->ip) + + (i->connection_type == peer_info::bittorrent_utp ? " [uTP]" : "")).c_str() , print_endpoint(i->local_endpoint).c_str()); out += str; } @@ -425,7 +426,7 @@ void print_peer_info(std::string& out, std::vector const& #endif snprintf(str, sizeof(str) - , "%s%s (%s|%s) %s%s (%s|%s) %s%3d (%3d) %3d %c%c%c%c%c%c%c%c%c%c%c%c%c%c %c%c%c%c%c%c " + , "%s%s (%s|%s) %s%s (%s|%s) %s%3d (%3d) %3d %c%c%c%c%c%c%c%c%c%c%c%c%c%c%c %c%c%c%c%c%c " , esc("32"), add_suffix(i->down_speed, "/s").c_str() , add_suffix(i->total_download).c_str(), add_suffix(i->download_rate_peak, "/s").c_str() , esc("31"), add_suffix(i->up_speed, "/s").c_str(), add_suffix(i->total_upload).c_str() @@ -456,6 +457,8 @@ void print_peer_info(std::string& out, std::vector const& #else , '.' 
#endif + , (i->flags & peer_info::holepunched)?'h':'.' + , (i->source & peer_info::tracker)?'T':'_' , (i->source & peer_info::pex)?'P':'_' , (i->source & peer_info::dht)?'D':'_' @@ -833,9 +836,12 @@ int main(int argc, char* argv[]) " -L Use the specified username and password for the\n" " proxy specified by -P\n" " -H Don't start DHT\n" + " -M Disable TCP/uTP bandwidth balancing\n" " -W Set the max number of peers to keep in the peer list\n" " -N Do not attempt to use UPnP and NAT-PMP to forward ports\n" " -Y Rate limit local peers\n" + " -y Disable TCP connections (disable outgoing TCP and reject\n" + " incoming TCP connections)\n" " -q automatically quit the client after of refreshes\n" " this is useful for scripting tests\n" " " @@ -903,8 +909,6 @@ int main(int argc, char* argv[]) { if (argv[i][0] != '-') { - // interpret this as a torrent - // match it against the @ format if (strlen(argv[i]) > 45 && is_hex(argv[i], 40) @@ -955,7 +959,7 @@ int main(int argc, char* argv[]) case 'U': torrent_upload_limit = atoi(arg) * 1000; break; case 'D': torrent_download_limit = atoi(arg) * 1000; break; case 'm': monitor_dir = arg; break; - case 'M': share_mode = true; --i; break; + case 'Q': share_mode = true; --i; break; case 'b': bind_to_interface = arg; break; case 'w': settings.urlseed_wait_retry = atoi(arg); break; case 't': poll_interval = atoi(arg); break; @@ -1006,6 +1010,8 @@ int main(int argc, char* argv[]) case 'A': settings.allowed_fast_set_size = atoi(arg); break; case 'R': settings.read_cache_line_size = atoi(arg); break; case 'O': settings.allow_reordered_disk_operations = false; --i; break; + case 'M': settings.mixed_mode_algorithm = session_settings::prefer_tcp; --i; break; + case 'y': settings.enable_outgoing_tcp = false; settings.enable_incoming_tcp = false; --i; break; case 'P': { char* port = (char*) strrchr(arg, ':'); @@ -1297,6 +1303,7 @@ int main(int argc, char* argv[]) if (c == 'h') show_pad_files = !show_pad_files; if (c == 'a') print_piece_bar = 
!print_piece_bar; if (c == 'g') show_dht_status = !show_dht_status; + if (c == 'u') print_utp_stats = !print_utp_stats; // toggle columns if (c == '1') print_ip = !print_ip; if (c == '2') print_as = !print_as; @@ -1566,6 +1573,15 @@ int main(int argc, char* argv[]) } #endif + if (print_utp_stats) + { + snprintf(str, sizeof(str), "uTP idle: %d syn: %d est: %d fin: %d wait: %d\n" + , sess_stat.utp_stats.num_idle, sess_stat.utp_stats.num_syn_sent + , sess_stat.utp_stats.num_connected, sess_stat.utp_stats.num_fin_sent + , sess_stat.utp_stats.num_close_wait); + out += str; + } + if (active_handle.is_valid()) { torrent_handle h = active_handle; diff --git a/examples/enum_if.cpp b/examples/enum_if.cpp index cfd9aa7ec..846596591 100644 --- a/examples/enum_if.cpp +++ b/examples/enum_if.cpp @@ -61,15 +61,16 @@ int main() return 1; } - printf("%-18s%-18s%-35sinterface name\n", "destination", "network", "gateway"); + printf("%-18s%-18s%-35s%-7sinterface\n", "destination", "network", "gateway", "mtu"); for (std::vector::const_iterator i = routes.begin() , end(routes.end()); i != end; ++i) { - printf("%-18s%-18s%-35s%s\n" + printf("%-18s%-18s%-35s%-7d%s\n" , i->destination.to_string(ec).c_str() , i->netmask.to_string(ec).c_str() , i->gateway.to_string(ec).c_str() + , i->mtu , i->name); } @@ -82,15 +83,16 @@ int main() return 1; } - printf("%-18s%-18s%-35sflags\n", "address", "netmask", "name"); + printf("%-35s%-18s%-40s%-8sflags\n", "address", "netmask", "name", "mtu"); for (std::vector::const_iterator i = net.begin() , end(net.end()); i != end; ++i) { - printf("%-18s%-18s%-35s%s%s%s\n" + printf("%-35s%-18s%-40s%-8d%s%s%s\n" , i->interface_address.to_string(ec).c_str() , i->netmask.to_string(ec).c_str() , i->name + , i->mtu , (is_multicast(i->interface_address)?"multicast ":"") , (is_local(i->interface_address)?"local ":"") , (is_loopback(i->interface_address)?"loopback ":"") diff --git a/examples/utp_test.cpp b/examples/utp_test.cpp new file mode 100644 index 
000000000..b26003c4a --- /dev/null +++ b/examples/utp_test.cpp @@ -0,0 +1,48 @@ +#include "libtorrent/error_code.hpp" +#include "libtorrent/session.hpp" +#include "libtorrent/socket_type.hpp" +#include "libtorrent/utp_socket_manager.hpp" +#include "libtorrent/utp_stream.hpp" + +using namespace libtorrent; + +void on_connect(error_code const& e) +{ +} + +void on_udp_receive(error_code const& e, udp::endpoint const& ep + , char const* buf, int size) +{ +} + +void on_utp_incoming(void* userdata + , boost::shared_ptr const& utp_sock) +{ +} + +int main(int argc, char* argv[]) +{ + //int rtt, rtt_var; + //int max_window, cur_window; + //int delay_factor, window_factor, scaled_gain; + + /*session s; + s.listen_on(std::make_pair(6881, 6889));*/ + + io_service ios; + connection_queue cc(ios); + udp_socket udp_sock(ios, boost::bind(&on_udp_receive, _1, _2, _3, _4), cc); + + void* userdata; + utp_socket_manager utp_sockets(udp_sock, boost::bind(&on_utp_incoming, _1, _2), userdata); + + /*error_code ec; + utp_stream sock(ios, cc); + sock.bind(udp::endpoint(address_v4::any(), 0), ec); + + tcp::endpoint ep(address_v4::from_string("239.192.152.143", ec), 6771); + + sock.async_connect(ep, boost::bind(on_connect, _1));*/ + + return 0; +} diff --git a/include/libtorrent/Makefile.am b/include/libtorrent/Makefile.am index 93774e281..9bf0fac2f 100644 --- a/include/libtorrent/Makefile.am +++ b/include/libtorrent/Makefile.am @@ -94,6 +94,7 @@ nobase_include_HEADERS = \ storage_defs.hpp \ thread.hpp \ time.hpp \ + timestamp_history.hpp \ torrent_handle.hpp \ torrent.hpp \ torrent_info.hpp \ @@ -102,6 +103,8 @@ nobase_include_HEADERS = \ udp_tracker_connection.hpp \ union_endpoint.hpp \ upnp.hpp \ + utp_socket_manager.hpp \ + utp_stream.hpp \ utf8.hpp \ version.hpp \ web_peer_connection.hpp \ diff --git a/include/libtorrent/assert.hpp b/include/libtorrent/assert.hpp index 752fd18e0..f5997bcf4 100644 --- a/include/libtorrent/assert.hpp +++ b/include/libtorrent/assert.hpp @@ -51,11 +51,17 @@ 
std::string demangle(char const* name); #if (defined __linux__ || defined __MACH__) && defined __GNUC__ +#if TORRENT_USE_IOSTREAM #include +#endif TORRENT_EXPORT void assert_fail(const char* expr, int line, char const* file, char const* function, char const* val); #define TORRENT_ASSERT(x) do { if (x) {} else assert_fail(#x, __LINE__, __FILE__, __PRETTY_FUNCTION__, 0); } while (false) +#if TORRENT_USE_IOSTREAM #define TORRENT_ASSERT_VAL(x, y) do { if (x) {} else { std::stringstream __s__; __s__ << #y ": " << y; assert_fail(#x, __LINE__, __FILE__, __PRETTY_FUNCTION__, __s__.str().c_str()); } } while (false) +#else +#define TORRENT_ASSERT_VAL(x, y) TORRENT_ASSERT(x) +#endif #else #include diff --git a/include/libtorrent/aux_/session_impl.hpp b/include/libtorrent/aux_/session_impl.hpp index fa0b52ffc..fde526ae5 100644 --- a/include/libtorrent/aux_/session_impl.hpp +++ b/include/libtorrent/aux_/session_impl.hpp @@ -82,6 +82,7 @@ POSSIBILITY OF SUCH DAMAGE. #include "libtorrent/deadline_timer.hpp" #include "libtorrent/socket_io.hpp" // for print_address #include "libtorrent/address.hpp" +#include "libtorrent/utp_socket_manager.hpp" #ifdef TORRENT_STATS #include @@ -522,6 +523,15 @@ namespace libtorrent bandwidth_channel m_local_download_channel; bandwidth_channel m_local_upload_channel; + // all tcp peer connections are subject to these + // bandwidth limits. Local peers are excempted + // from this limit. The purpose is to be able to + // throttle TCP that passes over the internet + // bottleneck (i.e. modem) to avoid starving out + // uTP connections. 
+ bandwidth_channel m_tcp_download_channel; + bandwidth_channel m_tcp_upload_channel; + bandwidth_channel* m_bandwidth_channel[2]; tracker_manager m_tracker_manager; @@ -725,6 +735,8 @@ namespace libtorrent rate_limited_udp_socket m_udp_socket; + utp_socket_manager m_utp_socket_manager; + #ifndef TORRENT_DISABLE_ENCRYPTION pe_settings m_pe_settings; #endif diff --git a/include/libtorrent/broadcast_socket.hpp b/include/libtorrent/broadcast_socket.hpp index 040e4d2b6..f560357a9 100644 --- a/include/libtorrent/broadcast_socket.hpp +++ b/include/libtorrent/broadcast_socket.hpp @@ -49,6 +49,7 @@ namespace libtorrent TORRENT_EXPORT bool is_loopback(address const& addr); TORRENT_EXPORT bool is_multicast(address const& addr); TORRENT_EXPORT bool is_any(address const& addr); + TORRENT_EXPORT bool is_teredo(address const& addr); TORRENT_EXPORT int cidr_distance(address const& a1, address const& a2); // determines if the operating system supports IPv6 diff --git a/include/libtorrent/bt_peer_connection.hpp b/include/libtorrent/bt_peer_connection.hpp index f5a3f0f85..6373cc089 100644 --- a/include/libtorrent/bt_peer_connection.hpp +++ b/include/libtorrent/bt_peer_connection.hpp @@ -102,7 +102,12 @@ namespace libtorrent void start(); - enum { upload_only_msg = 2, share_mode_msg = 3 }; + enum + { + upload_only_msg = 2, + holepunch_msg = 3, + share_mode_msg = 4 + }; ~bt_peer_connection(); @@ -140,6 +145,20 @@ namespace libtorrent num_supported_messages }; + enum hp_message_t + { + // msg_types + hp_rendezvous = 0, + hp_connect = 1, + hp_failed = 2, + + // error codes + hp_no_such_peer = 1, + hp_not_connected = 2, + hp_no_support = 3, + hp_no_self = 4 + }; + // called from the main loop when this connection has any // work to do. 
@@ -151,6 +170,9 @@ namespace libtorrent virtual void get_specific_peer_info(peer_info& p) const; virtual bool in_handshake() const; + bool supports_holepunch() const { return m_holepunch_id != 0; } + void write_holepunch_msg(int type, tcp::endpoint const& ep, int error); + #ifndef TORRENT_DISABLE_EXTENSIONS bool support_extensions() const { return m_supports_extensions; } #endif @@ -183,6 +205,7 @@ namespace libtorrent void on_have_none(int received); void on_reject_request(int received); void on_allowed_fast(int received); + void on_holepunch(); void on_extended(int received); @@ -283,7 +306,7 @@ public: #endif } #endif - peer_connection::append_send_buffer(buffer, size, destructor); + peer_connection::append_send_buffer(buffer, size, destructor, true); } void setup_send(); @@ -366,6 +389,9 @@ private: // 0 if not supported int m_upload_only_id; + // the message ID for holepunch messages + int m_holepunch_id; + // the message ID for share mode message // 0 if not supported int m_share_mode_id; diff --git a/include/libtorrent/enum_net.hpp b/include/libtorrent/enum_net.hpp index 68a7dec59..69937d44c 100644 --- a/include/libtorrent/enum_net.hpp +++ b/include/libtorrent/enum_net.hpp @@ -42,11 +42,13 @@ POSSIBILITY OF SUCH DAMAGE. 
namespace libtorrent { + // the interface should not have a netmask struct ip_interface { address interface_address; address netmask; char name[64]; + int mtu; }; struct ip_route @@ -55,6 +57,7 @@ namespace libtorrent address netmask; address gateway; char name[64]; + int mtu; }; // returns a list of the configured IP interfaces @@ -64,9 +67,8 @@ namespace libtorrent TORRENT_EXPORT std::vector enum_routes(io_service& ios, error_code& ec); - // returns true if the specified address is on the same - // local network as the specified interface - TORRENT_EXPORT bool in_subnet(address const& addr, ip_interface const& iface); + // return (a1 & mask) == (a2 & mask) + TORRENT_EXPORT bool match_addr_mask(address const& a1, address const& a2, address const& mask); // returns true if the specified address is on the same // local network as us diff --git a/include/libtorrent/error_code.hpp b/include/libtorrent/error_code.hpp index b323696ee..813c8936f 100644 --- a/include/libtorrent/error_code.hpp +++ b/include/libtorrent/error_code.hpp @@ -165,7 +165,7 @@ namespace libtorrent pex_message_too_large, invalid_pex_message, invalid_lt_tracker_message, - reserved108, + too_frequent_pex, reserved109, reserved110, reserved111, diff --git a/include/libtorrent/extensions.hpp b/include/libtorrent/extensions.hpp index b06a0fc74..10294ece6 100644 --- a/include/libtorrent/extensions.hpp +++ b/include/libtorrent/extensions.hpp @@ -90,6 +90,8 @@ namespace libtorrent { virtual ~peer_plugin() {} + virtual char const* type() const { return ""; } + // can add entries to the extension handshake // this is not called for web seeds virtual void add_handshake(entry&) {} diff --git a/include/libtorrent/instantiate_connection.hpp b/include/libtorrent/instantiate_connection.hpp index d0719f38e..a0e95d859 100644 --- a/include/libtorrent/instantiate_connection.hpp +++ b/include/libtorrent/instantiate_connection.hpp @@ -39,10 +39,12 @@ POSSIBILITY OF SUCH DAMAGE. 
namespace libtorrent { struct proxy_settings; + struct utp_socket_manager; bool instantiate_connection(io_service& ios , proxy_settings const& ps, socket_type& s - , void* ssl_context = 0); + , void* ssl_context = 0 + , utp_socket_manager* sm = 0); } #endif diff --git a/include/libtorrent/max.hpp b/include/libtorrent/max.hpp index 64d12cd95..2a7753aa0 100644 --- a/include/libtorrent/max.hpp +++ b/include/libtorrent/max.hpp @@ -93,6 +93,18 @@ namespace libtorrent value = max3::value }; }; + + template + struct max8 + { + enum + { + temp1 = max::value, + temp2 = max3::value, + temp3 = max3::value, + value = max3::value + }; + }; } #endif diff --git a/include/libtorrent/packet_buffer.hpp b/include/libtorrent/packet_buffer.hpp new file mode 100644 index 000000000..1092760dd --- /dev/null +++ b/include/libtorrent/packet_buffer.hpp @@ -0,0 +1,108 @@ +/* + +Copyright (c) 2010, Arvid Norberg, Daniel Wallin. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the distribution. + * Neither the name of the author nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. + +*/ + +#ifndef TORRENT_PACKET_BUFFER_HPP_INCLUDED +#define TORRENT_PACKET_BUFFER_HPP_INCLUDED + +#include "boost/cstdint.hpp" +#include + +namespace libtorrent +{ + // this is a circular buffer that automatically resizes + // itself as elements are inserted. Elements are indexed + // by integers and are assumed to be sequential. Unless the + // old elements are removed when new elements are inserted, + // the buffer will be resized. + + // if m_mask is 0xf, m_array has 16 elements + // m_cursor is the lowest index that has an element + // it also determines which indices the other slots + // refers to. Since it's a circular buffer, it wraps + // around. 
For example + + // m_cursor = 9 + // | refers to index 14 + // | | + // V V + // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + // | | | | | | | | | | | | | | | | | m_mask = 0xf + // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + // ^ + // | + // refers to index 15 + + // whenever the element at the cursor is removed, the + // cursor is bumped to the next occupied element + + class packet_buffer + { + public: + typedef boost::uint32_t index_type; + + packet_buffer(); + ~packet_buffer(); + + void* insert(index_type idx, void* value); + + std::size_t size() const + { return m_size; } + + std::size_t capacity() const + { return m_capacity; } + + void* at(index_type idx) const; + + void* remove(index_type idx); + + void reserve(std::size_t size); + + index_type cursor() const + { return m_first; } + + index_type span() const + { return (m_last - m_first) & 0xffff; } + + private: + void** m_storage; + std::size_t m_capacity; + std::size_t m_size; + + // This defines the first index that is part of the m_storage. + // The last index is (m_first + (m_capacity - 1)) & 0xffff. + index_type m_first; + index_type m_last; + }; +} + +#endif // TORRENT_PACKET_BUFFER_HPP_INCLUDED + diff --git a/include/libtorrent/peer_connection.hpp b/include/libtorrent/peer_connection.hpp index f74da5ef5..8d8d7f27f 100644 --- a/include/libtorrent/peer_connection.hpp +++ b/include/libtorrent/peer_connection.hpp @@ -78,6 +78,7 @@ POSSIBILITY OF SUCH DAMAGE. #include "libtorrent/bandwidth_socket.hpp" #include "libtorrent/socket_type_fwd.hpp" #include "libtorrent/error_code.hpp" +#include "libtorrent/sliding_average.hpp" #ifdef TORRENT_STATS #include "libtorrent/aux_/session_impl.hpp" @@ -86,9 +87,11 @@ POSSIBILITY OF SUCH DAMAGE. 
namespace libtorrent { class torrent; - struct peer_plugin; struct peer_info; struct disk_io_job; +#ifndef TORRENT_DISABLE_EXTENSIONS + struct peer_plugin; +#endif namespace detail { @@ -202,6 +205,7 @@ namespace libtorrent #ifndef TORRENT_DISABLE_EXTENSIONS void add_extension(boost::shared_ptr); + peer_plugin const* find_plugin(char const* type); #endif // this function is called once the torrent associated @@ -284,6 +288,14 @@ namespace libtorrent void set_upload_only(bool u); bool upload_only() const { return m_upload_only; } + void set_holepunch_mode() + { + m_holepunch_mode = true; +#ifdef TORRENT_VERBOSE_LOGGING + (*m_logger) << time_now_string() << "*** HOLEPUNCH MODE ***\n"; +#endif + } + // will send a keep-alive message to the peer void keep_alive(); @@ -341,6 +353,9 @@ namespace libtorrent void on_timeout(); // this will cause this peer_connection to be disconnected. virtual void disconnect(error_code const& ec, int error = 0); + // called when a connect attempt fails (not when an + // established connection fails) + void connect_failed(error_code const& e); bool is_disconnecting() const { return m_disconnecting; } // this is called when the connection attempt has succeeded @@ -522,11 +537,17 @@ namespace libtorrent #endif template - void append_send_buffer(char* buffer, int size, Destructor const& destructor) + void append_send_buffer(char* buffer, int size, Destructor const& destructor + , bool encrypted = false) { #if defined TORRENT_STATS && defined TORRENT_DISK_STATS log_buffer_usage(buffer, size, "queued send buffer"); #endif + // bittorrent connections should never use this function, since + // they might be encrypted and this would circumvent the actual + // encryption. bt_peer_connection overrides this function with + // its own version. 
+ TORRENT_ASSERT(encrypted || type() != bittorrent_connection); m_send_buffer.append_buffer(buffer, size, size, destructor); } @@ -634,6 +655,8 @@ namespace libtorrent bool verify_piece(peer_request const& p) const; + void update_desired_queue_size(); + // the bandwidth channels, upload and download // keeps track of the current quotas bandwidth_channel m_bandwidth_channel[num_channels]; @@ -665,6 +688,10 @@ namespace libtorrent // web seeds also has a limit on the queue size. int m_max_out_request_queue; + // the average rate of receiving complete piece messages + sliding_average<20> m_piece_rate; + sliding_average<20> m_send_rate; + void set_timeout(int s) { m_timeout = s; } #ifndef TORRENT_DISABLE_EXTENSIONS @@ -711,6 +738,7 @@ namespace libtorrent // the time when we last got a part of a // piece packet from this peer ptime m_last_piece; + // the time we sent a request to // this peer the last time ptime m_last_request; @@ -1076,6 +1104,9 @@ namespace libtorrent // set to true when we've sent the first round of suggests bool m_sent_suggests:1; + // set to true while we're trying to holepunch + bool m_holepunch_mode:1; + // when this is set, the transfer stats for this connection // is not included in the torrent or session stats bool m_ignore_stats:1; diff --git a/include/libtorrent/peer_info.hpp b/include/libtorrent/peer_info.hpp index 00750d08e..b094e387d 100644 --- a/include/libtorrent/peer_info.hpp +++ b/include/libtorrent/peer_info.hpp @@ -59,7 +59,8 @@ namespace libtorrent seed = 0x400, optimistic_unchoke = 0x800, snubbed = 0x1000, - upload_only = 0x2000 + upload_only = 0x2000, + holepunched = 0x4000 #ifndef TORRENT_DISABLE_ENCRYPTION , rc4_encrypted = 0x100000, plaintext_encrypted = 0x200000 @@ -186,7 +187,8 @@ namespace libtorrent { standard_bittorrent = 0, web_seed = 1, - http_seed = 2 + http_seed = 2, + bittorrent_utp = 3 }; int connection_type; diff --git a/include/libtorrent/policy.hpp b/include/libtorrent/policy.hpp index b3ce149d7..0f5b0d3a8 
100644 --- a/include/libtorrent/policy.hpp +++ b/include/libtorrent/policy.hpp @@ -164,7 +164,8 @@ namespace libtorrent // 43 1 1 failcount, connectable, optimistically_unchoked, seed // 44 1 1 fast_reconnects, trust_points // 45 1 1 source, pe_support, is_v6_addr -// 46 1 1 on_parole, banned, added_to_dht +// 46 1 1 on_parole, banned, added_to_dht, supports_utp, +// supports_holepunch // 47 1 1 // 48 struct TORRENT_EXPORT peer @@ -311,6 +312,11 @@ namespace libtorrent // pinged by the DHT bool added_to_dht:1; #endif + // we think this peer supports uTP + bool supports_utp:1; + // we have been connected via uTP at least once + bool confirmed_supports_utp:1; + bool supports_holepunch:1; #ifdef TORRENT_DEBUG bool in_use:1; #endif diff --git a/include/libtorrent/session_settings.hpp b/include/libtorrent/session_settings.hpp index c9790aced..43a6335e9 100644 --- a/include/libtorrent/session_settings.hpp +++ b/include/libtorrent/session_settings.hpp @@ -222,6 +222,11 @@ namespace libtorrent , default_peer_upload_rate(0) , default_peer_download_rate(0) , broadcast_lsd(false) + , enable_outgoing_utp(true) + , enable_incoming_utp(true) + , enable_outgoing_tcp(true) + , enable_incoming_tcp(true) + , max_pex_peers(200) , ignore_resume_timestamps(false) , anonymous_mode(false) , tick_interval(100) @@ -234,6 +239,17 @@ namespace libtorrent , unchoke_slots_limit(8) , half_open_limit(0) , connections_limit(200) + , utp_target_delay(75) // milliseconds + , utp_gain_factor(1500) // bytes per rtt + , utp_min_timeout(500) // milliseconds + , utp_syn_resends(2) + , utp_fin_resends(2) + , utp_num_resends(6) + , utp_connect_timeout(3000) // milliseconds + , utp_delayed_ack(0) // milliseconds + , utp_dynamic_sock_buf(true) + , mixed_mode_algorithm(peer_proportional) + , rate_limit_utp(false) , listen_queue_size(5) {} @@ -856,6 +872,24 @@ namespace libtorrent // a network is known not to support multicast, this can be enabled bool broadcast_lsd; + // when set to true, libtorrent will try 
to make outgoing utp connections + bool enable_outgoing_utp; + + // if set to false, libtorrent will reject incoming utp connections + bool enable_incoming_utp; + + // when set to false, no outgoing TCP connections will be made + bool enable_outgoing_tcp; + + // if set to false, libtorrent will reject incoming tcp connections + bool enable_incoming_tcp; + + // the max number of peers we accept from pex messages from a single peer. + // this limits the number of concurrent peers any of our peers claims to + // be connected to. If they claim to be connected to more than this, we'll + // ignore any peer that exceeds this limit + int max_pex_peers; + // when set to true, the file modification time is ignored when loading // resume data. The resume data includes the expected timestamp of each // file and is typically compared to make sure the files haven't changed @@ -903,6 +937,57 @@ namespace libtorrent // the max number of connections in the session int connections_limit; + // target delay, milliseconds + int utp_target_delay; + + // max number of bytes to increase cwnd per rtt in uTP + // congestion controller + int utp_gain_factor; + + // the shortest allowed uTP connection timeout in milliseconds + // defaults to 500 milliseconds. The shorter the timeout, the + // faster the connection recovers from a loss of an entire window + int utp_min_timeout; + + // the number of SYN packets that are sent before giving up + int utp_syn_resends; + + // the number of resent packets sent on a closed socket before giving up + int utp_fin_resends; + + // the number of times to send a packet before giving up + int utp_num_resends; + + // initial timeout for uTP SYN packets + int utp_connect_timeout; + + // the maximum number of milliseconds to delay ACKing packets + int utp_delayed_ack; + + // set to true if the uTP socket buffer size is allowed to increase + // dynamically based on the NIC MTU setting. 
This is true by default + // and improves uTP performance for networks with larger frame sizes + // including loopback + bool utp_dynamic_sock_buf; + + enum bandwidth_mixed_algo_t + { + // disables the mixed mode bandwidth balancing + prefer_tcp = 0, + + // does not throttle uTP, throttles TCP to the same proportion + // of throughput as there are TCP connections + peer_proportional = 1 + + }; + // the algorithm to use to balance bandwidth between tcp + // connections and uTP connections + int mixed_mode_algorithm; + + // set to true if uTP connections should be rate limited + // defaults to false + bool rate_limit_utp; + // this is the number passed in to listen(). i.e. // the number of connections to accept while we're // not waiting in an accept() call. @@ -915,7 +1000,9 @@ namespace libtorrent dht_settings() : max_peers_reply(100) , search_branching(5) +#ifndef TORRENT_NO_DEPRECATE , service_port(0) +#endif , max_fail_count(20) , max_torrent_search_reply(20) {} @@ -928,9 +1015,11 @@ namespace libtorrent // searching the DHT. int search_branching; +#ifndef TORRENT_NO_DEPRECATE // the listen port for the dht. This is a UDP port. // zero means use the same as the tcp interface int service_port; +#endif // the maximum number of times a node can fail // in a row before it is removed from the table. diff --git a/include/libtorrent/session_status.hpp b/include/libtorrent/session_status.hpp index 9cb3849f8..188951503 100644 --- a/include/libtorrent/session_status.hpp +++ b/include/libtorrent/session_status.hpp @@ -35,6 +35,7 @@ POSSIBILITY OF SUCH DAMAGE. 
#include "libtorrent/config.hpp" #include "libtorrent/size_type.hpp" +#include namespace libtorrent { @@ -53,6 +54,15 @@ namespace libtorrent #endif + struct utp_status + { + int num_idle; + int num_syn_sent; + int num_connected; + int num_fin_sent; + int num_close_wait; + }; + struct TORRENT_EXPORT session_status { bool has_incoming_connections; @@ -107,6 +117,8 @@ namespace libtorrent int dht_total_allocations; #endif + utp_status utp_stats; + int peerlist_size; }; diff --git a/include/libtorrent/sliding_average.hpp b/include/libtorrent/sliding_average.hpp index 8c0a689d2..3bbdb6c15 100644 --- a/include/libtorrent/sliding_average.hpp +++ b/include/libtorrent/sliding_average.hpp @@ -30,6 +30,9 @@ POSSIBILITY OF SUCH DAMAGE. */ +#ifndef TORRENT_SLIDING_AVERAGE_HPP_INCLUDED +#define TORRENT_SLIDING_AVERAGE_HPP_INCLUDED + namespace libtorrent { // a sliding average accumulator. Add samples to it and it @@ -70,3 +73,5 @@ private: } +#endif + diff --git a/include/libtorrent/socket.hpp b/include/libtorrent/socket.hpp index b3f7c6fb5..aeaa7e376 100644 --- a/include/libtorrent/socket.hpp +++ b/include/libtorrent/socket.hpp @@ -146,6 +146,40 @@ namespace libtorrent size_t size(Protocol const&) const { return sizeof(m_value); } char m_value; }; + +#if defined IP_DONTFRAG || defined IP_MTU_DISCOVER || defined IP_DONTFRAGMENT +#define TORRENT_HAS_DONT_FRAGMENT +#endif + +#ifdef TORRENT_HAS_DONT_FRAGMENT + struct dont_fragment + { + dont_fragment(bool val) +#ifdef IP_PMTUDISCOVER_DO + : m_value(val ? 
IP_PMTUDISC_DO : IP_PMTUDISC_DONT) {} +#else + : m_value(val) {} +#endif + template + int level(Protocol const&) const { return IPPROTO_IP; } + template + int name(Protocol const&) const +#if defined IP_DONTFRAG + { return IP_DONTFRAG; } +#elif defined IP_MTU_DISCOVER + { return IP_MTU_DISCOVER; } +#elif defined IP_DONTFRAGMENT + { return IP_DONTFRAGMENT; } +#else + {} +#endif + template + int const* data(Protocol const&) const { return &m_value; } + template + size_t size(Protocol const&) const { return sizeof(m_value); } + int m_value; + }; +#endif // TORRENT_HAS_DONT_FRAGMENT } #endif // TORRENT_SOCKET_HPP_INCLUDED diff --git a/include/libtorrent/socket_type.hpp b/include/libtorrent/socket_type.hpp index 26ddcee78..c6244e9be 100644 --- a/include/libtorrent/socket_type.hpp +++ b/include/libtorrent/socket_type.hpp @@ -38,6 +38,7 @@ POSSIBILITY OF SUCH DAMAGE. #include "libtorrent/socks5_stream.hpp" #include "libtorrent/http_stream.hpp" #include "libtorrent/i2p_stream.hpp" +#include "libtorrent/utp_stream.hpp" #include "libtorrent/io_service.hpp" #include "libtorrent/max.hpp" #include "libtorrent/assert.hpp" @@ -96,6 +97,8 @@ POSSIBILITY OF SUCH DAMAGE. get()->x; break; \ case socket_type_int_impl::value: \ get()->x; break; \ + case socket_type_int_impl::value: \ + get()->x; break; \ TORRENT_SOCKTYPE_I2P_FORWARD(x) \ TORRENT_SOCKTYPE_SSL_FORWARD(x) \ default: TORRENT_ASSERT(false); \ @@ -109,6 +112,8 @@ POSSIBILITY OF SUCH DAMAGE. 
return get()->x; \ case socket_type_int_impl::value: \ return get()->x; \ + case socket_type_int_impl::value: \ + return get()->x; \ TORRENT_SOCKTYPE_I2P_FORWARD_RET(x, def) \ TORRENT_SOCKTYPE_SSL_FORWARD_RET(x, def) \ default: TORRENT_ASSERT(false); return def; \ @@ -133,36 +138,38 @@ namespace libtorrent struct socket_type_int_impl { enum { value = 3 }; }; + template <> + struct socket_type_int_impl + { enum { value = 4 }; }; + #if TORRENT_USE_I2P template <> struct socket_type_int_impl - { enum { value = 4 }; }; + { enum { value = 5 }; }; #endif #ifdef TORRENT_USE_OPENSSL template <> struct socket_type_int_impl > - { enum { value = 5 }; }; - - template <> - struct socket_type_int_impl > { enum { value = 6 }; }; template <> - struct socket_type_int_impl > + struct socket_type_int_impl > { enum { value = 7 }; }; + + template <> + struct socket_type_int_impl > + { enum { value = 8 }; }; #endif struct TORRENT_EXPORT socket_type { - typedef stream_socket::lowest_layer_type lowest_layer_type; typedef stream_socket::endpoint_type endpoint_type; typedef stream_socket::protocol_type protocol_type; explicit socket_type(io_service& ios): m_io_service(ios), m_type(0) {} ~socket_type(); - lowest_layer_type& lowest_layer(); io_service& get_io_service() const; bool is_open() const; @@ -253,10 +260,11 @@ namespace libtorrent io_service& m_io_service; int m_type; - enum { storage_size = max7< + enum { storage_size = max8< sizeof(stream_socket) , sizeof(socks5_stream) , sizeof(http_stream) + , sizeof(utp_stream) #if TORRENT_USE_I2P , sizeof(i2p_stream) #else diff --git a/include/libtorrent/timestamp_history.hpp b/include/libtorrent/timestamp_history.hpp new file mode 100644 index 000000000..0ad977d50 --- /dev/null +++ b/include/libtorrent/timestamp_history.hpp @@ -0,0 +1,80 @@ +/* + +Copyright (c) 2009, Arvid Norberg +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the distribution. + * Neither the name of the author nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. + +*/ + +#ifndef TIMESTAMP_HISTORY_HPP +#define TIMESTAMP_HISTORY_HPP + +#include "boost/cstdint.hpp" +#include "libtorrent/assert.hpp" + +namespace libtorrent { + +// timestamp history keeps a history of the lowest timestamps we've +// seen in the last 20 minutes +struct timestamp_history +{ + enum { history_size = 20 }; + + timestamp_history() : m_index(0), m_initialized(false), m_base(0), m_num_samples(0) {} + bool initialized() const { return m_initialized; } + + // add a sample to the timestamp history. 
If step is true, it's been + // a minute since the last step + boost::uint32_t add_sample(boost::uint32_t sample, bool step); + boost::uint32_t base() const { TORRENT_ASSERT(m_initialized); return m_base; } + void adjust_base(int change); + +private: + + // this is a circular buffer + boost::uint32_t m_history[history_size]; + + // and this is the index we're currently at + // in the circular buffer + boost::uint16_t m_index; + + bool m_initialized:1; + + // this is the lowest sample seen in the + // last 'history_size' minutes + boost::uint32_t m_base; + + // this is the number of samples since the + // last time we stepped one minute. If we + // don't have enough samples, we won't step + int m_num_samples; +}; + +} + +#endif + diff --git a/include/libtorrent/torrent.hpp b/include/libtorrent/torrent.hpp index 3ac9e1313..3a9360d39 100644 --- a/include/libtorrent/torrent.hpp +++ b/include/libtorrent/torrent.hpp @@ -91,6 +91,7 @@ namespace libtorrent struct tracker_request; struct add_torrent_params; struct storage_interface; + struct bt_peer_connection; namespace aux { @@ -136,6 +137,15 @@ namespace libtorrent // it will initialize the storage and the piece-picker void init(); + // find the peer that introduced us to the given endpoint. This is + // used when trying to holepunch. 
We need the introducer so that we + // can send a rendezvous connect message + bt_peer_connection* find_introducer(tcp::endpoint const& ep) const; + + // if we're connected to a peer at ep, return its peer connection + // only count BitTorrent peers + bt_peer_connection* find_peer(tcp::endpoint const& ep) const; + void on_resume_data_checked(int ret, disk_io_job const& j); void on_force_recheck(int ret, disk_io_job const& j); void on_piece_checked(int ret, disk_io_job const& j); @@ -291,7 +301,7 @@ namespace libtorrent tcp::endpoint get_interface() const; void connect_to_url_seed(std::list::iterator url); - bool connect_to_peer(policy::peer* peerinfo); + bool connect_to_peer(policy::peer* peerinfo, bool ignore_limit = false); void set_ratio(float r) { TORRENT_ASSERT(r >= 0.0f); m_ratio = r; } diff --git a/include/libtorrent/udp_socket.hpp b/include/libtorrent/udp_socket.hpp index 277996d6c..6dbc8cc36 100644 --- a/include/libtorrent/udp_socket.hpp +++ b/include/libtorrent/udp_socket.hpp @@ -82,13 +82,31 @@ namespace libtorrent proxy_settings const& get_proxy_settings() { return m_proxy_settings; } bool is_closed() const { return m_abort; } - tcp::endpoint local_endpoint() const + tcp::endpoint local_endpoint(error_code& ec) const { - error_code ec; udp::endpoint ep = m_ipv4_sock.local_endpoint(ec); return tcp::endpoint(ep.address(), ep.port()); } + void set_buf_size(int s); + + template + void set_option(SocketOption const& opt, error_code& ec) + { + m_ipv4_sock.set_option(opt, ec); +#if TORRENT_USE_IPV6 + m_ipv6_sock.set_option(opt, ec); +#endif + } + + template + void get_option(SocketOption& opt, error_code& ec) + { + m_ipv4_sock.get_option(opt, ec); + } + + udp::endpoint proxy_addr() const { return m_proxy_addr; } + protected: struct queued_packet @@ -129,6 +147,8 @@ namespace libtorrent void wrap(char const* hostname, int port, char const* p, int len, error_code& ec); void unwrap(error_code const& e, char const* buf, int size); + void maybe_realloc_buffers(); + 
#ifdef TORRENT_DEBUG #if defined BOOST_HAS_PTHREADS mutable pthread_t m_thread; @@ -146,12 +166,14 @@ namespace libtorrent udp::socket m_ipv4_sock; udp::endpoint m_v4_ep; - char m_v4_buf[1600]; + int m_v4_buf_size; + char* m_v4_buf; #if TORRENT_USE_IPV6 udp::socket m_ipv6_sock; udp::endpoint m_v6_ep; - char m_v6_buf[1600]; + int m_v6_buf_size; + char* m_v6_buf; #endif int m_bind_port; @@ -166,6 +188,11 @@ namespace libtorrent bool m_queue_packets; bool m_tunnel_packets; bool m_abort; + // this is set to true to indicate that the m_v4_buf + // and m_v6_buf should be reallocated to the size + // of the buffer size members the next time their + // read handler gets triggered + bool m_reallocate_buffers; udp::endpoint m_proxy_addr; // while we're connecting to the proxy // we have to queue the packets, we'll flush diff --git a/include/libtorrent/utp_socket_manager.hpp b/include/libtorrent/utp_socket_manager.hpp new file mode 100644 index 000000000..ad8adbcea --- /dev/null +++ b/include/libtorrent/utp_socket_manager.hpp @@ -0,0 +1,116 @@ +/* + +Copyright (c) 2009, Arvid Norberg +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the distribution. + * Neither the name of the author nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. + +*/ + +#ifndef TORRENT_UTP_SOCKET_MANAGER_HPP_INCLUDED +#define TORRENT_UTP_SOCKET_MANAGER_HPP_INCLUDED + +#include + +#include "libtorrent/socket_type.hpp" +#include "libtorrent/session_status.hpp" +#include "libtorrent/enum_net.hpp" + +namespace libtorrent +{ + class udp_socket; + class utp_stream; + struct utp_socket_impl; + + typedef boost::function const&)> incoming_utp_callback_t; + + struct utp_socket_manager + { + utp_socket_manager(session_settings const& sett, udp_socket& s, incoming_utp_callback_t cb); + ~utp_socket_manager(); + + void get_status(utp_status& s) const; + + // return false if this is not a uTP packet + bool incoming_packet(char const* p, int size, udp::endpoint const& ep); + + void tick(ptime now); + + tcp::endpoint local_endpoint(error_code& ec) const; + + // flags for send_packet + enum { dont_fragment = 1 }; + void send_packet(udp::endpoint const& ep, char const* p, int len + , error_code& ec, int flags = 0); + + // internal, used by utp_stream + void remove_socket(boost::uint16_t id); + + utp_socket_impl* new_utp_socket(utp_stream* str); + int gain_factor() const { return m_sett.utp_gain_factor; } + int target_delay() const { return m_sett.utp_target_delay * 1000; } + int 
syn_resends() const { return m_sett.utp_syn_resends; } + int fin_resends() const { return m_sett.utp_fin_resends; } + int num_resends() const { return m_sett.utp_num_resends; } + int connect_timeout() const { return m_sett.utp_connect_timeout; } + int delayed_ack() const { return m_sett.utp_delayed_ack; } + int min_timeout() const { return m_sett.utp_min_timeout; } + bool allow_dynamic_sock_buf() const { return m_sett.utp_dynamic_sock_buf; } + + void mtu_for_dest(address const& addr, int& link_mtu, int& utp_mtu); + void set_sock_buf(int size); + + private: + udp_socket& m_sock; + incoming_utp_callback_t m_cb; + + // replace with a hash-map + typedef std::multimap socket_map_t; + socket_map_t m_utp_sockets; + + // the last socket we received a packet on + utp_socket_impl* m_last_socket; + + int m_new_connection; + + session_settings const& m_sett; + + // this is a copy of the routing table, used + // to initialize MTU sizes of uTP sockets + std::vector m_routes; + + // the timestamp for the last time we updated + // the routing table + ptime m_last_route_update; + + // the buffer size of the socket. This is used + // to now lower the buffer size + int m_sock_buf_size; + }; +} + +#endif + diff --git a/include/libtorrent/utp_stream.hpp b/include/libtorrent/utp_stream.hpp new file mode 100644 index 000000000..e2d3157a0 --- /dev/null +++ b/include/libtorrent/utp_stream.hpp @@ -0,0 +1,386 @@ +/* + +Copyright (c) 2009, Arvid Norberg +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the distribution. 
+ * Neither the name of the author nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. + +*/ + +#ifndef TORRENT_UTP_STREAM_HPP_INCLUDED +#define TORRENT_UTP_STREAM_HPP_INCLUDED + +#include "libtorrent/connection_queue.hpp" +#include "libtorrent/proxy_base.hpp" +#include "libtorrent/udp_socket.hpp" +#include "libtorrent/io.hpp" +#include "libtorrent/packet_buffer.hpp" +#include "libtorrent/error_code.hpp" + +#include +#include +#include + +#define CCONTROL_TARGET 100 + +namespace libtorrent +{ + struct utp_socket_manager; + + // some MTU and protocol header sizes constants + enum + { + TORRENT_IPV4_HEADER = 20, + TORRENT_IPV6_HEADER = 40, + TORRENT_UDP_HEADER = 8, + TORRENT_SOCKS5_HEADER = 6, // plus the size of the destination address + + TORRENT_ETHERNET_MTU = 1500, + TORRENT_TEREDO_MTU = 1280, + TORRENT_INET_MIN_MTU = 576, + TORRENT_INET_MAX_MTU = 0xffff + }; + + // the point of the bif_endian_int is two-fold + // one purpuse is to not have any alignment requirements + // so that any byffer received from the network can be cast + // to it and read as an integer of various sizes without + // triggering a bus 
error. The other purpose is to convert + // from network byte order to host byte order when read and + // written, to offer a convenient interface to both interpreting + // and writing network packets + template struct big_endian_int + { + big_endian_int& operator=(T v) + { + char* p = m_storage; + detail::write_impl(v, p); + return *this; + } + operator T() const + { + const char* p = m_storage; + return detail::read_impl(p, detail::type()); + } + private: + char m_storage[sizeof(T)]; + }; + + typedef big_endian_int be_uint64; + typedef big_endian_int be_uint32; + typedef big_endian_int be_uint16; + typedef big_endian_int be_int64; + typedef big_endian_int be_int32; + typedef big_endian_int be_int16; + +/* + uTP header from BEP 29 + + 0 4 8 16 24 32 + +-------+-------+---------------+---------------+---------------+ + | type | ver | extension | connection_id | + +-------+-------+---------------+---------------+---------------+ + | timestamp_microseconds | + +---------------+---------------+---------------+---------------+ + | timestamp_difference_microseconds | + +---------------+---------------+---------------+---------------+ + | wnd_size | + +---------------+---------------+---------------+---------------+ + | seq_nr | ack_nr | + +---------------+---------------+---------------+---------------+ + +*/ + + enum type { ST_DATA = 0, ST_FIN, ST_STATE, ST_RESET, ST_SYN, NUM_TYPES }; + + struct utp_header + { + unsigned char type_ver; + unsigned char extension; + be_uint16 connection_id; + be_uint32 timestamp_microseconds; + be_uint32 timestamp_difference_microseconds; + be_uint32 wnd_size; + be_uint16 seq_nr; + be_uint16 ack_nr; + + int get_type() const { return type_ver >> 4; } + int get_version() const { return type_ver & 0xf; } + }; + +struct utp_socket_impl; + +utp_socket_impl* construct_utp_impl(boost::uint16_t recv_id + , boost::uint16_t send_id, void* userdata + , utp_socket_manager* sm); +void detach_utp_impl(utp_socket_impl* s); +void 
delete_utp_impl(utp_socket_impl* s); +bool should_delete(utp_socket_impl* s); +void tick_utp_impl(utp_socket_impl* s, ptime const& now); +void utp_init_mtu(utp_socket_impl* s, int link_mtu, int utp_mtu); +bool utp_incoming_packet(utp_socket_impl* s, char const* p + , int size, udp::endpoint const& ep, ptime receive_time); +bool utp_match(utp_socket_impl* s, udp::endpoint const& ep, boost::uint16_t id); +udp::endpoint utp_remote_endpoint(utp_socket_impl* s); +boost::uint16_t utp_receive_id(utp_socket_impl* s); +int utp_socket_state(utp_socket_impl const* s); + +#if defined TORRENT_VERBOSE_LOGGING || defined TORRENT_LOGGING || defined TORRENT_ERROR_LOGGING +int socket_impl_size(); +#endif + +// this is the user-level stream interface to utp sockets. +// the reason why it's split up in a utp_stream class and +// an implementation class is because the socket state has +// to be able to out-live the user level socket. For instance +// when sending data on a stream and then closing it, the +// state holding the send buffer has to be kept around until +// it has been flushed, which may be longer than the client +// will keep the utp_stream object around for. +// for more details, see utp_socket_impl, which is analogous +// to the kernel state for a socket. 
It's defined in utp_stream.cpp +class utp_stream +{ +public: + + typedef stream_socket::endpoint_type endpoint_type; + typedef stream_socket::protocol_type protocol_type; + + explicit utp_stream(asio::io_service& io_service); + ~utp_stream(); + + // used for incoming connections + void set_impl(utp_socket_impl* s); + utp_socket_impl* get_impl(); + +#ifndef BOOST_NO_EXCEPTIONS + template + void io_control(IO_Control_Command& ioc) {} +#endif + + template + void io_control(IO_Control_Command& ioc, error_code& ec) {} + +#ifndef BOOST_NO_EXCEPTIONS + void bind(endpoint_type const& endpoint) {} +#endif + + void bind(endpoint_type const& endpoint, error_code& ec); + +#ifndef BOOST_NO_EXCEPTIONS + template + void set_option(SettableSocketOption const& opt) {} +#endif + + template + error_code set_option(SettableSocketOption const& opt, error_code& ec) { return ec; } + + void close(); + void close(error_code const& ec) { close(); } + bool is_open() const { return m_open; } + + int read_buffer_size() const; + static void on_read(void* self, size_t bytes_transferred, error_code const& ec, bool kill); + static void on_write(void* self, size_t bytes_transferred, error_code const& ec, bool kill); + static void on_connect(void* self, error_code const& ec, bool kill); + + typedef void(*handler_t)(void*, size_t, error_code const&, bool); + typedef void(*connect_handler_t)(void*, error_code const&, bool); + + void add_read_buffer(void* buf, size_t len); + void set_read_handler(handler_t h); + void add_write_buffer(void const* buf, size_t len); + void set_write_handler(handler_t h); + size_t read_some(bool clear_buffers); + + void do_connect(tcp::endpoint const& ep, connect_handler_t h); + + endpoint_type local_endpoint() const + { + error_code ec; + return local_endpoint(ec); + } + + endpoint_type local_endpoint(error_code& ec) const; + + endpoint_type remote_endpoint() const + { + error_code ec; + return remote_endpoint(ec); + } + + endpoint_type remote_endpoint(error_code& ec) 
const; + + std::size_t available() const; + std::size_t available(error_code& ec) const { return available(); } + + asio::io_service& io_service() + { return m_io_service; } + + template + void async_connect(endpoint_type const& endpoint, Handler const& handler) + { + if (!endpoint.address().is_v4()) + { + error_code ec = asio::error::operation_not_supported; + m_io_service.post(boost::bind(handler, asio::error::operation_not_supported, 0)); + return; + } + + if (m_impl == 0) + { + m_io_service.post(boost::bind(handler, asio::error::not_connected, 0)); + return; + } + + m_connect_handler = handler; + do_connect(endpoint, &utp_stream::on_connect); + } + + template + void async_read_some(Mutable_Buffers const& buffers, Handler const& handler) + { + if (m_impl == 0) + { + m_io_service.post(boost::bind(handler, asio::error::not_connected, 0)); + return; + } + + TORRENT_ASSERT(!m_read_handler); + if (m_read_handler) + { + m_io_service.post(boost::bind(handler, asio::error::operation_not_supported, 0)); + return; + } + for (typename Mutable_Buffers::const_iterator i = buffers.begin() + , end(buffers.end()); i != end; ++i) + { + TORRENT_ASSERT(buffer_size(*i) > 0); + using asio::buffer_cast; + using asio::buffer_size; + add_read_buffer(buffer_cast(*i), buffer_size(*i)); + } + m_read_handler = handler; + set_read_handler(&utp_stream::on_read); + } + + void do_async_connect(endpoint_type const& ep + , boost::function const& handler); + + template + void open(Protocol const& p, error_code& ec) + { m_open = true; } + + template + void open(Protocol const& p) + { m_open = true; } + + template + std::size_t read_some(Mutable_Buffers const& buffers, error_code& ec) + { + TORRENT_ASSERT(!m_read_handler); + if (m_impl == 0) + { + ec = asio::error::not_connected; + return 0; + } + + if (read_buffer_size() == 0) + { + ec = asio::error::would_block; + return 0; + } +#ifdef TORRENT_DEBUG + int buf_size = 0; +#endif + + for (typename Mutable_Buffers::const_iterator i = buffers.begin() 
+ , end(buffers.end()); i != end; ++i) + { + using asio::buffer_cast; + using asio::buffer_size; + add_read_buffer(buffer_cast(*i), buffer_size(*i)); +#ifdef TORRENT_DEBUG + buf_size += buffer_size(*i); +#endif + } + std::size_t ret = read_some(true); + TORRENT_ASSERT(int(ret) <= buf_size); + TORRENT_ASSERT(ret > 0); + return ret; + } + + template + std::size_t write_some(Const_Buffers const& buffers, error_code& ec) + { + // TODO: implement + return 0; + } + + template + void async_write_some(Const_Buffers const& buffers, Handler const& handler) + { + if (m_impl == 0) + { + m_io_service.post(boost::bind(handler, asio::error::not_connected, 0)); + return; + } + + TORRENT_ASSERT(!m_write_handler); + if (m_write_handler) + { + m_io_service.post(boost::bind(handler, asio::error::operation_not_supported, 0)); + return; + } + + for (typename Const_Buffers::const_iterator i = buffers.begin() + , end(buffers.end()); i != end; ++i) + { + TORRENT_ASSERT(buffer_size(*i) > 0); + using asio::buffer_cast; + using asio::buffer_size; + add_write_buffer((void*)buffer_cast(*i), buffer_size(*i)); + } + m_write_handler = handler; + set_write_handler(&utp_stream::on_write); + } + +//private: + + void cancel_handlers(error_code const&); + + boost::function1 m_connect_handler; + boost::function2 m_read_handler; + boost::function2 m_write_handler; + + asio::io_service& m_io_service; + utp_socket_impl* m_impl; + bool m_open; +}; + +} + +#endif diff --git a/parse_sample.py b/parse_sample.py index 3258c9ea3..6310eb38c 100644 --- a/parse_sample.py +++ b/parse_sample.py @@ -83,6 +83,12 @@ for l in f: if 'std::string::append' in fun: fold = indentation if 'getipnodebyname' == fun: fold = indentation if '__gnu_debug::_Safe_iteratorget() + ? 
peer_info::bittorrent_utp + : peer_info::standard_bittorrent; } bool bt_peer_connection::in_handshake() const @@ -657,6 +659,7 @@ namespace libtorrent void bt_peer_connection::append_const_send_buffer(char const* buffer, int size) { + TORRENT_ASSERT(!m_rc4_encrypted || send_buffer_size() == m_encrypted_bytes); // if we're encrypting this buffer, we need to make a copy // since we'll mutate it #ifndef TORRENT_DISABLE_ENCRYPTION @@ -1349,7 +1352,7 @@ namespace libtorrent m_supports_dht_port = true; #ifndef TORRENT_DISABLE_DHT if (m_supports_dht_port && m_ses.m_dht) - write_dht_port(m_ses.get_dht_settings().service_port); + write_dht_port(m_ses.m_external_udp_port); #endif } } @@ -1443,6 +1446,203 @@ namespace libtorrent incoming_allowed_fast(index); } + // ----------------------------- + // -------- RENDEZVOUS --------- + // ----------------------------- + + void bt_peer_connection::on_holepunch() + { + INVARIANT_CHECK; + + if (!packet_finished()) return; + + // we can't accept holepunch messages from peers + // that don't support the holepunch extension + // because we wouldn't be able to respond + if (m_holepunch_id == 0) return; + + buffer::const_interval recv_buffer = receive_buffer(); + TORRENT_ASSERT(*recv_buffer.begin == msg_extended); + ++recv_buffer.begin; + TORRENT_ASSERT(*recv_buffer.begin == holepunch_msg); + ++recv_buffer.begin; + + const char* ptr = recv_buffer.begin; + + // ignore invalid messages + if (recv_buffer.left() < 2) return; + + int msg_type = detail::read_uint8(ptr); + int addr_type = detail::read_uint8(ptr); + + tcp::endpoint ep; + + if (addr_type == 0) + { + if (recv_buffer.left() < 2 + 4 + 2) return; + // IPv4 address + ep = detail::read_v4_endpoint(ptr); + } +#if TORRENT_USE_IPV6 + else if (addr_type == 1) + { + // IPv6 address + if (recv_buffer.left() < 2 + 18 + 2) return; + ep = detail::read_v6_endpoint(ptr); + } +#endif + else + { +#if defined TORRENT_VERBOSE_LOGGING || defined TORRENT_LOGGING + error_code ec; + static const char* 
hp_msg_name[] = {"rendezvous", "connect", "failed"}; + (*m_logger) << time_now_string() << " <== HOLEPUNCH [ msg:" + << (msg_type >= 0 && msg_type < 3 ? hp_msg_name[msg_type] : "unknown message type") + << " from:" << remote().address().to_string(ec) + << " to: unknown address type ]\n"; +#endif + + return; // unknown address type + } + + boost::shared_ptr t = associated_torrent().lock(); + if (!t) return; + + switch (msg_type) + { + case hp_rendezvous: // rendezvous + { +#if defined TORRENT_VERBOSE_LOGGING || defined TORRENT_LOGGING + error_code ec; + (*m_logger) << time_now_string() << " <== HOLEPUNCH [ msg:rendezvous" + << " to:" << ep.address().to_string(ec) << " ]\n"; +#endif + // this peer is asking us to introduce it to + // the peer at 'ep'. We need to find which of + // our connections points to that endpoint + bt_peer_connection* p = t->find_peer(ep); + if (p == 0) + { + // we're not connected to this peer + write_holepunch_msg(hp_failed, ep, hp_not_connected); + break; + } + if (!p->supports_holepunch()) + { + write_holepunch_msg(hp_failed, ep, hp_no_support); + break; + } + if (p == this) + { + write_holepunch_msg(hp_failed, ep, hp_no_self); + break; + } + + write_holepunch_msg(hp_connect, ep, 0); + p->write_holepunch_msg(hp_connect, remote(), 0); + } break; + case hp_connect: + { + // add or find the peer with this endpoint + policy::peer* p = t->get_policy().add_peer(ep, peer_id(0), peer_info::pex, 0); + if (p == 0 || p->connection) + { +#if defined TORRENT_VERBOSE_LOGGING || defined TORRENT_LOGGING + error_code ec; + (*m_logger) << time_now_string() << " <== HOLEPUNCH [ msg:connect" + << " to:" << ep.address().to_string(ec) << " error:failed to add peer ]\n"; +#endif + // we either couldn't add this peer, or it's + // already connected. 
Just ignore the connect message + break; + } + if (p->banned) + { +#if defined TORRENT_VERBOSE_LOGGING || defined TORRENT_LOGGING + error_code ec; + (*m_logger) << time_now_string() << " <== HOLEPUNCH [ msg:connect" + << " to:" << ep.address().to_string(ec) << " error:peer banned ]\n"; +#endif + // this peer is banned, don't connect to it + break; + + } + // to make sure we use the uTP protocol + p->supports_utp = true; + // #error make sure we make this a connection candidate + // in case it has too many failures for instance + t->connect_to_peer(p, true); + // mark this connection to be in holepunch mode + // so that it will retry faster and stick to uTP while it's + // retrying + if (p->connection) + p->connection->set_holepunch_mode(); +#if defined TORRENT_VERBOSE_LOGGING || defined TORRENT_LOGGING + error_code ec; + (*m_logger) << time_now_string() << " <== HOLEPUNCH [ msg:connect" + << " to:" << ep.address().to_string(ec) << " ]\n"; +#endif + } break; + case hp_failed: + { + boost::uint32_t error = detail::read_uint32(ptr); +#if defined TORRENT_VERBOSE_LOGGING || defined TORRENT_LOGGING + error_code ec; + char const* err_msg[] = {"no such peer", "not connected", "no support", "no self"}; + (*m_logger) << time_now_string() << " <== HOLEPUNCH [ msg:failed" + " error:" << error << + " msg:" << ((error >= 0 && error < 4)?err_msg[error]:"unknown message id") << + " ]\n"; +#endif + // #error deal with holepunch errors + } break; +#if defined TORRENT_VERBOSE_LOGGING || defined TORRENT_LOGGING + default: + { + error_code ec; + (*m_logger) << time_now_string() << " <== HOLEPUNCH [" + " msg:unknown message type (" << msg_type << ")" + << " to:" << ep.address().to_string(ec) << " ]\n"; + } +#endif + } + } + + void bt_peer_connection::write_holepunch_msg(int type, tcp::endpoint const& ep, int error) + { + char buf[35]; + char* ptr = buf + 6; + detail::write_uint8(type, ptr); + if (ep.address().is_v4()) detail::write_uint8(0, ptr); + else detail::write_uint8(1, ptr); + 
detail::write_endpoint(ep, ptr); + +#if defined TORRENT_VERBOSE_LOGGING || defined TORRENT_LOGGING + error_code ec; + static const char* hp_msg_name[] = {"rendezvous", "connect", "failed"}; + static const char* hp_error_string[] = {"", "no such peer", "not connected", "no support", "no self"}; + (*m_logger) << time_now_string() << " ==> HOLEPUNCH [ msg:" + << (type >= 0 && type < 3 ? hp_msg_name[type] : "unknown message type") + << " to:" << ep.address().to_string(ec) + << " error:" << hp_error_string[error] + << " ]\n"; +#endif + if (type == hp_failed) + { + detail::write_uint32(error, ptr); + } + + // write the packet length and type + char* hdr = buf; + detail::write_uint32(ptr - buf - 4, hdr); + detail::write_uint8(msg_extended, hdr); + detail::write_uint8(m_holepunch_id, hdr); + + TORRENT_ASSERT(ptr <= buf + sizeof(buf)); + + send_buffer(buf, ptr - buf); + } + // ----------------------------- // --------- EXTENDED ---------- // ----------------------------- @@ -1479,6 +1679,31 @@ namespace libtorrent return; } + if (extended_id == upload_only_msg) + { + if (!packet_finished()) return; + bool ul = detail::read_uint8(recv_buffer.begin); +#ifdef TORRENT_VERBOSE_LOGGING + (*m_logger) << time_now_string() << " <== UPLOAD_ONLY [ " << (ul?"true":"false") << " ]\n"; +#endif + set_upload_only(ul); + return; + } + + if (extended_id == holepunch_msg) + { + if (!packet_finished()) return; + on_holepunch(); + return; + } + +#ifdef TORRENT_VERBOSE_LOGGING + if (packet_finished()) + (*m_logger) << time_now_string() << " <== EXTENSION MESSAGE [" + " msg:" << extended_id << + " size:" << packet_size() << " ]\n"; +#endif + #ifndef TORRENT_DISABLE_EXTENSIONS for (extension_list_t::iterator i = m_extensions.begin() , end(m_extensions.end()); i != end; ++i) @@ -1549,7 +1774,10 @@ namespace libtorrent // upload_only if (lazy_entry const* m = root.dict_find_dict("m")) + { m_upload_only_id = m->dict_find_int_value("upload_only", 0); + m_holepunch_id = 
m->dict_find_int_value("ut_holepunch", 0); + } // there is supposed to be a remote listen port int listen_port = root.dict_find_int_value("p"); @@ -1929,7 +2157,9 @@ namespace libtorrent TORRENT_ASSERT(t); m["upload_only"] = upload_only_msg; + m["ut_holepunch"] = holepunch_msg; m["share_mode"] = share_mode_msg; + int complete_ago = -1; if (t->last_seen_complete() > 0) complete_ago = t->time_since_complete(); handshake["complete_ago"] = complete_ago; @@ -1968,6 +2198,19 @@ namespace libtorrent (*i)->add_handshake(handshake); } +#ifndef NDEBUG + // make sure there are not conflicting extensions + std::set ext; + for (entry::dictionary_type::const_iterator i = m.begin() + , end(m.end()); i != end; ++i) + { + if (i->second.type() != entry::int_t) continue; + int val = i->second.integer(); + TORRENT_ASSERT(ext.find(val) == ext.end()); + ext.insert(val); + } +#endif + std::vector msg; bencode(std::back_inserter(msg), handshake); @@ -1986,9 +2229,9 @@ namespace libtorrent TORRENT_ASSERT(i.begin == i.end); #if defined TORRENT_VERBOSE_LOGGING && TORRENT_USE_IOSTREAM - std::stringstream ext; - handshake.print(ext); - (*m_logger) << time_now_string() << " ==> EXTENDED HANDSHAKE: \n" << ext.str(); + std::stringstream handshake_str; + handshake.print(handshake_str); + (*m_logger) << time_now_string() << " ==> EXTENDED HANDSHAKE: \n" << handshake_str.str(); #endif setup_send(); @@ -2198,6 +2441,8 @@ namespace libtorrent (*m_logger) << time_now_string() << " received DH key\n"; #endif + TORRENT_ASSERT(!m_rc4_encrypted || send_buffer_size() == m_encrypted_bytes); + // PadA/B can be a max of 512 bytes, and 20 bytes more for // the sync hash (if incoming), or 8 bytes more for the // encrypted verification constant (if outgoing). 
Instead @@ -2981,7 +3226,7 @@ namespace libtorrent write_bitfield(); #ifndef TORRENT_DISABLE_DHT if (m_supports_dht_port && m_ses.m_dht) - write_dht_port(m_ses.get_dht_settings().service_port); + write_dht_port(m_ses.m_external_udp_port); #endif } diff --git a/src/enum_net.cpp b/src/enum_net.cpp index 4de007dbc..d786af0b0 100644 --- a/src/enum_net.cpp +++ b/src/enum_net.cpp @@ -33,6 +33,7 @@ POSSIBILITY OF SUCH DAMAGE. #include "libtorrent/config.hpp" #include #include +#include // for wcstombscstombs #include "libtorrent/enum_net.hpp" #include "libtorrent/broadcast_socket.hpp" #include "libtorrent/error_code.hpp" @@ -84,31 +85,44 @@ POSSIBILITY OF SUCH DAMAGE. namespace libtorrent { namespace { - address inaddr_to_address(in_addr const* ina) + address inaddr_to_address(in_addr const* ina, int len = 4) { typedef asio::ip::address_v4::bytes_type bytes_t; bytes_t b; - std::memcpy(&b[0], ina, b.size()); + std::memset(&b[0], 0, b.size()); + if (len > 0) std::memcpy(&b[0], ina, (std::min)(len, int(b.size()))); return address_v4(b); } #if TORRENT_USE_IPV6 - address inaddr6_to_address(in6_addr const* ina6) + address inaddr6_to_address(in6_addr const* ina6, int len = 16) { typedef asio::ip::address_v6::bytes_type bytes_t; bytes_t b; - std::memcpy(&b[0], ina6, b.size()); + std::memset(&b[0], 0, b.size()); + if (len > 0) std::memcpy(&b[0], ina6, (std::min)(len, int(b.size()))); return address_v6(b); } #endif - address sockaddr_to_address(sockaddr const* sin) + int sockaddr_len(sockaddr const* sin) { - if (sin->sa_family == AF_INET) - return inaddr_to_address(&((sockaddr_in const*)sin)->sin_addr); +#if defined TORRENT_WINDOWS || TORRENT_MINGW || defined TORRENT_LINUX + return sin->sa_family == AF_INET ? 
sizeof(sockaddr_in) : sizeof(sockaddr_in6); +#else + return sin->sa_len; +#endif + } + + address sockaddr_to_address(sockaddr const* sin, int assume_family = -1) + { + if (sin->sa_family == AF_INET || assume_family == AF_INET) + return inaddr_to_address(&((sockaddr_in const*)sin)->sin_addr + , sockaddr_len(sin) - offsetof(sockaddr, sa_data)); #if TORRENT_USE_IPV6 - else if (sin->sa_family == AF_INET6) - return inaddr6_to_address(&((sockaddr_in6 const*)sin)->sin6_addr); + else if (sin->sa_family == AF_INET6 || assume_family == AF_INET6) + return inaddr6_to_address(&((sockaddr_in6 const*)sin)->sin6_addr + , sockaddr_len(sin) - offsetof(sockaddr, sa_data)); #endif return address(); } @@ -142,13 +156,15 @@ namespace libtorrent { namespace return msg_len; } - bool parse_route(nlmsghdr* nl_hdr, ip_route* rt_info) + bool parse_route(int s, nlmsghdr* nl_hdr, ip_route* rt_info) { rtmsg* rt_msg = (rtmsg*)NLMSG_DATA(nl_hdr); - if((rt_msg->rtm_family != AF_INET) || (rt_msg->rtm_table != RT_TABLE_MAIN)) + if((rt_msg->rtm_family != AF_INET && rt_msg->rtm_family != AF_INET6) || (rt_msg->rtm_table != RT_TABLE_MAIN + && rt_msg->rtm_table != RT_TABLE_LOCAL)) return false; + int if_index = 0; int rt_len = RTM_PAYLOAD(nl_hdr); for (rtattr* rt_attr = (rtattr*)RTM_RTA(rt_msg); RTA_OK(rt_attr,rt_len); rt_attr = RTA_NEXT(rt_attr,rt_len)) @@ -156,23 +172,52 @@ namespace libtorrent { namespace switch(rt_attr->rta_type) { case RTA_OIF: - if_indextoname(*(int*)RTA_DATA(rt_attr), rt_info->name); + if_index = *(int*)RTA_DATA(rt_attr); break; case RTA_GATEWAY: - rt_info->gateway = address_v4(ntohl(*(u_int*)RTA_DATA(rt_attr))); +#if TORRENT_USE_IPV6 + if (rt_msg->rtm_family == AF_INET6) + { + rt_info->gateway = inaddr6_to_address((in6_addr*)RTA_DATA(rt_attr)); + } + else +#endif + { + rt_info->gateway = inaddr_to_address((in_addr*)RTA_DATA(rt_attr)); + } break; case RTA_DST: - rt_info->destination = address_v4(ntohl(*(u_int*)RTA_DATA(rt_attr))); +#if TORRENT_USE_IPV6 + if (rt_msg->rtm_family == 
AF_INET6) + { + rt_info->destination = inaddr6_to_address((in6_addr*)RTA_DATA(rt_attr)); + } + else +#endif + { + rt_info->destination = inaddr_to_address((in_addr*)RTA_DATA(rt_attr)); + } break; } } + + if_indextoname(if_index, rt_info->name); + ifreq req; + memset(&req, 0, sizeof(req)); + if_indextoname(if_index, req.ifr_name); + ioctl(s, SIOCGIFMTU, &req); + rt_info->mtu = req.ifr_mtu; +// obviously this doesn't work correctly. How do you get the netmask for a route? +// if (ioctl(s, SIOCGIFNETMASK, &req) == 0) { +// rt_info->netmask = sockaddr_to_address(&req.ifr_addr, req.ifr_addr.sa_family); +// } return true; } #endif #if defined TORRENT_BSD - bool parse_route(rt_msghdr* rtm, ip_route* rt_info) + bool parse_route(int s, rt_msghdr* rtm, ip_route* rt_info) { sockaddr* rti_info[RTAX_MAX]; sockaddr* sa = (sockaddr*)(rtm + 1); @@ -205,9 +250,17 @@ namespace libtorrent { namespace return false; rt_info->gateway = sockaddr_to_address(rti_info[RTAX_GATEWAY]); - rt_info->netmask = sockaddr_to_address(rti_info[RTAX_NETMASK]); rt_info->destination = sockaddr_to_address(rti_info[RTAX_DST]); + rt_info->netmask = sockaddr_to_address(rti_info[RTAX_NETMASK] + , rt_info->destination.is_v4() ? 
AF_INET : AF_INET6); if_indextoname(rtm->rtm_index, rt_info->name); + + ifreq req; + memset(&req, 0, sizeof(req)); + if_indextoname(rtm->rtm_index, req.ifr_name); + if (ioctl(s, SIOCGIFMTU, &req) < 0) return false; + rt_info->mtu = req.ifr_mtu; + return true; } #endif @@ -231,17 +284,29 @@ namespace libtorrent { namespace namespace libtorrent { - bool in_subnet(address const& addr, ip_interface const& iface) + // return (a1 & mask) == (a2 & mask) + bool match_addr_mask(address const& a1, address const& a2, address const& mask) { - if (addr.is_v4() != iface.interface_address.is_v4()) return false; - // since netmasks seems unreliable for IPv6 interfaces - // (MacOS X returns AF_INET addresses as bitmasks) assume - // that any IPv6 address belongs to the subnet of any - // interface with an IPv6 address - if (addr.is_v6()) return true; + // all 3 addresses needs to belong to the same family + if (a1.is_v4() != a2.is_v4()) return false; + if (a1.is_v4() != mask.is_v4()) return false; - return (addr.to_v4().to_ulong() & iface.netmask.to_v4().to_ulong()) - == (iface.interface_address.to_v4().to_ulong() & iface.netmask.to_v4().to_ulong()); +#if TORRENT_USE_IPV6 + if (a1.is_v6()) + { + address_v6::bytes_type b1; + address_v6::bytes_type b2; + address_v6::bytes_type m; + b1 = a1.to_v6().to_bytes(); + b2 = a2.to_v6().to_bytes(); + m = mask.to_v6().to_bytes(); + for (int i = 0; i < b1.size(); ++i) + b1[i] &= m[i]; + return memcmp(&b1[0], &b2[0], b1.size()); + } +#endif + return (a1.to_v4().to_ulong() & mask.to_v4().to_ulong()) + == (a2.to_v4().to_ulong() & mask.to_v4().to_ulong()); } bool in_local_network(io_service& ios, address const& addr, error_code& ec) @@ -251,10 +316,56 @@ namespace libtorrent for (std::vector::iterator i = net.begin() , end(net.end()); i != end; ++i) { - if (in_subnet(addr, *i)) return true; + if (match_addr_mask(addr, i->interface_address, i->netmask)) return true; } return false; } + +#if defined TORRENT_WINDOWS || defined TORRENT_MINGW + address 
build_netmask(int bits, int family) + { + if (family == AF_INET) + { + typedef asio::ip::address_v4::bytes_type bytes_t; + bytes_t b; + std::memset(&b[0], 0xff, b.size()); + for (int i = sizeof(bytes_t)/8-1; i > 0; --i) + { + if (bits < 8) + { + b[i] <<= bits; + break; + } + b[i] = 0; + bits -= 8; + } + return address_v4(b); + } +#if TORRENT_USE_IPV6 + else if (family == AF_INET6) + { + typedef asio::ip::address_v6::bytes_type bytes_t; + bytes_t b; + std::memset(&b[0], 0xff, b.size()); + for (int i = sizeof(bytes_t)/8-1; i > 0; --i) + { + if (bits < 8) + { + b[i] <<= bits; + break; + } + b[i] = 0; + bits -= 8; + } + return address_v6(b); + } +#endif + else + { + return address(); + } + } +#endif std::vector enum_net_interfaces(io_service& ios, error_code& ec) { @@ -295,8 +406,20 @@ namespace libtorrent iface.interface_address = sockaddr_to_address(&item.ifr_addr); strcpy(iface.name, item.ifr_name); - ifreq netmask = item; - if (ioctl(s, SIOCGIFNETMASK, &netmask) < 0) + ifreq req; + memset(&req, 0, sizeof(req)); + strncpy(req.ifr_name, item.ifr_name, IF_NAMESIZE); + if (ioctl(s, SIOCGIFMTU, &req) < 0) + { + ec = error_code(errno, asio::error::system_category); + close(s); + return ret; + } + iface.mtu = req.ifr_mtu; + + memset(&req, 0, sizeof(req)); + strncpy(req.ifr_name, item.ifr_name, IF_NAMESIZE); + if (ioctl(s, SIOCGIFNETMASK, &req) < 0) { #if TORRENT_USE_IPV6 if (iface.interface_address.is_v6()) @@ -314,7 +437,7 @@ namespace libtorrent } else { - iface.netmask = sockaddr_to_address(&netmask.ifr_addr); + iface.netmask = sockaddr_to_address(&req.ifr_addr, item.ifr_addr.sa_family); } ret.push_back(iface); } @@ -331,6 +454,65 @@ namespace libtorrent #elif defined TORRENT_WINDOWS || defined TORRENT_MINGW + // Load Iphlpapi library + HMODULE iphlp = LoadLibraryA("Iphlpapi.dll"); + if (iphlp) + { + // Get GetAdaptersAddresses() pointer + typedef ULONG (WINAPI *GetAdaptersAddresses_t)(ULONG,ULONG,PVOID,PIP_ADAPTER_ADDRESSES,PULONG); + GetAdaptersAddresses_t 
GetAdaptersAddresses = (GetAdaptersAddresses_t)GetProcAddress( + iphlp, "GetAdaptersAddresses"); + + if (GetAdaptersAddresses) + { + PIP_ADAPTER_ADDRESSES adapter_addresses = 0; + ULONG out_buf_size = 0; + if (GetAdaptersAddresses(AF_UNSPEC, GAA_FLAG_SKIP_MULTICAST | GAA_FLAG_SKIP_DNS_SERVER + | GAA_FLAG_SKIP_ANYCAST, NULL, adapter_addresses, &out_buf_size) != ERROR_BUFFER_OVERFLOW) + { + FreeLibrary(iphlp); + ec = asio::error::operation_not_supported; + return std::vector(); + } + + adapter_addresses = (IP_ADAPTER_ADDRESSES*)malloc(out_buf_size); + if (!adapter_addresses) + { + FreeLibrary(iphlp); + ec = asio::error::no_memory; + return std::vector(); + } + + if (GetAdaptersAddresses(AF_UNSPEC, GAA_FLAG_SKIP_MULTICAST | GAA_FLAG_SKIP_DNS_SERVER + | GAA_FLAG_SKIP_ANYCAST, NULL, adapter_addresses, &out_buf_size) == NO_ERROR) + { + for (PIP_ADAPTER_ADDRESSES adapter = adapter_addresses; + adapter != 0; adapter = adapter->Next) + { + ip_interface r; + strncpy(r.name, adapter->AdapterName, sizeof(r.name)); + r.name[sizeof(r.name)-1] = 0; + r.mtu = adapter->Mtu; + IP_ADAPTER_UNICAST_ADDRESS* unicast = adapter->FirstUnicastAddress; + while (unicast) + { + r.interface_address = sockaddr_to_address(unicast->Address.lpSockaddr); + + ret.push_back(r); + + unicast = unicast->Next; + } + } + } + + // Free memory + free(adapter_addresses); + FreeLibrary(iphlp); + return ret; + } + FreeLibrary(iphlp); + } + SOCKET s = socket(AF_INET, SOCK_DGRAM, 0); if (s == SOCKET_ERROR) { @@ -356,9 +538,11 @@ namespace libtorrent for (int i = 0; i < n; ++i) { iface.interface_address = sockaddr_to_address(&buffer[i].iiAddress.Address); - iface.netmask = sockaddr_to_address(&buffer[i].iiNetmask.Address); - iface.name[0] = 0; if (iface.interface_address == address_v4::any()) continue; + iface.netmask = sockaddr_to_address(&buffer[i].iiNetmask.Address + , iface.interface_address.is_v4() ? AF_INET : AF_INET6); + iface.name[0] = 0; + iface.mtu = 1500; // how to get the MTU? 
ret.push_back(iface); } @@ -372,6 +556,7 @@ namespace libtorrent for (;i != udp::resolver_iterator(); ++i) { iface.interface_address = i->endpoint().address(); + iface.mtu = 1500; if (iface.interface_address.is_v4()) iface.netmask = address_v4::netmask(iface.interface_address.to_v4()); ret.push_back(iface); @@ -528,6 +713,12 @@ namespace libtorrent char* end = buf.get() + needed; + int s = socket(AF_INET, SOCK_DGRAM, 0); + if (s < 0) + { + ec = error_code(errno, asio::error::system_category); + return std::vector(); + } rt_msghdr* rtm; for (char* next = buf.get(); next < end; next += rtm->rtm_msglen) { @@ -536,11 +727,13 @@ namespace libtorrent continue; ip_route r; - if (parse_route(rtm, &r)) ret.push_back(r); + if (parse_route(s, rtm, &r)) ret.push_back(r); } + close(s); #elif defined TORRENT_WINDOWS || defined TORRENT_MINGW - +/* + move this to enum_net_interfaces // Load Iphlpapi library HMODULE iphlp = LoadLibraryA("Iphlpapi.dll"); if (!iphlp) @@ -596,11 +789,109 @@ namespace libtorrent ret.push_back(r); } } - + // Free memory free(adapter_info); FreeLibrary(iphlp); +*/ + // Load Iphlpapi library + HMODULE iphlp = LoadLibraryA("Iphlpapi.dll"); + if (!iphlp) + { + ec = asio::error::operation_not_supported; + return std::vector(); + } + + typedef DWORD (WINAPI *GetIpForwardTable2_t)( + ADDRESS_FAMILY, PMIB_IPFORWARD_TABLE2*); + typedef void (WINAPI *FreeMibTable_t)(PVOID Memory); + + GetIpForwardTable2_t GetIpForwardTable2 = (GetIpForwardTable2_t)GetProcAddress( + iphlp, "GetIpForwardTable2"); + FreeMibTable_t FreeMibTable = (FreeMibTable_t)GetProcAddress( + iphlp, "FreeMibTable"); + if (GetIpForwardTable2 && FreeMibTable) + { + MIB_IPFORWARD_TABLE2* routes = NULL; + int res = GetIpForwardTable2(AF_UNSPEC, &routes); + if (res == NO_ERROR) + { + for (int i = 0; i < routes->NumEntries; ++i) + { + ip_route r; + r.gateway = sockaddr_to_address((const sockaddr*)&routes->Table[i].NextHop); + r.destination = sockaddr_to_address( + (const 
sockaddr*)&routes->Table[i].DestinationPrefix.Prefix); + r.netmask = build_netmask(routes->Table[i].SitePrefixLength + , routes->Table[i].DestinationPrefix.Prefix.si_family); + MIB_IFROW ifentry; + ifentry.dwIndex = routes->Table[i].InterfaceIndex; + if (GetIfEntry(&ifentry) == NO_ERROR) + { + wcstombs(r.name, ifentry.wszName, sizeof(r.name)); + r.mtu = ifentry.dwMtu; + ret.push_back(r); + } + } + } + if (routes) FreeMibTable(routes); + FreeLibrary(iphlp); + return ret; + } + + // Get GetIpForwardTable() pointer + typedef DWORD (WINAPI *GetIpForwardTable_t)(PMIB_IPFORWARDTABLE pIpForwardTable,PULONG pdwSize,BOOL bOrder); + + GetIpForwardTable_t GetIpForwardTable = (GetIpForwardTable_t)GetProcAddress( + iphlp, "GetIpForwardTable"); + if (!GetIpForwardTable) + { + FreeLibrary(iphlp); + ec = asio::error::operation_not_supported; + return std::vector(); + } + + MIB_IPFORWARDTABLE* routes = NULL; + ULONG out_buf_size = 0; + if (GetIpForwardTable(routes, &out_buf_size, FALSE) != ERROR_INSUFFICIENT_BUFFER) + { + FreeLibrary(iphlp); + ec = asio::error::operation_not_supported; + return std::vector(); + } + + routes = (MIB_IPFORWARDTABLE*)malloc(out_buf_size); + if (!routes) + { + FreeLibrary(iphlp); + ec = asio::error::no_memory; + return std::vector(); + } + + if (GetIpForwardTable(routes, &out_buf_size, FALSE) == NO_ERROR) + { + for (int i = 0; i < routes->dwNumEntries; ++i) + { + ip_route r; + r.destination = inaddr_to_address((in_addr const*)&routes->table[i].dwForwardDest); + r.netmask = inaddr_to_address((in_addr const*)&routes->table[i].dwForwardMask); + r.gateway = inaddr_to_address((in_addr const*)&routes->table[i].dwForwardNextHop); + MIB_IFROW ifentry; + ifentry.dwIndex = routes->table[i].dwForwardIfIndex; + if (GetIfEntry(&ifentry) == NO_ERROR) + { + wcstombs(r.name, ifentry.wszName, sizeof(r.name)); + r.name[sizeof(r.name)-1] = 0; + r.mtu = ifentry.dwMtu; + ret.push_back(r); + } + } + } + + // Free memory + free(routes); + FreeLibrary(iphlp); #elif defined 
TORRENT_LINUX enum { BUFSIZE = 8192 }; @@ -639,11 +930,18 @@ namespace libtorrent return std::vector(); } + int s = socket(AF_INET, SOCK_DGRAM, 0); + if (s < 0) + { + ec = error_code(errno, asio::error::system_category); + return std::vector(); + } for (; NLMSG_OK(nl_msg, len); nl_msg = NLMSG_NEXT(nl_msg, len)) { ip_route r; - if (parse_route(nl_msg, &r)) ret.push_back(r); + if (parse_route(s, nl_msg, &r)) ret.push_back(r); } + close(s); close(sock); #endif diff --git a/src/error_code.cpp b/src/error_code.cpp index be7b19784..7d2404a1a 100644 --- a/src/error_code.cpp +++ b/src/error_code.cpp @@ -156,7 +156,7 @@ namespace libtorrent "pex message too large", "invalid pex message", "invalid lt_tracker message", - "", + "pex messages sent too frequent (possible attack)", "", "", "", diff --git a/src/instantiate_connection.cpp b/src/instantiate_connection.cpp index 81a8cbc7a..8842fdbba 100644 --- a/src/instantiate_connection.cpp +++ b/src/instantiate_connection.cpp @@ -35,6 +35,7 @@ POSSIBILITY OF SUCH DAMAGE. 
#include "libtorrent/socket.hpp" #include "libtorrent/session_settings.hpp" #include "libtorrent/socket_type.hpp" +#include "libtorrent/utp_socket_manager.hpp" #include #include @@ -43,9 +44,15 @@ namespace libtorrent bool instantiate_connection(io_service& ios , proxy_settings const& ps, socket_type& s - , void* ssl_context) + , void* ssl_context + , utp_socket_manager* sm) { - if (ps.type == proxy_settings::none) + if (sm) + { + s.instantiate(ios); + s.get()->set_impl(sm->new_utp_socket(s.get())); + } + else if (ps.type == proxy_settings::none) { #ifdef TORRENT_USE_OPENSSL if (ssl_context) diff --git a/src/metadata_transfer.cpp b/src/metadata_transfer.cpp index 6429527c2..f13bb5606 100644 --- a/src/metadata_transfer.cpp +++ b/src/metadata_transfer.cpp @@ -248,6 +248,8 @@ namespace libtorrent { namespace , m_tp(tp) {} + virtual char const* type() const { return "LT_metadata"; } + // can add entries to the extension handshake virtual void add_handshake(entry& h) { diff --git a/src/packet_buffer.cpp b/src/packet_buffer.cpp new file mode 100644 index 000000000..b48a04e41 --- /dev/null +++ b/src/packet_buffer.cpp @@ -0,0 +1,189 @@ +/* + +Copyright (c) 2010, Arvid Norberg +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the distribution. + * Neither the name of the author nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include // free and calloc +#include "libtorrent/packet_buffer.hpp" +#include "libtorrent/assert.hpp" + +namespace libtorrent { + + bool compare_less_wrap(boost::uint32_t lhs, boost::uint32_t rhs + , boost::uint32_t mask); + + packet_buffer::packet_buffer() + : m_storage(0) + , m_capacity(0) + , m_size(0) + , m_first(0) + , m_last(0) + {} + + packet_buffer::~packet_buffer() + { + free(m_storage); + } + + void* packet_buffer::insert(index_type idx, void* value) + { + TORRENT_ASSERT_VAL(idx <= 0xffff, idx); + // you're not allowed to insert NULLs! + TORRENT_ASSERT(value); + + if (m_size != 0) + { + if (compare_less_wrap(idx, m_first, 0xffff)) + { + // Index comes before m_first. If we have room, we can simply + // adjust m_first backward. 
+ + std::size_t free_space = 0; + + for (index_type i = (m_first - 1) & (m_capacity - 1); + i != (m_first & (m_capacity - 1)); i = (i - 1) & (m_capacity - 1)) + { + if (m_storage[i & (m_capacity - 1)]) + break; + ++free_space; + } + + if (((m_first - idx) & 0xffff) > free_space) + reserve(((m_first - idx) & 0xffff) + m_capacity - free_space); + + m_first = idx; + } + else if (idx >= m_first + m_capacity) + { + reserve(idx - m_first + 1); + } + else if (idx < m_first) + { + // We have wrapped. + if (idx > ((m_first + m_capacity) & 0xffff) && m_capacity < 0xffff) + { + reserve(m_capacity + (idx - ((m_first + m_capacity) & 0xffff))); + } + } + if (compare_less_wrap(m_last, (idx + 1) & 0xffff, 0xffff)) + m_last = (idx + 1) & 0xffff; + } + else + { + m_first = idx; + m_last = (idx + 1) & 0xffff; + } + + if (m_capacity == 0) reserve(16); + + void* old_value = m_storage[idx & (m_capacity - 1)]; + m_storage[idx & (m_capacity - 1)] = value; + + if (m_size++ == 0) + { + m_first = idx; + } + + TORRENT_ASSERT_VAL(m_first <= 0xffff, m_first); + return old_value; + } + + void* packet_buffer::at(index_type idx) const + { + if (idx >= m_first + m_capacity) + return 0; + + if (compare_less_wrap(idx, m_first, 0xffff)) + { + return 0; + } + + return m_storage[idx & (m_capacity - 1)]; + } + + void packet_buffer::reserve(std::size_t size) + { + TORRENT_ASSERT_VAL(size <= 0xffff, size); + std::size_t new_size = m_capacity == 0 ? 
16 : m_capacity; + + while (new_size < size) + new_size <<= 1; + + void** new_storage = (void**)malloc(sizeof(void*) * new_size); + + for (index_type i = 0; i < new_size; ++i) + new_storage[i] = 0; + + for (index_type i = m_first; i < (m_first + m_capacity); ++i) + new_storage[i & (new_size - 1)] = m_storage[i & (m_capacity - 1)]; + + free(m_storage); + + m_storage = new_storage; + m_capacity = new_size; + } + + void* packet_buffer::remove(index_type idx) + { + // TODO: use compare_less_wrap for this comparison as well + if (idx >= m_first + m_capacity) + return 0; + + if (compare_less_wrap(idx, m_first, 0xffff)) + return 0; + + void* old_value = m_storage[idx & (m_capacity - 1)]; + m_storage[idx & (m_capacity - 1)] = 0; + + if (old_value) + { + --m_size; + if (m_size == 0) m_last = m_first; + } + + if (idx == m_first && m_size != 0) + { + while (!m_storage[++m_first & (m_capacity - 1)]); + m_first &= 0xffff; + } + + if (((idx + 1) & 0xffff) == m_last && m_size != 0) + { + while (!m_storage[--m_last & (m_capacity - 1)]); + ++m_last; + m_last &= 0xffff; + } + + TORRENT_ASSERT_VAL(m_first <= 0xffff, m_first); + return old_value; + } + +} + diff --git a/src/peer_connection.cpp b/src/peer_connection.cpp index 34a92268a..fbfd39970 100644 --- a/src/peer_connection.cpp +++ b/src/peer_connection.cpp @@ -53,6 +53,7 @@ POSSIBILITY OF SUCH DAMAGE. 
#include "libtorrent/broadcast_socket.hpp" #include "libtorrent/torrent.hpp" #include "libtorrent/peer_info.hpp" +#include "libtorrent/bt_peer_connection.hpp" #ifdef TORRENT_DEBUG #include @@ -150,6 +151,7 @@ namespace libtorrent , m_bitfield_received(false) , m_no_download(false) , m_sent_suggests(false) + , m_holepunch_mode(false) , m_ignore_stats(false) #ifdef TORRENT_DEBUG , m_in_constructor(true) @@ -195,6 +197,8 @@ namespace libtorrent + to_string(m_remote.port()).elems, m_ses.listen_port()); (*m_logger) << time_now_string() << " *** OUTGOING CONNECTION: " << print_endpoint(m_remote) << "\n"; + if (m_socket->get()) (*m_logger) << "uTP connection\n"; + else (*m_logger) << "TCP connection\n"; #endif #ifdef TORRENT_DEBUG piece_failed = false; @@ -289,6 +293,7 @@ namespace libtorrent , m_bitfield_received(false) , m_no_download(false) , m_sent_suggests(false) + , m_holepunch_mode(false) , m_ignore_stats(false) #ifdef TORRENT_DEBUG , m_in_constructor(true) @@ -335,6 +340,8 @@ namespace libtorrent + to_string(remote().port()).elems, m_ses.listen_port()); (*m_logger) << time_now_string() << " *** INCOMING CONNECTION: " << print_endpoint(m_remote) << "\n"; + if (m_socket->get()) (*m_logger) << "uTP connection\n"; + else (*m_logger) << "TCP connection\n"; #endif #ifndef TORRENT_DISABLE_GEO_IP @@ -557,6 +564,7 @@ namespace libtorrent return; } m_remote = m_socket->remote_endpoint(ec); + TORRENT_ASSERT(m_remote.address() != address_v4::any()); if (ec) { disconnect(ec); @@ -626,6 +634,16 @@ namespace libtorrent { m_extensions.push_back(ext); } + + peer_plugin const* peer_connection::find_plugin(char const* type) + { + for (extension_list_t::iterator i = m_extensions.begin() + , end(m_extensions.end()); i != end; ++i) + { + if (strcmp((*i)->type(), type) == 0) return (*i).get(); + } + return 0; + } #endif void peer_connection::send_allowed_set() @@ -2215,6 +2233,8 @@ namespace libtorrent if (!m_bitfield_received) incoming_have_none(); if (is_disconnecting()) return; + 
update_desired_queue_size(); + #ifndef TORRENT_DISABLE_EXTENSIONS for (extension_list_t::iterator i = m_extensions.begin() , end(m_extensions.end()); i != end; ++i) @@ -3238,13 +3258,62 @@ namespace libtorrent TORRENT_ASSERT(m_ses.is_network_thread()); TORRENT_ASSERT(m_connecting); + connect_failed(errors::timed_out); + } + + void peer_connection::connect_failed(error_code const& e) + { + TORRENT_ASSERT(m_connecting); + TORRENT_ASSERT(e); + #if defined TORRENT_VERBOSE_LOGGING || defined TORRENT_ERROR_LOGGING - peer_log("CONNECTION TIMED OUT: %s", print_endpoint(m_remote).c_str()); + peer_log("CONNECTION FAILED: %s", print_endpoint(m_remote).c_str()); #endif #if defined TORRENT_VERBOSE_LOGGING || defined TORRENT_LOGGING || defined TORRENT_ERROR_LOGGING - (*m_ses.m_logger) << "CONNECTION TIMED OUT: " << print_endpoint(m_remote) << "\n"; + (*m_ses.m_logger) << "CONNECTION FAILED: " << print_endpoint(m_remote) << "\n"; #endif - disconnect(errors::timed_out, 1); + + if (m_connection_ticket != -1) + { + m_ses.m_half_open.done(m_connection_ticket); + m_connecting = false; + } + + // a connection attempt using uTP just failed + // mark this peer as not supporting uTP + // we'll never try it again (unless we're trying holepunch) + if (m_socket->get() + && m_peer_info + && m_peer_info->supports_utp + && !m_holepunch_mode) + { + m_peer_info->supports_utp = false; + // reconnect immediately using TCP + policy::peer* pi = peer_info_struct(); + boost::shared_ptr t = m_torrent.lock(); + fast_reconnect(true); + disconnect(e, 0); + if (t && pi) t->connect_to_peer(pi, true); + return; + } + + if (m_holepunch_mode) + fast_reconnect(true); + + if ((!m_socket->get() || !m_ses.m_settings.enable_outgoing_tcp) + && m_peer_info + && m_peer_info->supports_holepunch + && !m_holepunch_mode) + { + boost::shared_ptr t = m_torrent.lock(); + // see if we can try a holepunch + bt_peer_connection* p = t->find_introducer(remote()); + if (p) + p->write_holepunch_msg(bt_peer_connection::hp_rendezvous, 
remote(), 0); + } + + disconnect(e, 1); + return; } // the error argument defaults to 0, which means deliberate disconnect @@ -3524,6 +3593,7 @@ namespace libtorrent p.flags |= is_seed() ? peer_info::seed : 0; p.flags |= m_snubbed ? peer_info::snubbed : 0; p.flags |= m_upload_only ? peer_info::upload_only : 0; + p.flags |= m_holepunch_mode ? peer_info::holepunched : 0; if (peer_info_struct()) { policy::peer* pi = peer_info_struct(); @@ -3680,6 +3750,37 @@ namespace libtorrent m_superseed_piece = index; } + void peer_connection::update_desired_queue_size() + { + if (m_snubbed) + { + m_desired_queue_size = 1; + return; + } + + int download_rate = statistics().download_rate(); + + // calculate the desired download queue size + const int queue_time = m_ses.settings().request_queue_time; + // (if the latency is more than this, the download will stall) + // so, the queue size is queue_time * down_rate / 16 kiB + // (16 kB is the size of each request) + // the minimum number of requests is 2 and the maximum is 48 + // the block size doesn't have to be 16. 
So we first query the + // torrent for it + boost::shared_ptr t = m_torrent.lock(); + const int block_size = t->block_size(); + + TORRENT_ASSERT(block_size > 0); + + m_desired_queue_size = queue_time * download_rate / block_size; + + if (m_desired_queue_size > m_max_out_request_queue) + m_desired_queue_size = m_max_out_request_queue; + if (m_desired_queue_size < min_request_queue) + m_desired_queue_size = min_request_queue; + } + void peer_connection::second_tick(int tick_interval_ms) { ptime now = time_now(); @@ -3880,36 +3981,13 @@ namespace libtorrent if (!t->ready_for_connections()) return; - // calculate the desired download queue size - const int queue_time = m_ses.settings().request_queue_time; - // (if the latency is more than this, the download will stall) - // so, the queue size is queue_time * down_rate / 16 kiB - // (16 kB is the size of each request) - // the minimum number of requests is 2 and the maximum is 48 - // the block size doesn't have to be 16. So we first query the - // torrent for it - const int block_size = t->block_size(); - TORRENT_ASSERT(block_size > 0); - - if (m_snubbed) - { - m_desired_queue_size = 1; - } - else - { - m_desired_queue_size = queue_time - * statistics().download_rate() / block_size; - if (m_desired_queue_size > m_max_out_request_queue) - m_desired_queue_size = m_max_out_request_queue; - if (m_desired_queue_size < min_request_queue) - m_desired_queue_size = min_request_queue; + update_desired_queue_size(); - if (m_desired_queue_size == m_max_out_request_queue + if (m_desired_queue_size == m_max_out_request_queue && t->alerts().should_post()) - { - t->alerts().post_alert(performance_alert(t->get_handle() - , performance_alert::outstanding_request_limit_reached)); - } + { + t->alerts().post_alert(performance_alert(t->get_handle() + , performance_alert::outstanding_request_limit_reached)); } int piece_timeout = m_ses.settings().piece_timeout; @@ -3926,6 +4004,7 @@ namespace libtorrent // allowed to download. 
If it is impossible to beat the piece // timeout at this rate, adjust it to be realistic + const int block_size = t->block_size(); int rate_limit_timeout = rate_limit / block_size; if (piece_timeout < rate_limit_timeout) piece_timeout = rate_limit_timeout; @@ -4131,8 +4210,11 @@ namespace libtorrent // only add new piece-chunks if the send buffer is small enough // otherwise there will be no end to how large it will be! - int buffer_size_watermark = int(m_statistics.upload_rate()) + int upload_rate = int(m_statistics.upload_rate()); + + int buffer_size_watermark = upload_rate * m_ses.settings().send_buffer_watermark_factor; + if (buffer_size_watermark < 512) buffer_size_watermark = 512; else if (buffer_size_watermark > m_ses.settings().send_buffer_watermark) { @@ -4351,13 +4433,16 @@ namespace libtorrent { if (!m_ignore_bandwidth_limits) { + bool utp = m_socket->get(); + // in this case, we have data to send, but no // bandwidth. So, we simply request bandwidth // from the bandwidth manager request_upload_bandwidth( - &m_ses.m_upload_channel + (m_ses.m_settings.rate_limit_utp || !utp) ? &m_ses.m_upload_channel : 0 , &t->m_bandwidth_channel[upload_channel] - , &m_bandwidth_channel[upload_channel]); + , &m_bandwidth_channel[upload_channel] + , !utp ? &m_ses.m_tcp_upload_channel : 0); } else { @@ -4461,13 +4546,16 @@ namespace libtorrent { if (!m_ignore_bandwidth_limits) { + bool utp = m_socket->get(); + // in this case, we have outstanding data to // receive, but no bandwidth quota. So, we simply // request bandwidth from the bandwidth manager request_download_bandwidth( - &m_ses.m_download_channel + (m_ses.m_settings.rate_limit_utp || !utp) ? &m_ses.m_download_channel : 0 , &t->m_bandwidth_channel[download_channel] - , &m_bandwidth_channel[download_channel]); + , &m_bandwidth_channel[download_channel] + , !utp ? 
&m_ses.m_tcp_download_channel : 0); } else { @@ -4785,6 +4873,12 @@ namespace libtorrent TORRENT_ASSERT(m_ses.is_network_thread()); INVARIANT_CHECK; +#ifdef TORRENT_VERBOSE_LOGGING + (*m_logger) << time_now_string() << " *** ON_RECEIVE_DATA [" + " bytes: " << bytes_transferred << + " error: " << error.message() << + " ]\n"; +#endif #if defined TORRENT_ASIO_DEBUGGING complete_async("peer_connection::on_receive_data"); #endif @@ -4816,6 +4910,7 @@ namespace libtorrent int num_loops = 0; do { + TORRENT_ASSERT(m_recv_pos + bytes_transferred <= m_packet_size); #ifdef TORRENT_VERBOSE_LOGGING peer_log("<<< read %d bytes", int(bytes_transferred)); #endif @@ -4869,6 +4964,7 @@ namespace libtorrent error_code ec; bytes_transferred = try_read(read_sync, ec); + TORRENT_ASSERT(bytes_transferred > 0 || ec); if (ec && ec != asio::error::would_block) { m_statistics.trancieve_ip_packet(bytes_in_loop, m_remote.address().is_v6()); @@ -5036,27 +5132,25 @@ namespace libtorrent if (m_disconnecting) return; - m_connecting = false; - m_ses.m_half_open.done(m_connection_ticket); - error_code ec; if (e) { -#if defined TORRENT_VERBOSE_LOGGING || defined TORRENT_LOGGING || defined TORRENT_ERROR_LOGGING - (*m_ses.m_logger) << time_now_string() << " CONNECTION FAILED: " << print_endpoint(m_remote) - << ": " << e.message() << "\n"; -#endif -#if defined TORRENT_VERBOSE_LOGGING || defined TORRENT_ERROR_LOGGING - (*m_logger) << time_now_string() << " CONNECTION FAILED: " << print_endpoint(m_remote) - << ": " << e.message() << "\n"; -#endif - disconnect(e, 1); + connect_failed(e); return; } + m_connecting = false; + m_ses.m_half_open.done(m_connection_ticket); + if (m_disconnecting) return; m_last_receive = time_now(); + if (m_socket->get() && m_peer_info) + { + m_peer_info->confirmed_supports_utp = true; + m_peer_info->supports_utp = false; + } + // this means the connection just succeeded m_statistics.received_synack(m_remote.address().is_v6()); @@ -5110,6 +5204,13 @@ namespace libtorrent { 
TORRENT_ASSERT(m_ses.is_network_thread()); +#ifdef TORRENT_VERBOSE_LOGGING + (*m_logger) << time_now_string() << " *** ON_SEND_DATA [" + " bytes: " << bytes_transferred << + " error: " << error.message() << + " ]\n"; +#endif + INVARIANT_CHECK; #if defined TORRENT_ASIO_DEBUGGING diff --git a/src/policy.cpp b/src/policy.cpp index e9972f6ed..ebe188b77 100644 --- a/src/policy.cpp +++ b/src/policy.cpp @@ -108,7 +108,7 @@ namespace libtorrent { // returns the rank of a peer's source. We have an affinity // to connecting to peers with higher rank. This is to avoid - // problems when out peer list is diluted by stale peers from + // problems when our peer list is diluted by stale peers from // the resume data for instance int source_rank(int source_bitmask) { @@ -1008,6 +1008,10 @@ namespace libtorrent p->seed = true; ++m_num_seeds; } + if (flags & 0x04) + p->supports_utp = true; + if (flags & 0x08) + p->supports_holepunch = true; #ifndef TORRENT_DISABLE_GEO_IP int as = m_torrent->session().as_for_ip(p->address()); @@ -1048,6 +1052,10 @@ namespace libtorrent if (!p->seed) ++m_num_seeds; p->seed = true; } + if (flags & 0x04) + p->supports_utp = true; + if (flags & 0x08) + p->supports_holepunch = true; #if defined TORRENT_VERBOSE_LOGGING || defined TORRENT_LOGGING if (p->connection) @@ -1362,7 +1370,13 @@ namespace libtorrent TORRENT_ASSERT(c); error_code ec; - TORRENT_ASSERT(c->remote() == c->get_socket()->remote_endpoint(ec) || ec); + if (c->remote() != c->get_socket()->remote_endpoint(ec) && !ec) + { + fprintf(stderr, "c->remote: %s\nc->get_socket()->remote_endpoint: %s\n" + , print_endpoint(c->remote()).c_str() + , print_endpoint(c->get_socket()->remote_endpoint(ec)).c_str()); + TORRENT_ASSERT(false); + } return std::find_if( m_peers.begin() @@ -1525,6 +1539,9 @@ namespace libtorrent #ifndef TORRENT_DISABLE_DHT , added_to_dht(false) #endif + , supports_utp(true) // assume peers support utp + , confirmed_supports_utp(false) + , supports_holepunch(false) { 
TORRENT_ASSERT((src & 0xff) == src); } diff --git a/src/session.cpp b/src/session.cpp index 71ab3dfa3..2afc612b7 100644 --- a/src/session.cpp +++ b/src/session.cpp @@ -154,6 +154,9 @@ namespace libtorrent set.coalesce_reads = false; set.coalesce_writes = false; + // disallow the buffer size to grow for the uTP socket + set.utp_dynamic_sock_buf = false; + return set; } @@ -226,6 +229,9 @@ namespace libtorrent // connect to us if they want to set.max_failcount = 1; + // allow the buffer size to grow for the uTP socket + set.utp_dynamic_sock_buf = true; + return set; } diff --git a/src/session_impl.cpp b/src/session_impl.cpp index 9d5fb7309..50fdc6a25 100644 --- a/src/session_impl.cpp +++ b/src/session_impl.cpp @@ -319,6 +319,11 @@ namespace aux { TORRENT_SETTING(integer, default_peer_upload_rate) TORRENT_SETTING(integer, default_peer_download_rate) TORRENT_SETTING(boolean, broadcast_lsd) + TORRENT_SETTING(boolean, enable_outgoing_utp) + TORRENT_SETTING(boolean, enable_incoming_utp) + TORRENT_SETTING(boolean, enable_outgoing_tcp) + TORRENT_SETTING(boolean, enable_incoming_tcp) + TORRENT_SETTING(integer, max_pex_peers) TORRENT_SETTING(boolean, ignore_resume_timestamps) TORRENT_SETTING(boolean, anonymous_mode) TORRENT_SETTING(integer, tick_interval) @@ -329,6 +334,16 @@ namespace aux { TORRENT_SETTING(integer, unchoke_slots_limit) TORRENT_SETTING(integer, half_open_limit) TORRENT_SETTING(integer, connections_limit) + TORRENT_SETTING(integer, utp_target_delay) + TORRENT_SETTING(integer, utp_gain_factor) + TORRENT_SETTING(integer, utp_syn_resends) + TORRENT_SETTING(integer, utp_fin_resends) + TORRENT_SETTING(integer, utp_num_resends) + TORRENT_SETTING(integer, utp_connect_timeout) + TORRENT_SETTING(integer, utp_delayed_ack) + TORRENT_SETTING(boolean, utp_dynamic_sock_buf) + TORRENT_SETTING(integer, mixed_mode_algorithm) + TORRENT_SETTING(boolean, rate_limit_utp) TORRENT_SETTING(integer, listen_queue_size) }; @@ -482,6 +497,8 @@ namespace aux { , 
boost::bind(&session_impl::on_receive_udp, this, _1, _2, _3, _4) , boost::bind(&session_impl::on_receive_udp_hostname, this, _1, _2, _3, _4) , m_half_open) + , m_utp_socket_manager(m_settings, m_udp_socket + , boost::bind(&session_impl::incoming_connection, this, _1)) , m_timer(m_io_service) , m_lsd_announce_timer(m_io_service) , m_host_resolver(m_io_service) @@ -645,6 +662,7 @@ namespace aux { PRINT_SIZEOF(stat) PRINT_SIZEOF(bandwidth_channel) PRINT_SIZEOF(policy) + (*m_logger) << "sizeof(utp_socket_impl): " << socket_impl_size() << "\n"; PRINT_SIZEOF(file_entry) @@ -1792,12 +1810,15 @@ namespace aux { { // this is probably a dht message m_dht->on_receive(ep, buf, len); + return; } + + if (m_utp_socket_manager.incoming_packet(buf, len, ep)) + return; + // maybe it's a udp tracker response - else if (m_tracker_manager.incoming_udp(e, ep, buf, len)) - { + if (m_tracker_manager.incoming_udp(e, ep, buf, len)) m_stat.received_tracker_bytes(len + 28); - } } void session_impl::on_receive_udp_hostname(error_code const& e @@ -1901,10 +1922,34 @@ namespace aux { return; } + TORRENT_ASSERT(endp.address() != address_v4::any()); + #if defined(TORRENT_VERBOSE_LOGGING) || defined(TORRENT_LOGGING) (*m_logger) << time_now_string() << " <== INCOMING CONNECTION " << endp << "\n"; #endif + if (!m_settings.enable_incoming_utp + && s->get()) + { +#if defined(TORRENT_VERBOSE_LOGGING) || defined(TORRENT_LOGGING) + (*m_logger) << " rejected uTP connection\n"; +#endif + if (m_alerts.should_post()) + m_alerts.post_alert(peer_blocked_alert(torrent_handle(), endp.address())); + return; + } + + if (!m_settings.enable_incoming_tcp + && s->get()) + { +#if defined(TORRENT_VERBOSE_LOGGING) || defined(TORRENT_LOGGING) + (*m_logger) << " rejected TCP connection\n"; +#endif + if (m_alerts.should_post()) + m_alerts.post_alert(peer_blocked_alert(torrent_handle(), endp.address())); + return; + } + // local addresses do not count, since it's likely // coming from our own client through local service 
discovery // and it does not reflect whether or not a router is open @@ -2184,6 +2229,8 @@ namespace aux { m_last_tick = now; + m_utp_socket_manager.tick(now); + // only tick the following once per second if (now - m_last_second_tick < seconds(1)) return; @@ -2225,6 +2272,45 @@ namespace aux { } } + switch (m_settings.mixed_mode_algorithm) + { + case session_settings::prefer_tcp: + m_tcp_upload_channel.throttle(0); + m_tcp_download_channel.throttle(0); + break; + case session_settings::peer_proportional: + { + int num_tcp_peers = 0; + int num_peers = 0; + for (connection_map::iterator i = m_connections.begin() + , end(m_connections.end());i != end; ++i) + { + peer_connection& p = *(*i); + if (p.in_handshake()) continue; + if (!p.get_socket()->get()) ++num_tcp_peers; + ++num_peers; + } + + if (num_peers == 0) + { + m_tcp_upload_channel.throttle(0); + m_tcp_download_channel.throttle(0); + } + else + { + if (num_tcp_peers == 0) num_tcp_peers = 1; + int upload_rate = (std::max)(m_stat.upload_rate(), 5000); + int download_rate = (std::max)(m_stat.download_rate(), 5000); + if (m_upload_channel.throttle()) upload_rate = m_upload_channel.throttle(); + if (m_download_channel.throttle()) download_rate = m_download_channel.throttle(); + + m_tcp_upload_channel.throttle(upload_rate * num_tcp_peers / num_peers); + m_tcp_download_channel.throttle(download_rate * num_tcp_peers / num_peers); + } + } + break; + } + #ifdef TORRENT_STATS ++m_second_counter; int downloading_torrents = 0; @@ -3682,6 +3768,8 @@ namespace aux { } #endif + m_utp_socket_manager.get_status(s.utp_stats); + int peerlist_size = 0; for (torrent_map::const_iterator i = m_torrents.begin() , end(m_torrents.end()); i != end; ++i) diff --git a/src/socket_type.cpp b/src/socket_type.cpp index 86be2fce5..e93b5e29c 100644 --- a/src/socket_type.cpp +++ b/src/socket_type.cpp @@ -53,6 +53,9 @@ namespace libtorrent case socket_type_int_impl::value: get()->~http_stream(); break; + case socket_type_int_impl::value: + 
get()->~utp_stream(); + break; #if TORRENT_USE_I2P case socket_type_int_impl::value: get()->~i2p_stream(); @@ -69,6 +72,7 @@ namespace libtorrent get >()->~ssl_stream(); break; #endif + default: TORRENT_ASSERT(false); } m_type = 0; } @@ -88,6 +92,9 @@ namespace libtorrent case socket_type_int_impl::value: new ((http_stream*)m_data) http_stream(m_io_service); break; + case socket_type_int_impl::value: + new ((utp_stream*)m_data) utp_stream(m_io_service); + break; #if TORRENT_USE_I2P case socket_type_int_impl::value: new ((i2p_stream*)m_data) i2p_stream(m_io_service); @@ -110,6 +117,7 @@ namespace libtorrent , *((boost::asio::ssl::context*)userdata)); break; #endif + default: TORRENT_ASSERT(false); } m_type = type; @@ -127,9 +135,6 @@ namespace libtorrent TORRENT_SOCKTYPE_FORWARD_RET(is_open(), false) } - socket_type::lowest_layer_type& socket_type::lowest_layer() - { TORRENT_SOCKTYPE_FORWARD_RET(lowest_layer(), *((lowest_layer_type*)m_data)) } - void socket_type::open(protocol_type const& p, error_code& ec) { TORRENT_SOCKTYPE_FORWARD(open(p, ec)) } diff --git a/src/storage.cpp b/src/storage.cpp index 58307e63f..140269dbe 100644 --- a/src/storage.cpp +++ b/src/storage.cpp @@ -347,10 +347,10 @@ namespace libtorrent storage(file_storage const& fs, file_storage const* mapped, std::string const& path , file_pool& fp, std::vector const& file_prio) : m_files(fs) + , m_file_priority(file_prio) , m_pool(fp) , m_page_size(page_size()) , m_allocate_files(false) - , m_file_priority(file_prio) { if (mapped) m_mapped_files.reset(new file_storage(*mapped)); diff --git a/src/timestamp_history.cpp b/src/timestamp_history.cpp new file mode 100644 index 000000000..c679e918c --- /dev/null +++ b/src/timestamp_history.cpp @@ -0,0 +1,105 @@ +/* + +Copyright (c) 2009, Arvid Norberg +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the distribution. + * Neither the name of the author nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + + +#include "libtorrent/timestamp_history.hpp" + +namespace libtorrent { + +enum +{ + TIME_MASK = 0xffffffff +}; +// defined in utp_stream.cpp +bool compare_less_wrap(boost::uint32_t lhs, boost::uint32_t rhs + , boost::uint32_t mask); + +boost::uint32_t timestamp_history::add_sample(boost::uint32_t sample, bool step) +{ + if (!m_initialized) + { + for (int i = 0; i < history_size; ++i) + m_history[i] = sample; + m_base = sample; + m_initialized = true; + } + + ++m_num_samples; + + // if sample is less than base, update the base + // and update the history entry (because it will + // be less than that too) + if (compare_less_wrap(sample, m_base, TIME_MASK)) + { + m_base = sample; + m_history[m_index] = sample; + } + // if sample is less than our history entry, update it + else if (compare_less_wrap(sample, m_history[m_index], TIME_MASK)) + { + m_history[m_index] = sample; + } + + boost::uint32_t ret = sample - m_base; + + // don't step base delay history unless we have at least 120 + // samples. 
Anything less would suggest that the connection is + // essentially idle and the samples are probably not very reliable + if (step && m_num_samples > 120) + { + m_num_samples = 0; + m_index = (m_index + 1) % history_size; + + m_history[m_index] = sample; + // update m_base + m_base = sample; + for (int i = 0; i < history_size; ++i) + { + if (compare_less_wrap(m_history[i], m_base, TIME_MASK)) + m_base = m_history[i]; + } + } + return ret; +} + +void timestamp_history::adjust_base(int change) +{ + m_base += change; + // make sure this adjustment sticks by updating all history slots + for (int i = 0; i < history_size; ++i) + { + if (compare_less_wrap(m_history[i], m_base, TIME_MASK)) + m_history[i] = m_base; + } +} + +} diff --git a/src/torrent.cpp b/src/torrent.cpp index 65c5c9c6c..0d883770b 100644 --- a/src/torrent.cpp +++ b/src/torrent.cpp @@ -313,6 +313,9 @@ namespace libtorrent return ret; } + // defined in ut_pex.cpp + bool was_introduced_by(peer_plugin const*, tcp::endpoint const&); + torrent::torrent( session_impl& ses , tcp::endpoint const& net_interface @@ -986,6 +989,33 @@ namespace libtorrent , shared_from_this(), _1, _2)); } + bt_peer_connection* torrent::find_introducer(tcp::endpoint const& ep) const + { +#ifndef TORRENT_DISABLE_EXTENSIONS + for (const_peer_iterator i = m_connections.begin(); i != m_connections.end(); ++i) + { + if ((*i)->type() != peer_connection::bittorrent_connection) continue; + bt_peer_connection* p = (bt_peer_connection*)(*i); + if (!p->supports_holepunch()) continue; + peer_plugin const* pp = p->find_plugin("ut_pex"); + if (!pp) continue; + if (was_introduced_by(pp, ep)) return (bt_peer_connection*)p; + } +#endif + return 0; + } + + bt_peer_connection* torrent::find_peer(tcp::endpoint const& ep) const + { + for (const_peer_iterator i = m_connections.begin(); i != m_connections.end(); ++i) + { + peer_connection* p = *i; + if (p->type() != peer_connection::bittorrent_connection) continue; + if (p->remote() == ep) return 
(bt_peer_connection*)p; + } + return 0; + } + void torrent::on_resume_data_checked(int ret, disk_io_job const& j) { TORRENT_ASSERT(m_ses.is_network_thread()); @@ -4366,7 +4396,7 @@ namespace libtorrent } - bool torrent::connect_to_peer(policy::peer* peerinfo) + bool torrent::connect_to_peer(policy::peer* peerinfo, bool ignore_limit) { INVARIANT_CHECK; @@ -4390,8 +4420,8 @@ namespace libtorrent #endif #endif - TORRENT_ASSERT(want_more_peers()); - TORRENT_ASSERT(m_ses.num_connections() < m_ses.settings().connections_limit); + TORRENT_ASSERT(want_more_peers() || ignore_limit); + TORRENT_ASSERT(m_ses.num_connections() < m_ses.settings().connections_limit || ignore_limit); tcp::endpoint a(peerinfo->ip()); TORRENT_ASSERT((m_ses.m_ip_filter.access(peerinfo->address()) & ip_filter::blocked) == 0); @@ -4412,7 +4442,21 @@ namespace libtorrent else #endif { - bool ret = instantiate_connection(m_ses.m_io_service, m_ses.proxy(), *s); + // this is where we determine if we open a regular TCP connection + // or a uTP connection. 
If the m_utp_socket_manager pointer is not passed in + // we'll instantiate a TCP connection + utp_socket_manager* sm = 0; + + if (m_ses.m_settings.enable_outgoing_utp + && (!m_ses.m_settings.enable_outgoing_tcp + || peerinfo->supports_utp + || peerinfo->confirmed_supports_utp)) + sm = &m_ses.m_utp_socket_manager; + + // don't make a TCP connection if it's disabled + if (sm == 0 && !m_ses.m_settings.enable_outgoing_tcp) return false; + + bool ret = instantiate_connection(m_ses.m_io_service, m_ses.proxy(), *s, 0, sm); (void)ret; TORRENT_ASSERT(ret); } diff --git a/src/udp_socket.cpp b/src/udp_socket.cpp index ed41f3aae..b58380c40 100644 --- a/src/udp_socket.cpp +++ b/src/udp_socket.cpp @@ -59,8 +59,12 @@ udp_socket::udp_socket(asio::io_service& ios : m_callback(c) , m_callback2(c2) , m_ipv4_sock(ios) + , m_v4_buf_size(0) + , m_v4_buf(0) #if TORRENT_USE_IPV6 , m_ipv6_sock(ios) + , m_v6_buf_size(0) + , m_v6_buf(0) #endif , m_bind_port(0) , m_outstanding(0) @@ -71,6 +75,7 @@ udp_socket::udp_socket(asio::io_service& ios , m_queue_packets(false) , m_tunnel_packets(false) , m_abort(false) + , m_reallocate_buffers(false) { #ifdef TORRENT_DEBUG m_magic = 0x1337; @@ -79,11 +84,22 @@ udp_socket::udp_socket(asio::io_service& ios #if defined BOOST_HAS_PTHREADS m_thread = 0; #endif +#endif + + m_v4_buf_size = 1600; + m_v4_buf = (char*)malloc(m_v4_buf_size); +#if TORRENT_USE_IPV6 + m_v6_buf_size = 1600; + m_v6_buf = (char*)malloc(m_v6_buf_size); #endif } udp_socket::~udp_socket() { + free(m_v4_buf); +#if TORRENT_USE_IPV6 + free(m_v6_buf); +#endif #ifdef TORRENT_DEBUG TORRENT_ASSERT(m_magic == 0x1337); TORRENT_ASSERT(!m_callback || !m_started); @@ -168,6 +184,18 @@ void udp_socket::send(udp::endpoint const& ep, char const* p, int len, error_cod #endif } +void udp_socket::maybe_realloc_buffers() +{ + if (m_reallocate_buffers) + { + m_v4_buf = (char*)realloc(m_v4_buf, m_v4_buf_size); +#if TORRENT_USE_IPV6 + m_v6_buf = (char*)realloc(m_v6_buf, m_v6_buf_size); +#endif + 
m_reallocate_buffers = false; + } +} + void udp_socket::on_read(udp::socket* s, error_code const& e, std::size_t bytes_transferred) { #if defined TORRENT_ASIO_DEBUGGING @@ -211,6 +239,7 @@ void udp_socket::on_read(udp::socket* s, error_code const& e, std::size_t bytes_ #ifndef BOOST_NO_EXCEPTIONS } catch(std::exception&) {} #endif + // don't stop listening on recoverable errors if (e != asio::error::host_unreachable && e != asio::error::fault @@ -230,17 +259,19 @@ void udp_socket::on_read(udp::socket* s, error_code const& e, std::size_t bytes_ if (m_abort) return; + maybe_realloc_buffers(); + #if defined TORRENT_ASIO_DEBUGGING add_outstanding_async("udp_socket::on_read"); #endif #if TORRENT_USE_IPV6 if (s == &m_ipv4_sock) #endif - s->async_receive_from(asio::buffer(m_v4_buf, sizeof(m_v4_buf)) + s->async_receive_from(asio::buffer(m_v4_buf, m_v4_buf_size) , m_v4_ep, boost::bind(&udp_socket::on_read, this, s, _1, _2)); #if TORRENT_USE_IPV6 else - s->async_receive_from(asio::buffer(m_v6_buf, sizeof(m_v6_buf)) + s->async_receive_from(asio::buffer(m_v6_buf, m_v6_buf_size) , m_v6_ep, boost::bind(&udp_socket::on_read, this, s, _1, _2)); #endif @@ -277,10 +308,12 @@ void udp_socket::on_read(udp::socket* s, error_code const& e, std::size_t bytes_ if (m_abort) return; + maybe_realloc_buffers(); + #if defined TORRENT_ASIO_DEBUGGING add_outstanding_async("udp_socket::on_read"); #endif - s->async_receive_from(asio::buffer(m_v4_buf, sizeof(m_v4_buf)) + s->async_receive_from(asio::buffer(m_v4_buf, m_v4_buf_size) , m_v4_ep, boost::bind(&udp_socket::on_read, this, s, _1, _2)); } #if TORRENT_USE_IPV6 @@ -307,10 +340,12 @@ void udp_socket::on_read(udp::socket* s, error_code const& e, std::size_t bytes_ if (m_abort) return; + maybe_realloc_buffers(); + #if defined TORRENT_ASIO_DEBUGGING add_outstanding_async("udp_socket::on_read"); #endif - s->async_receive_from(asio::buffer(m_v6_buf, sizeof(m_v6_buf)) + s->async_receive_from(asio::buffer(m_v6_buf, m_v6_buf_size) , m_v6_ep, 
boost::bind(&udp_socket::on_read, this, s, _1, _2)); } #endif @@ -421,6 +456,10 @@ void udp_socket::unwrap(error_code const& e, char const* buf, int size) m_callback(e, sender, p, size - (p - buf)); } +#ifndef BOOST_ASIO_ENABLE_CANCELIO +#error BOOST_ASIO_ENABLE_CANCELIO needs to be defined when building libtorrent to enable cancel() in asio on windows +#endif + void udp_socket::close() { TORRENT_ASSERT(is_single_thread()); @@ -461,6 +500,18 @@ void udp_socket::close() } } +void udp_socket::set_buf_size(int s) +{ + if (s > m_v4_buf_size) + { + m_v4_buf_size = s; +#if TORRENT_USE_IPV6 + m_v6_buf_size = s; +#endif + m_reallocate_buffers = true; + } +} + void udp_socket::bind(udp::endpoint const& ep, error_code& ec) { CHECK_MAGIC; @@ -474,6 +525,8 @@ void udp_socket::bind(udp::endpoint const& ep, error_code& ec) if (m_ipv6_sock.is_open()) m_ipv6_sock.close(ec); #endif + maybe_realloc_buffers(); + if (ep.address().is_v4()) { m_ipv4_sock.open(udp::v4(), ec); @@ -483,7 +536,7 @@ void udp_socket::bind(udp::endpoint const& ep, error_code& ec) #if defined TORRENT_ASIO_DEBUGGING add_outstanding_async("udp_socket::on_read"); #endif - m_ipv4_sock.async_receive_from(asio::buffer(m_v4_buf, sizeof(m_v4_buf)) + m_ipv4_sock.async_receive_from(asio::buffer(m_v4_buf, m_v4_buf_size) , m_v4_ep, boost::bind(&udp_socket::on_read, this, &m_ipv4_sock, _1, _2)); ++m_outstanding; } @@ -497,7 +550,7 @@ void udp_socket::bind(udp::endpoint const& ep, error_code& ec) #if defined TORRENT_ASIO_DEBUGGING add_outstanding_async("udp_socket::on_read"); #endif - m_ipv6_sock.async_receive_from(asio::buffer(m_v6_buf, sizeof(m_v6_buf)) + m_ipv6_sock.async_receive_from(asio::buffer(m_v6_buf, m_v6_buf_size) , m_v6_ep, boost::bind(&udp_socket::on_read, this, &m_ipv6_sock, _1, _2)); ++m_outstanding; } @@ -523,6 +576,8 @@ void udp_socket::bind(int port) if (m_ipv6_sock.is_open()) m_ipv6_sock.close(ec); #endif + maybe_realloc_buffers(); + m_ipv4_sock.open(udp::v4(), ec); if (!ec) { @@ -530,7 +585,7 @@ void 
udp_socket::bind(int port) add_outstanding_async("udp_socket::on_read"); #endif m_ipv4_sock.bind(udp::endpoint(address_v4::any(), port), ec); - m_ipv4_sock.async_receive_from(asio::buffer(m_v4_buf, sizeof(m_v4_buf)) + m_ipv4_sock.async_receive_from(asio::buffer(m_v4_buf, m_v4_buf_size) , m_v4_ep, boost::bind(&udp_socket::on_read, this, &m_ipv4_sock, _1, _2)); ++m_outstanding; #ifdef TORRENT_DEBUG @@ -546,7 +601,7 @@ void udp_socket::bind(int port) #endif m_ipv6_sock.set_option(v6only(true), ec); m_ipv6_sock.bind(udp::endpoint(address_v6::any(), port), ec); - m_ipv6_sock.async_receive_from(asio::buffer(m_v6_buf, sizeof(m_v6_buf)) + m_ipv6_sock.async_receive_from(asio::buffer(m_v6_buf, m_v6_buf_size) , m_v6_ep, boost::bind(&udp_socket::on_read, this, &m_ipv6_sock, _1, _2)); ++m_outstanding; #ifdef TORRENT_DEBUG diff --git a/src/ut_metadata.cpp b/src/ut_metadata.cpp index dc2a3057f..93ca997f0 100644 --- a/src/ut_metadata.cpp +++ b/src/ut_metadata.cpp @@ -210,6 +210,8 @@ namespace libtorrent { namespace , m_tp(tp) {} + virtual char const* type() const { return "ut_metadata"; } + // can add entries to the extension handshake virtual void add_handshake(entry& h) { diff --git a/src/ut_pex.cpp b/src/ut_pex.cpp index e1cfb4257..2dff2e94f 100644 --- a/src/ut_pex.cpp +++ b/src/ut_pex.cpp @@ -145,10 +145,20 @@ namespace libtorrent { namespace // no supported flags to set yet // 0x01 - peer supports encryption // 0x02 - peer is a seed + // 0x04 - supports uTP. This is only a positive flags + // passing 0 doesn't mean the peer doesn't + // support uTP + // 0x08 - supports holepunching protocol. If this + // flag is received from a peer, it can be + // used as a rendezvous point in case direct + // connections to the peer fail int flags = p->is_seed() ? 2 : 0; #ifndef TORRENT_DISABLE_ENCRYPTION flags |= p->supports_encryption() ? 1 : 0; #endif + flags |= p->get_socket()->get() ? 4 : 0; + flags |= p->supports_holepunch() ? 
8 : 0; + // i->first was added since the last time if (remote.address().is_v4()) { @@ -205,11 +215,14 @@ namespace libtorrent { namespace : m_torrent(t) , m_pc(pc) , m_tp(tp) + , m_last_pex(min_time()) , m_1_minute(55) , m_message_index(0) , m_first_time(true) {} + virtual char const* type() const { return "ut_pex"; } + virtual void add_handshake(entry& h) { entry& messages = h["m"]; @@ -239,9 +252,20 @@ namespace libtorrent { namespace m_pc.disconnect(errors::pex_message_too_large, 2); return true; } + + ptime now = time_now(); + if (now - m_last_pex < seconds(10)) + { + // this client appears to be trying to flood us + // with pex messages. Don't allow that. + m_pc.disconnect(errors::too_frequent_pex); + return true; + } if (body.left() < length) return true; + m_last_pex = now; + lazy_entry pex_msg; error_code ec; int ret = lazy_bdecode(body.begin, body.end, pex_msg, ec); @@ -251,13 +275,34 @@ namespace libtorrent { namespace return true; } - lazy_entry const* p = pex_msg.dict_find("added"); - lazy_entry const* pf = pex_msg.dict_find("added.f"); + lazy_entry const* p = pex_msg.dict_find_string("dropped"); +#ifdef TORRENT_VERBOSE_LOGGING + (*m_pc.m_logger) << time_now_string() << " <== PEX [" + " dropped:" << (p?p->string_length():0); +#endif + if (p) + { + int num_peers = p->string_length() / 6; + char const* in = p->string_ptr(); + + for (int i = 0; i < num_peers; ++i) + { + tcp::endpoint adr = detail::read_v4_endpoint(in); + peers4_t::value_type v(adr.address().to_v4().to_bytes(), adr.port()); + peers4_t::iterator j = std::lower_bound(m_peers.begin(), m_peers.end(), v); + if (j != m_peers.end() && *j == v) m_peers.erase(j); + } + } + + p = pex_msg.dict_find_string("added"); + lazy_entry const* pf = pex_msg.dict_find_string("added.f"); + +#ifdef TORRENT_VERBOSE_LOGGING + (*m_pc.m_logger) << " added:" << (p?p->string_length():0) << " ]\n"; +#endif if (p != 0 && pf != 0 - && p->type() == lazy_entry::string_t - && pf->type() == lazy_entry::string_t && 
pf->string_length() == p->string_length() / 6) { int num_peers = pf->string_length(); @@ -270,14 +315,39 @@ namespace libtorrent { namespace { tcp::endpoint adr = detail::read_v4_endpoint(in); char flags = *fin++; + + if (m_peers.size() >= m_torrent.settings().max_pex_peers) break; + // ignore local addresses unless the peer is local to us if (is_local(adr.address()) && !is_local(m_pc.remote().address())) continue; + + peers4_t::value_type v(adr.address().to_v4().to_bytes(), adr.port()); + peers4_t::iterator j = std::lower_bound(m_peers.begin(), m_peers.end(), v); + // do we already know about this peer? + if (j != m_peers.end() && *j == v) continue; + m_peers.insert(j, v); p.add_peer(adr, pid, peer_info::pex, flags); } } #if TORRENT_USE_IPV6 - lazy_entry const* p6 = pex_msg.dict_find("added6"); + + lazy_entry const* p6 = pex_msg.dict_find("dropped6"); + if (p6 != 0 && p6->type() == lazy_entry::string_t) + { + int num_peers = p6->string_length() / 18; + char const* in = p6->string_ptr(); + + for (int i = 0; i < num_peers; ++i) + { + tcp::endpoint adr = detail::read_v6_endpoint(in); + peers6_t::value_type v(adr.address().to_v6().to_bytes(), adr.port()); + peers6_t::iterator j = std::lower_bound(m_peers6.begin(), m_peers6.end(), v); + if (j != m_peers6.end() && *j == v) m_peers6.erase(j); + } + } + + p6 = pex_msg.dict_find("added6"); lazy_entry const* p6f = pex_msg.dict_find("added6.f"); if (p6 != 0 && p6f != 0 @@ -297,6 +367,13 @@ namespace libtorrent { namespace char flags = *fin++; // ignore local addresses unless the peer is local to us if (is_local(adr.address()) && !is_local(m_pc.remote().address())) continue; + if (m_peers6.size() >= m_torrent.settings().max_pex_peers) break; + + peers6_t::value_type v(adr.address().to_v6().to_bytes(), adr.port()); + peers6_t::iterator j = std::lower_bound(m_peers6.begin(), m_peers6.end(), v); + // do we already know about this peer? 
+ if (j != m_peers6.end() && *j == v) continue; + m_peers6.insert(j, v); p.add_peer(adr, pid, peer_info::pex, flags); } } @@ -323,8 +400,6 @@ namespace libtorrent { namespace m_1_minute = 0; } - private: - void send_ut_peer_diff() { // if there's no change in out peer set, don't send anything @@ -419,6 +494,23 @@ namespace libtorrent { namespace torrent& m_torrent; peer_connection& m_pc; ut_pex_plugin& m_tp; + // stores all peers this this peer is connected to. These lists + // are updated with each pex message and are limited in size + // to protect against malicious clients. These lists are also + // used for looking up which peer a peer that supports holepunch + // came from. + // these are vectors to save memory and keep the items close + // together for performance. Inserting and removing is relatively + // cheap since the lists' size is limited + typedef std::vector > peers4_t; + peers4_t m_peers; +#if TORRENT_USE_IPV6 + typedef std::vector > peers6_t; + peers6_t m_peers6; +#endif + // the last pex message we received + ptime m_last_pex; + int m_1_minute; int m_message_index; @@ -438,11 +530,10 @@ namespace libtorrent { namespace return boost::shared_ptr(new ut_pex_peer_plugin(m_torrent , *pc, *this)); } -}} +} } namespace libtorrent { - boost::shared_ptr create_ut_pex_plugin(torrent* t, void*) { if (t->torrent_file().priv() || (t->torrent_file().is_i2p() @@ -453,6 +544,28 @@ namespace libtorrent return boost::shared_ptr(new ut_pex_plugin(*t)); } + bool was_introduced_by(peer_plugin const* pp, tcp::endpoint const& ep) + { + ut_pex_peer_plugin* p = (ut_pex_peer_plugin*)pp; +#if TORRENT_USE_IPV6 + if (ep.address().is_v4()) + { +#endif + ut_pex_peer_plugin::peers4_t::value_type v(ep.address().to_v4().to_bytes(), ep.port()); + ut_pex_peer_plugin::peers4_t::const_iterator i + = std::lower_bound(p->m_peers.begin(), p->m_peers.end(), v); + return i != p->m_peers.end() && *i == v; +#if TORRENT_USE_IPV6 + } + else + { + ut_pex_peer_plugin::peers6_t::value_type 
v(ep.address().to_v6().to_bytes(), ep.port()); + ut_pex_peer_plugin::peers6_t::iterator i + = std::lower_bound(p->m_peers6.begin(), p->m_peers6.end(), v); + return i != p->m_peers6.end() && *i == v; + } +#endif + } } diff --git a/src/utp_socket_manager.cpp b/src/utp_socket_manager.cpp new file mode 100644 index 000000000..2f512f66b --- /dev/null +++ b/src/utp_socket_manager.cpp @@ -0,0 +1,324 @@ +/* + +Copyright (c) 2009, Arvid Norberg +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the distribution. + * Neither the name of the author nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +#include "libtorrent/utp_stream.hpp" +#include "libtorrent/udp_socket.hpp" +#include "libtorrent/utp_socket_manager.hpp" +#include "libtorrent/instantiate_connection.hpp" +#include "libtorrent/socket_io.hpp" +#include "libtorrent/broadcast_socket.hpp" // for is_teredo + +// #define TORRENT_DEBUG_MTU 1135 + +namespace libtorrent +{ + + utp_socket_manager::utp_socket_manager(session_settings const& sett, udp_socket& s + , incoming_utp_callback_t cb) + : m_sock(s) + , m_cb(cb) + , m_last_socket(0) + , m_new_connection(-1) + , m_sett(sett) + , m_sock_buf_size(0) + {} + + utp_socket_manager::~utp_socket_manager() + { + for (socket_map_t::iterator i = m_utp_sockets.begin() + , end(m_utp_sockets.end()); i != end; ++i) + { + delete_utp_impl(i->second); + } + } + + void utp_socket_manager::get_status(utp_status& s) const + { + s.num_idle = 0; + s.num_syn_sent = 0; + s.num_connected = 0; + s.num_fin_sent = 0; + s.num_close_wait = 0; + + for (socket_map_t::const_iterator i = m_utp_sockets.begin() + , end(m_utp_sockets.end()); i != end; ++i) + { + int state = utp_socket_state(i->second); + switch (state) + { + case 0: ++s.num_idle; break; + case 1: ++s.num_syn_sent; break; + case 2: ++s.num_connected; break; + case 3: ++s.num_fin_sent; break; + case 4: ++s.num_close_wait; break; + case 5: ++s.num_close_wait; break; + } + } + } + + void utp_socket_manager::tick(ptime now) + { + for (socket_map_t::iterator i = m_utp_sockets.begin() + , end(m_utp_sockets.end()); i != end;) + { + if (should_delete(i->second)) + { + delete_utp_impl(i->second); + if (m_last_socket == i->second) m_last_socket = 0; + m_utp_sockets.erase(i++); + continue; + } + tick_utp_impl(i->second, now); + ++i; + } + } + + void utp_socket_manager::mtu_for_dest(address const& addr, int& link_mtu, int& utp_mtu) + { + if (time_now() - m_last_route_update > seconds(60)) + { + m_last_route_update = time_now(); + error_code ec; + m_routes = enum_routes(m_sock.get_io_service(), ec); + } + + int mtu = 0; + if 
(!m_routes.empty()) + { + for (std::vector::iterator i = m_routes.begin() + , end(m_routes.end()); i != end; ++i) + { + if (!match_addr_mask(addr, i->destination, i->netmask)) continue; + + // assume that we'll actually use the route with the largest + // MTU (seems like a reasonable assumption). + // this could however be improved by using the route metrics + // and the prefix length of the netmask to order the matches + if (mtu < i->mtu) mtu = i->mtu; + } + } + + if (mtu == 0) + { + if (is_teredo(addr)) mtu = TORRENT_TEREDO_MTU; + else mtu = TORRENT_ETHERNET_MTU; + } + + // clamp the MTU within reasonable bounds + if (mtu < TORRENT_INET_MIN_MTU) mtu = TORRENT_INET_MIN_MTU; + else if (mtu > TORRENT_INET_MAX_MTU) mtu = TORRENT_INET_MAX_MTU; + + link_mtu = mtu; + + mtu -= TORRENT_UDP_HEADER; + + if (m_sock.get_proxy_settings().type == proxy_settings::socks5 + || m_sock.get_proxy_settings().type == proxy_settings::socks5_pw) + { + // this is for the IP layer + address proxy_addr = m_sock.proxy_addr().address(); + if (proxy_addr.is_v4()) mtu -= TORRENT_IPV4_HEADER; + else mtu -= TORRENT_IPV6_HEADER; + + // this is for the SOCKS layer + mtu -= TORRENT_SOCKS5_HEADER; + + // the address field in the SOCKS header + if (addr.is_v4()) mtu -= 4; + else mtu -= 16; + + } + else + { + if (addr.is_v4()) mtu -= TORRENT_IPV4_HEADER; + else mtu -= TORRENT_IPV6_HEADER; + } + + utp_mtu = mtu; + } + + void utp_socket_manager::send_packet(udp::endpoint const& ep, char const* p + , int len, error_code& ec, int flags) + { + if (!m_sock.is_open()) + { + ec = asio::error::operation_aborted; + return; + } + +#ifdef TORRENT_DEBUG_MTU + // drop packets that exceed the debug MTU + if ((flags & dont_fragment) && len > TORRENT_DEBUG_MTU) return; +#endif + +#ifdef TORRENT_HAS_DONT_FRAGMENT + error_code tmp; + if (flags & utp_socket_manager::dont_fragment) + m_sock.set_option(libtorrent::dont_fragment(true), tmp); +#endif + m_sock.send(ep, p, len, ec); +#ifdef TORRENT_HAS_DONT_FRAGMENT + if (flags 
& utp_socket_manager::dont_fragment) + m_sock.set_option(libtorrent::dont_fragment(false), tmp); +#endif + } + + tcp::endpoint utp_socket_manager::local_endpoint(error_code& ec) const + { + return m_sock.local_endpoint(ec); + } + + bool utp_socket_manager::incoming_packet(char const* p, int size, udp::endpoint const& ep) + { +// UTP_LOGV("incoming packet size:%d\n", size); + + if (size < sizeof(utp_header)) return false; + + utp_header const* ph = (utp_header*)p; + +// UTP_LOGV("incoming packet version:%d\n", int(ph->get_version())); + + if (ph->get_version() != 1) return false; + + const ptime receive_time = time_now_hires(); + + // parse out connection ID and look for existing + // connections. If found, forward to the utp_stream. + boost::uint16_t id = ph->connection_id; + + // first test to see if it's the same socket as last time + // in most cases it is + if (m_last_socket + && utp_match(m_last_socket, ep, id)) + { + return utp_incoming_packet(m_last_socket, p, size, ep, receive_time); + } + + socket_map_t::iterator i = m_utp_sockets.find(id); + + std::pair r = + m_utp_sockets.equal_range(id); + + for (; r.first != r.second; ++r.first) + { + if (!utp_match(r.first->second, ep, id)) continue; + bool ret = utp_incoming_packet(r.first->second, p, size, ep, receive_time); + if (ret) m_last_socket = r.first->second; + return ret; + } + +// UTP_LOGV("incoming packet id:%d source:%s\n", id, print_endpoint(ep).c_str()); + + if (!m_sett.enable_incoming_utp) + return false; + + // if not found, see if it's a SYN packet, if it is, + // create a new utp_stream + if (ph->get_type() == ST_SYN) + { + // create the new socket with this ID + m_new_connection = id; + +// UTP_LOGV("not found, new connection id:%d\n", m_new_connection); + + boost::shared_ptr c(new (std::nothrow) socket_type(m_sock.get_io_service())); + if (!c) return false; + instantiate_connection(m_sock.get_io_service(), proxy_settings(), *c, 0, this); + utp_stream* str = c->get(); + TORRENT_ASSERT(str); + int 
link_mtu, utp_mtu; + mtu_for_dest(ep.address(), link_mtu, utp_mtu); + utp_init_mtu(str->get_impl(), link_mtu, utp_mtu); + bool ret = utp_incoming_packet(str->get_impl(), p, size, ep, receive_time); + if (!ret) return false; + m_cb(c); + // the connection most likely changed its connection ID here + // we need to move it to the correct ID + return true; + } + + // #error send reset + + return false; + } + + void utp_socket_manager::remove_socket(boost::uint16_t id) + { + socket_map_t::iterator i = m_utp_sockets.find(id); + if (i == m_utp_sockets.end()) return; + delete_utp_impl(i->second); + if (m_last_socket == i->second) m_last_socket = 0; + m_utp_sockets.erase(i); + } + + void utp_socket_manager::set_sock_buf(int size) + { + if (size < m_sock_buf_size) return; + m_sock.set_buf_size(size); + error_code ec; + // add more socket buffer storage on the lower level socket + // to avoid dropping packets because of a full receive buffer + // while processing a packet + + // only update the buffer size if it's bigger than + // what we already have + datagram_socket::receive_buffer_size recv_buf_size_opt; + m_sock.get_option(recv_buf_size_opt, ec); + if (recv_buf_size_opt.value() < size * 10) + { + m_sock.set_option(datagram_socket::receive_buffer_size(size * 10), ec); + m_sock.set_option(datagram_socket::send_buffer_size(size * 3), ec); + } + m_sock_buf_size = size; + } + + utp_socket_impl* utp_socket_manager::new_utp_socket(utp_stream* str) + { + boost::uint16_t send_id = 0; + boost::uint16_t recv_id = 0; + if (m_new_connection != -1) + { + send_id = m_new_connection; + recv_id = m_new_connection + 1; + m_new_connection = -1; + } + else + { + send_id = rand(); + recv_id = send_id - 1; + } + utp_socket_impl* impl = construct_utp_impl(recv_id, send_id, str, this); + m_utp_sockets.insert(std::make_pair(recv_id, impl)); + return impl; + } +} + diff --git a/src/utp_stream.cpp b/src/utp_stream.cpp new file mode 100644 index 000000000..9fbd63bc7 --- /dev/null +++ 
b/src/utp_stream.cpp @@ -0,0 +1,2900 @@ +/* + +Copyright (c) 2009, Arvid Norberg +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the distribution. + * Neither the name of the author nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +#include "libtorrent/utp_stream.hpp" +#include "libtorrent/sliding_average.hpp" +#include "libtorrent/utp_socket_manager.hpp" +#include "libtorrent/alloca.hpp" +#include "libtorrent/timestamp_history.hpp" +#include "libtorrent/error.hpp" +#include + +#define TORRENT_UTP_LOG 0 +#define TORRENT_VERBOSE_UTP_LOG 0 +#define TORRENT_UT_SEQ 1 + +#if TORRENT_UTP_LOG +#include +#include "libtorrent/socket_io.hpp" +#endif + +namespace libtorrent { + +#if TORRENT_UTP_LOG + +char const* packet_type_names[] = { "ST_DATA", "ST_FIN", "ST_STATE", "ST_RESET", "ST_SYN" }; +char const* socket_state_names[] = { "NONE", "SYN_SENT", "CONNECTED", "FIN_SENT", "ERROR", "DELETE" }; + +static struct utp_logger +{ + FILE* utp_log_file; + mutex utp_log_mutex; + + utp_logger() : utp_log_file(0) + { + utp_log_file = fopen("utp.log", "w+"); + } + ~utp_logger() + { + if (utp_log_file) fclose(utp_log_file); + } +} log_file_holder; + +void utp_log(char const* fmt, ...) +{ + mutex::scoped_lock lock(log_file_holder.utp_log_mutex); + static ptime start = time_now_hires(); + fprintf(log_file_holder.utp_log_file, "[%012"PRId64"] ", total_microseconds(time_now_hires() - start)); + va_list l; + va_start(l, fmt); + vfprintf(log_file_holder.utp_log_file, fmt, l); + va_end(l); +} + +#define UTP_LOG utp_log +#if TORRENT_VERBOSE_UTP_LOG +#define UTP_LOGV utp_log +#else +#define UTP_LOGV if (false) printf +#endif + +#else + +#define UTP_LOG if (false) printf +#define UTP_LOGV if (false) printf + +#endif + +enum +{ + ACK_MASK = 0xffff, + + // the number of packets that'll fit in the reorder buffer + max_packets_reorder = 512, + + // if a packet receives more than this number of + // duplicate acks, we'll trigger a fast re-send + dup_ack_limit = 3, + + // the max number of packets to fast-resend per + // selective ack message + sack_resend_limit = 3, +}; + +// compare if lhs is less than rhs, taking wrapping +// into account. 
if lhs is close to UINT_MAX and rhs +// is close to 0, lhs is assumed to have wrapped and +// considered smaller +TORRENT_EXPORT bool compare_less_wrap(boost::uint32_t lhs, boost::uint32_t rhs, boost::uint32_t mask) +{ + // distance walking from lhs to rhs, downwards + boost::uint32_t dist_down = (lhs - rhs) & mask; + // distance walking from lhs to rhs, upwards + boost::uint32_t dist_up = (rhs - lhs) & mask; + + // if the distance walking up is shorter, lhs + // is less than rhs. If the distance walking down + // is shorter, then rhs is less than lhs + return dist_up < dist_down; +} + +// used for out-of-order incoming packets +// as well as sent packets that are waiting to be ACKed +struct packet +{ + // the last time this packet was sent + ptime send_time; + + // the size of the buffer 'buf' pointst to + boost::uint16_t size; + + // this is the offset to the payload inside the buffer + // this is also used as a cursor to describe where the + // next payload that hasn't been consumed yet starts + boost::uint16_t header_size; + + // the number of times this packet has been sent + boost::uint8_t num_transmissions:7; + + // true if we need to send this packet again. All + // outstanding packets are marked as needing to be + // resent on timeouts + bool need_resend:1; + + // this is set to true for packets that were + // sent with the DF bit set (Don't Fragment) + bool mtu_probe:1; + + // the actual packet buffer + char buf[]; +}; + +// since the uTP socket state may be needed after the +// utp_stream is closed, it's kept in a separate struct +// whose lifetime is not tied to the lifetime of utp_stream + +// the utp socket is closely modelled after the asio async +// operations and handler model. For writing to the socket, +// the client provides a list of buffers (for gather/writev +// style of I/O) and whenever the socket can write another +// packet to the stream, it picks up data from these buffers. 
+// When all of the data has been written, or enough time has +// passed since we first started writing, the write handler +// is called and the write buffer is reset. This means that +// we're not writing anything at all while waiting for the +// client to re-issue a write request. + +// reading is a little bit more complicated, since we must +// be able to receive data even when the user doesn't have +// an outstanding read operation on the socket. When the user +// does however, we want to receive data directly into the +// user's buffer instead of first copying it into our receive +// buffer. This is why the receive case is more complicated. +// There are two receive buffers. One provided by the user, +// which when present is always used. The other one is used +// when the user doesn't have an outstanding read request, +// and hence hasn't provided any buffer space to receive into. + +// the user provided read buffer is called "m_read_buffer" and +// its size is "m_read_buffer_size". The buffer we spill over +// into when the user provided buffer is full or when there +// is none, is "m_receive_buffer" and "m_receive_buffer_size" +// respectively. + +// in order to know when to trigger the read and write handlers +// there are two counters, m_read and m_written, which count +// the number of bytes we've stuffed into the user provided +// read buffer or written to the stream from the write buffer. +// These are used to trigger the handlers if we're written a +// large number of bytes. It's also triggered if we're filled +// the whole read buffer, or written the entire write buffer. +// The last way the handlers can be triggered is if we're read +// or written some, and enough time has elapsed since then. + +// when we receive data into m_receive_buffer (i.e. the buffer +// used when there's no user provided one) is stored as a +// number of heap allocated packets. 
This is just because it's +// simple to reuse the data structured and it provides all the +// functionality needed for this buffer. + +struct utp_socket_impl +{ + utp_socket_impl(boost::uint16_t recv_id, boost::uint16_t send_id + , void* userdata, utp_socket_manager* sm) + : m_sm(sm) + , m_userdata(userdata) + , m_read_handler(0) + , m_write_handler(0) + , m_connect_handler(0) + , m_remote_address() + , m_read_timeout() + , m_write_timeout() + , m_timeout(time_now_hires() + milliseconds(m_sm->connect_timeout())) + , m_last_cwnd_hit(min_time()) + , m_ack_timer(time_now() + minutes(10)) + , m_last_history_step(time_now_hires()) + , m_cwnd(TORRENT_ETHERNET_MTU << 16) + , m_buffered_incoming_bytes(0) + , m_reply_micro(0) + , m_adv_wnd(TORRENT_ETHERNET_MTU) + , m_bytes_in_flight(0) + , m_read(0) + , m_write_buffer_size(0) + , m_written(0) + , m_receive_buffer_size(0) + , m_read_buffer_size(0) + , m_in_buf_size(100 * 1024 * 1024) + , m_in_packets(0) + , m_out_packets(0) + , m_port(0) + , m_send_id(send_id) + , m_recv_id(recv_id) + , m_ack_nr(0) + , m_seq_nr(0) + , m_acked_seq_nr(0) + , m_fast_resend_seq_nr(0) + , m_eof_seq_nr(0) + , m_mtu(TORRENT_ETHERNET_MTU - TORRENT_IPV4_HEADER - TORRENT_UDP_HEADER - 8 - 24 - 36) + , m_mtu_floor(TORRENT_INET_MIN_MTU - TORRENT_IPV4_HEADER - TORRENT_UDP_HEADER) + , m_mtu_ceiling(TORRENT_ETHERNET_MTU - TORRENT_IPV4_HEADER - TORRENT_UDP_HEADER) + , m_mtu_seq(0) + , m_duplicate_acks(0) + , m_num_timeouts(0) + , m_delay_sample_idx(0) + , m_state(UTP_STATE_NONE) + , m_eof(false) + , m_attached(true) + , m_nagle(true) + { + for (int i = 0; i != num_delay_hist; ++i) + m_delay_sample_hist[i] = UINT_MAX; + } + + ~utp_socket_impl(); + + void init(udp::endpoint const& ep, boost::uint16_t id, void* userdata + , utp_socket_manager* sm) + { + m_remote_address = ep.address(); + m_port = ep.port(); + m_send_id = id + 1; + m_recv_id = id; + m_userdata = userdata; + m_sm = sm; + } + + void tick(ptime const& now); + void init_mtu(int link_mtu, int 
utp_mtu); + bool incoming_packet(char const* buf, int size + , udp::endpoint const& ep, ptime receive_time); + bool should_delete() const; + tcp::endpoint remote_endpoint(error_code& ec) const + { + if (m_state == UTP_STATE_NONE) + ec = asio::error::not_connected; + else + TORRENT_ASSERT(m_remote_address != address_v4::any()); + return tcp::endpoint(m_remote_address, m_port); + } + std::size_t available() const; + void destroy(); + void detach(); + void send_syn(); + void send_fin(); + + bool send_pkt(bool ack); + bool resend_packet(packet* p, bool fast_resend = false); + void send_reset(utp_header* ph); + void parse_sack(boost::uint16_t packet_ack, char const* ptr, int size, int* acked_bytes + , ptime const now, boost::uint32_t& min_rtt); + void write_payload(char* ptr, int size); + void ack_packet(packet* p, ptime const& receive_time + , boost::uint32_t& min_rtt, boost::uint16_t seq_nr); + void write_sack(char* buf, int size) const; + void incoming(char const* buf, int size, packet* p, ptime now); + void do_ledbat(int acked_bytes, int delay, int in_flight, ptime const now); + int packet_timeout() const; + bool test_socket_state(); + void maybe_trigger_receive_callback(ptime now); + void maybe_trigger_send_callback(ptime now); + bool cancel_handlers(error_code const& ec, bool kill); + bool consume_incoming_data( + utp_header const* ph, char const* ptr, int payload_size, ptime now); + void update_mtu_limits(); + void experienced_loss(int seq_nr); + + void check_receive_buffers() const; + + utp_socket_manager* m_sm; + + // userdata pointer passed along + // with any callback. This is initialized to 0 + // then set to point to the utp_stream when + // hooked up, and then reset to 0 once the utp_stream + // detaches. This is used to know whether or not + // the socket impl is still attached to a utp_stream + // object. When it isn't, we'll never be able to + // signal anything back to the client, and in case + // of errors, we just have to delete ourselves + // i.e. 
transition to the UTP_STATE_DELETED state + void* m_userdata; + + // This is a platform-independent replacement + // for the regular iovec type in posix. Since + // it's not used in any system call, we might as + // well define our own type instead of wrapping + // the system's type. + struct iovec_t + { + iovec_t(void* b, size_t l): buf(b), len(l) {} + void* buf; + size_t len; + }; + + // if there's currently an async read or write + // operation in progress, these buffers are initialized + // and used, otherwise any bytes received are stuck in + // m_receive_buffer until another read is made + // as we flush from the write buffer, individual iovecs + // are updated to only refer to unflushed portions of the + // buffers. Buffers that empty are erased from the vector. + std::vector m_write_buffer; + + // the user provided read buffer. If this has a size greater + // than 0, we'll always prefer using it over putting received + // data in the m_receive_buffer. As data is stored in the + // read buffer, the iovec_t elements are adjusted to only + // refer to the unwritten portions of the buffers, and the + // ones that fill up are erased from the vector + std::vector m_read_buffer; + + // packets we've received without a read operation + // active. Store them here until the client triggers + // an async_read_some + std::vector m_receive_buffer; + + // this is the error on this socket. 
If m_state is + // set to UTP_STATE_ERROR_WAIT, this error should be + // forwarded to the client as soon as we have a new + // async operation initiated + error_code m_error; + + // these are the callbacks made into the utp_stream object + // on read/write/connect events + utp_stream::handler_t m_read_handler; + utp_stream::handler_t m_write_handler; + utp_stream::connect_handler_t m_connect_handler; + + // the address of the remote endpoint + address m_remote_address; + + // the send and receive buffers + // maps packet sequence numbers + packet_buffer m_inbuf; + packet_buffer m_outbuf; + + // timers when we should trigger the read and + // write callbacks (unless the buffers fill up + // before) + ptime m_read_timeout; + ptime m_write_timeout; + + // the time when the last packet we sent times out. Including re-sends. + // if we ever end up not having sent anything in one second ( + // or one mean rtt + 2 average deviations, whichever is greater) + // we set our cwnd to 1 MSS. This condition can happen either because + // a packet has timed out and needs to be resent or because our + // cwnd is set to less than one MSS during congestion control. + // it can also happen if the other end sends an advertized window + // size less than one MSS. + ptime m_timeout; + + // the last time we wanted to send more data, but couldn't because + // it would bring the number of outstanding bytes above the cwnd. + // this is used to restrict increasing the cwnd size when we're + // not sending fast enough to need it bigger + ptime m_last_cwnd_hit; + + // the next time we need to send an ACK the latest + // updated every time we send an ACK and every time we + // put off sending an ACK for a received packet + ptime m_ack_timer; + + // the last time we stepped the timestamp history + ptime m_last_history_step; + + // the max number of bytes in-flight. 
This is a fixed point + // value, to get the true number of bytes, shift right 16 bits + // the value is always >= 0, but the calculations performed on + // it in do_ledbat() is signed. + boost::int64_t m_cwnd; + + timestamp_history m_delay_hist; + timestamp_history m_their_delay_hist; + + // the number of bytes we have buffered in m_inbuf + boost::int32_t m_buffered_incoming_bytes; + + // the timestamp diff in the last packet received + // this is what we'll send back + boost::uint32_t m_reply_micro; + + // this is the advertized receive window the other end sent + // we'll never have more un-acked bytes in flight + // if this ever gets set to zero, we'll try one packet every + // second until the window opens up again + boost::uint32_t m_adv_wnd; + + // the number of un-acked bytes we have sent + boost::int32_t m_bytes_in_flight; + + // the number of bytes read into the user provided + // buffer. If this grows too big, we'll trigger the + // read handler. + boost::int32_t m_read; + + // the sum of the lengths of all iovec in m_write_buffer + boost::int32_t m_write_buffer_size; + + // the number of bytes already written to packets + // from m_write_buffer + boost::int32_t m_written; + + // the sum of all packets stored in m_receive_buffer + boost::int32_t m_receive_buffer_size; + + // the sum of all buffers in m_read_buffer + boost::int32_t m_read_buffer_size; + + // max number of bytes to allocate for receive buffer + boost::int32_t m_in_buf_size; + + // this holds the 3 last delay measurements, + // these are the actual corrected delay measurements. + // the lowest of the 3 last ones is used in the congestion + // controller. This is to not completely close the cwnd + // by a single outlier. 
+ enum { num_delay_hist = 3 }; + boost::uint32_t m_delay_sample_hist[num_delay_hist]; + + // counters + boost::uint32_t m_in_packets; + boost::uint32_t m_out_packets; + + // average RTT + sliding_average<16> m_rtt; + + // port of destination endpoint + boost::uint16_t m_port; + + boost::uint16_t m_send_id; + boost::uint16_t m_recv_id; + + // this is the ack we're sending back. We have + // received all packets up to this sequence number + boost::uint16_t m_ack_nr; + + // the sequence number of the next packet + // we'll send + boost::uint16_t m_seq_nr; + + // this is the sequence number of the packet that + // everything has been ACKed up to. Everything we've + // sent up to this point has been received by the other + // end. + boost::uint16_t m_acked_seq_nr; + + // each packet gets one chance of "fast resend". i.e. + // if we have multiple duplicate acks, we may send a + // packet immediately, if m_fast_resend_seq_nr is set + // to that packet's sequence number + boost::uint16_t m_fast_resend_seq_nr; + + // this is the sequence number of the FIN packet + // we've received. This sequence number is only + // valid if m_eof is true. We should not accept + // any packets beyond this sequence number from the + // other end + boost::uint16_t m_eof_seq_nr; + + // this is the lowest sequence number that, when lost, + // will cause the window size to be cut in half + boost::uint16_t m_loss_seq_nr; + + // the max number of bytes we can send in a packet + // including the header + boost::uint16_t m_mtu; + + // the floor is the largest packet that we have + // been able to get through without fragmentation + boost::uint16_t m_mtu_floor; + + // the ceiling is the largest packet that we might + // be able to get through without fragmentation. + // i.e. 
ceiling +1 is very likely to not get through + // or we have in fact experienced a drop or ICMP + // message indicating that it is + boost::uint16_t m_mtu_ceiling; + + // the sequence number of the probe in-flight + // this is 0 if there is no probe in flight + boost::uint16_t m_mtu_seq; + + // this is a counter of how many times the current m_acked_seq_nr + // has been ACKed. If it's ACKed more than 3 times, we assume the + // packet with the next sequence number has been lost, and we trigger + // a re-send. Ovbiously an ACK only counts as a duplicate as long as + // we have outstanding packets following it. + boost::uint8_t m_duplicate_acks; + + // the number of packet timeouts we've seen in a row + // this affects the packet timeout time + boost::uint8_t m_num_timeouts; + + enum state_t { + // not yet connected + UTP_STATE_NONE, + // sent a syn packet, not received any acks + UTP_STATE_SYN_SENT, + // syn-ack received and in normal operation + // of sending and receiving data + UTP_STATE_CONNECTED, + // fin sent, but all packets up to the fin packet + // have not yet been acked. We might still be waiting + // for a FIN from the other end + UTP_STATE_FIN_SENT, + + // ====== states beyond this point ===== + // === are considered closing states === + // === and will cause the socket to ==== + // ============ be deleted ============= + + // the socket has been gracefully disconnected + // and is waiting for the client to make a + // socket call so that we can communicate this + // fact and actually delete all the state, or + // there is an error on this socket and we're + // waiting to communicate this to the client in + // a callback. The error in either case is stored + // in m_error. If the socket has gracefully shut + // down, the error is error::eof. 
+ UTP_STATE_ERROR_WAIT, + + // there are no more references to this socket + // and we can delete it + UTP_STATE_DELETE + }; + + // this is the cursor into m_delay_sample_hist + boost::uint8_t m_delay_sample_idx:2; + + // the state the socket is in + boost::uint8_t m_state:3; + + // this is set to true when we receive a fin + bool m_eof:1; + + // is this socket state attached to a user space socket? + bool m_attached:1; + + // this is true if nagle is enabled (which it is by default) + // TODO: support the option to turn it off + bool m_nagle:1; +}; + +#if defined TORRENT_VERBOSE_LOGGING || defined TORRENT_LOGGING || defined TORRENT_ERROR_LOGGING +int socket_impl_size() { return sizeof(utp_socket_impl); } +#endif + +utp_socket_impl* construct_utp_impl(boost::uint16_t recv_id + , boost::uint16_t send_id, void* userdata + , utp_socket_manager* sm) +{ + return new utp_socket_impl(recv_id, send_id, userdata, sm); +} + +void detach_utp_impl(utp_socket_impl* s) +{ + s->detach(); +} + +void delete_utp_impl(utp_socket_impl* s) +{ + delete s; +} + +bool should_delete(utp_socket_impl* s) +{ + return s->should_delete(); +} + +void tick_utp_impl(utp_socket_impl* s, ptime const& now) +{ + s->tick(now); +} + +void utp_init_mtu(utp_socket_impl* s, int link_mtu, int utp_mtu) +{ + s->init_mtu(link_mtu, utp_mtu); +} + +bool utp_incoming_packet(utp_socket_impl* s, char const* p + , int size, udp::endpoint const& ep, ptime receive_time) +{ + return s->incoming_packet(p, size, ep, receive_time); +} + +bool utp_match(utp_socket_impl* s, udp::endpoint const& ep, boost::uint16_t id) +{ + return s->m_remote_address == ep.address() + && s->m_port == ep.port() + && s->m_recv_id == id; +} + +udp::endpoint utp_remote_endpoint(utp_socket_impl* s) +{ + return udp::endpoint(s->m_remote_address, s->m_port); +} + +boost::uint16_t utp_receive_id(utp_socket_impl* s) +{ + return s->m_recv_id; +} + +void utp_socket_impl::update_mtu_limits() +{ + TORRENT_ASSERT(m_mtu_floor <= m_mtu_ceiling); + m_mtu = 
(m_mtu_floor + m_mtu_ceiling) / 2;
+
+	// clear the mtu probe sequence number since
+	// it was either dropped or acked
+	m_mtu_seq = 0;
+
+	if (m_mtu_ceiling - m_mtu_floor < 10)
+	{
+		// we have narrowed down the mtu within 10
+		// bytes. That's good enough, start using
+		// floor as the packet size from now on.
+		// set the ceiling to the floor as well to
+		// disable more probes to be sent
+		// we'll never re-probe this connection
+		m_mtu = m_mtu_ceiling = m_mtu_floor;
+	}
+}
+
+int utp_socket_state(utp_socket_impl const* s)
+{
+	return s->m_state;
+}
+
+utp_stream::utp_stream(asio::io_service& io_service)
+	: m_io_service(io_service)
+	, m_impl(0)
+	, m_open(false)
+{
+}
+
+utp_socket_impl* utp_stream::get_impl()
+{
+	return m_impl;
+}
+
+void utp_stream::close()
+{
+	if (!m_impl) return;
+	m_impl->destroy();
+}
+
+// returns the number of received bytes buffered in the socket
+// state. m_impl is reset to 0 by the on_read/on_write/on_connect
+// callbacks when they are invoked with kill set, so just like
+// close() and remote_endpoint() this has to tolerate a stream
+// without a socket state, in which case there is nothing to read
+std::size_t utp_stream::available() const
+{
+	return m_impl ? m_impl->available() : 0;
+}
+
+utp_stream::endpoint_type utp_stream::remote_endpoint(error_code& ec) const
+{
+	if (!m_impl)
+	{
+		ec = asio::error::not_connected;
+		return endpoint_type();
+	}
+	return m_impl->remote_endpoint(ec);
+}
+
+utp_stream::endpoint_type utp_stream::local_endpoint(error_code& ec) const
+{
+	if (m_impl == 0 || m_impl->m_sm == 0)
+	{
+		ec = asio::error::not_connected;
+		return endpoint_type();
+	}
+	return m_impl->m_sm->local_endpoint(ec);
+}
+
+utp_stream::~utp_stream()
+{
+	if (m_impl)
+	{
+		UTP_LOGV("%8p: utp_stream destructed\n", m_impl);
+		m_impl->destroy();
+		detach_utp_impl(m_impl);
+	}
+
+	m_impl = 0;
+}
+
+void utp_stream::set_impl(utp_socket_impl* impl)
+{
+	TORRENT_ASSERT(m_impl == 0);
+	TORRENT_ASSERT(!m_open);
+	m_impl = impl;
+	m_open = true;
+}
+
+int utp_stream::read_buffer_size() const
+{
+	TORRENT_ASSERT(m_impl);
+	return m_impl->m_receive_buffer_size;
+}
+
+void utp_stream::on_read(void* self, size_t bytes_transferred, error_code const& ec, bool kill)
+{
+	utp_stream* s = (utp_stream*)self;
+
+	UTP_LOGV("%8p: calling read handler read:%d ec:%s kill:%d\n", 
s->m_impl + , int(bytes_transferred), ec.message().c_str(), kill); + + TORRENT_ASSERT(s->m_read_handler); + TORRENT_ASSERT(bytes_transferred > 0 || ec); + s->m_io_service.post(boost::bind(s->m_read_handler, ec, bytes_transferred)); + s->m_read_handler.clear(); + if (kill && s->m_impl) + { + detach_utp_impl(s->m_impl); + s->m_impl = 0; + } +} + +void utp_stream::on_write(void* self, size_t bytes_transferred, error_code const& ec, bool kill) +{ + utp_stream* s = (utp_stream*)self; + + UTP_LOGV("%8p: calling write handler written:%d ec:%s kill:%d\n", s->m_impl + , int(bytes_transferred), ec.message().c_str(), kill); + + TORRENT_ASSERT(s->m_write_handler); + TORRENT_ASSERT(bytes_transferred > 0 || ec); + s->m_io_service.post(boost::bind(s->m_write_handler, ec, bytes_transferred)); + s->m_write_handler.clear(); + if (kill && s->m_impl) + { + detach_utp_impl(s->m_impl); + s->m_impl = 0; + } +} + +void utp_stream::on_connect(void* self, error_code const& ec, bool kill) +{ + utp_stream* s = (utp_stream*)self; + + UTP_LOGV("%8p: calling connect handler ec:%s kill:%d\n" + , s->m_impl, ec.message().c_str(), kill); + + TORRENT_ASSERT(s->m_connect_handler); + s->m_io_service.post(boost::bind(s->m_connect_handler, ec)); + s->m_connect_handler.clear(); + if (kill && s->m_impl) + { + detach_utp_impl(s->m_impl); + s->m_impl = 0; + } +} + +void utp_stream::add_read_buffer(void* buf, size_t len) +{ + TORRENT_ASSERT(m_impl); + TORRENT_ASSERT(len < INT_MAX); + TORRENT_ASSERT(len > 0); + TORRENT_ASSERT(buf); + m_impl->m_read_buffer.push_back(utp_socket_impl::iovec_t(buf, len)); + m_impl->m_read_buffer_size += len; + + UTP_LOGV("%8p: add_read_buffer %d bytes\n", m_impl, int(len)); +} + +// this is the wrapper to add a user provided write buffer to the +// utp_socket_impl. 
It makes sure the m_write_buffer_size is kept +// up to date +void utp_stream::add_write_buffer(void const* buf, size_t len) +{ + TORRENT_ASSERT(m_impl); + TORRENT_ASSERT(len < INT_MAX); + TORRENT_ASSERT(len > 0); + TORRENT_ASSERT(buf); + +#ifdef TORRENT_DEBUG + int write_buffer_size = 0; + for (std::vector::iterator i = m_impl->m_write_buffer.begin() + , end(m_impl->m_write_buffer.end()); i != end; ++i) + { + write_buffer_size += i->len; + } + TORRENT_ASSERT(m_impl->m_write_buffer_size == write_buffer_size); +#endif + + m_impl->m_write_buffer.push_back(utp_socket_impl::iovec_t((void*)buf, len)); + m_impl->m_write_buffer_size += len; + +#ifdef TORRENT_DEBUG + write_buffer_size = 0; + for (std::vector::iterator i = m_impl->m_write_buffer.begin() + , end(m_impl->m_write_buffer.end()); i != end; ++i) + { + write_buffer_size += i->len; + } + TORRENT_ASSERT(m_impl->m_write_buffer_size == write_buffer_size); +#endif + + UTP_LOGV("%8p: add_write_buffer %d bytes\n", m_impl, int(len)); +} + +// this is called when all user provided read buffers have been added +// and it's time to execute the async operation. The first thing we +// do is to copy any data stored in m_receive_buffer into the user +// provided buffer. This might be enough to in turn trigger the read +// handler immediately. +void utp_stream::set_read_handler(handler_t h) +{ + m_impl->m_read_handler = h; + if (m_impl->test_socket_state()) return; + + UTP_LOGV("%8p: new read handler. %d bytes in buffer\n" + , m_impl, m_impl->m_receive_buffer_size); + + TORRENT_ASSERT(m_impl->m_read_buffer_size > 0); + + // so, the client wants to read. 
If we already + // have some data in the read buffer, move it into the + // client's buffer right away + + m_impl->m_read += read_some(false); + m_impl->maybe_trigger_receive_callback(time_now_hires()); +} + +size_t utp_stream::read_some(bool clear_buffers) +{ + if (m_impl->m_receive_buffer_size == 0) + { + if (clear_buffers) + { + m_impl->m_read_buffer_size = 0; + m_impl->m_read_buffer.clear(); + } + return 0; + } + + std::vector::iterator target = m_impl->m_read_buffer.begin(); + + size_t ret = 0; + + int pop_packets = 0; + for (std::vector::iterator i = m_impl->m_receive_buffer.begin() + , end(m_impl->m_receive_buffer.end()); i != end;) + { + if (target == m_impl->m_read_buffer.end()) + { + UTP_LOGV(" No more target buffers: %d bytes left in buffer\n" + , m_impl->m_receive_buffer_size); + TORRENT_ASSERT(m_impl->m_read_buffer.empty()); + break; + } + + m_impl->check_receive_buffers(); + + packet* p = *i; + int to_copy = (std::min)(p->size - p->header_size, int(target->len)); + TORRENT_ASSERT(to_copy >= 0); + memcpy(target->buf, p->buf + p->header_size, to_copy); + ret += to_copy; + target->buf = ((char*)target->buf) + to_copy; + TORRENT_ASSERT(target->len >= to_copy); + target->len -= to_copy; + m_impl->m_receive_buffer_size -= to_copy; + TORRENT_ASSERT(m_impl->m_read_buffer_size >= to_copy); + m_impl->m_read_buffer_size -= to_copy; + p->header_size += to_copy; + if (target->len == 0) target = m_impl->m_read_buffer.erase(target); + + m_impl->check_receive_buffers(); + + TORRENT_ASSERT(m_impl->m_receive_buffer_size >= 0); + + // Consumed entire packet + if (p->header_size == p->size) + { + free(p); + ++pop_packets; + *i = 0; + ++i; + } + + if (m_impl->m_receive_buffer_size == 0) + { + UTP_LOGV(" Didn't fill entire target: %d bytes left in buffer\n" + , m_impl->m_receive_buffer_size); + break; + } + } + // remove the packets from the receive_buffer that we already copied over + // and freed + m_impl->m_receive_buffer.erase(m_impl->m_receive_buffer.begin() + , 
m_impl->m_receive_buffer.begin() + pop_packets); + // we exited either because we ran out of bytes to copy + // or because we ran out of space to copy the bytes to + TORRENT_ASSERT(m_impl->m_receive_buffer_size == 0 + || m_impl->m_read_buffer.empty()); + + UTP_LOGV("%8p: %d packets moved from buffer to user space\n" + , m_impl, pop_packets); + + if (clear_buffers) + { + m_impl->m_read_buffer_size = 0; + m_impl->m_read_buffer.clear(); + } + TORRENT_ASSERT(ret > 0); + return ret; +} + +// this is called when all user provided write buffers have been +// added. Start trying to send packets with the payload immediately. +void utp_stream::set_write_handler(handler_t h) +{ + UTP_LOGV("%8p: new write handler. %d bytes to write\n" + , m_impl, m_impl->m_write_buffer_size); + + TORRENT_ASSERT(m_impl->m_write_buffer_size > 0); + + m_impl->m_write_handler = h; + m_impl->m_written = 0; + if (m_impl->test_socket_state()) return; + + // try to write. send_pkt returns false if there's + // no more payload to send or if the congestion window + // is full and we can't send more packets right now + while (m_impl->send_pkt(false)); + + // if there was an error in send_pkt(), m_impl may be + // 0 at this point + if (m_impl) m_impl->maybe_trigger_send_callback(time_now_hires()); +} + +void utp_stream::do_connect(tcp::endpoint const& ep, utp_stream::connect_handler_t handler) +{ + int link_mtu, utp_mtu; + m_impl->m_sm->mtu_for_dest(ep.address(), link_mtu, utp_mtu); + m_impl->init_mtu(link_mtu, utp_mtu); + TORRENT_ASSERT(m_impl->m_connect_handler == 0); + m_impl->m_remote_address = ep.address(); + m_impl->m_port = ep.port(); + m_impl->m_connect_handler = handler; + + if (m_impl->test_socket_state()) return; + m_impl->send_syn(); +} + +// =========== utp_socket_impl ============ + +utp_socket_impl::~utp_socket_impl() +{ + TORRENT_ASSERT(!m_attached); + + UTP_LOGV("%8p: destroying utp socket state\n", this); + + // free any buffers we're holding + for (boost::uint16_t i = m_inbuf.cursor(), 
end((m_inbuf.cursor() + + m_inbuf.capacity()) & ACK_MASK); + i != end; i = (i + 1) & ACK_MASK) + { + void* p = m_inbuf.remove(i); + free(p); + } + for (boost::uint16_t i = m_outbuf.cursor(), end((m_outbuf.cursor() + + m_outbuf.capacity()) & ACK_MASK); + i != end; i = (i + 1) & ACK_MASK) + { + void* p = m_outbuf.remove(i); + free(p); + } + + for (std::vector::iterator i = m_receive_buffer.begin() + , end = m_receive_buffer.end(); i != end; ++i) + { + free(*i); + } +} + +bool utp_socket_impl::should_delete() const +{ + // if the socket state is not attached anymore we're free + // to delete it from the client's point of view. The other + // endpoint however might still need to be told that we're + // closing the socket. Only delete the state if we're not + // attached and we're in a state where the other end doesn't + // expect the socket to still be alive + bool ret = (m_state >= UTP_STATE_ERROR_WAIT || m_state == UTP_STATE_NONE) + && !m_attached; + + if (ret) + { + UTP_LOGV("%8p: should_delete() = true\n", this); + } + + return ret; +} + +void utp_socket_impl::maybe_trigger_receive_callback(ptime now) +{ + // nothing has been read or there's no outstanding read operation + if (m_read == 0 || m_read_handler == 0) return; + + if (m_read > m_read_buffer_size / 2 || now >= m_read_timeout) + { + UTP_LOGV("%8p: calling read handler read:%d\n", this, m_read); + m_read_handler(m_userdata, m_read, m_error, false); + m_read_handler = 0; + m_read = 0; + m_read_buffer_size = 0; + m_read_buffer.clear(); + } +} + +void utp_socket_impl::maybe_trigger_send_callback(ptime now) +{ + // nothing has been written or there's no outstanding write operation + if (m_written == 0 || m_write_handler == 0) return; + + if (m_written > m_write_buffer_size / 2 || now >= m_write_timeout) + { + UTP_LOGV("%8p: calling write handler written:%d\n", this, m_written); + + m_write_handler(m_userdata, m_written, m_error, false); + m_write_handler = 0; + m_written = 0; + m_write_buffer_size = 0; + 
m_write_buffer.clear(); + } +} + +void utp_socket_impl::destroy() +{ +#if TORRENT_UTP_LOG + UTP_LOGV("%8p: destroy state:%s\n", this, socket_state_names[m_state]); +#endif + + if (m_userdata == 0) return; + + if (m_state == UTP_STATE_CONNECTED) + { + send_fin(); + if (m_state == UTP_STATE_ERROR_WAIT || m_state == UTP_STATE_DELETE) return; + } + + bool cancelled = cancel_handlers(asio::error::operation_aborted, true); + + m_userdata = 0; + m_read_buffer.clear(); + m_read_buffer_size = 0; + + m_write_buffer.clear(); + m_write_buffer_size = 0; + + if ((m_state == UTP_STATE_ERROR_WAIT + || m_state == UTP_STATE_NONE + || m_state == UTP_STATE_SYN_SENT) && cancelled) + { + m_state = UTP_STATE_DELETE; +#if TORRENT_UTP_LOG + UTP_LOGV("%8p: state:%s\n", this, socket_state_names[m_state]); +#endif + return; + } + + // #error our end is closing. Wait for everything to be acked +} + +void utp_socket_impl::detach() +{ + UTP_LOGV("%8p: detach()\n", this); + m_attached = false; +} + +void utp_socket_impl::send_syn() +{ + m_seq_nr = rand(); + m_acked_seq_nr = (m_seq_nr - 1) & ACK_MASK; + m_loss_seq_nr = m_acked_seq_nr; + m_ack_nr = 0; + m_fast_resend_seq_nr = m_seq_nr; + + packet* p = (packet*)malloc(sizeof(packet) + sizeof(utp_header)); + p->size = sizeof(utp_header); + p->header_size = sizeof(utp_header); + p->num_transmissions = 1; + p->need_resend = false; + utp_header* h = (utp_header*)p->buf; + h->type_ver = (ST_SYN << 4) | 1; + h->extension = 0; + // using recv_id here is intentional! This is an odd + // thing in uTP. The syn packet is sent with the connection + // ID that it expects to receive the syn ack on. All + // subsequent connection IDs will be this plus one. 
+ h->connection_id = m_recv_id; + h->timestamp_difference_microseconds = m_reply_micro; + h->wnd_size = 0; + h->seq_nr = m_seq_nr; + h->ack_nr = 0; + + ptime now = time_now_hires(); + p->send_time = now; + h->timestamp_microseconds = boost::uint32_t(total_microseconds(now - min_time())); + +#if TORRENT_UTP_LOG + UTP_LOGV("%8p: send_syn seq_nr:%d id:%d target:%s\n" + , this, int(m_seq_nr), int(m_recv_id) + , print_endpoint(udp::endpoint(m_remote_address, m_port)).c_str()); +#endif + + error_code ec; + m_sm->send_packet(udp::endpoint(m_remote_address, m_port), (char const*)h + , sizeof(utp_header), ec); + + if (ec) + { + free(p); + m_error = ec; + m_state = UTP_STATE_ERROR_WAIT; + test_socket_state(); + return; + } + + TORRENT_ASSERT(!m_outbuf.at(m_seq_nr)); + m_outbuf.insert(m_seq_nr, p); + + m_seq_nr = (m_seq_nr + 1) & ACK_MASK; + + TORRENT_ASSERT(!m_error); + m_state = UTP_STATE_SYN_SENT; +#if TORRENT_UTP_LOG + UTP_LOGV("%8p: state:%s\n", this, socket_state_names[m_state]); +#endif +} + +void utp_socket_impl::send_fin() +{ + TORRENT_ASSERT(m_state != UTP_STATE_FIN_SENT); + + // we need a heap allocated packet in order to stick it + // in the send buffer, so that we can resend it + packet* p = (packet*)malloc(sizeof(packet) + sizeof(utp_header)); + + p->size = sizeof(utp_header); + p->header_size = sizeof(utp_header); + p->num_transmissions = 1; + p->need_resend = false; + utp_header* h = (utp_header*)p->buf; + + h->type_ver = (ST_FIN << 4) | 1; + h->extension = 0; + h->connection_id = m_send_id; + h->timestamp_difference_microseconds = m_reply_micro; + h->wnd_size = m_in_buf_size - m_buffered_incoming_bytes - m_receive_buffer_size; + h->seq_nr = m_seq_nr; + h->ack_nr = m_ack_nr; + + ptime now = time_now_hires(); + p->send_time = now; + h->timestamp_microseconds = boost::uint32_t(total_microseconds(now - min_time())); + + error_code ec; + m_sm->send_packet(udp::endpoint(m_remote_address, m_port) + , (char const*)h, sizeof(utp_header), ec); + +#if TORRENT_UTP_LOG + 
UTP_LOGV("%8p: sending FIN seq_nr:%d ack_nr:%d type:%s " + "id:%d target:%s size:%d error:%s send_buffer_size:%d\n" + , this, int(h->seq_nr), int(h->ack_nr), packet_type_names[h->get_type()] + , m_send_id, print_endpoint(udp::endpoint(m_remote_address, m_port)).c_str() + , int(sizeof(utp_header)), ec.message().c_str(), m_write_buffer_size); +#endif + + if (ec) + { + m_error = ec; + m_state = UTP_STATE_ERROR_WAIT; + test_socket_state(); + free(p); + return; + } + +#if !TORRENT_UT_SEQ + // if the other end closed the connection immediately + // our FIN packet will end up having the same sequence + // number as the SYN, so this assert is invalid + TORRENT_ASSERT(!m_outbuf.at(m_seq_nr)); +#endif + + packet* old = (packet*)m_outbuf.insert(m_seq_nr, p); + if (old) + { + if (!old->need_resend) m_bytes_in_flight -= old->size - old->header_size; + free(old); + } + m_seq_nr = (m_seq_nr + 1) & ACK_MASK; + m_fast_resend_seq_nr = m_seq_nr; + + TORRENT_ASSERT(!m_error); + m_state = UTP_STATE_FIN_SENT; + +#if TORRENT_UTP_LOG + UTP_LOGV("%8p: state:%s\n", this, socket_state_names[m_state]); +#endif +} + +void utp_socket_impl::send_reset(utp_header* ph) +{ + utp_header h; + h.type_ver = (ST_RESET << 4) | 1; + h.extension = 0; + h.connection_id = m_send_id; + h.timestamp_difference_microseconds = m_reply_micro; + h.wnd_size = 0; + h.seq_nr = rand(); + h.ack_nr = ph->seq_nr; + ptime now = time_now_hires(); + h.timestamp_microseconds = boost::uint32_t(total_microseconds(now - min_time())); + + UTP_LOGV("%8p: send_reset seq_nr:%d id:%d ack_nr:%d\n" + , this, int(h.seq_nr), int(m_send_id), int(ph->seq_nr)); + + // ignore errors here + error_code ec; + m_sm->send_packet(udp::endpoint(m_remote_address, m_port), (char const*)&h, sizeof(h), ec); +} + +std::size_t utp_socket_impl::available() const +{ + return m_receive_buffer_size; +} + +void utp_socket_impl::parse_sack(boost::uint16_t packet_ack, char const* ptr + , int size, int* acked_bytes, ptime const now, boost::uint32_t& min_rtt) +{ 
+	if (size == 0) return;
+
+	// this is the sequence number the current bit represents
+	int ack_nr = (packet_ack + 2) & ACK_MASK;
+
+#if TORRENT_UTP_LOG
+	std::string bitmask;
+	for (char const* b = ptr, *end = ptr + size; b != end; ++b)
+	{
+		unsigned char bitfield = unsigned(*b);
+		unsigned char mask = 1;
+		// for each bit
+		for (int i = 0; i < 8; ++i)
+		{
+			bitmask += (mask & bitfield) ? "1" : "0";
+			mask <<= 1;
+		}
+	}
+	UTP_LOGV("%8p: got SACK first:%d %s our_seq_nr:%u\n"
+		, this, ack_nr, bitmask.c_str(), m_seq_nr);
+#endif
+
+	// the number of acked packets past the fast re-send sequence number
+	// this is used to determine if we should trigger more fast re-sends
+	int dups = 0;
+
+	// the sequence number of the last ACKed packet
+	int last_ack = packet_ack;
+
+	// for each byte
+	for (char const* end = ptr + size; ptr != end; ++ptr)
+	{
+		unsigned char bitfield = unsigned(*ptr);
+		unsigned char mask = 1;
+		// for each bit
+		for (int i = 0; i < 8; ++i)
+		{
+			if (mask & bitfield)
+			{
+				last_ack = ack_nr;
+				if (m_fast_resend_seq_nr == ack_nr)
+					m_fast_resend_seq_nr = (m_fast_resend_seq_nr + 1) & ACK_MASK;
+
+				if (compare_less_wrap(m_fast_resend_seq_nr, ack_nr, 0xffff)) ++dups;
+				// this bit was set, ack_nr was received
+				packet* p = (packet*)m_outbuf.remove(ack_nr);
+				if (p)
+				{
+					// acked_bytes is an out-parameter (int*). Add the
+					// payload size to the counter it points to, rather
+					// than advancing the pointer itself, which would
+					// leave the caller's count untouched
+					if (acked_bytes) *acked_bytes += p->size - p->header_size;
+					// each ACKed packet counts as a duplicate ack
+					UTP_LOGV("%8p: duplicate_acks:%u fast_resend_seq_nr:%u\n"
+						, this, m_duplicate_acks, m_fast_resend_seq_nr);
+					ack_packet(p, now, min_rtt, ack_nr);
+				}
+				else if (((m_acked_seq_nr + 1) & ACK_MASK) == ack_nr)
+				{
+					// this packet must have been acked by a previous
+					// selective ack
+					// (the increment is masked like every other
+					// sequence number step, since ack_nr is always
+					// kept within ACK_MASK and the comparison would
+					// otherwise never match across the wrap-around)
+					m_acked_seq_nr = ack_nr;
+				}
+			}
+
+			mask <<= 1;
+			ack_nr = (ack_nr + 1) & ACK_MASK;
+
+			// we haven't sent packets past this point. 
+ // if there are any more bits set, we have to + // ignore them anyway + if (ack_nr == m_seq_nr) break; + } + if (ack_nr == m_seq_nr) break; + } + + // we received more than dup_ack_limit ACKs in this SACK message. + // trigger fast re-send + if (dups >= dup_ack_limit && compare_less_wrap(m_fast_resend_seq_nr, last_ack, 0xffff)) + { + experienced_loss(m_fast_resend_seq_nr); + int num_resent = 0; + for (; m_fast_resend_seq_nr != last_ack; m_fast_resend_seq_nr = (m_fast_resend_seq_nr + 1) & ACK_MASK) + { + packet* p = (packet*)m_outbuf.at(m_fast_resend_seq_nr); + if (!p) continue; + ++num_resent; + if (!resend_packet(p, true)) break; + m_duplicate_acks = 0; + if (num_resent >= sack_resend_limit) break; + } + } +} + +// copies data from the write buffer into the packet +// pointed to by ptr +void utp_socket_impl::write_payload(char* ptr, int size) +{ +#ifdef TORRENT_DEBUG + int write_buffer_size = 0; + for (std::vector::iterator i = m_write_buffer.begin() + , end(m_write_buffer.end()); i != end; ++i) + { + write_buffer_size += i->len; + } + TORRENT_ASSERT(m_write_buffer_size == write_buffer_size); +#endif + TORRENT_ASSERT(!m_write_buffer.empty() || size == 0); + TORRENT_ASSERT(m_write_buffer_size >= size); + std::vector::iterator i = m_write_buffer.begin(); + + if (size == 0) return; + + ptime now = time_now_hires(); + + int buffers_to_clear = 0; + while (size > 0) + { + // i points to the iovec we'll start copying from + int to_copy = (std::min)(size, int(i->len)); + memcpy(ptr, static_cast(i->buf), to_copy); + size -= to_copy; + if (m_written == 0) + { + m_write_timeout = now + milliseconds(100); + UTP_LOGV("%8p: setting write timeout to 100 ms from now\n", this); + } + TORRENT_ASSERT(to_copy >= 0); + TORRENT_ASSERT(to_copy < INT_MAX / 2 && m_written < INT_MAX / 2); + m_written += to_copy; + ptr += to_copy; + i->len -= to_copy; + TORRENT_ASSERT(m_write_buffer_size >= to_copy); + m_write_buffer_size -= to_copy; + ((char const*&)i->buf) += to_copy; + if (i->len == 0) 
++buffers_to_clear; + ++i; + } + + if (buffers_to_clear) + m_write_buffer.erase(m_write_buffer.begin() + , m_write_buffer.begin() + buffers_to_clear); + +#ifdef TORRENT_DEBUG + write_buffer_size = 0; + for (std::vector::iterator i = m_write_buffer.begin() + , end(m_write_buffer.end()); i != end; ++i) + { + write_buffer_size += i->len; + } + TORRENT_ASSERT(m_write_buffer_size == write_buffer_size); +#endif +} + +// sends a packet, pulls data from the write buffer (if there's any) +// if ack is true, we need to send a packet regardless of if there's +// any data. Returns true if we could send more data (i.e. call +// send_pkt() again) +bool utp_socket_impl::send_pkt(bool ack) +{ + // This assert is bad because we call this function to ack + // received FIN when we're in UTP_STATE_FIN_SENT. + // + // TORRENT_ASSERT(m_state != UTP_STATE_FIN_SENT); + + // first see if we need to resend any packets + + for (int i = (m_acked_seq_nr + 1) & ACK_MASK; i != m_seq_nr; i = (i + 1) & ACK_MASK) + { + packet* p = (packet*)m_outbuf.at(i); + if (!p) continue; + if (!p->need_resend) continue; + if (!resend_packet(p)) + { + // we couldn't resend the packet. It probably doesn't + // fit in our cwnd. If ack is set, we need to continue + // to send our ack anyway, if we don't have to send an + // ack, we might as well return + if (!ack) return false; + // resend_packet might have failed + if (m_state == UTP_STATE_ERROR_WAIT || m_state == UTP_STATE_DELETE) return false; + break; + } + + // don't fast-resend this packet + if (m_fast_resend_seq_nr == i) + m_fast_resend_seq_nr = (m_fast_resend_seq_nr + 1) & ACK_MASK; + } + + bool ret = false; + + int sack = 0; + if (m_inbuf.size()) + { + // the SACK bitfield should ideally fit all + // the pieces we have successfully received + sack = (m_inbuf.span() + 7) / 8; + if (sack > 32) sack = 32; + } + + int header_size = sizeof(utp_header) + (sack ? 
sack + 2 : 0);
+	int payload_size = m_write_buffer_size;
+	if (m_mtu - header_size < payload_size)
+	{
+		payload_size = m_mtu - header_size;
+		ret = true; // there's more data to send
+	}
+
+	// if we have one MSS worth of data, make sure it fits in our
+	// congestion window and the advertized receive window from
+	// the other end.
+	if (m_bytes_in_flight + payload_size > (std::min)(int(m_cwnd >> 16), int(m_adv_wnd - m_bytes_in_flight)))
+	{
+		// this means there's not enough room in the send window for
+		// another packet. We have to hold off sending this data.
+		// we still need to send an ACK though
+		payload_size = 0;
+
+		// we're restrained by the window size
+		m_last_cwnd_hit = time_now_hires();
+
+		// there's no more space in the cwnd, no need to
+		// try to send more right now
+		ret = false;
+
+		UTP_LOGV("%8p: no space in window send_buffer_size:%d cwnd:%d "
+			"ret:%d adv_wnd:%d in-flight:%d mtu:%d\n"
+			, this, m_write_buffer_size, int(m_cwnd >> 16)
+			, ret, m_adv_wnd, m_bytes_in_flight, m_mtu);
+	}
+
+	// if we don't have any data to send, or can't send any data
+	// and we don't have any data to ack, don't send a packet
+	if (payload_size == 0 && !ack)
+	{
+#if TORRENT_UTP_LOG
+		// no send has been attempted yet at this point, so there is
+		// no local error_code to report. Log the socket's stored
+		// error (m_error), like the "sending packet" log further
+		// down in this function does
+		UTP_LOGV("%8p: skipping send seq_nr:%d ack_nr:%d "
+			"id:%d target:%s header_size:%d error:%s send_buffer_size:%d cwnd:%d "
+			"ret:%d adv_wnd:%d in-flight:%d mtu:%d\n"
+			, this, int(m_seq_nr), int(m_ack_nr)
+			, m_send_id, print_endpoint(udp::endpoint(m_remote_address, m_port)).c_str()
+			, header_size, m_error.message().c_str(), m_write_buffer_size, int(m_cwnd >> 16)
+			, int(ret), m_adv_wnd, m_bytes_in_flight, m_mtu);
+#endif
+		return false;
+	}
+
+	if (((m_seq_nr - m_acked_seq_nr) & ACK_MASK) > 1
+		&& payload_size < m_mtu - header_size
+		&& !ack
+		&& m_nagle)
+	{
+		// this is nagle. If we don't have a full packet
+		// worth of payload to send AND we have at least
+		// one outstanding packet, hold off. 
Once the + // outstanding packet is acked, we'll send this + // payload + UTP_LOGV("%8p: NAGLE not enough payload send_buffer_size:%d cwnd:%d " + "ret:%d adv_wnd:%d in-flight:%d mtu:%d\n" + , this, m_write_buffer_size, int(m_cwnd >> 16) + , ret, m_adv_wnd, m_bytes_in_flight, m_mtu); + return false; + } + + int packet_size = header_size + payload_size; + + // MTU DISCOVERY + bool use_as_probe = false; + if (m_mtu_seq == 0 + && packet_size > m_mtu_floor + && m_seq_nr != 0) + { + use_as_probe = true; + m_mtu_seq = m_seq_nr; + } + + packet* p; + // we only need a heap allocation if we have payload and + // need to keep the packet around (in the outbuf) + if (payload_size) p = (packet*)malloc(sizeof(packet) + packet_size); + else p = (packet*)TORRENT_ALLOCA(char, sizeof(packet) + packet_size); + + p->size = packet_size; + p->header_size = packet_size - payload_size; + p->num_transmissions = 1; + p->need_resend = false; + p->mtu_probe = use_as_probe; + char* ptr = p->buf; + utp_header* h = (utp_header*)ptr; + ptr += sizeof(utp_header); + + h->type_ver = ((payload_size ? ST_DATA : ST_STATE) << 4) | 1; + h->extension = sack ? 
1 : 0; + h->connection_id = m_send_id; + h->timestamp_difference_microseconds = m_reply_micro; + h->wnd_size = m_in_buf_size - m_buffered_incoming_bytes - m_receive_buffer_size; + // seq_nr is ignored for ST_STATE packets, so it doesn't + // matter that we say this is a sequence number we haven't + // actually sent yet + h->seq_nr = m_seq_nr; + h->ack_nr = m_ack_nr; + + if (sack) + { + *ptr++ = 0; // end of extension chain + *ptr++ = sack; // bytes for SACK bitfield + write_sack(ptr, sack); + ptr += sack; + } + + write_payload(ptr, payload_size); + + // fill in the timestamp as late as possible + ptime now = time_now_hires(); + p->send_time = now; + h->timestamp_microseconds = boost::uint32_t(total_microseconds(now - min_time())); + +#if TORRENT_UTP_LOG + UTP_LOGV("%8p: sending packet seq_nr:%d ack_nr:%d type:%s " + "id:%d target:%s size:%d error:%s send_buffer_size:%d cwnd:%d " + "ret:%d adv_wnd:%d in-flight:%d mtu:%d timestamp:%u time_diff:%u " + "mtu_probe:%d\n" + , this, int(h->seq_nr), int(h->ack_nr), packet_type_names[h->get_type()] + , m_send_id, print_endpoint(udp::endpoint(m_remote_address, m_port)).c_str() + , packet_size, m_error.message().c_str(), m_write_buffer_size, int(m_cwnd >> 16) + , ret, m_adv_wnd, m_bytes_in_flight, m_mtu, boost::uint32_t(h->timestamp_microseconds) + , boost::uint32_t(h->timestamp_difference_microseconds), int(p->mtu_probe)); +#endif + + TORRENT_ASSERT(!m_error); + + error_code ec; + m_sm->send_packet(udp::endpoint(m_remote_address, m_port) + , (char const*)h, packet_size, ec + , use_as_probe ? 
utp_socket_manager::dont_fragment : 0); + + ++m_out_packets; + + if (ec == error::message_size && use_as_probe) + { + m_mtu_ceiling = m_mtu - 1; + update_mtu_limits(); + // TODO: we might want to do something else here + // as well, to resend the packet immediately without + // it being an MTU probe + } + else if (ec) + { + m_error = ec; + m_state = UTP_STATE_ERROR_WAIT; + test_socket_state(); + if (payload_size) free(p); + return false; + } + + // we just sent a packet. this means we just ACKed the last received + // packet as well. So, we can now reset the delayed ack timer to + // not trigger for a long time + m_ack_timer = now + minutes(10); + + // if we have payload, we need to save the packet until it's acked + // and progress m_seq_nr + if (payload_size) + { +#if !TORRENT_UT_SEQ + // if the other end closed the connection immediately + // our FIN packet will end up having the same sequence + // number as the SYN, so this assert is invalid + TORRENT_ASSERT(!m_outbuf.at(m_seq_nr)); +#endif + packet* old = (packet*)m_outbuf.insert(m_seq_nr, p); + if (old) + { + if (!old->need_resend) m_bytes_in_flight -= old->size - old->header_size; + free(old); + } + m_seq_nr = (m_seq_nr + 1) & ACK_MASK; + TORRENT_ASSERT(payload_size >= 0); + m_bytes_in_flight += payload_size; + } + + return ret; +} + +// size is in bytes +void utp_socket_impl::write_sack(char* buf, int size) const +{ + TORRENT_ASSERT(m_inbuf.size()); + int ack_nr = (m_ack_nr + 2) & ACK_MASK; + char* end = buf + size; + + for (; buf != end; ++buf) + { + *buf = 0; + int mask = 1; + for (int i = 0; i < 8; ++i) + { + if (m_inbuf.at(ack_nr)) *buf |= mask; + mask <<= 1; + ack_nr = (ack_nr + 1) & ACK_MASK; + } + } +} + +bool utp_socket_impl::resend_packet(packet* p, bool fast_resend) +{ + // for fast re-sends the packet hasn't been marked as needing resending + TORRENT_ASSERT(p->need_resend || fast_resend); + + TORRENT_ASSERT(!m_error); + + if (fast_resend + && ((m_acked_seq_nr + 1) & ACK_MASK) == m_mtu_seq + && 
m_mtu_seq != 0) + { + // we got multiple acks for the packet before our probe, assume + // it was dropped because it was too big + m_mtu_ceiling = m_mtu - 1; + update_mtu_limits(); + } + + // we can only resend the packet if there's + // enough space in our congestion window + int window_size_left = (std::min)(int(m_cwnd >> 16), int(m_adv_wnd)) - m_bytes_in_flight; + if (!fast_resend&& p->size - p->header_size > window_size_left) + { + m_last_cwnd_hit = time_now_hires(); + return false; + } + + // plus one since we have fast-resend as well, which doesn't + // necessarily trigger by a timeout + TORRENT_ASSERT(p->num_transmissions < m_sm->num_resends() + 1); + + TORRENT_ASSERT(p->size - p->header_size >= 0); + if (p->need_resend) m_bytes_in_flight += p->size - p->header_size; + + ++p->num_transmissions; + p->need_resend = false; + utp_header* h = (utp_header*)p->buf; + // update packet header + h->timestamp_difference_microseconds = m_reply_micro; + p->send_time = time_now_hires(); + h->timestamp_microseconds = boost::uint32_t(total_microseconds(p->send_time - min_time())); + + error_code ec; + m_sm->send_packet(udp::endpoint(m_remote_address, m_port) + , (char const*)p->buf, p->size, ec); + ++m_out_packets; + +#if TORRENT_UTP_LOG + UTP_LOGV("%8p: re-sending packet seq_nr:%d ack_nr:%d type:%s " + "id:%d target:%s size:%d error:%s send_buffer_size:%d cwnd:%d " + "adv_wnd:%d in-flight:%d mtu:%d timestamp:%u time_diff:%u\n" + , this, int(h->seq_nr), int(h->ack_nr), packet_type_names[h->get_type()] + , m_send_id, print_endpoint(udp::endpoint(m_remote_address, m_port)).c_str() + , p->size, ec.message().c_str(), m_write_buffer_size, int(m_cwnd >> 16) + , m_adv_wnd, m_bytes_in_flight, m_mtu, boost::uint32_t(h->timestamp_microseconds) + , boost::uint32_t(h->timestamp_difference_microseconds)); +#endif + + if (ec) + { + m_error = ec; + m_state = UTP_STATE_ERROR_WAIT; + test_socket_state(); + return false; + } + + return true; +} + +void utp_socket_impl::experienced_loss(int 
seq_nr) +{ + // since loss often comes in bursts, we only cut the + // window in half once per RTT. This is implemented + // by limiting which packets can cause us to cut the + // window size. The first packet that's lost will + // update the limit to the last sequence number we sent. + // i.e. only packet sent after this loss can cause another + // window size cut + if (compare_less_wrap(seq_nr, m_loss_seq_nr, ACK_MASK)) return; + + // cut window size in 2 + m_cwnd = (std::max)(m_cwnd / 2, boost::int64_t(m_mtu << 16)); + m_loss_seq_nr = m_seq_nr; + UTP_LOGV("%8p: Lost packet %d caused cwnd cut\n", this, seq_nr); + + // the window size could go below one MMS here, if it does, + // we'll get a timeout in about one second +} + +void utp_socket_impl::ack_packet(packet* p, ptime const& receive_time + , boost::uint32_t& min_rtt, boost::uint16_t seq_nr) +{ + TORRENT_ASSERT(p); + if (!p->need_resend) + { + TORRENT_ASSERT(m_bytes_in_flight >= p->size - p->header_size); + m_bytes_in_flight -= p->size - p->header_size; + } + + if (seq_nr == m_mtu_seq && m_mtu_seq != 0) + { + TORRENT_ASSERT(p->mtu_probe); + // our mtu probe was acked! + m_mtu_floor = m_mtu; + update_mtu_limits(); + } + + // increment the acked sequence number counter + if (((m_acked_seq_nr + 1) & ACK_MASK) == seq_nr) + { + m_acked_seq_nr = seq_nr; + // update loss seq number if it's less than the packet + // that was just acked. If loss seq nr is greater, it suggests + // that we're still in a window that has experienced loss + if (compare_less_wrap(m_loss_seq_nr, m_acked_seq_nr, ACK_MASK)) + m_loss_seq_nr = m_acked_seq_nr; + m_duplicate_acks = 0; + } + // increment the fast resend sequence number + if (m_fast_resend_seq_nr == seq_nr) + m_fast_resend_seq_nr = (m_fast_resend_seq_nr + 1) & ACK_MASK; + + boost::uint32_t rtt = boost::uint32_t(total_microseconds(receive_time - p->send_time)); + if (receive_time < p->send_time) + { + // this means our clock is not monotonic. 
Just assume the RTT was 100 ms + rtt = 100000; + + // the clock for this plaform is not monotonic! + TORRENT_ASSERT(false); + } + + UTP_LOGV("%8p: acked packet %d (%d bytes) (rtt:%u)\n" + , this, seq_nr, p->size - p->header_size, rtt / 1000); + + m_rtt.add_sample(rtt / 1000); + if (rtt < min_rtt) min_rtt = rtt; + free(p); +} + +void utp_socket_impl::incoming(char const* buf, int size, packet* p, ptime now) +{ + while (!m_read_buffer.empty()) + { + if (p) + { + buf = p->buf + p->header_size; + TORRENT_ASSERT(p->size - p->header_size >= size); + } + iovec_t* target = &m_read_buffer.front(); + + int to_copy = (std::min)(size, int(target->len)); + memcpy(target->buf, buf, to_copy); + if (m_read == 0) + { + m_read_timeout = now + milliseconds(100); + UTP_LOGV("%8p: setting read timeout to 100 ms from now\n", this); + } + m_read += to_copy; + target->buf = ((char*)target->buf) + to_copy; + target->len -= to_copy; + buf += to_copy; + TORRENT_ASSERT(m_read_buffer_size >= to_copy); + m_read_buffer_size -= to_copy; + size -= to_copy; + if (target->len == 0) m_read_buffer.erase(m_read_buffer.begin()); + if (p) + { + p->header_size += to_copy; + TORRENT_ASSERT(p->header_size <= p->size); + } + + if (size == 0) + { + TORRENT_ASSERT(p == 0 || p->header_size == p->size); + free(p); + maybe_trigger_receive_callback(now); + return; + } + } + + TORRENT_ASSERT(m_read_buffer_size == 0); + + if (!p) + { + TORRENT_ASSERT(buf); + p = (packet*)malloc(sizeof(packet) + size); + p->size = size; + p->header_size = 0; + memcpy(p->buf, buf, size); + } + if (m_receive_buffer_size == 0) m_read_timeout = now + milliseconds(100); + // save this packet until the client issues another read + m_receive_buffer.push_back(p); + m_receive_buffer_size += p->size - p->header_size; + + check_receive_buffers(); +} + +bool utp_socket_impl::cancel_handlers(error_code const& ec, bool kill) +{ + TORRENT_ASSERT(ec); + bool ret = m_read_handler || m_write_handler || m_connect_handler; + if (m_read_handler) 
m_read_handler(m_userdata, 0, ec, kill); + m_read_handler = 0; + if (m_write_handler) m_write_handler(m_userdata, 0, ec, kill); + m_write_handler = 0; + if (m_connect_handler) m_connect_handler(m_userdata, ec, kill); + m_connect_handler = 0; + return ret; +} + +bool utp_socket_impl::consume_incoming_data( + utp_header const* ph, char const* ptr, int payload_size + , ptime now) +{ + if (ph->get_type() != ST_DATA) return false; + + if (m_eof && m_ack_nr == m_eof_seq_nr) + { + // What?! We've already received a FIN and everything up + // to it has been acked. Ignore this packet + return true; + } + + if (ph->seq_nr == ((m_ack_nr + 1) & ACK_MASK)) + { + TORRENT_ASSERT(m_inbuf.at(m_ack_nr) == 0); + + // we received a packet in order + incoming(ptr, payload_size, 0, now); + m_ack_nr = (m_ack_nr + 1) & ACK_MASK; + + // If this packet was previously in the reorder buffer + // it would have been acked when m_ack_nr-1 was acked. + TORRENT_ASSERT(m_inbuf.at(m_ack_nr) == 0); + + UTP_LOGV("%8p: remove inbuf: %d (%d)\n" + , this, m_ack_nr, int(m_inbuf.size())); + + for (;;) + { + int const next_ack_nr = (m_ack_nr + 1) & ACK_MASK; + + packet* p = (packet*)m_inbuf.remove(next_ack_nr); + + if (!p) + break; + + m_buffered_incoming_bytes -= p->size - p->header_size; + incoming(0, p->size - p->header_size, p, now); + + m_ack_nr = next_ack_nr; + + UTP_LOGV("%8p: reordered remove inbuf: %d (%d)\n" + , this, m_ack_nr, int(m_inbuf.size())); + } + + // should we trigger the read handler? + maybe_trigger_receive_callback(now); + } + else + { + // this packet was received out of order. Stick it in the + // reorder buffer until it can be delivered in order + + // have we already received this packet and passed it on + // to the client? + if (!compare_less_wrap(m_ack_nr, ph->seq_nr, ACK_MASK)) + { + UTP_LOGV("%8p: already received seq_nr: %d\n" + , this, int(ph->seq_nr)); + return true; + } + + // do we already have this packet? 
If so, just ignore it + if (m_inbuf.at(ph->seq_nr)) + { + UTP_LOGV("%8p: already received seq_nr: %d\n" + , this, int(ph->seq_nr)); + return true; + } + + // we don't need to save the packet header, just the payload + packet* p = (packet*)malloc(sizeof(packet) + payload_size); + p->size = payload_size; + p->header_size = 0; + p->num_transmissions = 0; + p->need_resend = false; + memcpy(p->buf, ptr, payload_size); + m_inbuf.insert(ph->seq_nr, p); + m_buffered_incoming_bytes += p->size; + + UTP_LOGV("%8p: out of order. insert inbuf: %d (%d) m_ack_nr: %d\n" + , this, int(ph->seq_nr), int(m_inbuf.size()), m_ack_nr); + } + + return false; +} + +// returns true of the socket was closed +bool utp_socket_impl::test_socket_state() +{ + // if the socket is in a state where it's dead, just waiting to + // tell the client that it's closed. Do that and transition into + // the deleted state, where it will be deleted + // it might be possible to get here twice, in which we need to + // cancel any new handlers as well, even though we're already + // in the delete state + if (!m_error) return false; + TORRENT_ASSERT(m_state == UTP_STATE_ERROR_WAIT || m_state == UTP_STATE_DELETE); + +#if TORRENT_UTP_LOG + UTP_LOGV("%8p: state:%s error:%s\n" + , this, socket_state_names[m_state], m_error.message().c_str()); +#endif + + if (cancel_handlers(m_error, true)) + { + m_state = UTP_STATE_DELETE; +#if TORRENT_UTP_LOG + UTP_LOGV("%8p: state:%s\n", this, socket_state_names[m_state]); +#endif + return true; + } + return false; +} + +void utp_socket_impl::init_mtu(int link_mtu, int utp_mtu) +{ + // if we're in a RAM constrained environment, don't increase + // the buffer size for interfaces with large MTUs. 
Just stick + // to ethernet frame sizes + if (m_sm->allow_dynamic_sock_buf()) + { + // Make sure that we have enough socket buffer space + // for sending and receiving packets of this size + // add 10% for smaller ACKs and other overhead + m_sm->set_sock_buf(link_mtu * 11 / 10); + } + else if (link_mtu > TORRENT_ETHERNET_MTU) + { + // we can't use larger packets than this since we're + // not allocating any more memory for socket buffers + int decrease = link_mtu - TORRENT_ETHERNET_MTU; + utp_mtu -= decrease; + link_mtu -= decrease; + } + + m_mtu = utp_mtu; + m_mtu_ceiling = utp_mtu; + if (m_mtu_floor > utp_mtu) m_mtu_floor = utp_mtu; + + // if the window size is smaller than one packet size + // set it to one + if ((m_cwnd >> 16) < m_mtu) m_cwnd = m_mtu << 16; + + UTP_LOGV("%8p: intializing MTU to: %d [%d, %d]\n" + , this, m_mtu, m_mtu_floor, m_mtu_ceiling); +} + +// return false if this is an invalid packet +bool utp_socket_impl::incoming_packet(char const* buf, int size + , udp::endpoint const& ep, ptime receive_time) +{ + utp_header* ph = (utp_header*)buf; + + if (ph->get_version() != 1) + { + UTP_LOGV("%8p: incoming packet version:%d (ignored)\n" + , this, int(ph->get_version())); + return false; + } + + // SYN packets have special (reverse) connection ids + if (ph->get_type() != ST_SYN && ph->connection_id != m_recv_id) + { + UTP_LOGV("%8p: incoming packet id:%d expected:%d (ignored)\n" + , this, int(ph->connection_id), int(m_recv_id)); + return false; + } + + if (ph->get_type() >= NUM_TYPES) + { + UTP_LOGV("%8p: incoming packet type:%d (ignored)\n" + , this, int(ph->get_type())); + return false; + } + + if (m_state == UTP_STATE_NONE && ph->get_type() == ST_SYN) + { + m_remote_address = ep.address(); + m_port = ep.port(); + } + + if (m_state != UTP_STATE_NONE && ph->get_type() == ST_SYN) + { + UTP_LOGV("%8p: incoming packet type:ST_SYN (ignored)\n", this); + return true; + } + + bool step = false; + if (receive_time - m_last_history_step > minutes(1)) + { + 
step = true; + m_last_history_step = receive_time; + } + + // this is the difference between their send time and our receive time + // 0 means no sample yet + boost::uint32_t their_delay = 0; + if (ph->timestamp_microseconds != 0) + { + m_reply_micro = boost::uint32_t(total_microseconds(receive_time - min_time())) + - ph->timestamp_microseconds; + boost::uint32_t prev_base = m_their_delay_hist.initialized() ? m_their_delay_hist.base() : 0; + their_delay = m_their_delay_hist.add_sample(m_reply_micro, step); + int base_change = m_their_delay_hist.base() - prev_base; + UTP_LOGV("%8p: their_delay::add_sample:%u prev_base:%u new_base:%u\n" + , this, m_reply_micro, prev_base, m_their_delay_hist.base()); + + if (prev_base && base_change < 0 && base_change > -10000) + { + // their base delay went down. This is caused by clock drift. To compensate, + // adjust our base delay upwards + // don't adjust more than 10 ms. If the change is that big, something is probably wrong + m_delay_hist.adjust_base(-base_change); + } + + UTP_LOGV("%8p: incoming packet reply_micro:%u base_change:%d\n" + , this, m_reply_micro, prev_base ? base_change : 0); + } + + if (ph->get_type() == ST_RESET) + { + UTP_LOGV("%8p: incoming packet type:RESET\n", this); + m_error = asio::error::connection_reset; + m_state = UTP_STATE_ERROR_WAIT; + test_socket_state(); + return true; + } + + // is this ACK valid? If the other end is ACKing + // a packet that hasn't been sent yet + // just ignore it. A 3rd party could easily inject a packet + // like this in a stream, don't sever it because of it. + // since m_seq_nr is the sequence number of the next packet + // we'll send (and m_seq_nr-1 was the last packet we sent), + // if the ACK we got is greater than the last packet we sent + // something is wrong. 
+ // If our state is state_none, this packet must be a syn packet + // and the ack_nr should be ignored + boost::uint16_t cmp_seq_nr = (m_seq_nr - 1) & ACK_MASK; +#if TORRENT_UT_SEQ + if (m_state == UTP_STATE_SYN_SENT && ph->get_type() == ST_STATE) + cmp_seq_nr = m_seq_nr; +#endif + if (m_state != UTP_STATE_NONE + && compare_less_wrap(cmp_seq_nr, ph->ack_nr, ACK_MASK)) + { + UTP_LOGV("%8p: incoming packet ack_nr:%d our seq_nr:%d (ignored)\n" + , this, int(ph->ack_nr), m_seq_nr); + return true; + } + + // check to make sure the sequence number of this packet + // is reasonable. If it's a data packet and we've already + // received it, ignore it. This is either a stray old packet + // that finally made it here (after having been re-sent) or + // an attempt to interfere with the connection from a 3rd party + // in both cases, we can safely ignore the timestamp and ACK + // information in this packet +/* + // even if we've already received this packet, we need to + // send another ack to it, since it may be a resend caused by + // our ack getting dropped + if (m_state != UTP_STATE_SYN_SENT + && ph->get_type() == ST_DATA + && !compare_less_wrap(m_ack_nr, ph->seq_nr, ACK_MASK)) + { + // we've already received this packet + UTP_LOGV("%8p: incoming packet seq_nr:%d our ack_nr:%d (ignored)\n" + , this, int(ph->seq_nr), m_ack_nr); + return true; + } +*/ + + // if the socket is closing, always ignore any packet + // with a higher sequence number than the FIN sequence number + if (m_eof && compare_less_wrap(m_eof_seq_nr, ph->seq_nr, ACK_MASK)) + { + UTP_LOGV("%8p: incoming packet seq_nr:%d eof_seq_nr:%d (ignored)\n" + , this, int(ph->seq_nr), m_eof_seq_nr); + + } + + if (m_state != UTP_STATE_NONE + && m_state != UTP_STATE_SYN_SENT + && compare_less_wrap((m_ack_nr + max_packets_reorder) & ACK_MASK, ph->seq_nr, ACK_MASK)) + { + // this is too far out to fit in our reorder buffer. 
Drop it + // This is either an attack to try to break the connection + // or a seariously damaged connection that lost a lot of + // packets. Neither is very likely, and it should be OK + // to drop the timestamp information. + UTP_LOGV("%8p: incoming packet seq_nr:%d our ack_nr:%d (ignored)\n" + , this, int(ph->seq_nr), m_ack_nr); + return true; + } + + ++m_in_packets; + + // this is a valid incoming packet, update the timeout timer + m_num_timeouts = 0; + m_timeout = receive_time + milliseconds(packet_timeout()); + UTP_LOGV("%8p: updating timeout to: now + %d\n" + , this, packet_timeout()); + + // the test for INT_MAX here is a work-around for a bug in uTorrent where + // it's sometimes sent as INT_MAX when it is in fact uninitialized + const boost::uint32_t sample = ph->timestamp_difference_microseconds == INT_MAX + ? 0 : ph->timestamp_difference_microseconds; + + boost::uint32_t delay = 0; + if (sample != 0) + { + delay = m_delay_hist.add_sample(sample, step); + m_delay_sample_hist[m_delay_sample_idx++] = delay; + if (m_delay_sample_idx >= num_delay_hist) m_delay_sample_idx = 0; + } + + int acked_bytes = 0; + + TORRENT_ASSERT(m_bytes_in_flight >= 0); + int prev_bytes_in_flight = m_bytes_in_flight; + + m_adv_wnd = ph->wnd_size; + + // if we get an ack for the same sequence number as + // was last ACKed, and we have outstanding packets, + // it counts as a duplicate ack + if (ph->ack_nr == m_acked_seq_nr && m_outbuf.size()) + { + ++m_duplicate_acks; + } + + boost::uint32_t min_rtt = UINT_MAX; + + TORRENT_ASSERT(m_outbuf.at((m_acked_seq_nr + 1) & ACK_MASK) || ((m_seq_nr - m_acked_seq_nr) & ACK_MASK) <= 1); + + // has this packet already been ACKed? + // if the ACK we just got is less than the max ACKed + // sequence number, it doesn't tell us anything. 
+ // So, only act on it if the ACK is greater than the last acked + // sequence number + if (m_state != UTP_STATE_NONE && compare_less_wrap(m_acked_seq_nr, ph->ack_nr, ACK_MASK)) + { + int const next_ack_nr = ph->ack_nr; + + for (int ack_nr = (m_acked_seq_nr + 1) & ACK_MASK; + ack_nr != ((next_ack_nr + 1) & ACK_MASK); + ack_nr = (ack_nr + 1) & ACK_MASK) + { + if (m_fast_resend_seq_nr == ack_nr) + m_fast_resend_seq_nr = (m_fast_resend_seq_nr + 1) & ACK_MASK; + packet* p = (packet*)m_outbuf.remove(ack_nr); + if (!p) + { + if (((m_acked_seq_nr + 1) & ACK_MASK) == ack_nr) + m_acked_seq_nr = ack_nr; + continue; + } + acked_bytes += p->size - p->header_size; + ack_packet(p, receive_time, min_rtt, ack_nr); + } + + // update loss seq number if it's less than the packet + // that was just acked. If loss seq nr is greater, it suggests + // that we're still in a window that has experienced loss + if (compare_less_wrap(m_loss_seq_nr, m_acked_seq_nr, ACK_MASK)) + m_loss_seq_nr = m_acked_seq_nr; + + m_duplicate_acks = 0; + if (compare_less_wrap(m_fast_resend_seq_nr, (m_acked_seq_nr + 1) & ACK_MASK, ACK_MASK)) + m_fast_resend_seq_nr = (m_acked_seq_nr + 1) & ACK_MASK; + } + + // look for extended headers + char const* ptr = buf; + ptr += sizeof(utp_header); + + unsigned int extension = ph->extension; + while (extension) + { + // invalid packet. 
It says it has an extension header + // but the packet is too short + if (ptr - buf + 2 > size) + { + UTP_LOGV("%8p: invalid extension header\n", this); + return true; + } + int next_extension = unsigned(*ptr++); + unsigned int len = unsigned(*ptr++); + if (ptr - buf + len > size) + { + UTP_LOGV("%8p: invalid extension header size:%d packet:%d\n" + , this, len, int(ptr - buf)); + return true; + } + switch(extension) + { + case 1: // selective ACKs + parse_sack(ph->ack_nr, ptr, len, &acked_bytes, receive_time, min_rtt); + break; + } + ptr += len; + extension = next_extension; + } + + // the send operation in parse_sack() may have set the socket to an error + // state, in which case we shouldn't continue + if (m_state == UTP_STATE_ERROR_WAIT || m_state == UTP_STATE_DELETE) return true; + + if (m_duplicate_acks >= dup_ack_limit + && ((m_acked_seq_nr + 1) & ACK_MASK) == m_fast_resend_seq_nr) + { + // LOSS + + UTP_LOGV("%8p: Packet %d lost.\n", this, m_fast_resend_seq_nr); + + // resend the lost packet + packet* p = (packet*)m_outbuf.at(m_fast_resend_seq_nr); + TORRENT_ASSERT(p); + if (p) + { + experienced_loss(m_fast_resend_seq_nr); + resend_packet(p, true); + if (m_state == UTP_STATE_ERROR_WAIT || m_state == UTP_STATE_DELETE) return true; + } + // don't fast-resend this again + m_fast_resend_seq_nr = (m_fast_resend_seq_nr + 1) & ACK_MASK; + } + + // ptr points to the payload of the packet + // size is the packet size, payload is the + // number of payload bytes are in this packet + const int header_size = ptr - buf; + const int payload_size = size - header_size; + +#if TORRENT_UTP_LOG + UTP_LOGV("%8p: incoming packet seq_nr:%d ack_nr:%d type:%s id:%d size:%d timestampdiff:%u timestamp:%u " + "our ack_nr:%d our seq_nr:%d our acked_seq_nr:%d our state:%s\n" + , this, int(ph->seq_nr), int(ph->ack_nr), packet_type_names[ph->get_type()] + , int(ph->connection_id), payload_size, boost::uint32_t(ph->timestamp_difference_microseconds) + , 
boost::uint32_t(ph->timestamp_microseconds), m_ack_nr, m_seq_nr, m_acked_seq_nr, socket_state_names[m_state]); +#endif + + if (ph->get_type() == ST_FIN) + { + // We ignore duplicate FIN packets, but we still need to ACK them. + if (ph->seq_nr == ((m_ack_nr + 1) & ACK_MASK) + || ph->seq_nr == m_ack_nr) + { + UTP_LOGV("%8p: FIN received in order\n", this); + + // The FIN arrived in order, nothing else is in the + // reorder buffer. + +// TORRENT_ASSERT(m_inbuf.size() == 0); + m_ack_nr = ph->seq_nr; + + // Transition to UTP_STATE_FIN_SENT. The sent FIN is also an ack + // to the FIN we received. Once we're in UTP_STATE_FIN_SENT we + // just need to wait for our FIN to be acked. + + if (m_state == UTP_STATE_FIN_SENT) + { + send_pkt(true); + if (m_state == UTP_STATE_ERROR_WAIT || m_state == UTP_STATE_DELETE) return true; + } + else + { + send_fin(); + if (m_state == UTP_STATE_ERROR_WAIT || m_state == UTP_STATE_DELETE) return true; + } + } + + if (m_eof) + { + UTP_LOGV("%8p: duplicate FIN packet (ignoring)\n", this); + return true; + } + m_eof = true; + m_eof_seq_nr = ph->seq_nr; + + // we will respond with a fin once we have received everything up to m_eof_seq_nr + } + + switch (m_state) + { + case UTP_STATE_NONE: + { + if (ph->get_type() == ST_SYN) + { + // if we're in state_none, the only thing + // we accept are SYN packets. 
+ m_state = UTP_STATE_CONNECTED; + + m_remote_address = ep.address(); + m_port = ep.port(); + +#if TORRENT_UTP_LOG + UTP_LOGV("%8p: state:%s\n" + , this, socket_state_names[m_state]); +#endif + m_ack_nr = ph->seq_nr; + m_seq_nr = rand(); + m_acked_seq_nr = (m_seq_nr - 1) & ACK_MASK; + m_loss_seq_nr = m_acked_seq_nr; + + TORRENT_ASSERT(m_send_id == ph->connection_id); + TORRENT_ASSERT(m_recv_id == ((m_send_id + 1) & 0xffff)); + + send_pkt(true); + + return true; + } + else + { +#if TORRENT_UTP_LOG + UTP_LOGV("%8p: type:%s state:%s (ignored)\n" + , this, packet_type_names[ph->get_type()], socket_state_names[m_state]); +#endif + return true; + } + break; + } + case UTP_STATE_SYN_SENT: + { + // just wait for an ack to our SYN, ignore everything else + if (ph->ack_nr != ((m_seq_nr - 1) & ACK_MASK)) + { +#if TORRENT_UTP_LOG + UTP_LOGV("%8p: incorrect ack_nr (%d) waiting for %d\n" + , this, int(ph->ack_nr), (m_seq_nr - 1) & ACK_MASK); +#endif + return true; + } + + TORRENT_ASSERT(!m_error); + m_state = UTP_STATE_CONNECTED; +#if TORRENT_UTP_LOG + UTP_LOGV("%8p: state:%s\n", this, socket_state_names[m_state]); +#endif + + // only progress our ack_nr on ST_DATA messages + // since our m_ack_nr is uninitialized at this point + // we still need to set it to something regardless + if (ph->get_type() == ST_DATA) + m_ack_nr = ph->seq_nr; + else + m_ack_nr = (ph->seq_nr - 1) & ACK_MASK; + + // notify the client that the socket connected + if (m_connect_handler) + { + UTP_LOGV("%8p: calling connect handler\n", this); + m_connect_handler(m_userdata, m_error, false); + } + m_connect_handler = 0; + // fall through + } + case UTP_STATE_CONNECTED: + { + // the lowest seen RTT can be used to clamp the delay + // within reasonable bounds. The one-way delay is never + // higher than the round-trip time. 
+ + // it's impossible for delay to be more than the RTT, so make + // sure to clamp it as a sanity check + if (delay > min_rtt) delay = min_rtt; + + // only use the minimum from the last 3 delay measurements + delay = *std::min_element(m_delay_sample_hist, m_delay_sample_hist + num_delay_hist); + + if (sample && acked_bytes && prev_bytes_in_flight) + do_ledbat(acked_bytes, delay, prev_bytes_in_flight, receive_time); + + consume_incoming_data(ph, ptr, payload_size, receive_time); + + // the parameter to send_pkt tells it if we're acking data + // If we are, we'll send an ACK regardless of if we have any + // space left in our send window or not. If we just got an ACK + // (i.e. ST_STATE) we're not ACKing anything. If we just + // received a FIN packet, we need to ack that as well + bool has_ack = ph->get_type() == ST_DATA || ph->get_type() == ST_FIN || ph->get_type() == ST_SYN; + int delayed_ack = m_sm->delayed_ack(); + if (has_ack && delayed_ack && m_ack_timer > receive_time) + { + // we have data to ACK, and delayed ACKs are enabled. + // update the ACK timer and clear the flag, to pretend + // like we don't have anything to ACK + m_ack_timer = (std::min)(m_ack_timer, receive_time + milliseconds(delayed_ack)); + has_ack = false; + UTP_LOGV("%8p: delaying ack. timer triggers in %d milliseconds\n" + , this, int(total_milliseconds(m_ack_timer - time_now_hires()))); + } + + if (send_pkt(has_ack)) + { + // try to send more data as long as we can + while (send_pkt(false)); + } + maybe_trigger_send_callback(receive_time); + if (m_state == UTP_STATE_ERROR_WAIT || m_state == UTP_STATE_DELETE) return true; + + // Everything up to the FIN has been receieved, respond with a FIN + // from our side. + if (m_eof && m_ack_nr == ((m_eof_seq_nr - 1) & ACK_MASK)) + { + UTP_LOGV("%8p: incoming stream consumed\n", this); + + // This transitions to the UTP_STATE_FIN_SENT state. 
+ send_fin(); + if (m_state == UTP_STATE_ERROR_WAIT || m_state == UTP_STATE_DELETE) return true; + } + +#if TORRENT_UTP_LOG + if (sample && acked_bytes && prev_bytes_in_flight) + { + char their_delay_base[20]; + if (m_their_delay_hist.initialized()) + snprintf(their_delay_base, sizeof(their_delay_base), "%u", m_their_delay_hist.base()); + else + strcpy(their_delay_base, "-"); + + char our_delay_base[20]; + if (m_delay_hist.initialized()) + snprintf(our_delay_base, sizeof(our_delay_base), "%u", m_delay_hist.base()); + else + strcpy(our_delay_base, "-"); + + UTP_LOG("%8p: " + "actual_delay:%u " + "our_delay:%f " + "their_delay:%f " + "off_target:%f " + "max_window:%u " + "upload_rate:%d " + "delay_base:%s " + "delay_sum:%f " + "target_delay:%d " + "acked_bytes:%d " + "cur_window:%d " + "scaled_gain:%f " + "rtt:%u " + "rate:%d " + "quota:%d " + "wnduser:%u " + "rto:%d " + "timeout:%d " + "get_microseconds:%u " + "cur_window_packets:%u " + "packet_size:%d " + "their_delay_base:%s " + "their_actual_delay:%u " + "seq_nr:%u " + "acked_seq_nr:%u " + "reply_micro:%u " + "min_rtt:%u " + "send_buffer:%d " + "recv_buffer:%d " + "\n" + , this + , sample + , float(delay / 1000.f) + , float(their_delay / 1000.f) + , float(int(m_sm->target_delay() - delay)) / 1000.f + , boost::uint32_t(m_cwnd >> 16) + , 0 + , our_delay_base + , float(delay + their_delay) / 1000.f + , m_sm->target_delay() / 1000 + , acked_bytes + , m_bytes_in_flight + , 0.f // float(scaled_gain) + , m_rtt.mean() + , int(m_cwnd * 1000 / (m_rtt.mean()?m_rtt.mean():50)) >> 16 + , 0 + , m_adv_wnd + , packet_timeout() + , int(total_milliseconds(m_timeout - receive_time)) + , int(total_microseconds(receive_time - min_time())) + , (m_seq_nr - m_acked_seq_nr) & ACK_MASK + , m_mtu + , their_delay_base + , boost::uint32_t(m_reply_micro) + , m_seq_nr + , m_acked_seq_nr + , m_reply_micro + , min_rtt / 1000 + , m_write_buffer_size + , m_read_buffer_size); + } +#endif + + return true; + } + case UTP_STATE_FIN_SENT: + { + // 
There are two ways we can end up in this state: + // + // 1. If the socket has been explicitly closed on our + // side, in which case m_eof is false. + // + // 2. If we received a FIN from the remote side, in which + // case m_eof is true. If this is the case, we don't + // come here until everything up to the FIN has been + // received. + // + // + // + + // At this point m_seq_nr - 1 is the FIN sequence number. + + // We can receive both ST_DATA and ST_STATE here, because after + // we have closed our end of the socket, the remote end might + // have data in the pipeline. We don't really care about the + // data, but we do have to ack it. Or rather, we have to ack + // the FIN that will come after the data. + + // Case 1: + // --------------------------------------------------------------- + // + // If we are here because the local endpoint was closed, we need + // to first wait for all of our messages to be acked: + // + // if (m_acked_seq_nr == ((m_seq_nr - 1) & ACK_MASK)) + // + // `m_seq_nr - 1` is the ST_FIN message that we sent. + // + // ---------------------- + // + // After that has happened we need to wait for the remote side + // to send its ST_FIN message. When we receive that we send an + // ST_STATE back to ack, and wait for a sufficient period. + // During this wait we keep acking incoming ST_FIN's. This is + // all handled at the top of this function. + // + // Note that the user handlers are all cancelled when the initial + // close() call happens, so nothing will happen on the user side + // after that. + + // Case 2: + // --------------------------------------------------------------- + // + // If we are here because we received a ST_FIN message, and then + // sent our own ST_FIN to ack that, we need to wait for our ST_FIN + // to be acked: + // + // if (m_acked_seq_nr == ((m_seq_nr - 1) & ACK_MASK)) + // + // `m_seq_nr - 1` is the ST_FIN message that we sent. 
+ // + // After that has happened we know the remote side has all our + // data, and we can gracefully shut down. + + if (consume_incoming_data(ph, ptr, payload_size, receive_time)) + return true; + + if (m_acked_seq_nr == ((m_seq_nr - 1) & ACK_MASK)) + { + // When this happens we know that the remote side has + // received all of our packets. + + UTP_LOGV("%8p: FIN acked\n", this); + + if (!m_attached) + { + UTP_LOGV("%8p: close initiated here, delete socket\n", this); + m_error = asio::error::eof; + m_state = UTP_STATE_DELETE; + test_socket_state(); + } + else + { + UTP_LOGV("%8p: closing socket\n", this); + m_error = asio::error::eof; + m_state = UTP_STATE_ERROR_WAIT; + test_socket_state(); + } + } + + return true; + } + case UTP_STATE_DELETE: + default: + { + // respond with a reset + send_reset(ph); + return true; + } + } + + return false; +} + +void utp_socket_impl::do_ledbat(int acked_bytes, int delay, int in_flight, ptime const now) +{ + // the portion of the in-flight bytes that were acked. This is used to make + // the gain factor be scaled by the rtt. 
The formula is applied once per + // rtt, or on every ACK scaled by the number of ACKs per rtt + TORRENT_ASSERT(in_flight > 0); + TORRENT_ASSERT(acked_bytes > 0); + + int target_delay = m_sm->target_delay(); + + // all of these are fixed points with 16 bits fraction portion + boost::int64_t window_factor = (boost::int64_t(acked_bytes) << 16) / in_flight; + boost::int64_t delay_factor = (boost::int64_t(target_delay - delay) << 16) / target_delay; + boost::int64_t scaled_gain = (window_factor * delay_factor) >> 16; + scaled_gain *= boost::int64_t(m_sm->gain_factor()); + + if (scaled_gain > 0 && m_last_cwnd_hit + milliseconds((std::max)(m_rtt.mean(), 10)) < now) + { + // we haven't bumped into the cwnd limit size within the last mean RTT + // (at least 10 ms). this probably means we have a send rate limit, + // so we shouldn't make the cwnd size any larger + scaled_gain = 0; + } + + UTP_LOGV("%8p: do_ledbat delay:%d off_target: %d window_factor:%f target_factor:%f " + "scaled_gain:%f cwnd:%d\n" + , this, delay, target_delay - delay, window_factor / float(1 << 16) + , delay_factor / float(1 << 16) + , scaled_gain / float(1 << 16), int(m_cwnd >> 16)); + + // if scaled_gain + m_cwnd <= 0, set m_cwnd to 0 + if (-scaled_gain >= m_cwnd) + { + m_cwnd = 0; + } + else + { + m_cwnd += scaled_gain; + TORRENT_ASSERT(m_cwnd > 0); + } +} + +void utp_stream::bind(endpoint_type const& ep, error_code& ec) { } // no-op: both arguments are ignored + +// returns the number of milliseconds a packet would have before +// it would time-out if it was sent right now. 
Takes the RTT estimate +// into account +int utp_socket_impl::packet_timeout() const +{ + // SYN packets have a bit longer timeout, since we don't + // have an RTT estimate yet, make a conservative guess + if (m_state == UTP_STATE_NONE) return 3000; + + int timeout = (std::max)(m_sm->min_timeout(), m_rtt.mean() + m_rtt.avg_deviation() * 2); + if (m_num_timeouts > 0) timeout += (1 << (int(m_num_timeouts) - 1)) * 1000; + return timeout; +} + +void utp_socket_impl::tick(ptime const& now) +{ +#if TORRENT_UTP_LOG + UTP_LOGV("%8p: tick:%s r: %d (%s) w: %d (%s)\n" + , this, socket_state_names[m_state], m_read, m_read_handler ? "handler" : "no handler" + , m_written, m_write_handler ? "handler" : "no handler"); +#endif + bool window_opened = false; + + TORRENT_ASSERT(m_outbuf.at((m_acked_seq_nr + 1) & ACK_MASK) || ((m_seq_nr - m_acked_seq_nr) & ACK_MASK) <= 1); + + // don't hang on to received data for too long, and don't + // wait too long telling the client we've sent some data. + // these functions will trigger time callback if we have + // a reason to and it's been long enough since we sent or + // received the data + maybe_trigger_receive_callback(now); + maybe_trigger_send_callback(now); + + // if we're already in an error state, we're just waiting for the + // client to perform an operation so that we can communicate the + // error. No need to do anything else with this socket + if (m_state == UTP_STATE_ERROR_WAIT || m_state == UTP_STATE_DELETE) return; + + if (now > m_timeout) + { + // TIMEOUT! 
+ // set cwnd to 1 MSS + + // the window went from less than one MSS to one MSS + // we can now sent messages again, the send window was opened + if ((m_cwnd >> 16) < m_mtu) window_opened = true; + + m_cwnd = m_mtu << 16; + if (m_outbuf.size()) ++m_num_timeouts; + m_timeout = now + milliseconds(packet_timeout()); + + UTP_LOGV("%8p: timeout resetting cwnd:%d\n" + , this, int(m_cwnd >> 16)); + + if (((m_acked_seq_nr + 1) & ACK_MASK) == m_mtu_seq + && ((m_seq_nr - 1) & ACK_MASK) == m_mtu_seq + && m_mtu_seq != 0) + { + // we timed out, and the only outstanding packet + // we had was the probe. Assume it was dropped + // because it was too big + m_mtu_ceiling = m_mtu - 1; + update_mtu_limits(); + } + + // we dropped all packets, that includes the mtu probe + m_mtu_seq = 0; + + // since we've already timed out now, don't count + // loss that we might detect for packets that just + // timed out + m_loss_seq_nr = m_seq_nr; + + // we need to go one past m_seq_nr to cover the case + // where we just sent a SYN packet and then adjusted for + // the uTorrent sequence number reuse + for (int i = m_acked_seq_nr & ACK_MASK; + i != ((m_seq_nr + 1) & ACK_MASK); + i = (i + 1) & ACK_MASK) + { + packet* p = (packet*)m_outbuf.at(i); + if (!p) continue; + if (p->need_resend) continue; + p->need_resend = true; + TORRENT_ASSERT(m_bytes_in_flight >= p->size - p->header_size); + m_bytes_in_flight -= p->size - p->header_size; + UTP_LOGV("%8p: Packet %d lost.\n", this, i); + } + + TORRENT_ASSERT(m_bytes_in_flight == 0); + + // if we have a packet that needs re-sending, resend it + packet* p = (packet*)m_outbuf.at((m_acked_seq_nr + 1) & ACK_MASK); + if (p) + { + if (p->num_transmissions >= m_sm->num_resends() + || (m_state == UTP_STATE_SYN_SENT && p->num_transmissions >= m_sm->syn_resends()) + || (m_state == UTP_STATE_FIN_SENT && p->num_transmissions >= m_sm->fin_resends())) + { +#if TORRENT_UTP_LOG + UTP_LOGV("%8p: %d failed sends in a row. Socket timed out. 
state:%s\n" + , this, p->num_transmissions, socket_state_names[m_state]); +#endif + + // the connection is dead + m_error = asio::error::timed_out; + m_state = UTP_STATE_ERROR_WAIT; + test_socket_state(); + return; + } + + // don't fast-resend this packet + if (m_fast_resend_seq_nr == ((m_acked_seq_nr + 1) & ACK_MASK)) + m_fast_resend_seq_nr = (m_fast_resend_seq_nr + 1) & ACK_MASK; + + // the packet timed out, resend it + resend_packet(p); + if (m_state == UTP_STATE_ERROR_WAIT || m_state == UTP_STATE_DELETE) return; + } + else if (m_state < UTP_STATE_FIN_SENT) + { + send_pkt(false); + if (m_state == UTP_STATE_ERROR_WAIT || m_state == UTP_STATE_DELETE) return; + } + else if (m_state == UTP_STATE_FIN_SENT) + { + // the connection is dead + m_error = asio::error::eof; + m_state = UTP_STATE_ERROR_WAIT; + test_socket_state(); + return; + } + } + + if (now > m_ack_timer) + { + UTP_LOGV("%8p: ack timer expired, sending ACK\n", this); + // we need to send an ACK now! + send_pkt(true); + if (m_state == UTP_STATE_ERROR_WAIT || m_state == UTP_STATE_DELETE) return; + } + + switch (m_state) + { + case UTP_STATE_NONE: + case UTP_STATE_DELETE: + return; +// case UTP_STATE_SYN_SENT: +// +// break; + } +} + +void utp_socket_impl::check_receive_buffers() const +{ + std::size_t size = 0; + + for (std::vector::const_iterator i = m_receive_buffer.begin() + , end(m_receive_buffer.end()); i != end; ++i) + { + if (packet const* p = *i) + size += p->size - p->header_size; + } + + TORRENT_ASSERT(size == m_receive_buffer_size); +} + +} + diff --git a/test/Jamfile b/test/Jamfile index 8a2c1efc9..813fced4e 100644 --- a/test/Jamfile +++ b/test/Jamfile @@ -34,6 +34,7 @@ test-suite libtorrent : [ run test_bdecode_performance.cpp ] [ run test_pe_crypto.cpp ] + [ run test_utp.cpp ] [ run test_auto_unchoke.cpp ] [ run test_http_connection.cpp ] [ run test_torrent.cpp ] diff --git a/test/setup_transfer.cpp b/test/setup_transfer.cpp index f11b02630..9a16dce12 100644 --- a/test/setup_transfer.cpp +++ 
b/test/setup_transfer.cpp @@ -241,9 +241,9 @@ setup_transfer(session* ses1, session* ses2, session* ses3 ses1->set_settings(sess_set); ses2->set_settings(sess_set); if (ses3) ses3->set_settings(sess_set); - ses1->set_alert_mask(~alert::progress_notification); - ses2->set_alert_mask(~alert::progress_notification); - if (ses3) ses3->set_alert_mask(~alert::progress_notification); + ses1->set_alert_mask(~(alert::progress_notification | alert::stats_notification)); + ses2->set_alert_mask(~(alert::progress_notification | alert::stats_notification)); + if (ses3) ses3->set_alert_mask(~(alert::progress_notification | alert::stats_notification)); std::srand(time(0)); peer_id pid; @@ -289,9 +289,11 @@ setup_transfer(session* ses1, session* ses2, session* ses3 if (p) param = *p; param.ti = clone_ptr(t); param.save_path = "./tmp1" + suffix; + param.seed_mode = true; error_code ec; torrent_handle tor1 = ses1->add_torrent(param, ec); tor1.super_seeding(super_seeding); + param.seed_mode = false; TEST_CHECK(!ses1->get_torrents().empty()); torrent_handle tor2; torrent_handle tor3; @@ -753,7 +755,7 @@ void web_server_thread(int* port, bool ssl, bool chunked) while (!p.finished()) { - TORRENT_ASSERT(len < sizeof(buf)); + TORRENT_ASSERT(len < int(sizeof(buf))); size_t received = s.read_some(boost::asio::buffer(&buf[len] , sizeof(buf) - len), ec); // fprintf(stderr, "read: %d\n", int(received)); diff --git a/test/test_primitives.cpp b/test/test_primitives.cpp index 1d11f7626..f45fc98f5 100644 --- a/test/test_primitives.cpp +++ b/test/test_primitives.cpp @@ -43,8 +43,11 @@ POSSIBILITY OF SUCH DAMAGE. 
#include "libtorrent/broadcast_socket.hpp" #include "libtorrent/identify_client.hpp" #include "libtorrent/file.hpp" +#include "libtorrent/packet_buffer.hpp" #include "libtorrent/session.hpp" #include "libtorrent/bencode.hpp" +#include "libtorrent/timestamp_history.hpp" +#include "libtorrent/enum_net.hpp" #ifndef TORRENT_DISABLE_DHT #include "libtorrent/kademlia/node_id.hpp" #include "libtorrent/kademlia/routing_table.hpp" @@ -382,6 +385,98 @@ int test_main() error_code ec; int ret = 0; + // test timestamp_history + { + timestamp_history h; + TEST_EQUAL(h.add_sample(0x32, false), 0); + TEST_EQUAL(h.base(), 0x32); + TEST_EQUAL(h.add_sample(0x33, false), 0x1); + TEST_EQUAL(h.base(), 0x32); + TEST_EQUAL(h.add_sample(0x3433, false), 0x3401); + TEST_EQUAL(h.base(), 0x32); + TEST_EQUAL(h.add_sample(0x30, false), 0); + TEST_EQUAL(h.base(), 0x30); + + // test that wrapping of the timestamp is properly handled + h.add_sample(0xfffffff3, false); + TEST_EQUAL(h.base(), 0xfffffff3); + } + + // test packet_buffer + { + packet_buffer pb; + + TEST_EQUAL(pb.capacity(), 0); + TEST_EQUAL(pb.size(), 0); + TEST_EQUAL(pb.span(), 0); + + pb.insert(123, (void*)123); + TEST_EQUAL(pb.at(123 + 16), 0); + + TEST_CHECK(pb.at(123) == (void*)123); + TEST_CHECK(pb.capacity() > 0); + TEST_EQUAL(pb.size(), 1); + TEST_EQUAL(pb.span(), 1); + TEST_EQUAL(pb.cursor(), 123); + + pb.insert(125, (void*)125); + + TEST_CHECK(pb.at(125) == (void*)125); + TEST_EQUAL(pb.size(), 2); + TEST_EQUAL(pb.span(), 3); + TEST_EQUAL(pb.cursor(), 123); + + pb.insert(500, (void*)500); + TEST_EQUAL(pb.size(), 3); + TEST_EQUAL(pb.span(), 501 - 123); + TEST_EQUAL(pb.capacity(), 512); + + TEST_CHECK(pb.remove(123) == (void*)123); + TEST_EQUAL(pb.size(), 2); + TEST_EQUAL(pb.span(), 501 - 125); + TEST_EQUAL(pb.cursor(), 125); + TEST_CHECK(pb.remove(125) == (void*)125); + TEST_EQUAL(pb.size(), 1); + TEST_EQUAL(pb.span(), 1); + TEST_EQUAL(pb.cursor(), 500); + + TEST_CHECK(pb.remove(500) == (void*)500); + TEST_EQUAL(pb.size(), 0); + 
TEST_EQUAL(pb.span(), 0); + + for (int i = 0; i < 0xff; ++i) + { + int index = (i + 0xfff0) & 0xffff; + pb.insert(index, (void*)(index + 1)); + fprintf(stderr, "insert: %u (mask: %x)\n", index, int(pb.capacity() - 1)); + TEST_EQUAL(pb.capacity(), 512); + if (i >= 14) + { + index = (index - 14) & 0xffff; + fprintf(stderr, "remove: %u\n", index); + TEST_CHECK(pb.remove(index) == (void*)(index + 1)); + TEST_EQUAL(pb.size(), 14); + } + } + } + + { + // test wrapping the indices + packet_buffer pb; + + TEST_EQUAL(pb.size(), 0); + + pb.insert(0xfffe, (void*)1); + TEST_CHECK(pb.at(0xfffe) == (void*)1); + + pb.insert(2, (void*)2); + TEST_CHECK(pb.at(2) == (void*)2); + + pb.remove(0xfffe); + TEST_CHECK(pb.at(0xfffe) == (void*)0); + TEST_CHECK(pb.at(2) == (void*)2); + } + TEST_CHECK(error_code(errors::http_error).message() == "HTTP error"); TEST_CHECK(error_code(errors::missing_file_sizes).message() == "missing or invalid 'file sizes' entry"); TEST_CHECK(error_code(errors::unsupported_protocol_version).message() == "unsupported protocol version"); @@ -391,7 +486,7 @@ int test_main() TEST_CHECK(errors::reserved129 == 129); TEST_CHECK(errors::reserved159 == 159); - TEST_CHECK(errors::reserved108 == 108); + TEST_CHECK(errors::reserved109 == 109); { // test session state load/restore @@ -1062,6 +1157,16 @@ int test_main() #endif TEST_CHECK(is_any(address_v4::any())); TEST_CHECK(!is_any(address::from_string("31.53.21.64", ec))); + + TEST_CHECK(match_addr_mask( + address::from_string("10.0.1.3", ec), + address::from_string("10.0.3.3", ec), + address::from_string("255.255.0.0", ec))); + + TEST_CHECK(!match_addr_mask( + address::from_string("10.0.1.3", ec), + address::from_string("10.1.3.3", ec), + address::from_string("255.255.0.0", ec))); // test torrent parsing diff --git a/test/test_utp.cpp b/test/test_utp.cpp new file mode 100644 index 000000000..22ec980a4 --- /dev/null +++ b/test/test_utp.cpp @@ -0,0 +1,151 @@ +/* + +Copyright (c) 2008, Arvid Norberg +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the distribution. + * Neither the name of the author nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +#include "libtorrent/session.hpp" +#include "libtorrent/session_settings.hpp" +#include "libtorrent/hasher.hpp" +#include "libtorrent/alert_types.hpp" +#include "libtorrent/bencode.hpp" +#include "libtorrent/thread.hpp" +#include "libtorrent/time.hpp" +#include "libtorrent/file.hpp" +#include <boost/tuple/tuple.hpp> +#include <boost/bind.hpp> // NOTE(review): header name reconstructed after extraction stripped it; confirm against upstream + +#include "test.hpp" +#include "setup_transfer.hpp" +#include <fstream> +#include <iostream> + +using namespace libtorrent; +using boost::tuples::ignore; + +void test_transfer() // transfers a torrent between two local sessions with outgoing TCP disabled +{ + // in case the previous run was terminated + error_code ec; + remove_all("./tmp1_utp", ec); + remove_all("./tmp2_utp", ec); + + session ses1(fingerprint("LT", 0, 1, 0, 0), std::make_pair(48885, 49930), "0.0.0.0", 0); + session ses2(fingerprint("LT", 0, 1, 0, 0), std::make_pair(49885, 50930), "0.0.0.0", 0); + + session_settings sett; + + sett.enable_outgoing_tcp = false; + sett.min_reconnect_time = 1; + sett.announce_to_all_trackers = true; + sett.announce_to_all_tiers = true; + // make sure we announce to both http and udp trackers + sett.prefer_udp_trackers = false; + + // for performance testing +// sett.disable_hash_checks = true; +// sett.utp_delayed_ack = 0; + + // disable this to use regular size packets over loopback +// sett.utp_dynamic_sock_buf = false; + + ses1.set_settings(sett); + ses2.set_settings(sett); + +#ifndef TORRENT_DISABLE_ENCRYPTION + pe_settings pes; + pes.out_enc_policy = pe_settings::disabled; + pes.in_enc_policy = pe_settings::disabled; + ses1.set_pe_settings(pes); + ses2.set_pe_settings(pes); +#endif + + torrent_handle tor1; + torrent_handle tor2; + + create_directory("./tmp1_utp", ec); + std::ofstream file("./tmp1_utp/temporary"); + boost::intrusive_ptr<torrent_info> t = ::create_torrent(&file, 16 * 1024, 1000, false); + file.close(); + + // for performance testing + 
add_torrent_params atp; +// atp.storage = &disabled_storage_constructor; + + // test using piece sizes smaller than 16kB + boost::tie(tor1, tor2, ignore) = setup_transfer(&ses1, &ses2, 0 + , true, false, true, "_utp", 8 
* 1024, &t, false, &atp); + + for (int i = 0; i < 300; ++i) + { + print_alerts(ses1, "ses1", true, true, true); + print_alerts(ses2, "ses2", true, true, true); + + torrent_status st1 = tor1.status(); + torrent_status st2 = tor2.status(); + + std::cerr + << "\033[32m" << int(st1.download_payload_rate / 1000.f) << "kB/s " + << "\033[33m" << int(st1.upload_payload_rate / 1000.f) << "kB/s " + << "\033[0m" << int(st1.progress * 100) << "% " + << st1.num_peers + << ": " + << "\033[32m" << int(st2.download_payload_rate / 1000.f) << "kB/s " + << "\033[31m" << int(st2.upload_payload_rate / 1000.f) << "kB/s " + << "\033[0m" << int(st2.progress * 100) << "% " + << st2.num_peers + << " cc: " << st2.connect_candidates + << std::endl; + + if (st2.is_finished) break; + + TEST_CHECK(st1.state == torrent_status::seeding + || st1.state == torrent_status::checking_files); + TEST_CHECK(st2.state == torrent_status::downloading); + + test_sleep(500); + } + + TEST_CHECK(tor1.status().is_finished); + TEST_CHECK(tor2.status().is_finished); +} + +int test_main() +{ + using namespace libtorrent; + + test_transfer(); + + error_code ec; + remove_all("./tmp1_utp", ec); + remove_all("./tmp2_utp", ec); + + return 0; +} +