From b49999b76ec2e5e8d04604d22a5264fe47bead36 Mon Sep 17 00:00:00 2001 From: Arvid Norberg Date: Sat, 8 Jan 2011 08:54:51 +0000 Subject: [PATCH] made the DHT implementation slightly more robust against routing table poisoning and node ID spoofing --- ChangeLog | 1 + bindings/python/src/session_settings.cpp | 4 + docs/manual.rst | 11 ++ include/libtorrent/kademlia/node.hpp | 2 + include/libtorrent/kademlia/routing_table.hpp | 6 + include/libtorrent/kademlia/rpc_manager.hpp | 5 - include/libtorrent/session_settings.hpp | 14 ++ src/broadcast_socket.cpp | 3 +- src/kademlia/routing_table.cpp | 131 +++++++++++++++++- src/kademlia/rpc_manager.cpp | 13 +- src/kademlia/traversal_algorithm.cpp | 33 +++++ src/session_impl.cpp | 3 +- 12 files changed, 203 insertions(+), 23 deletions(-) diff --git a/ChangeLog b/ChangeLog index 5682de222..e1353453d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,4 @@ + * made the DHT implementation slightly more robust against routing table poisoning and node ID spoofing * support chunked encoding in http downloads (http_connection) * support adding torrents by url to the .torrent file * support CDATA tags in xml parser diff --git a/bindings/python/src/session_settings.cpp b/bindings/python/src/session_settings.cpp index e53c155b8..fb90b8483 100644 --- a/bindings/python/src/session_settings.cpp +++ b/bindings/python/src/session_settings.cpp @@ -179,8 +179,12 @@ void bind_session_settings() class_("dht_settings") .def_readwrite("max_peers_reply", &dht_settings::max_peers_reply) .def_readwrite("search_branching", &dht_settings::search_branching) +#ifndef TORRENT_NO_DEPRECATE .def_readwrite("service_port", &dht_settings::service_port) +#endif .def_readwrite("max_fail_count", &dht_settings::max_fail_count) + .def_readwrite("restrict_routing_ips", &dht_settings::restrict_routing_ips) + .def_readwrite("restrict_search_ips", &dht_settings::restrict_search_ips) ; #endif diff --git a/docs/manual.rst b/docs/manual.rst index 2902bf686..8c80f0830 100644 --- a/docs/manual.rst +++ b/docs/manual.rst @@ -1110,6 +1110,8 @@ struct has the following members:: int max_peers_reply; int search_branching; int max_fail_count; + bool restrict_routing_ips; + bool restrict_search_ips; }; ``max_peers_reply`` is the maximum number of peers the node will send in @@ -1125,6 +1127,15 @@ that are ready to replace a failing node, it will be replaced immediately, this limit is only used to clear out nodes that don't have any node that can replace them. +``restrict_routing_ips`` determines if the routing table entries should restrict +entries to one per IP. This defaults to true, which helps mitigate some attacks +on the DHT. It prevents adding multiple nodes with IPs with a very close CIDR +distance. + +``restrict_search_ips`` determines if DHT searches should prevent adding nodes +with IPs with very close CIDR distance. This also defaults to true and helps +mitigate certain attacks on the DHT. + The ``dht_settings`` struct used to contain a ``service_port`` member to control which port the DHT would listen on and send messages from. This field is deprecated and ignored. libtorrent always tries to open the UDP socket on the same port diff --git a/include/libtorrent/kademlia/node.hpp b/include/libtorrent/kademlia/node.hpp index 7bca2a4bc..67a8529a7 100644 --- a/include/libtorrent/kademlia/node.hpp +++ b/include/libtorrent/kademlia/node.hpp @@ -257,6 +257,8 @@ public: void status(libtorrent::session_status& s); + dht_settings const& settings() const { return m_settings; } + protected: // is called when a find data request is received. Should // return false if the data is not stored on this node. If diff --git a/include/libtorrent/kademlia/routing_table.hpp b/include/libtorrent/kademlia/routing_table.hpp index c3f2cb06f..893e28323 100644 --- a/include/libtorrent/kademlia/routing_table.hpp +++ b/include/libtorrent/kademlia/routing_table.hpp @@ -199,6 +199,12 @@ private: // be used in searches, but they will never // be added to the routing table. std::set m_router_nodes; + + // these are all the IPs that are in the routing + // table. It's used to only allow a single entry + // per IP in the whole table. Currently only for + // IPv4 + std::set m_ips; }; } } // namespace libtorrent::dht diff --git a/include/libtorrent/kademlia/rpc_manager.hpp b/include/libtorrent/kademlia/rpc_manager.hpp index a1d190f27..30f62475b 100644 --- a/include/libtorrent/kademlia/rpc_manager.hpp +++ b/include/libtorrent/kademlia/rpc_manager.hpp @@ -98,8 +98,6 @@ public: private: - enum { max_transaction_id = 0x10000 }; - boost::uint32_t calc_connection_id(udp::endpoint addr); mutable boost::pool<> m_pool_allocator; @@ -107,9 +105,6 @@ private: typedef std::list transactions_t; transactions_t m_transactions; - // this is the next transaction id to be used - int m_next_transaction_id; - send_fun m_send; void* m_userdata; node_id m_our_id; diff --git a/include/libtorrent/session_settings.hpp b/include/libtorrent/session_settings.hpp index 5561d7dc7..850b8636c 100644 --- a/include/libtorrent/session_settings.hpp +++ b/include/libtorrent/session_settings.hpp @@ -1033,6 +1033,8 @@ namespace libtorrent #endif , max_fail_count(20) , max_torrent_search_reply(20) + , restrict_routing_ips(true) + , restrict_search_ips(true) {} // the maximum number of peers to send in a @@ -1056,6 +1058,18 @@ namespace libtorrent // the max number of torrents to return in a // torrent search query to the DHT int max_torrent_search_reply; + + // when set, nodes whose IP address that's in + // the same /24 (or /64 for IPv6) range in the + // same routing table bucket. This is an attempt + // to mitigate node ID spoofing attacks + // also restrict any IP to only have a single + // entry in the whole routing table + bool restrict_routing_ips; + + // applies the same IP restrictions on nodes + // received during a DHT search (traversal algorithm) + bool restrict_search_ips; }; #endif diff --git a/src/broadcast_socket.cpp b/src/broadcast_socket.cpp index 238f8ea1f..40ac8d964 100644 --- a/src/broadcast_socket.cpp +++ b/src/broadcast_socket.cpp @@ -172,7 +172,8 @@ namespace libtorrent } // returns the number of bits in that differ from the right - // between the addresses. + // between the addresses. The larger number, the further apart + // the IPs are int cidr_distance(address const& a1, address const& a2) { #if TORRENT_USE_IPV6 diff --git a/src/kademlia/routing_table.cpp b/src/kademlia/routing_table.cpp index b71402dea..b1ce649db 100644 --- a/src/kademlia/routing_table.cpp +++ b/src/kademlia/routing_table.cpp @@ -41,6 +41,7 @@ POSSIBILITY OF SUCH DAMAGE. #include #include "libtorrent/kademlia/routing_table.hpp" +#include "libtorrent/broadcast_socket.hpp" // for cidr_distance #include "libtorrent/session_status.hpp" #include "libtorrent/kademlia/node_id.hpp" #include "libtorrent/session_settings.hpp" @@ -277,12 +278,69 @@ routing_table::table_t::iterator routing_table::find_bucket(node_id const& id) return i; } +bool compare_ip_cidr(node_entry const& lhs, node_entry const& rhs) +{ + TORRENT_ASSERT(lhs.addr.is_v4() == rhs.addr.is_v4()); + // the number of bits in the IPs that may match. If + // more bits that this matches, something suspicious is + // going on and we shouldn't add the second one to our + // routing table + int cutoff = rhs.addr.is_v4() ? 8 : 64; + int dist = cidr_distance(lhs.addr, rhs.addr); + return dist <= cutoff; +} + bool routing_table::add_node(node_entry const& e) { if (m_router_nodes.find(e.ep()) != m_router_nodes.end()) return false; bool ret = need_bootstrap(); + // if we're restricting IPs, check if we already have this IP in the table + if (m_settings.restrict_routing_ips + && m_ips.find(e.addr.to_v4().to_bytes()) != m_ips.end()) + { +#ifdef TORRENT_DHT_VERBOSE_LOGGING + bool id_match = false; + bool found = false; + node_id other_id; + for (table_t::iterator i = m_buckets.begin() + , end(m_buckets.end()); i != end; ++i) + { + for (bucket_t::iterator j = i->replacements.begin(); + j != i->replacements.end(); ++j) + { + if (j->addr != e.addr) continue; + found = true; + other_id = j->id; + if (j->id != e.id) continue; + id_match = true; + break; + } + if (id_match) break; + for (bucket_t::iterator j = i->live_nodes.begin(); + j != i->live_nodes.end(); ++j) + { + if (j->addr != e.addr) continue; + found = true; + other_id = j->id; + if (j->id != e.id) continue; + id_match = true; + break; + } + if (id_match) break; + } + TORRENT_ASSERT(found); + if (!id_match) + { + TORRENT_LOG(table) << "ignoring node (duplicate IP): " + << e.id << " " << e.addr + << " existing: " << other_id; + } +#endif + return ret; + } + // don't add ourself if (e.id == m_id) return ret; @@ -290,12 +348,18 @@ bool routing_table::add_node(node_entry const& e) bucket_t& b = i->live_nodes; bucket_t& rb = i->replacements; + bucket_t::iterator j; + // if the node already exists, we don't need it - bucket_t::iterator j = std::find_if(b.begin(), b.end() + j = std::find_if(b.begin(), b.end() , boost::bind(&node_entry::id, _1) == e.id); if (j != b.end()) { + // a new IP address just claimed this node-ID + // ignore it + if (j->addr != e.addr || j->port != e.port) return ret; + // we already have the node in our bucket // just move it to the back since it was // the last node we had any contact with @@ -308,6 +372,36 @@ bool routing_table::add_node(node_entry const& e) if (std::find_if(rb.begin(), rb.end(), boost::bind(&node_entry::id, _1) == e.id) != rb.end()) return ret; + if (m_settings.restrict_routing_ips) + { + // don't allow multiple entries from IPs very close to each other + j = std::find_if(b.begin(), b.end(), boost::bind(&compare_ip_cidr, _1, e)); + if (j != b.end()) + { + // we already have a node in this bucket with an IP very + // close to this one. We know that it's not the same, because + // it claims a different node-ID. Ignore this to avoid attacks +#ifdef TORRENT_DHT_VERBOSE_LOGGING + TORRENT_LOG(table) << "ignoring node: " << e.id << " " << e.addr + << " existing node: " + << j->id << " " << j->addr; +#endif + return ret; + } + + j = std::find_if(rb.begin(), rb.end(), boost::bind(&compare_ip_cidr, _1, e)); + if (j != rb.end()) + { + // same thing bug for the replacement bucket +#ifdef TORRENT_DHT_VERBOSE_LOGGING + TORRENT_LOG(table) << "ignoring (replacement) node: " << e.id << " " << e.addr + << " existing node: " + << j->id << " " << j->addr; +#endif + return ret; + } + } + // if the node was not present in our list // we will only insert it if there is room // for it, or if some of our nodes have gone @@ -316,6 +410,7 @@ bool routing_table::add_node(node_entry const& e) { if (b.empty()) b.reserve(m_bucket_size); b.push_back(e); + m_ips.insert(e.addr.to_v4().to_bytes()); // TORRENT_LOG(table) << "inserting node: " << e.id << " " << e.addr; return ret; } @@ -344,8 +439,10 @@ bool routing_table::add_node(node_entry const& e) { // j points to a node that has not been pinged. // Replace it with this new one + m_ips.erase(j->addr.to_v4().to_bytes()); b.erase(j); b.push_back(e); + m_ips.insert(e.addr.to_v4().to_bytes()); // TORRENT_LOG(table) << "replacing unpinged node: " << e.id << " " << e.addr; return ret; } @@ -364,8 +461,10 @@ bool routing_table::add_node(node_entry const& e) { // i points to a node that has been marked // as stale. Replace it with this new one + m_ips.erase(j->addr.to_v4().to_bytes()); b.erase(j); b.push_back(e); + m_ips.insert(e.addr.to_v4().to_bytes()); // TORRENT_LOG(table) << "replacing stale node: " << e.id << " " << e.addr; return ret; } @@ -387,10 +486,9 @@ bool routing_table::add_node(node_entry const& e) // just return. if (j != rb.end()) { - // make sure we mark this node as pinged - // and if its address has changed, update - // that as well - *j = e; + // if the IP address matches, it's the same node + // make sure it's marked as pinged + if (j->ep() == e.ep()) j->set_pinged(); return ret; } @@ -400,11 +498,14 @@ bool routing_table::add_node(node_entry const& e) // but prefer nodes that haven't been pinged, since they are // less reliable than this one, that has been pinged j = std::find_if(rb.begin(), rb.end(), boost::bind(&node_entry::pinged, _1) == false); - rb.erase(j != rb.end() ? j : rb.begin()); + if (j == rb.end()) j = rb.begin(); + m_ips.erase(j->addr.to_v4().to_bytes()); + rb.erase(j); } if (rb.empty()) rb.reserve(m_bucket_size); rb.push_back(e); + m_ips.insert(e.addr.to_v4().to_bytes()); // TORRENT_LOG(table) << "inserting node in replacement cache: " << e.id << " " << e.addr; return ret; } @@ -458,21 +559,35 @@ bool routing_table::add_node(node_entry const& e) j = rb.erase(j); } + bool added = false; // now insert the new node in the appropriate bucket if (distance_exp(m_id, e.id) >= 159 - bucket_index) { if (b.size() < m_bucket_size) + { b.push_back(e); + added = true; + } else if (rb.size() < m_bucket_size) + { rb.push_back(e); + added = true; + } } else { if (new_bucket.size() < m_bucket_size) + { new_bucket.push_back(e); + added = true; + } else if (new_replacement_bucket.size() < m_bucket_size) + { new_replacement_bucket.push_back(e); + added = true; + } } + if (added) m_ips.insert(e.addr.to_v4().to_bytes()); return ret; } @@ -529,10 +644,14 @@ void routing_table::node_failed(node_id const& id) // if this node has failed too many times, or if this node // has never responded at all, remove it if (j->fail_count() >= m_settings.max_fail_count || !j->pinged()) + { + m_ips.erase(j->addr.to_v4().to_bytes()); b.erase(j); + } return; } + m_ips.erase(j->addr.to_v4().to_bytes()); b.erase(j); j = std::find_if(rb.begin(), rb.end(), boost::bind(&node_entry::pinged, _1) == true); diff --git a/src/kademlia/rpc_manager.cpp b/src/kademlia/rpc_manager.cpp index f7b279fbf..92c1a5b60 100644 --- a/src/kademlia/rpc_manager.cpp +++ b/src/kademlia/rpc_manager.cpp @@ -163,7 +163,6 @@ rpc_manager::rpc_manager(node_id const& our_id , routing_table& table, send_fun const& sf , void* userdata, aux::session_impl& ses) : m_pool_allocator(observer_size, 10) - , m_next_transaction_id(std::rand() % max_transaction_id) , m_send(sf) , m_userdata(userdata) , m_our_id(our_id) @@ -182,7 +181,6 @@ rpc_manager::rpc_manager(node_id const& our_id #define PRINT_OFFSETOF(x, y) TORRENT_LOG(rpc) << " +" << offsetof(x, y) << ": " #y TORRENT_LOG(rpc) << " observer: " << sizeof(observer); - PRINT_OFFSETOF(observer, flags); PRINT_OFFSETOF(observer, m_sent); PRINT_OFFSETOF(observer, m_refs); PRINT_OFFSETOF(observer, m_algorithm); @@ -190,6 +188,7 @@ rpc_manager::rpc_manager(node_id const& our_id PRINT_OFFSETOF(observer, m_addr); PRINT_OFFSETOF(observer, m_port); PRINT_OFFSETOF(observer, m_transaction_id); + PRINT_OFFSETOF(observer, flags); TORRENT_LOG(rpc) << " announce_observer: " << sizeof(announce_observer); TORRENT_LOG(rpc) << " null_observer: " << sizeof(null_observer); @@ -238,9 +237,6 @@ size_t rpc_manager::allocation_size() const void rpc_manager::check_invariant() const { - TORRENT_ASSERT(m_next_transaction_id >= 0); - TORRENT_ASSERT(m_next_transaction_id < max_transaction_id); - for (transactions_t::const_iterator i = m_transactions.begin() , end(m_transactions.end()); i != end; ++i) { @@ -458,11 +454,12 @@ bool rpc_manager::invoke(entry& e, udp::endpoint target_addr std::string transaction_id; transaction_id.resize(2); char* out = &transaction_id[0]; - io::write_uint16(m_next_transaction_id, out); + int tid = rand() ^ (rand() << 5); + io::write_uint16(tid, out); e["t"] = transaction_id; o->set_target(target_addr); - o->set_transaction_id(m_next_transaction_id); + o->set_transaction_id(tid); #ifdef TORRENT_DHT_VERBOSE_LOGGING TORRENT_LOG(rpc) << "[" << o->m_algorithm.get() << "] invoking " @@ -472,8 +469,6 @@ bool rpc_manager::invoke(entry& e, udp::endpoint target_addr if (m_send(m_userdata, e, target_addr, 1)) { m_transactions.push_back(o); - ++m_next_transaction_id; - m_next_transaction_id %= max_transaction_id; #ifdef TORRENT_DEBUG o->m_was_sent = true; #endif diff --git a/src/kademlia/traversal_algorithm.cpp b/src/kademlia/traversal_algorithm.cpp index ebd062575..7da9991d2 100644 --- a/src/kademlia/traversal_algorithm.cpp +++ b/src/kademlia/traversal_algorithm.cpp @@ -39,6 +39,7 @@ POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include "libtorrent/broadcast_socket.hpp" // for cidr_distance #include @@ -75,6 +76,18 @@ traversal_algorithm::traversal_algorithm( #endif } +bool compare_ip_cidr(observer_ptr const& lhs, observer_ptr const& rhs) +{ + TORRENT_ASSERT(lhs->target_addr().is_v4() == rhs->target_addr().is_v4()); + // the number of bits in the IPs that may match. If + // more bits that this matches, something suspicious is + // going on and we shouldn't add the second one to our + // routing table + int cutoff = rhs->target_addr().is_v4() ? 4 : 64; + int dist = cidr_distance(lhs->target_addr(), rhs->target_addr()); + return dist <= cutoff; +} + void traversal_algorithm::add_entry(node_id const& id, udp::endpoint addr, unsigned char flags) { TORRENT_ASSERT(m_node.m_rpc.allocation_size() >= sizeof(find_data_observer)); @@ -111,6 +124,26 @@ void traversal_algorithm::add_entry(node_id const& id, udp::endpoint addr, unsig if (i == m_results.end() || (*i)->id() != id) { + if (m_node.settings().restrict_search_ips) + { + // don't allow multiple entries from IPs very close to each other + std::vector::iterator j = std::find_if( + m_results.begin(), m_results.end(), boost::bind(&compare_ip_cidr, _1, o)); + + if (j != m_results.end()) + { + // we already have a node in this search with an IP very + // close to this one. We know that it's not the same, because + // it claims a different node-ID. Ignore this to avoid attacks +#ifdef TORRENT_DHT_VERBOSE_LOGGING + TORRENT_LOG(traversal) << "ignoring DHT search entry: " << o->id() + << " " << o->target_addr() + << " existing node: " + << (*j)->id() << " " << (*j)->target_addr(); +#endif + return; + } + } TORRENT_ASSERT(std::find_if(m_results.begin(), m_results.end() , boost::bind(&observer::id, _1) == id) == m_results.end()); #ifdef TORRENT_DHT_VERBOSE_LOGGING diff --git a/src/session_impl.cpp b/src/session_impl.cpp index 579be6838..d6c5d601b 100644 --- a/src/session_impl.cpp +++ b/src/session_impl.cpp @@ -4411,8 +4411,7 @@ namespace aux { void session_impl::set_external_address(address const& ip , int source_type, address const& source) { - TORRENT_ASSERT(ip != address()); - + if (is_any(ip)) return; if (is_local(ip)) return; if (is_loopback(ip)) return;