From e7b8b6da491f5ec27cf88b768191c79ebdcbf48a Mon Sep 17 00:00:00 2001
From: arvidn
Date: Sat, 22 Aug 2015 15:24:49 +0200
Subject: [PATCH] forward port DHT routing table fix from RC_1_0

---
 ChangeLog                      |  1 +
 src/kademlia/routing_table.cpp | 16 ++++++-----
 test/test_dht.cpp              | 50 ++++++++++++++++++++++++++++++----
 3 files changed, 54 insertions(+), 13 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index a4b121829..0461d05e2 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -73,6 +73,7 @@
 
 	* added support for hashing pieces in multiple threads
+	* improve DHT routing table to not create an unbalanced tree
 	* fix bug in uTP that would cause any connection taking more than one second
 	  to connect be timed out (introduced in the vulnerability path)
 	* fixed falling back to sending UDP packets direct when socks proxy fails

diff --git a/src/kademlia/routing_table.cpp b/src/kademlia/routing_table.cpp
index a5aa62233..1c1b41495 100644
--- a/src/kademlia/routing_table.cpp
+++ b/src/kademlia/routing_table.cpp
@@ -462,7 +462,7 @@ bool compare_ip_cidr(node_entry const& lhs, node_entry const& rhs)
 } // anonymous namespace
 
 node_entry* routing_table::find_node(udp::endpoint const& ep
-	, routing_table::table_t::iterator* bucket)
+	, routing_table::table_t::iterator* bucket)
 {
 	for (table_t::iterator i = m_buckets.begin()
 		, end(m_buckets.end()); i != end; ++i)
@@ -489,7 +489,7 @@ node_entry* routing_table::find_node(udp::endpoint const& ep
 }
 
 void routing_table::remove_node(node_entry* n
-	, routing_table::table_t::iterator bucket)
+	, routing_table::table_t::iterator bucket)
 {
 	INVARIANT_CHECK;
 
@@ -612,7 +612,7 @@ routing_table::add_node_status_t routing_table::add_node_impl(node_entry e)
 			remove_node(existing, existing_bucket);
 		}
 	}
-	
+
 	table_t::iterator i = find_bucket(e.id);
 	bucket_t& b = i->live_nodes;
 	bucket_t& rb = i->replacements;
@@ -719,8 +719,10 @@ routing_table::add_node_status_t routing_table::add_node_impl(node_entry e)
 	// can we split the bucket?
 	// only nodes that haven't failed can split the bucket, and we can only
 	// split the last bucket
-	bool can_split = (boost::next(i) == m_buckets.end() && m_buckets.size() < 159)
-		&& e.fail_count() == 0;
+	const bool can_split = (boost::next(i) == m_buckets.end()
+		&& m_buckets.size() < 159)
+		&& e.fail_count() == 0
+		&& (i == m_buckets.begin() || boost::prior(i)->live_nodes.size() > 1);
 
 	if (e.pinged() && e.fail_count() == 0)
 	{
@@ -759,7 +761,7 @@ routing_table::add_node_status_t routing_table::add_node_impl(node_entry e)
 		m_ips.insert(e.addr().to_v4().to_bytes());
 		return node_added;
 	}
-	
+
 	// in order to provide as few lookups as possible before finding
 	// the data someone is looking for, make sure there is an affinity
 	// towards having a good spread of node IDs in each bucket
@@ -1056,7 +1058,7 @@ void routing_table::node_failed(node_id const& nid, udp::endpoint const& ep)
 	// claiming the same ID. The node we have in our routing
 	// table is not necessarily stale
 	if (j->ep() != ep) return;
-	
+
 	if (rb.empty())
 	{
 		j->timed_out();
diff --git a/test/test_dht.cpp b/test/test_dht.cpp
index 49c7f9047..9cbf18ee9 100644
--- a/test/test_dht.cpp
+++ b/test/test_dht.cpp
@@ -443,6 +443,7 @@ struct obs : dht::dht_observer
 };
 
 // TODO: test obfuscated_get_peers
+// TODO: 3 split this test up into smaller test cases
 TORRENT_TEST(dht)
 {
 	dht_settings sett;
@@ -934,7 +935,7 @@ TORRENT_TEST(dht)
 #ifdef TORRENT_USE_VALGRIND
 	VALGRIND_CHECK_MEM_IS_DEFINED(signature, item_sig_len);
 #endif
-	// break the signature 
+	// break the signature
 	signature[2] ^= 0xaa;
 
 	fprintf(stderr, "PUT broken signature\n");
@@ -1064,19 +1065,56 @@ TORRENT_TEST(dht)
 		node_id diff = to_hash("15764f7459456a9453f8719b09547c11d5f34061");
 
 		routing_table tbl(id, 8, sett, &observer);
-		
+
 		// insert 256 nodes evenly distributed across the ID space.
 		// we expect to fill the top 5 buckets
-		for (int i = 0; i < 256; ++i)
+		for (int i = 255; i >= 0; --i)
 		{
 			// test a node with the same IP:port changing ID
 			add_and_replace(id, diff);
-			id[0] = i;
+			// in order to make this node-load a bit more realistic, start from
+			// distant nodes and work our way in closer to the node id
+			// the routing table will reject nodes that are too imbalanced (if all
+			// nodes are very close to our ID and none are far away, it's
+			// suspicious).
+			id[0] ^= i;
+			tbl.node_seen(id, rand_udp_ep(), 20 + (id[19] & 0xff));
+
+			// restore the first byte of the node ID
+			id[0] ^= i;
+		}
+		printf("num_active_buckets: %d\n", tbl.num_active_buckets());
+		// number of nodes per tree level (when adding 256 evenly distributed
+		// nodes):
+		// 0: 128
+		// 1: 64
+		// 2: 32
+		// 3: 16
+		// 4: 8
+		// i.e. no more than 5 levels
+		TEST_EQUAL(tbl.num_active_buckets(), 5);
+
+#if defined TORRENT_DHT_VERBOSE_LOGGING || defined TORRENT_DEBUG
+		tbl.print_state(std::cerr);
+#endif
+	}
+
+	{
+		sett.extended_routing_table = false;
+		node_id id = to_hash("1234876923549721020394873245098347598635");
+
+		routing_table tbl(id, 8, sett, &observer);
+
+		// insert nodes in the routing table that will force it to split
+		// and make sure we don't end up with a table completely out of balance
+		for (int i = 0; i < 32; ++i)
+		{
+			id[4] = i;
 			tbl.node_seen(id, rand_udp_ep(), 20 + (id[19] & 0xff));
 		}
 		printf("num_active_buckets: %d\n", tbl.num_active_buckets());
-		TEST_EQUAL(tbl.num_active_buckets(), 6);
-		
+		TEST_EQUAL(tbl.num_active_buckets(), 2);
+
 #if defined TORRENT_DEBUG
 		tbl.print_state(std::cerr);
 #endif
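
Note (not part of the patch itself): the substance of the fix is the extra clause added to can_split in add_node_impl. Below is a minimal standalone sketch of that rule, assuming made-up names (bucket, can_split_last_bucket) rather than libtorrent's actual types:

// illustrative sketch only -- "bucket" and "can_split_last_bucket" are
// invented names for this note, not libtorrent APIs
#include <cstddef>
#include <vector>

struct bucket
{
	std::size_t live_nodes = 0; // confirmed nodes currently in this bucket
};

// mirrors the patched condition: only the deepest bucket may split, only for
// a node that has never failed, and only if the bucket one level up already
// holds more than one live node (otherwise the tree would grow lopsided)
bool can_split_last_bucket(std::vector<bucket> const& buckets
	, std::size_t index, int fail_count)
{
	bool const is_last = index + 1 == buckets.size();
	bool const below_depth_limit = buckets.size() < 159;
	bool const prev_bucket_populated = index == 0
		|| buckets[index - 1].live_nodes > 1;
	return is_last && below_depth_limit && fail_count == 0
		&& prev_bucket_populated;
}

With bucket size 8 and 256 evenly distributed node IDs, this rule caps the table at the five active buckets the new test asserts (roughly 128, 64, 32, 16 and 8 nodes per level), rather than letting a run of IDs close to our own split the tree into a long, unbalanced chain.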