From e7b8b6da491f5ec27cf88b768191c79ebdcbf48a Mon Sep 17 00:00:00 2001
From: arvidn
Date: Sat, 22 Aug 2015 15:24:49 +0200
Subject: [PATCH] forward port DHT routing table fix from RC_1_0

---
 ChangeLog                      |  1 +
 src/kademlia/routing_table.cpp | 16 ++++++-----
 test/test_dht.cpp              | 50 ++++++++++++++++++++++++++++++----
 3 files changed, 54 insertions(+), 13 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index a4b121829..0461d05e2 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -73,6 +73,7 @@
 
 	* added support for hashing pieces in multiple threads
+	* improve DHT routing table to not create an unbalanced tree
 	* fix bug in uTP that would cause any connection taking more than one second
 	  to connect be timed out (introduced in the vulnerability path)
 	* fixed falling back to sending UDP packets direct when socks proxy fails

diff --git a/src/kademlia/routing_table.cpp b/src/kademlia/routing_table.cpp
index a5aa62233..1c1b41495 100644
--- a/src/kademlia/routing_table.cpp
+++ b/src/kademlia/routing_table.cpp
@@ -462,7 +462,7 @@ bool compare_ip_cidr(node_entry const& lhs, node_entry const& rhs)
 } // anonymous namespace
 
 node_entry* routing_table::find_node(udp::endpoint const& ep
-	, routing_table::table_t::iterator* bucket)
+	, routing_table::table_t::iterator* bucket)
 {
 	for (table_t::iterator i = m_buckets.begin()
 		, end(m_buckets.end()); i != end; ++i)
@@ -489,7 +489,7 @@ node_entry* routing_table::find_node(udp::endpoint const& ep
 }
 
 void routing_table::remove_node(node_entry* n
-	, routing_table::table_t::iterator bucket)
+	, routing_table::table_t::iterator bucket)
 {
 	INVARIANT_CHECK;
 
@@ -612,7 +612,7 @@ routing_table::add_node_status_t routing_table::add_node_impl(node_entry e)
 			remove_node(existing, existing_bucket);
 		}
 	}
-	
+
 	table_t::iterator i = find_bucket(e.id);
 	bucket_t& b = i->live_nodes;
 	bucket_t& rb = i->replacements;
@@ -719,8 +719,10 @@ routing_table::add_node_status_t routing_table::add_node_impl(node_entry e)
 	// can we split the bucket?
 	// only nodes that haven't failed can split the bucket, and we can only
 	// split the last bucket
-	bool can_split = (boost::next(i) == m_buckets.end() && m_buckets.size() < 159)
-		&& e.fail_count() == 0;
+	const bool can_split = (boost::next(i) == m_buckets.end()
+		&& m_buckets.size() < 159)
+		&& e.fail_count() == 0
+		&& (i == m_buckets.begin() || boost::prior(i)->live_nodes.size() > 1);
 
 	if (e.pinged() && e.fail_count() == 0)
 	{
@@ -759,7 +761,7 @@ routing_table::add_node_status_t routing_table::add_node_impl(node_entry e)
 		m_ips.insert(e.addr().to_v4().to_bytes());
 		return node_added;
 	}
-	
+
 	// in order to provide as few lookups as possible before finding
 	// the data someone is looking for, make sure there is an affinity
 	// towards having a good spread of node IDs in each bucket
@@ -1056,7 +1058,7 @@ void routing_table::node_failed(node_id const& nid, udp::endpoint const& ep)
 	// claiming the same ID. The node we have in our routing
 	// table is not necessarily stale
 	if (j->ep() != ep) return;
-	
+
 	if (rb.empty())
 	{
 		j->timed_out();
diff --git a/test/test_dht.cpp b/test/test_dht.cpp
index 49c7f9047..9cbf18ee9 100644
--- a/test/test_dht.cpp
+++ b/test/test_dht.cpp
@@ -443,6 +443,7 @@ struct obs : dht::dht_observer
 };
 
 // TODO: test obfuscated_get_peers
+// TODO: 3 split this test up into smaller test cases
 TORRENT_TEST(dht)
 {
 	dht_settings sett;
@@ -934,7 +935,7 @@ TORRENT_TEST(dht)
 #ifdef TORRENT_USE_VALGRIND
 	VALGRIND_CHECK_MEM_IS_DEFINED(signature, item_sig_len);
 #endif
-	// break the signature 
+	// break the signature
 	signature[2] ^= 0xaa;
 
 	fprintf(stderr, "PUT broken signature\n");
@@ -1064,19 +1065,56 @@ TORRENT_TEST(dht)
 		node_id diff = to_hash("15764f7459456a9453f8719b09547c11d5f34061");
 
 		routing_table tbl(id, 8, sett, &observer);
-		
+
 		// insert 256 nodes evenly distributed across the ID space.
 		// we expect to fill the top 5 buckets
-		for (int i = 0; i < 256; ++i)
+		for (int i = 255; i >= 0; --i)
 		{
 			// test a node with the same IP:port changing ID
 			add_and_replace(id, diff);
-			id[0] = i;
+			// in order to make this node-load a bit more realistic, start from
+			// distant nodes and work our way in closer to the node id
+			// the routing table will reject nodes that are too imbalanced (if all
+			// nodes are very close to our ID and none are far away, it's
+			// suspicious).
+			id[0] ^= i;
+			tbl.node_seen(id, rand_udp_ep(), 20 + (id[19] & 0xff));
+
+			// restore the first byte of the node ID
+			id[0] ^= i;
+		}
+		printf("num_active_buckets: %d\n", tbl.num_active_buckets());
+		// number of nodes per tree level (when adding 256 evenly distributed
+		// nodes):
+		// 0: 128
+		// 1: 64
+		// 2: 32
+		// 3: 16
+		// 4: 8
+		// i.e. no more than 5 levels
+		TEST_EQUAL(tbl.num_active_buckets(), 5);
+
+#if defined TORRENT_DHT_VERBOSE_LOGGING || defined TORRENT_DEBUG
+		tbl.print_state(std::cerr);
+#endif
+	}
+
+	{
+		sett.extended_routing_table = false;
+		node_id id = to_hash("1234876923549721020394873245098347598635");
+
+		routing_table tbl(id, 8, sett, &observer);
+
+		// insert nodes in the routing table that will force it to split
+		// and make sure we don't end up with a table completely out of balance
+		for (int i = 0; i < 32; ++i)
+		{
+			id[4] = i;
 			tbl.node_seen(id, rand_udp_ep(), 20 + (id[19] & 0xff));
 		}
 		printf("num_active_buckets: %d\n", tbl.num_active_buckets());
-		TEST_EQUAL(tbl.num_active_buckets(), 6);
-		
+		TEST_EQUAL(tbl.num_active_buckets(), 2);
+
 #if defined TORRENT_DEBUG
 		tbl.print_state(std::cerr);
 #endif
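
Note (not part of the patch itself): the substance of the fix is the extra clause added to can_split in add_node_impl. Below is a minimal standalone sketch of that rule, assuming made-up names (bucket, can_split_last_bucket) rather than libtorrent's actual types:

// illustrative sketch only -- "bucket" and "can_split_last_bucket" are
// invented names for this note, not libtorrent APIs
#include <cstddef>
#include <vector>

struct bucket
{
	std::size_t live_nodes = 0; // confirmed nodes currently in this bucket
};

// mirrors the patched condition: only the deepest bucket may split, only for
// a node that has never failed, and only if the bucket one level up already
// holds more than one live node (otherwise the tree would grow lopsided)
bool can_split_last_bucket(std::vector<bucket> const& buckets
	, std::size_t index, int fail_count)
{
	bool const is_last = index + 1 == buckets.size();
	bool const below_depth_limit = buckets.size() < 159;
	bool const prev_bucket_populated = index == 0
		|| buckets[index - 1].live_nodes > 1;
	return is_last && below_depth_limit && fail_count == 0
		&& prev_bucket_populated;
}

With bucket size 8 and 256 evenly distributed node IDs, this rule caps the table at the five active buckets the new test asserts (roughly 128, 64, 32, 16 and 8 nodes per level), rather than letting a run of IDs close to our own split the tree into a long, unbalanced chain.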