make it a bit more likely to split the last bucket in the DHT routing table

This commit is contained in:
Arvid Norberg 2019-08-11 16:40:20 -07:00 committed by Arvid Norberg
parent e97f7659c8
commit ff113a262c
3 changed files with 136 additions and 65 deletions

View File

@ -114,6 +114,9 @@ struct TORRENT_EXTRA_EXPORT ip_set
TORRENT_EXTRA_EXPORT std::uint8_t classify_prefix(int bucket_idx, bool last_bucket
, int bucket_size, node_id nid);
TORRENT_EXTRA_EXPORT bool all_in_same_bucket(span<node_entry const> b
, node_id const& id, int bucket_index);
// differences in the implementation from the description in
// the paper:
//

View File

@ -605,6 +605,22 @@ bool routing_table::add_node(node_entry const& e)
return false;
}
// returns true if the candidate ID and every node already in bucket "b"
// would all land on the same side of a split at bit "bucket_index". In
// that case splitting the bucket is pointless: one of the two resulting
// buckets would end up empty.
bool all_in_same_bucket(span<node_entry const> b, node_id const& id, int const bucket_index)
{
	// the split bit lives in this byte of the node ID, selected by a mask
	// counting from the most significant bit
	int const byte_idx = bucket_index / 8;
	std::uint8_t const mask = std::uint8_t(0x80 >> (bucket_index % 8));

	// track whether we have seen a node on either side of the split
	bool seen[2] = {false, false};
	seen[(id[byte_idx] & mask) ? 1 : 0] = true;
	for (node_entry const& entry : b)
		seen[(entry.id[byte_idx] & mask) ? 1 : 0] = true;

	// all in the same bucket unless both sides are represented
	return !(seen[0] && seen[1]);
}
routing_table::add_node_status_t routing_table::add_node_impl(node_entry e)
{
#ifdef TORRENT_EXPENSIVE_INVARIANT_CHECKS
@ -790,16 +806,6 @@ routing_table::add_node_status_t routing_table::add_node_impl(node_entry e)
}
ip_ok:
// can we split the bucket?
// only nodes that have been confirmed can split the bucket, and we can only
// split the last bucket
bool const can_split = (std::next(i) == m_buckets.end()
&& m_buckets.size() < 159)
&& (m_settings.prefer_verified_node_ids == false
|| (e.verified && mostly_verified_nodes(b)))
&& e.confirmed()
&& (i == m_buckets.begin() || std::prev(i)->live_nodes.size() > 1);
// if there's room in the main bucket, just insert it
// if we can split the bucket (i.e. it's the last bucket) use the next
// bucket's size limit. This makes us split the low-numbered buckets split
@ -824,6 +830,20 @@ ip_ok:
bool const last_bucket = bucket_index + 1 == int(m_buckets.size());
// only nodes that have been confirmed can split the bucket, and we can only
// split the last bucket
// if all nodes in the bucket, including the new node id (e.id) fall in the
// same bucket, splitting isn't going to do anything.
bool const can_split = (std::next(i) == m_buckets.end()
&& m_buckets.size() < 159)
&& (m_settings.prefer_verified_node_ids == false
|| (e.verified && mostly_verified_nodes(b)))
&& e.confirmed()
&& (i == m_buckets.begin() || std::prev(i)->live_nodes.size() > 1)
&& !all_in_same_bucket(b, e.id, bucket_index);
if (can_split) return need_bucket_split;
if (e.confirmed())
{
auto const ret = replace_node_impl(e, b, m_ips, bucket_index, bucket_size_limit, last_bucket
@ -834,10 +854,9 @@ ip_ok:
if (ret != need_bucket_split) return ret;
}
// if we can't split, try to insert into the replacement bucket
// if we can't split, nor replace anything in the live buckets try to insert
// into the replacement bucket
if (!can_split)
{
// if we don't have any identified stale nodes in
// the bucket, and the bucket is full, we have to
// cache this node and wait until some node fails
@ -881,9 +900,6 @@ ip_ok:
return node_added;
}
return need_bucket_split;
}
void routing_table::split_bucket()
{
INVARIANT_CHECK;

View File

@ -3019,7 +3019,7 @@ TORRENT_TEST(routing_table_balance)
// and make sure we don't end up with a table completely out of balance
for (int i = 0; i < 32; ++i)
{
id[4] = i & 0xff;
id[0] = (i << 3) & 0xff;
tbl.node_seen(id, rand_udp_ep(), 20 + (id[19] & 0xff));
}
std::printf("num_active_buckets: %d\n", tbl.num_active_buckets());
@ -3123,7 +3123,7 @@ TORRENT_TEST(routing_table_for_each)
for (int i = 0; i < 32; ++i)
{
id[4] = i & 0xff;
id[0] = (i << 3) & 0xff;
tbl.node_seen(id, rand_udp_ep(), 20 + (id[19] & 0xff));
}
@ -3136,20 +3136,20 @@ TORRENT_TEST(routing_table_for_each)
std::printf("replacements: %d\n", replacements);
TEST_EQUAL(tbl.num_active_buckets(), 2);
TEST_EQUAL(nodes, 2);
TEST_EQUAL(replacements, 2);
TEST_EQUAL(nodes, 4);
TEST_EQUAL(replacements, 4);
print_state(std::cout, tbl);
std::vector<node_entry> v;
tbl.for_each_node(std::bind(node_push_back, &v, _1), nullptr);
TEST_EQUAL(v.size(), 2);
v.clear();
tbl.for_each_node(nullptr, std::bind(node_push_back, &v, _1));
TEST_EQUAL(v.size(), 2);
v.clear();
tbl.for_each_node(std::bind(node_push_back, &v, _1));
tbl.for_each_node([&](node_entry const& e) { v.push_back(e); }, nullptr);
TEST_EQUAL(v.size(), 4);
v.clear();
tbl.for_each_node(nullptr, [&](node_entry const& e) { v.push_back(e); });
TEST_EQUAL(v.size(), 4);
v.clear();
tbl.for_each_node([&](node_entry const& e) { v.push_back(e); });
TEST_EQUAL(v.size(), 8);
}
TORRENT_TEST(node_set_id)
@ -3992,6 +3992,58 @@ TORRENT_TEST(replace_node_impl)
}
}
// exercises all_in_same_bucket(): it reports whether the candidate node ID
// and every entry already in the bucket fall on the same side of a split at
// the given bit index (true means splitting the bucket would be pointless)
TORRENT_TEST(all_in_same_bucket)
{
// an empty bucket is trivially "all in the same bucket", whatever bit the
// split would happen at
TEST_CHECK(all_in_same_bucket({}, to_hash("8000000000000000000000000000000000000000"), 0) == true);
TEST_CHECK(all_in_same_bucket({}, to_hash("8000000000000000000000000000000000000001"), 1) == true);
TEST_CHECK(all_in_same_bucket({}, to_hash("8000000000000000000000000000000000000002"), 2) == true);
TEST_CHECK(all_in_same_bucket({}, to_hash("8000000000000000000000000000000000000003"), 3) == true);
{
// bit 0 differs between the bucket's node (0x00...) and the candidate
// (0x80...), so they straddle the split
dht::bucket_t b = {
n(nullptr, "0000000000000000000000000000000000000000"),
};
TEST_CHECK(all_in_same_bucket(b, to_hash("8000000000000000000000000000000000000000"), 0) == false);
}
{
// all three IDs have the top bit set, so at bit 0 they share a side
dht::bucket_t b = {
n(nullptr, "8000000000000000000000000000000000000000"),
n(nullptr, "f000000000000000000000000000000000000000"),
};
TEST_CHECK(all_in_same_bucket(b, to_hash("8000000000000000000000000000000000000000"), 0) == true);
}
{
// one bucket node has the top bit clear, the candidate has it set
dht::bucket_t b = {
n(nullptr, "8000000000000000000000000000000000000000"),
n(nullptr, "0000000000000000000000000000000000000000"),
};
TEST_CHECK(all_in_same_bucket(b, to_hash("8000000000000000000000000000000000000000"), 0) == false);
}
{
// bit 4 (mask 0x08 in the first byte): 0x08... vs 0x00... disagree
dht::bucket_t b = {
n(nullptr, "0800000000000000000000000000000000000000"),
n(nullptr, "0000000000000000000000000000000000000000"),
};
TEST_CHECK(all_in_same_bucket(b, to_hash("0800000000000000000000000000000000000000"), 4) == false);
}
{
// identical IDs necessarily agree on every bit, including bit 4
dht::bucket_t b = {
n(nullptr, "0800000000000000000000000000000000000000"),
n(nullptr, "0800000000000000000000000000000000000000"),
};
TEST_CHECK(all_in_same_bucket(b, to_hash("0800000000000000000000000000000000000000"), 4) == true);
}
{
// bit 13 (mask 0x04 in the second byte): 0x07, 0x04 and 0x05 all have
// that bit set, so everything stays on one side
dht::bucket_t b = {
n(nullptr, "0007000000000000000000000000000000000000"),
n(nullptr, "0004000000000000000000000000000000000000"),
};
TEST_CHECK(all_in_same_bucket(b, to_hash("0005000000000000000000000000000000000000"), 13) == true);
}
}
// TODO: test obfuscated_get_peers
#else