diff --git a/include/libtorrent/kademlia/routing_table.hpp b/include/libtorrent/kademlia/routing_table.hpp index 4ec0e07d7..a971a8328 100644 --- a/include/libtorrent/kademlia/routing_table.hpp +++ b/include/libtorrent/kademlia/routing_table.hpp @@ -111,9 +111,12 @@ struct TORRENT_EXTRA_EXPORT ip_set // necessary to traverse the DHT, we want the nodes in our buckets to be spread // out across all possible "sub-branches". This is what the "classify" refers // to. The 3 (or more) bits following the shared bit prefix. -TORRENT_EXTRA_EXPORT std::uint8_t classify_prefix(int bucket_idx, bool last_bucket +TORRENT_EXTRA_EXPORT std::uint8_t classify_prefix(int bucket_idx, bool last_bucket , int bucket_size, node_id nid); +TORRENT_EXTRA_EXPORT bool all_in_same_bucket(span<node_entry const> b + , node_id const& id, int bucket_index); + // differences in the implementation from the description in // the paper: // diff --git a/src/kademlia/routing_table.cpp b/src/kademlia/routing_table.cpp index b1a83f166..425729588 100644 --- a/src/kademlia/routing_table.cpp +++ b/src/kademlia/routing_table.cpp @@ -605,6 +605,22 @@ bool routing_table::add_node(node_entry const& e) return false; } +bool all_in_same_bucket(span<node_entry const> b, node_id const& id, int const bucket_index) +{ + int const byte_offset = bucket_index / 8; + int const bit_offset = bucket_index % 8; + std::uint8_t const mask = 0x80 >> bit_offset; + int counter[2] = {0, 0}; + int const i = (id[byte_offset] & mask) ? 1 : 0; + ++counter[i]; + for (auto const& e : b) + { + int const idx = (e.id[byte_offset] & mask) ? 1 : 0; + ++counter[idx]; + } + return counter[0] == 0 || counter[1] == 0; +} + routing_table::add_node_status_t routing_table::add_node_impl(node_entry e) { #ifdef TORRENT_EXPENSIVE_INVARIANT_CHECKS @@ -790,16 +806,6 @@ routing_table::add_node_status_t routing_table::add_node_impl(node_entry e) } ip_ok: - // can we split the bucket? 
- // only nodes that have been confirmed can split the bucket, and we can only - // split the last bucket - bool const can_split = (std::next(i) == m_buckets.end() - && m_buckets.size() < 159) - && (m_settings.prefer_verified_node_ids == false - || (e.verified && mostly_verified_nodes(b))) - && e.confirmed() - && (i == m_buckets.begin() || std::prev(i)->live_nodes.size() > 1); - // if there's room in the main bucket, just insert it // if we can split the bucket (i.e. it's the last bucket) use the next // bucket's size limit. This makes use split the low-numbered buckets split @@ -824,6 +830,20 @@ ip_ok: bool const last_bucket = bucket_index + 1 == int(m_buckets.size()); + // only nodes that have been confirmed can split the bucket, and we can only + // split the last bucket + // if all nodes in the bucket, including the new node id (e.id) fall in the + // same bucket, splitting isn't going to do anything. + bool const can_split = (std::next(i) == m_buckets.end() + && m_buckets.size() < 159) + && (m_settings.prefer_verified_node_ids == false + || (e.verified && mostly_verified_nodes(b))) + && e.confirmed() + && (i == m_buckets.begin() || std::prev(i)->live_nodes.size() > 1) + && !all_in_same_bucket(b, e.id, bucket_index); + + if (can_split) return need_bucket_split; + if (e.confirmed()) { auto const ret = replace_node_impl(e, b, m_ips, bucket_index, bucket_size_limit, last_bucket @@ -834,54 +854,50 @@ ip_ok: if (ret != need_bucket_split) return ret; } - // if we can't split, try to insert into the replacement bucket + // if we can't split, nor replace anything in the live buckets try to insert + // into the replacement bucket - if (!can_split) + // if we don't have any identified stale nodes in + // the bucket, and the bucket is full, we have to + // cache this node and wait until some node fails + // and then replace it. 
+ j = std::find_if(rb.begin(), rb.end() + , [&e](node_entry const& ne) { return ne.id == e.id; }); + + // if the node is already in the replacement bucket + // just return. + if (j != rb.end()) { - // if we don't have any identified stale nodes in - // the bucket, and the bucket is full, we have to - // cache this node and wait until some node fails - // and then replace it. - j = std::find_if(rb.begin(), rb.end() - , [&e](node_entry const& ne) { return ne.id == e.id; }); - - // if the node is already in the replacement bucket - // just return. - if (j != rb.end()) - { - // if the IP address matches, it's the same node - // make sure it's marked as pinged - if (j->ep() == e.ep()) j->set_pinged(); - return node_added; - } - - if (int(rb.size()) >= m_bucket_size) - { - // if the replacement bucket is full, remove the oldest entry - // but prefer nodes that haven't been pinged, since they are - // less reliable than this one, that has been pinged - j = std::find_if(rb.begin(), rb.end() - , [] (node_entry const& ne) { return !ne.pinged(); }); - if (j == rb.end()) - { - auto const ret = replace_node_impl(e, rb, m_ips, bucket_index, m_bucket_size, last_bucket -#ifndef TORRENT_DISABLE_LOGGING - , nullptr -#endif - ); - return ret == node_added ? 
node_added : failed_to_add; - } - m_ips.erase(j->addr()); - rb.erase(j); - } - - if (rb.empty()) rb.reserve(m_bucket_size); - rb.push_back(e); - m_ips.insert(e.addr()); + // if the IP address matches, it's the same node + // make sure it's marked as pinged + if (j->ep() == e.ep()) j->set_pinged(); return node_added; } - return need_bucket_split; + if (int(rb.size()) >= m_bucket_size) + { + // if the replacement bucket is full, remove the oldest entry + // but prefer nodes that haven't been pinged, since they are + // less reliable than this one, that has been pinged + j = std::find_if(rb.begin(), rb.end() + , [] (node_entry const& ne) { return !ne.pinged(); }); + if (j == rb.end()) + { + auto const ret = replace_node_impl(e, rb, m_ips, bucket_index, m_bucket_size, last_bucket +#ifndef TORRENT_DISABLE_LOGGING + , nullptr +#endif + ); + return ret == node_added ? node_added : failed_to_add; + } + m_ips.erase(j->addr()); + rb.erase(j); + } + + if (rb.empty()) rb.reserve(m_bucket_size); + rb.push_back(e); + m_ips.insert(e.addr()); + return node_added; } void routing_table::split_bucket() diff --git a/test/test_dht.cpp b/test/test_dht.cpp index 3f75dc76e..fabf9d42a 100644 --- a/test/test_dht.cpp +++ b/test/test_dht.cpp @@ -3019,7 +3019,7 @@ TORRENT_TEST(routing_table_balance) // and make sure we don't end up with a table completely out of balance for (int i = 0; i < 32; ++i) { - id[4] = i & 0xff; + id[0] = (i << 3) & 0xff; tbl.node_seen(id, rand_udp_ep(), 20 + (id[19] & 0xff)); } std::printf("num_active_buckets: %d\n", tbl.num_active_buckets()); @@ -3123,7 +3123,7 @@ TORRENT_TEST(routing_table_for_each) for (int i = 0; i < 32; ++i) { - id[4] = i & 0xff; + id[0] = (i << 3) & 0xff; tbl.node_seen(id, rand_udp_ep(), 20 + (id[19] & 0xff)); } @@ -3136,20 +3136,20 @@ TORRENT_TEST(routing_table_for_each) std::printf("replacements: %d\n", replacements); TEST_EQUAL(tbl.num_active_buckets(), 2); - TEST_EQUAL(nodes, 2); - TEST_EQUAL(replacements, 2); + TEST_EQUAL(nodes, 4); + 
TEST_EQUAL(replacements, 4); print_state(std::cout, tbl); std::vector<node_entry> v; - tbl.for_each_node(std::bind(node_push_back, &v, _1), nullptr); - TEST_EQUAL(v.size(), 2); - v.clear(); - tbl.for_each_node(nullptr, std::bind(node_push_back, &v, _1)); - TEST_EQUAL(v.size(), 2); - v.clear(); - tbl.for_each_node(std::bind(node_push_back, &v, _1)); + tbl.for_each_node([&](node_entry const& e) { v.push_back(e); }, nullptr); TEST_EQUAL(v.size(), 4); + v.clear(); + tbl.for_each_node(nullptr, [&](node_entry const& e) { v.push_back(e); }); + TEST_EQUAL(v.size(), 4); + v.clear(); + tbl.for_each_node([&](node_entry const& e) { v.push_back(e); }); + TEST_EQUAL(v.size(), 8); } TORRENT_TEST(node_set_id) @@ -3992,6 +3992,58 @@ TORRENT_TEST(replace_node_impl) } } +TORRENT_TEST(all_in_same_bucket) +{ + TEST_CHECK(all_in_same_bucket({}, to_hash("8000000000000000000000000000000000000000"), 0) == true); + TEST_CHECK(all_in_same_bucket({}, to_hash("8000000000000000000000000000000000000001"), 1) == true); + TEST_CHECK(all_in_same_bucket({}, to_hash("8000000000000000000000000000000000000002"), 2) == true); + TEST_CHECK(all_in_same_bucket({}, to_hash("8000000000000000000000000000000000000003"), 3) == true); + { + dht::bucket_t b = { + n(nullptr, "0000000000000000000000000000000000000000"), + }; + TEST_CHECK(all_in_same_bucket(b, to_hash("8000000000000000000000000000000000000000"), 0) == false); + } + + { + dht::bucket_t b = { + n(nullptr, "8000000000000000000000000000000000000000"), + n(nullptr, "f000000000000000000000000000000000000000"), + }; + TEST_CHECK(all_in_same_bucket(b, to_hash("8000000000000000000000000000000000000000"), 0) == true); + } + { + dht::bucket_t b = { + n(nullptr, "8000000000000000000000000000000000000000"), + n(nullptr, "0000000000000000000000000000000000000000"), + }; + TEST_CHECK(all_in_same_bucket(b, to_hash("8000000000000000000000000000000000000000"), 0) == false); + } + { + dht::bucket_t b = { + n(nullptr, "0800000000000000000000000000000000000000"), + n(nullptr, 
"0000000000000000000000000000000000000000"), + }; + TEST_CHECK(all_in_same_bucket(b, to_hash("0800000000000000000000000000000000000000"), 4) == false); + } + { + dht::bucket_t b = { + n(nullptr, "0800000000000000000000000000000000000000"), + n(nullptr, "0800000000000000000000000000000000000000"), + }; + + TEST_CHECK(all_in_same_bucket(b, to_hash("0800000000000000000000000000000000000000"), 4) == true); + } + { + dht::bucket_t b = { + n(nullptr, "0007000000000000000000000000000000000000"), + n(nullptr, "0004000000000000000000000000000000000000"), + }; + + TEST_CHECK(all_in_same_bucket(b, to_hash("0005000000000000000000000000000000000000"), 13) == true); + } +} + // TODO: test obfuscated_get_peers #else