make it a bit more likely to split the last bucket in the DHT routing table

Arvid Norberg 2019-08-11 16:40:20 -07:00 committed by Arvid Norberg
parent e97f7659c8
commit ff113a262c
3 changed files with 136 additions and 65 deletions


@@ -111,9 +111,12 @@ struct TORRENT_EXTRA_EXPORT ip_set
// necessary to traverse the DHT, we want the nodes in our buckets to be spread
// out across all possible "sub-branches". This is what the "classify" refers
// to. The 3 (or more) bits following the shared bit prefix.
TORRENT_EXTRA_EXPORT std::uint8_t classify_prefix(int bucket_idx, bool last_bucket
, int bucket_size, node_id nid);
TORRENT_EXTRA_EXPORT bool all_in_same_bucket(span<node_entry const> b
, node_id const& id, int bucket_index);
// differences in the implementation from the description in
// the paper:
//
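For illustration, a hypothetical standalone sketch of the classification step (not the library's classify_prefix implementation): it packs the few bits that follow a bucket's shared prefix into a small integer, the "class" used to keep a bucket's nodes spread across sub-branches.

#include <array>
#include <cstdint>

using raw_id = std::array<std::uint8_t, 20>; // 160-bit DHT id (assumption)

// return the `bits` bits of `id` following the first `prefix` bits, packed
// into the low bits of the result (bit 0 is the MSB of byte 0)
std::uint8_t classify_sketch(int prefix, int bits, raw_id const& id)
{
	std::uint8_t ret = 0;
	for (int i = prefix; i < prefix + bits; ++i)
	{
		std::uint8_t const mask = 0x80 >> (i % 8);
		ret = std::uint8_t((ret << 1) | ((id[std::size_t(i / 8)] & mask) ? 1 : 0));
	}
	return ret;
}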


@@ -605,6 +605,22 @@ bool routing_table::add_node(node_entry const& e)
return false;
}
bool all_in_same_bucket(span<node_entry const> b, node_id const& id, int const bucket_index)
{
int const byte_offset = bucket_index / 8;
int const bit_offset = bucket_index % 8;
std::uint8_t const mask = 0x80 >> bit_offset;
int counter[2] = {0, 0};
int const i = (id[byte_offset] & mask) ? 1 : 0;
++counter[i];
for (auto const& e : b)
{
int const idx = (e.id[byte_offset] & mask) ? 1 : 0;
++counter[idx];
}
return counter[0] == 0 || counter[1] == 0;
}
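The helper tallies the bit at bucket_index across the candidate id and every node in the bucket; if one of the two counters stays at zero, all nodes would land on the same side of a split. A self-contained sketch of the same test over plain ids (hypothetical types, for illustration only):

#include <array>
#include <cstdint>
#include <cstdio>
#include <vector>

using raw_id = std::array<std::uint8_t, 20>;

// true if every id agrees on the bit at `bit_index` (the MSB of byte 0 is bit 0)
bool same_side(std::vector<raw_id> const& ids, int bit_index)
{
	std::uint8_t const mask = 0x80 >> (bit_index % 8);
	int counter[2] = {0, 0};
	for (auto const& id : ids)
		++counter[(id[std::size_t(bit_index / 8)] & mask) ? 1 : 0];
	return counter[0] == 0 || counter[1] == 0;
}

int main()
{
	raw_id a{}; a[0] = 0x80; // bit 0 set
	raw_id b{};              // bit 0 clear
	std::printf("%d\n", same_side({a, b}, 0)); // 0: the ids straddle bit 0
	std::printf("%d\n", same_side({a, a}, 0)); // 1: a split would separate nothing
}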
routing_table::add_node_status_t routing_table::add_node_impl(node_entry e)
{
#ifdef TORRENT_EXPENSIVE_INVARIANT_CHECKS
@@ -790,16 +806,6 @@ routing_table::add_node_status_t routing_table::add_node_impl(node_entry e)
}
ip_ok:
// can we split the bucket?
// only nodes that have been confirmed can split the bucket, and we can only
// split the last bucket
bool const can_split = (std::next(i) == m_buckets.end()
&& m_buckets.size() < 159)
&& (m_settings.prefer_verified_node_ids == false
|| (e.verified && mostly_verified_nodes(b)))
&& e.confirmed()
&& (i == m_buckets.begin() || std::prev(i)->live_nodes.size() > 1);
// if there's room in the main bucket, just insert it
// if we can split the bucket (i.e. it's the last bucket) use the next
// bucket's size limit. This makes us split the low-numbered buckets
@@ -824,6 +830,20 @@ ip_ok:
bool const last_bucket = bucket_index + 1 == int(m_buckets.size());
// only nodes that have been confirmed can split the bucket, and we can only
// split the last bucket
// if all nodes in the bucket, including the new node id (e.id), fall on the
// same side of a split, splitting isn't going to do anything.
bool const can_split = (std::next(i) == m_buckets.end()
&& m_buckets.size() < 159)
&& (m_settings.prefer_verified_node_ids == false
|| (e.verified && mostly_verified_nodes(b)))
&& e.confirmed()
&& (i == m_buckets.begin() || std::prev(i)->live_nodes.size() > 1)
&& !all_in_same_bucket(b, e.id, bucket_index);
if (can_split) return need_bucket_split;
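A worked example of this new condition, in the style of the project's tests further down (hypothetical ids; n() and to_hash() are the helpers used in test_dht.cpp): with bucket_index == 3 all nodes share their first three bits, and only a candidate that differs in bit 3 makes a split worthwhile.

dht::bucket_t b = {
	n(nullptr, "1f00000000000000000000000000000000000000"),
	n(nullptr, "1e00000000000000000000000000000000000000"),
};
// the candidate also has bit 3 set: one child bucket would stay empty, no split
TEST_CHECK(all_in_same_bucket(b, to_hash("1d00000000000000000000000000000000000000"), 3) == true);
// the candidate has bit 3 clear: a split actually separates the nodes
TEST_CHECK(all_in_same_bucket(b, to_hash("0f00000000000000000000000000000000000000"), 3) == false);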
if (e.confirmed())
{
auto const ret = replace_node_impl(e, b, m_ips, bucket_index, bucket_size_limit, last_bucket
@@ -834,54 +854,50 @@ ip_ok:
if (ret != need_bucket_split) return ret;
}
// if we can't split, try to insert into the replacement bucket
// if we can't split, nor replace anything in the live buckets, try to insert
// into the replacement bucket
if (!can_split)
// if we don't have any identified stale nodes in
// the bucket, and the bucket is full, we have to
// cache this node and wait until some node fails
// and then replace it.
j = std::find_if(rb.begin(), rb.end()
, [&e](node_entry const& ne) { return ne.id == e.id; });
// if the node is already in the replacement bucket
// just return.
if (j != rb.end())
{
// if we don't have any identified stale nodes in
// the bucket, and the bucket is full, we have to
// cache this node and wait until some node fails
// and then replace it.
j = std::find_if(rb.begin(), rb.end()
, [&e](node_entry const& ne) { return ne.id == e.id; });
// if the node is already in the replacement bucket
// just return.
if (j != rb.end())
{
// if the IP address matches, it's the same node
// make sure it's marked as pinged
if (j->ep() == e.ep()) j->set_pinged();
return node_added;
}
if (int(rb.size()) >= m_bucket_size)
{
// if the replacement bucket is full, remove the oldest entry
// but prefer nodes that haven't been pinged, since they are
// less reliable than this one, that has been pinged
j = std::find_if(rb.begin(), rb.end()
, [] (node_entry const& ne) { return !ne.pinged(); });
if (j == rb.end())
{
auto const ret = replace_node_impl(e, rb, m_ips, bucket_index, m_bucket_size, last_bucket
#ifndef TORRENT_DISABLE_LOGGING
, nullptr
#endif
);
return ret == node_added ? node_added : failed_to_add;
}
m_ips.erase(j->addr());
rb.erase(j);
}
if (rb.empty()) rb.reserve(m_bucket_size);
rb.push_back(e);
m_ips.insert(e.addr());
// if the IP address matches, it's the same node
// make sure it's marked as pinged
if (j->ep() == e.ep()) j->set_pinged();
return node_added;
}
return need_bucket_split;
if (int(rb.size()) >= m_bucket_size)
{
// if the replacement bucket is full, remove the oldest entry
// but prefer nodes that haven't been pinged, since they are
// less reliable than this one, that has been pinged
j = std::find_if(rb.begin(), rb.end()
, [] (node_entry const& ne) { return !ne.pinged(); });
if (j == rb.end())
{
auto const ret = replace_node_impl(e, rb, m_ips, bucket_index, m_bucket_size, last_bucket
#ifndef TORRENT_DISABLE_LOGGING
, nullptr
#endif
);
return ret == node_added ? node_added : failed_to_add;
}
m_ips.erase(j->addr());
rb.erase(j);
}
if (rb.empty()) rb.reserve(m_bucket_size);
rb.push_back(e);
m_ips.insert(e.addr());
return node_added;
}
void routing_table::split_bucket()
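As the comments above describe, the replacement bucket acts as Kademlia's replacement cache: when the live bucket is full and cannot be split, a confirmed candidate is parked there until some live node fails. A minimal sketch of that promotion step (a hypothetical helper, not the library's actual failure handling; assumes <algorithm> plus the bucket_t/node_entry types shown above):

void promote_replacement_sketch(bucket_t& live, bucket_t& replacements
	, node_id const& failed)
{
	auto it = std::find_if(live.begin(), live.end()
		, [&](node_entry const& ne) { return ne.id == failed; });
	if (it == live.end() || replacements.empty()) return;
	live.erase(it);
	// prefer a replacement that has already responded to a ping
	auto rep = std::find_if(replacements.begin(), replacements.end()
		, [](node_entry const& ne) { return ne.pinged(); });
	if (rep == replacements.end()) rep = replacements.begin();
	live.push_back(*rep);
	replacements.erase(rep);
}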


@@ -3019,7 +3019,7 @@ TORRENT_TEST(routing_table_balance)
// and make sure we don't end up with a table completely out of balance
for (int i = 0; i < 32; ++i)
{
id[4] = i & 0xff;
id[0] = (i << 3) & 0xff;
tbl.node_seen(id, rand_udp_ep(), 20 + (id[19] & 0xff));
}
std::printf("num_active_buckets: %d\n", tbl.num_active_buckets());
@@ -3123,7 +3123,7 @@ TORRENT_TEST(routing_table_for_each)
for (int i = 0; i < 32; ++i)
{
id[4] = i & 0xff;
id[0] = (i << 3) & 0xff;
tbl.node_seen(id, rand_udp_ep(), 20 + (id[19] & 0xff));
}
@@ -3136,20 +3136,20 @@ TORRENT_TEST(routing_table_for_each)
std::printf("replacements: %d\n", replacements);
TEST_EQUAL(tbl.num_active_buckets(), 2);
TEST_EQUAL(nodes, 2);
TEST_EQUAL(replacements, 2);
TEST_EQUAL(nodes, 4);
TEST_EQUAL(replacements, 4);
print_state(std::cout, tbl);
std::vector<node_entry> v;
tbl.for_each_node(std::bind(node_push_back, &v, _1), nullptr);
TEST_EQUAL(v.size(), 2);
v.clear();
tbl.for_each_node(nullptr, std::bind(node_push_back, &v, _1));
TEST_EQUAL(v.size(), 2);
v.clear();
tbl.for_each_node(std::bind(node_push_back, &v, _1));
tbl.for_each_node([&](node_entry const& e) { v.push_back(e); }, nullptr);
TEST_EQUAL(v.size(), 4);
v.clear();
tbl.for_each_node(nullptr, [&](node_entry const& e) { v.push_back(e); });
TEST_EQUAL(v.size(), 4);
v.clear();
tbl.for_each_node([&](node_entry const& e) { v.push_back(e); });
TEST_EQUAL(v.size(), 8);
}
TORRENT_TEST(node_set_id)
@@ -3992,6 +3992,58 @@ TORRENT_TEST(replace_node_impl)
}
}
TORRENT_TEST(all_in_same_bucket)
{
TEST_CHECK(all_in_same_bucket({}, to_hash("8000000000000000000000000000000000000000"), 0) == true);
TEST_CHECK(all_in_same_bucket({}, to_hash("8000000000000000000000000000000000000001"), 1) == true);
TEST_CHECK(all_in_same_bucket({}, to_hash("8000000000000000000000000000000000000002"), 2) == true);
TEST_CHECK(all_in_same_bucket({}, to_hash("8000000000000000000000000000000000000003"), 3) == true);
{
dht::bucket_t b = {
n(nullptr, "0000000000000000000000000000000000000000"),
};
TEST_CHECK(all_in_same_bucket(b, to_hash("8000000000000000000000000000000000000000"), 0) == false);
}
{
dht::bucket_t b = {
n(nullptr, "8000000000000000000000000000000000000000"),
n(nullptr, "f000000000000000000000000000000000000000"),
};
TEST_CHECK(all_in_same_bucket(b, to_hash("8000000000000000000000000000000000000000"), 0) == true);
}
{
dht::bucket_t b = {
n(nullptr, "8000000000000000000000000000000000000000"),
n(nullptr, "0000000000000000000000000000000000000000"),
};
TEST_CHECK(all_in_same_bucket(b, to_hash("8000000000000000000000000000000000000000"), 0) == false);
}
{
dht::bucket_t b = {
n(nullptr, "0800000000000000000000000000000000000000"),
n(nullptr, "0000000000000000000000000000000000000000"),
};
TEST_CHECK(all_in_same_bucket(b, to_hash("0800000000000000000000000000000000000000"), 4) == false);
}
{
dht::bucket_t b = {
n(nullptr, "0800000000000000000000000000000000000000"),
n(nullptr, "0800000000000000000000000000000000000000"),
};
TEST_CHECK(all_in_same_bucket(b, to_hash("0800000000000000000000000000000000000000"), 4) == true);
}
{
dht::bucket_t b = {
n(nullptr, "0007000000000000000000000000000000000000"),
n(nullptr, "0004000000000000000000000000000000000000"),
};
TEST_CHECK(all_in_same_bucket(b, to_hash("0005000000000000000000000000000000000000"), 13) == true);
}
}
// TODO: test obfuscated_get_peers
#else