From 23e11af89985ffceb1bdd04c0bed2cfeadc0bd3f Mon Sep 17 00:00:00 2001 From: Steven Siloti Date: Wed, 21 Sep 2016 20:04:40 -0700 Subject: [PATCH] fix peer picking algorithm The old code was always picking the first to_pick peers from the set. --- src/kademlia/dht_storage.cpp | 37 ++++++++++++++---------------------- test/test_dht_storage.cpp | 35 ++++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 23 deletions(-) diff --git a/src/kademlia/dht_storage.cpp b/src/kademlia/dht_storage.cpp index 95125c658..80036e5d7 100644 --- a/src/kademlia/dht_storage.cpp +++ b/src/kademlia/dht_storage.cpp @@ -214,18 +214,15 @@ namespace else { tcp const protocol = requester.is_v4() ? tcp::v4() : tcp::v6(); - int max = m_settings.max_peers_reply; + int to_pick = m_settings.max_peers_reply; // if these are IPv6 peers their addresses are 4x the size of IPv4 // so reduce the max peers 4 fold to compensate // max_peers_reply should probably be specified in bytes if (!v.peers.empty() && protocol == tcp::v6()) - max /= 4; - // we're picking "to_pick" from a list of "num" at random. - int const to_pick = std::min(int(v.peers.size()), max); - std::set::const_iterator iter = v.peers.begin(); + to_pick /= 4; entry::list_type& pe = peers["values"].list(); - for (int t = 0, m = 0; m < to_pick && iter != v.peers.end(); ++iter) + for (auto iter = v.peers.begin(); to_pick > 0 && iter != v.peers.end(); ++iter) { // if the node asking for peers is a seed, skip seeds from the // peer list @@ -235,26 +232,20 @@ namespace if (iter->addr.protocol() != protocol) continue; - ++t; - std::string* str; - if (t <= to_pick) - { - pe.push_back(entry()); - str = &pe.back().string(); - } - else - { - // maybe replace an item we've already picked - if (random(t - 1) >= to_pick) continue; - str = &pe[random(to_pick - 1)].string(); - } + // pick this peer with probability + // / + if (random(uint32_t(std::distance(iter, v.peers.end()))) > to_pick) + continue; - str->resize(18); - std::string::iterator out = str->begin(); + pe.push_back(entry()); + std::string& str = pe.back().string(); + + str.resize(18); + std::string::iterator out = str.begin(); detail::write_endpoint(iter->addr, out); - str->resize(out - str->begin()); + str.resize(out - str.begin()); - ++m; + --to_pick; } } diff --git a/test/test_dht_storage.cpp b/test/test_dht_storage.cpp index 0d6a89275..3bf16acd9 100644 --- a/test/test_dht_storage.cpp +++ b/test/test_dht_storage.cpp @@ -339,6 +339,41 @@ TORRENT_TEST(mutable_item_limit) TEST_EQUAL(cnt.mutable_data, 42); } +TORRENT_TEST(get_peers_dist) +{ + // test that get_peers returns reasonably disjoint sets of peers with each call + // take two samples of 100 peers from 1000 and make sure there aren't too many + // peers found in both lists + dht_settings sett = test_settings(); + sett.max_peers = 1000; + sett.max_peers_reply = 100; + std::unique_ptr s(create_default_dht_storage(sett)); + + address addr = rand_v4(); + for (int i = 0; i < 1000; ++i) + { + s->announce_peer(n1, tcp::endpoint(addr, uint16_t(i)) + , "torrent_name", false); + } + + std::set peer_set; + int duplicates = 0; + for (int i = 0; i < 2; ++i) + { + entry peers; + s->get_peers(n1, false, false, address(), peers); + TEST_EQUAL(peers["values"].list().size(), 100); + for (auto const& p : peers["values"].list()) + { + int port = detail::read_v4_endpoint(p.string().begin()).port(); + if (!peer_set.insert(port).second) + ++duplicates; + } + } + std::printf("duplicate peers found: %d\n", duplicates); + TEST_CHECK(duplicates < 20); +} + TORRENT_TEST(update_node_ids) { dht_settings sett = test_settings();