From 23e11af89985ffceb1bdd04c0bed2cfeadc0bd3f Mon Sep 17 00:00:00 2001 From: Steven Siloti Date: Wed, 21 Sep 2016 20:04:40 -0700 Subject: [PATCH 1/3] fix peer picking algorithm The old code was always picking the first to_pick peers from the set. --- src/kademlia/dht_storage.cpp | 37 ++++++++++++++---------------------- test/test_dht_storage.cpp | 35 ++++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 23 deletions(-) diff --git a/src/kademlia/dht_storage.cpp b/src/kademlia/dht_storage.cpp index 95125c658..80036e5d7 100644 --- a/src/kademlia/dht_storage.cpp +++ b/src/kademlia/dht_storage.cpp @@ -214,18 +214,15 @@ namespace else { tcp const protocol = requester.is_v4() ? tcp::v4() : tcp::v6(); - int max = m_settings.max_peers_reply; + int to_pick = m_settings.max_peers_reply; // if these are IPv6 peers their addresses are 4x the size of IPv4 // so reduce the max peers 4 fold to compensate // max_peers_reply should probably be specified in bytes if (!v.peers.empty() && protocol == tcp::v6()) - max /= 4; - // we're picking "to_pick" from a list of "num" at random. - int const to_pick = std::min(int(v.peers.size()), max); - std::set::const_iterator iter = v.peers.begin(); + to_pick /= 4; entry::list_type& pe = peers["values"].list(); - for (int t = 0, m = 0; m < to_pick && iter != v.peers.end(); ++iter) + for (auto iter = v.peers.begin(); to_pick > 0 && iter != v.peers.end(); ++iter) { // if the node asking for peers is a seed, skip seeds from the // peer list @@ -235,26 +232,20 @@ namespace if (iter->addr.protocol() != protocol) continue; - ++t; - std::string* str; - if (t <= to_pick) - { - pe.push_back(entry()); - str = &pe.back().string(); - } - else - { - // maybe replace an item we've already picked - if (random(t - 1) >= to_pick) continue; - str = &pe[random(to_pick - 1)].string(); - } + // pick this peer with probability + // / + if (random(uint32_t(std::distance(iter, v.peers.end()))) > to_pick) + continue; - str->resize(18); - std::string::iterator out = str->begin(); + pe.push_back(entry()); + std::string& str = pe.back().string(); + + str.resize(18); + std::string::iterator out = str.begin(); detail::write_endpoint(iter->addr, out); - str->resize(out - str->begin()); + str.resize(out - str.begin()); - ++m; + --to_pick; } } diff --git a/test/test_dht_storage.cpp b/test/test_dht_storage.cpp index 0d6a89275..3bf16acd9 100644 --- a/test/test_dht_storage.cpp +++ b/test/test_dht_storage.cpp @@ -339,6 +339,41 @@ TORRENT_TEST(mutable_item_limit) TEST_EQUAL(cnt.mutable_data, 42); } +TORRENT_TEST(get_peers_dist) +{ + // test that get_peers returns reasonably disjoint sets of peers with each call + // take two samples of 100 peers from 1000 and make sure there aren't too many + // peers found in both lists + dht_settings sett = test_settings(); + sett.max_peers = 1000; + sett.max_peers_reply = 100; + std::unique_ptr s(create_default_dht_storage(sett)); + + address addr = rand_v4(); + for (int i = 0; i < 1000; ++i) + { + s->announce_peer(n1, tcp::endpoint(addr, uint16_t(i)) + , "torrent_name", false); + } + + std::set peer_set; + int duplicates = 0; + for (int i = 0; i < 2; ++i) + { + entry peers; + s->get_peers(n1, false, false, address(), peers); + TEST_EQUAL(peers["values"].list().size(), 100); + for (auto const& p : peers["values"].list()) + { + int port = detail::read_v4_endpoint(p.string().begin()).port(); + if (!peer_set.insert(port).second) + ++duplicates; + } + } + std::printf("duplicate peers found: %d\n", duplicates); + TEST_CHECK(duplicates < 20); +} + TORRENT_TEST(update_node_ids) { dht_settings sett = test_settings(); From 89942bf3d4276b3d465dd6d6f0545db0a6261f96 Mon Sep 17 00:00:00 2001 From: Steven Siloti Date: Thu, 22 Sep 2016 21:26:07 -0700 Subject: [PATCH 2/3] use a sorted vector to store peer announcments Given the frequency of linear scans being done, std::set is clearly sub-optimal for storing announced peers. A std::vector is the obvious choice, which I also decided to make sorted. A sorted vector trades better performance in announce_peer for slower purging, the latter being mitigated by batching. --- src/kademlia/dht_storage.cpp | 72 ++++++++++++++++++++---------------- test/test_dht_storage.cpp | 16 +++++++- 2 files changed, 55 insertions(+), 33 deletions(-) diff --git a/src/kademlia/dht_storage.cpp b/src/kademlia/dht_storage.cpp index 80036e5d7..c09ea4df1 100644 --- a/src/kademlia/dht_storage.cpp +++ b/src/kademlia/dht_storage.cpp @@ -73,7 +73,8 @@ namespace struct torrent_entry { std::string name; - std::set peers; + std::vector peers4; + std::vector peers6; }; // TODO: 2 make this configurable in dht_settings @@ -176,7 +177,7 @@ namespace { size_t ret = 0; for (auto const& t : m_map) - ret += t.second.peers.size(); + ret += t.second.peers4.size() + t.second.peers6.size(); return ret; } #endif @@ -193,6 +194,7 @@ namespace if (i == m_map.end()) return int(m_map.size()) >= m_settings.max_torrents; torrent_entry const& v = i->second; + auto const& peersv = requester.is_v4() ? v.peers4 : v.peers6; if (!v.name.empty()) peers["n"] = v.name; @@ -201,7 +203,7 @@ namespace bloom_filter<256> downloaders; bloom_filter<256> seeds; - for (auto const& p : v.peers) + for (auto const& p : peersv) { sha1_hash const iphash = hash_address(p.addr.address()); if (p.seed) seeds.set(iphash); @@ -218,23 +220,26 @@ namespace // if these are IPv6 peers their addresses are 4x the size of IPv4 // so reduce the max peers 4 fold to compensate // max_peers_reply should probably be specified in bytes - if (!v.peers.empty() && protocol == tcp::v6()) + if (!peersv.empty() && protocol == tcp::v6()) to_pick /= 4; entry::list_type& pe = peers["values"].list(); - for (auto iter = v.peers.begin(); to_pick > 0 && iter != v.peers.end(); ++iter) + int candidates = int(std::count_if(peersv.begin(), peersv.end() + , [=](peer_entry const& e) { return !(noseed && e.seed); })); + + to_pick = (std::min)(to_pick, candidates); + + for (auto iter = peersv.begin(); to_pick > 0; ++iter) { // if the node asking for peers is a seed, skip seeds from the // peer list if (noseed && iter->seed) continue; - // only include peers with the right address family - if (iter->addr.protocol() != protocol) - continue; + TORRENT_ASSERT(candidates >= to_pick); // pick this peer with probability // / - if (random(uint32_t(std::distance(iter, v.peers.end()))) > to_pick) + if (random(uint32_t(candidates--)) > to_pick) continue; pe.push_back(entry()); @@ -249,7 +254,7 @@ namespace } } - if (int(i->second.peers.size()) < m_settings.max_peers) + if (int(peersv.size()) < m_settings.max_peers) return false; // we're at the max peers stored for this torrent @@ -258,8 +263,8 @@ namespace // a different port than the one it is using to send DHT messages peer_entry requester_entry; requester_entry.addr.address(requester); - auto requester_iter = i->second.peers.lower_bound(requester_entry); - return requester_iter == i->second.peers.end() + auto requester_iter = std::lower_bound(peersv.begin(), peersv.end(), requester_entry); + return requester_iter == peersv.end() || requester_iter->addr.address() != requester; } @@ -292,23 +297,27 @@ namespace v->name = name.substr(0, 100).to_string(); } + auto& peersv = endp.protocol() == tcp::v4() ? v->peers4 : v->peers6; + peer_entry peer; peer.addr = endp; peer.added = aux::time_now(); peer.seed = seed; - auto i = v->peers.find(peer); - if (i != v->peers.end()) + auto i = std::lower_bound(peersv.begin(), peersv.end(), peer); + if (i != peersv.end() && i->addr == endp) { - v->peers.erase(i++); - m_counters.peers -= 1; + *i = peer; } - else if (v->peers.size() >= m_settings.max_peers) + else if (peersv.size() >= m_settings.max_peers) { // we're at capacity, drop the announce return; } - v->peers.insert(i, peer); - m_counters.peers += 1; + else + { + peersv.insert(i, peer); + m_counters.peers += 1; + } } bool get_immutable_item(sha1_hash const& target @@ -448,9 +457,10 @@ namespace for (auto i = m_map.begin(), end(m_map.end()); i != end;) { torrent_entry& t = i->second; - purge_peers(t.peers); + purge_peers(t.peers4); + purge_peers(t.peers6); - if (!t.peers.empty()) + if (!t.peers4.empty() || !t.peers6.empty()) { ++i; continue; @@ -504,19 +514,17 @@ namespace std::map m_immutable_table; std::map m_mutable_table; - void purge_peers(std::set& peers) + void purge_peers(std::vector& peers) { - for (auto i = peers.begin(), end(peers.end()); i != end;) + auto now = aux::time_now(); + auto new_end = std::remove_if(peers.begin(), peers.end() + , [=](peer_entry const& e) { - // the peer has timed out - if (i->added + minutes(int(announce_interval * 3 / 2)) < aux::time_now()) - { - peers.erase(i++); - m_counters.peers -= 1; - } - else - ++i; - } + return e.added + minutes(int(announce_interval * 3 / 2)) < now; + }); + + m_counters.peers -= std::distance(new_end, peers.end()); + peers.erase(new_end, peers.end()); } }; } diff --git a/test/test_dht_storage.cpp b/test/test_dht_storage.cpp index 3bf16acd9..635dfb6b2 100644 --- a/test/test_dht_storage.cpp +++ b/test/test_dht_storage.cpp @@ -345,7 +345,7 @@ TORRENT_TEST(get_peers_dist) // take two samples of 100 peers from 1000 and make sure there aren't too many // peers found in both lists dht_settings sett = test_settings(); - sett.max_peers = 1000; + sett.max_peers = 2000; sett.max_peers_reply = 100; std::unique_ptr s(create_default_dht_storage(sett)); @@ -372,6 +372,20 @@ TORRENT_TEST(get_peers_dist) } std::printf("duplicate peers found: %d\n", duplicates); TEST_CHECK(duplicates < 20); + + // add 1000 seeds to the mix and make sure we still pick the desired number + // of peers if we select only non-seeds + for (int i = 1000; i < 2000; ++i) + { + s->announce_peer(n1, tcp::endpoint(addr, uint16_t(i)) + , "torrent_name", true); + } + + { + entry peers; + s->get_peers(n1, true, false, address(), peers); + TEST_EQUAL(peers["values"].list().size(), 100); + } } TORRENT_TEST(update_node_ids) From 35cfc6b5d3d7e2b67b7205d41959b28d22dbbca5 Mon Sep 17 00:00:00 2001 From: Steven Siloti Date: Fri, 23 Sep 2016 07:30:36 -0700 Subject: [PATCH 3/3] shrink peers vectors with too much excess capacity --- src/kademlia/dht_storage.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/kademlia/dht_storage.cpp b/src/kademlia/dht_storage.cpp index c09ea4df1..51a09df55 100644 --- a/src/kademlia/dht_storage.cpp +++ b/src/kademlia/dht_storage.cpp @@ -525,6 +525,9 @@ namespace m_counters.peers -= std::distance(new_end, peers.end()); peers.erase(new_end, peers.end()); + // if we're using less than 1/4 of the capacity free up the excess + if (!peers.empty() && peers.capacity() / peers.size() >= 4u) + peers.shrink_to_fit(); } }; }