added support for infohashes sample in dht storage (#1586)
added support for infohashes sample in dht storage
This commit is contained in:
parent
17ee28d176
commit
deecdb6cfa
|
@ -206,6 +206,21 @@ namespace dht
|
|||
, span<char const> salt
|
||||
, address const& addr) = 0;
|
||||
|
||||
// This function retrieves a sample infohashes
|
||||
//
|
||||
// For implementers:
|
||||
// The infohashes should be stored in ["samples"] (N × 20 bytes).
|
||||
// the following keys should be filled
|
||||
// item["interval"] - the subset refresh interval in seconds.
|
||||
// item["num"] - number of infohashes in storage.
|
||||
//
|
||||
// Internally, this function is allowed to lazily evaluate, cache
|
||||
// and modify the actual sample to put in ``item``
|
||||
//
|
||||
// returns the number of infohashes in the sample.
|
||||
//
|
||||
virtual int get_infohashes_sample(entry& item) = 0;
|
||||
|
||||
// This function is called periodically (non-constant frequency).
|
||||
//
|
||||
// For implementers:
|
||||
|
|
|
@ -189,6 +189,17 @@ namespace libtorrent
|
|||
// If the incoming requests causes to many bytes to be sent in responses,
|
||||
// incoming requests will be dropped until the quota has been replenished.
|
||||
int upload_rate_limit;
|
||||
|
||||
// the infohashes sample recomputation interval (in seconds).
|
||||
// The node will precompute a subset of the tracked infohashes and return
|
||||
// that instead of calculating it upon each request. The permissible range
|
||||
// is between 0 and 21600 seconds (inclusive).
|
||||
int sample_infohashes_interval = 21600;
|
||||
|
||||
// the maximum number of elements in the sampled subset of infohashes.
|
||||
// If this number is too big, expect the DHT storage implementations
|
||||
// to clamp it in order to allow UDP packets go through
|
||||
int max_infohashes_sample_count = 20;
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -162,4 +162,54 @@ TORRENT_TEST(dht_storage_counters)
|
|||
test_expiration(hours(1), s, c); // test expiration of everything after 3 hours
|
||||
}
|
||||
|
||||
TORRENT_TEST(dht_storage_infohashes_sample)
|
||||
{
|
||||
dht_settings sett = test_settings();
|
||||
sett.max_torrents = 5;
|
||||
sett.sample_infohashes_interval = 30;
|
||||
sett.max_infohashes_sample_count = 2;
|
||||
std::unique_ptr<dht_storage_interface> s(create_default_dht_storage(sett));
|
||||
|
||||
TEST_CHECK(s.get() != nullptr);
|
||||
|
||||
sha1_hash const n1 = to_hash("5fbfbff10c5d6a4ec8a88e4c6ab4c28b95eee401");
|
||||
sha1_hash const n2 = to_hash("5fbfbff10c5d6a4ec8a88e4c6ab4c28b95eee402");
|
||||
sha1_hash const n3 = to_hash("5fbfbff10c5d6a4ec8a88e4c6ab4c28b95eee403");
|
||||
sha1_hash const n4 = to_hash("5fbfbff10c5d6a4ec8a88e4c6ab4c28b95eee404");
|
||||
|
||||
tcp::endpoint const p1 = ep("124.31.75.21", 1);
|
||||
tcp::endpoint const p2 = ep("124.31.75.22", 1);
|
||||
tcp::endpoint const p3 = ep("124.31.75.23", 1);
|
||||
tcp::endpoint const p4 = ep("124.31.75.24", 1);
|
||||
|
||||
s->announce_peer(n1, p1, "torrent_name1", false);
|
||||
s->announce_peer(n2, p2, "torrent_name2", false);
|
||||
s->announce_peer(n3, p3, "torrent_name3", false);
|
||||
s->announce_peer(n4, p4, "torrent_name4", false);
|
||||
|
||||
entry item;
|
||||
int r = s->get_infohashes_sample(item);
|
||||
TEST_EQUAL(r, 2);
|
||||
|
||||
default_config cfg;
|
||||
simulation sim(cfg);
|
||||
sim::asio::io_service ios(sim, addr("10.0.0.1"));
|
||||
|
||||
sim::asio::high_resolution_timer timer(ios);
|
||||
timer.expires_from_now(hours(1)); // expiration of torrents
|
||||
timer.async_wait([&s](boost::system::error_code const& ec)
|
||||
{
|
||||
libtorrent::aux::update_time_now();
|
||||
// tick here to trigger the torrents expiration
|
||||
s->tick();
|
||||
|
||||
entry item;
|
||||
int r = s->get_infohashes_sample(item);
|
||||
TEST_EQUAL(r, 0);
|
||||
});
|
||||
|
||||
boost::system::error_code ec;
|
||||
sim.run(ec);
|
||||
}
|
||||
|
||||
#endif // TORRENT_DISABLE_DHT
|
||||
|
|
|
@ -171,6 +171,23 @@ namespace
|
|||
, immutable_item_comparator(node_ids));
|
||||
}
|
||||
|
||||
constexpr int sample_infohashes_interval_max = 21600;
|
||||
constexpr int infohashes_sample_count_max = 20;
|
||||
|
||||
struct infohashes_sample
|
||||
{
|
||||
std::vector<sha1_hash> samples;
|
||||
time_point created = min_time();
|
||||
|
||||
int count() const { return int(samples.size()); }
|
||||
};
|
||||
|
||||
int clamp(int v, int lo, int hi)
|
||||
{
|
||||
TORRENT_ASSERT(lo <= hi);
|
||||
return (v < lo) ? lo : (hi < v) ? hi : v;
|
||||
}
|
||||
|
||||
class dht_default_storage final : public dht_storage_interface, boost::noncopyable
|
||||
{
|
||||
public:
|
||||
|
@ -453,6 +470,21 @@ namespace
|
|||
touch_item(i->second, addr);
|
||||
}
|
||||
|
||||
int get_infohashes_sample(entry& item) override
|
||||
{
|
||||
item["interval"] = clamp(m_settings.sample_infohashes_interval
|
||||
, 0, sample_infohashes_interval_max);
|
||||
item["num"] = int(m_map.size());
|
||||
|
||||
refresh_infohashes_sample();
|
||||
|
||||
std::vector<sha1_hash> const& samples = m_infohashes_sample.samples;
|
||||
item["samples"] = span<char const>(
|
||||
reinterpret_cast<char const*>(samples.data()), samples.size() * 20);
|
||||
|
||||
return m_infohashes_sample.count();
|
||||
}
|
||||
|
||||
void tick() override
|
||||
{
|
||||
// look through all peers and see if any have timed out
|
||||
|
@ -517,6 +549,8 @@ namespace
|
|||
std::map<node_id, dht_immutable_item> m_immutable_table;
|
||||
std::map<node_id, dht_mutable_item> m_mutable_table;
|
||||
|
||||
infohashes_sample m_infohashes_sample;
|
||||
|
||||
void purge_peers(std::vector<peer_entry>& peers)
|
||||
{
|
||||
auto now = aux::time_now();
|
||||
|
@ -532,6 +566,48 @@ namespace
|
|||
if (!peers.empty() && peers.capacity() / peers.size() >= 4u)
|
||||
peers.shrink_to_fit();
|
||||
}
|
||||
|
||||
void refresh_infohashes_sample()
|
||||
{
|
||||
time_point const now = aux::time_now();
|
||||
int const interval = clamp(m_settings.sample_infohashes_interval
|
||||
, 0, sample_infohashes_interval_max);
|
||||
|
||||
int const max_count = clamp(m_settings.max_infohashes_sample_count
|
||||
, 0, infohashes_sample_count_max);
|
||||
int const count = std::min(max_count, int(m_map.size()));
|
||||
|
||||
if (interval > 0
|
||||
&& m_infohashes_sample.created + seconds(interval) > now
|
||||
&& m_infohashes_sample.count() >= max_count)
|
||||
return;
|
||||
|
||||
std::vector<sha1_hash>& samples = m_infohashes_sample.samples;
|
||||
samples.clear();
|
||||
samples.reserve(count);
|
||||
|
||||
int to_pick = count;
|
||||
int candidates = int(m_map.size());
|
||||
|
||||
for (auto const& t : m_map)
|
||||
{
|
||||
if (to_pick == 0)
|
||||
break;
|
||||
|
||||
TORRENT_ASSERT(candidates >= to_pick);
|
||||
|
||||
// pick this key with probability
|
||||
// <keys left to pick> / <keys left in the set>
|
||||
if (random(std::uint32_t(candidates--)) > std::uint32_t(to_pick))
|
||||
continue;
|
||||
|
||||
samples.push_back(t.first);
|
||||
--to_pick;
|
||||
}
|
||||
|
||||
TORRENT_ASSERT(int(samples.size()) == count);
|
||||
m_infohashes_sample.created = now;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
@ -437,4 +437,76 @@ TORRENT_TEST(update_node_ids)
|
|||
TEST_CHECK(r);
|
||||
}
|
||||
|
||||
TORRENT_TEST(infohashes_sample)
|
||||
{
|
||||
dht_settings sett = test_settings();
|
||||
sett.max_torrents = 5;
|
||||
sett.sample_infohashes_interval = 10;
|
||||
sett.max_infohashes_sample_count = 2;
|
||||
std::unique_ptr<dht_storage_interface> s(create_default_dht_storage(sett));
|
||||
|
||||
tcp::endpoint const p1 = ep("124.31.75.21", 1);
|
||||
tcp::endpoint const p2 = ep("124.31.75.22", 1);
|
||||
tcp::endpoint const p3 = ep("124.31.75.23", 1);
|
||||
tcp::endpoint const p4 = ep("124.31.75.24", 1);
|
||||
|
||||
s->announce_peer(n1, p1, "torrent_name1", false);
|
||||
s->announce_peer(n2, p2, "torrent_name2", false);
|
||||
s->announce_peer(n3, p3, "torrent_name3", false);
|
||||
s->announce_peer(n4, p4, "torrent_name4", false);
|
||||
|
||||
entry item;
|
||||
int r = s->get_infohashes_sample(item);
|
||||
TEST_EQUAL(r, 2);
|
||||
TEST_EQUAL(item["interval"].integer(), 10)
|
||||
TEST_EQUAL(item["num"].integer(), 4);
|
||||
TEST_EQUAL(item["samples"].string().size(), 2 * 20);
|
||||
|
||||
// get all of them
|
||||
sett.max_infohashes_sample_count = 5;
|
||||
|
||||
item = entry();
|
||||
r = s->get_infohashes_sample(item);
|
||||
TEST_EQUAL(r, 4);
|
||||
TEST_EQUAL(item["interval"].integer(), 10)
|
||||
TEST_EQUAL(item["num"].integer(), 4);
|
||||
TEST_EQUAL(item["samples"].string().size(), 4 * 20);
|
||||
|
||||
std::string const samples = item["samples"].to_string();
|
||||
TEST_CHECK(samples.find(aux::to_hex(n1)) != std::string::npos);
|
||||
TEST_CHECK(samples.find(aux::to_hex(n2)) != std::string::npos);
|
||||
TEST_CHECK(samples.find(aux::to_hex(n3)) != std::string::npos);
|
||||
TEST_CHECK(samples.find(aux::to_hex(n4)) != std::string::npos);
|
||||
}
|
||||
|
||||
TORRENT_TEST(infohashes_sample_dist)
|
||||
{
|
||||
dht_settings sett = test_settings();
|
||||
sett.max_torrents = 1000;
|
||||
sett.sample_infohashes_interval = 0; // need this to force refresh every call
|
||||
sett.max_infohashes_sample_count = 1;
|
||||
std::unique_ptr<dht_storage_interface> s(create_default_dht_storage(sett));
|
||||
|
||||
for (int i = 0; i < 1000; ++i)
|
||||
{
|
||||
s->announce_peer(rand_hash(), tcp::endpoint(rand_v4(), std::uint16_t(i))
|
||||
, "torrent_name", false);
|
||||
}
|
||||
|
||||
std::set<sha1_hash> infohash_set;
|
||||
for (int i = 0; i < 1000; ++i)
|
||||
{
|
||||
entry item;
|
||||
int r = s->get_infohashes_sample(item);
|
||||
TEST_EQUAL(r, 1);
|
||||
TEST_EQUAL(item["interval"].integer(), 0)
|
||||
TEST_EQUAL(item["num"].integer(), 1000);
|
||||
TEST_EQUAL(item["samples"].string().size(), 20);
|
||||
|
||||
infohash_set.insert(sha1_hash(item["samples"].string()));
|
||||
}
|
||||
std::printf("infohashes set size: %d\n", int(infohash_set.size()));
|
||||
TEST_CHECK(infohash_set.size() > 500);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
Loading…
Reference in New Issue