support DHT scrape

This commit is contained in:
Arvid Norberg 2011-05-23 00:45:36 +00:00
parent 88fa0b9393
commit 58d723012a
12 changed files with 208 additions and 83 deletions

View File

@ -1,3 +1,4 @@
* support DHT scrape
* added support for fadvise/F_RDADVISE for improved disk read performance
* introduced pop_alerts() which pops the entire alert queue in a single call
* support saving metadata in resume file, enable it by default for magnet links

View File

@ -37,30 +37,38 @@ POSSIBILITY OF SUCH DAMAGE.
#include "libtorrent/peer_id.hpp" // for sha1_hash
#include "libtorrent/config.hpp" // for sha1_hash
#include <math.h> // for log()
namespace libtorrent
{
TORRENT_EXPORT void set_bit(boost::uint32_t b, boost::uint8_t* bits, int len);
TORRENT_EXPORT bool has_bit(boost::uint32_t b, boost::uint8_t const* bits, int len);
TORRENT_EXPORT void set_bits(boost::uint8_t const* b, boost::uint8_t* bits, int len);
TORRENT_EXPORT bool has_bits(boost::uint8_t const* b, boost::uint8_t const* bits, int len);
TORRENT_EXPORT int count_zero_bits(boost::uint8_t const* bits, int len);
template <int N>
struct bloom_filter
{
bool find(sha1_hash const& k) const
{
return has_bit(k[0], bits, N)
&& has_bit(k[1], bits, N)
&& has_bit(k[2], bits, N);
}
{ return has_bits(&k[0], bits, N); }
void set(sha1_hash const& k)
{
set_bit(k[0], bits, N);
set_bit(k[1], bits, N);
set_bit(k[2], bits, N);
}
{ set_bits(&k[0], bits, N); }
std::string to_string() const
{ return std::string((char const*)&bits[0], N); }
void from_string(char const* str)
{ memcpy(bits, str, N); }
void clear() { memset(bits, 0, N); }
float size() const
{
const int c = (std::min)(count_zero_bits(bits, N), (N * 8) - 1);
const int m = N * 8;
return log(c / float(m)) / (2.f * log(1.f - 1.f/m));
}
bloom_filter() { clear(); }
private:

View File

@ -88,7 +88,7 @@ namespace libtorrent { namespace dht
entry state() const;
void announce(sha1_hash const& ih, int listen_port
void announce(sha1_hash const& ih, int listen_port, bool seed
, boost::function<void(std::vector<tcp::endpoint> const&)> f);
void dht_status(session_status& s);

View File

@ -70,7 +70,8 @@ public:
find_data(node_impl& node, node_id target
, data_callback const& dcallback
, nodes_callback const& ncallback);
, nodes_callback const& ncallback
, bool noseeds);
virtual char const* name() const { return "get_peers"; }
@ -90,6 +91,7 @@ private:
node_id const m_target;
bool m_done:1;
bool m_got_peers:1;
bool m_noseeds:1;
};
class find_data_observer : public observer

View File

@ -103,6 +103,7 @@ struct peer_entry
{
tcp::endpoint addr;
ptime added;
bool seed;
};
// this is a group. It contains a set of group members
@ -251,7 +252,7 @@ public:
{ m_table.print_state(os); }
#endif
void announce(sha1_hash const& info_hash, int listen_port
void announce(sha1_hash const& info_hash, int listen_port, bool seed
, boost::function<void(std::vector<tcp::endpoint> const&)> f);
bool verify_token(std::string const& token, char const* info_hash
@ -293,10 +294,9 @@ public:
dht_settings const& settings() const { return m_settings; }
protected:
// is called when a find data request is received. Should
// return false if the data is not stored on this node. If
// the data is stored, it should be serialized into 'data'.
bool lookup_peers(sha1_hash const& info_hash, int prefix, entry& reply) const;
void lookup_peers(sha1_hash const& info_hash, int prefix, entry& reply
, bool noseed, bool scrape) const;
bool lookup_torrents(sha1_hash const& target, entry& reply
, char* tags) const;

View File

@ -34,10 +34,43 @@ POSSIBILITY OF SUCH DAMAGE.
namespace libtorrent
{
// tests a single bit in the 'len' bytes long bit field 'bits'.
// 'b' is the bit number, it wraps around at the total number
// of bits in the field (len * 8). Bit 0 is the least significant
// bit of the first byte. Returns true if the bit is set.
bool has_bit(boost::uint32_t b, boost::uint8_t const* bits, int len)
{
	b %= len * 8;
	boost::uint8_t const mask = boost::uint8_t(1 << (b & 7));
	return (bits[b / 8] & mask) != 0;
}
// checks whether the key 'k' is present in the 'len' bytes long
// bit field 'bits'. The first four bytes of 'k' are read as two
// 16 bit little endian bit indices (each wrapping around at
// len * 8). Returns true only if both of those bits are set.
bool has_bits(boost::uint8_t const* k, boost::uint8_t const* bits, int len)
{
	for (int i = 0; i < 2; ++i)
	{
		boost::uint32_t pos = boost::uint32_t(k[i * 2])
			| (boost::uint32_t(k[i * 2 + 1]) << 8);
		pos %= len * 8;
		if ((bits[pos / 8] & (1 << (pos & 7))) == 0) return false;
	}
	return true;
}
// sets a single bit in the 'len' bytes long bit field 'bits'.
// 'b' is the bit number, it wraps around at the total number
// of bits in the field (len * 8). Bit 0 is the least significant
// bit of the first byte.
void set_bit(boost::uint32_t b, boost::uint8_t* bits, int len)
{
	b %= len * 8;
	boost::uint8_t& target = bits[b / 8];
	target |= (1 << (b & 7));
}
// inserts the key 'k' into the 'len' bytes long bit field 'bits'.
// The first four bytes of 'k' are read as two 16 bit little endian
// bit indices (each wrapping around at len * 8) and both of those
// bits are set.
void set_bits(boost::uint8_t const* k, boost::uint8_t* bits, int len)
{
	for (int i = 0; i < 2; ++i)
	{
		boost::uint32_t pos = boost::uint32_t(k[i * 2])
			| (boost::uint32_t(k[i * 2 + 1]) << 8);
		pos %= len * 8;
		bits[pos / 8] |= (1 << (pos & 7));
	}
}
// returns the number of bits that are _not_ set in the first
// 'len' bytes of 'bits'. Returns 0 if 'len' is 0 or negative.
int count_zero_bits(boost::uint8_t const* bits, int len)
{
	// number of bits _not_ set in a nibble, indexed by the nibble's
	// value. The table is static and const so that it is not rebuilt
	// on the stack every time this function is called
	static boost::uint8_t const bitcount[16] =
	{
		// 0000, 0001, 0010, 0011, 0100, 0101, 0110, 0111,
		// 1000, 1001, 1010, 1011, 1100, 1101, 1110, 1111
		4, 3, 3, 2, 3, 2, 2, 1,
		3, 2, 2, 1, 2, 1, 1, 0
	};
	int ret = 0;
	for (int i = 0; i < len; ++i)
	{
		// count the low and the high nibble of each byte separately
		ret += bitcount[bits[i] & 0xf];
		ret += bitcount[(bits[i] >> 4) & 0xf];
	}
	return ret;
}
}

View File

@ -427,11 +427,11 @@ namespace libtorrent { namespace dht
#endif
}
void dht_tracker::announce(sha1_hash const& ih, int listen_port
void dht_tracker::announce(sha1_hash const& ih, int listen_port, bool seed
, boost::function<void(std::vector<tcp::endpoint> const&)> f)
{
TORRENT_ASSERT(m_ses.is_network_thread());
m_dht.announce(ih, listen_port, f);
m_dht.announce(ih, listen_port, seed, f);
}

View File

@ -179,13 +179,15 @@ find_data::find_data(
node_impl& node
, node_id target
, data_callback const& dcallback
, nodes_callback const& ncallback)
, nodes_callback const& ncallback
, bool noseeds)
: traversal_algorithm(node, target)
, m_data_callback(dcallback)
, m_nodes_callback(ncallback)
, m_target(target)
, m_done(false)
, m_got_peers(false)
, m_noseeds(noseeds)
{
node.m_table.for_each_node(&add_entry_fun, 0, (traversal_algorithm*)this);
}
@ -213,6 +215,7 @@ bool find_data::invoke(observer_ptr o)
e["q"] = "get_peers";
entry& a = e["a"];
a["info_hash"] = m_target.to_string();
if (m_noseeds) a["noseed"] = 1;
return m_node.m_rpc.invoke(e, o->target_ep(), o);
}

View File

@ -338,7 +338,7 @@ void node_impl::incoming(msg const& m)
namespace
{
void announce_fun(std::vector<std::pair<node_entry, std::string> > const& v
, node_impl& node, int listen_port, sha1_hash const& ih)
, node_impl& node, int listen_port, sha1_hash const& ih, bool seed)
{
#ifdef TORRENT_DHT_VERBOSE_LOGGING
TORRENT_LOG(node) << "sending announce_peer [ ih: " << ih
@ -371,6 +371,7 @@ namespace
a["info_hash"] = ih.to_string();
a["port"] = listen_port;
a["token"] = i->second;
a["seed"] = int(seed);
node.m_rpc.invoke(e, i->first.ep(), o);
}
}
@ -406,7 +407,7 @@ void node_impl::add_node(udp::endpoint node)
m_rpc.invoke(e, node, o);
}
void node_impl::announce(sha1_hash const& info_hash, int listen_port
void node_impl::announce(sha1_hash const& info_hash, int listen_port, bool seed
, boost::function<void(std::vector<tcp::endpoint> const&)> f)
{
#ifdef TORRENT_DHT_VERBOSE_LOGGING
@ -416,7 +417,7 @@ void node_impl::announce(sha1_hash const& info_hash, int listen_port
// for info-hash id. then send announce_peer to them.
boost::intrusive_ptr<find_data> ta(new find_data(*this, info_hash, f
, boost::bind(&announce_fun, _1, boost::ref(*this)
, listen_port, info_hash)));
, listen_port, info_hash, seed), seed));
ta->start();
}
@ -527,54 +528,64 @@ bool node_impl::lookup_torrents(sha1_hash const& target
return true;
}
bool node_impl::lookup_peers(sha1_hash const& info_hash, int prefix, entry& reply) const
void node_impl::lookup_peers(sha1_hash const& info_hash, int prefix, entry& reply
, bool noseed, bool scrape) const
{
if (m_alerts.should_post<dht_get_peers_alert>())
m_alerts.post_alert(dht_get_peers_alert(info_hash));
table_t::const_iterator i = m_map.lower_bound(info_hash);
if (i == m_map.end()) return false;
if (i->first != info_hash && prefix == 20) return false;
if (i == m_map.end()) return;
if (i->first != info_hash && prefix == 20) return;
if (prefix != 20)
{
sha1_hash mask = sha1_hash::max();
mask <<= (20 - prefix) * 8;
if ((i->first & mask) != (info_hash & mask)) return false;
if ((i->first & mask) != (info_hash & mask)) return;
}
torrent_entry const& v = i->second;
if (v.peers.empty()) return false;
if (!v.name.empty()) reply["n"] = v.name;
int num = (std::min)((int)v.peers.size(), m_settings.max_peers_reply);
int t = 0;
int m = 0;
std::set<peer_entry>::const_iterator iter = v.peers.begin();
entry::list_type& pe = reply["values"].list();
std::string endpoint;
while (m < num)
if (scrape)
{
if ((random() / float(UINT_MAX + 1.f)) * (num - t) >= num - m)
bloom_filter<256> downloaders;
bloom_filter<256> seeds;
for (std::set<peer_entry>::const_iterator i = v.peers.begin()
, end(v.peers.end()); i != end; ++i)
{
++iter;
++t;
sha1_hash iphash;
hash_address(i->addr.address(), iphash);
if (i->seed) seeds.set(iphash);
else downloaders.set(iphash);
}
else
reply["BFpe"] = downloaders.to_string();
reply["BFse"] = seeds.to_string();
}
else
{
int num = (std::min)((int)v.peers.size(), m_settings.max_peers_reply);
std::set<peer_entry>::const_iterator iter = v.peers.begin();
entry::list_type& pe = reply["values"].list();
std::string endpoint;
for (int t = 0, m = 0; m < num && iter != v.peers.end(); ++iter, ++t)
{
if ((random() / float(UINT_MAX + 1.f)) * (num - t) >= num - m) continue;
if (noseed && iter->seed) continue;
endpoint.resize(18);
std::string::iterator out = endpoint.begin();
write_endpoint(iter->addr, out);
endpoint.resize(out - endpoint.begin());
pe.push_back(entry(endpoint));
++iter;
++t;
++m;
}
}
return true;
return;
}
namespace
@ -762,10 +773,12 @@ void node_impl::incoming_request(msg const& m, entry& e)
key_desc_t msg_desc[] = {
{"info_hash", lazy_entry::string_t, 20, 0},
{"ifhpfxl", lazy_entry::int_t, 0, key_desc_t::optional},
{"noseed", lazy_entry::int_t, 0, key_desc_t::optional},
{"scrape", lazy_entry::int_t, 0, key_desc_t::optional},
};
lazy_entry const* msg_keys[2];
if (!verify_message(arg_ent, msg_desc, msg_keys, 2, error_string, sizeof(error_string)))
lazy_entry const* msg_keys[4];
if (!verify_message(arg_ent, msg_desc, msg_keys, 4, error_string, sizeof(error_string)))
{
incoming_error(e, error_string);
return;
@ -783,8 +796,11 @@ void node_impl::incoming_request(msg const& m, entry& e)
if (prefix > 20) prefix = 20;
else if (prefix < 4) prefix = 4;
bool ret = lookup_peers(info_hash, prefix, reply);
(void)ret;
bool noseed = false;
bool scrape = false;
if (msg_keys[2] && msg_keys[2]->int_value() != 0) noseed = true;
if (msg_keys[3] && msg_keys[3]->int_value() != 0) scrape = true;
lookup_peers(info_hash, prefix, reply, noseed, scrape);
#ifdef TORRENT_DHT_VERBOSE_LOGGING
if (ret) TORRENT_LOG(node) << " values: " << reply["values"].list().size();
#endif
@ -819,10 +835,11 @@ void node_impl::incoming_request(msg const& m, entry& e)
{"port", lazy_entry::int_t, 0, 0},
{"token", lazy_entry::string_t, 0, 0},
{"n", lazy_entry::string_t, 0, key_desc_t::optional},
{"seed", lazy_entry::int_t, 0, key_desc_t::optional},
};
lazy_entry const* msg_keys[4];
if (!verify_message(arg_ent, msg_desc, msg_keys, 4, error_string, sizeof(error_string)))
lazy_entry const* msg_keys[5];
if (!verify_message(arg_ent, msg_desc, msg_keys, 5, error_string, sizeof(error_string)))
{
#ifdef TORRENT_DHT_VERBOSE_LOGGING
++g_failed_announces;
@ -891,6 +908,7 @@ void node_impl::incoming_request(msg const& m, entry& e)
peer_entry peer;
peer.addr = tcp::endpoint(m.addr.address(), port);
peer.added = time_now();
peer.seed = msg_keys[4] && msg_keys[4]->int_value();
std::set<peer_entry>::iterator i = v.peers.find(peer);
if (i != v.peers.end()) v.peers.erase(i++);
v.peers.insert(i, peer);

View File

@ -49,7 +49,7 @@ refresh::refresh(
node_impl& node
, node_id target
, done_callback const& callback)
: find_data(node, target, find_data::data_callback(), callback)
: find_data(node, target, find_data::data_callback(), callback, false)
{
}

View File

@ -1862,7 +1862,7 @@ namespace libtorrent
boost::weak_ptr<torrent> self(shared_from_this());
m_ses.m_dht->announce(m_torrent_file->info_hash()
, m_ses.listen_port()
, m_ses.listen_port(), is_seed()
, boost::bind(&torrent::on_dht_announce_response_disp, self, _1));
}

View File

@ -35,6 +35,7 @@ POSSIBILITY OF SUCH DAMAGE.
#include "libtorrent/session.hpp"
#include "libtorrent/kademlia/node.hpp" // for verify_message
#include "libtorrent/bencode.hpp"
#include "libtorrent/socket_io.hpp" // for hash_address
#include <iostream>
#include "test.hpp"
@ -75,9 +76,8 @@ void send_dht_msg(node_impl& node, char const* msg, udp::endpoint const& ep
, lazy_entry* reply, char const* t = "10", char const* info_hash = 0
, char const* name = 0, std::string const token = std::string(), int port = 0
, std::string const target = std::string(), entry const* item = 0
, std::string const signature = std::string()
, std::string const key = std::string()
, std::string const id = std::string())
, std::string const id = std::string()
, bool scrape = false, bool seed = false)
{
// we're about to clear out the backing buffer
// for this lazy_entry, so we better clear it now
@ -94,8 +94,8 @@ void send_dht_msg(node_impl& node, char const* msg, udp::endpoint const& ep
if (port) a["port"] = port;
if (!target.empty()) a["target"] = target;
if (item) a["item"] = *item;
if (!signature.empty()) a["sig"] = signature;
if (!key.empty()) a["key"] = key;
if (scrape) a["scrape"] = 1;
if (seed) a["seed"] = 1;
char msg_buf[1500];
int size = bencode(msg_buf, e);
// std::cerr << "sending: " << e << "\n";
@ -130,14 +130,12 @@ void send_dht_msg(node_impl& node, char const* msg, udp::endpoint const& ep
struct announce_item
{
sha1_hash next;
boost::array<char, 64> key;
int num_peers;
entry ent;
sha1_hash target;
void gen()
{
ent["next"] = next.to_string();
ent["key"] = std::string(&key[0], 64);
ent["A"] = "a";
ent["B"] = "b";
ent["num_peers"] = num_peers;
@ -160,8 +158,7 @@ void announce_items(node_impl& node, udp::endpoint const* eps
if ((i % items[j].num_peers) == 0) continue;
lazy_entry response;
send_dht_msg(node, "get_item", eps[i], &response, "10", 0
, 0, no, 0, items[j].target.to_string(), 0, no
, std::string(&items[j].key[0], 64), ids[i].to_string());
, 0, no, 0, items[j].target.to_string());
key_desc_t desc[] =
{
@ -175,7 +172,7 @@ void announce_items(node_impl& node, udp::endpoint const* eps
lazy_entry const* parsed[5];
char error_string[200];
// fprintf(stderr, "msg: %s\n", print_entry(response).c_str());
fprintf(stderr, "msg: %s\n", print_entry(response).c_str());
int ret = verify_message(&response, desc, parsed, 5, error_string, sizeof(error_string));
if (ret)
{
@ -225,8 +222,7 @@ void announce_items(node_impl& node, udp::endpoint const* eps
{
lazy_entry response;
send_dht_msg(node, "get_item", eps[0], &response, "10", 0
, 0, no, 0, items[j].target.to_string(), 0, no
, std::string(&items[j].key[0], 64), ids[0].to_string());
, 0, no, 0, items[j].target.to_string());
key_desc_t desc[] =
{
@ -343,17 +339,18 @@ int test_main()
dht::key_desc_t peer1_desc[] = {
{"y", lazy_entry::string_t, 1, 0},
{"r", lazy_entry::dict_t, 0, key_desc_t::parse_children},
{"token", lazy_entry::string_t, 0, 0},
{"id", lazy_entry::string_t, 20, key_desc_t::last_child},
};
std::string token;
fprintf(stderr, "msg: %s\n", print_entry(response).c_str());
ret = dht::verify_message(&response, peer1_desc, parsed, 3, error_string, sizeof(error_string));
ret = dht::verify_message(&response, peer1_desc, parsed, 4, error_string, sizeof(error_string));
TEST_CHECK(ret);
if (ret)
{
TEST_CHECK(parsed[0]->string_value() == "r");
token = parsed[1]->dict_find_string_value("token");
token = parsed[2]->string_value();
}
else
{
@ -382,33 +379,96 @@ int test_main()
fprintf(stderr, " invalid announce response: %s\n", error_string);
}
// announce from 100 random IPs and make sure scrape works
// 50 downloaders and 50 seeds
for (int i = 0; i < 100; ++i)
{
source = udp::endpoint(rand_v4(), 6000);
send_dht_msg(node, "get_peers", source, &response, "10", "01010101010101010101");
ret = dht::verify_message(&response, peer1_desc, parsed, 4, error_string, sizeof(error_string));
if (ret)
{
TEST_CHECK(parsed[0]->string_value() == "r");
token = parsed[2]->string_value();
}
else
{
fprintf(stderr, " invalid get_peers response: %s\n", error_string);
}
response.clear();
send_dht_msg(node, "announce_peer", source, &response, "10", "01010101010101010101"
, "test", token, 8080, std::string(), 0, std::string(), false, i >= 50);
response.clear();
}
// ====== get_peers ======
send_dht_msg(node, "get_peers", source, &response, "10", "01010101010101010101");
send_dht_msg(node, "get_peers", source, &response, "10", "01010101010101010101"
, 0, std::string(), 0, std::string(), 0, std::string(), true);
dht::key_desc_t peer2_desc[] = {
{"y", lazy_entry::string_t, 1, 0},
{"r", lazy_entry::dict_t, 0, key_desc_t::parse_children},
{"BFpe", lazy_entry::string_t, 256, 0},
{"BFse", lazy_entry::string_t, 256, 0},
{"id", lazy_entry::string_t, 20, key_desc_t::last_child},
};
fprintf(stderr, "msg: %s\n", print_entry(response).c_str());
ret = dht::verify_message(&response, peer2_desc, parsed, 3, error_string, sizeof(error_string));
ret = dht::verify_message(&response, peer2_desc, parsed, 5, error_string, sizeof(error_string));
TEST_CHECK(ret);
if (ret)
{
TEST_CHECK(parsed[0]->string_value() == "r");
TEST_EQUAL(parsed[1]->dict_find_string_value("n"), "test");
bloom_filter<256> downloaders;
bloom_filter<256> seeds;
downloaders.from_string(parsed[2]->string_ptr());
seeds.from_string(parsed[3]->string_ptr());
fprintf(stderr, "seeds: %f\n", seeds.size());
fprintf(stderr, "downloaders: %f\n", downloaders.size());
TEST_CHECK(fabs(seeds.size() - 50.f) <= 2.f);
TEST_CHECK(fabs(downloaders.size() - 50.f) <= 2.f);
}
else
{
fprintf(stderr, " invalid get_peers response: %s\n", error_string);
}
bloom_filter<256> test;
for (int i = 0; i < 256; ++i)
{
char adr[50];
snprintf(adr, 50, "192.0.2.%d", i);
address a = address::from_string(adr);
sha1_hash iphash;
hash_address(a, iphash);
test.set(iphash);
}
for (int i = 0; i < 0x3E8; ++i)
{
char adr[50];
snprintf(adr, 50, "2001:db8::%x", i);
address a = address::from_string(adr);
sha1_hash iphash;
hash_address(a, iphash);
test.set(iphash);
}
fprintf(stderr, "test.size: %f\n", test.size());
TEST_CHECK(fabs(test.size() - 1224.93f) < 0.001);
fprintf(stderr, "%s\n", to_hex(test.to_string()).c_str());
TEST_CHECK(to_hex(test.to_string()) == "f6c3f5eaa07ffd91bde89f777f26fb2bff37bdb8fb2bbaa2fd3ddde7bacfff75ee7ccbaefe5eedb1fbfaff67f6abff5e43ddbca3fd9b9ffdf4ffd3e9dff12d1bdf59db53dbe9fa5b7ff3b8fdfcde1afb8bedd7be2f3ee71ebbbfe93bcdeefe148246c2bc5dbff7e7efdcf24fd8dc7adffd8fffdfddfff7a4bbeedf5cb95ce81fc7fcff1ff4ffffdfe5f7fdcbb7fd79b3fa1fc77bfe07fff905b7b7ffc7fefeffe0b8370bb0cd3f5b7f2bd93feb4386cfdd6f7fd5bfaf2e9ebffffeecd67adbf7c67f17efd5d75eba6ffeba7fff47a91eb1bfbb53e8abfb5762abe8ff237279bfefbfeef5ffc5febfdfe5adffadfee1fb737ffffbfd9f6aeffeee76b6fd8f72ef");
response.clear();
// ====== announce_item ======
/*
udp::endpoint eps[1000];
node_id ids[1000];
@ -420,21 +480,21 @@ int test_main()
announce_item items[] =
{
{ generate_next(), generate_key(), 1 },
{ generate_next(), generate_key(), 2 },
{ generate_next(), generate_key(), 3 },
{ generate_next(), generate_key(), 4 },
{ generate_next(), generate_key(), 5 },
{ generate_next(), generate_key(), 6 },
{ generate_next(), generate_key(), 7 },
{ generate_next(), generate_key(), 8 }
{ generate_next(), 1 },
{ generate_next(), 2 },
{ generate_next(), 3 },
{ generate_next(), 4 },
{ generate_next(), 5 },
{ generate_next(), 6 },
{ generate_next(), 7 },
{ generate_next(), 8 }
};
for (int i = 0; i < sizeof(items)/sizeof(items[0]); ++i)
items[i].gen();
announce_items(node, eps, ids, items, sizeof(items)/sizeof(items[0]));
*/
return 0;
}