From 39264c5ad350f831a7d4cb16a89fcd871bc4f230 Mon Sep 17 00:00:00 2001 From: Arvid Norberg Date: Mon, 9 Sep 2013 07:08:02 +0000 Subject: [PATCH] introduce new, optional, get_peers mechanism that is more privacy preserving (doesn't leak the info-hash to as many DHT nodes) --- examples/client_test.cpp | 4 + include/libtorrent/kademlia/find_data.hpp | 27 ++++- include/libtorrent/kademlia/node_id.hpp | 5 +- .../kademlia/traversal_algorithm.hpp | 2 +- include/libtorrent/session_settings.hpp | 5 + src/kademlia/find_data.cpp | 106 +++++++++++++++++- src/kademlia/node.cpp | 18 ++- src/kademlia/traversal_algorithm.cpp | 5 + test/test_dht.cpp | 1 + 9 files changed, 160 insertions(+), 13 deletions(-) diff --git a/examples/client_test.cpp b/examples/client_test.cpp index 6ee17bb7d..d3115c5a7 100644 --- a/examples/client_test.cpp +++ b/examples/client_test.cpp @@ -1541,6 +1541,10 @@ int main(int argc, char* argv[]) } #ifndef TORRENT_DISABLE_DHT + dht_settings dht; + dht.privacy_lookups = true; + ses.set_dht_settings(dht); + if (start_dht) { settings.use_dht_as_fallback = false; diff --git a/include/libtorrent/kademlia/find_data.hpp b/include/libtorrent/kademlia/find_data.hpp index 3a319f84f..b5c170dd5 100644 --- a/include/libtorrent/kademlia/find_data.hpp +++ b/include/libtorrent/kademlia/find_data.hpp @@ -74,18 +74,18 @@ public: , nodes_callback const& ncallback , bool noseeds); - virtual char const* name() const { return "get_peers"; } + virtual void start(); + + virtual char const* name() const; node_id const target() const { return m_target; } protected: - void done(); + virtual void done(); observer_ptr new_observer(void* ptr, udp::endpoint const& ep, node_id const& id); virtual bool invoke(observer_ptr o); -private: - data_callback m_data_callback; nodes_callback m_nodes_callback; std::map m_write_tokens; @@ -95,6 +95,25 @@ private: bool m_noseeds:1; }; +class obfuscated_get_peers : public find_data +{ +public: + typedef find_data::nodes_callback done_callback; + + 
obfuscated_get_peers(node_impl& node, node_id target + , data_callback const& dcallback + , nodes_callback const& ncallback + , bool noseeds); + + virtual char const* name() const; + +protected: + + observer_ptr new_observer(void* ptr, udp::endpoint const& ep, node_id const& id); + virtual bool invoke(observer_ptr o); + virtual void done(); +}; + class find_data_observer : public observer { public: diff --git a/include/libtorrent/kademlia/node_id.hpp b/include/libtorrent/kademlia/node_id.hpp index 94939a456..5324211fd 100644 --- a/include/libtorrent/kademlia/node_id.hpp +++ b/include/libtorrent/kademlia/node_id.hpp @@ -55,7 +55,10 @@ node_id TORRENT_EXTRA_EXPORT distance(node_id const& n1, node_id const& n2); bool TORRENT_EXTRA_EXPORT compare_ref(node_id const& n1, node_id const& n2, node_id const& ref); // returns n in: 2^n <= distance(n1, n2) < 2^(n+1) -// usefult for finding out which bucket a node belongs to +// useful for finding out which bucket a node belongs to +// the value that's returned is the number of trailing bits +// after the shared bit prefix of ``n1`` and ``n2``. +// if the first bits are different, that's 160. 
int TORRENT_EXTRA_EXPORT distance_exp(node_id const& n1, node_id const& n2); node_id TORRENT_EXTRA_EXPORT generate_id(address const& external_ip); diff --git a/include/libtorrent/kademlia/traversal_algorithm.hpp b/include/libtorrent/kademlia/traversal_algorithm.hpp index b1db37531..212570963 100644 --- a/include/libtorrent/kademlia/traversal_algorithm.hpp +++ b/include/libtorrent/kademlia/traversal_algorithm.hpp @@ -71,7 +71,7 @@ struct traversal_algorithm : boost::noncopyable void* allocate_observer(); void free_observer(void* ptr); - virtual char const* name() const { return "traversal_algorithm"; } + virtual char const* name() const; virtual void start(); node_id const& target() const { return m_target; } diff --git a/include/libtorrent/session_settings.hpp b/include/libtorrent/session_settings.hpp index a9ae56bb1..9955b5b24 100644 --- a/include/libtorrent/session_settings.hpp +++ b/include/libtorrent/session_settings.hpp @@ -1405,6 +1405,7 @@ namespace libtorrent , restrict_search_ips(true) , extended_routing_table(true) , aggressive_lookups(true) + , privacy_lookups(false) {} // the maximum number of peers to send in a @@ -1470,6 +1471,10 @@ namespace libtorrent // i.e. every time we get results back with closer nodes, we query them right away. // It lowers the lookup times at the cost of more outstanding queries. bool aggressive_lookups; + + // when set, perform lookups in a way that is slightly more expensive, but which + // minimizes the amount of information leaked about you. 
+ bool privacy_lookups; }; #ifndef TORRENT_DISABLE_ENCRYPTION diff --git a/src/kademlia/find_data.cpp b/src/kademlia/find_data.cpp index 482b353b8..6ff97db02 100644 --- a/src/kademlia/find_data.cpp +++ b/src/kademlia/find_data.cpp @@ -183,7 +183,16 @@ find_data::find_data( , m_got_peers(false) , m_noseeds(noseeds) { - node.m_table.for_each_node(&add_entry_fun, 0, (traversal_algorithm*)this); +} + +void find_data::start() +{ + // if the user didn't add seed-nodes manually, grab a bunch of nodes from the + // routing table + if (m_results.empty()) + m_node.m_table.for_each_node(&add_entry_fun, 0, (traversal_algorithm*)this); + + traversal_algorithm::start(); } observer_ptr find_data::new_observer(void* ptr @@ -196,6 +205,8 @@ observer_ptr find_data::new_observer(void* ptr return o; } +char const* find_data::name() const { return "get_peers"; } + bool find_data::invoke(observer_ptr o) { if (m_done) @@ -206,17 +217,19 @@ bool find_data::invoke(observer_ptr o) entry e; e["y"] = "q"; - e["q"] = "get_peers"; entry& a = e["a"]; + + e["q"] = "get_peers"; a["info_hash"] = m_target.to_string(); if (m_noseeds) a["noseed"] = 1; + return m_node.m_rpc.invoke(e, o->target_ep(), o); } void find_data::got_peers(std::vector const& peers) { if (!peers.empty()) m_got_peers = true; - m_data_callback(peers); + if (m_data_callback) m_data_callback(peers); } void find_data::done() @@ -242,10 +255,95 @@ void find_data::done() results.push_back(std::make_pair(node_entry(o->id(), o->target_ep()), j->second)); --num_results; } - m_nodes_callback(results, m_got_peers); + if (m_nodes_callback) m_nodes_callback(results, m_got_peers); traversal_algorithm::done(); } +obfuscated_get_peers::obfuscated_get_peers( + node_impl& node + , node_id info_hash + , data_callback const& dcallback + , nodes_callback const& ncallback + , bool noseeds) + : find_data(node, info_hash, dcallback, ncallback, noseeds) +{ +} + +char const* obfuscated_get_peers::name() const { return "get_peers [obfuscated]"; } + 
+observer_ptr obfuscated_get_peers::new_observer(void* ptr + , udp::endpoint const& ep, node_id const& id) +{ + observer_ptr o(new (ptr) find_data_observer(this, ep, id)); +#if defined TORRENT_DEBUG || TORRENT_RELEASE_ASSERTS + o->m_in_constructor = false; +#endif + return o; +} + +bool obfuscated_get_peers::invoke(observer_ptr o) +{ + entry e; + e["y"] = "q"; + e["q"] = "find_node"; + entry& a = e["a"]; + + // This logic will obfuscate the target info-hash + // we're looking up, in order to preserve more privacy + // on the DHT. This is done by only including enough + // bits in the info-hash for the node we're querying to + // give a good answer, but not more. + + node_id id = o->id(); + int shared_prefix = 160 - distance_exp(id, m_target); + + // now, obfuscate the bits past shared_prefix + 5 + node_id obfuscated_target = generate_random_id(); + obfuscated_target >>= shared_prefix + 6; + obfuscated_target^= m_target; + a["target"] = obfuscated_target.to_string(); + + return m_node.m_rpc.invoke(e, o->target_ep(), o); +} + +void obfuscated_get_peers::done() +{ + boost::intrusive_ptr ta(new find_data(m_node, m_target + , m_data_callback + , m_nodes_callback + , m_noseeds)); + + // don't call these when the obfuscated_get_peers + // is done, we're passing them on to be called when + // ta completes. 
+ m_data_callback.clear(); + m_nodes_callback.clear(); + +#ifdef TORRENT_DHT_VERBOSE_LOGGING + TORRENT_LOG(traversal) << " [" << this << "]" + << " obfuscated get_peers phase 1 done, spawning get_peers [" << ta.get() << "]"; +#endif + + int num_added = 0; + for (std::vector::iterator i = m_results.begin() + , end(m_results.end()); i != end && num_added < 10; ++i) + { + observer_ptr o = *i; + + // only add nodes whose node ID we know and that + // we know are alive + if (o->flags & observer::flag_no_id) continue; + if ((o->flags & observer::flag_alive) == 0) continue; + + ta->add_entry(o->id(), o->target_ep(), observer::flag_initial); + ++num_added; + } + + ta->start(); + + find_data::done(); +} + } } // namespace libtorrent::dht diff --git a/src/kademlia/node.cpp b/src/kademlia/node.cpp index 47af0cbc3..b9d88ec3a 100644 --- a/src/kademlia/node.cpp +++ b/src/kademlia/node.cpp @@ -332,9 +332,21 @@ void node_impl::announce(sha1_hash const& info_hash, int listen_port, bool seed #endif // search for nodes with ids close to id or with peers // for info-hash id. then send announce_peer to them. 
- boost::intrusive_ptr ta(new find_data(*this, info_hash, f - , boost::bind(&announce_fun, _1, boost::ref(*this) - , listen_port, info_hash, seed), seed)); + + boost::intrusive_ptr ta; + if (m_settings.privacy_lookups) + { + ta.reset(new obfuscated_get_peers(*this, info_hash, f + , boost::bind(&announce_fun, _1, boost::ref(*this) + , listen_port, info_hash, seed), seed)); + } + else + { + ta.reset(new find_data(*this, info_hash, f + , boost::bind(&announce_fun, _1, boost::ref(*this) + , listen_port, info_hash, seed), seed)); + } + ta->start(); } diff --git a/src/kademlia/traversal_algorithm.cpp b/src/kademlia/traversal_algorithm.cpp index 116e7718a..b95d7914b 100644 --- a/src/kademlia/traversal_algorithm.cpp +++ b/src/kademlia/traversal_algorithm.cpp @@ -191,6 +191,11 @@ void traversal_algorithm::free_observer(void* ptr) m_node.m_rpc.free_observer(ptr); } +char const* traversal_algorithm::name() const +{ + return "traversal_algorithm"; +} + void traversal_algorithm::traverse(node_id const& id, udp::endpoint addr) { #ifdef TORRENT_DHT_VERBOSE_LOGGING diff --git a/test/test_dht.cpp b/test/test_dht.cpp index ff9ec9070..0d2751ed1 100644 --- a/test/test_dht.cpp +++ b/test/test_dht.cpp @@ -322,6 +322,7 @@ struct print_alert : alert_dispatcher } }; +// TODO: 3 test find_data, obfuscated_get_peers and bootstrap int test_main() { dht_settings sett;