update dht_sec specification and the dht code

This commit is contained in:
Arvid Norberg 2012-05-31 02:16:44 +00:00
parent d09e958166
commit 49a1398fe5
4 changed files with 131 additions and 132 deletions

View File

@ -3,7 +3,7 @@
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="generator" content="Docutils 0.5: http://docutils.sourceforge.net/" />
<meta name="generator" content="Docutils 0.8.1: http://docutils.sourceforge.net/" />
<title>BitTorrent DHT security extension</title>
<meta name="author" content="Arvid Norberg, arvid&#64;rasterbar.com" />
<link rel="stylesheet" type="text/css" href="../../css/base.css" />
@ -93,7 +93,7 @@ by observing that a typical routing table typically has about 20 of its
top routing table buckets full. That means the key space is dense enough
to contain 8 nodes for every combination of the 20 top bits of node IDs.</p>
<blockquote>
<tt class="docutils literal"><span class="pre">2^20</span> <span class="pre">*</span> <span class="pre">8</span> <span class="pre">=</span> <span class="pre">8388608</span></tt></blockquote>
<tt class="docutils literal">2^20 * 8 = 8388608</tt></blockquote>
<p>By controlling that many IP addresses, an attacker could snoop any info-hash.
By controlling 8 times that many IP addresses, an attacker could actually
take over any info-hash.</p>
@ -109,18 +109,20 @@ of IPs, as well as allowing more than one node ID per external IP, the node
ID can be restricted at each class level of the IP.</p>
<p>The expression to calculate a valid ID prefix (from an IPv4 address) is:</p>
<pre class="literal-block">
sha1((A * (B * (C * (D * (rand() % 8) % 0x100) % 0x4000) % 0x100000)) % 0x4000000)
sha1((ip &amp; 0x30f3fff) .. r)
</pre>
<p>Where <tt class="docutils literal"><span class="pre">A</span></tt>, <tt class="docutils literal"><span class="pre">B</span></tt>, <tt class="docutils literal"><span class="pre">C</span></tt> and <tt class="docutils literal"><span class="pre">D</span></tt> are the four octets of an IPv4 address.</p>
<p>The pattern is that the modulus constant is shifted left by 6 for each octet.
It generalizes to IPv6 by only considering the first 64 bit of the IP (since
the low 64 bits are controlled by the host) and shifting the modulus by 3 for
each octet instead.</p>
<p>And for an IPv6 address (<tt class="docutils literal">ip</tt> is the high 64 bits of the address):</p>
<pre class="literal-block">
sha1((ip &amp; 0x103070f1f3f7fff) .. r)
</pre>
<p><tt class="docutils literal">r</tt> is a random number in the range [0, 7]. The resulting integer,
representing the masked IP address, is supposed to be in big-endian byte order
before it is hashed. The &quot;..&quot; means concatenation.</p>
<p>To implement this, evaluate the expression, store the
result in a big endian 32 bit integer and hash those 4 bytes with SHA-1.
The first 4 bytes of the node ID used in the DHT MUST match the first 4
result in a big endian 64 bit integer and hash those 8 bytes with SHA-1.</p>
<p>The first 4 bytes of the node ID used in the DHT MUST match the first 4
bytes in the resulting hash. The last byte of the hash MUST match the
random number used to generate the hash.</p>
random number (<tt class="docutils literal">r</tt>) used to generate the hash.</p>
<img alt="ip_id_v4.png" src="ip_id_v4.png" />
<img alt="ip_id_v6.png" src="ip_id_v6.png" />
<p>Example code for calculating a valid node ID:</p>
@ -129,63 +131,45 @@ uint8_t* ip; // our external IPv4 or IPv6 address (network byte order)
int num_octets; // the number of octets to consider in ip (4 or 8)
uint8_t node_id[20]; // resulting node ID
uint32_t rand = rand() &amp; 0xff;
uint32_t modulus = 0x100;
uint32_t seed = rand &amp; 0x7;
int mod_shift = 6 * 4 / num_octets; // 6 or 3, depending on IPv4 and IPv6
while (num_octets)
{
seed = (uint64_t(seed) * ip[num_octets-1]) &amp; (modulus-1);
modulus &lt;&lt;= mod_shift;
--num_octets;
}
uint8_t v4mask[] = { 0x03, 0x0f, 0x3f, 0xff };
uint8_t v6mask[] = { 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff };
uint8_t* mask = num_octets == 4 ? v4mask : v6mask;
for (int i = 0; i &lt; num_octets; ++i)
ip[i] &amp;= mask[i];
seed = htonl(seed);
SHA_CTX ctx;
SHA1_Init(&amp;ctx);
SHA1_Update(&amp;ctx, (unsigned char*)&amp;seed, sizeof(seed));
SHA1_Update(&amp;ctx, (unsigned char*)ip, num_octets);
uint32_t rand = rand() &amp; 0xff;
uint8_t r = rand &amp; 0x7;
SHA1_Update(&amp;ctx, (unsigned char*)&amp;r, 1);
SHA1_Final(&amp;ctx, node_id);
for (int i = 4; i &lt; 19; ++i) node_id[i] = rand();
node_id[19] = rand;
</pre>
<p>Example code to verify a node ID:</p>
<pre class="literal-block">
uint8_t* ip; // incoming IPv4 or IPv6 address (network byte order)
int num_octets; // the number of octets to consider in ip (4 or 8)
uint8_t node_id[20]; // incoming node ID
uint32_t modulus = 0x100;
uint32_t seed = node_id[19] &amp; 0x7;
int mod_shift = 6 * 4 / num_octets; // 6 or 3, depending on IPv4 and IPv6
while (num_octets)
{
seed = (uint64_t(seed) * ip[num_octets-1]) &amp; (modulus-1);
modulus &lt;&lt;= mod_shift;
--num_octets;
}
seed = htonl(seed);
SHA_CTX ctx;
SHA1_Init(&amp;ctx);
SHA1_Update(&amp;ctx, (unsigned char*)&amp;seed, sizeof(seed));
uint8_t digest[20];
SHA1_Final(&amp;ctx, digest);
if (memcmp(digest, node_id, 4) != 0)
return false; // failed verification
else
return true; // verification passed
</pre>
<p>test vectors:</p>
<pre class="literal-block">
IP rand example node ID
============ ===== ==========================================
124.31.75.21 1 <strong>8a84ac4d</strong> 0c5d6a4ec8a88e4c6ab4c28b95eee4 <strong>01</strong>
21.75.31.124 86 <strong>b25a51b1</strong> 4e7a08645677bbd1cfe7d8f956d532 <strong>56</strong>
65.23.51.170 22 <strong>dc35968d</strong> bc8f112a3d426c84764f8c2a1150e6 <strong>16</strong>
84.124.73.14 65 <strong>98f44bb1</strong> 1bb1fe518101ceef99462b947a01ff <strong>41</strong>
43.213.53.83 90 <strong>5978e1c4</strong> 5b7c4be0237986d5243b87aa6d5130 <strong>5a</strong>
</pre>
<p>The bold parts of the node ID are the important parts. The rest are
random numbers.</p>
</div>
<div class="section" id="bootstrapping">
<h1>bootstrapping</h1>
<p>In order to set one's initial node ID, the external IP needs to be known. This
is not a trivial problem. WIth this extension, <em>all</em> DHT requests whose node
is not a trivial problem. With this extension, <em>all</em> DHT requests whose node
ID does not match its IP address MUST be serviced and MUST also include one
extra result value (inside the <tt class="docutils literal"><span class="pre">r</span></tt> dictionary) called <tt class="docutils literal"><span class="pre">ip</span></tt>. The IP field
extra result value (inside the <tt class="docutils literal">r</tt> dictionary) called <tt class="docutils literal">ip</tt>. The IP field
contains the raw (big endian) byte representation of the external IP address.
This is the same byte sequence passed to SHA-1.</p>
<p>A DHT node which receives an <tt class="docutils literal"><span class="pre">ip</span></tt> result in a request SHOULD consider restarting
This is the same byte sequence used to verify the node ID.</p>
<p>A DHT node which receives an <tt class="docutils literal">ip</tt> result in a request SHOULD consider restarting
its DHT node with a new node ID, taking this IP into account. Since a single node
can not be trusted, there should be some mechanism of determining whether or
not the node has a correct understanding of its external IP. This could
@ -197,7 +181,7 @@ nodes, from separate searches, tells you your node ID is incorrect.</p>
<p>Once enforced, write tokens from peers whose node ID does not match its external
IP should be considered dropped. In other words, a peer that uses a non-matching
ID MUST never be used to store information on, regardless of which request. In the
original DHT specification only <tt class="docutils literal"><span class="pre">announce_peer</span></tt> stores data in the network,
original DHT specification only <tt class="docutils literal">announce_peer</tt> stores data in the network,
but any future extension which stores data in the network SHOULD use the same
restriction.</p>
<p>Any peer on a local network address is exempt from this node ID verification.

View File

@ -65,20 +65,22 @@ ID can be restricted at each class level of the IP.
The expression to calculate a valid ID prefix (from an IPv4 address) is::
sha1((A * (B * (C * (D * (rand() % 8) % 0x100) % 0x4000) % 0x100000)) % 0x4000000)
sha1((ip & 0x30f3fff) .. r)
Where ``A``, ``B``, ``C`` and ``D`` are the four octets of an IPv4 address.
And for an IPv6 address (``ip`` is the high 64 bits of the address)::
The pattern is that the modulus constant is shifted left by 6 for each octet.
It generalizes to IPv6 by only considering the first 64 bit of the IP (since
the low 64 bits are controlled by the host) and shifting the modulus by 3 for
each octet instead.
sha1((ip & 0x103070f1f3f7fff) .. r)
``r`` is a random number in the range [0, 7]. The resulting integer,
representing the masked IP address, is supposed to be in big-endian byte order
before it is hashed. The ".." means concatenation.
To implement this, evaluate the expression, store the
result in a big endian 32 bit integer and hash those 4 bytes with SHA-1.
result in a big endian 64 bit integer and hash those 8 bytes with SHA-1.
The first 4 bytes of the node ID used in the DHT MUST match the first 4
bytes in the resulting hash. The last byte of the hash MUST match the
random number used to generate the hash.
random number (``r``) used to generate the hash.
.. image:: ip_id_v4.png
.. image:: ip_id_v6.png
@ -89,63 +91,47 @@ Example code code for calculating a valid node ID::
int num_octets; // the number of octets to consider in ip (4 or 8)
uint8_t node_id[20]; // resulting node ID
uint32_t rand = rand() & 0xff;
uint32_t modulus = 0x100;
uint32_t seed = rand & 0x7;
int mod_shift = 6 * 4 / num_octets; // 6 or 3, depending on IPv4 and IPv6
while (num_octets)
{
seed = (uint64_t(seed) * ip[num_octets-1]) & (modulus-1);
modulus <<= mod_shift;
--num_octets;
}
uint8_t v4mask[] = { 0x03, 0x0f, 0x3f, 0xff };
uint8_t v6mask[] = { 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff };
uint8_t* mask = num_octets == 4 ? v4mask : v6mask;
for (int i = 0; i < num_octets; ++i)
ip[i] &= mask[i];
seed = htonl(seed);
SHA_CTX ctx;
SHA1_Init(&ctx);
SHA1_Update(&ctx, (unsigned char*)&seed, sizeof(seed));
SHA1_Update(&ctx, (unsigned char*)ip, num_octets);
uint32_t rand = rand() & 0xff;
uint8_t r = rand & 0x7;
SHA1_Update(&ctx, (unsigned char*)&r, 1);
SHA1_Final(&ctx, node_id);
for (int i = 4; i < 19; ++i) node_id[i] = rand();
node_id[19] = rand;
Example code to verify a node ID::
uint8_t* ip; // incoming IPv4 or IPv6 address (network byte order)
int num_octets; // the number of octets to consider in ip (4 or 8)
uint8_t node_id[20]; // incoming node ID
uint32_t modulus = 0x100;
uint32_t seed = node_id[19] & 0x7;
int mod_shift = 6 * 4 / num_octets; // 6 or 3, depending on IPv4 and IPv6
while (num_octets)
{
seed = (uint64_t(seed) * ip[num_octets-1]) & (modulus-1);
modulus <<= mod_shift;
--num_octets;
}
seed = htonl(seed);
SHA_CTX ctx;
SHA1_Init(&ctx);
SHA1_Update(&ctx, (unsigned char*)&seed, sizeof(seed));
uint8_t digest[20];
SHA1_Final(&ctx, digest);
if (memcmp(digest, node_id, 4) != 0)
return false; // failed verification
else
return true; // verification passed
test vectors:
.. parsed-literal::
IP rand example node ID
============ ===== ==========================================
124.31.75.21 1 **8a84ac4d** 0c5d6a4ec8a88e4c6ab4c28b95eee4 **01**
21.75.31.124 86 **b25a51b1** 4e7a08645677bbd1cfe7d8f956d532 **56**
65.23.51.170 22 **dc35968d** bc8f112a3d426c84764f8c2a1150e6 **16**
84.124.73.14 65 **98f44bb1** 1bb1fe518101ceef99462b947a01ff **41**
43.213.53.83 90 **5978e1c4** 5b7c4be0237986d5243b87aa6d5130 **5a**
The bold parts of the node ID are the important parts. The rest are
random numbers.
bootstrapping
-------------
In order to set one's initial node ID, the external IP needs to be known. This
is not a trivial problem. WIth this extension, *all* DHT requests whose node
is not a trivial problem. With this extension, *all* DHT requests whose node
ID does not match its IP address MUST be serviced and MUST also include one
extra result value (inside the ``r`` dictionary) called ``ip``. The IP field
contains the raw (big endian) byte representation of the external IP address.
This is the same byte sequence passed to SHA-1.
This is the same byte sequence used to verify the node ID.
A DHT node which receives an ``ip`` result in a request SHOULD consider restarting
its DHT node with a new node ID, taking this IP into account. Since a single node

View File

@ -98,48 +98,43 @@ int distance_exp(node_id const& n1, node_id const& n2)
struct static_ { static_() { std::srand((unsigned int)std::time(0)); } } static__;
node_id generate_id_impl(address const& ip, boost::uint32_t r)
node_id generate_id_impl(address const& ip_, boost::uint32_t r)
{
boost::uint32_t seed = r & 0x7;
boost::uint32_t modulus = 0x100;
boost::uint8_t* p = 0;
int num_octets = 0;
int mod_shift = 0;
boost::uint8_t* ip = 0;
const static uint8_t v4mask[] = { 0x03, 0x0f, 0x3f, 0xff };
const static uint8_t v6mask[] = { 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff };
uint8_t const* mask = 0;
int num_octets = 0;
address_v4::bytes_type b4;
#if TORRENT_USE_IPV6
address_v6::bytes_type b6;
if (ip.is_v6())
if (ip_.is_v6())
{
b6 = ip.to_v6().to_bytes();
p = &b6[0];
b6 = ip_.to_v6().to_bytes();
ip = &b6[0];
num_octets = 8;
mod_shift = 3;
mask = v6mask;
}
else
#endif
{
b4 = ip.to_v4().to_bytes();
p = &b4[0];
b4 = ip_.to_v4().to_bytes();
ip = &b4[0];
num_octets = 4;
mod_shift = 6;
mask = v4mask;
}
while (num_octets)
{
seed *= p[num_octets-1];
seed &= (modulus-1);
modulus <<= mod_shift;
--num_octets;
}
seed = htonl(seed);
node_id id = hasher((const char*)&seed, sizeof(seed)).final();
for (int i = 0; i < num_octets; ++i)
ip[i] &= mask[i];
hasher h;
h.update((char*)ip, num_octets);
uint8_t rand = r & 0x7;
h.update((char*)&r, 1);
node_id id = h.final();
for (int i = 4; i < 19; ++i) id[i] = random();
id[19] = r;
return id;

View File

@ -68,6 +68,11 @@ using namespace boost::tuples;
namespace libtorrent {
// Local declarations of exported library functions used by this test file.
// NOTE(review): presumably declared here because no included header exposes
// them -- confirm before moving or removing.
TORRENT_EXPORT std::string sanitize_path(std::string const& p);
namespace dht
{
// Produces a DHT node ID from the address ip_ and the number r. test_main()
// calls this for a table of IPv4 addresses and checks the first four bytes
// and the last byte (index 19) of the returned ID.
TORRENT_EXPORT libtorrent::dht::node_id generate_id_impl(
address const& ip_, boost::uint32_t r);
}
}
sha1_hash to_hash(char const* s)
@ -1714,10 +1719,39 @@ int test_main()
}
TEST_CHECK(hits > int(temp.size()) / 2);
using namespace libtorrent::dht;
char const* ips[] = {
"124.31.75.21",
"21.75.31.124",
"65.23.51.170",
"84.124.73.14",
"43.213.53.83",
};
int rs[] = { 1,86,22,65,90 };
boost::uint8_t prefixes[][4] =
{
{0x8a, 0x84, 0xac, 0x4d},
{0xb2, 0x5a, 0x51, 0xb1 },
{0xdc, 0x35, 0x96, 0x8d },
{0x98, 0xf4, 0x4b, 0xb1 },
{0x59, 0x78, 0xe1, 0xc4 },
};
for (int i = 0; i < 5; ++i)
{
address a = address_v4::from_string(ips[i]);
node_id id = generate_id_impl(a, rs[i]);
for (int j = 0; j < 4; ++j)
TEST_CHECK(id[j] == prefixes[i][j]);
TEST_CHECK(id[19] == rs[i]);
fprintf(stderr, "IP address: %s r: %d node ID: %s\n", ips[i]
, rs[i], to_hex(id.to_string()).c_str());
}
#endif
// test peer_id/sha1_hash type
sha1_hash h1(0);