forked from premiere/premiere-libtorrent
update dht_sec specification and the dht code
This commit is contained in:
parent
d09e958166
commit
49a1398fe5
|
@ -3,7 +3,7 @@
|
|||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
|
||||
<meta name="generator" content="Docutils 0.5: http://docutils.sourceforge.net/" />
|
||||
<meta name="generator" content="Docutils 0.8.1: http://docutils.sourceforge.net/" />
|
||||
<title>BitTorrent DHT security extension</title>
|
||||
<meta name="author" content="Arvid Norberg, arvid@rasterbar.com" />
|
||||
<link rel="stylesheet" type="text/css" href="../../css/base.css" />
|
||||
|
@ -93,7 +93,7 @@ by observing that a typical routing table typically has about 20 of its
|
|||
top routing table buckets full. That means the key space is dense enough
|
||||
to contain 8 nodes for every combination of the 20 top bits of node IDs.</p>
|
||||
<blockquote>
|
||||
<tt class="docutils literal"><span class="pre">2^20</span> <span class="pre">*</span> <span class="pre">8</span> <span class="pre">=</span> <span class="pre">8388608</span></tt></blockquote>
|
||||
<tt class="docutils literal">2^20 * 8 = 8388608</tt></blockquote>
|
||||
<p>By controlling that many IP addresses, an attacker could snoop any info-hash.
|
||||
By controlling 8 times that many IP addresses, an attacker could actually
|
||||
take over any info-hash.</p>
|
||||
|
@ -109,18 +109,20 @@ of IPs, as well as allowing more than one node ID per external IP, the node
|
|||
ID can be restricted at each class level of the IP.</p>
|
||||
<p>The expression to calculate a valid ID prefix (from an IPv4 address) is:</p>
|
||||
<pre class="literal-block">
|
||||
sha1((A * (B * (C * (D * (rand() % 8) % 0x100) % 0x4000) % 0x100000)) % 0x4000000)
|
||||
sha1((ip & 0x30f3fff) .. r)
|
||||
</pre>
|
||||
<p>Where <tt class="docutils literal"><span class="pre">A</span></tt>, <tt class="docutils literal"><span class="pre">B</span></tt>, <tt class="docutils literal"><span class="pre">C</span></tt> and <tt class="docutils literal"><span class="pre">D</span></tt> are the four octets of an IPv4 address.</p>
|
||||
<p>The pattern is that the modulus constant is shifted left by 6 for each octet.
|
||||
It generalizes to IPv6 by only considering the first 64 bit of the IP (since
|
||||
the low 64 bits are controlled by the host) and shifting the modulus by 3 for
|
||||
each octet instead.</p>
|
||||
<p>And for an IPv6 address (<tt class="docutils literal">ip</tt> is the high 64 bits of the address):</p>
|
||||
<pre class="literal-block">
|
||||
sha1((ip & 0x103070f1f3f7fff) .. r)
|
||||
</pre>
|
||||
<p><tt class="docutils literal">r</tt> is a random number in the range [0, 7]. The resulting integer,
|
||||
representing the masked IP address is supposed to be big-endian before
|
||||
hashed. The ".." means concatenation.</p>
|
||||
<p>To implement this, evaluate the expression, store the
|
||||
result in a big endian 32 bit integer and hash those 4 bytes with SHA-1.
|
||||
The first 4 bytes of the node ID used in the DHT MUST match the first 4
|
||||
result in a big endian 64 bit integer and hash those 8 bytes with SHA-1.</p>
|
||||
<p>The first 4 bytes of the node ID used in the DHT MUST match the first 4
|
||||
bytes in the resulting hash. The last byte of the hash MUST match the
|
||||
random number used to generate the hash.</p>
|
||||
random number (<tt class="docutils literal">r</tt>) used to generate the hash.</p>
|
||||
<img alt="ip_id_v4.png" src="ip_id_v4.png" />
|
||||
<img alt="ip_id_v6.png" src="ip_id_v6.png" />
|
||||
<p>Example code for calculating a valid node ID:</p>
|
||||
|
@ -129,63 +131,45 @@ uint8_t* ip; // our external IPv4 or IPv6 address (network byte order)
|
|||
int num_octets; // the number of octets to consider in ip (4 or 8)
|
||||
uint8_t node_id[20]; // resulting node ID
|
||||
|
||||
uint32_t rand = rand() & 0xff;
|
||||
uint32_t modulus = 0x100;
|
||||
uint32_t seed = rand & 0x7;
|
||||
int mod_shift = 6 * 4 / num_octets; // 6 or 3, depending on IPv4 and IPv6
|
||||
while (num_octets)
|
||||
{
|
||||
seed = (uint64_t(seed) * ip[num_octets-1]) & (modulus-1);
|
||||
modulus <<= mod_shift;
|
||||
--num_octets;
|
||||
}
|
||||
uint8_t v4mask[] = { 0x03, 0x0f, 0x3f, 0xff };
|
||||
uint8_t v6mask[] = { 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff };
|
||||
uint8_t* mask = num_octets == 4 ? v4mask : v6mask;
|
||||
|
||||
for (int i = 0; i < num_octets; ++i)
|
||||
ip[i] &= mask[i];
|
||||
|
||||
seed = htonl(seed);
|
||||
SHA_CTX ctx;
|
||||
SHA1_Init(&ctx);
|
||||
SHA1_Update(&ctx, (unsigned char*)&seed, sizeof(seed));
|
||||
SHA1_Update(&ctx, (unsigned char*)ip, num_octets);
|
||||
uint32_t rand = rand() & 0xff;
|
||||
uint8_t r = rand & 0x7;
|
||||
SHA1_Update(&ctx, (unsigned char*)&r, 1);
|
||||
SHA1_Final(&ctx, node_id);
|
||||
for (int i = 4; i < 19; ++i) node_id[i] = rand();
|
||||
node_id[19] = rand;
|
||||
</pre>
|
||||
<p>Example code to verify a node ID:</p>
|
||||
<pre class="literal-block">
|
||||
uint8_t* ip; // incoming IPv4 or IPv6 address (network byte order)
|
||||
int num_octets; // the number of octets to consider in ip (4 or 8)
|
||||
uint8_t node_id[20]; // incoming node ID
|
||||
|
||||
uint32_t modulus = 0x100;
|
||||
uint32_t seed = node_id[19] & 0x7;
|
||||
int mod_shift = 6 * 4 / num_octets; // 6 or 3, depending on IPv4 and IPv6
|
||||
while (num_octets)
|
||||
{
|
||||
seed = (uint64_t(seed) * ip[num_octets-1]) & (modulus-1);
|
||||
modulus <<= mod_shift;
|
||||
--num_octets;
|
||||
}
|
||||
|
||||
seed = htonl(seed);
|
||||
SHA_CTX ctx;
|
||||
SHA1_Init(&ctx);
|
||||
SHA1_Update(&ctx, (unsigned char*)&seed, sizeof(seed));
|
||||
uint8_t digest[20];
|
||||
SHA1_Final(&ctx, digest);
|
||||
if (memcmp(digest, node_id, 4) != 0)
|
||||
return false; // failed verification
|
||||
else
|
||||
return true; // verification passed
|
||||
</pre>
|
||||
<p>test vectors:</p>
|
||||
<pre class="literal-block">
|
||||
IP rand example node ID
|
||||
============ ===== ==========================================
|
||||
124.31.75.21 1 <strong>8a84ac4d</strong> 0c5d6a4ec8a88e4c6ab4c28b95eee4 <strong>01</strong>
|
||||
21.75.31.124 86 <strong>b25a51b1</strong> 4e7a08645677bbd1cfe7d8f956d532 <strong>56</strong>
|
||||
65.23.51.170 22 <strong>dc35968d</strong> bc8f112a3d426c84764f8c2a1150e6 <strong>16</strong>
|
||||
84.124.73.14 65 <strong>98f44bb1</strong> 1bb1fe518101ceef99462b947a01ff <strong>41</strong>
|
||||
43.213.53.83 90 <strong>5978e1c4</strong> 5b7c4be0237986d5243b87aa6d5130 <strong>5a</strong>
|
||||
</pre>
|
||||
<p>The bold parts of the node ID are the important parts. The rest are
|
||||
random numbers.</p>
|
||||
</div>
|
||||
<div class="section" id="bootstrapping">
|
||||
<h1>bootstrapping</h1>
|
||||
<p>In order to set one's initial node ID, the external IP needs to be known. This
|
||||
is not a trivial problem. WIth this extension, <em>all</em> DHT requests whose node
|
||||
is not a trivial problem. With this extension, <em>all</em> DHT requests whose node
|
||||
ID does not match its IP address MUST be serviced and MUST also include one
|
||||
extra result value (inside the <tt class="docutils literal"><span class="pre">r</span></tt> dictionary) called <tt class="docutils literal"><span class="pre">ip</span></tt>. The IP field
|
||||
extra result value (inside the <tt class="docutils literal">r</tt> dictionary) called <tt class="docutils literal">ip</tt>. The IP field
|
||||
contains the raw (big endian) byte representation of the external IP address.
|
||||
This is the same byte sequence passed to SHA-1.</p>
|
||||
<p>A DHT node which receives an <tt class="docutils literal"><span class="pre">ip</span></tt> result in a request SHOULD consider restarting
|
||||
This is the same byte sequence used to verify the node ID.</p>
|
||||
<p>A DHT node which receives an <tt class="docutils literal">ip</tt> result in a request SHOULD consider restarting
|
||||
its DHT node with a new node ID, taking this IP into account. Since a single node
|
||||
can not be trusted, there should be some mechanism of determining whether or
|
||||
not the node has a correct understanding of its external IP or not. This could
|
||||
|
@ -197,7 +181,7 @@ nodes, from separate searches, tells you your node ID is incorrect.</p>
|
|||
<p>Once enforced, write tokens from peers whose node ID does not match its external
|
||||
IP should be considered dropped. In other words, a peer that uses a non-matching
|
||||
ID MUST never be used to store information on, regardless of which request. In the
|
||||
original DHT specification only <tt class="docutils literal"><span class="pre">announce_peer</span></tt> stores data in the network,
|
||||
original DHT specification only <tt class="docutils literal">announce_peer</tt> stores data in the network,
|
||||
but any future extension which stores data in the network SHOULD use the same
|
||||
restriction.</p>
|
||||
<p>Any peer on a local network address is exempt from this node ID verification.
|
||||
|
|
|
@ -65,20 +65,22 @@ ID can be restricted at each class level of the IP.
|
|||
|
||||
The expression to calculate a valid ID prefix (from an IPv4 address) is::
|
||||
|
||||
sha1((A * (B * (C * (D * (rand() % 8) % 0x100) % 0x4000) % 0x100000)) % 0x4000000)
|
||||
sha1((ip & 0x30f3fff) .. r)
|
||||
|
||||
Where ``A``, ``B``, ``C`` and ``D`` are the four octets of an IPv4 address.
|
||||
And for an IPv6 address (``ip`` is the high 64 bits of the address)::
|
||||
|
||||
The pattern is that the modulus constant is shifted left by 6 for each octet.
|
||||
It generalizes to IPv6 by only considering the first 64 bit of the IP (since
|
||||
the low 64 bits are controlled by the host) and shifting the modulus by 3 for
|
||||
each octet instead.
|
||||
sha1((ip & 0x103070f1f3f7fff) .. r)
|
||||
|
||||
``r`` is a random number in the range [0, 7]. The resulting integer,
|
||||
representing the masked IP address is supposed to be big-endian before
|
||||
hashed. The ".." means concatenation.
|
||||
|
||||
To implement this, evaluate the expression, store the
|
||||
result in a big endian 32 bit integer and hash those 4 bytes with SHA-1.
|
||||
result in a big endian 64 bit integer and hash those 8 bytes with SHA-1.
|
||||
|
||||
The first 4 bytes of the node ID used in the DHT MUST match the first 4
|
||||
bytes in the resulting hash. The last byte of the hash MUST match the
|
||||
random number used to generate the hash.
|
||||
random number (``r``) used to generate the hash.
|
||||
|
||||
.. image:: ip_id_v4.png
|
||||
.. image:: ip_id_v6.png
|
||||
|
@ -89,63 +91,47 @@ Example code code for calculating a valid node ID::
|
|||
int num_octets; // the number of octets to consider in ip (4 or 8)
|
||||
uint8_t node_id[20]; // resulting node ID
|
||||
|
||||
uint32_t rand = rand() & 0xff;
|
||||
uint32_t modulus = 0x100;
|
||||
uint32_t seed = rand & 0x7;
|
||||
int mod_shift = 6 * 4 / num_octets; // 6 or 3, depending on IPv4 and IPv6
|
||||
while (num_octets)
|
||||
{
|
||||
seed = (uint64_t(seed) * ip[num_octets-1]) & (modulus-1);
|
||||
modulus <<= mod_shift;
|
||||
--num_octets;
|
||||
}
|
||||
uint8_t v4mask[] = { 0x03, 0x0f, 0x3f, 0xff };
|
||||
uint8_t v6mask[] = { 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff };
|
||||
uint8_t* mask = num_octets == 4 ? v4mask : v6mask;
|
||||
|
||||
for (int i = 0; i < num_octets; ++i)
|
||||
ip[i] &= mask[i];
|
||||
|
||||
seed = htonl(seed);
|
||||
SHA_CTX ctx;
|
||||
SHA1_Init(&ctx);
|
||||
SHA1_Update(&ctx, (unsigned char*)&seed, sizeof(seed));
|
||||
SHA1_Update(&ctx, (unsigned char*)ip, num_octets);
|
||||
uint32_t rand = rand() & 0xff;
|
||||
uint8_t r = rand & 0x7;
|
||||
SHA1_Update(&ctx, (unsigned char*)&r, 1);
|
||||
SHA1_Final(&ctx, node_id);
|
||||
for (int i = 4; i < 19; ++i) node_id[i] = rand();
|
||||
node_id[19] = rand;
|
||||
|
||||
Example code to verify a node ID::
|
||||
|
||||
uint8_t* ip; // incoming IPv4 or IPv6 address (network byte order)
|
||||
int num_octets; // the number of octets to consider in ip (4 or 8)
|
||||
uint8_t node_id[20]; // incoming node ID
|
||||
|
||||
uint32_t modulus = 0x100;
|
||||
uint32_t seed = node_id[19] & 0x7;
|
||||
int mod_shift = 6 * 4 / num_octets; // 6 or 3, depending on IPv4 and IPv6
|
||||
while (num_octets)
|
||||
{
|
||||
seed = (uint64_t(seed) * ip[num_octets-1]) & (modulus-1);
|
||||
modulus <<= mod_shift;
|
||||
--num_octets;
|
||||
}
|
||||
|
||||
seed = htonl(seed);
|
||||
SHA_CTX ctx;
|
||||
SHA1_Init(&ctx);
|
||||
SHA1_Update(&ctx, (unsigned char*)&seed, sizeof(seed));
|
||||
uint8_t digest[20];
|
||||
SHA1_Final(&ctx, digest);
|
||||
if (memcmp(digest, node_id, 4) != 0)
|
||||
return false; // failed verification
|
||||
else
|
||||
return true; // verification passed
|
||||
|
||||
test vectors:
|
||||
|
||||
.. parsed-literal::
|
||||
|
||||
IP rand example node ID
|
||||
============ ===== ==========================================
|
||||
124.31.75.21 1 **8a84ac4d** 0c5d6a4ec8a88e4c6ab4c28b95eee4 **01**
|
||||
21.75.31.124 86 **b25a51b1** 4e7a08645677bbd1cfe7d8f956d532 **56**
|
||||
65.23.51.170 22 **dc35968d** bc8f112a3d426c84764f8c2a1150e6 **16**
|
||||
84.124.73.14 65 **98f44bb1** 1bb1fe518101ceef99462b947a01ff **41**
|
||||
43.213.53.83 90 **5978e1c4** 5b7c4be0237986d5243b87aa6d5130 **5a**
|
||||
|
||||
The bold parts of the node ID are the important parts. The rest are
|
||||
random numbers.
|
||||
|
||||
bootstrapping
|
||||
-------------
|
||||
|
||||
In order to set one's initial node ID, the external IP needs to be known. This
|
||||
is not a trivial problem. WIth this extension, *all* DHT requests whose node
|
||||
is not a trivial problem. With this extension, *all* DHT requests whose node
|
||||
ID does not match its IP address MUST be serviced and MUST also include one
|
||||
extra result value (inside the ``r`` dictionary) called ``ip``. The IP field
|
||||
contains the raw (big endian) byte representation of the external IP address.
|
||||
This is the same byte sequence passed to SHA-1.
|
||||
This is the same byte sequence used to verify the node ID.
|
||||
|
||||
A DHT node which receives an ``ip`` result in a request SHOULD consider restarting
|
||||
its DHT node with a new node ID, taking this IP into account. Since a single node
|
||||
|
|
|
@ -98,48 +98,43 @@ int distance_exp(node_id const& n1, node_id const& n2)
|
|||
|
||||
struct static_ { static_() { std::srand((unsigned int)std::time(0)); } } static__;
|
||||
|
||||
node_id generate_id_impl(address const& ip, boost::uint32_t r)
|
||||
node_id generate_id_impl(address const& ip_, boost::uint32_t r)
|
||||
{
|
||||
boost::uint32_t seed = r & 0x7;
|
||||
boost::uint32_t modulus = 0x100;
|
||||
|
||||
boost::uint8_t* p = 0;
|
||||
int num_octets = 0;
|
||||
int mod_shift = 0;
|
||||
boost::uint8_t* ip = 0;
|
||||
|
||||
const static uint8_t v4mask[] = { 0x03, 0x0f, 0x3f, 0xff };
|
||||
const static uint8_t v6mask[] = { 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff };
|
||||
uint8_t const* mask = 0;
|
||||
int num_octets = 0;
|
||||
|
||||
address_v4::bytes_type b4;
|
||||
#if TORRENT_USE_IPV6
|
||||
address_v6::bytes_type b6;
|
||||
if (ip.is_v6())
|
||||
if (ip_.is_v6())
|
||||
{
|
||||
b6 = ip.to_v6().to_bytes();
|
||||
p = &b6[0];
|
||||
b6 = ip_.to_v6().to_bytes();
|
||||
ip = &b6[0];
|
||||
num_octets = 8;
|
||||
mod_shift = 3;
|
||||
mask = v6mask;
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
b4 = ip.to_v4().to_bytes();
|
||||
p = &b4[0];
|
||||
b4 = ip_.to_v4().to_bytes();
|
||||
ip = &b4[0];
|
||||
num_octets = 4;
|
||||
mod_shift = 6;
|
||||
mask = v4mask;
|
||||
}
|
||||
|
||||
while (num_octets)
|
||||
{
|
||||
seed *= p[num_octets-1];
|
||||
seed &= (modulus-1);
|
||||
modulus <<= mod_shift;
|
||||
--num_octets;
|
||||
}
|
||||
|
||||
seed = htonl(seed);
|
||||
|
||||
node_id id = hasher((const char*)&seed, sizeof(seed)).final();
|
||||
for (int i = 0; i < num_octets; ++i)
|
||||
ip[i] &= mask[i];
|
||||
|
||||
hasher h;
|
||||
h.update((char*)ip, num_octets);
|
||||
uint8_t rand = r & 0x7;
|
||||
h.update((char*)&r, 1);
|
||||
node_id id = h.final();
|
||||
for (int i = 4; i < 19; ++i) id[i] = random();
|
||||
|
||||
id[19] = r;
|
||||
|
||||
return id;
|
||||
|
|
|
@ -68,6 +68,11 @@ using namespace boost::tuples;
|
|||
|
||||
namespace libtorrent {
|
||||
TORRENT_EXPORT std::string sanitize_path(std::string const& p);
|
||||
namespace dht
|
||||
{
|
||||
TORRENT_EXPORT libtorrent::dht::node_id generate_id_impl(
|
||||
address const& ip_, boost::uint32_t r);
|
||||
}
|
||||
}
|
||||
|
||||
sha1_hash to_hash(char const* s)
|
||||
|
@ -1714,10 +1719,39 @@ int test_main()
|
|||
}
|
||||
TEST_CHECK(hits > int(temp.size()) / 2);
|
||||
|
||||
using namespace libtorrent::dht;
|
||||
|
||||
char const* ips[] = {
|
||||
"124.31.75.21",
|
||||
"21.75.31.124",
|
||||
"65.23.51.170",
|
||||
"84.124.73.14",
|
||||
"43.213.53.83",
|
||||
};
|
||||
|
||||
int rs[] = { 1,86,22,65,90 };
|
||||
|
||||
boost::uint8_t prefixes[][4] =
|
||||
{
|
||||
{0x8a, 0x84, 0xac, 0x4d},
|
||||
{0xb2, 0x5a, 0x51, 0xb1 },
|
||||
{0xdc, 0x35, 0x96, 0x8d },
|
||||
{0x98, 0xf4, 0x4b, 0xb1 },
|
||||
{0x59, 0x78, 0xe1, 0xc4 },
|
||||
};
|
||||
|
||||
for (int i = 0; i < 5; ++i)
|
||||
{
|
||||
address a = address_v4::from_string(ips[i]);
|
||||
node_id id = generate_id_impl(a, rs[i]);
|
||||
for (int j = 0; j < 4; ++j)
|
||||
TEST_CHECK(id[j] == prefixes[i][j]);
|
||||
TEST_CHECK(id[19] == rs[i]);
|
||||
fprintf(stderr, "IP address: %s r: %d node ID: %s\n", ips[i]
|
||||
, rs[i], to_hex(id.to_string()).c_str());
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
// test peer_id/sha1_hash type
|
||||
|
||||
sha1_hash h1(0);
|
||||
|
|
Loading…
Reference in New Issue