forked from premiere/premiere-libtorrent
update dht_sec document
This commit is contained in:
parent
6dbc7091d3
commit
76941a609f
Binary file not shown.
After Width: | Height: | Size: 7.6 KiB |
|
@ -59,9 +59,10 @@
|
|||
<li><a class="reference internal" href="#considerations" id="id4">considerations</a></li>
|
||||
<li><a class="reference internal" href="#node-id-restriction" id="id5">Node ID restriction</a></li>
|
||||
<li><a class="reference internal" href="#bootstrapping" id="id6">bootstrapping</a></li>
|
||||
<li><a class="reference internal" href="#enforcement" id="id7">enforcement</a></li>
|
||||
<li><a class="reference internal" href="#backwards-compatibility-and-transition" id="id8">backwards compatibility and transition</a></li>
|
||||
<li><a class="reference internal" href="#forward-compatibility" id="id9">forward compatibility</a></li>
|
||||
<li><a class="reference internal" href="#rationale" id="id7">rationale</a></li>
|
||||
<li><a class="reference internal" href="#enforcement" id="id8">enforcement</a></li>
|
||||
<li><a class="reference internal" href="#backwards-compatibility-and-transition" id="id9">backwards compatibility and transition</a></li>
|
||||
<li><a class="reference internal" href="#forward-compatibility" id="id10">forward compatibility</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
<div class="section" id="id1">
|
||||
|
@ -112,20 +113,20 @@ distribution of the IDs remain uniform. This is why CRC32 was chosen
|
|||
as the hash function. See <a class="reference external" href="http://blog.libtorrent.org/2012/12/dht-security/">comparisons of hash functions</a>.</p>
|
||||
<p>The expression to calculate a valid ID prefix (from an IPv4 address) is:</p>
|
||||
<pre class="literal-block">
|
||||
crc32((ip & 0x01071f7f) .. r)
|
||||
crc32((ip & 0x030f3fff) .. r)
|
||||
</pre>
|
||||
<p>And for an IPv6 address (<tt class="docutils literal">ip</tt> is the high 64 bits of the address):</p>
|
||||
<pre class="literal-block">
|
||||
crc32((ip & 0x000103070f1f3f7f) .. r)
|
||||
crc32((ip & 0x0103070f1f3f7fff) .. r)
|
||||
</pre>
|
||||
<p><tt class="docutils literal">r</tt> is a random number in the range [0, 7]. The resulting integer,
|
||||
representing the masked IP address is supposed to be big-endian before
|
||||
hashed. The ".." means concatenation.</p>
|
||||
<p>To implement this, evaluate the expression, store the
|
||||
result in a big endian 64 bit integer and hash those 8 bytes with CRC32.</p>
|
||||
<p>The first 4 bytes of the node ID used in the DHT MUST match the first 4
|
||||
bytes in the resulting hash. The last byte of the hash MUST match the
|
||||
random number (<tt class="docutils literal">r</tt>) used to generate the hash.</p>
|
||||
<p>The first (most significant) 21 bits of the node ID used in the DHT MUST
|
||||
match the first 21 bits of the resulting hash. The last byte of the hash MUST
|
||||
match the random number (<tt class="docutils literal">r</tt>) used to generate the hash.</p>
|
||||
<img alt="ip_id_v4.png" src="ip_id_v4.png" />
|
||||
<img alt="ip_id_v6.png" src="ip_id_v6.png" />
|
||||
<p>Example code for calculating a valid node ID:</p>
|
||||
|
@ -134,39 +135,40 @@ uint8_t* ip; // our external IPv4 or IPv6 address (network byte order)
|
|||
int num_octets; // the number of octets to consider in ip (4 or 8)
|
||||
uint8_t node_id[20]; // resulting node ID
|
||||
|
||||
uint8_t v4mask[] = { 0x01, 0x07, 0x1f, 0x7f };
|
||||
uint8_t v6mask[] = { 0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f };
|
||||
uint8_t* mask = num_octets == 4 ? v4_mask : v8_mask;
|
||||
uint8_t v4_mask[] = { 0x03, 0x0f, 0x3f, 0xff };
|
||||
uint8_t v6_mask[] = { 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff };
|
||||
uint8_t* mask = num_octets == 4 ? v4_mask : v6_mask;
|
||||
|
||||
for (int i = 0; i < num_octets; ++i)
|
||||
ip[i] &= mask[i];
|
||||
|
||||
uint32_t rand = rand() & 0xff;
|
||||
uint32_t rand = std::rand() & 0xff;
|
||||
uint8_t r = rand & 0x7;
|
||||
|
||||
uint32_t crc = crc32(0, NULL, 0);
|
||||
uint32_t crc = crc32(0, nullptr, 0);
|
||||
crc = crc32(crc, ip, num_octets);
|
||||
crc = crc32(crc, &r, 1);
|
||||
|
||||
// only take the top 21 bits from crc
|
||||
node_id[0] = (crc >> 24) & 0xff;
|
||||
node_id[1] = (crc >> 16) & 0xff;
|
||||
node_id[2] = (crc >> 8) & 0xff;
|
||||
node_id[3] = crc & 0xff;
|
||||
for (int i = 4; i < 19; ++i) node_id[i] = std::rand();
|
||||
node_id[2] = ((crc >> 8) & 0xf8) | (std::rand() & 0x7);
|
||||
for (int i = 3; i < 19; ++i) node_id[i] = std::rand();
|
||||
node_id[19] = rand;
|
||||
</pre>
|
||||
<p>test vectors:</p>
|
||||
<pre class="literal-block">
|
||||
IP rand example node ID
|
||||
============ ===== ==========================================
|
||||
124.31.75.21 1 <strong>1712f6c7</strong> 0c5d6a4ec8a88e4c6ab4c28b95eee4 <strong>01</strong>
|
||||
21.75.31.124 86 <strong>946406c1</strong> 4e7a08645677bbd1cfe7d8f956d532 <strong>56</strong>
|
||||
65.23.51.170 22 <strong>fefd9220</strong> bc8f112a3d426c84764f8c2a1150e6 <strong>16</strong>
|
||||
84.124.73.14 65 <strong>af1546dd</strong> 1bb1fe518101ceef99462b947a01ff <strong>41</strong>
|
||||
43.213.53.83 90 <strong>a9e920bf</strong> 5b7c4be0237986d5243b87aa6d5130 <strong>5a</strong>
|
||||
124.31.75.21 1 <strong>d2a6df</strong> f10c5d6a4ec8a88e4c6ab4c28b95eee4 <strong>01</strong>
|
||||
21.75.31.124 86 <strong>48cb19</strong> c14e7a08645677bbd1cfe7d8f956d532 <strong>56</strong>
|
||||
65.23.51.170 22 <strong>fd334a</strong> 20bc8f112a3d426c84764f8c2a1150e6 <strong>16</strong>
|
||||
84.124.73.14 65 <strong>6aa169</strong> dd1bb1fe518101ceef99462b947a01ff <strong>41</strong>
|
||||
43.213.53.83 90 <strong>eb6434</strong> bf5b7c4be0237986d5243b87aa6d5130 <strong>5a</strong>
|
||||
</pre>
|
||||
<p>The bold parts of the node ID are the important parts. The rest are
|
||||
random numbers.</p>
|
||||
random numbers. The last bold number of each row has only its most significant
|
||||
bits (the top 5) pulled from the CRC function. The lower 3 bits are random.</p>
|
||||
</div>
|
||||
<div class="section" id="bootstrapping">
|
||||
<h1>bootstrapping</h1>
|
||||
|
@ -187,6 +189,44 @@ not the node has a correct understanding of its external IP or not. This could
|
|||
be done by voting, or only restart the DHT once at least a certain number of
|
||||
nodes, from separate searches, tells you your node ID is incorrect.</p>
|
||||
</div>
|
||||
<div class="section" id="rationale">
|
||||
<h1>rationale</h1>
|
||||
<p>The choice of using CRC32 instead of a more traditional cryptographic hash
|
||||
function is justified primarily by these reasons:</p>
|
||||
<ol class="arabic simple">
|
||||
<li>it is a fast function</li>
|
||||
<li>produces well distributed results</li>
|
||||
<li>there is no need for the hash function to be one-way (the input set is
|
||||
so small that any hash function could be reversed).</li>
|
||||
</ol>
|
||||
<p>There are primarily two tests run on SHA-1 and CRC32 to establish the
|
||||
distribution of results. The first one is the number of bits in the output
|
||||
set that contain every possible combination of bits. The CRC function
|
||||
has a longer such prefix in its output than SHA-1. This means nodes will still
|
||||
have well uniformly distributed IDs, even when IP addresses in use are not
|
||||
uniformly distributed.</p>
|
||||
<p>The following graph illustrates a few different hash functions with regard
|
||||
to this property.</p>
|
||||
<img alt="complete_bit_prefixes.png" src="complete_bit_prefixes.png" />
|
||||
<p>This test takes into account IP addresses that are not globally routable, i.e.
|
||||
reserved for local networks, multicast and other things. It also takes into
|
||||
account that some /8 blocks are not in use by end-users and extremely unlikely
|
||||
to ever run a DHT node. This makes the results likely to be very similar to
|
||||
what we would see in the wild.</p>
|
||||
<p>These results indicate that CRC32 provides the best uniformity in the results
|
||||
in terms of bit prefixes where all possibilities are represented, and that
|
||||
no more than 21 bits should be used from the result. If more than 21 bits
|
||||
were to be used, there would be certain node IDs that would be impossible to
|
||||
have, which would make routing sub-optimal.</p>
|
||||
<p>The second test is more of a sanity test for the uniform distribution property.
|
||||
The target space (32 bit integer) is divided up into 1000 buckets. Every valid
|
||||
IP and <tt class="docutils literal">r</tt> input is run through the algorithm and the result is put in the
|
||||
bucket it falls in. The expectation is that each bucket has roughly an equal
|
||||
number of results falling into it. The following graph shows the resulting
|
||||
histogram, comparing SHA-1 and CRC32.</p>
|
||||
<img alt="hash_distribution.png" src="hash_distribution.png" />
|
||||
<p>The source code for these tests can be found <a class="reference external" href="https://github.com/arvidn/hash_complete_prefix">here</a>.</p>
|
||||
</div>
|
||||
<div class="section" id="enforcement">
|
||||
<h1>enforcement</h1>
|
||||
<p>Once enforced, write tokens from peers whose node ID does not match its external
|
||||
|
|
|
@ -71,11 +71,11 @@ __ http://blog.libtorrent.org/2012/12/dht-security/
|
|||
|
||||
The expression to calculate a valid ID prefix (from an IPv4 address) is::
|
||||
|
||||
crc32((ip & 0x01071f7f) .. r)
|
||||
crc32((ip & 0x030f3fff) .. r)
|
||||
|
||||
And for an IPv6 address (``ip`` is the high 64 bits of the address)::
|
||||
|
||||
crc32((ip & 0x000103070f1f3f7f) .. r)
|
||||
crc32((ip & 0x0103070f1f3f7fff) .. r)
|
||||
|
||||
``r`` is a random number in the range [0, 7]. The resulting integer,
|
||||
representing the masked IP address is supposed to be big-endian before
|
||||
|
@ -84,9 +84,9 @@ hashed. The ".." means concatenation.
|
|||
To implement this, evaluate the expression, store the
|
||||
result in a big endian 64 bit integer and hash those 8 bytes with CRC32.
|
||||
|
||||
The first 4 bytes of the node ID used in the DHT MUST match the first 4
|
||||
bytes in the resulting hash. The last byte of the hash MUST match the
|
||||
random number (``r``) used to generate the hash.
|
||||
The first (most significant) 21 bits of the node ID used in the DHT MUST
|
||||
match the first 21 bits of the resulting hash. The last byte of the hash MUST
|
||||
match the random number (``r``) used to generate the hash.
|
||||
|
||||
.. image:: ip_id_v4.png
|
||||
.. image:: ip_id_v6.png
|
||||
|
@ -97,25 +97,25 @@ Example code for calculating a valid node ID::
|
|||
int num_octets; // the number of octets to consider in ip (4 or 8)
|
||||
uint8_t node_id[20]; // resulting node ID
|
||||
|
||||
uint8_t v4mask[] = { 0x01, 0x07, 0x1f, 0x7f };
|
||||
uint8_t v6mask[] = { 0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f };
|
||||
uint8_t* mask = num_octets == 4 ? v4_mask : v8_mask;
|
||||
uint8_t v4_mask[] = { 0x03, 0x0f, 0x3f, 0xff };
|
||||
uint8_t v6_mask[] = { 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff };
|
||||
uint8_t* mask = num_octets == 4 ? v4_mask : v6_mask;
|
||||
|
||||
for (int i = 0; i < num_octets; ++i)
|
||||
ip[i] &= mask[i];
|
||||
|
||||
uint32_t rand = rand() & 0xff;
|
||||
uint32_t rand = std::rand() & 0xff;
|
||||
uint8_t r = rand & 0x7;
|
||||
|
||||
uint32_t crc = crc32(0, NULL, 0);
|
||||
uint32_t crc = crc32(0, nullptr, 0);
|
||||
crc = crc32(crc, ip, num_octets);
|
||||
crc = crc32(crc, &r, 1);
|
||||
|
||||
// only take the top 21 bits from crc
|
||||
node_id[0] = (crc >> 24) & 0xff;
|
||||
node_id[1] = (crc >> 16) & 0xff;
|
||||
node_id[2] = (crc >> 8) & 0xff;
|
||||
node_id[3] = crc & 0xff;
|
||||
for (int i = 4; i < 19; ++i) node_id[i] = std::rand();
|
||||
node_id[2] = ((crc >> 8) & 0xf8) | (std::rand() & 0x7);
|
||||
for (int i = 3; i < 19; ++i) node_id[i] = std::rand();
|
||||
node_id[19] = rand;
|
||||
|
||||
test vectors:
|
||||
|
@ -124,14 +124,15 @@ test vectors:
|
|||
|
||||
IP rand example node ID
|
||||
============ ===== ==========================================
|
||||
124.31.75.21 1 **1712f6c7** 0c5d6a4ec8a88e4c6ab4c28b95eee4 **01**
|
||||
21.75.31.124 86 **946406c1** 4e7a08645677bbd1cfe7d8f956d532 **56**
|
||||
65.23.51.170 22 **fefd9220** bc8f112a3d426c84764f8c2a1150e6 **16**
|
||||
84.124.73.14 65 **af1546dd** 1bb1fe518101ceef99462b947a01ff **41**
|
||||
43.213.53.83 90 **a9e920bf** 5b7c4be0237986d5243b87aa6d5130 **5a**
|
||||
124.31.75.21 1 **d2a6df** f10c5d6a4ec8a88e4c6ab4c28b95eee4 **01**
|
||||
21.75.31.124 86 **48cb19** c14e7a08645677bbd1cfe7d8f956d532 **56**
|
||||
65.23.51.170 22 **fd334a** 20bc8f112a3d426c84764f8c2a1150e6 **16**
|
||||
84.124.73.14 65 **6aa169** dd1bb1fe518101ceef99462b947a01ff **41**
|
||||
43.213.53.83 90 **eb6434** bf5b7c4be0237986d5243b87aa6d5130 **5a**
|
||||
|
||||
The bold parts of the node ID are the important parts. The rest are
|
||||
random numbers.
|
||||
random numbers. The last bold number of each row has only its most significant
|
||||
bits (the top 5) pulled from the CRC function. The lower 3 bits are random.
|
||||
|
||||
bootstrapping
|
||||
-------------
|
||||
|
@ -156,6 +157,54 @@ not the node has a correct understanding of its external IP or not. This could
|
|||
be done by voting, or only restart the DHT once at least a certain number of
|
||||
nodes, from separate searches, tells you your node ID is incorrect.
|
||||
|
||||
rationale
|
||||
---------
|
||||
|
||||
The choice of using CRC32 instead of a more traditional cryptographic hash
|
||||
function is justified primarily by these reasons:
|
||||
|
||||
1. it is a fast function
|
||||
2. produces well distributed results
|
||||
3. there is no need for the hash function to be one-way (the input set is
|
||||
so small that any hash function could be reversed).
|
||||
|
||||
There are primarily two tests run on SHA-1 and CRC32 to establish the
|
||||
distribution of results. The first one is the number of bits in the output
|
||||
set that contain every possible combination of bits. The CRC function
|
||||
has a longer such prefix in its output than SHA-1. This means nodes will still
|
||||
have well uniformly distributed IDs, even when IP addresses in use are not
|
||||
uniformly distributed.
|
||||
|
||||
The following graph illustrates a few different hash functions with regard
|
||||
to this property.
|
||||
|
||||
.. image:: complete_bit_prefixes.png
|
||||
|
||||
This test takes into account IP addresses that are not globally routable, i.e.
|
||||
reserved for local networks, multicast and other things. It also takes into
|
||||
account that some /8 blocks are not in use by end-users and extremely unlikely
|
||||
to ever run a DHT node. This makes the results likely to be very similar to
|
||||
what we would see in the wild.
|
||||
|
||||
These results indicate that CRC32 provides the best uniformity in the results
|
||||
in terms of bit prefixes where all possibilities are represented, and that
|
||||
no more than 21 bits should be used from the result. If more than 21 bits
|
||||
were to be used, there would be certain node IDs that would be impossible to
|
||||
have, which would make routing sub-optimal.
|
||||
|
||||
The second test is more of a sanity test for the uniform distribution property.
|
||||
The target space (32 bit integer) is divided up into 1000 buckets. Every valid
|
||||
IP and ``r`` input is run through the algorithm and the result is put in the
|
||||
bucket it falls in. The expectation is that each bucket has roughly an equal
|
||||
number of results falling into it. The following graph shows the resulting
|
||||
histogram, comparing SHA-1 and CRC32.
|
||||
|
||||
.. image:: hash_distribution.png
|
||||
|
||||
The source code for these tests can be found here_.
|
||||
|
||||
.. _here: https://github.com/arvidn/hash_complete_prefix
|
||||
|
||||
enforcement
|
||||
-----------
|
||||
|
||||
|
|
Binary file not shown.
After Width: | Height: | Size: 8.8 KiB |
Binary file not shown.
Before Width: | Height: | Size: 5.7 KiB After Width: | Height: | Size: 5.7 KiB |
Binary file not shown.
Before Width: | Height: | Size: 6.2 KiB After Width: | Height: | Size: 6.3 KiB |
56
docs/ips.py
56
docs/ips.py
|
@ -1,56 +0,0 @@
|
|||
#/bin/python
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
def num_ids(bits, total_bits):
|
||||
|
||||
if total_bits == 32:
|
||||
bit_dec = 2
|
||||
else:
|
||||
bit_dec = 1
|
||||
|
||||
num_used = 7;
|
||||
ret = 3
|
||||
|
||||
while bits > 0:
|
||||
ret += min(num_used, bits)
|
||||
num_used -= bit_dec
|
||||
if num_used < 0: num_used = 0
|
||||
bits -= 8
|
||||
|
||||
return 1 << ret
|
||||
|
||||
f = open('ip_id_v4.dat', 'w+')
|
||||
for i in range(0, 33):
|
||||
print >>f, '%d\t%d\t%d' % (i, num_ids(i, 32), 1 << i)
|
||||
f.close()
|
||||
|
||||
f = open('ip_id_v6.dat', 'w+')
|
||||
for i in range(0, 65):
|
||||
print >>f, '%d\t%d\t%d' % (i, num_ids(i, 64), 1 << i)
|
||||
f.close()
|
||||
|
||||
f = open('ip_id.gnuplot', 'w+')
|
||||
|
||||
f.write('''
|
||||
set term png size 600,300
|
||||
set output "ip_id_v4.png"
|
||||
set logscale y
|
||||
set title "Number of possible node IDs"
|
||||
set ylabel "possible node IDs"
|
||||
set xlabel "bits controlled in IPv4"
|
||||
set xtics 4
|
||||
set grid
|
||||
plot "ip_id_v4.dat" using 1:2 title "octet-wise modulus" with lines, \
|
||||
"ip_id_v4.dat" using 1:3 title "hash of IP" with lines
|
||||
|
||||
set output "ip_id_v6.png"
|
||||
set title "Number of possible node IDs"
|
||||
set xlabel "bits controlled in IPv6"
|
||||
plot "ip_id_v6.dat" using 1:2 title "octet-wise modulus" with lines, \
|
||||
"ip_id_v6.dat" using 1:3 title "hash of IP" with lines
|
||||
''')
|
||||
f.close()
|
||||
os.system('gnuplot ip_id.gnuplot')
|
||||
|
Loading…
Reference in New Issue