update dht_sec specification and the dht code

2012-05-31 02:16:44 +00:00 · 2012-05-31 02:16:44 +00:00 · 49a1398fe5
parent d09e958166
commit 49a1398fe5
4 changed files with 131 additions and 132 deletions
--- a/docs/dht_sec.html
+++ b/docs/dht_sec.html
@ -3,7 +3,7 @@
 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
 <head>
 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
-<meta name="generator" content="Docutils 0.5: http://docutils.sourceforge.net/" />
+<meta name="generator" content="Docutils 0.8.1: http://docutils.sourceforge.net/" />
 <title>BitTorrent DHT security extension</title>
 <meta name="author" content="Arvid Norberg, arvid&#64;rasterbar.com" />
 <link rel="stylesheet" type="text/css" href="../../css/base.css" />
@ -93,7 +93,7 @@ by observing that a typical routing table typically has about 20 of its
 top routing table buckets full. That means the key space is dense enough
 to contain 8 nodes for every combination of the 20 top bits of node IDs.</p>
 <blockquote>
-<tt class="docutils literal"><span class="pre">2^20</span> <span class="pre">*</span> <span class="pre">8</span> <span class="pre">=</span> <span class="pre">8388608</span></tt></blockquote>
+<tt class="docutils literal">2^20 * 8 = 8388608</tt></blockquote>
 <p>By controlling that many IP addresses, an attacker could snoop any info-hash.
 By controlling 8 times that many IP addresses, an attacker could actually
 take over any info-hash.</p>
@ -109,18 +109,20 @@ of IPs, as well as allowing more than one node ID per external IP, the node
 ID can be restricted at each class level of the IP.</p>
 <p>The expression to calculate a valid ID prefix (from an IPv4 address) is:</p>
 <pre class="literal-block">
-sha1((A * (B * (C * (D * (rand() % 8) % 0x100) % 0x4000) % 0x100000)) % 0x4000000)
+sha1((ip &amp; 0x30f3fff) .. r)
 </pre>
-<p>Where <tt class="docutils literal"><span class="pre">A</span></tt>, <tt class="docutils literal"><span class="pre">B</span></tt>, <tt class="docutils literal"><span class="pre">C</span></tt> and <tt class="docutils literal"><span class="pre">D</span></tt> are the four octets of an IPv4 address.</p>
-<p>The pattern is that the modulus constant is shifted left by 6 for each octet.
-It generalizes to IPv6 by only considering the first 64 bit of the IP (since
-the low 64 bits are controlled by the host) and shifting the modulus by 3 for
-each octet instead.</p>
+<p>And for an IPv6 address (<tt class="docutils literal">ip</tt> is the high 64 bits of the address):</p>
+<pre class="literal-block">
+sha1((ip &amp; 0x103070f1f3f7fff) ..  r)
+</pre>
+<p><tt class="docutils literal">r</tt> is a random number in the range [0, 7]. The resulting integer,
+representing the masked IP address, is supposed to be in big-endian byte
+order before it is hashed. The &quot;..&quot; means concatenation.</p>
 <p>The details of implementing this is to evaluate the expression, store the
-result in a big endian 32 bit integer and hash those 4 bytes with SHA-1.
-The first 4 bytes of the node ID used in the DHT MUST match the first 4
+result in a big endian integer (4 bytes for IPv4, 8 bytes for IPv6) and hash those bytes, followed by the byte <tt class="docutils literal">r</tt>, with SHA-1.</p>
+<p>The first 4 bytes of the node ID used in the DHT MUST match the first 4
 bytes in the resulting hash. The last byte of the hash MUST match the
-random number used to generate the hash.</p>
+random number (<tt class="docutils literal">r</tt>) used to generate the hash.</p>
 <img alt="ip_id_v4.png" src="ip_id_v4.png" />
 <img alt="ip_id_v6.png" src="ip_id_v6.png" />
 <p>Example code code for calculating a valid node ID:</p>
@ -129,63 +131,45 @@ uint8_t* ip; // our external IPv4 or IPv6 address (network byte order)
 int num_octets; // the number of octets to consider in ip (4 or 8)
 uint8_t node_id[20]; // resulting node ID

-uint32_t rand = rand() &amp; 0xff;
-uint32_t modulus = 0x100;
-uint32_t seed = rand &amp; 0x7;
-int mod_shift = 6 * 4 / num_octets; // 6 or 3, depending on IPv4 and IPv6
-while (num_octets)
-{
-        seed = (uint64_t(seed) * ip[num_octets-1]) &amp; (modulus-1);
-        modulus &lt;&lt;= mod_shift;
-        --num_octets;
-}
+uint8_t v4mask[] = { 0x03, 0x0f, 0x3f, 0xff };
+uint8_t v6mask[] = { 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff };
+uint8_t* mask = num_octets == 4 ? v4mask : v6mask;
+
+for (int i = 0; i &lt; num_octets; ++i)
+        ip[i] &amp;= mask[i];

-seed = htonl(seed);
 SHA_CTX ctx;
 SHA1_Init(&amp;ctx);
-SHA1_Update(&amp;ctx, (unsigned char*)&amp;seed, sizeof(seed));
+SHA1_Update(&amp;ctx, (unsigned char*)ip, num_octets);
+uint32_t rand = rand() &amp; 0xff;
+uint8_t r = rand &amp; 0x7;
+SHA1_Update(&amp;ctx, (unsigned char*)&amp;r, 1);
 SHA1_Final(&amp;ctx, node_id);
 for (int i = 4; i &lt; 19; ++i) node_id[i] = rand();
 node_id[19] = rand;
 </pre>
-<p>Example code to verify a node ID:</p>
-<pre class="literal-block">
-uint8_t* ip; // incoming IPv4 or IPv6 address (network byte order)
-int num_octets; // the number of octets to consider in ip (4 or 8)
-uint8_t node_id[20]; // incoming node ID
-
-uint32_t modulus = 0x100;
-uint32_t seed = node_id[19] &amp; 0x7;
-int mod_shift = 6 * 4 / num_octets; // 6 or 3, depending on IPv4 and IPv6
-while (num_octets)
-{
-        seed = (uint64_t(seed) * ip[num_octets-1]) &amp; (modulus-1);
-        modulus &lt;&lt;= mod_shift;
-        --num_octets;
-}
-
-seed = htonl(seed);
-SHA_CTX ctx;
-SHA1_Init(&amp;ctx);
-SHA1_Update(&amp;ctx, (unsigned char*)&amp;seed, sizeof(seed));
-uint8_t digest[20];
-SHA1_Final(&amp;ctx, digest);
-if (memcmp(digest, node_id, 4) != 0)
-        return false; // failed verification
-else
-        return true; // verification passed
-</pre>
 <p>test vectors:</p>
+<pre class="literal-block">
+IP           rand  example node ID
+============ ===== ==========================================
+124.31.75.21   1   <strong>8a84ac4d</strong> 0c5d6a4ec8a88e4c6ab4c28b95eee4 <strong>01</strong>
+21.75.31.124  86   <strong>b25a51b1</strong> 4e7a08645677bbd1cfe7d8f956d532 <strong>56</strong>
+65.23.51.170  22   <strong>dc35968d</strong> bc8f112a3d426c84764f8c2a1150e6 <strong>16</strong>
+84.124.73.14  65   <strong>98f44bb1</strong> 1bb1fe518101ceef99462b947a01ff <strong>41</strong>
+43.213.53.83  90   <strong>5978e1c4</strong> 5b7c4be0237986d5243b87aa6d5130 <strong>5a</strong>
+</pre>
+<p>The bold parts of the node ID are the important parts. The rest are
+random numbers.</p>
 </div>
 <div class="section" id="bootstrapping">
 <h1>bootstrapping</h1>
 <p>In order to set ones initial node ID, the external IP needs to be known. This
-is not a trivial problem. WIth this extension, <em>all</em> DHT requests whose node
+is not a trivial problem. With this extension, <em>all</em> DHT requests whose node
 ID does not match its IP address MUST be serviced and MUST also include one
-extra result value (inside the <tt class="docutils literal"><span class="pre">r</span></tt> dictionary) called <tt class="docutils literal"><span class="pre">ip</span></tt>. The IP field
+extra result value (inside the <tt class="docutils literal">r</tt> dictionary) called <tt class="docutils literal">ip</tt>. The IP field
 contains the raw (big endian) byte representation of the external IP address.
-This is the same byte sequence passed to SHA-1.</p>
-<p>A DHT node which receives an <tt class="docutils literal"><span class="pre">ip</span></tt> result in a request SHOULD consider restarting
+This is the same byte sequence used to verify the node ID.</p>
+<p>A DHT node which receives an <tt class="docutils literal">ip</tt> result in a request SHOULD consider restarting
 its DHT node with a new node ID, taking this IP into account. Since a single node
 can not be trusted, there should be some mechanism of determining whether or
 not the node has a correct understanding of its external IP or not. This could
@ -197,7 +181,7 @@ nodes, from separate searches, tells you your node ID is incorrect.</p>
 <p>Once enforced, write tokens from peers whose node ID does not match its external
 IP should be considered dropped. In other words, a peer that uses a non-matching
 ID MUST never be used to store information on, regardless of which request. In the
-original DHT specification only <tt class="docutils literal"><span class="pre">announce_peer</span></tt> stores data in the network,
+original DHT specification only <tt class="docutils literal">announce_peer</tt> stores data in the network,
 but any future extension which stores data in the network SHOULD use the same
 restriction.</p>
 <p>Any peer on a local network address is exempt from this node ID verification.
--- a/docs/dht_sec.rst
+++ b/docs/dht_sec.rst
@ -65,20 +65,22 @@ ID can be restricted at each class level of the IP.

 The expression to calculate a valid ID prefix (from an IPv4 address) is::

-	sha1((A * (B * (C * (D * (rand() % 8) % 0x100) % 0x4000) % 0x100000)) % 0x4000000)
+	sha1((ip & 0x30f3fff) .. r)

-Where ``A``, ``B``, ``C`` and ``D`` are the four octets of an IPv4 address.
+And for an IPv6 address (``ip`` is the high 64 bits of the address)::

-The pattern is that the modulus constant is shifted left by 6 for each octet.
-It generalizes to IPv6 by only considering the first 64 bit of the IP (since
-the low 64 bits are controlled by the host) and shifting the modulus by 3 for
-each octet instead.
+	sha1((ip & 0x103070f1f3f7fff) ..  r)
+
+``r`` is a random number in the range [0, 7]. The resulting integer,
+representing the masked IP address, is supposed to be in big-endian byte
+order before it is hashed. The ".." means concatenation.

 The details of implementing this is to evaluate the expression, store the
-result in a big endian 32 bit integer and hash those 4 bytes with SHA-1.
+result in a big endian integer (4 bytes for IPv4, 8 bytes for IPv6) and hash those bytes, followed by the byte ``r``, with SHA-1.
+
 The first 4 bytes of the node ID used in the DHT MUST match the first 4
 bytes in the resulting hash. The last byte of the hash MUST match the
-random number used to generate the hash.
+random number (``r``) used to generate the hash.

 .. image:: ip_id_v4.png
 .. image:: ip_id_v6.png
@ -89,63 +91,47 @@ Example code code for calculating a valid node ID::
 	int num_octets; // the number of octets to consider in ip (4 or 8)
 	uint8_t node_id[20]; // resulting node ID

-	uint32_t rand = rand() & 0xff;
-	uint32_t modulus = 0x100;
-	uint32_t seed = rand & 0x7;
-	int mod_shift = 6 * 4 / num_octets; // 6 or 3, depending on IPv4 and IPv6
-	while (num_octets)
-	{
-		seed = (uint64_t(seed) * ip[num_octets-1]) & (modulus-1);
-		modulus <<= mod_shift;
-		--num_octets;
-	}
+	uint8_t v4mask[] = { 0x03, 0x0f, 0x3f, 0xff };
+	uint8_t v6mask[] = { 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff };
+	uint8_t* mask = num_octets == 4 ? v4mask : v6mask;
+
+	for (int i = 0; i < num_octets; ++i)
+		ip[i] &= mask[i];

-	seed = htonl(seed);
 	SHA_CTX ctx;
 	SHA1_Init(&ctx);
-	SHA1_Update(&ctx, (unsigned char*)&seed, sizeof(seed));
+	SHA1_Update(&ctx, (unsigned char*)ip, num_octets);
+	uint32_t rand = rand() & 0xff;
+	uint8_t r = rand & 0x7;
+	SHA1_Update(&ctx, (unsigned char*)&r, 1);
 	SHA1_Final(&ctx, node_id);
 	for (int i = 4; i < 19; ++i) node_id[i] = rand();
 	node_id[19] = rand;

-Example code to verify a node ID::
-
-	uint8_t* ip; // incoming IPv4 or IPv6 address (network byte order)
-	int num_octets; // the number of octets to consider in ip (4 or 8)
-	uint8_t node_id[20]; // incoming node ID
-
-	uint32_t modulus = 0x100;
-	uint32_t seed = node_id[19] & 0x7;
-	int mod_shift = 6 * 4 / num_octets; // 6 or 3, depending on IPv4 and IPv6
-	while (num_octets)
-	{
-		seed = (uint64_t(seed) * ip[num_octets-1]) & (modulus-1);
-		modulus <<= mod_shift;
-		--num_octets;
-	}
-
-	seed = htonl(seed);
-	SHA_CTX ctx;
-	SHA1_Init(&ctx);
-	SHA1_Update(&ctx, (unsigned char*)&seed, sizeof(seed));
-	uint8_t digest[20];
-	SHA1_Final(&ctx, digest);
-	if (memcmp(digest, node_id, 4) != 0)
-		return false; // failed verification
-	else
-		return true; // verification passed
-
 test vectors:

+.. parsed-literal::
+
+	IP           rand  example node ID
+	============ ===== ==========================================
+	124.31.75.21   1   **8a84ac4d** 0c5d6a4ec8a88e4c6ab4c28b95eee4 **01**
+	21.75.31.124  86   **b25a51b1** 4e7a08645677bbd1cfe7d8f956d532 **56**
+	65.23.51.170  22   **dc35968d** bc8f112a3d426c84764f8c2a1150e6 **16**
+	84.124.73.14  65   **98f44bb1** 1bb1fe518101ceef99462b947a01ff **41**
+	43.213.53.83  90   **5978e1c4** 5b7c4be0237986d5243b87aa6d5130 **5a**
+
+The bold parts of the node ID are the important parts. The rest are
+random numbers.
+
 bootstrapping
 -------------

 In order to set ones initial node ID, the external IP needs to be known. This
-is not a trivial problem. WIth this extension, *all* DHT requests whose node
+is not a trivial problem. With this extension, *all* DHT requests whose node
 ID does not match its IP address MUST be serviced and MUST also include one
 extra result value (inside the ``r`` dictionary) called ``ip``. The IP field
 contains the raw (big endian) byte representation of the external IP address.
-This is the same byte sequence passed to SHA-1.
+This is the same byte sequence used to verify the node ID.

 A DHT node which receives an ``ip`` result in a request SHOULD consider restarting
 its DHT node with a new node ID, taking this IP into account. Since a single node
--- a/src/kademlia/node_id.cpp
+++ b/src/kademlia/node_id.cpp
@ -98,48 +98,43 @@ int distance_exp(node_id const& n1, node_id const& n2)

 struct static_ { static_() { std::srand((unsigned int)std::time(0)); } } static__;

-node_id generate_id_impl(address const& ip, boost::uint32_t r)
+node_id generate_id_impl(address const& ip_, boost::uint32_t r)
 {
-	boost::uint32_t seed = r & 0x7;
-	boost::uint32_t modulus = 0x100;
-
-	boost::uint8_t* p = 0;
-	int num_octets = 0;
-	int mod_shift = 0;
+	boost::uint8_t* ip = 0;
 	
+	const static uint8_t v4mask[] = { 0x03, 0x0f, 0x3f, 0xff };
+	const static uint8_t v6mask[] = { 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff };
+	uint8_t const* mask = 0;
+	int num_octets = 0;
+
 	address_v4::bytes_type b4;
 #if TORRENT_USE_IPV6
 	address_v6::bytes_type b6;
-	if (ip.is_v6())
+	if (ip_.is_v6())
 	{
-		b6 = ip.to_v6().to_bytes();
-		p = &b6[0];
+		b6 = ip_.to_v6().to_bytes();
+		ip = &b6[0];
 		num_octets = 8;
-		mod_shift = 3;
+		mask = v6mask;
 	}
 	else
 #endif
 	{
-		b4 = ip.to_v4().to_bytes();
-		p = &b4[0];
+		b4 = ip_.to_v4().to_bytes();
+		ip = &b4[0];
 		num_octets = 4;
-		mod_shift = 6;
+		mask = v4mask;
 	}

-	while (num_octets)
-	{
-		seed *= p[num_octets-1];
-		seed &= (modulus-1);
-		modulus <<= mod_shift;
-		--num_octets;
-	}
-
-	seed = htonl(seed);
-
-	node_id id = hasher((const char*)&seed, sizeof(seed)).final();
+	for (int i = 0; i < num_octets; ++i)
+		ip[i] &= mask[i];

+	hasher h;
+	h.update((char*)ip, num_octets);
+	uint8_t rand = r & 0x7;
+	h.update((char*)&r, 1);
+	node_id id = h.final();
 	for (int i = 4; i < 19; ++i) id[i] = random();
-
 	id[19] = r;

 	return id;
--- a/test/test_primitives.cpp
+++ b/test/test_primitives.cpp
@ -68,6 +68,11 @@ using namespace boost::tuples;

 namespace libtorrent {
 	TORRENT_EXPORT std::string sanitize_path(std::string const& p);
+	namespace dht
+	{
+		TORRENT_EXPORT libtorrent::dht::node_id generate_id_impl(
+			address const& ip_, boost::uint32_t r);
+	}
 }

 sha1_hash to_hash(char const* s)
@ -1714,10 +1719,39 @@ int test_main()
 	}
 	TEST_CHECK(hits > int(temp.size()) / 2);

+	using namespace libtorrent::dht;
+
+	char const* ips[] = {
+		"124.31.75.21",
+		"21.75.31.124",
+		"65.23.51.170",
+		"84.124.73.14",
+		"43.213.53.83",
+	};
+
+	int rs[] = { 1,86,22,65,90 };
+
+	boost::uint8_t prefixes[][4] =
+	{
+		{0x8a, 0x84, 0xac, 0x4d},
+		{0xb2, 0x5a, 0x51, 0xb1 },
+		{0xdc, 0x35, 0x96, 0x8d },
+		{0x98, 0xf4, 0x4b, 0xb1 },
+		{0x59, 0x78, 0xe1, 0xc4 },
+	};
+
+	for (int i = 0; i < 5; ++i)
+	{
+		address a = address_v4::from_string(ips[i]);
+		node_id id = generate_id_impl(a, rs[i]);
+		for (int j = 0; j < 4; ++j)
+			TEST_CHECK(id[j] == prefixes[i][j]);
+		TEST_CHECK(id[19] == rs[i]);
+		fprintf(stderr, "IP address: %s r: %d node ID: %s\n", ips[i]
+			, rs[i], to_hex(id.to_string()).c_str());
+	}
 #endif

-
-
 	// test peer_id/sha1_hash type

 	sha1_hash h1(0);