From 49a1398fe5fadef36fded1368d2894dd560f74cd Mon Sep 17 00:00:00 2001 From: Arvid Norberg Date: Thu, 31 May 2012 02:16:44 +0000 Subject: [PATCH] update dht_sec specification and the dht code --- docs/dht_sec.html | 94 +++++++++++++++++----------------------- docs/dht_sec.rst | 84 +++++++++++++++-------------------- src/kademlia/node_id.cpp | 47 +++++++++----------- test/test_primitives.cpp | 38 +++++++++++++++- 4 files changed, 131 insertions(+), 132 deletions(-) diff --git a/docs/dht_sec.html b/docs/dht_sec.html index c5cc3118f..caa761505 100644 --- a/docs/dht_sec.html +++ b/docs/dht_sec.html @@ -3,7 +3,7 @@ - + BitTorrent DHT security extension @@ -93,7 +93,7 @@ by observing that a typical routing table typically has about 20 of its top routing table buckets full. That means the key space is dense enough to contain 8 nodes for every combination of the 20 top bits of node IDs.

-2^20 * 8 = 8388608
+2^20 * 8 = 8388608

By controlling that many IP addresses, an attacker could snoop any info-hash. By controlling 8 times that many IP addresses, an attacker could actually take over any info-hash.

@@ -109,18 +109,20 @@ of IPs, as well as allowing more than one node ID per external IP, the node ID can be restricted at each class level of the IP.

The expression to calculate a valid ID prefix (from an IPv4 address) is:

-sha1((A * (B * (C * (D * (rand() % 8) % 0x100) % 0x4000) % 0x100000)) % 0x4000000)
+sha1((ip & 0x30f3fff) .. r)
 
-

Where A, B, C and D are the four octets of an IPv4 address.

-

The pattern is that the modulus constant is shifted left by 6 for each octet. -It generalizes to IPv6 by only considering the first 64 bit of the IP (since -the low 64 bits are controlled by the host) and shifting the modulus by 3 for -each octet instead.

+

And for an IPv6 address (ip is the high 64 bits of the address):

+
+sha1((ip & 0x103070f1f3f7fff) ..  r)
+
+

r is a random number in the range [0, 7]. The resulting integer, +representing the masked IP address, is supposed to be big-endian before +being hashed. The ".." means concatenation.

The details of implementing this are to evaluate the expression, store the -result in a big endian 32 bit integer and hash those 4 bytes with SHA-1. -The first 4 bytes of the node ID used in the DHT MUST match the first 4 +result in a big endian 64 bit integer and hash those 8 bytes with SHA-1.

+

The first 4 bytes of the node ID used in the DHT MUST match the first 4 bytes in the resulting hash. The last byte of the node ID MUST match the -random number used to generate the hash.

+random number (r) used to generate the hash.

ip_id_v4.png ip_id_v6.png

Example code for calculating a valid node ID:

@@ -129,63 +131,45 @@ uint8_t* ip; // our external IPv4 or IPv6 address (network byte order) int num_octets; // the number of octets to consider in ip (4 or 8) uint8_t node_id[20]; // resulting node ID -uint32_t rand = rand() & 0xff; -uint32_t modulus = 0x100; -uint32_t seed = rand & 0x7; -int mod_shift = 6 * 4 / num_octets; // 6 or 3, depending on IPv4 and IPv6 -while (num_octets) -{ - seed = (uint64_t(seed) * ip[num_octets-1]) & (modulus-1); - modulus <<= mod_shift; - --num_octets; -} +uint8_t v4mask[] = { 0x03, 0x0f, 0x3f, 0xff }; +uint8_t v6mask[] = { 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff }; +uint8_t* mask = num_octets == 4 ? v4_mask : v8_mask; + +for (int i = 0; i < num_octets; ++i) + ip[i] &= mask[i]; -seed = htonl(seed); SHA_CTX ctx; SHA1_Init(&ctx); -SHA1_Update(&ctx, (unsigned char*)&seed, sizeof(seed)); +SHA1_Update(&ctx, (unsigned char*)ip, num_octets); +uint32_t rand = rand() & 0xff; +uint8_t r = rand & 0x7; +SHA1_Update(&ctx, (unsigned char*)&r, 1); SHA1_Final(&ctx, node_id); for (int i = 4; i < 19; ++i) node_id[i] = rand(); node_id[19] = rand; -

Example code to verify a node ID:

-
-uint8_t* ip; // incoming IPv4 or IPv6 address (network byte order)
-int num_octets; // the number of octets to consider in ip (4 or 8)
-uint8_t node_id[20]; // incoming node ID
-
-uint32_t modulus = 0x100;
-uint32_t seed = node_id[19] & 0x7;
-int mod_shift = 6 * 4 / num_octets; // 6 or 3, depending on IPv4 and IPv6
-while (num_octets)
-{
-        seed = (uint64_t(seed) * ip[num_octets-1]) & (modulus-1);
-        modulus <<= mod_shift;
-        --num_octets;
-}
-
-seed = htonl(seed);
-SHA_CTX ctx;
-SHA1_Init(&ctx);
-SHA1_Update(&ctx, (unsigned char*)&seed, sizeof(seed));
-uint8_t digest[20];
-SHA1_Final(&ctx, digest);
-if (memcmp(digest, node_id, 4) != 0)
-        return false; // failed verification
-else
-        return true; // verification passed
-

test vectors:

+
+IP           rand  example node ID
+============ ===== ==========================================
+124.31.75.21   1   8a84ac4d 0c5d6a4ec8a88e4c6ab4c28b95eee4 01
+21.75.31.124  86   b25a51b1 4e7a08645677bbd1cfe7d8f956d532 56
+65.23.51.170  22   dc35968d bc8f112a3d426c84764f8c2a1150e6 16
+84.124.73.14  65   98f44bb1 1bb1fe518101ceef99462b947a01ff 41
+43.213.53.83  90   5978e1c4 5b7c4be0237986d5243b87aa6d5130 5a
+
+

The bold parts of the node ID are the important parts. The rest are +random numbers.

bootstrapping

In order to set ones initial node ID, the external IP needs to be known. This -is not a trivial problem. WIth this extension, all DHT requests whose node +is not a trivial problem. With this extension, all DHT requests whose node ID does not match its IP address MUST be serviced and MUST also include one -extra result value (inside the r dictionary) called ip. The IP field +extra result value (inside the r dictionary) called ip. The IP field contains the raw (big endian) byte representation of the external IP address. -This is the same byte sequence passed to SHA-1.

-

A DHT node which receives an ip result in a request SHOULD consider restarting +This is the same byte sequence used to verify the node ID.

+

A DHT node which receives an ip result in a request SHOULD consider restarting its DHT node with a new node ID, taking this IP into account. Since a single node can not be trusted, there should be some mechanism of determining whether or not the node has a correct understanding of its external IP or not. This could @@ -197,7 +181,7 @@ nodes, from separate searches, tells you your node ID is incorrect.

Once enforced, write tokens from peers whose node ID does not match its external IP should be considered dropped. In other words, a peer that uses a non-matching ID MUST never be used to store information on, regardless of which request. In the -original DHT specification only announce_peer stores data in the network, +original DHT specification only announce_peer stores data in the network, but any future extension which stores data in the network SHOULD use the same restriction.

Any peer on a local network address is exempt from this node ID verification. diff --git a/docs/dht_sec.rst b/docs/dht_sec.rst index 883164b61..f71d54548 100644 --- a/docs/dht_sec.rst +++ b/docs/dht_sec.rst @@ -65,20 +65,22 @@ ID can be restricted at each class level of the IP. The expression to calculate a valid ID prefix (from an IPv4 address) is:: - sha1((A * (B * (C * (D * (rand() % 8) % 0x100) % 0x4000) % 0x100000)) % 0x4000000) + sha1((ip & 0x30f3fff) .. r) -Where ``A``, ``B``, ``C`` and ``D`` are the four octets of an IPv4 address. +And for an IPv6 address (``ip`` is the high 64 bits of the address):: -The pattern is that the modulus constant is shifted left by 6 for each octet. -It generalizes to IPv6 by only considering the first 64 bit of the IP (since -the low 64 bits are controlled by the host) and shifting the modulus by 3 for -each octet instead. + sha1((ip & 0x103070f1f3f7fff) .. r) + +``r`` is a random number in the range [0, 7]. The resulting integer, +representing the masked IP address is supposed to be big-endian before +hashed. The ".." means concatenation. The details of implementing this is to evaluate the expression, store the -result in a big endian 32 bit integer and hash those 4 bytes with SHA-1. +result in a big endian 64 bit integer and hash those 8 bytes with SHA-1. + The first 4 bytes of the node ID used in the DHT MUST match the first 4 bytes in the resulting hash. The last byte of the hash MUST match the -random number used to generate the hash. +random number (``r``) used to generate the hash. .. image:: ip_id_v4.png .. 
image:: ip_id_v6.png @@ -89,63 +91,47 @@ Example code code for calculating a valid node ID:: int num_octets; // the number of octets to consider in ip (4 or 8) uint8_t node_id[20]; // resulting node ID - uint32_t rand = rand() & 0xff; - uint32_t modulus = 0x100; - uint32_t seed = rand & 0x7; - int mod_shift = 6 * 4 / num_octets; // 6 or 3, depending on IPv4 and IPv6 - while (num_octets) - { - seed = (uint64_t(seed) * ip[num_octets-1]) & (modulus-1); - modulus <<= mod_shift; - --num_octets; - } + uint8_t v4mask[] = { 0x03, 0x0f, 0x3f, 0xff }; + uint8_t v6mask[] = { 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff }; + uint8_t* mask = num_octets == 4 ? v4_mask : v8_mask; + + for (int i = 0; i < num_octets; ++i) + ip[i] &= mask[i]; - seed = htonl(seed); SHA_CTX ctx; SHA1_Init(&ctx); - SHA1_Update(&ctx, (unsigned char*)&seed, sizeof(seed)); + SHA1_Update(&ctx, (unsigned char*)ip, num_octets); + uint32_t rand = rand() & 0xff; + uint8_t r = rand & 0x7; + SHA1_Update(&ctx, (unsigned char*)&r, 1); SHA1_Final(&ctx, node_id); for (int i = 4; i < 19; ++i) node_id[i] = rand(); node_id[19] = rand; -Example code to verify a node ID:: - - uint8_t* ip; // incoming IPv4 or IPv6 address (network byte order) - int num_octets; // the number of octets to consider in ip (4 or 8) - uint8_t node_id[20]; // incoming node ID - - uint32_t modulus = 0x100; - uint32_t seed = node_id[19] & 0x7; - int mod_shift = 6 * 4 / num_octets; // 6 or 3, depending on IPv4 and IPv6 - while (num_octets) - { - seed = (uint64_t(seed) * ip[num_octets-1]) & (modulus-1); - modulus <<= mod_shift; - --num_octets; - } - - seed = htonl(seed); - SHA_CTX ctx; - SHA1_Init(&ctx); - SHA1_Update(&ctx, (unsigned char*)&seed, sizeof(seed)); - uint8_t digest[20]; - SHA1_Final(&ctx, digest); - if (memcmp(digest, node_id, 4) != 0) - return false; // failed verification - else - return true; // verification passed - test vectors: +.. 
parsed-literal:: + + IP rand example node ID + ============ ===== ========================================== + 124.31.75.21 1 **8a84ac4d** 0c5d6a4ec8a88e4c6ab4c28b95eee4 **01** + 21.75.31.124 86 **b25a51b1** 4e7a08645677bbd1cfe7d8f956d532 **56** + 65.23.51.170 22 **dc35968d** bc8f112a3d426c84764f8c2a1150e6 **16** + 84.124.73.14 65 **98f44bb1** 1bb1fe518101ceef99462b947a01ff **41** + 43.213.53.83 90 **5978e1c4** 5b7c4be0237986d5243b87aa6d5130 **5a** + +The bold parts of the node ID are the important parts. The rest are +random numbers. + bootstrapping ------------- In order to set ones initial node ID, the external IP needs to be known. This -is not a trivial problem. WIth this extension, *all* DHT requests whose node +is not a trivial problem. With this extension, *all* DHT requests whose node ID does not match its IP address MUST be serviced and MUST also include one extra result value (inside the ``r`` dictionary) called ``ip``. The IP field contains the raw (big endian) byte representation of the external IP address. -This is the same byte sequence passed to SHA-1. +This is the same byte sequence used to verify the node ID. A DHT node which receives an ``ip`` result in a request SHOULD consider restarting its DHT node with a new node ID, taking this IP into account. 
Since a single node diff --git a/src/kademlia/node_id.cpp b/src/kademlia/node_id.cpp index e5249e5e7..a104961e7 100644 --- a/src/kademlia/node_id.cpp +++ b/src/kademlia/node_id.cpp @@ -98,48 +98,43 @@ int distance_exp(node_id const& n1, node_id const& n2) struct static_ { static_() { std::srand((unsigned int)std::time(0)); } } static__; -node_id generate_id_impl(address const& ip, boost::uint32_t r) +node_id generate_id_impl(address const& ip_, boost::uint32_t r) { - boost::uint32_t seed = r & 0x7; - boost::uint32_t modulus = 0x100; - - boost::uint8_t* p = 0; - int num_octets = 0; - int mod_shift = 0; + boost::uint8_t* ip = 0; + const static uint8_t v4mask[] = { 0x03, 0x0f, 0x3f, 0xff }; + const static uint8_t v6mask[] = { 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff }; + uint8_t const* mask = 0; + int num_octets = 0; + address_v4::bytes_type b4; #if TORRENT_USE_IPV6 address_v6::bytes_type b6; - if (ip.is_v6()) + if (ip_.is_v6()) { - b6 = ip.to_v6().to_bytes(); - p = &b6[0]; + b6 = ip_.to_v6().to_bytes(); + ip = &b6[0]; num_octets = 8; - mod_shift = 3; + mask = v6mask; } else #endif { - b4 = ip.to_v4().to_bytes(); - p = &b4[0]; + b4 = ip_.to_v4().to_bytes(); + ip = &b4[0]; num_octets = 4; - mod_shift = 6; + mask = v4mask; } - while (num_octets) - { - seed *= p[num_octets-1]; - seed &= (modulus-1); - modulus <<= mod_shift; - --num_octets; - } - - seed = htonl(seed); - - node_id id = hasher((const char*)&seed, sizeof(seed)).final(); + for (int i = 0; i < num_octets; ++i) + ip[i] &= mask[i]; + hasher h; + h.update((char*)ip, num_octets); + uint8_t rand = r & 0x7; + h.update((char*)&r, 1); + node_id id = h.final(); for (int i = 4; i < 19; ++i) id[i] = random(); - id[19] = r; return id; diff --git a/test/test_primitives.cpp b/test/test_primitives.cpp index ad0d474d5..2e5dec6aa 100644 --- a/test/test_primitives.cpp +++ b/test/test_primitives.cpp @@ -68,6 +68,11 @@ using namespace boost::tuples; namespace libtorrent { TORRENT_EXPORT std::string sanitize_path(std::string 
const& p); + namespace dht + { + TORRENT_EXPORT libtorrent::dht::node_id generate_id_impl( + address const& ip_, boost::uint32_t r); + } } sha1_hash to_hash(char const* s) @@ -1714,10 +1719,39 @@ int test_main() } TEST_CHECK(hits > int(temp.size()) / 2); + using namespace libtorrent::dht; + + char const* ips[] = { + "124.31.75.21", + "21.75.31.124", + "65.23.51.170", + "84.124.73.14", + "43.213.53.83", + }; + + int rs[] = { 1,86,22,65,90 }; + + boost::uint8_t prefixes[][4] = + { + {0x8a, 0x84, 0xac, 0x4d}, + {0xb2, 0x5a, 0x51, 0xb1 }, + {0xdc, 0x35, 0x96, 0x8d }, + {0x98, 0xf4, 0x4b, 0xb1 }, + {0x59, 0x78, 0xe1, 0xc4 }, + }; + + for (int i = 0; i < 5; ++i) + { + address a = address_v4::from_string(ips[i]); + node_id id = generate_id_impl(a, rs[i]); + for (int j = 0; j < 4; ++j) + TEST_CHECK(id[j] == prefixes[i][j]); + TEST_CHECK(id[19] == rs[i]); + fprintf(stderr, "IP address: %s r: %d node ID: %s\n", ips[i] + , rs[i], to_hex(id.to_string()).c_str()); + } #endif - - // test peer_id/sha1_hash type sha1_hash h1(0);