diff --git a/docs/complete_bit_prefixes.png b/docs/complete_bit_prefixes.png
new file mode 100644
index 000000000..3bc73874d
Binary files /dev/null and b/docs/complete_bit_prefixes.png differ
diff --git a/docs/dht_sec.html b/docs/dht_sec.html
index c57801bb6..969c7eff2 100644
--- a/docs/dht_sec.html
+++ b/docs/dht_sec.html
@@ -59,9 +59,10 @@
 <li><a class="reference internal" href="#considerations" id="id4">considerations</a></li>
 <li><a class="reference internal" href="#node-id-restriction" id="id5">Node ID restriction</a></li>
 <li><a class="reference internal" href="#bootstrapping" id="id6">bootstrapping</a></li>
-<li><a class="reference internal" href="#enforcement" id="id7">enforcement</a></li>
-<li><a class="reference internal" href="#backwards-compatibility-and-transition" id="id8">backwards compatibility and transition</a></li>
-<li><a class="reference internal" href="#forward-compatibility" id="id9">forward compatibility</a></li>
+<li><a class="reference internal" href="#rationale" id="id7">rationale</a></li>
+<li><a class="reference internal" href="#enforcement" id="id8">enforcement</a></li>
+<li><a class="reference internal" href="#backwards-compatibility-and-transition" id="id9">backwards compatibility and transition</a></li>
+<li><a class="reference internal" href="#forward-compatibility" id="id10">forward compatibility</a></li>
 </ul>
 </div>
 <div class="section" id="id1">
@@ -112,20 +113,20 @@ distribution of the IDs remoain uniform. This is why CRC32 was chosen
 as the hash function. See <a class="reference external" href="http://blog.libtorrent.org/2012/12/dht-security/">comparisons of hash functions</a>.</p>
 <p>The expression to calculate a valid ID prefix (from an IPv4 address) is:</p>
 <pre class="literal-block">
-crc32((ip &amp; 0x01071f7f) .. r)
+crc32((ip &amp; 0x030f3fff) .. r)
 </pre>
 <p>And for an IPv6 address (<tt class="docutils literal">ip</tt> is the high 64 bits of the address):</p>
 <pre class="literal-block">
-crc32((ip &amp; 0x000103070f1f3f7f) ..  r)
+crc32((ip &amp; 0x0103070f1f3f7fff) ..  r)
 </pre>
 <p><tt class="docutils literal">r</tt> is a random number in the range [0, 7]. The resulting integer,
 representing the masked IP address is supposed to be big-endian before
 hashed. The &quot;..&quot; means concatenation.</p>
 <p>The details of implementing this is to evaluate the expression, store the
 result in a big endian 64 bit integer and hash those 8 bytes with CRC32.</p>
-<p>The first 4 bytes of the node ID used in the DHT MUST match the first 4
-bytes in the resulting hash. The last byte of the hash MUST match the
-random number (<tt class="docutils literal">r</tt>) used to generate the hash.</p>
+<p>The first (most significant) 21 bits of the node ID used in the DHT MUST
+match the first 21 bits of the resulting hash. The last byte of the hash MUST
+match the random number (<tt class="docutils literal">r</tt>) used to generate the hash.</p>
 <img alt="ip_id_v4.png" src="ip_id_v4.png" />
 <img alt="ip_id_v6.png" src="ip_id_v6.png" />
 <p>Example code code for calculating a valid node ID:</p>
@@ -134,39 +135,40 @@ uint8_t* ip; // our external IPv4 or IPv6 address (network byte order)
 int num_octets; // the number of octets to consider in ip (4 or 8)
 uint8_t node_id[20]; // resulting node ID
 
-uint8_t v4mask[] = { 0x01, 0x07, 0x1f, 0x7f };
-uint8_t v6mask[] = { 0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f };
-uint8_t* mask = num_octets == 4 ? v4_mask : v8_mask;
+uint8_t v4_mask[] = { 0x03, 0x0f, 0x3f, 0xff };
+uint8_t v6_mask[] = { 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff };
+uint8_t* mask = num_octets == 4 ? v4_mask : v6_mask;
 
 for (int i = 0; i &lt; num_octets; ++i)
         ip[i] &amp;= mask[i];
 
-uint32_t rand = rand() &amp; 0xff;
+uint32_t rand = std::rand() &amp; 0xff;
 uint8_t r = rand &amp; 0x7;
 
-uint32_t crc = crc32(0, NULL, 0);
+uint32_t crc = crc32(0, nullptr, 0);
 crc = crc32(crc, ip, num_octets);
 crc = crc32(crc, &amp;r, 1);
 
+// only take the top 21 bits from crc
 node_id[0] = (crc &gt;&gt; 24) &amp; 0xff;
 node_id[1] = (crc &gt;&gt; 16) &amp; 0xff;
-node_id[2] = (crc &gt;&gt; 8) &amp; 0xff;
-node_id[3] = crc &amp; 0xff;
-for (int i = 4; i &lt; 19; ++i) node_id[i] = std::rand();
+node_id[2] = ((crc &gt;&gt; 8) &amp; 0xf8) | (std::rand() &amp; 0x7);
+for (int i = 3; i &lt; 19; ++i) node_id[i] = std::rand();
 node_id[19] = rand;
 </pre>
 <p>test vectors:</p>
 <pre class="literal-block">
 IP           rand  example node ID
 ============ ===== ==========================================
-124.31.75.21   1   <strong>1712f6c7</strong> 0c5d6a4ec8a88e4c6ab4c28b95eee4 <strong>01</strong>
-21.75.31.124  86   <strong>946406c1</strong> 4e7a08645677bbd1cfe7d8f956d532 <strong>56</strong>
-65.23.51.170  22   <strong>fefd9220</strong> bc8f112a3d426c84764f8c2a1150e6 <strong>16</strong>
-84.124.73.14  65   <strong>af1546dd</strong> 1bb1fe518101ceef99462b947a01ff <strong>41</strong>
-43.213.53.83  90   <strong>a9e920bf</strong> 5b7c4be0237986d5243b87aa6d5130 <strong>5a</strong>
+124.31.75.21   1   <strong>d2a6df</strong> f10c5d6a4ec8a88e4c6ab4c28b95eee4 <strong>01</strong>
+21.75.31.124  86   <strong>48cb19</strong> c14e7a08645677bbd1cfe7d8f956d532 <strong>56</strong>
+65.23.51.170  22   <strong>fd334a</strong> 20bc8f112a3d426c84764f8c2a1150e6 <strong>16</strong>
+84.124.73.14  65   <strong>6aa169</strong> dd1bb1fe518101ceef99462b947a01ff <strong>41</strong>
+43.213.53.83  90   <strong>eb6434</strong> bf5b7c4be0237986d5243b87aa6d5130 <strong>5a</strong>
 </pre>
 <p>The bold parts of the node ID are the important parts. The rest are
-random numbers.</p>
+random numbers. The last hex digit of the bold prefix in each row has only its
+most significant bit pulled from the CRC function. Its lower 3 bits are random.</p>
 </div>
 <div class="section" id="bootstrapping">
 <h1>bootstrapping</h1>
@@ -187,6 +189,44 @@ not the node has a correct understanding of its external IP or not. This could
 be done by voting, or only restart the DHT once at least a certain number of
 nodes, from separate searches, tells you your node ID is incorrect.</p>
 </div>
+<div class="section" id="rationale">
+<h1>rationale</h1>
+<p>The choice of using CRC32 instead of a more traditional cryptographic hash
+function is justified primarily by these reasons:</p>
+<ol class="arabic simple">
+<li>it is a fast function</li>
+<li>produces well distributed results</li>
+<li>there is no need for the hash function to be one-way (the input set is
+so small that any hash function could be reversed).</li>
+</ol>
+<p>There are primarily two tests run on SHA-1 and CRC32 to establish the
+distribution of results. The first one is the number of bits in the output
+set that contain every possible combination of bits. The CRC function
+has a longer such prefix in its output than SHA-1. This means nodes will still
+have uniformly distributed IDs, even when IP addresses in use are not
+uniformly distributed.</p>
+<p>The following graph illustrates a few different hash functions with regard
+to this property.</p>
+<img alt="complete_bit_prefixes.png" src="complete_bit_prefixes.png" />
+<p>This test takes into account IP addresses that are not globally routable, i.e.
+reserved for local networks, multicast and other things. It also takes into
+account that some /8 blocks are not in use by end-users and extremely unlikely
+to ever run a DHT node. This makes the results likely to be very similar to
+what we would see in the wild.</p>
+<p>These results indicate that CRC32 provides the best uniformity in the results
+in terms of bit prefixes where all possibilities are represented, and that
+no more than 21 bits should be used from the result. If more than 21 bits
+were to be used, there would be certain node IDs that would be impossible to
+have, which would make routing sub-optimal.</p>
+<p>The second test is more of a sanity test for the uniform distribution property.
+The target space (32 bit integer) is divided up into 1000 buckets. Every valid
+IP and <tt class="docutils literal">r</tt> input is run through the algorithm and the result is put in the
+bucket it falls in. The expectation is that each bucket has roughly an equal
+number of results falling into it. The following graph shows the resulting
+histogram, comparing SHA-1 and CRC32.</p>
+<img alt="hash_distribution.png" src="hash_distribution.png" />
+<p>The source code for these tests can be found <a class="reference external" href="https://github.com/arvidn/hash_complete_prefix">here</a>.</p>
+</div>
 <div class="section" id="enforcement">
 <h1>enforcement</h1>
 <p>Once enforced, write tokens from peers whose node ID does not match its external
diff --git a/docs/dht_sec.rst b/docs/dht_sec.rst
index 221d11cab..6984341f7 100644
--- a/docs/dht_sec.rst
+++ b/docs/dht_sec.rst
@@ -71,11 +71,11 @@ __ http://blog.libtorrent.org/2012/12/dht-security/
 
 The expression to calculate a valid ID prefix (from an IPv4 address) is::
 
-	crc32((ip & 0x01071f7f) .. r)
+	crc32((ip & 0x030f3fff) .. r)
 
 And for an IPv6 address (``ip`` is the high 64 bits of the address)::
 
-	crc32((ip & 0x000103070f1f3f7f) ..  r)
+	crc32((ip & 0x0103070f1f3f7fff) ..  r)
 
 ``r`` is a random number in the range [0, 7]. The resulting integer,
 representing the masked IP address is supposed to be big-endian before
@@ -84,9 +84,9 @@ hashed. The ".." means concatenation.
 The details of implementing this is to evaluate the expression, store the
 result in a big endian 64 bit integer and hash those 8 bytes with CRC32.
 
-The first 4 bytes of the node ID used in the DHT MUST match the first 4
-bytes in the resulting hash. The last byte of the hash MUST match the
-random number (``r``) used to generate the hash.
+The first (most significant) 21 bits of the node ID used in the DHT MUST
+match the first 21 bits of the resulting hash. The last byte of the hash MUST
+match the random number (``r``) used to generate the hash.
 
 .. image:: ip_id_v4.png
 .. image:: ip_id_v6.png
@@ -97,25 +97,25 @@ Example code code for calculating a valid node ID::
 	int num_octets; // the number of octets to consider in ip (4 or 8)
 	uint8_t node_id[20]; // resulting node ID
 
-	uint8_t v4mask[] = { 0x01, 0x07, 0x1f, 0x7f };
-	uint8_t v6mask[] = { 0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f };
-	uint8_t* mask = num_octets == 4 ? v4_mask : v8_mask;
+	uint8_t v4_mask[] = { 0x03, 0x0f, 0x3f, 0xff };
+	uint8_t v6_mask[] = { 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff };
+	uint8_t* mask = num_octets == 4 ? v4_mask : v6_mask;
 
 	for (int i = 0; i < num_octets; ++i)
 		ip[i] &= mask[i];
 
-	uint32_t rand = rand() & 0xff;
+	uint32_t rand = std::rand() & 0xff;
 	uint8_t r = rand & 0x7;
 
-	uint32_t crc = crc32(0, NULL, 0);
+	uint32_t crc = crc32(0, nullptr, 0);
 	crc = crc32(crc, ip, num_octets);
 	crc = crc32(crc, &r, 1);
 
+	// only take the top 21 bits from crc
 	node_id[0] = (crc >> 24) & 0xff;
 	node_id[1] = (crc >> 16) & 0xff;
-	node_id[2] = (crc >> 8) & 0xff;
-	node_id[3] = crc & 0xff;
-	for (int i = 4; i < 19; ++i) node_id[i] = std::rand();
+	node_id[2] = ((crc >> 8) & 0xf8) | (std::rand() & 0x7);
+	for (int i = 3; i < 19; ++i) node_id[i] = std::rand();
 	node_id[19] = rand;
 
 test vectors:
@@ -124,14 +124,15 @@ test vectors:
 
 	IP           rand  example node ID
 	============ ===== ==========================================
-	124.31.75.21   1   **1712f6c7** 0c5d6a4ec8a88e4c6ab4c28b95eee4 **01**
-	21.75.31.124  86   **946406c1** 4e7a08645677bbd1cfe7d8f956d532 **56**
-	65.23.51.170  22   **fefd9220** bc8f112a3d426c84764f8c2a1150e6 **16**
-	84.124.73.14  65   **af1546dd** 1bb1fe518101ceef99462b947a01ff **41**
-	43.213.53.83  90   **a9e920bf** 5b7c4be0237986d5243b87aa6d5130 **5a**
+	124.31.75.21   1   **d2a6df** f10c5d6a4ec8a88e4c6ab4c28b95eee4 **01**
+	21.75.31.124  86   **48cb19** c14e7a08645677bbd1cfe7d8f956d532 **56**
+	65.23.51.170  22   **fd334a** 20bc8f112a3d426c84764f8c2a1150e6 **16**
+	84.124.73.14  65   **6aa169** dd1bb1fe518101ceef99462b947a01ff **41**
+	43.213.53.83  90   **eb6434** bf5b7c4be0237986d5243b87aa6d5130 **5a**
 
 The bold parts of the node ID are the important parts. The rest are
-random numbers.
+random numbers. The last hex digit of the bold prefix in each row has only its
+most significant bit pulled from the CRC function. Its lower 3 bits are random.
 
 bootstrapping
 -------------
@@ -156,6 +157,54 @@ not the node has a correct understanding of its external IP or not. This could
 be done by voting, or only restart the DHT once at least a certain number of
 nodes, from separate searches, tells you your node ID is incorrect.
 
+rationale
+---------
+
+The choice of using CRC32 instead of a more traditional cryptographic hash
+function is justified primarily by these reasons:
+
+1. it is a fast function
+2. produces well distributed results
+3. there is no need for the hash function to be one-way (the input set is
+   so small that any hash function could be reversed).
+
+There are primarily two tests run on SHA-1 and CRC32 to establish the
+distribution of results. The first one is the number of bits in the output
+set that contain every possible combination of bits. The CRC function
+has a longer such prefix in its output than SHA-1. This means nodes will still
+have uniformly distributed IDs, even when IP addresses in use are not
+uniformly distributed.
+
+The following graph illustrates a few different hash functions with regard
+to this property.
+
+.. image:: complete_bit_prefixes.png
+
+This test takes into account IP addresses that are not globally routable, i.e.
+reserved for local networks, multicast and other things. It also takes into
+account that some /8 blocks are not in use by end-users and extremely unlikely
+to ever run a DHT node. This makes the results likely to be very similar to
+what we would see in the wild.
+
+These results indicate that CRC32 provides the best uniformity in the results
+in terms of bit prefixes where all possibilities are represented, and that
+no more than 21 bits should be used from the result. If more than 21 bits
+were to be used, there would be certain node IDs that would be impossible to
+have, which would make routing sub-optimal.
+
+The second test is more of a sanity test for the uniform distribution property.
+The target space (32 bit integer) is divided up into 1000 buckets. Every valid
+IP and ``r`` input is run through the algorithm and the result is put in the
+bucket it falls in. The expectation is that each bucket has roughly an equal
+number of results falling into it. The following graph shows the resulting
+histogram, comparing SHA-1 and CRC32.
+
+.. image:: hash_distribution.png
+
+The source code for these tests can be found here_.
+
+.. _here: https://github.com/arvidn/hash_complete_prefix
+
 enforcement
 -----------
 
diff --git a/docs/hash_distribution.png b/docs/hash_distribution.png
new file mode 100644
index 000000000..7bc1be79e
Binary files /dev/null and b/docs/hash_distribution.png differ
diff --git a/docs/ip_id_v4.png b/docs/ip_id_v4.png
index 7c17deb63..55d02390e 100644
Binary files a/docs/ip_id_v4.png and b/docs/ip_id_v4.png differ
diff --git a/docs/ip_id_v6.png b/docs/ip_id_v6.png
index 0422adb50..328caed1c 100644
Binary files a/docs/ip_id_v6.png and b/docs/ip_id_v6.png differ
diff --git a/docs/ips.py b/docs/ips.py
deleted file mode 100644
index b633aede8..000000000
--- a/docs/ips.py
+++ /dev/null
@@ -1,56 +0,0 @@
-#/bin/python
-
-import os
-import sys
-
-def num_ids(bits, total_bits):
-
-	if total_bits == 32:
-		bit_dec = 2
-	else:
-		bit_dec = 1
-
-	num_used = 7;
-	ret = 3
-
-	while bits > 0:
-		ret += min(num_used, bits)
-		num_used -= bit_dec
-		if num_used < 0: num_used = 0
-		bits -= 8
-	
-	return 1 << ret
-
-f = open('ip_id_v4.dat', 'w+')
-for i in range(0, 33):
-	print >>f, '%d\t%d\t%d' % (i, num_ids(i, 32), 1 << i)
-f.close()
-
-f = open('ip_id_v6.dat', 'w+')
-for i in range(0, 65):
-	print >>f, '%d\t%d\t%d' % (i, num_ids(i, 64), 1 << i)
-f.close()
-
-f = open('ip_id.gnuplot', 'w+')
-
-f.write('''
-set term png size 600,300
-set output "ip_id_v4.png"
-set logscale y
-set title "Number of possible node IDs"
-set ylabel "possible node IDs"
-set xlabel "bits controlled in IPv4"
-set xtics 4
-set grid
-plot "ip_id_v4.dat" using 1:2 title "octet-wise modulus" with lines, \
-	"ip_id_v4.dat" using 1:3 title "hash of IP" with lines
-
-set output "ip_id_v6.png"
-set title "Number of possible node IDs"
-set xlabel "bits controlled in IPv6"
-plot "ip_id_v6.dat" using 1:2 title "octet-wise modulus" with lines, \
-	"ip_id_v6.dat" using 1:3 title "hash of IP" with lines
-''')
-f.close()
-os.system('gnuplot ip_id.gnuplot')
-