diff --git a/include/libtorrent/kademlia/routing_table.hpp b/include/libtorrent/kademlia/routing_table.hpp
index f210c941f..4ec0e07d7 100644
--- a/include/libtorrent/kademlia/routing_table.hpp
+++ b/include/libtorrent/kademlia/routing_table.hpp
@@ -80,7 +80,7 @@ struct ipv6_hash
 	}
 };
 
-struct ip_set
+struct TORRENT_EXTRA_EXPORT ip_set
 {
 	void insert(address const& addr);
 	bool exists(address const& addr) const;
@@ -97,12 +97,23 @@ struct ip_set
 		return m_ip4s == rh.m_ip4s && m_ip6s == rh.m_ip6s;
 	}
 
+	std::size_t size() const { return m_ip4s.size() + m_ip6s.size(); }
+
 	// these must be multisets because there can be multiple routing table
 	// entries for a single IP when restrict_routing_ips is set to false
 	std::unordered_multiset<address_v4::bytes_type, ipv4_hash> m_ip4s;
 	std::unordered_multiset<address_v6::bytes_type, ipv6_hash> m_ip6s;
 };
 
+// Each routing table bucket represents node IDs with a certain number of bits
+// of prefix in common with our own node ID. Each bucket fits 8 nodes (and
+// sometimes more, closer to the top). In order to minimize the number of hops
+// necessary to traverse the DHT, we want the nodes in our buckets to be spread
+// out across all possible "sub-branches". That is what "classify" refers to:
+// it returns the 3 (or more) bits following the shared bit prefix.
+TORRENT_EXTRA_EXPORT std::uint8_t  classify_prefix(int bucket_idx, bool last_bucket
+	, int bucket_size, node_id nid);
+
 // differences in the implementation from the description in
 // the paper:
 //
@@ -308,6 +319,14 @@ private:
 	int const m_bucket_size;
 };
 
+TORRENT_EXTRA_EXPORT routing_table::add_node_status_t
+replace_node_impl(node_entry const& e, bucket_t& b, ip_set& ips
+	, int bucket_index, int bucket_size_limit, bool last_bucket
+#ifndef TORRENT_DISABLE_LOGGING
+	, dht_logger* log
+#endif
+	);
+
 } } // namespace libtorrent::dht
 
 #endif // ROUTING_TABLE_HPP
diff --git a/src/kademlia/routing_table.cpp b/src/kademlia/routing_table.cpp
index 5b31b76bc..b1a83f166 100644
--- a/src/kademlia/routing_table.cpp
+++ b/src/kademlia/routing_table.cpp
@@ -108,7 +108,139 @@ bool mostly_verified_nodes(bucket_t const& b)
 	return num_verified >= static_cast<int>(b.size()) * 2 / 3;
 }
 
-routing_table::routing_table(node_id const& id, udp proto, int bucket_size
+std::uint8_t classify_prefix(int const bucket_idx, bool const last_bucket
+	, int const bucket_size, node_id nid)
+{
+	TORRENT_ASSERT_VAL(bucket_size > 0, bucket_size);
+	TORRENT_ASSERT_VAL(bucket_size <= 256, bucket_size);
+
+	std::uint32_t mask = static_cast<std::uint32_t>(bucket_size) - 1;
+	// bucket sizes must be exact powers of two (mask shares no bit with bucket_size)
+	TORRENT_ASSERT_VAL((mask & static_cast<std::uint32_t>(bucket_size)) == 0, bucket_size);
+	// shift the mask up so that its top bit ends up at bit 7 (asserted below)
+	int const mask_shift = aux::count_leading_zeros(mask);
+	TORRENT_ASSERT_VAL(mask_shift >= 0, mask_shift);
+	TORRENT_ASSERT_VAL(mask_shift < 8, mask_shift);
+	mask <<= mask_shift;
+	TORRENT_ASSERT_VAL(mask > 0, mask);
+	TORRENT_ASSERT_VAL(bool((mask & 0x80) != 0), mask);
+
+	// all nodes in a bucket that has already been split share the bit that
+	// *defines* the bucket, so that bit carries no information and we shift
+	// one bit extra to skip it. The last bucket hasn't been split yet, so it
+	// contains entries from both "sides" and its top bit has to be included
+	// in the prefix.
+	nid <<= bucket_idx + int(!last_bucket);
+	std::uint8_t const ret = (nid[0] & mask) >> mask_shift;
+	TORRENT_ASSERT_VAL(ret < bucket_size, ret);
+	return ret;
+}
+
+routing_table::add_node_status_t replace_node_impl(node_entry const& e
+	, bucket_t& b, ip_set& ips, int const bucket_index
+	, int const bucket_size_limit, bool const last_bucket
+#ifndef TORRENT_DISABLE_LOGGING
+	, dht_logger* log
+#endif
+	)
+{
+	// try to fit e into the full bucket b by overwriting one of its nodes. If
+	// the bucket isn't full, this function should not have been called
+	TORRENT_ASSERT(int(b.size()) >= bucket_size_limit);
+
+	bucket_t::iterator j = std::max_element(b.begin(), b.end()
+		, [](node_entry const& lhs, node_entry const& rhs)
+		{ return lhs.fail_count() < rhs.fail_count(); });
+	TORRENT_ASSERT(j != b.end());
+
+	if (j->fail_count() > 0)
+	{
+		// j points to the node with the highest fail count, i.e. one that has
+		// been marked as stale. Replace it with this new one
+		ips.erase(j->addr());
+		*j = e;
+		ips.insert(e.addr());
+		return routing_table::node_added;
+	}
+
+	// then we look for nodes with the same 3 bit prefix (or however
+	// many bits prefix the bucket size warrants). If there is no other
+	// node with this prefix, remove the duplicate with the highest RTT.
+	// as the last replacement strategy, if the node we found matching our
+	// bit prefix has higher RTT than the new node, replace it.
+
+	// in order to provide as few lookups as possible before finding
+	// the data someone is looking for, make sure there is an affinity
+	// towards having a good spread of node IDs in each bucket
+	std::uint8_t const to_add_prefix = classify_prefix(bucket_index
+		, last_bucket, bucket_size_limit, e.id);
+	// nodes organized by their prefix. NOTE(review): this holds 128 slots but
+	// classify_prefix() accepts bucket sizes up to 256; confirm the limit is <= 128
+	aux::array<std::vector<bucket_t::iterator>, 128> nodes_storage;
+	auto const nodes = span<std::vector<bucket_t::iterator>>{nodes_storage}.first(bucket_size_limit);
+
+	for (j = b.begin(); j != b.end(); ++j)
+	{
+		std::uint8_t const prefix = classify_prefix(
+			bucket_index, last_bucket, bucket_size_limit, j->id);
+		TORRENT_ASSERT(prefix < nodes.size());
+		nodes[prefix].push_back(j);
+	}
+
+	if (!nodes[to_add_prefix].empty())
+	{
+		j = *std::max_element(nodes[to_add_prefix].begin(), nodes[to_add_prefix].end()
+			, [](bucket_t::iterator lhs, bucket_t::iterator rhs)
+			{ return *lhs < *rhs; });
+
+		// only if e is better than the worst node in this prefix slot do we
+		// replace it. resetting j to b.end() means we're not replacing anything
+		if (!(e < *j)) j = b.end();
+	}
+	else
+	{
+		// there is no node in this prefix slot. We definitely want to add it.
+		// Now we just need to figure out which one to replace
+		std::vector<bucket_t::iterator> replace_candidates;
+		for (auto const& n : nodes)
+		{
+			if (n.size() > 1) replace_candidates.insert(replace_candidates.end(), n.begin(), n.end());
+		}
+
+		// since the bucket is full, and there's no node in the prefix-slot
+		// we're about to add to, there must be at least one prefix slot that
+		// has more than one node.
+		TORRENT_ASSERT(!replace_candidates.empty());
+
+		// from these nodes, pick the "worst" one and replace it
+		j = *std::max_element(replace_candidates.begin(), replace_candidates.end()
+			, [](bucket_t::iterator lhs, bucket_t::iterator rhs)
+			{ return *lhs < *rhs; });
+	}
+
+	if (j != b.end())
+	{
+#ifndef TORRENT_DISABLE_LOGGING
+		if (log != nullptr && log->should_log(dht_logger::routing_table))
+		{
+			log->log(dht_logger::routing_table, "replacing node with better one: %s %s [%s %dms %d] vs. [%s %dms %d]"
+				, aux::to_hex(e.id).c_str(), print_address(e.addr()).c_str()
+				, e.verified ? "verified" : "not-verified", e.rtt
+				, classify_prefix(bucket_index, last_bucket, bucket_size_limit, e.id)
+				, j->verified ? "verified" : "not-verified", j->rtt
+				, classify_prefix(bucket_index, last_bucket, bucket_size_limit, j->id)
+				);
+		}
+#endif
+		ips.erase(j->addr());
+		*j = e;
+		ips.insert(e.addr());
+		return routing_table::node_added;
+	}
+	return routing_table::need_bucket_split; // no node was replaced
+}
+
+routing_table::routing_table(node_id const& id, udp const proto, int const bucket_size
 	, dht::settings const& settings
 	, dht_logger* log)
 	:
@@ -122,6 +254,8 @@ routing_table::routing_table(node_id const& id, udp proto, int bucket_size
 	, m_last_self_refresh(min_time())
 	, m_bucket_size(bucket_size)
 {
+	// bucket sizes must be a power of 2
+	TORRENT_ASSERT_VAL(((bucket_size - 1) & bucket_size) == 0, bucket_size);
 	TORRENT_UNUSED(log);
 	m_buckets.reserve(30);
 }
@@ -369,6 +503,8 @@ node_entry* routing_table::find_node(udp::endpoint const& ep
 	return nullptr;
 }
 
+// TODO: this needs to take the bucket "prefix" into account. It should be
+// unified with add_node_impl()
 void routing_table::fill_from_replacements(table_t::iterator bucket)
 {
 	bucket_t& b = bucket->live_nodes;
@@ -379,9 +515,7 @@ void routing_table::fill_from_replacements(table_t::iterator bucket)
 
 	// sort by RTT first, to find the node with the lowest
 	// RTT that is pinged
-	std::sort(rb.begin(), rb.end()
-		, [](node_entry const& lhs, node_entry const& rhs)
-			{ return lhs.rtt < rhs.rtt; });
+	std::sort(rb.begin(), rb.end());
 
 	while (int(b.size()) < bucket_size && !rb.empty())
 	{
@@ -588,7 +722,6 @@ routing_table::add_node_status_t routing_table::add_node_impl(node_entry e)
 	// long to split, and lose nodes (in the case where lower-numbered buckets
 	// are larger)
 	int const bucket_size_limit = bucket_limit(bucket_index);
-	int const next_bucket_size_limit = bucket_limit(bucket_index + 1);
 
 	bucket_t::iterator j;
 
@@ -672,7 +805,7 @@ ip_ok:
 	// bucket's size limit. This makes use split the low-numbered buckets split
 	// earlier when we have larger low buckets, to make it less likely that we
 	// lose nodes
-	if (e.pinged() && int(b.size()) < (can_split ? next_bucket_size_limit : bucket_size_limit))
+	if (e.pinged() && int(b.size()) < bucket_size_limit)
 	{
 		if (b.empty()) b.reserve(bucket_size_limit);
 		b.push_back(e);
@@ -682,154 +815,23 @@ ip_ok:
 
 	// if there is no room, we look for nodes marked as stale
 	// in the k-bucket. If we find one, we can replace it.
-	// then we look for nodes with the same 3 bit prefix (or however
-	// many bits prefix the bucket size warrants). If there is no other
-	// node with this prefix, remove the duplicate with the highest RTT.
-	// as the last replacement strategy, if the node we found matching our
-	// bit prefix has higher RTT than the new node, replace it.
+
+	// A node is considered stale if it has failed at least one
+	// time. Here we choose the node that has failed most times.
+	// If we don't find one, place this node in the replacement-
+	// cache and replace any nodes that will fail in the future
+	// with nodes from that cache.
+
+	bool const last_bucket = bucket_index + 1 == int(m_buckets.size());
 
 	if (e.confirmed())
 	{
-		// A node is considered stale if it has failed at least one
-		// time. Here we choose the node that has failed most times.
-		// If we don't find one, place this node in the replacement-
-		// cache and replace any nodes that will fail in the future
-		// with nodes from that cache.
-
-		j = std::max_element(b.begin(), b.end()
-			, [](node_entry const& lhs, node_entry const& rhs)
-			{ return lhs.fail_count() < rhs.fail_count(); });
-		TORRENT_ASSERT(j != b.end());
-
-		if (j->fail_count() > 0)
-		{
-			// i points to a node that has been marked
-			// as stale. Replace it with this new one
-			m_ips.erase(j->addr());
-			*j = e;
-			m_ips.insert(e.addr());
-			return node_added;
-		}
-
-		// in order to provide as few lookups as possible before finding
-		// the data someone is looking for, make sure there is an affinity
-		// towards having a good spread of node IDs in each bucket
-
-		int mask = bucket_size_limit - 1;
-		int mask_shift = 0;
-		TORRENT_ASSERT_VAL(mask > 0, mask);
-		while ((mask & 0x80) == 0)
-		{
-			mask <<= 1;
-			++mask_shift;
-		}
-
-		// in case bucket_size_limit is not an even power of 2
-		mask = (0xff << mask_shift) & 0xff;
-
-		// pick out all nodes that have the same prefix as the new node
-		std::vector<bucket_t::iterator> nodes;
-		bool force_replace = false;
-
-		// the last bucket is special, since it hasn't been split yet, it
-		// includes that top bit as well
-		int const prefix_offset =
-			bucket_index + 1 == int(m_buckets.size()) ? bucket_index : bucket_index + 1;
-
-		{
-			node_id id = e.id;
-			id <<= prefix_offset;
-			int const candidate_prefix = id[0] & mask;
-
-			for (j = b.begin(); j != b.end(); ++j)
-			{
-				if (!matching_prefix(j->id, mask, candidate_prefix, prefix_offset)) continue;
-				nodes.push_back(j);
-			}
-		}
-
-		if (!nodes.empty())
-		{
-			j = *std::max_element(nodes.begin(), nodes.end()
-				, [](bucket_t::iterator lhs, bucket_t::iterator rhs)
-				{ return *lhs < *rhs; });
-		}
-		else
-		{
-			// there is no node in this prefix-slot, there may be some
-			// nodes sharing a prefix. Find all nodes that do not
-			// have a unique prefix
-
-			// find node entries with duplicate prefixes in O(1)
-			aux::vector<bucket_t::iterator> prefix(aux::numeric_cast<std::size_t>(int(1 << (8 - mask_shift))), b.end());
-			TORRENT_ASSERT(int(prefix.size()) >= bucket_size_limit);
-
-			// the begin iterator from this object is used as a placeholder
-			// for an occupied slot whose node has already been added to the
-			// duplicate nodes list.
-			bucket_t placeholder;
-
-			nodes.reserve(b.size());
-			for (j = b.begin(); j != b.end(); ++j)
-			{
-				node_id id = j->id;
-				id <<= prefix_offset;
-				int this_prefix = (id[0] & mask) >> mask_shift;
-				TORRENT_ASSERT(this_prefix >= 0);
-				TORRENT_ASSERT(this_prefix < int(prefix.size()));
-				if (prefix[this_prefix] != b.end())
-				{
-					// there's already a node with this prefix. Remember both
-					// duplicates.
-					nodes.push_back(j);
-
-					if (prefix[this_prefix] != placeholder.begin())
-					{
-						nodes.push_back(prefix[this_prefix]);
-						prefix[this_prefix] = placeholder.begin();
-					}
-				}
-			}
-
-			if (!nodes.empty())
-			{
-				// from these nodes, pick the one with the highest RTT
-				// and replace it
-
-				auto k = std::max_element(nodes.begin(), nodes.end()
-					, [](bucket_t::iterator lhs, bucket_t::iterator rhs)
-					{ return *lhs < *rhs; });
-
-				// in this case, we would really rather replace the node even if
-				// the new node has higher RTT, because it fills a new prefix that we otherwise
-				// don't have.
-				force_replace = true;
-				j = *k;
-			}
-			else
-			{
-				j = std::max_element(b.begin(), b.end());
-			}
-		}
-
-		if (j != b.end() && (force_replace || e < *j))
-		{
+		auto const ret = replace_node_impl(e, b, m_ips, bucket_index, bucket_size_limit, last_bucket
 #ifndef TORRENT_DISABLE_LOGGING
-			if (m_log != nullptr && m_log->should_log(dht_logger::routing_table))
-			{
-				m_log->log(dht_logger::routing_table, "replacing node with better one: %s %s %s %dms vs. %s %dms"
-					, aux::to_hex(e.id).c_str(), print_address(e.addr()).c_str()
-					, e.verified ? "verified" : "not-verified", e.rtt
-					, j->verified ? "verified" : "not-verified", j->rtt);
-			}
+			, m_log
 #endif
-			m_ips.erase(j->addr());
-			*j = e;
-			m_ips.insert(e.addr());
-			return node_added;
-		}
-		// in order to keep lookup times small, prefer nodes with low RTTs
-
+			);
+		if (ret != need_bucket_split) return ret;
 	}
 
 	// if we can't split, try to insert into the replacement bucket
@@ -840,7 +842,6 @@ ip_ok:
 		// the bucket, and the bucket is full, we have to
 		// cache this node and wait until some node fails
 		// and then replace it.
-
 		j = std::find_if(rb.begin(), rb.end()
 			, [&e](node_entry const& ne) { return ne.id == e.id; });
 
@@ -861,7 +862,15 @@ ip_ok:
 			// less reliable than this one, that has been pinged
 			j = std::find_if(rb.begin(), rb.end()
 				, [] (node_entry const& ne) { return !ne.pinged(); });
-			if (j == rb.end()) j = rb.begin();
+			if (j == rb.end())
+			{
+				auto const ret = replace_node_impl(e, rb, m_ips, bucket_index, m_bucket_size, last_bucket
+#ifndef TORRENT_DISABLE_LOGGING
+					, nullptr
+#endif
+					);
+				return ret == node_added ? node_added : failed_to_add;
+			}
 			m_ips.erase(j->addr());
 			rb.erase(j);
 		}
diff --git a/test/test_dht.cpp b/test/test_dht.cpp
index f7249ca05..93a75c0f8 100644
--- a/test/test_dht.cpp
+++ b/test/test_dht.cpp
@@ -688,35 +688,25 @@ void print_state(std::ostream& os, routing_table const& table)
 			, int(i->replacements.size()));
 		if (cursor > int(buf.size()) - 500) buf.resize(buf.size() * 3 / 2);
 
-		int id_shift;
-		// the last bucket is special, since it hasn't been split yet, it
-		// includes that top bit as well
-		if (bucket_index + 1 == int(table.buckets().size()))
-			id_shift = bucket_index;
-		else
-			id_shift = bucket_index + 1;
+		bucket_t nodes = i->live_nodes;
 
-		for (bucket_t::const_iterator j = i->live_nodes.begin()
-			, end2(i->live_nodes.end()); j != end2; ++j)
+		std::sort(nodes.begin(), nodes.end()
+			, [](node_entry const& lhs, node_entry const& rhs)
+			{ return lhs.id < rhs.id; }
+		);
+
+		for (auto j = nodes.begin(); j != nodes.end(); ++j)
 		{
-			int bucket_size_limit = table.bucket_limit(bucket_index);
-			std::uint32_t top_mask = std::uint32_t(bucket_size_limit - 1);
-			int mask_shift = 0;
+			int const bucket_size_limit = table.bucket_limit(bucket_index);
+			TORRENT_ASSERT_VAL(bucket_size_limit <= 256, bucket_size_limit);
 			TORRENT_ASSERT_VAL(bucket_size_limit > 0, bucket_size_limit);
-			while ((top_mask & 0x80) == 0)
-			{
-				top_mask <<= 1;
-				++mask_shift;
-			}
-			top_mask = (0xff << mask_shift) & 0xff;
 
-			node_id id = j->id;
-			id <<= id_shift;
+			bool const last_bucket = bucket_index + 1 == int(table.buckets().size());
+			int const prefix = classify_prefix(bucket_index, last_bucket
+				, bucket_size_limit, j->id);
 
 			cursor += std::snprintf(BUFFER_CURSOR_POS
-				, " prefix: %2x id: %s"
-				, ((id[0] & top_mask) >> mask_shift)
-				, aux::to_hex(j->id).c_str());
+				, " prefix: %2x id: %s", prefix, aux::to_hex(j->id).c_str());
 
 			if (j->rtt == 0xffff)
 			{
@@ -730,7 +720,7 @@ void print_state(std::ostream& os, routing_table const& table)
 			}
 
 			cursor += std::snprintf(BUFFER_CURSOR_POS
-				, " fail: %4d ping: %d dist: %3d"
+				, " fail: %3d ping: %d dist: %3d"
 				, j->fail_count()
 				, j->pinged()
 				, distance_exp(table.id(), j->id));
@@ -758,52 +748,29 @@ void print_state(std::ostream& os, routing_table const& table)
 	for (auto i = table.buckets().begin(), end(table.buckets().end());
 		i != end; ++i, ++bucket_index)
 	{
-		int bucket_size_limit = table.bucket_limit(bucket_index);
-
-		// mask out the first 3 bits, or more depending
-		// on the bucket_size_limit
-		// we have all the lower bits set in (bucket_size_limit-1)
-		// but we want the left-most bits to be set. Shift it
-		// until the MSB is set
-		std::uint32_t top_mask = std::uint32_t(bucket_size_limit - 1);
-		int mask_shift = 0;
-		TORRENT_ASSERT_VAL(bucket_size_limit > 0, bucket_size_limit);
-		while ((top_mask & 0x80) == 0)
-		{
-			top_mask <<= 1;
-			++mask_shift;
-		}
-		top_mask = (0xff << mask_shift) & 0xff;
-		bucket_size_limit = int((top_mask >> mask_shift) + 1);
+		int const bucket_size_limit = table.bucket_limit(bucket_index);
 		TORRENT_ASSERT_VAL(bucket_size_limit <= 256, bucket_size_limit);
-		bool sub_buckets[256];
-		std::memset(sub_buckets, 0, sizeof(sub_buckets));
+		TORRENT_ASSERT_VAL(bucket_size_limit > 0, bucket_size_limit);
+		std::array<bool, 256> sub_buckets;
+		sub_buckets.fill(false);
 
-		int id_shift;
 		// the last bucket is special, since it hasn't been split yet, it
 		// includes that top bit as well
-		if (bucket_index + 1 == int(table.buckets().size()))
-			id_shift = bucket_index;
-		else
-			id_shift = bucket_index + 1;
+		bool const last_bucket = bucket_index + 1 == int(table.buckets().size());
 
-		for (bucket_t::const_iterator j = i->live_nodes.begin()
-			, end2(i->live_nodes.end()); j != end2; ++j)
+		for (auto const& e : i->live_nodes)
 		{
-			node_id id = j->id;
-			id <<= id_shift;
-			int b = (id[0] & top_mask) >> mask_shift;
-			TORRENT_ASSERT(b >= 0 && b < int(sizeof(sub_buckets)/sizeof(sub_buckets[0])));
-			sub_buckets[b] = true;
+			std::size_t const prefix = static_cast<std::size_t>(
+				classify_prefix(bucket_index, last_bucket, bucket_size_limit, e.id));
+			sub_buckets[prefix] = true;
 		}
 
-		cursor += std::snprintf(BUFFER_CURSOR_POS
-			, "%2d mask: %2x: [", bucket_index, (top_mask >> mask_shift));
+		cursor += std::snprintf(BUFFER_CURSOR_POS, "%2d: [", bucket_index);
 
 		for (int j = 0; j < bucket_size_limit; ++j)
 		{
 			cursor += std::snprintf(BUFFER_CURSOR_POS
-				, (sub_buckets[j] ? "X" : " "));
+				, (sub_buckets[static_cast<std::size_t>(j)] ? "X" : " "));
 		}
 		cursor += std::snprintf(BUFFER_CURSOR_POS
 			, "]\n");
@@ -1581,6 +1548,8 @@ namespace {
 
 void test_routing_table(address(&rand_addr)())
 {
+	init_rand_address();
+
 	dht_test_setup t(udp::endpoint(rand_addr(), 20));
 	bdecode_node response;
 
@@ -1589,7 +1558,7 @@ void test_routing_table(address(&rand_addr)())
 	s.extended_routing_table = false;
 	//	s.restrict_routing_ips = false;
 	node_id const nid = to_hash("3123456789abcdef01232456789abcdef0123456");
-	const int bucket_size = 10;
+	const int bucket_size = 8;
 	dht::routing_table table(nid, t.source.protocol(), bucket_size, s, &t.observer);
 	TEST_EQUAL(std::get<0>(table.size()), 0);
 
@@ -1685,8 +1654,6 @@ void test_routing_table(address(&rand_addr)())
 
 	s.restrict_routing_ips = false;
 
-	init_rand_address();
-
 	{
 		auto const ep = rand_udp_ep(rand_addr);
 		auto const id = generate_id(ep.address());
@@ -1694,16 +1661,16 @@ void test_routing_table(address(&rand_addr)())
 	}
 
 	nodes.clear();
-	for (int i = 0; i < 7000; ++i)
+	for (int i = 0; i < 10000; ++i)
 	{
 		auto const ep = rand_udp_ep(rand_addr);
 		auto const id = generate_id(ep.address());
 		table.node_seen(id, ep, 20 + (id[19] & 0xff));
 	}
 	std::printf("active buckets: %d\n", table.num_active_buckets());
-	TEST_CHECK(table.num_active_buckets() == 10
-		|| table.num_active_buckets() == 11);
-	TEST_CHECK(std::get<0>(table.size()) >= 10 * 10);
+	TEST_CHECK(table.num_active_buckets() == 11
+		|| table.num_active_buckets() == 12);
+	TEST_CHECK(std::get<0>(table.size()) >= bucket_size * 10);
 	//TODO: 2 test num_global_nodes
 	//TODO: 2 test need_refresh
 
@@ -3032,7 +2999,7 @@ TORRENT_TEST(routing_table_uniform)
 	// 3: 16
 	// 4: 8
 	// i.e. no more than 5 levels
-	TEST_EQUAL(tbl.num_active_buckets(), 5);
+	TEST_EQUAL(tbl.num_active_buckets(), 6);
 
 	print_state(std::cout, tbl);
 }
@@ -3538,7 +3505,7 @@ TORRENT_TEST(dht_verify_node_address)
 	dht::settings s;
 	s.extended_routing_table = false;
 	node_id id = to_hash("3123456789abcdef01232456789abcdef0123456");
-	const int bucket_size = 10;
+	const int bucket_size = 8;
 	dht::routing_table table(id, udp::v4(), bucket_size, s, &observer);
 	std::vector<node_entry> nodes;
 	TEST_EQUAL(std::get<0>(table.size()), 0);
@@ -3562,7 +3529,7 @@ TORRENT_TEST(dht_verify_node_address)
 
 	// incorrect data, wrong id, should cause node to be removed
 	table.node_seen(to_hash("0123456789abcdef01232456789abcdef0123456")
-					, udp::endpoint(addr("4.4.4.4"), 4), 10);
+		, udp::endpoint(addr("4.4.4.4"), 4), 10);
 	table.find_node(id, nodes, 0, 10);
 
 	TEST_EQUAL(std::get<0>(table.size()), 0);
@@ -3834,6 +3801,197 @@ TORRENT_TEST(mostly_verified_nodes)
 	TEST_CHECK(!mostly_verified_nodes({fake_node(false), fake_node(false), fake_node(false)}));
 }
 
+TORRENT_TEST(classify_prefix)
+{
+	// the last bucket in the routing table: the prefix starts at bit bucket_idx
+	TEST_EQUAL(int(classify_prefix(0, true, 8, to_hash("0cdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdc"))), 0);
+	TEST_EQUAL(int(classify_prefix(0, true, 8, to_hash("2cdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdc"))), 1);
+	TEST_EQUAL(int(classify_prefix(0, true, 8, to_hash("4cdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdc"))), 2);
+	TEST_EQUAL(int(classify_prefix(0, true, 8, to_hash("6cdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdc"))), 3);
+	TEST_EQUAL(int(classify_prefix(0, true, 8, to_hash("8cdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdc"))), 4);
+	TEST_EQUAL(int(classify_prefix(0, true, 8, to_hash("acdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdc"))), 5);
+	TEST_EQUAL(int(classify_prefix(0, true, 8, to_hash("ccdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdc"))), 6);
+	TEST_EQUAL(int(classify_prefix(0, true, 8, to_hash("ecdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdc"))), 7);
+	TEST_EQUAL(int(classify_prefix(0, true, 8, to_hash("fcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdc"))), 7);
+
+	TEST_EQUAL(int(classify_prefix(4, true, 8, to_hash("c0cdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcd"))), 0);
+	TEST_EQUAL(int(classify_prefix(4, true, 8, to_hash("c2cdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcd"))), 1);
+	TEST_EQUAL(int(classify_prefix(4, true, 8, to_hash("c4cdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcd"))), 2);
+	TEST_EQUAL(int(classify_prefix(4, true, 8, to_hash("c6cdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcd"))), 3);
+	TEST_EQUAL(int(classify_prefix(4, true, 8, to_hash("c8cdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcd"))), 4);
+	TEST_EQUAL(int(classify_prefix(4, true, 8, to_hash("cacdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcd"))), 5);
+	TEST_EQUAL(int(classify_prefix(4, true, 8, to_hash("cccdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcd"))), 6);
+	TEST_EQUAL(int(classify_prefix(4, true, 8, to_hash("cecdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcd"))), 7);
+	TEST_EQUAL(int(classify_prefix(4, true, 8, to_hash("cfcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcd"))), 7);
+
+	TEST_EQUAL(int(classify_prefix(8, true, 8, to_hash("dc0cdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdc"))), 0);
+	TEST_EQUAL(int(classify_prefix(8, true, 8, to_hash("dc2cdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdc"))), 1);
+	TEST_EQUAL(int(classify_prefix(8, true, 8, to_hash("dc4cdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdc"))), 2);
+	TEST_EQUAL(int(classify_prefix(8, true, 8, to_hash("dc6cdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdc"))), 3);
+	TEST_EQUAL(int(classify_prefix(8, true, 8, to_hash("dc8cdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdc"))), 4);
+	TEST_EQUAL(int(classify_prefix(8, true, 8, to_hash("dcacdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdc"))), 5);
+	TEST_EQUAL(int(classify_prefix(8, true, 8, to_hash("dcccdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdc"))), 6);
+	TEST_EQUAL(int(classify_prefix(8, true, 8, to_hash("dcecdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdc"))), 7);
+	TEST_EQUAL(int(classify_prefix(8, true, 8, to_hash("dcfcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdc"))), 7);
+
+	TEST_EQUAL(int(classify_prefix(12, true, 8, to_hash("cdc0cdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcd"))), 0);
+	TEST_EQUAL(int(classify_prefix(12, true, 8, to_hash("cdc2cdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcd"))), 1);
+	TEST_EQUAL(int(classify_prefix(12, true, 8, to_hash("cdc4cdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcd"))), 2);
+	TEST_EQUAL(int(classify_prefix(12, true, 8, to_hash("cdc6cdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcd"))), 3);
+	TEST_EQUAL(int(classify_prefix(12, true, 8, to_hash("cdc8cdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcd"))), 4);
+	TEST_EQUAL(int(classify_prefix(12, true, 8, to_hash("cdcacdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcd"))), 5);
+	TEST_EQUAL(int(classify_prefix(12, true, 8, to_hash("cdcccdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcd"))), 6);
+	TEST_EQUAL(int(classify_prefix(12, true, 8, to_hash("cdcecdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcd"))), 7);
+	TEST_EQUAL(int(classify_prefix(12, true, 8, to_hash("cdcfcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcd"))), 7);
+
+	// not the last bucket: the prefix starts one bit later, at bit bucket_idx + 1
+	TEST_EQUAL(int(classify_prefix(11, false, 8, to_hash("cdc0cdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcd"))), 0);
+	TEST_EQUAL(int(classify_prefix(11, false, 8, to_hash("cdc2cdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcd"))), 1);
+	TEST_EQUAL(int(classify_prefix(11, false, 8, to_hash("cdc4cdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcd"))), 2);
+	TEST_EQUAL(int(classify_prefix(11, false, 8, to_hash("cdc6cdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcd"))), 3);
+	TEST_EQUAL(int(classify_prefix(11, false, 8, to_hash("cdc8cdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcd"))), 4);
+	TEST_EQUAL(int(classify_prefix(11, false, 8, to_hash("cdcacdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcd"))), 5);
+	TEST_EQUAL(int(classify_prefix(11, false, 8, to_hash("cdcccdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcd"))), 6);
+	TEST_EQUAL(int(classify_prefix(11, false, 8, to_hash("cdcecdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcd"))), 7);
+	TEST_EQUAL(int(classify_prefix(11, false, 8, to_hash("cdcfcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcd"))), 7);
+
+	TEST_EQUAL(int(classify_prefix(12, false, 8, to_hash("cdc8cdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcd"))), 0);
+	TEST_EQUAL(int(classify_prefix(12, false, 8, to_hash("cdc9cdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcd"))), 1);
+	TEST_EQUAL(int(classify_prefix(12, false, 8, to_hash("cdcacdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcd"))), 2);
+	TEST_EQUAL(int(classify_prefix(12, false, 8, to_hash("cdcbcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcd"))), 3);
+	TEST_EQUAL(int(classify_prefix(12, false, 8, to_hash("cdcccdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcd"))), 4);
+	TEST_EQUAL(int(classify_prefix(12, false, 8, to_hash("cdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcd"))), 5);
+	TEST_EQUAL(int(classify_prefix(12, false, 8, to_hash("cdcecdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcd"))), 6);
+	TEST_EQUAL(int(classify_prefix(12, false, 8, to_hash("cdcfcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcd"))), 7);
+	TEST_EQUAL(int(classify_prefix(12, false, 8, to_hash("cdc7cdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcd"))), 7);
+
+	// larger bucket: 16 slots, so all 16 values of the hex digit at bit 12 are distinct
+	TEST_EQUAL(int(classify_prefix(12, true, 16, to_hash("cdc0cdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcd"))), 0);
+	TEST_EQUAL(int(classify_prefix(12, true, 16, to_hash("cdc1cdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcd"))), 1);
+	TEST_EQUAL(int(classify_prefix(12, true, 16, to_hash("cdc2cdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcd"))), 2);
+	TEST_EQUAL(int(classify_prefix(12, true, 16, to_hash("cdc3cdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcd"))), 3);
+	TEST_EQUAL(int(classify_prefix(12, true, 16, to_hash("cdc4cdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcd"))), 4);
+	TEST_EQUAL(int(classify_prefix(12, true, 16, to_hash("cdc5cdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcd"))), 5);
+	TEST_EQUAL(int(classify_prefix(12, true, 16, to_hash("cdc6cdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcd"))), 6);
+	TEST_EQUAL(int(classify_prefix(12, true, 16, to_hash("cdc7cdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcd"))), 7);
+	TEST_EQUAL(int(classify_prefix(12, true, 16, to_hash("cdc8cdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcd"))), 8);
+	TEST_EQUAL(int(classify_prefix(12, true, 16, to_hash("cdc9cdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcd"))), 9);
+	TEST_EQUAL(int(classify_prefix(12, true, 16, to_hash("cdcacdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcd"))), 10);
+	TEST_EQUAL(int(classify_prefix(12, true, 16, to_hash("cdcbcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcd"))), 11);
+	TEST_EQUAL(int(classify_prefix(12, true, 16, to_hash("cdcccdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcd"))), 12);
+	TEST_EQUAL(int(classify_prefix(12, true, 16, to_hash("cdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcd"))), 13);
+	TEST_EQUAL(int(classify_prefix(12, true, 16, to_hash("cdcecdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcd"))), 14);
+	TEST_EQUAL(int(classify_prefix(12, true, 16, to_hash("cdcfcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcd"))), 15);
+}
+
+namespace { // test helper used to build the bucket entries below
+node_entry n(ip_set* ips, char const* nid, bool verified = true, int rtt = 0, int failed = 0)
+{
+	node_entry e(rand_udp_ep());
+	if (ips) ips->insert(e.addr()); // ips may be null, then the address is not recorded
+	e.verified = verified;
+	e.rtt = static_cast<std::uint16_t>(rtt);
+	e.id = to_hash(nid); // nid is passed as a hex string by the tests below
+	if (failed != 0) e.timeout_count = static_cast<std::uint8_t>(failed);
+	return e;
+}
+} // anonymous namespace
+
+#ifndef TORRENT_DISABLE_LOGGING
+#define LOGGER , nullptr
+#else // logging disabled: replace_node_impl() takes no logger argument
+#define LOGGER
+#endif // TORRENT_DISABLE_LOGGING
+TORRENT_TEST(replace_node_impl)
+{
+	// replace specific prefix "slot"
+	{
+	ip_set p;
+	dht::bucket_t b = {
+		n(&p, "1fffffffffffffffffffffffffffffffffffffff", true, 50),
+		n(&p, "3fffffffffffffffffffffffffffffffffffffff", true, 50),
+		n(&p, "5fffffffffffffffffffffffffffffffffffffff", true, 50),
+		n(&p, "7fffffffffffffffffffffffffffffffffffffff", true, 50),
+		n(&p, "9fffffffffffffffffffffffffffffffffffffff", true, 50), // <== replaced
+		n(&p, "bfffffffffffffffffffffffffffffffffffffff", true, 50),
+		n(&p, "dfffffffffffffffffffffffffffffffffffffff", true, 50),
+		n(&p, "ffffffffffffffffffffffffffffffffffffffff", true, 50),
+	};
+	TEST_EQUAL(p.size(), 8);
+	TEST_CHECK(
+		replace_node_impl(n(nullptr, "9fcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcd")
+	, b, p, 0, 8, true LOGGER) == routing_table::node_added);
+	TEST_CHECK(b[4].id == to_hash("9fcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcd"));
+	TEST_EQUAL(p.size(), 8);
+	}
+
+	// only try to replace specific prefix "slot", and if we fail (RTT is
+	// higher), don't replace anything else
+	{
+	ip_set p;
+	dht::bucket_t b = {
+		n(&p, "1fffffffffffffffffffffffffffffffffffffff", true, 500),
+		n(&p, "3fffffffffffffffffffffffffffffffffffffff", true, 500),
+		n(&p, "5fffffffffffffffffffffffffffffffffffffff", true, 500),
+		n(&p, "7fffffffffffffffffffffffffffffffffffffff", true, 500),
+		n(&p, "9fffffffffffffffffffffffffffffffffffffff", true, 50),
+		n(&p, "bfffffffffffffffffffffffffffffffffffffff", true, 500),
+		n(&p, "dfffffffffffffffffffffffffffffffffffffff", true, 500),
+		n(&p, "ffffffffffffffffffffffffffffffffffffffff", true, 500),
+	};
+	TEST_EQUAL(p.size(), 8);
+	TEST_CHECK(
+		replace_node_impl(n(nullptr, "9fcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcd", true, 100)
+	, b, p, 0, 8, true LOGGER) != routing_table::node_added);
+	TEST_CHECK(b[4].id == to_hash("9fffffffffffffffffffffffffffffffffffffff"));
+	TEST_EQUAL(p.size(), 8);
+	}
+
+	// if there are multiple candidates to replace, pick the one with the highest
+	// RTT. We're picking the prefix slots with duplicates
+	{
+	ip_set p;
+	dht::bucket_t b = {
+		n(&p, "1fffffffffffffffffffffffffffffffffffffff", true, 50),
+		n(&p, "3fffffffffffffffffffffffffffffffffffffff", true, 50),
+		n(&p, "5fffffffffffffffffffffffffffffffffffffff", true, 50),
+		n(&p, "7fffffffffffffffffffffffffffffffffffffff", true, 50),
+		n(&p, "bfffffffffffffffffffffffffffffffffffffff", true, 50),
+		n(&p, "bfffffffffffffffffffffffffffffffffffffff", true, 51), // <== replaced
+		n(&p, "dfffffffffffffffffffffffffffffffffffffff", true, 50),
+		n(&p, "ffffffffffffffffffffffffffffffffffffffff", true, 50),
+	};
+	TEST_EQUAL(p.size(), 8);
+	TEST_CHECK(
+		replace_node_impl(n(nullptr, "9fcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcd", true, 50)
+	, b, p, 0, 8, true LOGGER) == routing_table::node_added);
+	TEST_CHECK(b[5].id == to_hash("9fcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcd"));
+	TEST_EQUAL(p.size(), 8);
+	}
+
+	// if there is a node with fail count > 0, replace that, regardless of
+	// anything else
+	{
+	ip_set p;
+	dht::bucket_t b = {
+		n(&p, "1fffffffffffffffffffffffffffffffffffffff", true, 50),
+		n(&p, "3fffffffffffffffffffffffffffffffffffffff", true, 50),
+		n(&p, "5fffffffffffffffffffffffffffffffffffffff", true, 50),
+		n(&p, "7fffffffffffffffffffffffffffffffffffffff", true, 50),
+		n(&p, "9fffffffffffffffffffffffffffffffffffffff", true, 50),
+		n(&p, "bfffffffffffffffffffffffffffffffffffffff", true, 50),
+		n(&p, "dfffffffffffffffffffffffffffffffffffffff", true, 50),
+		n(&p, "ffffffffffffffffffffffffffffffffffffffff", true, 50, 1), // <== replaced
+	};
+	TEST_EQUAL(p.size(), 8);
+	TEST_CHECK(
+		replace_node_impl(n(nullptr, "9fcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcd", true, 50)
+	, b, p, 0, 8, true LOGGER) == routing_table::node_added);
+	TEST_CHECK(b[7].id == to_hash("9fcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcd"));
+	TEST_EQUAL(p.size(), 8);
+	}
+}
+
 // TODO: test obfuscated_get_peers
 
 #else