From 5b747f58f7cc9ed875d0f37a3a4fd9ed866a0c06 Mon Sep 17 00:00:00 2001
From: Arvid Norberg <arvid@libtorrent.org>
Date: Sun, 31 Jan 2010 19:14:00 +0000
Subject: [PATCH] add guided read cache and update tuning docs

---
 bindings/python/src/session_settings.cpp |  1 +
 docs/manual.rst                          | 15 +++++++
 docs/tuning.rst                          | 17 ++++++++
 examples/client_test.cpp                 |  1 +
 include/libtorrent/disk_io_thread.hpp    | 23 ++++++----
 include/libtorrent/session_settings.hpp  | 14 ++++++
 include/libtorrent/storage.hpp           |  7 +--
 src/disk_io_thread.cpp                   | 54 +++++++++++++++++-------
 src/peer_connection.cpp                  | 37 +++++++++++++++-
 src/storage.cpp                          | 15 ++++---
 10 files changed, 149 insertions(+), 35 deletions(-)

diff --git a/bindings/python/src/session_settings.cpp b/bindings/python/src/session_settings.cpp
index ddb020bc2..e98c7e59c 100644
--- a/bindings/python/src/session_settings.cpp
+++ b/bindings/python/src/session_settings.cpp
@@ -107,6 +107,7 @@ void bind_session_settings()
         .def_readwrite("drop_skipped_requests", &session_settings::drop_skipped_requests)
         .def_readwrite("low_prio_disk", &session_settings::low_prio_disk)
         .def_readwrite("volatile_read_cache", &session_settings::volatile_read_cache)
+        .def_readwrite("guided_read_cache", &session_settings::guided_read_cache)
     ;
 
     enum_<proxy_settings::proxy_type>("proxy_type")
diff --git a/docs/manual.rst b/docs/manual.rst
index a49968479..d07ddd736 100644
--- a/docs/manual.rst
+++ b/docs/manual.rst
@@ -3722,6 +3722,8 @@ session_settings
 
 		bool low_prio_disk;
 		bool volatile_read_cache;
+		bool guided_read_cache;
+		int default_cache_min_age;
 	};
 
 ``user_agent`` this is the client identification to the tracker.
@@ -4198,6 +4200,19 @@ to free up more space. This is useful if you don't expect the disk
 cache to create any cache hits from other peers than the one who
 triggered the cache line to be read into the cache in the first place.
 
+``guided_read_cache`` enables the disk cache to adjust the size
+of a cache line generated by peers to depend on the upload rate
+you are sending to that peer. The intention is to optimize the RAM
+usage of the cache, to read ahead further for peers that you're
+sending faster to.
+
+``default_cache_min_age`` is the minimum number of seconds any read
+cache line is kept in the cache. This defaults to one second but
+may be greater if ``guided_read_cache`` is enabled. Having a lower
+bound on the time a cache line stays in the cache is an attempt
+to avoid swapping the same pieces in and out of the cache in case
+there is a shortage of spare cache space.
+
 pe_settings
 ===========
 
diff --git a/docs/tuning.rst b/docs/tuning.rst
index 810c220d1..738a99387 100644
--- a/docs/tuning.rst
+++ b/docs/tuning.rst
@@ -242,6 +242,23 @@ should be disabled, to not systematically trigger requests for pieces that are n
 for all peers. You can turn off allow-fast by settings ``session_settings::allowed_fast_set_size``
 to 0.
 
+As an alternative to the explicit cache and suggest messages, there's a *guided cache*
+mode. This means the size of the read cache line that's stored in the cache is determined
+based on the upload rate to the peer that triggered the read operation. The idea being
+that slow peers don't use up a disproportional amount of space in the cache. This
+is enabled through ``session_settings::guided_read_cache``.
+
+In cases where the assumption is that the cache is only used as a read-ahead, and that no
+other peer will ever request the same block while it's still in the cache, the read
+cache can be set to be *volatile*. This means that every block that is requested out of
+the read cache is removed immediately. This saves a significant amount of cache space
+which can be used as read-ahead for other peers. This mode should **never** be combined
+with either ``explicit_read_cache`` or ``suggest_read_cache``, since those use opposite
+strategies for the read cache. You don't want to on one hand attract peers to request
+the same pieces, and on the other hand assume that they won't request the same pieces
+and drop them when the first peer requests it. To enable volatile read cache, set
+``session_settings::volatile_read_cache`` to true.
+
 send buffer low watermark
 -------------------------
 
diff --git a/examples/client_test.cpp b/examples/client_test.cpp
index 894aff838..80b68ad26 100644
--- a/examples/client_test.cpp
+++ b/examples/client_test.cpp
@@ -771,6 +771,7 @@ int main(int argc, char* argv[])
 	//settings.announce_to_all_trackers = true;
 	settings.optimize_hashing_for_speed = false;
 	settings.disk_cache_algorithm = session_settings::largest_contiguous;
+	settings.volatile_read_cache = true;
 
 	int refresh_delay = 1;
 
diff --git a/include/libtorrent/disk_io_thread.hpp b/include/libtorrent/disk_io_thread.hpp
index 21e2a83ee..58456d084 100644
--- a/include/libtorrent/disk_io_thread.hpp
+++ b/include/libtorrent/disk_io_thread.hpp
@@ -84,7 +84,8 @@ namespace libtorrent
 			, buffer_size(0)
 			, piece(0)
 			, offset(0)
-			, priority(0)
+			, max_cache_line(0)
+			, cache_min_time(0)
 		{}
 
 		enum action_t
@@ -123,11 +124,14 @@ namespace libtorrent
 		// file the disk operation failed on
 		std::string error_file;
 
-		// priority decides whether or not this
-		// job will skip entries in the queue or
-		// not. It always skips in front of entries
-		// with lower priority
-		int priority;
+		// if this is > 0, it specifies the max number of blocks to read
+		// ahead in the read cache for this access. This is only valid
+		// for 'read' actions
+		int max_cache_line;
+
+		// if this is > 0, it may increase the minimum time the cache
+		// line caused by this operation stays in the cache
+		int cache_min_time;
 
 		boost::shared_ptr<entry> resume_data;
 
@@ -311,7 +315,9 @@ namespace libtorrent
 			// storage this piece belongs to
 			boost::intrusive_ptr<piece_manager> storage;
 			// the last time a block was writting to this piece
-			ptime last_use;
+			// plus the minimum amount of time the block is guaranteed
+			// to stay in the cache
+			ptime expire;
 			// the number of blocks in the cache for this piece
 			int num_blocks;
 			// the pointers to the block data
@@ -326,7 +332,7 @@ namespace libtorrent
 				ordered_unique<const_mem_fun<cached_piece_entry, std::pair<void*, int>
 				, &cached_piece_entry::storage_piece_pair> >
 				, ordered_non_unique<member<cached_piece_entry, ptime
-					, &cached_piece_entry::last_use> >
+					, &cached_piece_entry::expire> >
 				> 
 			> cache_t;
 
@@ -363,6 +369,7 @@ namespace libtorrent
 		int flush_range(cached_piece_entry& p, int start, int end, mutex::scoped_lock& l);
 		int cache_block(disk_io_job& j
 			, boost::function<void(int,disk_io_job const&)>& handler
+			, int cache_expire
 			, mutex::scoped_lock& l);
 
 		// read cache operations
diff --git a/include/libtorrent/session_settings.hpp b/include/libtorrent/session_settings.hpp
index 547381b85..9a6b3eb3a 100644
--- a/include/libtorrent/session_settings.hpp
+++ b/include/libtorrent/session_settings.hpp
@@ -187,6 +187,8 @@ namespace libtorrent
 			, drop_skipped_requests(false)
 			, low_prio_disk(true)
 			, volatile_read_cache(false)
+			, guided_read_cache(true)
+			, default_cache_min_age(1)
 		{}
 
 		// this is the user agent that will be sent to the tracker
@@ -690,6 +692,18 @@ namespace libtorrent
 		// following. This may be useful if the block is not
 		// expected to be hit again. It would save some memory
 		bool volatile_read_cache;
+
+		// if this is set to true, the size of the cache line
+		// generated by a particular read request depends on the
+		// rate you're sending to that peer. This optimizes the
+		// memory usage of the disk read cache by reading
+		// further ahead for peers that you're uploading at high
+		// rates to
+		bool guided_read_cache;
+
+		// this is the default minimum time any read cache line
+		// is kept in the cache.
+		int default_cache_min_age;
 	};
 
 #ifndef TORRENT_DISABLE_DHT
diff --git a/include/libtorrent/storage.hpp b/include/libtorrent/storage.hpp
index 6e24a8b23..b1b5ad243 100644
--- a/include/libtorrent/storage.hpp
+++ b/include/libtorrent/storage.hpp
@@ -221,16 +221,17 @@ namespace libtorrent
 		void async_read(
 			peer_request const& r
 			, boost::function<void(int, disk_io_job const&)> const& handler
-			, int priority = 0);
+			, int cache_line_size = 0
+			, int cache_expiry = 0);
 
 		void async_read_and_hash(
 			peer_request const& r
 			, boost::function<void(int, disk_io_job const&)> const& handler
-			, int priority = 0);
+			, int cache_expiry = 0);
 
 		void async_cache(int piece
 			, boost::function<void(int, disk_io_job const&)> const& handler
-			, int priority = 0);
+			, int cache_expiry = 0);
 
 		void async_write(
 			peer_request const& r
diff --git a/src/disk_io_thread.cpp b/src/disk_io_thread.cpp
index 45f88add7..191a55fe8 100644
--- a/src/disk_io_thread.cpp
+++ b/src/disk_io_thread.cpp
@@ -332,7 +332,7 @@ namespace libtorrent
 			if (ti.info_hash() != ih) continue;
 			cached_piece_info info;
 			info.piece = i->piece;
-			info.last_use = i->last_use;
+			info.last_use = i->expire;
 			info.kind = cached_piece_info::write_cache;
 			int blocks_in_piece = (ti.piece_size(i->piece) + (m_block_size) - 1) / m_block_size;
 			info.blocks.resize(blocks_in_piece);
@@ -347,7 +347,7 @@ namespace libtorrent
 			if (ti.info_hash() != ih) continue;
 			cached_piece_info info;
 			info.piece = i->piece;
-			info.last_use = i->last_use;
+			info.last_use = i->expire;
 			info.kind = cached_piece_info::read_cache;
 			int blocks_in_piece = (ti.piece_size(i->piece) + (m_block_size) - 1) / m_block_size;
 			info.blocks.resize(blocks_in_piece);
@@ -394,11 +394,13 @@ namespace libtorrent
 
 	struct update_last_use
 	{
+		update_last_use(int exp): expire(exp) {} // exp: seconds from now until the entry expires
 		void operator()(disk_io_thread::cached_piece_entry& p)
 		{
 			TORRENT_ASSERT(p.storage);
-			p.last_use = time_now();
+			p.expire = time_now() + seconds(expire); // restart the expiry window from now
 		}
+		int expire; // length of the expiry window, in seconds
 	};
 
 	disk_io_thread::cache_piece_index_t::iterator disk_io_thread::find_cached_piece(
@@ -423,7 +425,7 @@ namespace libtorrent
 		cache_lru_index_t& widx = m_pieces.get<1>();
 		cache_lru_index_t::iterator i = widx.begin();
 		time_duration cut_off = seconds(m_settings.cache_expiry);
-		while (i != widx.end() && now - i->last_use > cut_off)
+		while (i != widx.end() && now - i->expire > cut_off)
 		{
 			TORRENT_ASSERT(i->storage);
 			flush_range(const_cast<cached_piece_entry&>(*i), 0, INT_MAX, l);
@@ -435,7 +437,9 @@ namespace libtorrent
 		// flush read cache
 		cache_lru_index_t& ridx = m_read_pieces.get<1>();
 		i = ridx.begin();
-		while (i != ridx.end() && now - i->last_use > cut_off)
+		// ridx is the expire-ordered index, so the walk can stop at the
+		// first piece whose expiry is not yet more than cut_off in the past
+		while (i != ridx.end() && now - i->expire > cut_off)
 		{
 			free_piece(const_cast<cached_piece_entry&>(*i), l);
 			ridx.erase(i++);
@@ -478,8 +482,8 @@ namespace libtorrent
 			if (i == idx.end()) return 0;
 		}
 
-		// don't replace an entry that is less than one second old
-		if (time_now() - i->last_use < seconds(1)) return 0;
+		// don't replace an entry that is too young (has not reached its expiry yet)
+		if (time_now() < i->expire) return 0;
 		int blocks = 0;
 		if (num_blocks >= i->num_blocks)
 		{
@@ -727,11 +731,13 @@ namespace libtorrent
 	// returns -1 on failure
 	int disk_io_thread::cache_block(disk_io_job& j
 		, boost::function<void(int,disk_io_job const&)>& handler
+		, int cache_expire
 		, mutex::scoped_lock& l)
 	{
 		INVARIANT_CHECK;
 		TORRENT_ASSERT(find_cached_piece(m_pieces, j, l) == m_pieces.end());
 		TORRENT_ASSERT((j.offset & (m_block_size-1)) == 0);
+		TORRENT_ASSERT(j.cache_min_time >= 0);
 		cached_piece_entry p;
 
 		int piece_size = j.storage->info()->piece_size(j.piece);
@@ -746,7 +752,7 @@ namespace libtorrent
 
 		p.piece = j.piece;
 		p.storage = j.storage;
-		p.last_use = time_now();
+		p.expire = time_now() + seconds(j.cache_min_time);
 		p.num_blocks = 1;
 		p.blocks.reset(new (std::nothrow) cached_block_entry[blocks_in_piece]);
 		if (!p.blocks) return -1;
@@ -902,6 +908,8 @@ namespace libtorrent
 	{
 		INVARIANT_CHECK;
 
+		TORRENT_ASSERT(j.cache_min_time >= 0);
+
 		// this function will create a new cached_piece_entry
 		// and requires that it doesn't already exist
 		cache_piece_index_t& idx = m_read_pieces.get<0>();
@@ -916,6 +924,7 @@ namespace libtorrent
 		blocks_to_read = (std::min)(blocks_to_read, (std::max)((m_settings.cache_size
 			+ m_cache_stats.read_cache_size - in_use())/2, 3));
 		blocks_to_read = (std::min)(blocks_to_read, m_settings.read_cache_line_size);
+		if (j.max_cache_line > 0) blocks_to_read = (std::min)(blocks_to_read, j.max_cache_line);
 
 		if (in_use() + blocks_to_read > m_settings.cache_size)
 		{
@@ -927,7 +936,7 @@ namespace libtorrent
 		cached_piece_entry p;
 		p.piece = j.piece;
 		p.storage = j.storage;
-		p.last_use = time_now();
+		p.expire = time_now() + seconds(j.cache_min_time);
 		p.num_blocks = 0;
 		p.blocks.reset(new (std::nothrow) cached_block_entry[blocks_in_piece]);
 		if (!p.blocks) return -1;
@@ -1017,6 +1026,8 @@ namespace libtorrent
 	{
 		INVARIANT_CHECK;
 
+		TORRENT_ASSERT(j.cache_min_time >= 0);
+
 		cache_piece_index_t& idx = m_read_pieces.get<0>();
 		p = find_cached_piece(m_read_pieces, j, l);
 
@@ -1034,7 +1045,7 @@ namespace libtorrent
 				, options, blocks_in_piece, l);
 			hit = false;
 			if (ret < 0) return ret;
-			idx.modify(p, update_last_use());
+			idx.modify(p, update_last_use(j.cache_min_time));
 		}
 		else if (p == m_read_pieces.end())
 		{
@@ -1046,7 +1057,7 @@ namespace libtorrent
 			cached_piece_entry pe;
 			pe.piece = j.piece;
 			pe.storage = j.storage;
-			pe.last_use = time_now();
+			pe.expire = time_now() + seconds(j.cache_min_time);
 			pe.num_blocks = 0;
 			pe.blocks.reset(new (std::nothrow) cached_block_entry[blocks_in_piece]);
 			if (!pe.blocks) return -1;
@@ -1059,7 +1070,7 @@ namespace libtorrent
 		}
 		else
 		{
-			idx.modify(p, update_last_use());
+			idx.modify(p, update_last_use(j.cache_min_time));
 		}
 		TORRENT_ASSERT(!m_read_pieces.empty());
 		TORRENT_ASSERT(p->piece == j.piece);
@@ -1072,6 +1083,8 @@ namespace libtorrent
 	{
 		TORRENT_ASSERT(j.buffer);
 
+		TORRENT_ASSERT(j.cache_min_time >= 0);
+
 		mutex::scoped_lock l(m_piece_mutex);
 	
 		cache_piece_index_t::iterator p;
@@ -1097,7 +1110,7 @@ namespace libtorrent
 		if (ret < 0) return ret;
 		cache_piece_index_t& idx = m_read_pieces.get<0>();
 		if (p->num_blocks == 0) idx.erase(p);
-		else idx.modify(p, update_last_use());
+		else idx.modify(p, update_last_use(j.cache_min_time));
 
 		// if read cache is disabled or we exceeded the
 		// limit, remove this piece from the cache
@@ -1182,6 +1195,7 @@ namespace libtorrent
 				+ m_cache_stats.read_cache_size - in_use())/2, 3));
 			blocks_to_read = (std::min)(blocks_to_read, m_settings.read_cache_line_size);
 			blocks_to_read = (std::max)(blocks_to_read, min_blocks_to_read);
+			if (j.max_cache_line > 0) blocks_to_read = (std::min)(blocks_to_read, j.max_cache_line);
 			
 			// if we don't have enough space for the new piece, try flushing something else
 			if (in_use() + blocks_to_read > m_settings.cache_size)
@@ -1225,6 +1239,7 @@ namespace libtorrent
 	int disk_io_thread::try_read_from_cache(disk_io_job const& j)
 	{
 		TORRENT_ASSERT(j.buffer);
+		TORRENT_ASSERT(j.cache_min_time >= 0);
 
 		mutex::scoped_lock l(m_piece_mutex);
 		if (!m_settings.use_read_cache) return -2;
@@ -1263,7 +1278,7 @@ namespace libtorrent
 		ret = copy_from_piece(const_cast<cached_piece_entry&>(*p), hit, j, l);
 		if (ret < 0) return ret;
 		if (p->num_blocks == 0) idx.erase(p);
-		else idx.modify(p, update_last_use());
+		else idx.modify(p, update_last_use(j.cache_min_time));
 
 		ret = j.buffer_size;
 		++m_cache_stats.blocks_read;
@@ -1547,6 +1562,11 @@ namespace libtorrent
 #ifdef TORRENT_DISK_STATS
 			ptime start = time_now();
 #endif
+
+			// every job gets at least the configured minimum cache age; a
+			// job that asked for more (guided read cache) keeps its value
+			j.cache_min_time = (std::max)(m_settings.default_cache_min_age, j.cache_min_time);
+
 #ifndef BOOST_NO_EXCEPTIONS
 			try {
 #endif
@@ -1801,6 +1821,8 @@ namespace libtorrent
 					mutex::scoped_lock l(m_piece_mutex);
 					INVARIANT_CHECK;
 
+					TORRENT_ASSERT(j.cache_min_time >= 0);
+
 					if (in_use() >= m_settings.cache_size)
 						flush_cache_blocks(l, in_use() - m_settings.cache_size + 1);
 
@@ -1825,7 +1847,7 @@ namespace libtorrent
 #endif
 						++m_cache_stats.cache_size;
 						++const_cast<cached_piece_entry&>(*p).num_blocks;
-						idx.modify(p, update_last_use());
+						idx.modify(p, update_last_use(j.cache_min_time));
 						// we might just have created a contiguous range
 						// that meets the requirement to be flushed. try it
 						flush_contiguous_blocks(const_cast<cached_piece_entry&>(*p)
@@ -1834,7 +1856,7 @@ namespace libtorrent
 					}
 					else
 					{
-						if (cache_block(j, j.callback, l) < 0)
+						if (cache_block(j, j.callback, j.cache_min_time, l) < 0)
 						{
 							l.unlock();
 							file::iovec_t iov = {j.buffer, j.buffer_size};
diff --git a/src/peer_connection.cpp b/src/peer_connection.cpp
index 7497fc7c5..0cfc82a23 100644
--- a/src/peer_connection.cpp
+++ b/src/peer_connection.cpp
@@ -3846,6 +3846,43 @@ namespace libtorrent
 		send_block_requests();
 	}
 
+	// returns the read cache parameters to request for this peer, as
+	// (max cache line size in blocks, min cache time in seconds).
+	// both are 0 unless guided_read_cache is enabled
+	std::pair<int, int> peer_connection::preferred_caching() const
+	{
+		int line_size = 0;
+		int expiry = 0;
+		if (m_ses.m_settings.guided_read_cache)
+		{
+			boost::shared_ptr<torrent> t = m_torrent.lock();
+
+			int upload_rate = m_statistics.upload_payload_rate();
+			if (upload_rate == 0) upload_rate = 1;
+
+			int num_uploads = m_ses.num_uploads();
+			if (num_uploads == 0) num_uploads = 1;
+			// assume half of the cache is write cache if we're downloading
+			// this torrent as well. If the torrent is gone, be conservative
+			// and assume the same
+			int cache_size = m_ses.m_settings.cache_size / num_uploads;
+			if (!t || !t->is_finished()) cache_size /= 2;
+			// cache_size is the amount of cache we have per peer. The
+			// cache line should not be greater than this
+
+			// try to avoid locking caches for more than a couple of seconds
+			if (upload_rate * 4 / 16 / 1024 < cache_size)
+				cache_size = upload_rate * 4 / 16 / 1024;
+
+			expiry = cache_size * 16 * 1024 / upload_rate;
+			if (expiry < 1) expiry = 1;
+			else if (expiry > 10) expiry = 10;
+
+			line_size = cache_size;
+		}
+		return std::make_pair(line_size, expiry);
+	}
+
 	void peer_connection::fill_send_buffer()
 	{
 #ifdef TORRENT_EXPENSIVE_INVARIANT_CHECKS
@@ -3882,17 +3913,19 @@ namespace libtorrent
 			TORRENT_ASSERT(r.start + r.length <= t->torrent_file().piece_size(r.piece));
 			TORRENT_ASSERT(r.length > 0 && r.start >= 0);
 
+			std::pair<int, int> cache = preferred_caching();
+
 			if (!t->seed_mode() || t->verified_piece(r.piece))
 			{
 				t->filesystem().async_read(r, bind(&peer_connection::on_disk_read_complete
-					, self(), _1, _2, r));
+					, self(), _1, _2, r), cache.first, cache.second);
 			}
 			else
 			{
 				// this means we're in seed mode and we haven't yet
 				// verified this piece (r.piece)
 				t->filesystem().async_read_and_hash(r, bind(&peer_connection::on_disk_read_complete
-					, self(), _1, _2, r));
+					, self(), _1, _2, r), cache.second);
 				t->verified(r.piece);
 			}
 
diff --git a/src/storage.cpp b/src/storage.cpp
index bbe6eb5f0..c90eb4f60 100644
--- a/src/storage.cpp
+++ b/src/storage.cpp
@@ -1526,7 +1526,7 @@ ret:
 	void piece_manager::async_read_and_hash(
 		peer_request const& r
 		, boost::function<void(int, disk_io_job const&)> const& handler
-		, int priority)
+		, int cache_expiry)
 	{
 		disk_io_job j;
 		j.storage = this;
@@ -1535,7 +1535,7 @@ ret:
 		j.offset = r.start;
 		j.buffer_size = r.length;
 		j.buffer = 0;
-		j.priority = priority;
+		j.cache_min_time = cache_expiry;
 		TORRENT_ASSERT(r.length <= 16 * 1024);
 		m_io_thread.add_job(j, handler);
 #ifdef TORRENT_DEBUG
@@ -1548,7 +1548,7 @@ ret:
 
 	void piece_manager::async_cache(int piece
 		, boost::function<void(int, disk_io_job const&)> const& handler
-		, int priority)
+		, int cache_expiry)
 	{
 		disk_io_job j;
 		j.storage = this;
@@ -1557,14 +1557,15 @@ ret:
 		j.offset = 0;
 		j.buffer_size = 0;
 		j.buffer = 0;
-		j.priority = priority;
+		j.cache_min_time = cache_expiry;
 		m_io_thread.add_job(j, handler);
 	}
 
 	void piece_manager::async_read(
 		peer_request const& r
 		, boost::function<void(int, disk_io_job const&)> const& handler
-		, int priority)
+		, int cache_line_size
+		, int cache_expiry)
 	{
 		disk_io_job j;
 		j.storage = this;
@@ -1573,7 +1574,9 @@ ret:
 		j.offset = r.start;
 		j.buffer_size = r.length;
 		j.buffer = 0;
-		j.priority = priority;
+		j.max_cache_line = cache_line_size;
+		j.cache_min_time = cache_expiry;
+
 		// if a buffer is not specified, only one block can be read
 		// since that is the size of the pool allocator's buffers
 		TORRENT_ASSERT(r.length <= 16 * 1024);