From e4df6331577682a9cd5ae0a58681be7c4cf8153e Mon Sep 17 00:00:00 2001 From: Arvid Norberg Date: Sun, 20 Mar 2011 05:47:27 +0000 Subject: [PATCH] added another disk cache flush algorithm to minimize readback when hashing pieces --- docs/manual.rst | 7 ++- examples/client_test.cpp | 3 +- include/libtorrent/disk_io_thread.hpp | 10 +++- include/libtorrent/session_settings.hpp | 2 +- parse_session_stats.py | 5 +- src/disk_io_thread.cpp | 63 ++++++++++++++++++++++--- src/session.cpp | 6 ++- src/session_impl.cpp | 37 +++++++++------ 8 files changed, 101 insertions(+), 32 deletions(-) diff --git a/docs/manual.rst b/docs/manual.rst index 2961db8ef..2b8f6c267 100644 --- a/docs/manual.rst +++ b/docs/manual.rst @@ -4294,7 +4294,7 @@ session_settings int file_checks_delay_per_block; enum disk_cache_algo_t - { lru, largest_contiguous }; + { lru, largest_contiguous, avoid_readback }; disk_cache_algo_t disk_cache_algorithm; @@ -4898,7 +4898,10 @@ flushes the entire piece, in the write cache, that was least recently written to. This is specified by the ``session_settings::lru`` enum value. ``session_settings::largest_contiguous`` will flush the largest sequences of contiguous blocks from the write cache, regarless of the -piece's last use time. +piece's last use time. ``session_settings::avoid_readback`` will prioritize +flushing blocks that will avoid having to read them back in to verify +the hash of the piece once it's done. This is especially useful for high +throughput setups, where reading from the disk is especially expensive. ``read_cache_line_size`` is the number of blocks to read into the read cache when a read cache miss occurs. 
Setting this to 0 is essentially diff --git a/examples/client_test.cpp b/examples/client_test.cpp index c20641108..3eec2fadf 100644 --- a/examples/client_test.cpp +++ b/examples/client_test.cpp @@ -1845,7 +1845,8 @@ int main(int argc, char* argv[]) out += esc("0"); #endif char const* piece_state[4] = {"", " slow", " medium", " fast"}; - snprintf(str, sizeof(str), "]%s", piece_state[i->piece_state]); + snprintf(str, sizeof(str), "] %2d%s ", cp ? cp->next_to_hash : -1 + , piece_state[i->piece_state]); out += str; if (cp) { diff --git a/include/libtorrent/disk_io_thread.hpp b/include/libtorrent/disk_io_thread.hpp index 0c5e16862..7bb5413db 100644 --- a/include/libtorrent/disk_io_thread.hpp +++ b/include/libtorrent/disk_io_thread.hpp @@ -73,6 +73,7 @@ namespace libtorrent int piece; std::vector blocks; ptime last_use; + int next_to_hash; enum kind_t { read_cache = 0, write_cache = 1 }; kind_t kind; }; @@ -353,6 +354,8 @@ namespace libtorrent int piece; // storage this piece belongs to boost::intrusive_ptr storage; + // the pointers to the block data + boost::shared_array blocks; // the last time a block was writting to this piece // plus the minimum amount of time the block is guaranteed // to stay in the cache @@ -361,8 +364,11 @@ namespace libtorrent int num_blocks; // used to determine if this piece should be flushed int num_contiguous_blocks; - // the pointers to the block data - boost::shared_array blocks; + // this is the first block that has not yet been hashed + // by the partial hasher. 
When minimizing read-back, this + // is used to determine if flushing a range would force us + // to read it back later when hashing + int next_block_to_hash; std::pair storage_piece_pair() const { return std::pair(storage.get(), piece); } diff --git a/include/libtorrent/session_settings.hpp b/include/libtorrent/session_settings.hpp index cdc87102e..025a207e7 100644 --- a/include/libtorrent/session_settings.hpp +++ b/include/libtorrent/session_settings.hpp @@ -751,7 +751,7 @@ namespace libtorrent int file_checks_delay_per_block; enum disk_cache_algo_t - { lru, largest_contiguous }; + { lru, largest_contiguous, avoid_readback }; disk_cache_algo_t disk_cache_algorithm; diff --git a/parse_session_stats.py b/parse_session_stats.py index 34f66df95..8ef2eaa63 100755 --- a/parse_session_stats.py +++ b/parse_session_stats.py @@ -62,9 +62,10 @@ gen_report('piece_picker_end_game', ['end game piece picker blocks', 'piece pick gen_report('piece_picker', ['piece picks', 'reject piece picks', 'unchoke piece picks', 'incoming redundant piece picks', 'incoming piece picks', 'end game piece picks', 'snubbed piece picks']) gen_report('bandwidth', ['% failed payload bytes', '% wasted payload bytes', '% protocol bytes']) gen_report('disk_time', ['disk read time', 'disk write time', 'disk queue time', 'disk hash time', 'disk job time', 'disk sort time']) -gen_report('disk_time2', ['cumulative read time', 'cumulative write time', 'cumulative hash time', 'cumulative job time', 'cumulative sort time']) -gen_report('disk_cache_hits', ['disk block read', 'read cache hits', 'disk block written']) +gen_report('disk_time_proportion', ['% read time', '% write time', '% hash time', '% sort time']) +gen_report('disk_cache_hits', ['disk block read', 'read cache hits', 'disk block written', 'disk read back']) gen_report('disk_cache', ['read disk cache size', 'disk cache size', 'disk buffer allocations', 'cache size']) +gen_report('disk_readback', ['% read back']) gen_report('disk_queue', ['disk 
queue size', 'disk queued bytes']) gen_report('waste', ['failed bytes', 'redundant bytes', 'download rate']) gen_report('connect_candidates', ['connect candidates']) diff --git a/src/disk_io_thread.cpp b/src/disk_io_thread.cpp index 4e341af90..1780c9f04 100644 --- a/src/disk_io_thread.cpp +++ b/src/disk_io_thread.cpp @@ -305,6 +305,7 @@ namespace libtorrent torrent_info const& ti = *i->storage->info(); if (ti.info_hash() != ih) continue; cached_piece_info info; + info.next_to_hash = i->next_block_to_hash; info.piece = i->piece; info.last_use = i->expire; info.kind = cached_piece_info::write_cache; @@ -320,6 +321,7 @@ namespace libtorrent torrent_info const& ti = *i->storage->info(); if (ti.info_hash() != ih) continue; cached_piece_info info; + info.next_to_hash = i->next_block_to_hash; info.piece = i->piece; info.last_use = i->expire; info.kind = cached_piece_info::read_cache; @@ -419,7 +421,10 @@ namespace libtorrent { TORRENT_ASSERT(i->storage); flush_range(const_cast(*i), 0, INT_MAX, l); - widx.erase(i++); + // we want to keep the piece in here to have an accurate + // number for next_block_to_hash, if we're in avoid_readback mode + if (m_settings.disk_cache_algorithm != session_settings::avoid_readback) + widx.erase(i++); } if (m_settings.explicit_read_cache) return; @@ -638,9 +643,9 @@ namespace libtorrent while (blocks > 0) { cache_lru_index_t::iterator i = - std::max_element(idx.begin(), idx.end() - , boost::bind(&disk_io_thread::cached_piece_entry::num_contiguous_blocks, _1) - < boost::bind(&disk_io_thread::cached_piece_entry::num_contiguous_blocks, _2)); + std::max_element(idx.begin(), idx.end() + , boost::bind(&disk_io_thread::cached_piece_entry::num_contiguous_blocks, _1) + < boost::bind(&disk_io_thread::cached_piece_entry::num_contiguous_blocks, _2)); if (i == idx.end()) return ret; tmp = flush_contiguous_blocks(const_cast(*i), l); if (i->num_blocks == 0) idx.erase(i); @@ -648,6 +653,39 @@ namespace libtorrent ret += tmp; } } + else if 
(m_settings.disk_cache_algorithm == session_settings::avoid_readback) + { + cache_lru_index_t& idx = m_pieces.get<1>(); + for (cache_lru_index_t::iterator i = idx.begin(); i != idx.end(); ++i) + { + cached_piece_entry& p = const_cast(*i); + if (!i->blocks[i->next_block_to_hash].buf) continue; + int piece_size = i->storage->info()->piece_size(i->piece); + int blocks_in_piece = (piece_size + m_block_size - 1) / m_block_size; + int start = i->next_block_to_hash; + int end = start + 1; + while (end < blocks_in_piece && i->blocks[end].buf) ++end; + tmp = flush_range(p, start, end, l); + p.num_contiguous_blocks = contiguous_blocks(p); + blocks -= tmp; + ret += tmp; + if (blocks <= 0) break; + } + + // if we still need to flush blocks, flush the largest contiguous blocks + // regardless of if we'll have to read them back later + while (blocks > 0) + { + cache_lru_index_t::iterator i = + std::max_element(idx.begin(), idx.end() + , boost::bind(&disk_io_thread::cached_piece_entry::num_contiguous_blocks, _1) + < boost::bind(&disk_io_thread::cached_piece_entry::num_contiguous_blocks, _2)); + if (i == idx.end()) return ret; + tmp = flush_contiguous_blocks(const_cast(*i), l); + blocks -= tmp; + ret += tmp; + } + } return ret; } @@ -729,6 +767,7 @@ namespace libtorrent --p.num_blocks; ++m_cache_stats.blocks_written; --m_cache_stats.cache_size; + if (i == p.next_block_to_hash) ++p.next_block_to_hash; } ptime done = time_now_hires(); @@ -798,6 +837,7 @@ namespace libtorrent p.expire = time_now() + seconds(j.cache_min_time); p.num_blocks = 1; p.num_contiguous_blocks = 1; + p.next_block_to_hash = 0; p.blocks.reset(new (std::nothrow) cached_block_entry[blocks_in_piece]); if (!p.blocks) return -1; int block = j.offset / m_block_size; @@ -983,6 +1023,7 @@ namespace libtorrent p.expire = time_now() + seconds(j.cache_min_time); p.num_blocks = 0; p.num_contiguous_blocks = 0; + p.next_block_to_hash = 0; p.blocks.reset(new (std::nothrow) cached_block_entry[blocks_in_piece]); if (!p.blocks) 
return -1; @@ -1106,6 +1147,7 @@ namespace libtorrent pe.expire = time_now() + seconds(j.cache_min_time); pe.num_blocks = 0; pe.num_contiguous_blocks = 0; + pe.next_block_to_hash = 0; pe.blocks.reset(new (std::nothrow) cached_block_entry[blocks_in_piece]); if (!pe.blocks) return -1; ret = read_into_piece(pe, 0, options, INT_MAX, l); @@ -2070,7 +2112,9 @@ namespace libtorrent --m_cache_stats.cache_size; --const_cast(*p).num_blocks; } - else if ((block > 0 && p->blocks[block-1].buf) || (block < blocks_in_piece-1 && p->blocks[block+1].buf)) + else if ((block > 0 && p->blocks[block-1].buf) + || (block < blocks_in_piece-1 && p->blocks[block+1].buf) + || p->num_blocks == 0) { // update the contiguous blocks counter for this piece. Only if it has // an adjacent block. If it doesn't, we already know it couldn't have @@ -2091,8 +2135,13 @@ namespace libtorrent idx.modify(p, update_last_use(j.cache_min_time)); // we might just have created a contiguous range // that meets the requirement to be flushed. try it - flush_contiguous_blocks(const_cast(*p) - , l, m_settings.write_cache_line_size); + // if we're in avoid_readback mode, don't do this. 
Only flush + // pieces when we need more space in the cache (which will avoid + // flushing blocks out-of-order) or when we issue a hash job, + // which indicates the piece is completely downloaded + if (m_settings.disk_cache_algorithm != session_settings::avoid_readback) + flush_contiguous_blocks(const_cast(*p) + , l, m_settings.write_cache_line_size); if (p->num_blocks == 0) idx.erase(p); test_error(j); TORRENT_ASSERT(!j.storage->error()); diff --git a/src/session.cpp b/src/session.cpp index 7e6d697e2..34fcc2ea1 100644 --- a/src/session.cpp +++ b/src/session.cpp @@ -228,8 +228,10 @@ namespace libtorrent // the max number of bytes pending write before we throttle // download rate set.max_queued_disk_bytes = 100 * 1024 * 1024; - // flush write cache based on largest contiguous block - set.disk_cache_algorithm = session_settings::largest_contiguous; + // flush write cache in a way to minimize the amount we need to + // read back once we want to hash-check the piece. i.e. try to + // flush all blocks in-order + set.disk_cache_algorithm = session_settings::avoid_readback; set.explicit_read_cache = false; // prevent fast pieces to interfere with suggested pieces diff --git a/src/session_impl.cpp b/src/session_impl.cpp index 0ed1eb5f5..026a4a502 100644 --- a/src/session_impl.cpp +++ b/src/session_impl.cpp @@ -959,12 +959,12 @@ namespace aux { ":connect candidates" ":disk queue limit" ":disk queue low watermark" - ":cumulative job time" - ":cumulative read time" - ":cumulative write time" - ":cumulative hash time" - ":cumulative sort time" - ":disk total read back" + ":% read time" + ":% write time" + ":% hash time" + ":% sort time" + ":disk read back" + ":% read back" "\n\n", m_stats_logger); } #endif @@ -2727,12 +2727,19 @@ namespace aux { ++peer_ul_rate_buckets[ul_bucket]; } + int low_watermark = m_settings.max_queued_disk_bytes_low_watermark == 0 + ? 
m_settings.max_queued_disk_bytes / 2 + : m_settings.max_queued_disk_bytes_low_watermark; + if (now - m_last_log_rotation > hours(1)) rotate_stats_log(); if (m_stats_logger) { cache_status cs = m_disk_thread.status(); + + int total_job_time = cs.cumulative_job_time == 0 ? 1 : cs.cumulative_job_time; + fprintf(m_stats_logger , "%f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t" "%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t" @@ -2742,8 +2749,8 @@ namespace aux { "%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t" "%f\t%f\t%f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t" "%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t" - "%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t" - "%d\t%d\t%d\t%d\n" + "%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%f\t%f\t" + "%f\t%f\t%d\t%f\n" , total_milliseconds(now - m_last_log_rotation) / 1000.f , int(m_stat.total_upload() - m_last_uploaded) , int(m_stat.total_download() - m_last_downloaded) @@ -2831,13 +2838,13 @@ namespace aux { , m_settings.connections_limit , connect_candidates , int(m_settings.max_queued_disk_bytes) - , int(m_settings.max_queued_disk_bytes_low_watermark) - , int(cs.cumulative_job_time) - , int(cs.cumulative_read_time) - , int(cs.cumulative_write_time) - , int(cs.cumulative_hash_time) - , int(cs.cumulative_sort_time) - , cs.total_read_back + , low_watermark + , float(cs.cumulative_read_time * 100.f / total_job_time) + , float(cs.cumulative_write_time * 100.f / total_job_time) + , float(cs.cumulative_hash_time * 100.f / total_job_time) + , float(cs.cumulative_sort_time * 100.f / total_job_time) + , int(cs.total_read_back - m_last_cache_status.total_read_back) + , float(cs.total_read_back * 100.f / (cs.blocks_written == 0 ? 1: cs.blocks_written)) ); m_last_cache_status = cs; m_last_failed = m_total_failed_bytes;