/* Copyright (c) 2007-2018, Arvid Norberg, Steven Siloti All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the author nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef TORRENT_DISK_IO_THREAD #define TORRENT_DISK_IO_THREAD #include "libtorrent/config.hpp" #include "libtorrent/storage.hpp" #include "libtorrent/allocator.hpp" #include "libtorrent/io_service.hpp" #include "libtorrent/sliding_average.hpp" #include "libtorrent/disk_io_job.hpp" #include "libtorrent/disk_job_pool.hpp" #include "libtorrent/block_cache.hpp" #include "libtorrent/file_pool.hpp" #include "libtorrent/disk_interface.hpp" #include "libtorrent/performance_counters.hpp" #include "libtorrent/aux_/session_settings.hpp" #include "libtorrent/thread.hpp" #include "libtorrent/aux_/disable_warnings_push.hpp" #include #include #include #include #include #include #ifndef TORRENT_DISABLE_POOL_ALLOCATOR #include #endif #include #include "libtorrent/aux_/disable_warnings_pop.hpp" namespace libtorrent { class alert; struct add_torrent_params; struct counters; class alert_manager; struct cached_piece_info { piece_manager* storage; // holds one entry for each block in this piece. ``true`` represents // the data for that block being in the disk cache and ``false`` means it's not. std::vector blocks; // the time when a block was last written to this piece. The older // a piece is, the more likely it is to be flushed to disk. time_point last_use; // The index of the next block that needs to be hashed. // Blocks are hashed as they are downloaded in order to not // have to re-read them from disk once the piece is complete, to // compare its hash against the hashes in the .torrent file. int next_to_hash; // the piece index for this cache entry. int piece; enum kind_t { read_cache = 0, write_cache = 1, volatile_read_cache = 2 }; // specifies if this piece is part of the read cache or the write cache. kind_t kind; bool need_readback; }; typedef tailqueue jobqueue_t; // this struct holds a number of statistics counters // relevant for the disk io thread and disk cache. struct TORRENT_EXPORT cache_status { // initializes all counters to 0 cache_status() : pieces() #ifndef TORRENT_NO_DEPRECATE , blocks_written(0) , writes(0) , blocks_read(0) , blocks_read_hit(0) , reads(0) , queued_bytes(0) , cache_size(0) , write_cache_size(0) , read_cache_size(0) , pinned_blocks(0) , total_used_buffers(0) , average_read_time(0) , average_write_time(0) , average_hash_time(0) , average_job_time(0) , cumulative_job_time(0) , cumulative_read_time(0) , cumulative_write_time(0) , cumulative_hash_time(0) , total_read_back(0) , read_queue_size(0) , blocked_jobs(0) , queued_jobs(0) , peak_queued(0) , pending_jobs(0) , num_jobs(0) , num_read_jobs(0) , num_write_jobs(0) , arc_mru_size(0) , arc_mru_ghost_size(0) , arc_mfu_size(0) , arc_mfu_ghost_size(0) , arc_write_size(0) , arc_volatile_size(0) , num_writing_threads(0) #endif { #ifndef TORRENT_NO_DEPRECATE memset(num_fence_jobs, 0, sizeof(num_fence_jobs)); #endif } std::vector pieces; #ifndef TORRENT_NO_DEPRECATE // the total number of 16 KiB blocks written to disk // since this session was started. int blocks_written; // the total number of write operations performed since this // session was started. // // The ratio (``blocks_written`` - ``writes``) / ``blocks_written`` represents // the number of saved write operations per total write operations. i.e. a kind // of cache hit ratio for the write cahe. int writes; // the number of blocks that were requested from the // bittorrent engine (from peers), that were served from disk or cache. int blocks_read; // the number of blocks that was just copied from the read cache // // The ratio ``blocks_read_hit`` / ``blocks_read`` is the cache hit ratio // for the read cache. int blocks_read_hit; // the number of read operations used int reads; // the number of bytes queued for writing, including bytes // submitted to the OS for writing, but not yet complete mutable boost::int64_t queued_bytes; // the number of 16 KiB blocks currently in the disk cache (both read and write). // This includes both read and write cache. int cache_size; // the number of blocks in the cache used for write cache int write_cache_size; // the number of 16KiB blocks in the read cache. int read_cache_size; // the number of blocks with a refcount > 0, i.e. // they may not be evicted int pinned_blocks; // the total number of buffers currently in use. // This includes the read/write disk cache as well as send and receive buffers // used in peer connections. // deprecated, use session_stats_metrics "disk.disk_blocks_in_use" mutable int total_used_buffers; // the number of microseconds an average disk I/O job // has to wait in the job queue before it get processed. // the time read jobs takes on average to complete // (not including the time in the queue), in microseconds. This only measures // read cache misses. int average_read_time; // the time write jobs takes to complete, on average, // in microseconds. This does not include the time the job sits in the disk job // queue or in the write cache, only blocks that are flushed to disk. int average_write_time; // the time hash jobs takes to complete on average, in // microseconds. Hash jobs include running SHA-1 on the data (which for the most // part is done incrementally) and sometimes reading back parts of the piece. It // also includes checking files without valid resume data. int average_hash_time; int average_job_time; // the number of milliseconds spent in all disk jobs, and specific ones // since the start of the session. Times are specified in milliseconds int cumulative_job_time; int cumulative_read_time; int cumulative_write_time; int cumulative_hash_time; // the number of blocks that had to be read back from disk because // they were flushed before the SHA-1 hash got to hash them. If this // is large, a larger cache could significantly improve performance int total_read_back; // number of read jobs in the disk job queue int read_queue_size; // number of jobs blocked because of a fence int blocked_jobs; // number of jobs waiting to be issued (m_to_issue) // average over 30 seconds // deprecated, use session_stats_metrics "disk.queued_disk_jobs" int queued_jobs; // largest ever seen number of queued jobs int peak_queued; // number of jobs waiting to complete (m_pending) // average over 30 seconds int pending_jobs; // total number of disk job objects allocated right now int num_jobs; // total number of disk read job objects allocated right now int num_read_jobs; // total number of disk write job objects allocated right now int num_write_jobs; // ARC cache stats. All of these counters are in number of pieces // not blocks. A piece does not necessarily correspond to a certain // number of blocks. The pieces in the ghost list never have any // blocks in them int arc_mru_size; int arc_mru_ghost_size; int arc_mfu_size; int arc_mfu_ghost_size; int arc_write_size; int arc_volatile_size; // the number of threads currently writing to disk int num_writing_threads; // counts only fence jobs that are currently blocking jobs // not fences that are themself blocked int num_fence_jobs[disk_io_job::num_job_ids]; #endif }; // this is a singleton consisting of the thread and a queue // of disk io jobs struct TORRENT_EXTRA_EXPORT disk_io_thread TORRENT_FINAL : disk_job_pool , disk_interface , buffer_allocator_interface { disk_io_thread(io_service& ios , counters& cnt , void* userdata , int block_size = 16 * 1024); ~disk_io_thread(); void set_settings(settings_pack const* sett, alert_manager& alerts); void set_num_threads(int i, bool wait = true); void abort(bool wait); void async_read(piece_manager* storage, peer_request const& r , boost::function const& handler, void* requester , int flags = 0) TORRENT_OVERRIDE; void async_write(piece_manager* storage, peer_request const& r , disk_buffer_holder& buffer , boost::function const& handler , int flags = 0) TORRENT_OVERRIDE; void async_hash(piece_manager* storage, int piece, int flags , boost::function const& handler, void* requester) TORRENT_OVERRIDE; void async_move_storage(piece_manager* storage, std::string const& p, int flags , boost::function const& handler) TORRENT_OVERRIDE; void async_release_files(piece_manager* storage , boost::function const& handler = boost::function()) TORRENT_OVERRIDE; void async_delete_files(piece_manager* storage, int options , boost::function const& handler) TORRENT_OVERRIDE; void async_check_fastresume(piece_manager* storage , bdecode_node const* resume_data , std::vector& links , boost::function const& handler) TORRENT_OVERRIDE; void async_save_resume_data(piece_manager* storage , boost::function const& handler) TORRENT_OVERRIDE; void async_rename_file(piece_manager* storage, int index, std::string const& name , boost::function const& handler) TORRENT_OVERRIDE; void async_stop_torrent(piece_manager* storage , boost::function const& handler) TORRENT_OVERRIDE; #ifndef TORRENT_NO_DEPRECATE void async_cache_piece(piece_manager* storage, int piece , boost::function const& handler) TORRENT_OVERRIDE; void async_finalize_file(piece_manager* storage, int file , boost::function const& handler = boost::function()) TORRENT_OVERRIDE; #endif void async_flush_piece(piece_manager* storage, int piece , boost::function const& handler = boost::function()) TORRENT_OVERRIDE; void async_set_file_priority(piece_manager* storage , std::vector const& prio , boost::function const& handler) TORRENT_OVERRIDE; void async_load_torrent(add_torrent_params* params , boost::function const& handler) TORRENT_OVERRIDE; void async_tick_torrent(piece_manager* storage , boost::function const& handler) TORRENT_OVERRIDE; void clear_read_cache(piece_manager* storage) TORRENT_OVERRIDE; void async_clear_piece(piece_manager* storage, int index , boost::function const& handler) TORRENT_OVERRIDE; // this is not asynchronous and requires that the piece does not // have any pending buffers. It's meant to be used for pieces that // were just read and hashed and failed the hash check. // there should be no read-operations left, and all buffers should // be discardable void clear_piece(piece_manager* storage, int index) TORRENT_OVERRIDE; // implements buffer_allocator_interface void reclaim_block(block_cache_reference ref) TORRENT_OVERRIDE; void free_disk_buffer(char* buf) TORRENT_OVERRIDE { m_disk_cache.free_buffer(buf); } char* allocate_disk_buffer(char const* category) TORRENT_OVERRIDE { bool exceed = false; return allocate_disk_buffer(exceed, boost::shared_ptr(), category); } void trigger_cache_trim(); char* allocate_disk_buffer(bool& exceeded, boost::shared_ptr o , char const* category) TORRENT_OVERRIDE; bool exceeded_cache_use() const { return m_disk_cache.exceeded_max_size(); } void update_stats_counters(counters& c) const TORRENT_OVERRIDE; void get_cache_info(cache_status* ret, bool no_pieces = true , piece_manager const* storage = 0) const TORRENT_OVERRIDE; // this submits all queued up jobs to the thread void submit_jobs(); block_cache* cache() { return &m_disk_cache; } #if TORRENT_USE_ASSERTS bool is_disk_buffer(char* buffer) const TORRENT_OVERRIDE { return m_disk_cache.is_disk_buffer(buffer); } #endif enum thread_type_t { generic_thread, hasher_thread }; void thread_fun(int thread_id, thread_type_t type , boost::shared_ptr w); virtual file_pool& files() TORRENT_OVERRIDE { return m_file_pool; } io_service& get_io_service() { return m_ios; } int prep_read_job_impl(disk_io_job* j, bool check_fence = true); #if TORRENT_USE_INVARIANT_CHECKS void check_invariant() const; #endif void maybe_issue_queued_read_jobs(cached_piece_entry* pe, jobqueue_t& completed_jobs); int do_read(disk_io_job* j, jobqueue_t& completed_jobs); int do_uncached_read(disk_io_job* j); int do_write(disk_io_job* j, jobqueue_t& completed_jobs); int do_uncached_write(disk_io_job* j); int do_hash(disk_io_job* j, jobqueue_t& completed_jobs); int do_uncached_hash(disk_io_job* j); int do_move_storage(disk_io_job* j, jobqueue_t& completed_jobs); int do_release_files(disk_io_job* j, jobqueue_t& completed_jobs); int do_delete_files(disk_io_job* j, jobqueue_t& completed_jobs); int do_check_fastresume(disk_io_job* j, jobqueue_t& completed_jobs); int do_save_resume_data(disk_io_job* j, jobqueue_t& completed_jobs); int do_rename_file(disk_io_job* j, jobqueue_t& completed_jobs); int do_stop_torrent(disk_io_job* j, jobqueue_t& completed_jobs); int do_read_and_hash(disk_io_job* j, jobqueue_t& completed_jobs); #ifndef TORRENT_NO_DEPRECATE int do_cache_piece(disk_io_job* j, jobqueue_t& completed_jobs); int do_finalize_file(disk_io_job* j, jobqueue_t& completed_jobs); #endif int do_flush_piece(disk_io_job* j, jobqueue_t& completed_jobs); int do_flush_hashed(disk_io_job* j, jobqueue_t& completed_jobs); int do_flush_storage(disk_io_job* j, jobqueue_t& completed_jobs); int do_trim_cache(disk_io_job* j, jobqueue_t& completed_jobs); int do_file_priority(disk_io_job* j, jobqueue_t& completed_jobs); int do_load_torrent(disk_io_job* j, jobqueue_t& completed_jobs); int do_clear_piece(disk_io_job* j, jobqueue_t& completed_jobs); int do_tick(disk_io_job* j, jobqueue_t& completed_jobs); int do_resolve_links(disk_io_job* j, jobqueue_t& completed_jobs); void call_job_handlers(void* userdata); private: enum return_value_t { // the do_* functions can return this to indicate the disk // job did not complete immediately, and shouldn't be posted yet defer_handler = -200, // the job cannot be completed right now, put it back in the // queue and try again later retry_job = -201 }; void add_completed_job(disk_io_job* j); void add_completed_jobs(jobqueue_t& jobs); void add_completed_jobs_impl(jobqueue_t& jobs , jobqueue_t& completed_jobs); void fail_jobs(storage_error const& e, jobqueue_t& jobs_); void fail_jobs_impl(storage_error const& e, jobqueue_t& src, jobqueue_t& dst); void check_cache_level(mutex::scoped_lock& l, jobqueue_t& completed_jobs); void perform_job(disk_io_job* j, jobqueue_t& completed_jobs); // this queues up another job to be submitted void add_job(disk_io_job* j, bool user_add = true); void add_fence_job(piece_manager* storage, disk_io_job* j , bool user_add = true); // assumes l is locked (cache mutex). // writes out the blocks [start, end) (releases the lock // during the file operation) int flush_range(cached_piece_entry* p, int start, int end , jobqueue_t& completed_jobs, mutex::scoped_lock& l); // low level flush operations, used by flush_range int build_iovec(cached_piece_entry* pe, int start, int end , file::iovec_t* iov, int* flushing, int block_base_index = 0); void flush_iovec(cached_piece_entry* pe, file::iovec_t const* iov, int const* flushing , int num_blocks, storage_error& error); void iovec_flushed(cached_piece_entry* pe , int* flushing, int num_blocks, int block_offset , storage_error const& error , jobqueue_t& completed_jobs); // assumes l is locked (the cache mutex). // assumes pe->hash to be set. // If there are new blocks in piece 'pe' that have not been // hashed by the partial_hash object attached to this piece, // the piece will void kick_hasher(cached_piece_entry* pe, mutex::scoped_lock& l); // flags to pass in to flush_cache() enum flush_flags_t { // only flush read cache (this is cheap) flush_read_cache = 1, // flush read cache, and write cache flush_write_cache = 2, // flush read cache, delete write cache without flushing to disk flush_delete_cache = 4, // expect all pieces for the storage to have been // cleared when flush_cache() returns. This is only // used for asserts and only applies for fence jobs flush_expect_clear = 8 }; void flush_cache(piece_manager* storage, boost::uint32_t flags, jobqueue_t& completed_jobs, mutex::scoped_lock& l); void flush_expired_write_blocks(jobqueue_t& completed_jobs, mutex::scoped_lock& l); void flush_piece(cached_piece_entry* pe, int flags, jobqueue_t& completed_jobs, mutex::scoped_lock& l); int try_flush_hashed(cached_piece_entry* p, int cont_blocks, jobqueue_t& completed_jobs, mutex::scoped_lock& l); void try_flush_write_blocks(int num, jobqueue_t& completed_jobs, mutex::scoped_lock& l); // used to batch reclaiming of blocks to once per cycle void commit_reclaimed_blocks(); void maybe_flush_write_blocks(); void execute_job(disk_io_job* j); void immediate_execute(); void abort_jobs(); // this is a counter which is atomically incremented // by each thread as it's started up, in order to // assign a unique id to each thread boost::atomic m_num_threads; // set to true once we start shutting down boost::atomic m_abort; // this is a counter of how many threads are currently running. // it's used to identify the last thread still running while // shutting down. This last thread is responsible for cleanup boost::atomic m_num_running_threads; // the actual threads running disk jobs std::vector > m_threads; aux::session_settings m_settings; // userdata pointer for the complete_job function, which // is posted to the network thread when jobs complete void* m_userdata; // the last time we expired write blocks from the cache time_point m_last_cache_expiry; time_point m_last_file_check; // LRU cache of open files file_pool m_file_pool; // disk cache mutable mutex m_cache_mutex; block_cache m_disk_cache; enum { cache_check_idle, cache_check_active, cache_check_reinvoke }; int m_cache_check_state; // total number of blocks in use by both the read // and the write cache. This is not supposed to // exceed m_cache_size counters& m_stats_counters; // average read time for cache misses (in microseconds) average_accumulator m_read_time; // average write time (in microseconds) average_accumulator m_write_time; // average hash time (in microseconds) average_accumulator m_hash_time; // average time to serve a job (any job) in microseconds average_accumulator m_job_time; // this is the main thread io_service. Callbacks are // posted on this in order to have them execute in // the main thread. io_service& m_ios; // used to wake up the disk IO thread when there are new // jobs on the job queue (m_queued_jobs) condition_variable m_job_cond; // mutex to protect the m_queued_jobs list mutable mutex m_job_mutex; // jobs queued for servicing jobqueue_t m_queued_jobs; // when using more than 2 threads, this is // used for just hashing jobs, just for threads // dedicated to do hashing condition_variable m_hash_job_cond; jobqueue_t m_queued_hash_jobs; // used to rate limit disk performance warnings time_point m_last_disk_aio_performance_warning; // jobs that are completed are put on this queue // whenever the queue size grows from 0 to 1 // a message is posted to the network thread, which // will then drain the queue and execute the jobs' // handler functions mutex m_completed_jobs_mutex; jobqueue_t m_completed_jobs; // these are blocks that have been returned by the main thread // but they haven't been freed yet. This is used to batch // reclaiming of blocks, to only need one mutex lock per cycle std::vector m_blocks_to_reclaim; // when this is true, there is an outstanding message in the // message queue that will reclaim all blocks in // m_blocks_to_reclaim, there's no need to send another one bool m_outstanding_reclaim_message; #if TORRENT_USE_ASSERTS int m_magic; #endif }; } #endif