From cb9c3cb37d348e4d50fa46bab89bea4a5b11de1d Mon Sep 17 00:00:00 2001 From: Arvid Norberg Date: Thu, 21 May 2009 16:15:05 +0000 Subject: [PATCH] memory optimization for checking torrents --- ChangeLog | 4 + docs/manual.rst | 24 ------ include/libtorrent/error_code.hpp | 1 + include/libtorrent/storage.hpp | 15 ++-- src/disk_io_thread.cpp | 25 +++++- src/error_code.cpp | 1 + src/storage.cpp | 130 +++++++++++++++--------------- test/setup_transfer.cpp | 2 +- test/test_swarm.cpp | 2 +- 9 files changed, 106 insertions(+), 98 deletions(-) diff --git a/ChangeLog b/ChangeLog index 919cba4a8..b420fe395 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,7 @@ + * applied temporary memory storage optimization to when checking + a torrent as well + * removed hash_for_slot() from storage_interface. It is now implemented + by using the readv() function from the storage implementation * improved IPv6 support by announcing twice when necessary * added feature to set a separate global rate limit for local peers * added preset settings for low memory environments and seed machines diff --git a/docs/manual.rst b/docs/manual.rst index de12569d9..c13818982 100644 --- a/docs/manual.rst +++ b/docs/manual.rst @@ -5312,7 +5312,6 @@ The interface looks like this:: virtual bool move_slot(int src_slot, int dst_slot) = 0; virtual bool swap_slots(int slot1, int slot2) = 0; virtual bool swap_slots3(int slot1, int slot2, int slot3) = 0; - virtual sha1_hash hash_for_slot(int slot, partial_hash& h, int piece_size) = 0; virtual bool rename_file(int file, std::string const& new_name) = 0; virtual bool release_files() = 0; virtual bool delete_files() = 0; @@ -5489,29 +5488,6 @@ This is only used in compact mode. Returning ``true`` indicates an error occurred. -hash_for_slot() ---------------- - - :: - - sha1_hash hash_for_slot(int slot, partial_hash& h, int piece_size) = 0; - -The function should read the remaining bytes of the slot and hash it with the -sha-1 state in ``partion_hash``. The ``partial_hash`` struct looks like this:: - - struct partial_hash - { - partial_hash(); - int offset; - hasher h; - }; - -``offset`` is the number of bytes in the slot that has already been hashed, and -``h`` is the sha-1 state of that hash. ``piece_size`` is the size of the piece -that is stored in the given slot. - -The function should return the hash of the piece stored in the slot. - rename_file() ------------- diff --git a/include/libtorrent/error_code.hpp b/include/libtorrent/error_code.hpp index 288a4588f..8f4d3a458 100644 --- a/include/libtorrent/error_code.hpp +++ b/include/libtorrent/error_code.hpp @@ -74,6 +74,7 @@ namespace libtorrent invalid_torrent_handle, invalid_entry_type, missing_info_hash_in_uri, + file_too_short, }; } diff --git a/include/libtorrent/storage.hpp b/include/libtorrent/storage.hpp index 4964f3811..9cc1dfeb9 100644 --- a/include/libtorrent/storage.hpp +++ b/include/libtorrent/storage.hpp @@ -154,9 +154,6 @@ namespace libtorrent // in slot3 and the data in slot3 in slot1 virtual bool swap_slots3(int slot1, int slot2, int slot3) = 0; - // returns the sha1-hash for the data at the given slot - virtual sha1_hash hash_for_slot(int slot, partial_hash& h, int piece_size) = 0; - // this will close all open files that are opened for // writing. This is called when a torrent has finished // downloading. @@ -319,6 +316,12 @@ namespace libtorrent bool allocate_slots(int num_slots, bool abort_on_disk = false); + // updates the ph.h hasher object with the data at the given slot + // and optionally a 'small hash' as well, the hash for + // the partial slot. Returns the number of bytes read + int hash_for_slot(int slot, partial_hash& h, int piece_size + , int small_piece_size = 0, sha1_hash* small_hash = 0); + int read_impl( file::iovec_t* bufs , int piece_index @@ -337,7 +340,8 @@ namespace libtorrent // -1=error 0=ok >0=skip this many pieces int check_one_piece(int& have_piece); int identify_data( - char const* piece_data + sha1_hash const& large_hash + , sha1_hash const& small_hash , int current_slot); void switch_to_full_mode(); @@ -419,9 +423,6 @@ namespace libtorrent // storage (osed when remapping files) storage_constructor_type m_storage_constructor; - // temporary buffer used while checking - disk_buffer_holder m_piece_data; - // this maps a piece hash to piece index. It will be // build the first time it is used (to save time if it // isn't needed) diff --git a/src/disk_io_thread.cpp b/src/disk_io_thread.cpp index 65f2879fe..84561e4e2 100644 --- a/src/disk_io_thread.cpp +++ b/src/disk_io_thread.cpp @@ -659,8 +659,14 @@ namespace libtorrent file::iovec_t b = { buf.get(), buffer_size }; ret = p.storage->read_impl(&b, p.piece, start_block * m_block_size, 1); l.lock(); - TORRENT_ASSERT(ret == buffer_size || p.storage->error()); if (p.storage->error()) { return -1; } + if (ret != buffer_size) + { + // this means the file wasn't big enough for this read + p.storage->get_storage_impl()->set_error("" + , error_code(errors::file_too_short, libtorrent_category)); + return -1; + } ++m_cache_stats.reads; } @@ -692,8 +698,14 @@ namespace libtorrent l.unlock(); ret = p.storage->read_impl(iov, p.piece, start_block * m_block_size, iov_counter); l.lock(); - TORRENT_ASSERT(ret == buffer_size || p.storage->error()); if (p.storage->error()) { return -1; } + if (ret != buffer_size) + { + // this means the file wasn't big enough for this read + p.storage->get_storage_impl()->set_error("" + , error_code(errors::file_too_short, libtorrent_category)); + return -1; + } ++m_cache_stats.reads; } @@ -1314,6 +1326,15 @@ namespace libtorrent test_error(j); break; } + if (ret != j.storage->m_files.piece_size(j.piece) - j.offset) + { + // this means the file wasn't big enough for this read + j.error = error_code(errors::file_too_short, libtorrent_category); + j.error_file.clear(); + j.str = j.error.message(); + ret = -1; + break; + } ++m_cache_stats.blocks_read; } read_holder.release(); diff --git a/src/error_code.cpp b/src/error_code.cpp index c443f18e6..a300dc0e8 100644 --- a/src/error_code.cpp +++ b/src/error_code.cpp @@ -70,6 +70,7 @@ namespace libtorrent "invalid torrent handle used", "invalid type requested from entry", "missing info-hash from URI", + "file too short", }; if (ev < 0 || ev >= sizeof(msgs)/sizeof(msgs[0])) return "Unknown error"; diff --git a/src/storage.cpp b/src/storage.cpp index 746950067..32be9f58c 100644 --- a/src/storage.cpp +++ b/src/storage.cpp @@ -432,7 +432,6 @@ namespace libtorrent bool swap_slots3(int slot1, int slot2, int slot3); bool verify_resume_data(lazy_entry const& rd, std::string& error); bool write_resume_data(entry& rd) const; - sha1_hash hash_for_slot(int slot, partial_hash& ph, int piece_size); // this identifies a read or write operation // so that storage::readwrite() knows what to @@ -476,14 +475,16 @@ namespace libtorrent bool m_allocate_files; }; - sha1_hash storage::hash_for_slot(int slot, partial_hash& ph, int piece_size) + int piece_manager::hash_for_slot(int slot, partial_hash& ph, int piece_size + , int small_piece_size, sha1_hash* small_hash) { TORRENT_ASSERT(!error()); + int num_read = 0; int slot_size = piece_size - ph.offset; if (slot_size > 0) { int block_size = 16 * 1024; - if (disk_pool()) block_size = disk_pool()->block_size(); + if (m_storage->disk_pool()) block_size = m_storage->disk_pool()->block_size(); int size = slot_size; int num_blocks = (size + block_size - 1) / block_size; @@ -492,40 +493,68 @@ namespace libtorrent // and then hash it. When optimizing for memory usage, we read // one block at a time and hash it. This ends up only using a // single buffer - if (settings().optimize_hashing_for_speed) + if (m_storage->settings().optimize_hashing_for_speed) { file::iovec_t* bufs = TORRENT_ALLOCA(file::iovec_t, num_blocks); for (int i = 0; i < num_blocks; ++i) { - bufs[i].iov_base = disk_pool()->allocate_buffer("hash temp"); + bufs[i].iov_base = m_storage->disk_pool()->allocate_buffer("hash temp"); bufs[i].iov_len = (std::min)(block_size, size); size -= bufs[i].iov_len; } - readv(bufs, slot, ph.offset, num_blocks); + num_read = m_storage->readv(bufs, slot, ph.offset, num_blocks); for (int i = 0; i < num_blocks; ++i) { - ph.h.update((char const*)bufs[i].iov_base, bufs[i].iov_len); - disk_pool()->free_buffer((char*)bufs[i].iov_base); + if (small_hash && small_piece_size < block_size) + { + ph.h.update((char const*)bufs[i].iov_base, small_piece_size); + *small_hash = hasher(ph.h).final(); + small_hash = 0; // avoid this case again + ph.h.update((char const*)bufs[i].iov_base + small_piece_size + , bufs[i].iov_len - small_piece_size); + } + else + { + ph.h.update((char const*)bufs[i].iov_base, bufs[i].iov_len); + small_piece_size -= bufs[i].iov_len; + } + m_storage->disk_pool()->free_buffer((char*)bufs[i].iov_base); } } else { file::iovec_t buf; - disk_buffer_holder holder(*disk_pool(), disk_pool()->allocate_buffer("hash temp")); + disk_buffer_holder holder(*m_storage->disk_pool() + , m_storage->disk_pool()->allocate_buffer("hash temp")); buf.iov_base = holder.get(); for (int i = 0; i < num_blocks; ++i) { buf.iov_len = (std::min)(block_size, size); - readv(&buf, slot, ph.offset, 1); - ph.h.update((char const*)buf.iov_base, buf.iov_len); + int ret = m_storage->readv(&buf, slot, ph.offset, 1); + if (ret > 0) num_read += ret; + + if (small_hash && small_piece_size < block_size) + { + ph.h.update((char const*)buf.iov_base, small_piece_size); + *small_hash = hasher(ph.h).final(); + small_hash = 0; // avoid this case again + ph.h.update((char const*)buf.iov_base + small_piece_size + , buf.iov_len - small_piece_size); + } + else + { + ph.h.update((char const*)buf.iov_base, buf.iov_len); + small_piece_size -= buf.iov_len; + } + ph.offset += buf.iov_len; size -= buf.iov_len; } } - if (error()) return sha1_hash(0); + if (error()) return 0; } - return ph.h.final(); + return num_read; } bool storage::initialize(bool allocate_files) @@ -1333,16 +1362,8 @@ ret: } if (file_bytes_left != bytes_transferred) - { - // the file was not big enough -#ifdef TORRENT_WINDOWS - ec = error_code(ERROR_HANDLE_EOF, get_system_category()); -#else - ec = error_code(EIO, get_posix_category()); -#endif - set_error(m_save_path / file_iter->path, ec); return bytes_transferred; - } + advance_bufs(current_buf, bytes_transferred); TORRENT_ASSERT(count_bufs(current_buf, bytes_left - file_bytes_left) <= num_bufs); } @@ -1440,7 +1461,6 @@ ret: , m_scratch_buffer2(io, 0) , m_scratch_piece(-1) , m_storage_constructor(sc) - , m_piece_data(io, 0) , m_io_thread(io) , m_torrent(torrent) { @@ -1631,7 +1651,9 @@ ret: int slot = slot_for(piece); TORRENT_ASSERT(slot != has_no_slot); - return m_storage->hash_for_slot(slot, ph, m_files.piece_size(piece)); + hash_for_slot(slot, ph, m_files.piece_size(piece)); + if (m_storage->error()) return sha1_hash(0); + return ph.h.final(); } int piece_manager::move_storage_impl(fs::path const& save_path) @@ -1804,31 +1826,11 @@ ret: } int piece_manager::identify_data( - char const* piece_data + sha1_hash const& large_hash + , sha1_hash const& small_hash , int current_slot) { // INVARIANT_CHECK; - - const int piece_size = static_cast(m_files.piece_length()); - const int last_piece_size = static_cast(m_files.piece_size( - m_files.num_pieces() - 1)); - - // calculate a small digest, with the same - // size as the last piece. And a large digest - // which has the same size as a normal piece - hasher small_digest; - small_digest.update(piece_data, last_piece_size); - hasher large_digest(small_digest); - TORRENT_ASSERT(piece_size - last_piece_size >= 0); - if (piece_size - last_piece_size > 0) - { - large_digest.update( - piece_data + last_piece_size - , piece_size - last_piece_size); - } - sha1_hash large_hash = large_digest.final(); - sha1_hash small_hash = small_digest.final(); - typedef std::multimap::const_iterator map_iter; map_iter begin1; map_iter end1; @@ -2359,7 +2361,6 @@ ret: TORRENT_ASSERT(m_current_slot == m_files.num_pieces()); // clear the memory we've been using - m_piece_data.reset(); std::multimap().swap(m_hash_to_piece); if (m_storage_mode != storage_mode_compact) @@ -2433,18 +2434,24 @@ ret: m_hash_to_piece.insert(std::make_pair(m_info->hash_for_piece(i), i)); } - if (!m_piece_data) - { - int blocks_per_piece = (std::max)(m_files.piece_length() / m_io_thread.block_size(), 1); - m_piece_data.reset(m_io_thread.allocate_buffers(blocks_per_piece, "check piece") - , blocks_per_piece); - } - + partial_hash ph; + int num_read = 0; int piece_size = m_files.piece_size(m_current_slot); - int num_read = m_storage->read(m_piece_data.get() - , m_current_slot, 0, piece_size); + int small_piece_size = m_files.piece_size(m_files.num_pieces() - 1); + bool read_short = true; + sha1_hash small_hash; + if (piece_size == small_piece_size) + { + num_read = hash_for_slot(m_current_slot, ph, piece_size, 0, 0); + } + else + { + num_read = hash_for_slot(m_current_slot, ph, piece_size + , small_piece_size, &small_hash); + } + read_short = num_read != piece_size; - if (num_read < 0) + if (read_short) { if (m_storage->error() #ifdef TORRENT_WINDOWS @@ -2454,17 +2461,14 @@ ret: && m_storage->error() != error_code(ENOENT, get_posix_category())) #endif { - m_piece_data.reset(); return -1; } + // if the file is incomplete, skip the rest of it return skip_file(); } - // if the file is incomplete, skip the rest of it - if (num_read != piece_size) - return skip_file(); - - int piece_index = identify_data(m_piece_data.get(), m_current_slot); + sha1_hash large_hash = ph.h.final(); + int piece_index = identify_data(large_hash, small_hash, m_current_slot); if (piece_index >= 0) have_piece = piece_index; diff --git a/test/setup_transfer.cpp b/test/setup_transfer.cpp index c8ad37457..b59429873 100644 --- a/test/setup_transfer.cpp +++ b/test/setup_transfer.cpp @@ -299,7 +299,7 @@ setup_transfer(session* ses1, session* ses2, session* ses3 } char ih_hex[41]; to_hex((char const*)&t->info_hash()[0], 20, ih_hex); - std::cerr << "generated torrent: " << ih_hex << std::endl; + std::cerr << "generated torrent: " << ih_hex << " ./tmp1" << suffix << "/temporary" << std::endl; } else { diff --git a/test/test_swarm.cpp b/test/test_swarm.cpp index 4e0941450..8bc5a4a5e 100644 --- a/test/test_swarm.cpp +++ b/test/test_swarm.cpp @@ -96,7 +96,7 @@ void test_swarm(bool super_seeding = false, bool strict = false, bool seed_mode p.seed_mode = seed_mode; // test using piece sizes smaller than 16kB boost::tie(tor1, tor2, tor3) = setup_transfer(&ses1, &ses2, &ses3, true - , false, true, "_swarm", 8 * 1024, 0, super_seeding, &p); + , false, true, "_swarm", 32 * 1024, 0, super_seeding, &p); if (time_critical) {