From f1b8582a95955ac428038869913964edfdc7249d Mon Sep 17 00:00:00 2001 From: Arvid Norberg Date: Sun, 20 Jan 2013 23:21:53 +0000 Subject: [PATCH] add gen_todo.py script. include todo.html and mark up some todos in the code with priority --- docs/hacking.rst | 5 + docs/todo.html | 3599 ++++++++++++++++++++ gen_todo.py | 110 + include/libtorrent/kademlia/find_data.hpp | 3 +- include/libtorrent/kademlia/node_entry.hpp | 4 +- include/libtorrent/torrent.hpp | 5 +- include/libtorrent/torrent_handle.hpp | 4 - src/alert.cpp | 8 +- src/file.cpp | 1 - src/http_seed_connection.cpp | 2 +- src/http_tracker_connection.cpp | 2 +- src/kademlia/rpc_manager.cpp | 3 +- src/peer_connection.cpp | 2 +- src/session_impl.cpp | 7 +- src/storage.cpp | 7 +- src/torrent.cpp | 9 +- src/udp_tracker_connection.cpp | 6 +- src/utp_stream.cpp | 5 +- 18 files changed, 3752 insertions(+), 30 deletions(-) create mode 100644 docs/todo.html create mode 100644 gen_todo.py diff --git a/docs/hacking.rst b/docs/hacking.rst index 3828dd824..b1e0cec14 100644 --- a/docs/hacking.rst +++ b/docs/hacking.rst @@ -9,6 +9,11 @@ libtorrent hacking :depth: 2 :backlinks: none +This describe some of the internals of libtorrent. If you're looking for +something to contribute, please take a look at the `todo list`_. + +.. _`todo list`: todo.html + terminology =========== diff --git a/docs/todo.html b/docs/todo.html new file mode 100644 index 000000000..132069bcc --- /dev/null +++ b/docs/todo.html @@ -0,0 +1,3599 @@ + + + + +

libtorrent todo-list

+
relevance 3src/alert.cpp:370change this to use a timed wait on a condition variable problem is, that's not necessarily portable. But it should be used where available. This implementation can be left the way it is for more primitive platforms
relevance 3src/storage.cpp:989use binary search to find the file entry
relevance 3src/storage.cpp:1095use binary search to find the file entry
relevance 3src/utp_stream.cpp:412remove the read timeout concept. This should not be necessary
relevance 3src/utp_stream.cpp:415remove the write timeout concept. This should not be necessary
relevance 3src/kademlia/rpc_manager.cpp:36remove this dependency by having the dht observer have its own flags
relevance 3include/libtorrent/kademlia/find_data.hpp:60rename this class to find_peers, since that's what it does find_data is an unnecessarily generic name
relevance 2src/torrent.cpp:5790pass in ec along with the alert
relevance 2src/utp_stream.cpp:617support the option to turn it off
relevance 2include/libtorrent/torrent.hpp:1038this should be a deque, since time critical pieces are expected to be popped in the same order as they are sorted. The expectation is that new items are pushed back and items are popped from the front
relevance 2include/libtorrent/kademlia/node_entry.hpp:92replace with a union of address_v4 and address_v6 to not waste space. This struct is instantiated hundreds of times for the routing table
relevance 1src/http_seed_connection.cpp:120in chunked encoding mode, this assert won't hold. the chunk headers should be subtracted from the receive_buffer_size
relevance 1src/peer_connection.cpp:2488peers should really be corked/uncorked outside of all completed disk operations
relevance 1src/session_impl.cpp:6135we only need to do this if our global IPv4 address has changed since the DHT (currently) only supports IPv4. Since restarting the DHT is kind of expensive, it would be nice to not do it unnecessarily
relevance 1src/torrent.cpp:5294save the send_stats state instead of throwing them away it may pose an issue when downgrading though
relevance 0src/bt_peer_connection.cpp:660this could be optimized using knuth morris pratt
relevance 0src/bt_peer_connection.cpp:1755don't trust this blindly
relevance 0src/bt_peer_connection.cpp:2069if we're finished, send upload_only message
relevance 0src/bt_peer_connection.cpp:3308move the erasing into the loop above remove all payload ranges that has been sent
relevance 0src/file.cpp:1205is there any way to pre-fetch data from a file on windows?
relevance 0src/http_tracker_connection.cpp:99support authentication (i.e. user name and password) in the URL
relevance 0src/i2p_stream.cpp:172move this to proxy_base and use it in all proxies
relevance 0src/packet_buffer.cpp:176use compare_less_wrap for this comparison as well
relevance 0src/peer_connection.cpp:2651this might need something more so that once we have the metadata we can construct a full bitfield
relevance 0src/peer_connection.cpp:2782sort the allowed fast set in priority order
relevance 0src/peer_connection.cpp:3892we should probably just send a HAVE_ALL here
relevance 0src/peer_connection.cpp:4475peers should really be corked/uncorked outside of all completed disk operations
relevance 0src/policy.cpp:857only allow _one_ connection to use this override at a time
relevance 0src/policy.cpp:1889how do we deal with our external address changing? Pass in a force-update maybe? and keep a version number in policy
relevance 0src/session_impl.cpp:1887recalculate all connect candidates for all torrents
relevance 0src/session_impl.cpp:4267allow extensions to sort torrents for queuing
relevance 0src/session_impl.cpp:4423use a lower limit than m_settings.connections_limit to allocate the to 10% or so of connection slots for incoming connections
relevance 0src/session_impl.cpp:4457make this bias configurable
relevance 0src/session_impl.cpp:4458also take average_peers into account, to create a bias for downloading torrents with < average peers
relevance 0src/session_impl.cpp:4602make configurable
relevance 0src/session_impl.cpp:4616make configurable
relevance 0src/session_impl.cpp:5113find by url?
relevance 0src/session_impl.cpp:5469report the proper address of the router
relevance 0src/session_impl.cpp:5673report errors as alerts
relevance 0src/storage.cpp:325if the read fails, set error and exit immediately
relevance 0src/storage.cpp:358if the read fails, set error and exit immediately
relevance 0src/storage.cpp:623make this more generic to not just work if files have been renamed, but also if they have been merged into a single file for instance maybe use the same format as .torrent files and reuse some code from torrent_info
relevance 0src/storage.cpp:1208what if file_base is used to merge several virtual files into a single physical file? We should probably disable this if file_base is used. This is not a widely used feature though
relevance 0src/torrent.cpp:1112make this depend on the error and on the filesystem the files are being downloaded to. If the error is no_space_left_on_device and the filesystem doesn't support sparse files, only zero the priorities of the pieces that are at the tails of all files, leaving everything up to the highest written piece in each file
relevance 0src/torrent.cpp:1505filter out peers that are disconnecting
relevance 0src/torrent.cpp:2289this pattern is repeated in a few places. Factor this into a function and generalize the concept of a torrent having a dedicated listen port
relevance 0src/torrent.cpp:5028make this more generic to not just work if files have been renamed, but also if they have been merged into a single file for instance maybe use the same format as .torrent files and reuse some code from torrent_info The mapped_files needs to be read both in the network thread and in the disk thread, since they both have their own mapped files structures which are kept in sync
relevance 0src/torrent.cpp:5164if this is a merkle torrent and we can't restore the tree, we need to wipe all the bits in the have array, but not necessarily we might want to do a full check to see if we have all the pieces
relevance 0src/torrent.cpp:5351make this more generic to not just work if files have been renamed, but also if they have been merged into a single file for instance
relevance 0src/torrent.cpp:5928ideally, we would disconnect the oldest connection i.e. the one that has waited the longest to connect.
relevance 0src/torrent.cpp:6005if peer is a really good peer, maybe we shouldn't disconnect it
relevance 0src/torrent.cpp:6180should disconnect all peers that have the pieces we have not just seeds
relevance 0src/torrent.cpp:7820go through the pieces we have and count the total number of downloaders we have. Only count peers that are interested in us since some peers might not send have messages for pieces we have it num_interested == 0, we need to pick a new piece
relevance 0src/torrent.cpp:8047if there's been long enough since we requested something from this piece, request one of the backup blocks (the one with the least number of requests to it) and update the last request timestamp
relevance 0src/torrent.cpp:8718with some response codes, we should just consider the tracker as a failure and not retry it anymore
relevance 0src/torrent_info.cpp:187should this take a char const*?
relevance 0src/torrent_info.cpp:366this logic should be a separate step done once the torrent is loaded, and the original filenames should be preserved!
relevance 0src/torrent_info.cpp:387once the filename renaming is removed from here this check can be removed as well
relevance 0src/udp_tracker_connection.cpp:548don't use a string here. The problem is that some trackers will respond with actual strings. Especially i2p trackers
relevance 0src/utp_stream.cpp:1846we might want to do something else here as well, to resend the packet immediately without it being an MTU probe
relevance 0src/kademlia/dht_tracker.cpp:641fix this stats logging ++m_replies_sent[e["r"]]; m_replies_bytes_sent[e["r"]] += int(m_send_buf.size());
relevance 0src/kademlia/node.cpp:63configurable?
relevance 0src/kademlia/node.cpp:690find_node should write directly to the response entry
relevance 0src/kademlia/rpc_manager.cpp:439don't call short_timeout() again if we've already called it once
relevance 0include/libtorrent/config.hpp:283Make this count Unicode characters instead of bytes on windows
relevance 0include/libtorrent/ip_voter.hpp:100instead, have one instance per possible subnet, global IPv4, global IPv6, loopback, 192.168.x.x, 10.x.x.x, etc.
relevance 0include/libtorrent/proxy_base.hpp:152it would be nice to remember the bind port and bind once we know where the proxy is m_sock.bind(endpoint, ec);
relevance 0include/libtorrent/torrent_info.hpp:448these strings could be lazy_entry* to save memory
relevance 0include/libtorrent/udp_socket.hpp:195move this debug facility into a base class. It's used in a lot of places
relevance 0include/libtorrent/utp_stream.hpp:350implement
relevance 0include/libtorrent/web_peer_connection.hpp:127if we make this be a disk_buffer_holder instead we would save a copy sometimes use allocate_disk_receive_buffer and release_disk_receive_buffer
relevance 0include/libtorrent/kademlia/dht_tracker.hpp:79take a udp_socket_interface here instead. Move udp_socket_interface down into libtorrent core
\ No newline at end of file diff --git a/gen_todo.py b/gen_todo.py new file mode 100644 index 000000000..71338c952 --- /dev/null +++ b/gen_todo.py @@ -0,0 +1,110 @@ +import glob +import os + +paths = ['src/*.cpp', 'src/kademlia/*.cpp', 'include/libtorrent/*.hpp', 'include/libtorrent/kademlia/*.hpp', 'include/libtorrent/aux_/*.hpp', 'include/libtorrent/extensions/*.hpp'] + +os.system('ctags %s 2>/dev/null' % ' '.join(paths)) + +files = [] + +for p in paths: + files.extend(glob.glob(p)) + +items = [] + +# keeps 20 non-comment lines, used to get more context around +# todo-items +context = [] + +for f in files: + h = open(f) + + state = '' + line_no = 0 + context_lines = 0 + + for l in h: + line_no += 1 + line = l.strip() + if 'TODO:' in line and line.startswith('//'): + line = line.split('TODO:')[1].strip() + state = 'todo' + items.append({}) + items[-1]['location'] = '%s:%d' % (f, line_no) + items[-1]['priority'] = 0 + if line[0] in '0123456789': + items[-1]['priority'] = int(line[0]) + line = line[1:].strip() + items[-1]['todo'] = line + continue + + if state == '': + context.append(l) + if len(context) > 20: context.pop(0) + continue + + if state == 'todo': + if line.strip().startswith('//'): + items[-1]['todo'] += '\n' + items[-1]['todo'] += line[2:].strip() + else: + state = 'context' + items[-1]['context'] = ''.join(context) + '
' + l + '
'; + context_lines = 1 + continue + + if state == 'context': + items[-1]['context'] += l + context_lines += 1 + if context_lines > 30: state = '' + + h.close() + +items.sort(key = lambda x: x['priority'], reverse = True) + +#for i in items: +# print '\n\n', i['todo'], '\n' +# print i['location'], '\n' +# print 'prio: ', i['priority'], '\n' +# if 'context' in i: +# print i['context'], '\n' + +out = open('docs/todo.html', 'w+') +out.write(''' + + + +

libtorrent todo-list

+''') + +index = 0 +for i in items: + if not 'context' in i: i['context'] = '' + out.write('' \ + % (i['priority'], index, i['location'], i['todo'].replace('\n', ' '))) + + out.write('' \ + % (index, i['todo'], i['location'], i['context'])) + index += 1 + +out.write('
relevance %d%s%s
') +out.close() + diff --git a/include/libtorrent/kademlia/find_data.hpp b/include/libtorrent/kademlia/find_data.hpp index b892df17f..3a319f84f 100644 --- a/include/libtorrent/kademlia/find_data.hpp +++ b/include/libtorrent/kademlia/find_data.hpp @@ -57,7 +57,8 @@ class node_impl; // -------- find data ----------- -//TODO: rename this to find_peers +//TODO: 3 rename this class to find_peers, since that's what it does +// find_data is an unnecessarily generic name class find_data : public traversal_algorithm { public: diff --git a/include/libtorrent/kademlia/node_entry.hpp b/include/libtorrent/kademlia/node_entry.hpp index e800c931b..5dcdf463b 100644 --- a/include/libtorrent/kademlia/node_entry.hpp +++ b/include/libtorrent/kademlia/node_entry.hpp @@ -89,7 +89,9 @@ struct node_entry else rtt = int(rtt) / 3 + int(new_rtt) * 2 / 3; } - // TODO: replace with a union of address_v4 and address_v6 + // TODO: 2 replace with a union of address_v4 and address_v6 + // to not waste space. This struct is instantiated hundreds of times + // for the routing table address addr; boost::uint16_t port; // the number of times this node has failed to diff --git a/include/libtorrent/torrent.hpp b/include/libtorrent/torrent.hpp index 0042bd5d7..8dee396cd 100644 --- a/include/libtorrent/torrent.hpp +++ b/include/libtorrent/torrent.hpp @@ -1035,7 +1035,10 @@ namespace libtorrent }; // this list is sorted by time_critical_piece::deadline - // TODO: this should be a deque + // TODO: 2 this should be a deque, since time critical + // pieces are expected to be popped in the same order + // as they are sorted. The expectation is that new items + // are pushed back and items are popped from the front std::list m_time_critical_pieces; std::string m_trackerid; diff --git a/include/libtorrent/torrent_handle.hpp b/include/libtorrent/torrent_handle.hpp index b93129a34..0de09f956 100644 --- a/include/libtorrent/torrent_handle.hpp +++ b/include/libtorrent/torrent_handle.hpp @@ -371,10 +371,6 @@ namespace libtorrent // when it was added. std::string name() const; - // TODO: add a feature where the user can tell the torrent - // to finish all pieces currently in the pipeline, and then - // abort the torrent. - void set_upload_limit(int limit) const; int upload_limit() const; void set_download_limit(int limit) const; diff --git a/src/alert.cpp b/src/alert.cpp index 70e86b036..81be6e25a 100644 --- a/src/alert.cpp +++ b/src/alert.cpp @@ -361,14 +361,16 @@ namespace libtorrent { // system_time end = get_system_time() // + boost::posix_time::microseconds(total_microseconds(max_wait)); - // apparently this call can be interrupted - // prematurely if there are other signals + // this call can be interrupted prematurely by other signals // while (m_condition.timed_wait(lock, end)) // if (!m_alerts.empty()) return m_alerts.front(); ptime start = time_now_hires(); - // TODO: change this to use an asio timer instead + // TODO: 3 change this to use a timed wait on a condition variable + // problem is, that's not necessarily portable. But it should be used + // where available. This implementation can be left the way it is for + // more primitive platforms while (m_alerts.empty()) { lock.unlock(); diff --git a/src/file.cpp b/src/file.cpp index 613186197..3c4aa1baf 100644 --- a/src/file.cpp +++ b/src/file.cpp @@ -338,7 +338,6 @@ namespace libtorrent // rely on default umask to filter x and w permissions // for group and others - // TODO: copy the mode from the source file int permissions = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH; diff --git a/src/http_seed_connection.cpp b/src/http_seed_connection.cpp index 8e57e3f94..7506fa6c7 100644 --- a/src/http_seed_connection.cpp +++ b/src/http_seed_connection.cpp @@ -117,7 +117,7 @@ namespace libtorrent else { int receive_buffer_size = receive_buffer().left() - m_parser.body_start(); - // TODO: in chunked encoding mode, this assert won't hold + // TODO: 1 in chunked encoding mode, this assert won't hold. // the chunk headers should be subtracted from the receive_buffer_size TORRENT_ASSERT(receive_buffer_size <= t->block_size()); ret.bytes_downloaded = t->block_size() - receive_buffer_size; diff --git a/src/http_tracker_connection.cpp b/src/http_tracker_connection.cpp index d3660ef34..1fead9d00 100644 --- a/src/http_tracker_connection.cpp +++ b/src/http_tracker_connection.cpp @@ -96,7 +96,7 @@ namespace libtorrent void http_tracker_connection::start() { - // TODO: authentication + // TODO: 0 support authentication (i.e. user name and password) in the URL std::string url = tracker_req().url; if (tracker_req().kind == tracker_request::scrape_request) diff --git a/src/kademlia/rpc_manager.cpp b/src/kademlia/rpc_manager.cpp index f3684c852..01a8a921d 100644 --- a/src/kademlia/rpc_manager.cpp +++ b/src/kademlia/rpc_manager.cpp @@ -33,7 +33,8 @@ POSSIBILITY OF SUCH DAMAGE. #include "libtorrent/pch.hpp" #include "libtorrent/socket.hpp" -// TODO: it would be nice to not have this dependency here +// TODO: 3 remove this dependency by having the dht observer +// have its own flags #include "libtorrent/aux_/session_impl.hpp" #include diff --git a/src/peer_connection.cpp b/src/peer_connection.cpp index ba786de18..51b0b478e 100644 --- a/src/peer_connection.cpp +++ b/src/peer_connection.cpp @@ -2485,7 +2485,7 @@ namespace libtorrent TORRENT_ASSERT(m_ses.is_network_thread()); // flush send buffer at the end of this scope - // TODO: peers should really be corked/uncorked outside of + // TODO: 1 peers should really be corked/uncorked outside of // all completed disk operations cork _c(*this); diff --git a/src/session_impl.cpp b/src/session_impl.cpp index 5c425fc30..86bc09b7e 100644 --- a/src/session_impl.cpp +++ b/src/session_impl.cpp @@ -4082,7 +4082,6 @@ retry: if (m_next_dht_torrent == m_torrents.end()) m_next_dht_torrent = m_torrents.begin(); m_next_dht_torrent->second->dht_announce(); - // TODO: make a list for torrents that want to be announced on the DHT ++m_next_dht_torrent; if (m_next_dht_torrent == m_torrents.end()) m_next_dht_torrent = m_torrents.begin(); @@ -4265,7 +4264,7 @@ retry: bool handled_by_extension = false; #ifndef TORRENT_DISABLE_EXTENSIONS - // TODO: allow extensions to sort torrents for queuing + // TODO: 0 allow extensions to sort torrents for queuing #endif if (!handled_by_extension) @@ -6133,7 +6132,9 @@ retry: // since we have a new external IP now, we need to // restart the DHT with a new node ID #ifndef TORRENT_DISABLE_DHT - // TODO: we only need to do this if our global IPv4 address has changed + // TODO: 1 we only need to do this if our global IPv4 address has changed + // since the DHT (currently) only supports IPv4. Since restarting the DHT + // is kind of expensive, it would be nice to not do it unnecessarily if (m_dht) { entry s = m_dht->state(); diff --git a/src/storage.cpp b/src/storage.cpp index 7b29833ee..4e90881c0 100644 --- a/src/storage.cpp +++ b/src/storage.cpp @@ -986,7 +986,7 @@ ret: size_type file_offset = start; file_storage::iterator file_iter; - // TODO: use binary search! + // TODO: 3 use binary search to find the file entry for (file_iter = files().begin();;) { if (file_offset < file_iter->size) @@ -1092,7 +1092,7 @@ ret: size_type file_offset = start; file_storage::iterator file_iter; - // TODO: use binary search! + // TODO: 3 use binary search to find the file entry for (file_iter = files().begin();;) { if (file_offset < file_iter->size) @@ -1206,7 +1206,8 @@ ret: // a specific alignment for writes. Make sure to truncate the size // TODO: what if file_base is used to merge several virtual files - // into a single physical file? + // into a single physical file? We should probably disable this + // if file_base is used. This is not a widely used feature though file_handle->set_size(file_iter->size, ec); } } diff --git a/src/torrent.cpp b/src/torrent.cpp index 2def77bc9..214abd80e 100644 --- a/src/torrent.cpp +++ b/src/torrent.cpp @@ -4780,13 +4780,13 @@ namespace libtorrent if (web->type == web_seed_entry::url_seed) { c = new (std::nothrow) web_peer_connection( - m_ses, shared_from_this(), s, a, web->url, &web->peer_info, // TODO: pass in web + m_ses, shared_from_this(), s, a, web->url, &web->peer_info, web->auth, web->extra_headers); } else if (web->type == web_seed_entry::http_seed) { c = new (std::nothrow) http_seed_connection( - m_ses, shared_from_this(), s, a, web->url, &web->peer_info, // TODO: pass in web + m_ses, shared_from_this(), s, a, web->url, &web->peer_info, web->auth, web->extra_headers); } if (!c) return; @@ -5291,7 +5291,8 @@ namespace libtorrent , end(m_trackers.end()); i != end; ++i) { // don't save trackers we can't trust - // TODO: save the send_stats state instead + // TODO: 1 save the send_stats state instead of throwing them away + // it may pose an issue when downgrading though if (i->send_stats == false) continue; if (i->tier == tier) { @@ -5786,7 +5787,7 @@ namespace libtorrent // failed to parse it. Pause the torrent if (alerts().should_post()) { - // TODO: pass in ec along with the alert + // TODO: 2 pass in ec along with the alert alerts().post_alert(metadata_failed_alert(get_handle())); } set_error(errors::invalid_swarm_metadata, ""); diff --git a/src/udp_tracker_connection.cpp b/src/udp_tracker_connection.cpp index eaa8539c3..ba41d4a42 100644 --- a/src/udp_tracker_connection.cpp +++ b/src/udp_tracker_connection.cpp @@ -545,9 +545,9 @@ namespace libtorrent std::vector peer_list; for (int i = 0; i < num_peers; ++i) { - // TODO: don't use a string here. The problem is that - // some trackers will respond with actual strings. - // Especially i2p trackers + // TODO: it would be more efficient to not use a string here. + // however, the problem is that some trackers will respond + // with actual strings. For example i2p trackers peer_entry e; char ip_string[100]; unsigned int a = detail::read_uint8(buf); diff --git a/src/utp_stream.cpp b/src/utp_stream.cpp index fc92659f4..463f11b49 100644 --- a/src/utp_stream.cpp +++ b/src/utp_stream.cpp @@ -409,9 +409,10 @@ struct utp_socket_impl // timers when we should trigger the read and // write callbacks (unless the buffers fill up // before) + // TODO: 3 remove the read timeout concept. This should not be necessary ptime m_read_timeout; - // TODO: remove the write timeout concept, and maybe even the read timeout + // TODO: 3 remove the write timeout concept. This should not be necessary ptime m_write_timeout; // the time when the last packet we sent times out. Including re-sends. @@ -613,7 +614,7 @@ struct utp_socket_impl bool m_attached:1; // this is true if nagle is enabled (which it is by default) - // TODO: support the option to turn it off + // TODO: 2 support the option to turn it off bool m_nagle:1; // this is true while the socket is in slow start mode. It's