premiere-libtorrent/src/torrent_info.cpp

746 lines
19 KiB
C++

/*
Copyright (c) 2003-2008, Arvid Norberg
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the distribution.
* Neither the name of the author nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include "libtorrent/pch.hpp"
#include <ctime>
#include <iostream>
#include <fstream>
#include <iomanip>
#include <iterator>
#include <algorithm>
#include <set>
#ifdef _MSC_VER
#pragma warning(push, 1)
#endif
#include <boost/lexical_cast.hpp>
#include <boost/filesystem/path.hpp>
#include <boost/filesystem.hpp>
#include <boost/bind.hpp>
#ifdef _MSC_VER
#pragma warning(pop)
#endif
#include "libtorrent/torrent_info.hpp"
#include "libtorrent/bencode.hpp"
#include "libtorrent/hasher.hpp"
#include "libtorrent/entry.hpp"
#include "libtorrent/file.hpp"
#include "libtorrent/utf8.hpp"
namespace gr = boost::gregorian;
using namespace libtorrent;
namespace
{
namespace fs = boost::filesystem;
// Appends chr to str as a two-byte UTF-8 sequence, i.e. the byte value is
// treated as a code point in the range 0x00-0xff.
void convert_to_utf8(std::string& str, unsigned char chr)
{
    // lead byte 110000xx: carries the two most significant bits of chr
    int const lead = 0xc0 | ((chr & 0xff) >> 6);
    // continuation byte 10xxxxxx: carries the six least significant bits
    int const trail = 0x80 | (chr & 0x3f);

    str.push_back(static_cast<char>(lead));
    str.push_back(static_cast<char>(trail));
}
// Checks whether target consists only of ASCII bytes and well-formed 2-, 3-
// and 4-byte UTF-8 sequences (recognised by their lead/continuation bit
// patterns). Every byte that does not belong to such a sequence is
// re-encoded with convert_to_utf8(). If at least one byte had to be
// re-encoded, target is replaced by the repaired string.
//
// returns true if target was already valid (and is left untouched), false
// if it was rewritten.
bool verify_encoding(std::string& target)
{
    std::string tmp_path;
    bool valid_encoding = true;
    for (std::string::iterator i = target.begin()
        , end(target.end()); i != end; ++i)
    {
        // valid ascii-character
        if ((*i & 0x80) == 0)
        {
            tmp_path += *i;
            continue;
        }

        // a non-ascii byte at the very end cannot start a sequence
        if (std::distance(i, end) < 2)
        {
            convert_to_utf8(tmp_path, *i);
            valid_encoding = false;
            continue;
        }

        // valid 2-byte utf-8 character (110xxxxx 10xxxxxx)
        if ((i[0] & 0xe0) == 0xc0
            && (i[1] & 0xc0) == 0x80)
        {
            tmp_path += i[0];
            tmp_path += i[1];
            // skip the continuation byte, the loop increment skips the lead
            i += 1;
            continue;
        }

        if (std::distance(i, end) < 3)
        {
            convert_to_utf8(tmp_path, *i);
            valid_encoding = false;
            continue;
        }

        // valid 3-byte utf-8 character (1110xxxx 10xxxxxx 10xxxxxx)
        if ((i[0] & 0xf0) == 0xe0
            && (i[1] & 0xc0) == 0x80
            && (i[2] & 0xc0) == 0x80)
        {
            tmp_path += i[0];
            tmp_path += i[1];
            tmp_path += i[2];
            i += 2;
            continue;
        }

        if (std::distance(i, end) < 4)
        {
            convert_to_utf8(tmp_path, *i);
            valid_encoding = false;
            continue;
        }

        // valid 4-byte utf-8 character (11110xxx 10xxxxxx 10xxxxxx 10xxxxxx)
        // the lead byte has to be matched against 11110xxx; matching it
        // against the 3-byte pattern would make this branch unreachable
        // and cause every 4-byte character to be treated as invalid
        if ((i[0] & 0xf8) == 0xf0
            && (i[1] & 0xc0) == 0x80
            && (i[2] & 0xc0) == 0x80
            && (i[3] & 0xc0) == 0x80)
        {
            tmp_path += i[0];
            tmp_path += i[1];
            tmp_path += i[2];
            tmp_path += i[3];
            i += 3;
            continue;
        }

        // not part of any recognised sequence
        convert_to_utf8(tmp_path, *i);
        valid_encoding = false;
    }

    // the encoding was not valid utf-8
    // replace the string with the correctly
    // encoded one
    if (!valid_encoding) target = tmp_path;
    return valid_encoding;
}
// Runs the string overload of verify_encoding() on the path of target and
// stores the repaired path back only if the encoding had to be corrected.
void verify_encoding(file_entry& target)
{
    std::string path_str = target.path.string();
    bool const was_valid = verify_encoding(path_str);
    if (was_valid) return;
    target.path = path_str;
}
// Shortens path_element to at most NAME_MAX bytes. The file extension is
// preserved, unless it is longer than 15 bytes, in which case the element
// is simply cut off at NAME_MAX.
void trim_path_element(std::string& path_element)
{
    // on windows, NAME_MAX refers to Unicode characters
    // on linux it refers to bytes (utf-8 encoded)
    // TODO: Make this count Unicode characters instead of bytes on windows
    if (!(path_element.size() > NAME_MAX)) return;

    // truncate filenames that are too long. But keep extensions!
    std::string const suffix = fs::extension(path_element);
    if (suffix.size() > 15)
    {
        // not a plausible extension, do not bother keeping it
        path_element.resize(NAME_MAX);
        return;
    }

    // make room for the extension and put it back at the end
    path_element.resize(NAME_MAX - suffix.size());
    path_element += suffix;
}
// Fills in target from one entry of the 'files' list of a multi-file
// torrent.
//
// dict is the dictionary describing the file, it has to contain an integer
// 'length' and a list of strings under 'path.utf-8' or 'path'. root_dir is
// used as the first component of the resulting path.
//
// returns false if the entry is malformed: missing or non-integer length,
// negative length, missing path list, non-string path element, or if the
// resulting path is a complete (absolute) path.
bool extract_single_file(lazy_entry const& dict, file_entry& target
    , std::string const& root_dir)
{
    lazy_entry const* length = dict.dict_find("length");
    if (length == 0 || length->type() != lazy_entry::int_t)
        return false;
    // a file cannot have a negative size. The single-file case in
    // parse_info_section() rejects this as well
    if (length->int_value() < 0)
        return false;
    target.size = length->int_value();
    target.path = root_dir;
    target.file_base = 0;

    // prefer the path.utf-8
    // because if it exists, it is more
    // likely to be correctly encoded
    lazy_entry const* p = dict.dict_find("path.utf-8");
    if (p == 0 || p->type() != lazy_entry::list_t)
        p = dict.dict_find("path");
    if (p == 0 || p->type() != lazy_entry::list_t)
        return false;

    for (int i = 0, end(p->list_size()); i < end; ++i)
    {
        if (p->list_at(i)->type() != lazy_entry::string_t)
            return false;
        std::string path_element = p->list_at(i)->string_value();
        trim_path_element(path_element);
        // silently drop parent directory references so that the
        // file cannot be placed outside of root_dir
        if (path_element != "..")
            target.path /= path_element;
    }
    verify_encoding(target);
    if (target.path.is_complete())
        return false;

    // bitcomet pad file
#if BOOST_VERSION < 103600
    if (target.path.leaf().substr(0, 18) == "_____padding_file_")
#else
    if (target.path.filename().substr(0, 18) == "_____padding_file_")
#endif
        target.pad_file = true;
    return true;
}
// Adds one file to target for every element of list, using
// extract_single_file() to interpret each element.
//
// returns false if list is not a list or if any element fails to parse.
// Files parsed before the failing element have already been added to
// target at that point.
bool extract_files(lazy_entry const& list, file_storage& target
    , std::string const& root_dir)
{
    if (list.type() != lazy_entry::list_t) return false;

    int const num_files = list.list_size();
    for (int idx = 0; idx < num_files; ++idx)
    {
        file_entry fe;
        bool const parsed = extract_single_file(*list.list_at(idx), fe, root_dir);
        if (!parsed) return false;
        target.add_file(fe);
    }
    return true;
}
}
namespace libtorrent
{
// Reads the whole file at filename into v.
//
// returns 0 on success (v is empty for an empty file), -1 if the file
// could not be opened or its size could not be determined, -2 if the file
// is larger than 5000000 bytes and -3 if reading the content failed or
// came up short.
int load_file(fs::path const& filename, std::vector<char>& v)
{
    file f;
    error_code ec;
    if (!f.open(filename, file::read_only, ec)) return -1;

    // find out the size of the file by seeking to its end
    f.seek(0, file::end, ec);
    if (ec) return -1;
    size_type const file_size = f.tell(ec);
    if (ec) return -1;

    if (file_size > 5000000) return -2;
    v.resize(file_size);
    if (file_size == 0) return 0;

    // rewind and read everything in one go
    f.seek(0, file::begin, ec);
    if (ec) return -1;
    size_type const bytes_read = f.read(&v[0], file_size, ec);
    if (bytes_read != file_size || ec) return -3;
    return 0;
}
#ifndef TORRENT_NO_DEPRECATE
// constructor that parses a torrent file given as an entry tree. The tree
// is bencoded into a temporary buffer which is then decoded into a
// lazy_entry and handed to parse_torrent_file().
// throws invalid_torrent_file on parse failure, unless exceptions are
// disabled, in which case the failure is silent.
torrent_info::torrent_info(entry const& torrent_file)
    : m_creation_date(pt::ptime(pt::not_a_date_time))
    , m_multifile(false)
    , m_private(false)
    , m_info_section_size(0)
    , m_piece_hashes(0)
{
    std::vector<char> encoded;
    std::back_insert_iterator<std::vector<char> > sink(encoded);
    bencode(sink, torrent_file);

    lazy_entry decoded;
    lazy_bdecode(&encoded[0], &encoded[0] + encoded.size(), decoded);

    std::string error;
#ifndef BOOST_NO_EXCEPTIONS
    if (parse_torrent_file(decoded, error)) return;
    throw invalid_torrent_file();
#else
    parse_torrent_file(decoded, error);
#endif
}
#endif
// constructor that parses an already decoded torrent file.
// throws invalid_torrent_file on parse failure, unless exceptions are
// disabled, in which case the failure is silent.
torrent_info::torrent_info(lazy_entry const& torrent_file)
    : m_creation_date(pt::ptime(pt::not_a_date_time))
    , m_multifile(false)
    , m_private(false)
    , m_info_section_size(0)
    , m_piece_hashes(0)
{
    // the error message is not exposed to the caller
    std::string error;
    bool const parsed = parse_torrent_file(torrent_file, error);
#ifndef BOOST_NO_EXCEPTIONS
    if (!parsed) throw invalid_torrent_file();
#else
    (void)parsed;
#endif
}
// constructor that decodes and parses a bencoded torrent file held in the
// size bytes starting at buffer.
// throws invalid_torrent_file on parse failure, unless exceptions are
// disabled, in which case the failure is silent.
torrent_info::torrent_info(char const* buffer, int size)
    : m_creation_date(pt::ptime(pt::not_a_date_time))
    , m_multifile(false)
    , m_private(false)
    , m_info_section_size(0)
    , m_piece_hashes(0)
{
    lazy_entry decoded;
    char const* const buffer_end = buffer + size;
    lazy_bdecode(buffer, buffer_end, decoded);

    // the error message is not exposed to the caller
    std::string error;
#ifndef BOOST_NO_EXCEPTIONS
    if (parse_torrent_file(decoded, error)) return;
    throw invalid_torrent_file();
#else
    parse_torrent_file(decoded, error);
#endif
}
// constructor used for torrents with no metadata: only the info-hash
// is stored. No files, trackers, comment or piece hashes are set up
// (the info section size and the piece hash pointer are zeroed).
// Note that the creation date is initialized to the current universal
// time here, not to not_a_date_time as in the parsing constructors.
torrent_info::torrent_info(sha1_hash const& info_hash)
: m_info_hash(info_hash)
, m_creation_date(pt::second_clock::universal_time())
, m_multifile(false)
, m_private(false)
, m_info_section_size(0)
, m_piece_hashes(0)
{}
// constructor that loads the torrent file at filename from disk and
// parses it.
//
// If load_file() reports an error, the constructor returns without parsing
// and without throwing, leaving the object without any metadata. An empty
// file, or one that fails to parse, throws invalid_torrent_file; when
// exceptions are disabled these failures are silent as well.
torrent_info::torrent_info(fs::path const& filename)
    : m_creation_date(pt::ptime(pt::not_a_date_time))
    , m_multifile(false)
    , m_private(false)
    // these have to be zeroed like in the other constructors, the early
    // returns below would otherwise leave them uninitialized
    , m_info_section_size(0)
    , m_piece_hashes(0)
{
    std::vector<char> buf;
    int ret = load_file(filename, buf);
    if (ret < 0) return;

    if (buf.empty())
#ifndef BOOST_NO_EXCEPTIONS
        throw invalid_torrent_file();
#else
        return;
#endif

    lazy_entry e;
    lazy_bdecode(&buf[0], &buf[0] + buf.size(), e);
    std::string error;
#ifndef BOOST_NO_EXCEPTIONS
    if (!parse_torrent_file(e, error))
        throw invalid_torrent_file();
#else
    parse_torrent_file(e, error);
#endif
}
// constructor that loads the torrent file at the wide character path
// filename from disk and parses it. The path is converted to utf-8 with
// wchar_utf8() before it is passed to load_file().
//
// If load_file() reports an error, the constructor returns without parsing
// and without throwing, leaving the object without any metadata. An empty
// file, or one that fails to parse, throws invalid_torrent_file; when
// exceptions are disabled these failures are silent as well.
torrent_info::torrent_info(fs::wpath const& filename)
    : m_creation_date(pt::ptime(pt::not_a_date_time))
    , m_multifile(false)
    , m_private(false)
    // these have to be zeroed like in the other constructors, the early
    // returns below would otherwise leave them uninitialized
    , m_info_section_size(0)
    , m_piece_hashes(0)
{
    std::vector<char> buf;
    std::string utf8;
    wchar_utf8(filename.string(), utf8);
    int ret = load_file(utf8, buf);
    if (ret < 0) return;

    if (buf.empty())
#ifndef BOOST_NO_EXCEPTIONS
        throw invalid_torrent_file();
#else
        return;
#endif

    lazy_entry e;
    lazy_bdecode(&buf[0], &buf[0] + buf.size(), e);
    std::string error;
#ifndef BOOST_NO_EXCEPTIONS
    if (!parse_torrent_file(e, error))
        throw invalid_torrent_file();
#else
    parse_torrent_file(e, error);
#endif
}
// destructor. The body is empty, nothing is released explicitly here.
torrent_info::~torrent_info()
{}
// makes sure m_orig_files holds a copy of m_files. The copy is only taken
// if m_orig_files is not set yet, so later calls do nothing.
void torrent_info::copy_on_write()
{
    if (!m_orig_files)
        m_orig_files.reset(new file_storage(m_files));
}
// exchanges the complete state of this object with ti.
void torrent_info::swap(torrent_info& ti)
{
    // bring std::swap into scope as the fallback for the unqualified
    // swap() calls below
    using std::swap;
    m_urls.swap(ti.m_urls);
    m_url_seeds.swap(ti.m_url_seeds);
    // the http seeds are filled in by parse_torrent_file() just like the
    // url seeds and have to follow the rest of the state. The unqualified
    // swap() is used since the member's exact type is not visible here
    swap(m_http_seeds, ti.m_http_seeds);
    m_files.swap(ti.m_files);
    m_orig_files.swap(ti.m_orig_files);
    m_nodes.swap(ti.m_nodes);
    swap(m_info_hash, ti.m_info_hash);
    swap(m_creation_date, ti.m_creation_date);
    m_comment.swap(ti.m_comment);
    m_created_by.swap(ti.m_created_by);
    swap(m_multifile, ti.m_multifile);
    swap(m_private, ti.m_private);
    swap(m_info_section, ti.m_info_section);
    swap(m_info_section_size, ti.m_info_section_size);
    // m_piece_hashes points into m_info_section, so the two have to be
    // swapped together
    swap(m_piece_hashes, ti.m_piece_hashes);
    swap(m_info_dict, ti.m_info_dict);
}
// Parses the 'info' dictionary of a torrent file: computes the info-hash,
// keeps a private copy of the raw section and extracts the piece length,
// the (sanitized) name, the file list and the piece hashes.
//
// info is the decoded 'info' entry. On failure a description of the problem
// is stored in error.
//
// returns true on success and false on failure. Members assigned before
// the failing step keep their new values.
bool torrent_info::parse_info_section(lazy_entry const& info, std::string& error)
{
if (info.type() != lazy_entry::dict_t)
{
error = "'info' entry is not a dictionary";
return false;
}
// hash the info-field to calculate info-hash
// the hash is taken over the raw data section of the entry
hasher h;
std::pair<char const*, int> section = info.data_section();
h.update(section.first, section.second);
m_info_hash = h.final();
// copy the info section
m_info_section_size = section.second;
m_info_section.reset(new char[m_info_section_size]);
memcpy(m_info_section.get(), section.first, m_info_section_size);
// the section is expected to be a complete bencoded dictionary
TORRENT_ASSERT(section.first[0] == 'd');
TORRENT_ASSERT(section.first[m_info_section_size-1] == 'e');
// extract piece length
int piece_length = info.dict_find_int_value("piece length", -1);
if (piece_length <= 0)
{
error = "invalid or missing 'piece length' entry in torrent file";
return false;
}
m_files.set_piece_length(piece_length);
// extract file name (or the directory name if it's a multifile torrent)
// name.utf-8 is preferred over name
std::string name = info.dict_find_string_value("name.utf-8");
if (name.empty()) name = info.dict_find_string_value("name");
if (name.empty())
{
error = "missing name in torrent file";
return false;
}
// sanitize the name: a complete (absolute) path is reduced to its
// last element, a relative path with several elements has its "." and
// ".." elements removed. Every element is trimmed by trim_path_element()
fs::path tmp = name;
if (tmp.is_complete())
{
name = tmp.leaf();
trim_path_element(name);
}
#if BOOST_VERSION < 103600
else if (tmp.has_branch_path())
#else
else if (tmp.has_parent_path())
#endif
{
fs::path p;
for (fs::path::iterator i = tmp.begin()
, end(tmp.end()); i != end; ++i)
{
if (*i == "." || *i == "..") continue;
std::string path_element = *i;
trim_path_element(path_element);
p /= path_element;
}
name = p.string();
}
else
{
trim_path_element(name);
}
// a name consisting of a single "." or ".." is rejected
if (name == ".." || name == ".")
{
error = "invalid 'name' of torrent (possible exploit attempt)";
return false;
}
// correct utf-8 encoding errors
verify_encoding(name);
// extract file list
lazy_entry const* i = info.dict_find_list("files");
if (i == 0)
{
// if there's no list of files, there has to be a length
// field.
// this is a single file torrent, the file is called name
file_entry e;
e.path = name;
e.offset = 0;
e.size = info.dict_find_int_value("length", -1);
// bitcomet pad file
#if BOOST_VERSION < 103600
if (e.path.leaf().substr(0, 18) == "_____padding_file_")
#else
if (e.path.filename().substr(0, 18) == "_____padding_file_")
#endif
e.pad_file = true;
// a missing length shows up as the default value -1 here
if (e.size < 0)
{
error = "invalid length of torrent";
return false;
}
m_files.add_file(e);
m_multifile = false;
}
else
{
// multi file torrent, name is passed on as the root directory
if (!extract_files(*i, m_files, name))
{
error = "failed to parse files from torrent file";
return false;
}
m_multifile = true;
}
m_files.set_name(name);
// extract sha-1 hashes for all pieces
// we want this division to round upwards, that's why we have the
// extra addition
m_files.set_num_pieces(int((m_files.total_size() + m_files.piece_length() - 1)
/ m_files.piece_length()));
lazy_entry const* pieces = info.dict_find("pieces");
if (pieces == 0 || pieces->type() != lazy_entry::string_t)
{
error = "invalid or missing 'pieces' entry in torrent file";
return false;
}
// there has to be exactly 20 bytes of hash for every piece
if (pieces->string_length() != m_files.num_pieces() * 20)
{
error = "incorrect number of piece hashes in torrent file";
return false;
}
// point m_piece_hashes into our own copy of the info section, at the
// same offset as the 'pieces' string has in the original buffer
m_piece_hashes = m_info_section.get() + (pieces->string_ptr() - section.first);
TORRENT_ASSERT(m_piece_hashes >= m_info_section.get());
TORRENT_ASSERT(m_piece_hashes < m_info_section.get() + m_info_section_size);
// 'private' defaults to 0 when it is absent
m_private = info.dict_find_int_value("private", 0);
return true;
}
// Parses the top level dictionary of a torrent file: trackers, nodes,
// creation date, url- and http-seeds, comment and creator, and finally
// the 'info' section via parse_info_section().
//
// On failure a description of the problem is stored in error.
//
// returns true on success and false if torrent_file is not a dictionary,
// if the 'info' dictionary is missing or if parse_info_section() fails.
// Members filled in before the failing step keep their values.
bool torrent_info::parse_torrent_file(lazy_entry const& torrent_file, std::string& error)
{
if (torrent_file.type() != lazy_entry::dict_t)
{
error = "torrent file is not a dictionary";
return false;
}
// extract the url of the tracker
// 'announce-list' is a list of tiers, each tier being a list of urls
lazy_entry const* i = torrent_file.dict_find_list("announce-list");
if (i)
{
m_urls.reserve(i->list_size());
for (int j = 0, end(i->list_size()); j < end; ++j)
{
lazy_entry const* tier = i->list_at(j);
// tiers that are not lists are skipped
if (tier->type() != lazy_entry::list_t) continue;
for (int k = 0, end(tier->list_size()); k < end; ++k)
{
announce_entry e(tier->list_string_value_at(k));
if (e.url.empty()) continue;
// the tier number is the index of the tier in the announce-list
e.tier = j;
e.fail_limit = 0;
e.source = announce_entry::source_torrent;
m_urls.push_back(e);
}
}
if (!m_urls.empty())
{
// shuffle each tier
// the entries were added tier by tier above, so each tier is
// a contiguous range [start, stop) in m_urls
std::vector<announce_entry>::iterator start = m_urls.begin();
std::vector<announce_entry>::iterator stop;
int current_tier = m_urls.front().tier;
for (stop = m_urls.begin(); stop != m_urls.end(); ++stop)
{
if (stop->tier != current_tier)
{
std::random_shuffle(start, stop);
start = stop;
current_tier = stop->tier;
}
}
// shuffle the last tier
std::random_shuffle(start, stop);
}
}
// no usable announce-list, fall back to the single 'announce' url
if (m_urls.empty())
{
announce_entry e(torrent_file.dict_find_string_value("announce"));
e.fail_limit = 0;
e.source = announce_entry::source_torrent;
if (!e.url.empty()) m_urls.push_back(e);
}
// 'nodes' is a list of [string, int] pairs, malformed items are skipped
// NOTE(review): presumably host name and port of DHT nodes, confirm
lazy_entry const* nodes = torrent_file.dict_find_list("nodes");
if (nodes)
{
for (int i = 0, end(nodes->list_size()); i < end; ++i)
{
lazy_entry const* n = nodes->list_at(i);
if (n->type() != lazy_entry::list_t
|| n->list_size() < 2
|| n->list_at(0)->type() != lazy_entry::string_t
|| n->list_at(1)->type() != lazy_entry::int_t)
continue;
m_nodes.push_back(std::make_pair(
n->list_at(0)->string_value()
, int(n->list_at(1)->int_value())));
}
}
// extract creation date
// the value is interpreted as a number of seconds since 1970-01-01
size_type cd = torrent_file.dict_find_int_value("creation date", -1);
if (cd >= 0)
{
m_creation_date = pt::ptime(gr::date(1970, gr::Jan, 1))
+ pt::seconds(long(cd));
}
// if there are any url-seeds, extract them
// 'url-list' may be either a single string or a list of strings
lazy_entry const* url_seeds = torrent_file.dict_find("url-list");
if (url_seeds && url_seeds->type() == lazy_entry::string_t)
{
m_url_seeds.push_back(url_seeds->string_value());
}
else if (url_seeds && url_seeds->type() == lazy_entry::list_t)
{
for (int i = 0, end(url_seeds->list_size()); i < end; ++i)
{
lazy_entry const* url = url_seeds->list_at(i);
if (url->type() != lazy_entry::string_t) continue;
m_url_seeds.push_back(url->string_value());
}
}
// if there are any http-seeds, extract them
// 'httpseeds' may be either a single string or a list of strings
lazy_entry const* http_seeds = torrent_file.dict_find("httpseeds");
if (http_seeds && http_seeds->type() == lazy_entry::string_t)
{
m_http_seeds.push_back(http_seeds->string_value());
}
else if (http_seeds && http_seeds->type() == lazy_entry::list_t)
{
for (int i = 0, end(http_seeds->list_size()); i < end; ++i)
{
lazy_entry const* url = http_seeds->list_at(i);
if (url->type() != lazy_entry::string_t) continue;
m_http_seeds.push_back(url->string_value());
}
}
// the .utf-8 variants of the comment and the creator are preferred.
// Either way the strings are run through verify_encoding()
m_comment = torrent_file.dict_find_string_value("comment.utf-8");
if (m_comment.empty()) m_comment = torrent_file.dict_find_string_value("comment");
verify_encoding(m_comment);
m_created_by = torrent_file.dict_find_string_value("created by.utf-8");
if (m_created_by.empty()) m_created_by = torrent_file.dict_find_string_value("created by");
verify_encoding(m_created_by);
// the 'info' dictionary is mandatory
lazy_entry const* info = torrent_file.dict_find_dict("info");
if (info == 0)
{
error = "missing or invalid 'info' section in torrent file";
return false;
}
return parse_info_section(*info, error);
}
// returns the creation date of the torrent, or an empty optional if the
// stored date is not_a_date_time.
boost::optional<pt::ptime>
torrent_info::creation_date() const
{
    typedef boost::optional<pt::ptime> optional_time;

    return m_creation_date != pt::ptime(gr::date(pt::not_a_date_time))
        ? optional_time(m_creation_date)
        : optional_time();
}
void torrent_info::add_tracker(std::string const& url, int tier)
{
announce_entry e(url);
e.tier = tier;
e.source = announce_entry::source_client;
m_urls.push_back(e);
using boost::bind;
std::sort(m_urls.begin(), m_urls.end(), bind(&announce_entry::tier, _1)
< bind(&announce_entry::tier, _2));
}
#ifndef TORRENT_NO_DEPRECATE
// ------- start deprecation -------
void torrent_info::print(std::ostream& os) const
{
os << "trackers:\n";
for (std::vector<announce_entry>::const_iterator i = trackers().begin();
i != trackers().end(); ++i)
{
os << i->tier << ": " << i->url << "\n";
}
if (!m_comment.empty())
os << "comment: " << m_comment << "\n";
// if (m_creation_date != pt::ptime(gr::date(pt::not_a_date_time)))
// os << "creation date: " << to_simple_string(m_creation_date) << "\n";
os << "private: " << (m_private?"yes":"no") << "\n";
os << "number of pieces: " << num_pieces() << "\n";
os << "piece length: " << piece_length() << "\n";
os << "files:\n";
for (file_storage::iterator i = m_files.begin(); i != m_files.end(); ++i)
os << " " << std::setw(11) << i->size << " " << i->path.string() << "\n";
}
// ------- end deprecation -------
#endif
}