forked from premiere/premiere-libtorrent
added workaround for incorrectly encoded paths in torrent files
This commit is contained in:
parent
7eb6090a08
commit
c84e96898b
|
@ -1,3 +1,5 @@
|
|||
* added support for incorrectly encoded paths in torrent files
|
||||
(assumes Latin-1 encoding and converts to UTF-8).
|
||||
* fixed bug with file_progress() with files = 0 bytes
|
||||
* fixed a race condition bug in udp_tracker_connection that could
|
||||
cause a crash.
|
||||
|
|
|
@ -13,6 +13,8 @@ docs/qbittorrent_thumb.jpg \
|
|||
docs/ziptorrent_thumb.gif \
|
||||
docs/vs2005_build_notes.html \
|
||||
docs/vs2005_build_notes.rst \
|
||||
docs/ubuntu_build_notes.html \
|
||||
docs/ubuntu_build_notes.rst \
|
||||
docs/udp_tracker_protocol.html docs/client_test.rst docs/client_test.html \
|
||||
docs/unicode_support.png docs/client_test.png docs/style.css Jamfile project-root.jam \
|
||||
m4/ac_cxx_namespaces.m4 m4/acx_pthread.m4 m4/ax_boost_date-time.m4 \
|
||||
|
|
|
@ -944,13 +944,20 @@ torrent, all the files starts with a directory with the same name as <tt class="
|
|||
The filenames are encoded with UTF-8.</p>
|
||||
<p><tt class="docutils literal"><span class="pre">size</span></tt> is the size of the file (in bytes) and <tt class="docutils literal"><span class="pre">offset</span></tt> is the byte offset
|
||||
of the file within the torrent. i.e. the sum of all the sizes of the files
|
||||
before this one in the file list this one in the file list..</p>
|
||||
before this one in the file list.</p>
|
||||
<p><tt class="docutils literal"><span class="pre">orig_path</span></tt> is set to 0 in case the path element is an exact copy of that
|
||||
found in the metadata. In case the path in the original metadata was
|
||||
incorrectly encoded, and had to be fixed in order to be acceptable utf-8,
|
||||
the original string is preserved in <tt class="docutils literal"><span class="pre">orig_path</span></tt>. The reason to keep it
|
||||
is to be able to reproduce the info-section exactly, with the correct
|
||||
info-hash.</p>
|
||||
<pre class="literal-block">
|
||||
struct file_entry
|
||||
{
|
||||
boost::filesystem::path path;
|
||||
size_type offset;
|
||||
size_type size;
|
||||
boost::shared_ptr<boost::filesystem::path> orig_path;
|
||||
};
|
||||
</pre>
|
||||
</div>
|
||||
|
|
|
@ -854,7 +854,14 @@ The filenames are encoded with UTF-8.
|
|||
|
||||
``size`` is the size of the file (in bytes) and ``offset`` is the byte offset
|
||||
of the file within the torrent. i.e. the sum of all the sizes of the files
|
||||
before this one in the file list this one in the file list..
|
||||
before this one in the file list.
|
||||
|
||||
``orig_path`` is set to 0 in case the path element is an exact copy of that
|
||||
found in the metadata. In case the path in the original metadata was
|
||||
incorrectly encoded, and had to be fixed in order to be acceptable utf-8,
|
||||
the original string is preserved in ``orig_path``. The reason to keep it
|
||||
is to be able to reproduce the info-section exactly, with the correct
|
||||
info-hash.
|
||||
|
||||
::
|
||||
|
||||
|
@ -863,6 +870,7 @@ before this one in the file list this one in the file list..
|
|||
boost::filesystem::path path;
|
||||
size_type offset;
|
||||
size_type size;
|
||||
boost::shared_ptr<boost::filesystem::path> orig_path;
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -47,13 +47,13 @@ cvs -d:pserver:anonymous@boost.cvs.sourceforge.net:/cvsroot/boost login
|
|||
cvs -z3 -d:pserver:anonymous@boost.cvs.sourceforge.net:/cvsroot/boost checkout boost
|
||||
cvs -d:pserver:anonymous@boost.cvs.sourceforge.net:/cvsroot/boost logout
|
||||
|
||||
cvs -d:pserver:anonym...@libtorrent.cvs.sourceforge.net:/cvsroot/libtorrent login
|
||||
cvs -z3 -d:pserver:anonym...@libtorrent.cvs.sourceforge.net:/cvsroot/libtorrent co -P libtorrent
|
||||
cvs -d:pserver:anonym...@libtorrent.cvs.sourceforge.net:/cvsroot/libtorrent logout
|
||||
cvs -d:pserver:anonymous@libtorrent.cvs.sourceforge.net:/cvsroot/libtorrent login
|
||||
cvs -z3 -d:pserver:anonymous@libtorrent.cvs.sourceforge.net:/cvsroot/libtorrent co -P libtorrent
|
||||
cvs -d:pserver:anonymous@libtorrent.cvs.sourceforge.net:/cvsroot/libtorrent logout
|
||||
|
||||
cvs -d:pserver:anonym...@asio.cvs.sourceforge.net:/cvsroot/asio login
|
||||
cvs -z3 -d:pserver:anonym...@asio.cvs.sourceforge.net:/cvsroot/asio co -P asio
|
||||
cvs -d:pserver:anonym...@asio.cvs.sourceforge.net:/cvsroot/asio login
|
||||
cvs -d:pserver:anonymous@asio.cvs.sourceforge.net:/cvsroot/asio login
|
||||
cvs -z3 -d:pserver:anonymous@asio.cvs.sourceforge.net:/cvsroot/asio co -P asio
|
||||
cvs -d:pserver:anonymous@asio.cvs.sourceforge.net:/cvsroot/asio logout
|
||||
</pre>
|
||||
</div>
|
||||
<div class="section" id="step-2-building-boost">
|
||||
|
|
|
@ -30,13 +30,13 @@ by executing the following commands::
|
|||
cvs -z3 -d:pserver:anonymous@boost.cvs.sourceforge.net:/cvsroot/boost checkout boost
|
||||
cvs -d:pserver:anonymous@boost.cvs.sourceforge.net:/cvsroot/boost logout
|
||||
|
||||
cvs -d:pserver:anonym...@libtorrent.cvs.sourceforge.net:/cvsroot/libtorrent login
|
||||
cvs -z3 -d:pserver:anonym...@libtorrent.cvs.sourceforge.net:/cvsroot/libtorrent co -P libtorrent
|
||||
cvs -d:pserver:anonym...@libtorrent.cvs.sourceforge.net:/cvsroot/libtorrent logout
|
||||
cvs -d:pserver:anonymous@libtorrent.cvs.sourceforge.net:/cvsroot/libtorrent login
|
||||
cvs -z3 -d:pserver:anonymous@libtorrent.cvs.sourceforge.net:/cvsroot/libtorrent co -P libtorrent
|
||||
cvs -d:pserver:anonymous@libtorrent.cvs.sourceforge.net:/cvsroot/libtorrent logout
|
||||
|
||||
cvs -d:pserver:anonym...@asio.cvs.sourceforge.net:/cvsroot/asio login
|
||||
cvs -z3 -d:pserver:anonym...@asio.cvs.sourceforge.net:/cvsroot/asio co -P asio
|
||||
cvs -d:pserver:anonym...@asio.cvs.sourceforge.net:/cvsroot/asio login
|
||||
cvs -d:pserver:anonymous@asio.cvs.sourceforge.net:/cvsroot/asio login
|
||||
cvs -z3 -d:pserver:anonymous@asio.cvs.sourceforge.net:/cvsroot/asio co -P asio
|
||||
cvs -d:pserver:anonymous@asio.cvs.sourceforge.net:/cvsroot/asio logout
|
||||
|
||||
Step 2: Building boost
|
||||
======================
|
||||
|
|
|
@ -45,6 +45,7 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||
#include <boost/date_time/gregorian/gregorian_types.hpp>
|
||||
#include <boost/optional.hpp>
|
||||
#include <boost/filesystem/path.hpp>
|
||||
#include <boost/shared_ptr.hpp>
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning(pop)
|
||||
|
@ -65,6 +66,11 @@ namespace libtorrent
|
|||
boost::filesystem::path path;
|
||||
size_type offset; // the offset of this file inside the torrent
|
||||
size_type size; // the size of this file
|
||||
// if the path was incorrectly encoded, this is
|
||||
// the original corrupt encoded string. It is
|
||||
// preserved in order to be able to reproduce
|
||||
// the correct info-hash
|
||||
boost::shared_ptr<boost::filesystem::path> orig_path;
|
||||
};
|
||||
|
||||
struct TORRENT_EXPORT file_slice
|
||||
|
|
|
@ -62,6 +62,98 @@ using namespace boost::filesystem;
|
|||
|
||||
namespace
|
||||
{
|
||||
// Appends the single Latin-1 (ISO 8859-1) byte chr to str, re-encoded as
// a 2-byte UTF-8 sequence of the form 110000xx 10xxxxxx.
//
// str: string the two encoded bytes are appended to.
// chr: byte to encode. The caller in this file only passes bytes with the
//      high bit set (0x80-0xff); a value below 0x80 would yield an
//      overlong 2-byte form rather than plain ASCII.
void convert_to_utf8(std::string& str, unsigned char chr)
{
	// lead byte: marker 110xxxxx carrying the top two bits of chr.
	// The casts make the int -> char narrowing explicit, since the
	// values (0xc0 and up) do not fit in a signed char.
	str += static_cast<char>(0xc0 | (chr >> 6));
	// continuation byte: marker 10xxxxxx carrying the low six bits
	str += static_cast<char>(0x80 | (chr & 0x3f));
}
|
||||
|
||||
// Makes sure target.path is acceptable UTF-8.
//
// The path string is scanned byte by byte. ASCII bytes and 2-, 3- and
// 4-byte sequences whose lead and continuation bytes carry the UTF-8 bit
// patterns are copied through unchanged. Every other byte with the high
// bit set is assumed to be Latin-1 and is re-encoded as a 2-byte UTF-8
// sequence by convert_to_utf8().
//
// If at least one byte had to be re-encoded, the original path is saved
// in target.orig_path and target.path is replaced with the repaired
// string. Otherwise target is left untouched.
//
// Only the marker bits are inspected; overlong forms and surrogate code
// points are not rejected.
void verify_encoding(file_entry& target)
{
	std::string const file_path = target.path.string();
	std::string tmp_path;
	bool valid_encoding = true;

	for (std::string::const_iterator i = file_path.begin()
		, end(file_path.end()); i != end; ++i)
	{
		unsigned char const lead = static_cast<unsigned char>(*i);

		// valid ascii-character
		if ((lead & 0x80) == 0)
		{
			tmp_path += *i;
			continue;
		}

		// length of the sequence announced by the lead byte,
		// 0 if this byte cannot start a sequence
		int seq_len = 0;
		if ((lead & 0xe0) == 0xc0) seq_len = 2;
		else if ((lead & 0xf0) == 0xe0) seq_len = 3;
		// 4-byte lead is 11110xxx. This used to be tested with the
		// 3-byte mask, so 4-byte characters were never recognized
		// and got mangled as Latin-1.
		else if ((lead & 0xf8) == 0xf0) seq_len = 4;

		// the whole sequence must fit in what is left of the string
		// and every trailing byte must look like 10xxxxxx
		bool well_formed = seq_len != 0
			&& std::distance(i, end) >= seq_len;
		for (int k = 1; well_formed && k < seq_len; ++k)
			well_formed = (i[k] & 0xc0) == 0x80;

		if (well_formed)
		{
			tmp_path.append(i, i + seq_len);
			// the loop increment skips the last byte
			i += seq_len - 1;
			continue;
		}

		// not utf-8: treat this single byte as Latin-1
		convert_to_utf8(tmp_path, lead);
		valid_encoding = false;
	}

	// the encoding was not valid utf-8
	// save the original encoding and replace the
	// commonly used path with the correctly
	// encoded string
	if (!valid_encoding)
	{
		target.orig_path.reset(new path(target.path));
		target.path = tmp_path;
	}
}
|
||||
|
||||
void extract_single_file(const entry& dict, file_entry& target
|
||||
, std::string const& root_dir)
|
||||
{
|
||||
|
@ -89,6 +181,7 @@ namespace
|
|||
if (i->string() != "..")
|
||||
target.path /= i->string();
|
||||
}
|
||||
verify_encoding(target);
|
||||
if (target.path.is_complete()) throw std::runtime_error("torrent contains "
|
||||
"a file with an absolute path: '"
|
||||
+ target.path.native_file_string() + "'");
|
||||
|
@ -501,7 +594,7 @@ namespace libtorrent
|
|||
files = entry(entry::list_t);
|
||||
|
||||
for (std::vector<file_entry>::const_iterator i = m_files.begin();
|
||||
i != m_files.end(); ++i)
|
||||
i != m_files.end(); ++i)
|
||||
{
|
||||
files.list().push_back(entry(entry::dictionary_t));
|
||||
entry& file_e = files.list().back();
|
||||
|
@ -509,12 +602,14 @@ namespace libtorrent
|
|||
entry& path_e = file_e["path"];
|
||||
path_e = entry(entry::list_t);
|
||||
|
||||
fs::path const& file_path(i->path);
|
||||
assert(file_path.has_branch_path());
|
||||
assert(*file_path.begin() == m_name);
|
||||
fs::path const* file_path;
|
||||
if (i->orig_path) file_path = &(*i->orig_path);
|
||||
else file_path = &i->path;
|
||||
assert(file_path->has_branch_path());
|
||||
assert(*file_path->begin() == m_name);
|
||||
|
||||
for (fs::path::iterator j = boost::next(file_path.begin());
|
||||
j != file_path.end(); ++j)
|
||||
for (fs::path::iterator j = boost::next(file_path->begin());
|
||||
j != file_path->end(); ++j)
|
||||
{
|
||||
path_e.list().push_back(entry(*j));
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue