added workaround for incorrectly encoded paths in torrent files
This commit is contained in:
parent
7eb6090a08
commit
c84e96898b
|
@ -1,3 +1,5 @@
|
||||||
|
* added support for incorrectly encoded paths in torrent files
|
||||||
|
(assumes Latin-1 encoding and converts to UTF-8).
|
||||||
* fixed bug with file_progress() with files = 0 bytes
|
* fixed bug with file_progress() with files = 0 bytes
|
||||||
* fixed a race condition bug in udp_tracker_connection that could
|
* fixed a race condition bug in udp_tracker_connection that could
|
||||||
cause a crash.
|
cause a crash.
|
||||||
|
|
|
@ -13,6 +13,8 @@ docs/qbittorrent_thumb.jpg \
|
||||||
docs/ziptorrent_thumb.gif \
|
docs/ziptorrent_thumb.gif \
|
||||||
docs/vs2005_build_notes.html \
|
docs/vs2005_build_notes.html \
|
||||||
docs/vs2005_build_notes.rst \
|
docs/vs2005_build_notes.rst \
|
||||||
|
docs/ubuntu_build_notes.html \
|
||||||
|
docs/ubuntu_build_notes.rst \
|
||||||
docs/udp_tracker_protocol.html docs/client_test.rst docs/client_test.html \
|
docs/udp_tracker_protocol.html docs/client_test.rst docs/client_test.html \
|
||||||
docs/unicode_support.png docs/client_test.png docs/style.css Jamfile project-root.jam \
|
docs/unicode_support.png docs/client_test.png docs/style.css Jamfile project-root.jam \
|
||||||
m4/ac_cxx_namespaces.m4 m4/acx_pthread.m4 m4/ax_boost_date-time.m4 \
|
m4/ac_cxx_namespaces.m4 m4/acx_pthread.m4 m4/ax_boost_date-time.m4 \
|
||||||
|
|
|
@ -944,13 +944,20 @@ torrent, all the files starts with a directory with the same name as <tt class="
|
||||||
The filenames are encoded with UTF-8.</p>
|
The filenames are encoded with UTF-8.</p>
|
||||||
<p><tt class="docutils literal"><span class="pre">size</span></tt> is the size of the file (in bytes) and <tt class="docutils literal"><span class="pre">offset</span></tt> is the byte offset
|
<p><tt class="docutils literal"><span class="pre">size</span></tt> is the size of the file (in bytes) and <tt class="docutils literal"><span class="pre">offset</span></tt> is the byte offset
|
||||||
of the file within the torrent. i.e. the sum of all the sizes of the files
|
of the file within the torrent. i.e. the sum of all the sizes of the files
|
||||||
before this one in the file list this one in the file list..</p>
|
before this one in the file list.</p>
|
||||||
|
<p><tt class="docutils literal"><span class="pre">orig_path</span></tt> is set to 0 in case the path element is an exact copy of that
|
||||||
|
found in the metadata. In case the path in the original metadata was
|
||||||
|
incorrectly encoded, and had to be fixed in order to be acceptable utf-8,
|
||||||
|
the original string is preserved in <tt class="docutils literal"><span class="pre">orig_path</span></tt>. The reason to keep it
|
||||||
|
is to be able to reproduce the info-section exactly, with the correct
|
||||||
|
info-hash.</p>
|
||||||
<pre class="literal-block">
|
<pre class="literal-block">
|
||||||
struct file_entry
|
struct file_entry
|
||||||
{
|
{
|
||||||
boost::filesystem::path path;
|
boost::filesystem::path path;
|
||||||
size_type offset;
|
size_type offset;
|
||||||
size_type size;
|
size_type size;
|
||||||
|
boost::shared_ptr<boost::filesystem::path> orig_path;
|
||||||
};
|
};
|
||||||
</pre>
|
</pre>
|
||||||
</div>
|
</div>
|
||||||
|
|
|
@ -854,7 +854,14 @@ The filenames are encoded with UTF-8.
|
||||||
|
|
||||||
``size`` is the size of the file (in bytes) and ``offset`` is the byte offset
|
``size`` is the size of the file (in bytes) and ``offset`` is the byte offset
|
||||||
of the file within the torrent. i.e. the sum of all the sizes of the files
|
of the file within the torrent. i.e. the sum of all the sizes of the files
|
||||||
before this one in the file list this one in the file list..
|
before this one in the file list.
|
||||||
|
|
||||||
|
``orig_path`` is set to 0 in case the path element is an exact copy of that
|
||||||
|
found in the metadata. In case the path in the original metadata was
|
||||||
|
incorrectly encoded, and had to be fixed in order to be acceptable utf-8,
|
||||||
|
the original string is preserved in ``orig_path``. The reason to keep it
|
||||||
|
is to be able to reproduce the info-section exactly, with the correct
|
||||||
|
info-hash.
|
||||||
|
|
||||||
::
|
::
|
||||||
|
|
||||||
|
@ -863,6 +870,7 @@ before this one in the file list this one in the file list..
|
||||||
boost::filesystem::path path;
|
boost::filesystem::path path;
|
||||||
size_type offset;
|
size_type offset;
|
||||||
size_type size;
|
size_type size;
|
||||||
|
boost::shared_ptr<boost::filesystem::path> orig_path;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -47,13 +47,13 @@ cvs -d:pserver:anonymous@boost.cvs.sourceforge.net:/cvsroot/boost login
|
||||||
cvs -z3 -d:pserver:anonymous@boost.cvs.sourceforge.net:/cvsroot/boost checkout boost
|
cvs -z3 -d:pserver:anonymous@boost.cvs.sourceforge.net:/cvsroot/boost checkout boost
|
||||||
cvs -d:pserver:anonymous@boost.cvs.sourceforge.net:/cvsroot/boost logout
|
cvs -d:pserver:anonymous@boost.cvs.sourceforge.net:/cvsroot/boost logout
|
||||||
|
|
||||||
cvs -d:pserver:anonym...@libtorrent.cvs.sourceforge.net:/cvsroot/libtorrent login
|
cvs -d:pserver:anonymous@libtorrent.cvs.sourceforge.net:/cvsroot/libtorrent login
|
||||||
cvs -z3 -d:pserver:anonym...@libtorrent.cvs.sourceforge.net:/cvsroot/libtorrent co -P libtorrent
|
cvs -z3 -d:pserver:anonymous@libtorrent.cvs.sourceforge.net:/cvsroot/libtorrent co -P libtorrent
|
||||||
cvs -d:pserver:anonym...@libtorrent.cvs.sourceforge.net:/cvsroot/libtorrent logout
|
cvs -d:pserver:anonymous@libtorrent.cvs.sourceforge.net:/cvsroot/libtorrent logout
|
||||||
|
|
||||||
cvs -d:pserver:anonym...@asio.cvs.sourceforge.net:/cvsroot/asio login
|
cvs -d:pserver:anonymous@asio.cvs.sourceforge.net:/cvsroot/asio login
|
||||||
cvs -z3 -d:pserver:anonym...@asio.cvs.sourceforge.net:/cvsroot/asio co -P asio
|
cvs -z3 -d:pserver:anonymous@asio.cvs.sourceforge.net:/cvsroot/asio co -P asio
|
||||||
cvs -d:pserver:anonym...@asio.cvs.sourceforge.net:/cvsroot/asio login
|
cvs -d:pserver:anonymous@asio.cvs.sourceforge.net:/cvsroot/asio logout
|
||||||
</pre>
|
</pre>
|
||||||
</div>
|
</div>
|
||||||
<div class="section" id="step-2-building-boost">
|
<div class="section" id="step-2-building-boost">
|
||||||
|
|
|
@ -30,13 +30,13 @@ by executing the following commands::
|
||||||
cvs -z3 -d:pserver:anonymous@boost.cvs.sourceforge.net:/cvsroot/boost checkout boost
|
cvs -z3 -d:pserver:anonymous@boost.cvs.sourceforge.net:/cvsroot/boost checkout boost
|
||||||
cvs -d:pserver:anonymous@boost.cvs.sourceforge.net:/cvsroot/boost logout
|
cvs -d:pserver:anonymous@boost.cvs.sourceforge.net:/cvsroot/boost logout
|
||||||
|
|
||||||
cvs -d:pserver:anonym...@libtorrent.cvs.sourceforge.net:/cvsroot/libtorrent login
|
cvs -d:pserver:anonymous@libtorrent.cvs.sourceforge.net:/cvsroot/libtorrent login
|
||||||
cvs -z3 -d:pserver:anonym...@libtorrent.cvs.sourceforge.net:/cvsroot/libtorrent co -P libtorrent
|
cvs -z3 -d:pserver:anonymous@libtorrent.cvs.sourceforge.net:/cvsroot/libtorrent co -P libtorrent
|
||||||
cvs -d:pserver:anonym...@libtorrent.cvs.sourceforge.net:/cvsroot/libtorrent logout
|
cvs -d:pserver:anonymous@libtorrent.cvs.sourceforge.net:/cvsroot/libtorrent logout
|
||||||
|
|
||||||
cvs -d:pserver:anonym...@asio.cvs.sourceforge.net:/cvsroot/asio login
|
cvs -d:pserver:anonymous@asio.cvs.sourceforge.net:/cvsroot/asio login
|
||||||
cvs -z3 -d:pserver:anonym...@asio.cvs.sourceforge.net:/cvsroot/asio co -P asio
|
cvs -z3 -d:pserver:anonymous@asio.cvs.sourceforge.net:/cvsroot/asio co -P asio
|
||||||
cvs -d:pserver:anonym...@asio.cvs.sourceforge.net:/cvsroot/asio login
|
cvs -d:pserver:anonymous@asio.cvs.sourceforge.net:/cvsroot/asio logout
|
||||||
|
|
||||||
Step 2: Building boost
|
Step 2: Building boost
|
||||||
======================
|
======================
|
||||||
|
|
|
@ -45,6 +45,7 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||||
#include <boost/date_time/gregorian/gregorian_types.hpp>
|
#include <boost/date_time/gregorian/gregorian_types.hpp>
|
||||||
#include <boost/optional.hpp>
|
#include <boost/optional.hpp>
|
||||||
#include <boost/filesystem/path.hpp>
|
#include <boost/filesystem/path.hpp>
|
||||||
|
#include <boost/shared_ptr.hpp>
|
||||||
|
|
||||||
#ifdef _MSC_VER
|
#ifdef _MSC_VER
|
||||||
#pragma warning(pop)
|
#pragma warning(pop)
|
||||||
|
@ -65,6 +66,11 @@ namespace libtorrent
|
||||||
boost::filesystem::path path;
|
boost::filesystem::path path;
|
||||||
size_type offset; // the offset of this file inside the torrent
|
size_type offset; // the offset of this file inside the torrent
|
||||||
size_type size; // the size of this file
|
size_type size; // the size of this file
|
||||||
|
// if the path was incorrectly encoded, this is
|
||||||
|
// the original, incorrectly encoded string. It is
|
||||||
|
// preserved in order to be able to reproduce
|
||||||
|
// the correct info-hash
|
||||||
|
boost::shared_ptr<boost::filesystem::path> orig_path;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct TORRENT_EXPORT file_slice
|
struct TORRENT_EXPORT file_slice
|
||||||
|
|
|
@ -62,6 +62,98 @@ using namespace boost::filesystem;
|
||||||
|
|
||||||
namespace
|
namespace
|
||||||
{
|
{
|
||||||
|
// Appends the two-byte UTF-8 sequence for the single byte `chr`,
// interpreted as a Latin-1 code point, to the end of `str`.
//
// The result is always two bytes long: a lead byte of the form
// 110000xx followed by a continuation byte of the form 10xxxxxx.
void convert_to_utf8(std::string& str, unsigned char chr)
{
	// the two most significant bits of the code point go in the lead byte
	char const lead = static_cast<char>(0xc0 | (chr >> 6));
	// the six least significant bits go in the continuation byte
	char const trail = static_cast<char>(0x80 | (chr & 0x3f));

	str.push_back(lead);
	str.push_back(trail);
}
|
||||||
|
|
||||||
|
// Makes sure target.path is a well-formed UTF-8 string.
//
// The path string is scanned byte by byte. ASCII bytes and well-formed
// 2-, 3- and 4-byte UTF-8 sequences are copied verbatim. Every other
// byte is assumed to be a Latin-1 character and is re-encoded as UTF-8
// with convert_to_utf8().
//
// If at least one byte had to be re-encoded, the original path is
// preserved in target.orig_path and target.path is replaced by the
// repaired string. Otherwise target is left untouched.
//
// NOTE: only the bit patterns of the lead and continuation bytes are
// checked. Overlong encodings and code points outside the Unicode range
// are not rejected.
void verify_encoding(file_entry& target)
{
	std::string const file_path = target.path.string();
	std::string tmp_path;
	bool valid_encoding = true;

	for (std::string::const_iterator i = file_path.begin()
		, end(file_path.end()); i != end; ++i)
	{
		// valid ascii-character
		if ((*i & 0x80) == 0)
		{
			tmp_path += *i;
			continue;
		}

		// number of bytes left in the string, including *i. It is
		// checked before every look-ahead so that i[1], i[2] and i[3]
		// never read past the end.
		std::string::difference_type const left = end - i;

		// length of the well-formed multi-byte sequence that starts
		// at i, or 0 if there is none
		int seq_len = 0;

		if (left >= 2
			&& (i[0] & 0xe0) == 0xc0
			&& (i[1] & 0xc0) == 0x80)
		{
			// valid 2-byte utf-8 character (110xxxxx 10xxxxxx)
			seq_len = 2;
		}
		else if (left >= 3
			&& (i[0] & 0xf0) == 0xe0
			&& (i[1] & 0xc0) == 0x80
			&& (i[2] & 0xc0) == 0x80)
		{
			// valid 3-byte utf-8 character (1110xxxx 10xxxxxx 10xxxxxx)
			seq_len = 3;
		}
		else if (left >= 4
			&& (i[0] & 0xf8) == 0xf0
			&& (i[1] & 0xc0) == 0x80
			&& (i[2] & 0xc0) == 0x80
			&& (i[3] & 0xc0) == 0x80)
		{
			// valid 4-byte utf-8 character
			// (11110xxx 10xxxxxx 10xxxxxx 10xxxxxx).
			// the lead byte has to be matched against 0xf0 under the
			// mask 0xf8; testing for the 3-byte lead pattern here would
			// make this branch unreachable.
			seq_len = 4;
		}

		if (seq_len == 0)
		{
			// not part of any well-formed sequence, treat the byte
			// as Latin-1 and re-encode it
			convert_to_utf8(tmp_path, *i);
			valid_encoding = false;
			continue;
		}

		tmp_path.append(i, i + seq_len);
		// the loop increment steps over the last byte of the sequence
		i += seq_len - 1;
	}

	// the encoding was not valid utf-8
	// save the original encoding and replace the
	// commonly used path with the correctly
	// encoded string
	if (!valid_encoding)
	{
		target.orig_path.reset(new path(target.path));
		target.path = tmp_path;
	}
}
|
||||||
|
|
||||||
void extract_single_file(const entry& dict, file_entry& target
|
void extract_single_file(const entry& dict, file_entry& target
|
||||||
, std::string const& root_dir)
|
, std::string const& root_dir)
|
||||||
{
|
{
|
||||||
|
@ -89,6 +181,7 @@ namespace
|
||||||
if (i->string() != "..")
|
if (i->string() != "..")
|
||||||
target.path /= i->string();
|
target.path /= i->string();
|
||||||
}
|
}
|
||||||
|
verify_encoding(target);
|
||||||
if (target.path.is_complete()) throw std::runtime_error("torrent contains "
|
if (target.path.is_complete()) throw std::runtime_error("torrent contains "
|
||||||
"a file with an absolute path: '"
|
"a file with an absolute path: '"
|
||||||
+ target.path.native_file_string() + "'");
|
+ target.path.native_file_string() + "'");
|
||||||
|
@ -501,7 +594,7 @@ namespace libtorrent
|
||||||
files = entry(entry::list_t);
|
files = entry(entry::list_t);
|
||||||
|
|
||||||
for (std::vector<file_entry>::const_iterator i = m_files.begin();
|
for (std::vector<file_entry>::const_iterator i = m_files.begin();
|
||||||
i != m_files.end(); ++i)
|
i != m_files.end(); ++i)
|
||||||
{
|
{
|
||||||
files.list().push_back(entry(entry::dictionary_t));
|
files.list().push_back(entry(entry::dictionary_t));
|
||||||
entry& file_e = files.list().back();
|
entry& file_e = files.list().back();
|
||||||
|
@ -509,12 +602,14 @@ namespace libtorrent
|
||||||
entry& path_e = file_e["path"];
|
entry& path_e = file_e["path"];
|
||||||
path_e = entry(entry::list_t);
|
path_e = entry(entry::list_t);
|
||||||
|
|
||||||
fs::path const& file_path(i->path);
|
fs::path const* file_path;
|
||||||
assert(file_path.has_branch_path());
|
if (i->orig_path) file_path = &(*i->orig_path);
|
||||||
assert(*file_path.begin() == m_name);
|
else file_path = &i->path;
|
||||||
|
assert(file_path->has_branch_path());
|
||||||
|
assert(*file_path->begin() == m_name);
|
||||||
|
|
||||||
for (fs::path::iterator j = boost::next(file_path.begin());
|
for (fs::path::iterator j = boost::next(file_path->begin());
|
||||||
j != file_path.end(); ++j)
|
j != file_path->end(); ++j)
|
||||||
{
|
{
|
||||||
path_e.list().push_back(entry(*j));
|
path_e.list().push_back(entry(*j));
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue