added workaround for incorrectly encoded paths in torrent files

This commit is contained in:
Arvid Norberg 2006-10-12 23:51:10 +00:00
parent 7eb6090a08
commit c84e96898b
8 changed files with 140 additions and 20 deletions

View File

@ -1,3 +1,5 @@
* added support for incorrectly encoded paths in torrent files
(assumes Latin-1 encoding and converts to UTF-8).
* fixed bug with file_progress() with files = 0 bytes
* fixed a race condition bug in udp_tracker_connection that could
cause a crash.

View File

@ -13,6 +13,8 @@ docs/qbittorrent_thumb.jpg \
docs/ziptorrent_thumb.gif \
docs/vs2005_build_notes.html \
docs/vs2005_build_notes.rst \
docs/ubuntu_build_notes.html \
docs/ubuntu_build_notes.rst \
docs/udp_tracker_protocol.html docs/client_test.rst docs/client_test.html \
docs/unicode_support.png docs/client_test.png docs/style.css Jamfile project-root.jam \
m4/ac_cxx_namespaces.m4 m4/acx_pthread.m4 m4/ax_boost_date-time.m4 \

View File

@ -944,13 +944,20 @@ torrent, all the files starts with a directory with the same name as <tt class="
The filenames are encoded with UTF-8.</p>
<p><tt class="docutils literal"><span class="pre">size</span></tt> is the size of the file (in bytes) and <tt class="docutils literal"><span class="pre">offset</span></tt> is the byte offset
of the file within the torrent. i.e. the sum of all the sizes of the files
before this one in the file list this one in the file list..</p>
before this one in the file list.</p>
<p><tt class="docutils literal"><span class="pre">orig_path</span></tt> is set to 0 in case the path element is an exact copy of that
found in the metadata. In case the path in the original metadata was
incorrectly encoded, and had to be fixed in order to be acceptable utf-8,
the original string is preserved in <tt class="docutils literal"><span class="pre">orig_path</span></tt>. The reason to keep it
is to be able to reproduce the info-section exactly, with the correct
info-hash.</p>
<pre class="literal-block">
struct file_entry
{
boost::filesystem::path path;
size_type offset;
size_type size;
boost::shared_ptr&lt;boost::filesystem::path&gt; orig_path;
};
</pre>
</div>

View File

@ -854,7 +854,14 @@ The filenames are encoded with UTF-8.
``size`` is the size of the file (in bytes) and ``offset`` is the byte offset
of the file within the torrent. i.e. the sum of all the sizes of the files
before this one in the file list this one in the file list..
before this one in the file list.
``orig_path`` is set to 0 in case the path element is an exact copy of that
found in the metadata. In case the path in the original metadata was
incorrectly encoded, and had to be fixed in order to be acceptable utf-8,
the original string is preserved in ``orig_path``. The reason to keep it
is to be able to reproduce the info-section exactly, with the correct
info-hash.
::
@ -863,6 +870,7 @@ before this one in the file list this one in the file list..
boost::filesystem::path path;
size_type offset;
size_type size;
boost::shared_ptr<boost::filesystem::path> orig_path;
};

View File

@ -47,13 +47,13 @@ cvs -d:pserver:anonymous&#64;boost.cvs.sourceforge.net:/cvsroot/boost login
cvs -z3 -d:pserver:anonymous&#64;boost.cvs.sourceforge.net:/cvsroot/boost checkout boost
cvs -d:pserver:anonymous&#64;boost.cvs.sourceforge.net:/cvsroot/boost logout
cvs -d:pserver:anonym...&#64;libtorrent.cvs.sourceforge.net:/cvsroot/libtorrent login
cvs -z3 -d:pserver:anonym...&#64;libtorrent.cvs.sourceforge.net:/cvsroot/libtorrent co -P libtorrent
cvs -d:pserver:anonym...&#64;libtorrent.cvs.sourceforge.net:/cvsroot/libtorrent logout
cvs -d:pserver:anonymous&#64;libtorrent.cvs.sourceforge.net:/cvsroot/libtorrent login
cvs -z3 -d:pserver:anonymous&#64;libtorrent.cvs.sourceforge.net:/cvsroot/libtorrent co -P libtorrent
cvs -d:pserver:anonymous&#64;libtorrent.cvs.sourceforge.net:/cvsroot/libtorrent logout
cvs -d:pserver:anonym...&#64;asio.cvs.sourceforge.net:/cvsroot/asio login
cvs -z3 -d:pserver:anonym...&#64;asio.cvs.sourceforge.net:/cvsroot/asio co -P asio
cvs -d:pserver:anonym...&#64;asio.cvs.sourceforge.net:/cvsroot/asio login
cvs -d:pserver:anonymous&#64;asio.cvs.sourceforge.net:/cvsroot/asio login
cvs -z3 -d:pserver:anonymous&#64;asio.cvs.sourceforge.net:/cvsroot/asio co -P asio
cvs -d:pserver:anonymous&#64;asio.cvs.sourceforge.net:/cvsroot/asio logout
</pre>
</div>
<div class="section" id="step-2-building-boost">

View File

@ -30,13 +30,13 @@ by executing the following commands::
cvs -z3 -d:pserver:anonymous@boost.cvs.sourceforge.net:/cvsroot/boost checkout boost
cvs -d:pserver:anonymous@boost.cvs.sourceforge.net:/cvsroot/boost logout
cvs -d:pserver:anonym...@libtorrent.cvs.sourceforge.net:/cvsroot/libtorrent login
cvs -z3 -d:pserver:anonym...@libtorrent.cvs.sourceforge.net:/cvsroot/libtorrent co -P libtorrent
cvs -d:pserver:anonym...@libtorrent.cvs.sourceforge.net:/cvsroot/libtorrent logout
cvs -d:pserver:anonymous@libtorrent.cvs.sourceforge.net:/cvsroot/libtorrent login
cvs -z3 -d:pserver:anonymous@libtorrent.cvs.sourceforge.net:/cvsroot/libtorrent co -P libtorrent
cvs -d:pserver:anonymous@libtorrent.cvs.sourceforge.net:/cvsroot/libtorrent logout
cvs -d:pserver:anonym...@asio.cvs.sourceforge.net:/cvsroot/asio login
cvs -z3 -d:pserver:anonym...@asio.cvs.sourceforge.net:/cvsroot/asio co -P asio
cvs -d:pserver:anonym...@asio.cvs.sourceforge.net:/cvsroot/asio login
cvs -d:pserver:anonymous@asio.cvs.sourceforge.net:/cvsroot/asio login
cvs -z3 -d:pserver:anonymous@asio.cvs.sourceforge.net:/cvsroot/asio co -P asio
cvs -d:pserver:anonymous@asio.cvs.sourceforge.net:/cvsroot/asio logout
Step 2: Building boost
======================

View File

@ -45,6 +45,7 @@ POSSIBILITY OF SUCH DAMAGE.
#include <boost/date_time/gregorian/gregorian_types.hpp>
#include <boost/optional.hpp>
#include <boost/filesystem/path.hpp>
#include <boost/shared_ptr.hpp>
#ifdef _MSC_VER
#pragma warning(pop)
@ -65,6 +66,11 @@ namespace libtorrent
boost::filesystem::path path;
size_type offset; // the offset of this file inside the torrent
size_type size; // the size of this file
// if the path was incorrectly encoded, this is
// the original corrupt encoded string. It is
// preserved in order to be able to reproduce
// the correct info-hash
boost::shared_ptr<boost::filesystem::path> orig_path;
};
struct TORRENT_EXPORT file_slice

View File

@ -62,6 +62,98 @@ using namespace boost::filesystem;
namespace
{
// Appends the two-byte UTF-8 sequence for the single byte
// 'chr' to 'str'. The byte value is used directly as the
// code point, split into a 110000xx lead byte and a
// 10xxxxxx continuation byte.
void convert_to_utf8(std::string& str, unsigned char chr)
{
	// the top two bits of the byte go into the lead byte
	unsigned char const lead_byte
		= static_cast<unsigned char>(0xc0 | (chr >> 6));
	// the low six bits go into the continuation byte
	unsigned char const trail_byte
		= static_cast<unsigned char>(0x80 | (chr & 0x3f));

	str.push_back(static_cast<char>(lead_byte));
	str.push_back(static_cast<char>(trail_byte));
}
// Scans target.path byte by byte and makes sure it consists of
// well-formed UTF-8 sequences (1 to 4 bytes long).
//
// ASCII bytes and sequences whose lead and continuation bytes
// have the expected bit patterns are copied unchanged. Any other
// byte is passed to convert_to_utf8(), i.e. it is treated as a
// single-byte character and re-encoded as a two-byte sequence.
//
// If at least one byte had to be re-encoded, the untouched path
// is stored in target.orig_path and target.path is replaced with
// the repaired string. Otherwise target is left unmodified.
//
// Note that only the bit patterns are checked. Overlong forms and
// out-of-range code points are not rejected.
void verify_encoding(file_entry& target)
{
	std::string tmp_path;
	std::string file_path = target.path.string();
	bool valid_encoding = true;
	for (std::string::iterator i = file_path.begin()
		, end(file_path.end()); i != end; ++i)
	{
		// valid ascii-character
		if ((*i & 0x80) == 0)
		{
			tmp_path += *i;
			continue;
		}

		// a non-ascii byte at the very end of the string
		// cannot start a multi-byte sequence
		if (std::distance(i, end) < 2)
		{
			convert_to_utf8(tmp_path, *i);
			valid_encoding = false;
			continue;
		}

		// valid 2-byte utf-8 character
		// (110xxxxx 10xxxxxx)
		if ((i[0] & 0xe0) == 0xc0
			&& (i[1] & 0xc0) == 0x80)
		{
			tmp_path += i[0];
			tmp_path += i[1];
			// skip the continuation byte, the loop
			// increment steps past the lead byte
			i += 1;
			continue;
		}

		// not enough bytes left for a 3-byte sequence
		if (std::distance(i, end) < 3)
		{
			convert_to_utf8(tmp_path, *i);
			valid_encoding = false;
			continue;
		}

		// valid 3-byte utf-8 character
		// (1110xxxx 10xxxxxx 10xxxxxx)
		if ((i[0] & 0xf0) == 0xe0
			&& (i[1] & 0xc0) == 0x80
			&& (i[2] & 0xc0) == 0x80)
		{
			tmp_path += i[0];
			tmp_path += i[1];
			tmp_path += i[2];
			i += 2;
			continue;
		}

		// not enough bytes left for a 4-byte sequence
		if (std::distance(i, end) < 4)
		{
			convert_to_utf8(tmp_path, *i);
			valid_encoding = false;
			continue;
		}

		// valid 4-byte utf-8 character
		// (11110xxx 10xxxxxx 10xxxxxx 10xxxxxx)
		// the lead byte must be tested against the 4-byte
		// pattern. Testing it against the 3-byte pattern
		// would make this branch unreachable and mangle
		// every correctly encoded 4-byte character
		if ((i[0] & 0xf8) == 0xf0
			&& (i[1] & 0xc0) == 0x80
			&& (i[2] & 0xc0) == 0x80
			&& (i[3] & 0xc0) == 0x80)
		{
			tmp_path += i[0];
			tmp_path += i[1];
			tmp_path += i[2];
			tmp_path += i[3];
			i += 3;
			continue;
		}

		// the byte does not start any well-formed sequence
		convert_to_utf8(tmp_path, *i);
		valid_encoding = false;
	}

	// the encoding was not valid utf-8
	// save the original encoding and replace the
	// commonly used path with the correctly
	// encoded string
	if (!valid_encoding)
	{
		target.orig_path.reset(new path(target.path));
		target.path = tmp_path;
	}
}
void extract_single_file(const entry& dict, file_entry& target
, std::string const& root_dir)
{
@ -89,6 +181,7 @@ namespace
if (i->string() != "..")
target.path /= i->string();
}
verify_encoding(target);
if (target.path.is_complete()) throw std::runtime_error("torrent contains "
"a file with an absolute path: '"
+ target.path.native_file_string() + "'");
@ -501,7 +594,7 @@ namespace libtorrent
files = entry(entry::list_t);
for (std::vector<file_entry>::const_iterator i = m_files.begin();
i != m_files.end(); ++i)
i != m_files.end(); ++i)
{
files.list().push_back(entry(entry::dictionary_t));
entry& file_e = files.list().back();
@ -509,12 +602,14 @@ namespace libtorrent
entry& path_e = file_e["path"];
path_e = entry(entry::list_t);
fs::path const& file_path(i->path);
assert(file_path.has_branch_path());
assert(*file_path.begin() == m_name);
fs::path const* file_path;
if (i->orig_path) file_path = &(*i->orig_path);
else file_path = &i->path;
assert(file_path->has_branch_path());
assert(*file_path->begin() == m_name);
for (fs::path::iterator j = boost::next(file_path.begin());
j != file_path.end(); ++j)
for (fs::path::iterator j = boost::next(file_path->begin());
j != file_path->end(); ++j)
{
path_e.list().push_back(entry(*j));
}