clean up xml parser to use string_view (#1731)

clean up xml parser to use string_view
This commit is contained in:
Arvid Norberg 2017-02-19 07:34:55 -05:00 committed by GitHub
parent 1e98bf19a5
commit 5f69e8217c
7 changed files with 73 additions and 128 deletions

View File

@ -60,7 +60,7 @@ namespace libtorrent
TORRENT_EXTRA_EXPORT int split_string(char const** tags, int buf_size, char* in);
TORRENT_EXTRA_EXPORT bool string_begins_no_case(char const* s1, char const* s2);
TORRENT_EXTRA_EXPORT bool string_equal_no_case(char const* s1, char const* s2);
TORRENT_EXTRA_EXPORT bool string_equal_no_case(string_view s1, string_view s2);
TORRENT_EXTRA_EXPORT void url_random(char* begin, char* end);

View File

@ -103,19 +103,19 @@ namespace libtorrent
struct parse_state
{
bool in_service = false;
std::list<std::string> tag_stack;
std::vector<string_view> tag_stack;
std::string control_url;
std::string service_type;
std::string model;
std::string url_base;
bool top_tags(const char* str1, const char* str2)
bool top_tags(string_view str1, string_view str2)
{
std::list<std::string>::reverse_iterator i = tag_stack.rbegin();
auto i = tag_stack.rbegin();
if (i == tag_stack.rend()) return false;
if (!string_equal_no_case(i->c_str(), str2)) return false;
if (!string_equal_no_case(*i, str2)) return false;
++i;
if (i == tag_stack.rend()) return false;
if (!string_equal_no_case(i->c_str(), str1)) return false;
if (!string_equal_no_case(*i, str1)) return false;
return true;
}
};
@ -133,14 +133,13 @@ struct ip_address_parse_state: error_code_parse_state
std::string ip_address;
};
TORRENT_EXTRA_EXPORT void find_control_url(int type, char const* string
, int str_len, parse_state& state);
TORRENT_EXTRA_EXPORT void find_control_url(int type, string_view, parse_state& state);
TORRENT_EXTRA_EXPORT void find_error_code(int type, char const* string
, int str_len, error_code_parse_state& state);
TORRENT_EXTRA_EXPORT void find_error_code(int type, string_view string
, error_code_parse_state& state);
TORRENT_EXTRA_EXPORT void find_ip_address(int type, char const* string
, int str_len, ip_address_parse_state& state);
TORRENT_EXTRA_EXPORT void find_ip_address(int type, string_view string
, ip_address_parse_state& state);
// TODO: support using the Windows API for UPnP operations as well
struct TORRENT_EXTRA_EXPORT upnp final

View File

@ -37,7 +37,7 @@ POSSIBILITY OF SUCH DAMAGE.
#include "libtorrent/config.hpp"
#include "libtorrent/assert.hpp"
#include "libtorrent/span.hpp"
#include "libtorrent/string_view.hpp"
namespace libtorrent
{
@ -62,9 +62,8 @@ namespace libtorrent
// val is attribute value
// neither string is 0-terminated; each is passed as a string_view, which
// carries its own length
// TODO: 3 use span<> for the callback
TORRENT_EXTRA_EXPORT void xml_parse(span<char const> input
, std::function<void(int, char const*, int, char const*, int)> callback);
TORRENT_EXTRA_EXPORT void xml_parse(string_view input
, std::function<void(int, string_view, string_view)> callback);
}

View File

@ -125,18 +125,12 @@ namespace libtorrent
return true;
}
bool string_equal_no_case(char const* s1, char const* s2)
bool string_equal_no_case(string_view s1, string_view s2)
{
TORRENT_ASSERT(s1 != nullptr);
TORRENT_ASSERT(s2 != nullptr);
while (to_lower(*s1) == to_lower(*s2))
{
if (*s1 == 0) return true;
++s1;
++s2;
}
return false;
if (s1.size() != s2.size()) return false;
return std::equal(s1.begin(), s1.end(), s2.begin()
, [] (char const c1, char const c2)
{ return to_lower(c1) == to_lower(c2); });
}
// generate a url-safe random string

View File

@ -869,35 +869,17 @@ void upnp::delete_port_mapping(rootdevice& d, int const i)
post(d, soap, soap_action);
}
namespace
{
void copy_tolower(std::string& dst, char const* src, int len)
{
dst.clear();
dst.reserve(aux::numeric_cast<std::size_t>(len));
while (len-- > 0)
{
dst.push_back(to_lower(*src++));
}
}
}
void find_control_url(int const type, char const* string
, int const str_len, parse_state& state)
void find_control_url(int const type, string_view str, parse_state& state)
{
if (type == xml_start_tag)
{
std::string tag;
copy_tolower(tag, string, str_len);
state.tag_stack.push_back(tag);
// std::copy(state.tag_stack.begin(), state.tag_stack.end(), std::ostream_iterator<std::string>(std::cout, " "));
// std::cout << std::endl;
state.tag_stack.push_back(str);
}
else if (type == xml_end_tag)
{
if (!state.tag_stack.empty())
{
if (state.in_service && state.tag_stack.back() == "service")
if (state.in_service && string_equal_no_case(state.tag_stack.back(), "service"))
state.in_service = false;
state.tag_stack.pop_back();
}
@ -905,31 +887,29 @@ void find_control_url(int const type, char const* string
else if (type == xml_string)
{
if (state.tag_stack.empty()) return;
// std::cout << " " << string << std::endl;}
if (!state.in_service && state.top_tags("service", "servicetype"))
{
std::string name(string, aux::numeric_cast<std::size_t>(str_len));
if (string_equal_no_case(name.c_str(), "urn:schemas-upnp-org:service:WANIPConnection:1")
|| string_equal_no_case(name.c_str(), "urn:schemas-upnp-org:service:WANIPConnection:2")
|| string_equal_no_case(name.c_str(), "urn:schemas-upnp-org:service:WANPPPConnection:1"))
if (string_equal_no_case(str, "urn:schemas-upnp-org:service:WANIPConnection:1")
|| string_equal_no_case(str, "urn:schemas-upnp-org:service:WANIPConnection:2")
|| string_equal_no_case(str, "urn:schemas-upnp-org:service:WANPPPConnection:1"))
{
state.service_type.assign(string, aux::numeric_cast<std::size_t>(str_len));
state.service_type.assign(str.begin(), str.end());
state.in_service = true;
}
}
else if (state.control_url.empty() && state.in_service
&& state.top_tags("service", "controlurl") && std::strlen(string) > 0)
&& state.top_tags("service", "controlurl") && str.size() > 0)
{
// default to the first (or only) control url in the router's listing
state.control_url.assign(string, aux::numeric_cast<std::size_t>(str_len));
state.control_url.assign(str.begin(), str.end());
}
else if (state.model.empty() && state.top_tags("device", "modelname"))
{
state.model.assign(string, aux::numeric_cast<std::size_t>(str_len));
state.model.assign(str.begin(), str.end());
}
else if (state.tag_stack.back() == "urlbase")
else if (string_equal_no_case(state.tag_stack.back(), "urlbase"))
{
state.url_base.assign(string, aux::numeric_cast<std::size_t>(str_len));
state.url_base.assign(str.begin(), str.end());
}
}
}
@ -985,7 +965,8 @@ void upnp::on_upnp_xml(error_code const& e
}
parse_state s;
xml_parse(p.get_body(), std::bind(&find_control_url, _1, _2, _3, std::ref(s)));
auto body = p.get_body();
xml_parse({body.data(), body.size()}, std::bind(&find_control_url, _1, _2, std::ref(s)));
if (s.control_url.empty())
{
#ifndef TORRENT_DISABLE_LOGGING
@ -1116,33 +1097,33 @@ void upnp::disable(error_code const& ec)
m_socket.close();
}
void find_error_code(int const type, char const* string, int const str_len, error_code_parse_state& state)
void find_error_code(int const type, string_view string, error_code_parse_state& state)
{
if (state.exit) return;
if (type == xml_start_tag && !std::strncmp("errorCode", string, aux::numeric_cast<std::size_t>(str_len)))
if (type == xml_start_tag && string == "errorCode")
{
state.in_error_code = true;
}
else if (type == xml_string && state.in_error_code)
{
std::string error_code_str(string, aux::numeric_cast<std::size_t>(str_len));
std::string error_code_str(string.begin(), string.end());
state.error_code = std::atoi(error_code_str.c_str());
state.exit = true;
}
}
void find_ip_address(int const type, char const* string, int const str_len, ip_address_parse_state& state)
void find_ip_address(int const type, string_view string, ip_address_parse_state& state)
{
find_error_code(type, string, str_len, state);
find_error_code(type, string, state);
if (state.exit) return;
if (type == xml_start_tag && !std::strncmp("NewExternalIPAddress", string, aux::numeric_cast<std::size_t>(str_len)))
if (type == xml_start_tag && string == "NewExternalIPAddress")
{
state.in_ip_address = true;
}
else if (type == xml_string && state.in_ip_address)
{
state.ip_address.assign(string, aux::numeric_cast<std::size_t>(str_len));
state.ip_address.assign(string.begin(), string.end());
state.exit = true;
}
}
@ -1282,7 +1263,7 @@ void upnp::on_upnp_get_ip_address_response(error_code const& e
#endif
ip_address_parse_state s;
xml_parse(body, std::bind(&find_ip_address, _1, _2, _3, std::ref(s)));
xml_parse({body.data(), body.size()}, std::bind(&find_ip_address, _1, _2, std::ref(s)));
#ifndef TORRENT_DISABLE_LOGGING
if (s.error_code != -1)
{
@ -1383,7 +1364,7 @@ void upnp::on_upnp_map_response(error_code const& e
error_code_parse_state s;
span<char const> body = p.get_body();
xml_parse(body, std::bind(&find_error_code, _1, _2, _3, std::ref(s)));
xml_parse({body.data(), body.size()}, std::bind(&find_error_code, _1, _2, std::ref(s)));
if (s.error_code != -1)
{
@ -1537,7 +1518,8 @@ void upnp::on_upnp_unmap_response(error_code const& e
error_code_parse_state s;
if (p.header_finished())
{
xml_parse(p.get_body(), std::bind(&find_error_code, _1, _2, _3, std::ref(s)));
span<char const> body = p.get_body();
xml_parse({body.data(), body.size()}, std::bind(&find_error_code, _1, _2, std::ref(s)));
}
portmap_protocol const proto = m_mappings[mapping].protocol;

View File

@ -34,27 +34,25 @@ POSSIBILITY OF SUCH DAMAGE.
#include "libtorrent/xml_parse.hpp"
#include "libtorrent/string_util.hpp"
#include "libtorrent/string_view.hpp"
namespace libtorrent
{
TORRENT_EXTRA_EXPORT void xml_parse(span<char const> input
, std::function<void(int, char const*, int, char const*, int)> callback)
TORRENT_EXTRA_EXPORT void xml_parse(string_view input
, std::function<void(int, string_view, string_view)> callback)
{
char const* p = input.data();
char const* end = input.data() + input.size();
for(;p != end; ++p)
{
char const* start = p;
int token;
// look for tag start
for(; p != end && *p != '<'; ++p);
if (p != start)
{
token = xml_string;
const int name_len = int(p - start);
callback(token, start, name_len, nullptr, 0);
callback(xml_string, {start, std::size_t(p - start)}, {});
}
if (p == end) break;
@ -71,15 +69,11 @@ namespace libtorrent
// parse error
if (p == end)
{
token = xml_parse_error;
start = "unexpected end of file";
callback(token, start, int(std::strlen(start)), nullptr, 0);
callback(xml_parse_error, "unexpected end of file", {});
break;
}
token = xml_string;
const int name_len = int(p - start - 2);
callback(token, start, name_len, nullptr, 0);
callback(xml_string, {start, std::size_t(p - start - 2)}, {});
continue;
}
@ -94,9 +88,7 @@ namespace libtorrent
// parse error
if (p == end)
{
token = xml_parse_error;
start = "unexpected end of file";
callback(token, start, int(std::strlen(start)), nullptr, 0);
callback(xml_parse_error, "unexpected end of file", {});
break;
}
@ -106,39 +98,29 @@ namespace libtorrent
if (*start == '/')
{
++start;
token = xml_end_tag;
const int name_len = int(tag_name_end - start);
callback(token, start, name_len, nullptr, 0);
callback(xml_end_tag, {start, std::size_t(tag_name_end - start)}, {});
}
else if (*(p - 1) == '/')
{
token = xml_empty_tag;
const int name_len = int((std::min)(tag_name_end - start, p - start - 1));
callback(token, start, name_len, nullptr, 0);
callback(xml_empty_tag, {start, std::size_t(std::min(tag_name_end - start, p - start - 1))}, {});
tag_end = p - 1;
}
else if (*start == '?' && *(p - 1) == '?')
{
++start;
token = xml_declaration_tag;
const int name_len = int((std::min)(tag_name_end - start, p - start - 1));
callback(token, start, name_len, nullptr, 0);
callback(xml_declaration_tag, {start, std::size_t(std::min(tag_name_end - start, p - start - 1))}, {});
tag_end = p - 1;
}
else if (start + 5 < p && std::memcmp(start, "!--", 3) == 0 && std::memcmp(p - 2, "--", 2) == 0)
{
start += 3;
token = xml_comment;
const int name_len = int(tag_name_end - start - 2);
callback(token, start, name_len, nullptr, 0);
callback(xml_comment, {start, std::size_t(tag_name_end - start - 2)}, {});
tag_end = p - 2;
continue;
}
else
{
token = xml_start_tag;
const int name_len = int(tag_name_end - start);
callback(token, start, name_len, nullptr, 0);
callback(xml_start_tag, {start, std::size_t(tag_name_end - start)}, {});
}
// parse attributes
@ -152,7 +134,7 @@ namespace libtorrent
start = i;
// find end of attribute name
for (; i != tag_end && *i != '=' && !is_space(*i); ++i);
const int name_len = int(i - start);
std::size_t const name_len = i - start;
// look for equality sign
for (; i != tag_end && *i != '='; ++i);
@ -161,8 +143,7 @@ namespace libtorrent
// instead of a series of key value pairs
if (i == tag_end)
{
token = xml_tag_content;
callback(token, start, int(i - start), nullptr, 0);
callback(xml_tag_content, {start, std::size_t(i - start)}, {});
break;
}
@ -171,9 +152,7 @@ namespace libtorrent
// check for parse error (values must be quoted)
if (i == tag_end || (*i != '\'' && *i != '\"'))
{
token = xml_parse_error;
start = "unquoted attribute value";
callback(token, start, int(std::strlen(start)), nullptr, 0);
callback(xml_parse_error, "unquoted attribute value", {});
break;
}
char quote = *i;
@ -183,14 +162,10 @@ namespace libtorrent
// parse error (missing end quote)
if (i == tag_end)
{
token = xml_parse_error;
start = "missing end quote on attribute";
callback(token, start, int(std::strlen(start)), nullptr, 0);
callback(xml_parse_error, "missing end quote on attribute", {});
break;
}
const int val_len = int(i - val_start);
token = xml_attribute;
callback(token, start, name_len, val_start, val_len);
callback(xml_attribute, {start, name_len}, {val_start, std::size_t(i - val_start)});
}
}
}

View File

@ -266,8 +266,8 @@ char upnp_xml4[] =
using namespace libtorrent;
using namespace std::placeholders;
void parser_callback(std::string& out, int token, char const* s, int len
, char const* val, int val_len)
void parser_callback(std::string& out, int token, string_view s
, string_view val)
{
switch (token)
{
@ -282,28 +282,24 @@ void parser_callback(std::string& out, int token, char const* s, int len
case xml_tag_content: out += "T"; break;
default: TEST_CHECK(false);
}
out.append(s, len);
out.append(s.begin(), s.end());
if (token == xml_attribute)
{
TEST_CHECK(val != nullptr);
TEST_CHECK(!val.empty());
out += "V";
out.append(val, val_len);
out.append(val.begin(), val.end());
}
else
{
TEST_CHECK(val == nullptr);
TEST_CHECK(val.empty());
}
}
span<char const> str(char const* in)
{
return span<char const>(in, strlen(in));
}
void test_parse(char const* in, char const* expected)
{
std::string out;
xml_parse(str(in), std::bind(&parser_callback
, std::ref(out), _1, _2, _3, _4, _5));
xml_parse(in, std::bind(&parser_callback
, std::ref(out), _1, _2, _3));
std::printf("in: %s\n out: %s\nexpected: %s\n"
, in, out.c_str(), expected);
TEST_EQUAL(out, expected);
@ -312,7 +308,7 @@ void test_parse(char const* in, char const* expected)
TORRENT_TEST(upnp_parser1)
{
parse_state xml_s;
xml_parse(upnp_xml, std::bind(&find_control_url, _1, _2, _3, std::ref(xml_s)));
xml_parse(upnp_xml, std::bind(&find_control_url, _1, _2, std::ref(xml_s)));
std::cout << "namespace " << xml_s.service_type << std::endl;
std::cout << "url_base: " << xml_s.url_base << std::endl;
@ -326,7 +322,7 @@ TORRENT_TEST(upnp_parser1)
TORRENT_TEST(upnp_parser2)
{
parse_state xml_s;
xml_parse(upnp_xml2, std::bind(&find_control_url, _1, _2, _3, std::ref(xml_s)));
xml_parse(upnp_xml2, std::bind(&find_control_url, _1, _2, std::ref(xml_s)));
std::cout << "namespace " << xml_s.service_type << std::endl;
std::cout << "url_base: " << xml_s.url_base << std::endl;
@ -340,7 +336,7 @@ TORRENT_TEST(upnp_parser2)
TORRENT_TEST(upnp_parser3)
{
error_code_parse_state xml_s;
xml_parse(upnp_xml3, std::bind(&find_error_code, _1, _2, _3, std::ref(xml_s)));
xml_parse(upnp_xml3, std::bind(&find_error_code, _1, _2, std::ref(xml_s)));
std::cout << "error_code " << xml_s.error_code << std::endl;
TEST_EQUAL(xml_s.error_code, 402);
@ -349,7 +345,7 @@ TORRENT_TEST(upnp_parser3)
TORRENT_TEST(upnp_parser4)
{
ip_address_parse_state xml_s;
xml_parse(upnp_xml4, std::bind(&find_ip_address, _1, _2, _3, std::ref(xml_s)));
xml_parse(upnp_xml4, std::bind(&find_ip_address, _1, _2, std::ref(xml_s)));
std::cout << "error_code " << xml_s.error_code << std::endl;
std::cout << "ip_address " << xml_s.ip_address << std::endl;