fix const correctness in xml_parse(). update unit tests rss and upnp to use new signature for parser callback

This commit is contained in:
arvidn 2015-09-05 19:31:47 -04:00
parent 7e70c41646
commit cb65694578
8 changed files with 220 additions and 252 deletions

View File

@ -217,7 +217,8 @@ namespace libtorrent
// are posted to the network thread
struct TORRENT_EXTRA_EXPORT feed : boost::enable_shared_from_this<feed>
{
friend void parse_feed(feed_state& f, int token, char const* name, char const* val);
friend void parse_feed(feed_state& f, int token, char const* name, int len
, char const* val, int val_len);
feed(aux::session_impl& ses, feed_settings const& feed);

View File

@ -126,7 +126,8 @@ struct parse_state
}
};
TORRENT_EXTRA_EXPORT void find_control_url(int type, char const* string, parse_state& state);
TORRENT_EXTRA_EXPORT void find_control_url(int type, char const* string
, int str_len, parse_state& state);
// TODO: support using the windows API for UPnP operations as well
class TORRENT_EXTRA_EXPORT upnp : public boost::enable_shared_from_this<upnp>

View File

@ -62,11 +62,14 @@ namespace libtorrent
xml_tag_content
};
// callback(int type, char const* name, char const* val)
// str2 is only used for attributes. name is element or attribute
// name and val is attribute value
TORRENT_EXTRA_EXPORT void xml_parse(char* p, char* end
, boost::function<void(int,char const*,char const*)> callback);
// callback(int type, char const* name, int name_len
// , char const* val, int val_len)
// name is element or attribute name
// val is attribute value
// neither string is null terminated, but their lengths are specified via
// name_len and val_len respectively
TORRENT_EXTRA_EXPORT void xml_parse(char const* p, char const* end
, boost::function<void(int,char const*,int,char const*,int)> callback);
}

@ -1 +1 @@
Subproject commit c82420cd1b6a83a028f51eeff6d19828136b466c
Subproject commit 5924e3b17eb0bdd710027187ccd2952422b25a39

View File

@ -166,7 +166,8 @@ struct feed_state
}
};
void parse_feed(feed_state& f, int token, char const* name, char const* val)
void parse_feed(feed_state& f, int token, char const* name, int name_len
, char const* val, int val_len)
{
switch (token)
{
@ -176,7 +177,7 @@ void parse_feed(feed_state& f, int token, char const* name, char const* val)
case xml_start_tag:
case xml_empty_tag:
{
f.current_tag = name;
f.current_tag.assign(name, name_len);
if (f.type == feed_state::none)
{
if (string_equal_no_case(f.current_tag.c_str(), "feed"))
@ -184,20 +185,21 @@ void parse_feed(feed_state& f, int token, char const* name, char const* val)
else if (string_equal_no_case(f.current_tag.c_str(), "rss"))
f.type = feed_state::rss2;
}
if (f.is_item(name)) f.in_item = true;
if (f.is_item(f.current_tag.c_str())) f.in_item = true;
return;
}
case xml_attribute:
{
if (!f.in_item) return;
std::string str(name, name_len);
if (f.is_url(f.current_tag.c_str())
&& f.type == feed_state::atom)
{
// atom feeds have items like this:
// <link href="http://..." length="12345"/>
if (string_equal_no_case(name, "href"))
f.current_item.url = val;
else if (string_equal_no_case(name, "length"))
if (string_equal_no_case(str.c_str(), "href"))
f.current_item.url.assign(val, val_len);
else if (string_equal_no_case(str.c_str(), "length"))
f.current_item.size = strtoll(val, 0, 10);
}
else if (f.type == feed_state::rss2
@ -205,9 +207,9 @@ void parse_feed(feed_state& f, int token, char const* name, char const* val)
{
// rss feeds have items like this:
// <enclosure url="http://..." length="12345"/>
if (string_equal_no_case(name, "url"))
f.current_item.url = val;
else if (string_equal_no_case(name, "length"))
if (string_equal_no_case(str.c_str(), "url"))
f.current_item.url.assign(val, val_len);
else if (string_equal_no_case(str.c_str(), "length"))
f.current_item.size = strtoll(val, 0, 10);
}
else if (f.type == feed_state::rss2
@ -215,16 +217,16 @@ void parse_feed(feed_state& f, int token, char const* name, char const* val)
{
// rss feeds sometimes have items like this:
// <media:content url="http://..." filesize="12345"/>
if (string_equal_no_case(name, "url"))
f.current_item.url = val;
else if (string_equal_no_case(name, "filesize"))
if (string_equal_no_case(str.c_str(), "url"))
f.current_item.url.assign(val, val_len);
else if (string_equal_no_case(str.c_str(), "filesize"))
f.current_item.size = strtoll(val, 0, 10);
}
return;
}
case xml_end_tag:
{
if (f.in_item && f.is_item(name))
if (f.in_item && f.is_item(std::string(name, name_len).c_str()))
{
f.in_item = false;
if (!f.current_item.title.empty()
@ -243,9 +245,9 @@ void parse_feed(feed_state& f, int token, char const* name, char const* val)
if (!f.in_item)
{
if (f.is_title(f.current_tag.c_str()))
f.ret.m_title = name;
f.ret.m_title.assign(name, name_len);
else if (f.is_desc(f.current_tag.c_str()))
f.ret.m_description = name;
f.ret.m_description.assign(name, name_len);
else if (f.is_ttl(f.current_tag.c_str()))
{
int tmp = atoi(name);
@ -254,20 +256,20 @@ void parse_feed(feed_state& f, int token, char const* name, char const* val)
return;
}
if (f.is_title(f.current_tag.c_str()))
f.current_item.title = name;
f.current_item.title.assign(name, name_len);
else if (f.is_desc(f.current_tag.c_str()))
f.current_item.description = name;
f.current_item.description.assign(name, name_len);
else if (f.is_uuid(f.current_tag.c_str()))
f.current_item.uuid = name;
f.current_item.uuid.assign(name, name_len);
else if (f.is_url(f.current_tag.c_str()) && f.type != feed_state::atom)
f.current_item.url = name;
f.current_item.url.assign(name, name_len);
else if (f.is_comment(f.current_tag.c_str()))
f.current_item.comment = name;
f.current_item.comment.assign(name, name_len);
else if (f.is_category(f.current_tag.c_str()))
f.current_item.category = name;
f.current_item.category.assign(name, name_len);
else if (f.is_size(f.current_tag.c_str()))
f.current_item.size = strtoll(name, 0, 10);
else if (f.is_hash(f.current_tag.c_str()) && strlen(name) == 40)
else if (f.is_hash(f.current_tag.c_str()) && name_len == 40)
{
if (!from_hex(name, 40, f.current_item.info_hash.data()))
{
@ -381,10 +383,9 @@ void feed::on_feed(error_code const& ec
m_failures = 0;
char* buf = const_cast<char*>(data);
feed_state s(*this);
xml_parse(buf, buf + size, boost::bind(&parse_feed, boost::ref(s), _1, _2, _3));
xml_parse(data, data + size, boost::bind(&parse_feed, boost::ref(s)
, _1, _2, _3, _4, _5));
time_t now = time(NULL);

View File

@ -826,19 +826,24 @@ void upnp::delete_port_mapping(rootdevice& d, int i)
namespace
{
void copy_tolower(std::string& dst, char const* src)
void copy_tolower(std::string& dst, char const* src, int len)
{
dst.clear();
while (*src) dst.push_back(to_lower(*src++));
dst.reserve(len);
while (len-- > 0)
{
dst.push_back(to_lower(*src++));
}
}
}
TORRENT_EXTRA_EXPORT void find_control_url(int type, char const* string, parse_state& state)
TORRENT_EXTRA_EXPORT void find_control_url(int type, char const* string
, int str_len, parse_state& state)
{
if (type == xml_start_tag)
{
std::string tag;
copy_tolower(tag, string);
copy_tolower(tag, string, str_len);
state.tag_stack.push_back(tag);
// std::copy(state.tag_stack.begin(), state.tag_stack.end(), std::ostream_iterator<std::string>(std::cout, " "));
// std::cout << std::endl;
@ -858,26 +863,27 @@ TORRENT_EXTRA_EXPORT void find_control_url(int type, char const* string, parse_s
// std::cout << " " << string << std::endl;}
if (!state.in_service && state.top_tags("service", "servicetype"))
{
if (string_equal_no_case(string, "urn:schemas-upnp-org:service:WANIPConnection:1")
|| string_equal_no_case(string, "urn:schemas-upnp-org:service:WANIPConnection:2")
|| string_equal_no_case(string, "urn:schemas-upnp-org:service:WANPPPConnection:1"))
std::string name(string, str_len);
if (string_equal_no_case(name.c_str(), "urn:schemas-upnp-org:service:WANIPConnection:1")
|| string_equal_no_case(name.c_str(), "urn:schemas-upnp-org:service:WANIPConnection:2")
|| string_equal_no_case(name.c_str(), "urn:schemas-upnp-org:service:WANPPPConnection:1"))
{
state.service_type = string;
state.service_type.assign(string, str_len);
state.in_service = true;
}
}
else if (state.control_url.empty() && state.in_service && state.top_tags("service", "controlurl") && strlen(string) > 0)
{
// default to the first (or only) control url in the router's listing
state.control_url = string;
state.control_url.assign(string, str_len);
}
else if (state.model.empty() && state.top_tags("device", "modelname"))
{
state.model = string;
state.model.assign(string, str_len);
}
else if (state.tag_stack.back() == "urlbase")
{
state.url_base = string;
state.url_base.assign(string, str_len);
}
}
}
@ -928,9 +934,8 @@ void upnp::on_upnp_xml(error_code const& e
}
parse_state s;
xml_parse(const_cast<char*>(p.get_body().begin),
const_cast<char*>(p.get_body().end)
, boost::bind(&find_control_url, _1, _2, boost::ref(s)));
xml_parse(p.get_body().begin, p.get_body().end
, boost::bind(&find_control_url, _1, _2, _3, boost::ref(s)));
if (s.control_url.empty())
{
char msg[500];

View File

@ -37,29 +37,21 @@ POSSIBILITY OF SUCH DAMAGE.
namespace libtorrent
{
// TODO: 3 make this take a const buffer instead (and pass in string length
// to callback function). Then remove all associated const_casts.
TORRENT_EXTRA_EXPORT void xml_parse(char* p, char* end
, boost::function<void(int,char const*,char const*)> callback)
TORRENT_EXTRA_EXPORT void xml_parse(char const* p, char const* end
, boost::function<void(int,char const*,int,char const*,int)> callback)
{
for(;p != end; ++p)
{
char const* start = p;
char const* val_start = 0;
int token;
// look for tag start
for(; p != end && *p != '<'; ++p);
if (p != start)
{
if (p != end)
{
TORRENT_ASSERT(*p == '<');
*p = 0;
}
token = xml_string;
callback(token, start, val_start);
if (p != end) *p = '<';
const int name_len = p - start;
callback(token, start, name_len, NULL, 0);
}
if (p == end) break;
@ -78,22 +70,20 @@ namespace libtorrent
{
token = xml_parse_error;
start = "unexpected end of file";
callback(token, start, val_start);
callback(token, start, strlen(start), NULL, 0);
break;
}
token = xml_string;
char tmp = p[-2];
p[-2] = 0;
callback(token, start, val_start);
p[-2] = tmp;
const int name_len = p - start - 2;
callback(token, start, name_len, NULL, 0);
continue;
}
// parse the name of the tag.
for (start = p; p != end && *p != '>' && !is_space(*p); ++p);
char* tag_name_end = p;
char const* tag_name_end = p;
// skip the attributes for now
for (; p != end && *p != '>'; ++p);
@ -103,68 +93,63 @@ namespace libtorrent
{
token = xml_parse_error;
start = "unexpected end of file";
callback(token, start, val_start);
callback(token, start, strlen(start), NULL, 0);
break;
}
TORRENT_ASSERT(*p == '>');
// save the character that terminated the tag name
// it could be both '>' and ' '.
char save = *tag_name_end;
*tag_name_end = 0;
char* tag_end = p;
char const* tag_end = p;
if (*start == '/')
{
++start;
token = xml_end_tag;
callback(token, start, val_start);
const int name_len = tag_name_end - start;
callback(token, start, name_len, NULL, 0);
}
else if (*(p-1) == '/')
{
*(p-1) = 0;
token = xml_empty_tag;
callback(token, start, val_start);
*(p-1) = '/';
const int name_len = (std::min)(tag_name_end - start, p - start - 1);
callback(token, start, name_len, NULL, 0);
tag_end = p - 1;
}
else if (*start == '?' && *(p-1) == '?')
{
*(p-1) = 0;
++start;
token = xml_declaration_tag;
callback(token, start, val_start);
*(p-1) = '?';
const int name_len = (std::min)(tag_name_end - start, p - start - 1);
callback(token, start, name_len, NULL, 0);
tag_end = p - 1;
}
else if (start + 5 < p && std::memcmp(start, "!--", 3) == 0 && std::memcmp(p-2, "--", 2) == 0)
{
start += 3;
*(p-2) = 0;
token = xml_comment;
callback(token, start, val_start);
*(p-2) = '-';
const int name_len = tag_name_end - start - 2;
callback(token, start, name_len, NULL, 0);
tag_end = p - 2;
continue;
}
else
{
token = xml_start_tag;
callback(token, start, val_start);
const int name_len = tag_name_end - start;
callback(token, start, name_len, NULL, 0);
}
*tag_name_end = save;
// parse attributes
for (char* i = tag_name_end; i < tag_end; ++i)
for (char const* i = tag_name_end; i < tag_end; ++i)
{
char const* val_start = NULL;
// find start of attribute name
for (; i != tag_end && is_space(*i); ++i);
if (i == tag_end) break;
start = i;
// find end of attribute name
for (; i != tag_end && *i != '=' && !is_space(*i); ++i);
char* name_end = i;
const int name_len = i - start;
// look for equality sign
for (; i != tag_end && *i != '='; ++i);
@ -173,12 +158,8 @@ namespace libtorrent
// instead of a series of key value pairs
if (i == tag_end)
{
char tmp = *i;
*i = 0; // null terminate the content string
token = xml_tag_content;
val_start = 0;
callback(token, start, val_start);
*i = tmp;
callback(token, start, i - start, NULL, 0);
break;
}
@ -188,9 +169,8 @@ namespace libtorrent
if (i == tag_end || (*i != '\'' && *i != '\"'))
{
token = xml_parse_error;
val_start = 0;
start = "unquoted attribute value";
callback(token, start, val_start);
callback(token, start, strlen(start), NULL, 0);
break;
}
char quote = *i;
@ -201,18 +181,13 @@ namespace libtorrent
if (i == tag_end)
{
token = xml_parse_error;
val_start = 0;
start = "missing end quote on attribute";
callback(token, start, val_start);
callback(token, start, strlen(start), NULL, 0);
break;
}
save = *i;
*i = 0;
*name_end = 0;
const int val_len = i - val_start;
token = xml_attribute;
callback(token, start, val_start);
*name_end = '=';
*i = save;
callback(token, start, name_len, val_start, val_len);
}
}
}

View File

@ -235,7 +235,8 @@ char upnp_xml2[] =
using namespace libtorrent;
void parser_callback(std::string& out, int token, char const* s, char const* val)
void parser_callback(std::string& out, int token, char const* s, int len
, char const* val, int val_len)
{
switch (token)
{
@ -250,154 +251,135 @@ void parser_callback(std::string& out, int token, char const* s, char const* val
case xml_tag_content: out += "T"; break;
default: TEST_CHECK(false);
}
out += s;
out.append(s, len);
if (token == xml_attribute)
{
TEST_CHECK(val != 0);
TEST_CHECK(val != NULL);
out += "V";
out += val;
out.append(val, val_len);
}
else
{
TEST_CHECK(val == 0);
TEST_CHECK(val == NULL);
}
}
TORRENT_TEST(xml)
void test_parse(char const* in, char const* expected)
{
std::string out;
xml_parse(in, in + strlen(in), boost::bind(&parser_callback
, boost::ref(out), _1, _2, _3, _4, _5));
fprintf(stderr, "in: %s\n out: %s\nexpected: %s\n"
, in, out.c_str(), expected);
TEST_EQUAL(out, expected);
}
TORRENT_TEST(upnp_parser1)
{
// test upnp xml parser
{
parse_state xml_s;
xml_parse(upnp_xml, upnp_xml + sizeof(upnp_xml)
, boost::bind(&find_control_url, _1, _2, boost::ref(xml_s)));
, boost::bind(&find_control_url, _1, _2, _3, boost::ref(xml_s)));
std::cerr << "namespace " << xml_s.service_type << std::endl;
std::cerr << "url_base: " << xml_s.url_base << std::endl;
std::cerr << "control_url: " << xml_s.control_url << std::endl;
std::cerr << "model: " << xml_s.model << std::endl;
TEST_CHECK(xml_s.url_base == "http://192.168.0.1:5678");
TEST_CHECK(xml_s.control_url == "/WANIPConnection");
TEST_CHECK(xml_s.model == "D-Link Router");
}
{
parse_state xml_s;
xml_parse(upnp_xml2, upnp_xml2 + sizeof(upnp_xml2)
, boost::bind(&find_control_url, _1, _2, boost::ref(xml_s)));
std::cerr << "namespace " << xml_s.service_type << std::endl;
std::cerr << "url_base: " << xml_s.url_base << std::endl;
std::cerr << "control_url: " << xml_s.control_url << std::endl;
std::cerr << "model: " << xml_s.model << std::endl;
TEST_CHECK(xml_s.url_base == "http://192.168.1.1:49152");
TEST_CHECK(xml_s.control_url == "/upnp/control/WANPPPConn1");
TEST_CHECK(xml_s.model == "Wireless-G ADSL Home Gateway");
}
{
// test xml parser
char xml[] = "<a>foo<b/>bar</a>";
std::string out;
xml_parse(xml, xml + sizeof(xml) - 1, boost::bind(&parser_callback
, boost::ref(out), _1, _2, _3));
std::cerr << out << std::endl;
TEST_CHECK(out == "BaSfooEbSbarFa");
}
{
char xml[] = "<?xml version = \"1.0\"?><c x=\"1\" \t y=\"3\"/><d foo='bar'></d boo='foo'><!--comment-->";
std::string out;
xml_parse(xml, xml + sizeof(xml) - 1, boost::bind(&parser_callback
, boost::ref(out), _1, _2, _3));
std::cerr << out << std::endl;
TEST_CHECK(out == "DxmlAversionV1.0EcAxV1AyV3BdAfooVbarFdAbooVfooCcomment");
}
{
char xml[] = "<a f=1>foo</a f='b>";
std::string out;
xml_parse(xml, xml + sizeof(xml) - 1, boost::bind(&parser_callback
, boost::ref(out), _1, _2, _3));
std::cerr << out << std::endl;
TEST_CHECK(out == "BaPunquoted attribute valueSfooFaPmissing end quote on attribute");
}
{
char xml[] = "<a f>foo</a v >";
std::string out;
xml_parse(xml, xml + sizeof(xml) - 1, boost::bind(&parser_callback
, boost::ref(out), _1, _2, _3));
std::cerr << out << std::endl;
TEST_CHECK(out == "BaTfSfooFaTv ");
}
{
// test unterminated CDATA tags
char xml[] = "<![CDATA[foo";
std::string out;
xml_parse(xml, xml + sizeof(xml) - 1, boost::bind(&parser_callback
, boost::ref(out), _1, _2, _3));
std::cerr << out << std::endl;
TEST_CHECK(out == "Punexpected end of file");
}
{
// test CDATA tag
char xml[] = "<![CDATA[verbatim tag that can have > and < in it]]>";
std::string out;
xml_parse(xml, xml + sizeof(xml) - 1, boost::bind(&parser_callback
, boost::ref(out), _1, _2, _3));
std::cerr << out << std::endl;
TEST_CHECK(out == "Sverbatim tag that can have > and < in it");
}
{
// test unterminated tags
char xml[] = "<foo";
std::string out;
xml_parse(xml, xml + sizeof(xml) - 1, boost::bind(&parser_callback
, boost::ref(out), _1, _2, _3));
std::cerr << out << std::endl;
TEST_CHECK(out == "Punexpected end of file");
}
{
// test unquoted attribute values
char xml[] = "<foo a=bar>";
std::string out;
xml_parse(xml, xml + sizeof(xml) - 1, boost::bind(&parser_callback
, boost::ref(out), _1, _2, _3));
std::cerr << out << std::endl;
TEST_CHECK(out == "BfooPunquoted attribute value");
}
{
// test unterminated attribute value
char xml[] = "<foo a=\"bar>";
std::string out;
xml_parse(xml, xml + sizeof(xml) - 1, boost::bind(&parser_callback
, boost::ref(out), _1, _2, _3));
std::cerr << out << std::endl;
TEST_CHECK(out == "BfooPmissing end quote on attribute");
}
{
// test unterminated tag
char xml[] = "<foo a=\"bar";
std::string out;
xml_parse(xml, xml + sizeof(xml) - 1, boost::bind(&parser_callback
, boost::ref(out), _1, _2, _3));
std::cerr << out << std::endl;
TEST_CHECK(out == "Punexpected end of file");
}
TEST_EQUAL(xml_s.url_base, "http://192.168.0.1:5678");
TEST_EQUAL(xml_s.control_url, "/WANIPConnection");
TEST_EQUAL(xml_s.model, "D-Link Router");
}
TORRENT_TEST(upnp_parser2)
{
parse_state xml_s;
xml_parse(upnp_xml2, upnp_xml2 + sizeof(upnp_xml2)
, boost::bind(&find_control_url, _1, _2, _3, boost::ref(xml_s)));
std::cerr << "namespace " << xml_s.service_type << std::endl;
std::cerr << "url_base: " << xml_s.url_base << std::endl;
std::cerr << "control_url: " << xml_s.control_url << std::endl;
std::cerr << "model: " << xml_s.model << std::endl;
TEST_EQUAL(xml_s.url_base, "http://192.168.1.1:49152");
TEST_EQUAL(xml_s.control_url, "/upnp/control/WANPPPConn1");
TEST_EQUAL(xml_s.model, "Wireless-G ADSL Home Gateway");
}
TORRENT_TEST(tags)
{
test_parse("<a>foo<b/>bar</a>", "BaSfooEbSbarFa");
}
TORRENT_TEST(xml_tag_comment)
{
test_parse("<?xml version = \"1.0\"?><c x=\"1\" \t y=\"3\"/><d foo='bar'></d boo='foo'><!--comment-->"
, "DxmlAversionV1.0EcAxV1AyV3BdAfooVbarFdAbooVfooCcomment");
}
TORRENT_TEST(empty_tag)
{
test_parse("<foo/>", "Efoo");
}
TORRENT_TEST(empty_tag_whitespace)
{
test_parse("<foo />", "Efoo");
}
TORRENT_TEST(xml_tag_no_attribute)
{
test_parse("<?xml?>", "Dxml");
}
TORRENT_TEST(xml_tag_no_attribute_whitespace)
{
test_parse("<?xml ?>", "Dxml");
}
TORRENT_TEST(attribute_missing_qoute)
{
test_parse("<a f=1>foo</a f='b>"
, "BaPunquoted attribute valueSfooFaPmissing end quote on attribute");
}
TORRENT_TEST(attribute_whitespace)
{
test_parse("<a f>foo</a v >", "BaTfSfooFaTv ");
}
TORRENT_TEST(unterminated_cdata)
{
// test unterminated CDATA tags
test_parse("<![CDATA[foo", "Punexpected end of file");
}
TORRENT_TEST(cdata)
{
// test CDATA tag
test_parse("<![CDATA[verbatim tag that can have > and < in it]]>"
, "Sverbatim tag that can have > and < in it");
}
TORRENT_TEST(unterminated_tag)
{
// test unterminated tags
test_parse("<foo", "Punexpected end of file");
}
TORRENT_TEST(unqouted_attribute_value)
{
// test unquoted attribute values
test_parse("<foo a=bar>", "BfooPunquoted attribute value");
}
TORRENT_TEST(unterminated_attribute)
{
// test unterminated attribute value
test_parse("<foo a=\"bar>", "BfooPmissing end quote on attribute");
}
TORRENT_TEST(unterminated_tag_with_attribute)
{
// test unterminated tag
test_parse("<foo a=\"bar", "Punexpected end of file");
}