From cb656945787628806eb00b4c32a538313e06fd21 Mon Sep 17 00:00:00 2001 From: arvidn Date: Sat, 5 Sep 2015 19:31:47 -0400 Subject: [PATCH] fix const correctness in xml_parse(). update unit tests rss and upnp to use new signature for parser callback --- include/libtorrent/rss.hpp | 21 +-- include/libtorrent/upnp.hpp | 3 +- include/libtorrent/xml_parse.hpp | 13 +- simulation/libsimulator | 2 +- src/rss.cpp | 51 +++--- src/upnp.cpp | 33 ++-- src/xml_parse.cpp | 87 ++++------ test/test_xml.cpp | 262 ++++++++++++++----------------- 8 files changed, 220 insertions(+), 252 deletions(-) diff --git a/include/libtorrent/rss.hpp b/include/libtorrent/rss.hpp index d7c01a941..8f7a35a8c 100644 --- a/include/libtorrent/rss.hpp +++ b/include/libtorrent/rss.hpp @@ -217,29 +217,30 @@ namespace libtorrent // are posted to the network thread struct TORRENT_EXTRA_EXPORT feed : boost::enable_shared_from_this { - friend void parse_feed(feed_state& f, int token, char const* name, char const* val); + friend void parse_feed(feed_state& f, int token, char const* name, int len + , char const* val, int val_len); feed(aux::session_impl& ses, feed_settings const& feed); void on_feed(error_code const& ec, http_parser const& parser , char const* data, int size); - + int update_feed(); - + aux::session_impl& session() const { return m_ses; } - + void set_settings(feed_settings const& s); void get_settings(feed_settings* s) const; - void get_feed_status(feed_status* ret) const; + void get_feed_status(feed_status* ret) const; int next_update(time_t now) const; void load_state(bdecode_node const& rd); void save_state(entry& rd) const; - + private: friend struct aux::session_impl; - + // explicitly disallow assignment, to silence msvc warning feed& operator=(feed const&); @@ -275,12 +276,12 @@ namespace libtorrent // true while waiting for the server to respond bool m_updating; feed_settings m_settings; - + aux::session_impl& m_ses; }; - + } #endif // TORRENT_NO_DEPRECATE - + #endif diff --git a/include/libtorrent/upnp.hpp b/include/libtorrent/upnp.hpp index 904d8bb52..1dd8ffa07 100644 --- a/include/libtorrent/upnp.hpp +++ b/include/libtorrent/upnp.hpp @@ -126,7 +126,8 @@ struct parse_state } }; -TORRENT_EXTRA_EXPORT void find_control_url(int type, char const* string, parse_state& state); +TORRENT_EXTRA_EXPORT void find_control_url(int type, char const* string + , int str_len, parse_state& state); // TODO: support using the windows API for UPnP operations as well class TORRENT_EXTRA_EXPORT upnp : public boost::enable_shared_from_this diff --git a/include/libtorrent/xml_parse.hpp b/include/libtorrent/xml_parse.hpp index 532e4da05..e91c0b34d 100644 --- a/include/libtorrent/xml_parse.hpp +++ b/include/libtorrent/xml_parse.hpp @@ -62,11 +62,14 @@ namespace libtorrent xml_tag_content }; - // callback(int type, char const* name, char const* val) - // str2 is only used for attributes. name is element or attribute - // name and val is attribute value - TORRENT_EXTRA_EXPORT void xml_parse(char* p, char* end - , boost::function callback); + // callback(int type, char const* name, int name_len + // , char const* val, int val_len) + // name is element or attribute name + // val is attribute value + // neither string is null terminated, but their lengths are specified via + // name_len and val_len respectively + TORRENT_EXTRA_EXPORT void xml_parse(char const* p, char const* end + , boost::function callback); } diff --git a/simulation/libsimulator b/simulation/libsimulator index c82420cd1..5924e3b17 160000 --- a/simulation/libsimulator +++ b/simulation/libsimulator @@ -1 +1 @@ -Subproject commit c82420cd1b6a83a028f51eeff6d19828136b466c +Subproject commit 5924e3b17eb0bdd710027187ccd2952422b25a39 diff --git a/src/rss.cpp b/src/rss.cpp index d55f005a6..abaf12a7c 100644 --- a/src/rss.cpp +++ b/src/rss.cpp @@ -166,7 +166,8 @@ struct feed_state } }; -void parse_feed(feed_state& f, int token, char const* name, char const* val) +void parse_feed(feed_state& f, int token, char const* name, int name_len + , char const* val, int val_len) { switch (token) { @@ -176,7 +177,7 @@ void parse_feed(feed_state& f, int token, char const* name, char const* val) case xml_start_tag: case xml_empty_tag: { - f.current_tag = name; + f.current_tag.assign(name, name_len); if (f.type == feed_state::none) { if (string_equal_no_case(f.current_tag.c_str(), "feed")) @@ -184,20 +185,21 @@ void parse_feed(feed_state& f, int token, char const* name, char const* val) else if (string_equal_no_case(f.current_tag.c_str(), "rss")) f.type = feed_state::rss2; } - if (f.is_item(name)) f.in_item = true; + if (f.is_item(f.current_tag.c_str())) f.in_item = true; return; } case xml_attribute: { if (!f.in_item) return; + std::string str(name, name_len); if (f.is_url(f.current_tag.c_str()) && f.type == feed_state::atom) { // atom feeds have items like this: // - if (string_equal_no_case(name, "href")) - f.current_item.url = val; - else if (string_equal_no_case(name, "length")) + if (string_equal_no_case(str.c_str(), "href")) + f.current_item.url.assign(val, val_len); + else if (string_equal_no_case(str.c_str(), "length")) f.current_item.size = strtoll(val, 0, 10); } else if (f.type == feed_state::rss2 @@ -205,9 +207,9 @@ void parse_feed(feed_state& f, int token, char const* name, char const* val) { // rss feeds have items like this: // - if (string_equal_no_case(name, "url")) - f.current_item.url = val; - else if (string_equal_no_case(name, "length")) + if (string_equal_no_case(str.c_str(), "url")) + f.current_item.url.assign(val, val_len); + else if (string_equal_no_case(str.c_str(), "length")) f.current_item.size = strtoll(val, 0, 10); } else if (f.type == feed_state::rss2 @@ -215,16 +217,16 @@ void parse_feed(feed_state& f, int token, char const* name, char const* val) { // rss feeds sometimes have items like this: // - if (string_equal_no_case(name, "url")) - f.current_item.url = val; - else if (string_equal_no_case(name, "filesize")) + if (string_equal_no_case(str.c_str(), "url")) + f.current_item.url.assign(val, val_len); + else if (string_equal_no_case(str.c_str(), "filesize")) f.current_item.size = strtoll(val, 0, 10); } return; } case xml_end_tag: { - if (f.in_item && f.is_item(name)) + if (f.in_item && f.is_item(std::string(name, name_len).c_str())) { f.in_item = false; if (!f.current_item.title.empty() @@ -243,9 +245,9 @@ void parse_feed(feed_state& f, int token, char const* name, char const* val) if (!f.in_item) { if (f.is_title(f.current_tag.c_str())) - f.ret.m_title = name; + f.ret.m_title.assign(name, name_len); else if (f.is_desc(f.current_tag.c_str())) - f.ret.m_description = name; + f.ret.m_description.assign(name, name_len); else if (f.is_ttl(f.current_tag.c_str())) { int tmp = atoi(name); @@ -254,20 +256,20 @@ void parse_feed(feed_state& f, int token, char const* name, char const* val) return; } if (f.is_title(f.current_tag.c_str())) - f.current_item.title = name; + f.current_item.title.assign(name, name_len); else if (f.is_desc(f.current_tag.c_str())) - f.current_item.description = name; + f.current_item.description.assign(name, name_len); else if (f.is_uuid(f.current_tag.c_str())) - f.current_item.uuid = name; + f.current_item.uuid.assign(name, name_len); else if (f.is_url(f.current_tag.c_str()) && f.type != feed_state::atom) - f.current_item.url = name; + f.current_item.url.assign(name, name_len); else if (f.is_comment(f.current_tag.c_str())) - f.current_item.comment = name; + f.current_item.comment.assign(name, name_len); else if (f.is_category(f.current_tag.c_str())) - f.current_item.category = name; + f.current_item.category.assign(name, name_len); else if (f.is_size(f.current_tag.c_str())) f.current_item.size = strtoll(name, 0, 10); - else if (f.is_hash(f.current_tag.c_str()) && strlen(name) == 40) + else if (f.is_hash(f.current_tag.c_str()) && name_len == 40) { if (!from_hex(name, 40, f.current_item.info_hash.data())) { @@ -381,10 +383,9 @@ void feed::on_feed(error_code const& ec m_failures = 0; - char* buf = const_cast(data); - feed_state s(*this); - xml_parse(buf, buf + size, boost::bind(&parse_feed, boost::ref(s), _1, _2, _3)); + xml_parse(data, data + size, boost::bind(&parse_feed, boost::ref(s) + , _1, _2, _3, _4, _5)); time_t now = time(NULL); diff --git a/src/upnp.cpp b/src/upnp.cpp index 854cf687e..a1db478a8 100644 --- a/src/upnp.cpp +++ b/src/upnp.cpp @@ -826,19 +826,24 @@ void upnp::delete_port_mapping(rootdevice& d, int i) namespace { - void copy_tolower(std::string& dst, char const* src) + void copy_tolower(std::string& dst, char const* src, int len) { dst.clear(); - while (*src) dst.push_back(to_lower(*src++)); + dst.reserve(len); + while (len-- > 0) + { + dst.push_back(to_lower(*src++)); + } } } -TORRENT_EXTRA_EXPORT void find_control_url(int type, char const* string, parse_state& state) +TORRENT_EXTRA_EXPORT void find_control_url(int type, char const* string + , int str_len, parse_state& state) { if (type == xml_start_tag) { std::string tag; - copy_tolower(tag, string); + copy_tolower(tag, string, str_len); state.tag_stack.push_back(tag); // std::copy(state.tag_stack.begin(), state.tag_stack.end(), std::ostream_iterator(std::cout, " ")); // std::cout << std::endl; @@ -858,26 +863,27 @@ TORRENT_EXTRA_EXPORT void find_control_url(int type, char const* string, parse_s // std::cout << " " << string << std::endl;} if (!state.in_service && state.top_tags("service", "servicetype")) { - if (string_equal_no_case(string, "urn:schemas-upnp-org:service:WANIPConnection:1") - || string_equal_no_case(string, "urn:schemas-upnp-org:service:WANIPConnection:2") - || string_equal_no_case(string, "urn:schemas-upnp-org:service:WANPPPConnection:1")) + std::string name(string, str_len); + if (string_equal_no_case(name.c_str(), "urn:schemas-upnp-org:service:WANIPConnection:1") + || string_equal_no_case(name.c_str(), "urn:schemas-upnp-org:service:WANIPConnection:2") + || string_equal_no_case(name.c_str(), "urn:schemas-upnp-org:service:WANPPPConnection:1")) { - state.service_type = string; + state.service_type.assign(string, str_len); state.in_service = true; } } else if (state.control_url.empty() && state.in_service && state.top_tags("service", "controlurl") && strlen(string) > 0) { // default to the first (or only) control url in the router's listing - state.control_url = string; + state.control_url.assign(string, str_len); } else if (state.model.empty() && state.top_tags("device", "modelname")) { - state.model = string; + state.model.assign(string, str_len); } else if (state.tag_stack.back() == "urlbase") { - state.url_base = string; + state.url_base.assign(string, str_len); } } } @@ -928,9 +934,8 @@ void upnp::on_upnp_xml(error_code const& e } parse_state s; - xml_parse(const_cast(p.get_body().begin), - const_cast(p.get_body().end) - , boost::bind(&find_control_url, _1, _2, boost::ref(s))); + xml_parse(p.get_body().begin, p.get_body().end + , boost::bind(&find_control_url, _1, _2, _3, boost::ref(s))); if (s.control_url.empty()) { char msg[500]; diff --git a/src/xml_parse.cpp b/src/xml_parse.cpp index 1efc0498d..5bfbf9a5c 100644 --- a/src/xml_parse.cpp +++ b/src/xml_parse.cpp @@ -37,29 +37,21 @@ POSSIBILITY OF SUCH DAMAGE. namespace libtorrent { - // TODO: 3 make this take a const buffer instead (and pass in string length - // to callback function). Then remove all associated const_casts. - TORRENT_EXTRA_EXPORT void xml_parse(char* p, char* end - , boost::function callback) + TORRENT_EXTRA_EXPORT void xml_parse(char const* p, char const* end + , boost::function callback) { for(;p != end; ++p) { char const* start = p; - char const* val_start = 0; int token; // look for tag start for(; p != end && *p != '<'; ++p); if (p != start) { - if (p != end) - { - TORRENT_ASSERT(*p == '<'); - *p = 0; - } token = xml_string; - callback(token, start, val_start); - if (p != end) *p = '<'; + const int name_len = p - start; + callback(token, start, name_len, NULL, 0); } if (p == end) break; @@ -78,22 +70,20 @@ namespace libtorrent { token = xml_parse_error; start = "unexpected end of file"; - callback(token, start, val_start); + callback(token, start, strlen(start), NULL, 0); break; } token = xml_string; - char tmp = p[-2]; - p[-2] = 0; - callback(token, start, val_start); - p[-2] = tmp; + const int name_len = p - start - 2; + callback(token, start, name_len, NULL, 0); continue; } // parse the name of the tag. for (start = p; p != end && *p != '>' && !is_space(*p); ++p); - char* tag_name_end = p; + char const* tag_name_end = p; // skip the attributes for now for (; p != end && *p != '>'; ++p); @@ -103,68 +93,63 @@ namespace libtorrent { token = xml_parse_error; start = "unexpected end of file"; - callback(token, start, val_start); + callback(token, start, strlen(start), NULL, 0); break; } - - TORRENT_ASSERT(*p == '>'); - // save the character that terminated the tag name - // it could be both '>' and ' '. - char save = *tag_name_end; - *tag_name_end = 0; - char* tag_end = p; + TORRENT_ASSERT(*p == '>'); + + char const* tag_end = p; if (*start == '/') { ++start; token = xml_end_tag; - callback(token, start, val_start); + const int name_len = tag_name_end - start; + callback(token, start, name_len, NULL, 0); } else if (*(p-1) == '/') { - *(p-1) = 0; token = xml_empty_tag; - callback(token, start, val_start); - *(p-1) = '/'; + const int name_len = (std::min)(tag_name_end - start, p - start - 1); + callback(token, start, name_len, NULL, 0); tag_end = p - 1; } else if (*start == '?' && *(p-1) == '?') { - *(p-1) = 0; ++start; token = xml_declaration_tag; - callback(token, start, val_start); - *(p-1) = '?'; + const int name_len = (std::min)(tag_name_end - start, p - start - 1); + callback(token, start, name_len, NULL, 0); tag_end = p - 1; } else if (start + 5 < p && std::memcmp(start, "!--", 3) == 0 && std::memcmp(p-2, "--", 2) == 0) { start += 3; - *(p-2) = 0; token = xml_comment; - callback(token, start, val_start); - *(p-2) = '-'; + const int name_len = tag_name_end - start - 2; + callback(token, start, name_len, NULL, 0); tag_end = p - 2; continue; } else { token = xml_start_tag; - callback(token, start, val_start); + const int name_len = tag_name_end - start; + callback(token, start, name_len, NULL, 0); } - *tag_name_end = save; - // parse attributes - for (char* i = tag_name_end; i < tag_end; ++i) + for (char const* i = tag_name_end; i < tag_end; ++i) { + char const* val_start = NULL; + // find start of attribute name for (; i != tag_end && is_space(*i); ++i); if (i == tag_end) break; start = i; // find end of attribute name for (; i != tag_end && *i != '=' && !is_space(*i); ++i); - char* name_end = i; + const int name_len = i - start; // look for equality sign for (; i != tag_end && *i != '='; ++i); @@ -173,12 +158,8 @@ namespace libtorrent // instead of a series of key value pairs if (i == tag_end) { - char tmp = *i; - *i = 0; // null terminate the content string token = xml_tag_content; - val_start = 0; - callback(token, start, val_start); - *i = tmp; + callback(token, start, i - start, NULL, 0); break; } @@ -188,9 +169,8 @@ namespace libtorrent if (i == tag_end || (*i != '\'' && *i != '\"')) { token = xml_parse_error; - val_start = 0; start = "unquoted attribute value"; - callback(token, start, val_start); + callback(token, start, strlen(start), NULL, 0); break; } char quote = *i; @@ -201,18 +181,13 @@ namespace libtorrent if (i == tag_end) { token = xml_parse_error; - val_start = 0; start = "missing end quote on attribute"; - callback(token, start, val_start); + callback(token, start, strlen(start), NULL, 0); break; } - save = *i; - *i = 0; - *name_end = 0; + const int val_len = i - val_start; token = xml_attribute; - callback(token, start, val_start); - *name_end = '='; - *i = save; + callback(token, start, name_len, val_start, val_len); } } } diff --git a/test/test_xml.cpp b/test/test_xml.cpp index 84a817028..5a917ec5e 100644 --- a/test/test_xml.cpp +++ b/test/test_xml.cpp @@ -235,7 +235,8 @@ char upnp_xml2[] = using namespace libtorrent; -void parser_callback(std::string& out, int token, char const* s, char const* val) +void parser_callback(std::string& out, int token, char const* s, int len + , char const* val, int val_len) { switch (token) { @@ -250,154 +251,135 @@ void parser_callback(std::string& out, int token, char const* s, char const* val case xml_tag_content: out += "T"; break; default: TEST_CHECK(false); } - out += s; + out.append(s, len); if (token == xml_attribute) { - TEST_CHECK(val != 0); + TEST_CHECK(val != NULL); out += "V"; - out += val; + out.append(val, val_len); } else { - TEST_CHECK(val == 0); + TEST_CHECK(val == NULL); } } -TORRENT_TEST(xml) +void test_parse(char const* in, char const* expected) { - // test upnp xml parser - { - parse_state xml_s; - xml_parse(upnp_xml, upnp_xml + sizeof(upnp_xml) - , boost::bind(&find_control_url, _1, _2, boost::ref(xml_s))); - - std::cerr << "namespace " << xml_s.service_type << std::endl; - std::cerr << "url_base: " << xml_s.url_base << std::endl; - std::cerr << "control_url: " << xml_s.control_url << std::endl; - std::cerr << "model: " << xml_s.model << std::endl; - TEST_CHECK(xml_s.url_base == "http://192.168.0.1:5678"); - TEST_CHECK(xml_s.control_url == "/WANIPConnection"); - TEST_CHECK(xml_s.model == "D-Link Router"); - } - { - parse_state xml_s; - xml_parse(upnp_xml2, upnp_xml2 + sizeof(upnp_xml2) - , boost::bind(&find_control_url, _1, _2, boost::ref(xml_s))); - - std::cerr << "namespace " << xml_s.service_type << std::endl; - std::cerr << "url_base: " << xml_s.url_base << std::endl; - std::cerr << "control_url: " << xml_s.control_url << std::endl; - std::cerr << "model: " << xml_s.model << std::endl; - TEST_CHECK(xml_s.url_base == "http://192.168.1.1:49152"); - TEST_CHECK(xml_s.control_url == "/upnp/control/WANPPPConn1"); - TEST_CHECK(xml_s.model == "Wireless-G ADSL Home Gateway"); - } - - { - // test xml parser - char xml[] = "foobar"; - std::string out; - - xml_parse(xml, xml + sizeof(xml) - 1, boost::bind(&parser_callback - , boost::ref(out), _1, _2, _3)); - std::cerr << out << std::endl; - TEST_CHECK(out == "BaSfooEbSbarFa"); - } - - { - char xml[] = ""; - std::string out; - - xml_parse(xml, xml + sizeof(xml) - 1, boost::bind(&parser_callback - , boost::ref(out), _1, _2, _3)); - std::cerr << out << std::endl; - TEST_CHECK(out == "DxmlAversionV1.0EcAxV1AyV3BdAfooVbarFdAbooVfooCcomment"); - } - - { - char xml[] = "foo" + , "DxmlAversionV1.0EcAxV1AyV3BdAfooVbarFdAbooVfooCcomment"); +} + +TORRENT_TEST(empty_tag) +{ + test_parse("", "Efoo"); +} + +TORRENT_TEST(empty_tag_whitespace) +{ + test_parse("", "Efoo"); +} + +TORRENT_TEST(xml_tag_no_attribute) +{ + test_parse("", "Dxml"); +} + +TORRENT_TEST(xml_tag_no_attribute_whitespace) +{ + test_parse("", "Dxml"); +} + +TORRENT_TEST(attribute_missing_qoute) +{ + test_parse("foo