From e5a9a6b36f9d8008d93f06944e496c191c9587be Mon Sep 17 00:00:00 2001 From: Arvid Norberg Date: Sat, 3 May 2014 21:10:44 +0000 Subject: [PATCH] make xml_parse not be a template --- CMakeLists.txt | 3 +- Jamfile | 1 + include/libtorrent/xml_parse.hpp | 184 +------------------------- src/Makefile.am | 1 + src/xml_parse.cpp | 218 +++++++++++++++++++++++++++++++ 5 files changed, 227 insertions(+), 180 deletions(-) create mode 100644 src/xml_parse.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index f0776c66e..23196b40d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -53,7 +53,6 @@ set(sources socks5_stream stat storage - thread time timestamp_history torrent @@ -77,6 +76,8 @@ set(sources magnet_uri parse_url ConvertUTF + thread + xml_parse # -- extensions -- metadata_transfer diff --git a/Jamfile b/Jamfile index 4a2857f00..e08ee3529 100755 --- a/Jamfile +++ b/Jamfile @@ -590,6 +590,7 @@ SOURCES = parse_url ConvertUTF thread + xml_parse # -- extensions -- metadata_transfer diff --git a/include/libtorrent/xml_parse.hpp b/include/libtorrent/xml_parse.hpp index 784a11c4a..674f2f21f 100644 --- a/include/libtorrent/xml_parse.hpp +++ b/include/libtorrent/xml_parse.hpp @@ -36,9 +36,12 @@ POSSIBILITY OF SUCH DAMAGE. #include #include +#include "libtorrent/config.hpp" #include "libtorrent/assert.hpp" #include "libtorrent/escape_string.hpp" +#include + namespace libtorrent { enum @@ -59,185 +62,8 @@ namespace libtorrent // callback(int type, char const* name, char const* val) // str2 is only used for attributes. name is element or attribute // name and val is attribute value - - template - void xml_parse(char* p, char* end, CallbackType callback) - { - for(;p != end; ++p) - { - char const* start = p; - char const* val_start = 0; - int token; - // look for tag start - for(; p != end && *p != '<'; ++p); - - if (p != start) - { - if (p != end) - { - TORRENT_ASSERT(*p == '<'); - *p = 0; - } - token = xml_string; - callback(token, start, val_start); - if (p != end) *p = '<'; - } - - if (p == end) break; - - // skip '<' - ++p; - if (p != end && p+8 < end && string_begins_no_case("![CDATA[", p)) - { - // CDATA. match '![CDATA[' - p += 8; - start = p; - while (p != end && !string_begins_no_case("]]>", p-2)) ++p; - - // parse error - if (p == end) - { - token = xml_parse_error; - start = "unexpected end of file"; - callback(token, start, val_start); - break; - } - - token = xml_string; - char tmp = p[-2]; - p[-2] = 0; - callback(token, start, val_start); - p[-2] = tmp; - continue; - } - - // parse the name of the tag. - for (start = p; p != end && *p != '>' && !is_space(*p); ++p); - - char* tag_name_end = p; - - // skip the attributes for now - for (; p != end && *p != '>'; ++p); - - // parse error - if (p == end) - { - token = xml_parse_error; - start = "unexpected end of file"; - callback(token, start, val_start); - break; - } - - TORRENT_ASSERT(*p == '>'); - // save the character that terminated the tag name - // it could be both '>' and ' '. - char save = *tag_name_end; - *tag_name_end = 0; - - char* tag_end = p; - if (*start == '/') - { - ++start; - token = xml_end_tag; - callback(token, start, val_start); - } - else if (*(p-1) == '/') - { - *(p-1) = 0; - token = xml_empty_tag; - callback(token, start, val_start); - *(p-1) = '/'; - tag_end = p - 1; - } - else if (*start == '?' && *(p-1) == '?') - { - *(p-1) = 0; - ++start; - token = xml_declaration_tag; - callback(token, start, val_start); - *(p-1) = '?'; - tag_end = p - 1; - } - else if (start + 5 < p && std::memcmp(start, "!--", 3) == 0 && std::memcmp(p-2, "--", 2) == 0) - { - start += 3; - *(p-2) = 0; - token = xml_comment; - callback(token, start, val_start); - *(p-2) = '-'; - tag_end = p - 2; - continue; - } - else - { - token = xml_start_tag; - callback(token, start, val_start); - } - - *tag_name_end = save; - - // parse attributes - for (char* i = tag_name_end; i < tag_end; ++i) - { - // find start of attribute name - for (; i != tag_end && is_space(*i); ++i); - if (i == tag_end) break; - start = i; - // find end of attribute name - for (; i != tag_end && *i != '=' && !is_space(*i); ++i); - char* name_end = i; - - // look for equality sign - for (; i != tag_end && *i != '='; ++i); - - // no equality sign found. Report this as xml_tag_content - // instead of a series of key value pairs - if (i == tag_end) - { - char tmp = *i; - *i = 0; // null terminate the content string - token = xml_tag_content; - val_start = 0; - callback(token, start, val_start); - *i = tmp; - break; - } - - ++i; - for (; i != tag_end && is_space(*i); ++i); - // check for parse error (values must be quoted) - if (i == tag_end || (*i != '\'' && *i != '\"')) - { - token = xml_parse_error; - val_start = 0; - start = "unquoted attribute value"; - callback(token, start, val_start); - break; - } - char quote = *i; - ++i; - val_start = i; - for (; i != tag_end && *i != quote; ++i); - // parse error (missing end quote) - if (i == tag_end) - { - token = xml_parse_error; - val_start = 0; - start = "missing end quote on attribute"; - callback(token, start, val_start); - break; - } - save = *i; - *i = 0; - *name_end = 0; - token = xml_attribute; - callback(token, start, val_start); - *name_end = '='; - *i = save; - } - } - } - + TORRENT_EXTRA_EXPORT void xml_parse(char* p, char* end + , boost::function callback); } diff --git a/src/Makefile.am b/src/Makefile.am index ffde2a557..a3ab05d8f 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -117,6 +117,7 @@ libtorrent_rasterbar_la_SOURCES = \ utp_socket_manager.cpp \ utp_stream.cpp \ web_peer_connection.cpp \ + xml_parse.cpp \ \ $(KADEMLIA_SOURCES) \ $(GEOIP_SOURCES) \ diff --git a/src/xml_parse.cpp b/src/xml_parse.cpp new file mode 100644 index 000000000..83f344701 --- /dev/null +++ b/src/xml_parse.cpp @@ -0,0 +1,218 @@ +/* + +Copyright (c) 2007-2014, Arvid Norberg +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the distribution. + * Neither the name of the author nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. + +*/ + + +#include "libtorrent/xml_parse.hpp" + +namespace libtorrent +{ + + TORRENT_EXTRA_EXPORT void xml_parse(char* p, char* end + , boost::function callback) + { + for(;p != end; ++p) + { + char const* start = p; + char const* val_start = 0; + int token; + // look for tag start + for(; p != end && *p != '<'; ++p); + + if (p != start) + { + if (p != end) + { + TORRENT_ASSERT(*p == '<'); + *p = 0; + } + token = xml_string; + callback(token, start, val_start); + if (p != end) *p = '<'; + } + + if (p == end) break; + + // skip '<' + ++p; + if (p != end && p+8 < end && string_begins_no_case("![CDATA[", p)) + { + // CDATA. match '![CDATA[' + p += 8; + start = p; + while (p != end && !string_begins_no_case("]]>", p-2)) ++p; + + // parse error + if (p == end) + { + token = xml_parse_error; + start = "unexpected end of file"; + callback(token, start, val_start); + break; + } + + token = xml_string; + char tmp = p[-2]; + p[-2] = 0; + callback(token, start, val_start); + p[-2] = tmp; + continue; + } + + // parse the name of the tag. + for (start = p; p != end && *p != '>' && !is_space(*p); ++p); + + char* tag_name_end = p; + + // skip the attributes for now + for (; p != end && *p != '>'; ++p); + + // parse error + if (p == end) + { + token = xml_parse_error; + start = "unexpected end of file"; + callback(token, start, val_start); + break; + } + + TORRENT_ASSERT(*p == '>'); + // save the character that terminated the tag name + // it could be both '>' and ' '. + char save = *tag_name_end; + *tag_name_end = 0; + + char* tag_end = p; + if (*start == '/') + { + ++start; + token = xml_end_tag; + callback(token, start, val_start); + } + else if (*(p-1) == '/') + { + *(p-1) = 0; + token = xml_empty_tag; + callback(token, start, val_start); + *(p-1) = '/'; + tag_end = p - 1; + } + else if (*start == '?' && *(p-1) == '?') + { + *(p-1) = 0; + ++start; + token = xml_declaration_tag; + callback(token, start, val_start); + *(p-1) = '?'; + tag_end = p - 1; + } + else if (start + 5 < p && std::memcmp(start, "!--", 3) == 0 && std::memcmp(p-2, "--", 2) == 0) + { + start += 3; + *(p-2) = 0; + token = xml_comment; + callback(token, start, val_start); + *(p-2) = '-'; + tag_end = p - 2; + continue; + } + else + { + token = xml_start_tag; + callback(token, start, val_start); + } + + *tag_name_end = save; + + // parse attributes + for (char* i = tag_name_end; i < tag_end; ++i) + { + // find start of attribute name + for (; i != tag_end && is_space(*i); ++i); + if (i == tag_end) break; + start = i; + // find end of attribute name + for (; i != tag_end && *i != '=' && !is_space(*i); ++i); + char* name_end = i; + + // look for equality sign + for (; i != tag_end && *i != '='; ++i); + + // no equality sign found. Report this as xml_tag_content + // instead of a series of key value pairs + if (i == tag_end) + { + char tmp = *i; + *i = 0; // null terminate the content string + token = xml_tag_content; + val_start = 0; + callback(token, start, val_start); + *i = tmp; + break; + } + + ++i; + for (; i != tag_end && is_space(*i); ++i); + // check for parse error (values must be quoted) + if (i == tag_end || (*i != '\'' && *i != '\"')) + { + token = xml_parse_error; + val_start = 0; + start = "unquoted attribute value"; + callback(token, start, val_start); + break; + } + char quote = *i; + ++i; + val_start = i; + for (; i != tag_end && *i != quote; ++i); + // parse error (missing end quote) + if (i == tag_end) + { + token = xml_parse_error; + val_start = 0; + start = "missing end quote on attribute"; + callback(token, start, val_start); + break; + } + save = *i; + *i = 0; + *name_end = 0; + token = xml_attribute; + callback(token, start, val_start); + *name_end = '='; + *i = save; + } + } + } + +} +