2007-03-27 09:04:31 +02:00
|
|
|
/*

Copyright (c) 2007, Arvid Norberg
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:

    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in
      the documentation and/or other materials provided with the distribution.
    * Neither the name of the author nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

*/
|
|
|
|
|
|
|
|
#ifndef TORRENT_XML_PARSE_HPP
|
|
|
|
#define TORRENT_XML_PARSE_HPP
|
|
|
|
|
2007-08-07 08:48:47 +02:00
|
|
|
#include <cctype>
|
2009-02-10 08:21:28 +01:00
|
|
|
#include <cstring>
|
2007-08-07 08:48:47 +02:00
|
|
|
|
2011-07-20 07:14:25 +02:00
|
|
|
#include "libtorrent/escape_string.hpp"
|
|
|
|
|
2007-03-27 09:04:31 +02:00
|
|
|
namespace libtorrent
|
|
|
|
{
|
2007-08-07 03:32:38 +02:00
|
|
|
// token types passed as the first argument to the xml_parse() callback
enum
{
	// a tag that is none of the special forms below, e.g. <name ...>
	xml_start_tag,

	// a tag whose name starts with '/', e.g. </name>
	xml_end_tag,

	// a tag whose last character before '>' is '/', e.g. <name ... />
	xml_empty_tag,

	// a tag that both starts and ends with '?', e.g. <?xml ... ?>
	xml_declaration_tag,

	// character data between tags, or the content of a CDATA section
	xml_string,

	// a name="value" pair found inside a tag
	xml_attribute,

	// a tag of the form <!-- ... -->
	xml_comment,

	// malformed input. The string passed to the callback is a
	// description of the problem
	xml_parse_error,

	// used for tags that don't follow the convention of
	// key-value pairs inside the tag brackets. Like !DOCTYPE
	xml_tag_content
};
|
|
|
|
|
2007-08-07 08:48:47 +02:00
|
|
|
// callback(int type, char const* name, char const* val)
|
|
|
|
// str2 is only used for attributes. name is element or attribute
|
|
|
|
// name and val is attribute value
|
2007-03-27 09:04:31 +02:00
|
|
|
|
|
|
|
template <class CallbackType>
|
|
|
|
void xml_parse(char* p, char* end, CallbackType callback)
|
|
|
|
{
|
|
|
|
for(;p != end; ++p)
|
|
|
|
{
|
|
|
|
char const* start = p;
|
2007-08-07 03:32:38 +02:00
|
|
|
char const* val_start = 0;
|
|
|
|
int token;
|
2007-03-27 09:04:31 +02:00
|
|
|
// look for tag start
|
2012-02-18 10:54:49 +01:00
|
|
|
for(; p != end && *p != '<'; ++p);
|
2007-03-27 09:04:31 +02:00
|
|
|
|
|
|
|
if (p != start)
|
|
|
|
{
|
|
|
|
if (p != end)
|
|
|
|
{
|
2007-10-05 02:30:00 +02:00
|
|
|
TORRENT_ASSERT(*p == '<');
|
2007-03-27 09:04:31 +02:00
|
|
|
*p = 0;
|
|
|
|
}
|
2007-08-07 03:32:38 +02:00
|
|
|
token = xml_string;
|
|
|
|
callback(token, start, val_start);
|
2007-03-27 09:04:31 +02:00
|
|
|
if (p != end) *p = '<';
|
|
|
|
}
|
2007-08-07 03:32:38 +02:00
|
|
|
|
2007-03-27 09:04:31 +02:00
|
|
|
if (p == end) break;
|
|
|
|
|
|
|
|
// skip '<'
|
2010-12-29 02:59:41 +01:00
|
|
|
++p;
|
|
|
|
if (p != end && p+8 < end && string_begins_no_case("![CDATA[", p))
|
|
|
|
{
|
|
|
|
// CDATA. match '![CDATA['
|
|
|
|
p += 8;
|
|
|
|
start = p;
|
|
|
|
while (p != end && !string_begins_no_case("]]>", p-2)) ++p;
|
|
|
|
|
|
|
|
// parse error
|
|
|
|
if (p == end)
|
|
|
|
{
|
|
|
|
token = xml_parse_error;
|
|
|
|
start = "unexpected end of file";
|
|
|
|
callback(token, start, val_start);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
token = xml_string;
|
|
|
|
char tmp = p[-2];
|
|
|
|
p[-2] = 0;
|
|
|
|
callback(token, start, val_start);
|
|
|
|
p[-2] = tmp;
|
|
|
|
continue;
|
|
|
|
}
|
2007-03-27 09:04:31 +02:00
|
|
|
|
2007-08-07 03:32:38 +02:00
|
|
|
// parse the name of the tag.
|
2011-06-22 03:17:22 +02:00
|
|
|
for (start = p; p != end && *p != '>' && !is_space(*p); ++p);
|
2007-08-07 03:32:38 +02:00
|
|
|
|
|
|
|
char* tag_name_end = p;
|
|
|
|
|
|
|
|
// skip the attributes for now
|
|
|
|
for (; p != end && *p != '>'; ++p);
|
2007-03-27 09:04:31 +02:00
|
|
|
|
|
|
|
// parse error
|
2007-08-07 05:27:08 +02:00
|
|
|
if (p == end)
|
|
|
|
{
|
|
|
|
token = xml_parse_error;
|
|
|
|
start = "unexpected end of file";
|
|
|
|
callback(token, start, val_start);
|
|
|
|
break;
|
|
|
|
}
|
2007-03-27 09:04:31 +02:00
|
|
|
|
2007-10-05 02:30:00 +02:00
|
|
|
TORRENT_ASSERT(*p == '>');
|
2007-08-07 03:32:38 +02:00
|
|
|
// save the character that terminated the tag name
|
|
|
|
// it could be both '>' and ' '.
|
|
|
|
char save = *tag_name_end;
|
|
|
|
*tag_name_end = 0;
|
2007-03-27 09:04:31 +02:00
|
|
|
|
2007-08-07 03:32:38 +02:00
|
|
|
char* tag_end = p;
|
2007-03-27 09:04:31 +02:00
|
|
|
if (*start == '/')
|
|
|
|
{
|
|
|
|
++start;
|
2007-08-07 03:32:38 +02:00
|
|
|
token = xml_end_tag;
|
|
|
|
callback(token, start, val_start);
|
|
|
|
}
|
|
|
|
else if (*(p-1) == '/')
|
|
|
|
{
|
|
|
|
*(p-1) = 0;
|
|
|
|
token = xml_empty_tag;
|
|
|
|
callback(token, start, val_start);
|
|
|
|
*(p-1) = '/';
|
|
|
|
tag_end = p - 1;
|
2007-03-27 09:04:31 +02:00
|
|
|
}
|
2007-08-07 05:27:08 +02:00
|
|
|
else if (*start == '?' && *(p-1) == '?')
|
|
|
|
{
|
|
|
|
*(p-1) = 0;
|
|
|
|
++start;
|
|
|
|
token = xml_declaration_tag;
|
|
|
|
callback(token, start, val_start);
|
|
|
|
*(p-1) = '?';
|
|
|
|
tag_end = p - 1;
|
|
|
|
}
|
2009-01-27 09:24:48 +01:00
|
|
|
else if (start + 5 < p && std::memcmp(start, "!--", 3) == 0 && std::memcmp(p-2, "--", 2) == 0)
|
2007-08-07 05:27:08 +02:00
|
|
|
{
|
|
|
|
start += 3;
|
|
|
|
*(p-2) = 0;
|
|
|
|
token = xml_comment;
|
|
|
|
callback(token, start, val_start);
|
|
|
|
*(p-2) = '-';
|
|
|
|
tag_end = p - 2;
|
|
|
|
}
|
2007-03-27 09:04:31 +02:00
|
|
|
else
|
|
|
|
{
|
2007-08-07 03:32:38 +02:00
|
|
|
token = xml_start_tag;
|
|
|
|
callback(token, start, val_start);
|
|
|
|
}
|
|
|
|
|
|
|
|
*tag_name_end = save;
|
|
|
|
|
|
|
|
// parse attributes
|
|
|
|
for (char* i = tag_name_end; i < tag_end; ++i)
|
|
|
|
{
|
2007-08-07 05:27:08 +02:00
|
|
|
// find start of attribute name
|
2012-01-13 13:06:50 +01:00
|
|
|
for (; i != tag_end && is_space(*i); ++i);
|
2007-08-07 05:27:08 +02:00
|
|
|
if (i == tag_end) break;
|
|
|
|
start = i;
|
|
|
|
// find end of attribute name
|
2012-01-13 13:06:50 +01:00
|
|
|
for (; i != tag_end && *i != '=' && !is_space(*i); ++i);
|
2007-08-07 05:27:08 +02:00
|
|
|
char* name_end = i;
|
|
|
|
|
|
|
|
// look for equality sign
|
|
|
|
for (; i != tag_end && *i != '='; ++i);
|
|
|
|
|
2012-02-20 08:51:36 +01:00
|
|
|
// no equality sign found. Report this as xml_tag_content
|
|
|
|
// instead of a series of key value pairs
|
2007-08-07 05:27:08 +02:00
|
|
|
if (i == tag_end)
|
|
|
|
{
|
2012-02-27 08:02:11 +01:00
|
|
|
char tmp = *i;
|
|
|
|
*i = 0; // null terminate the content string
|
2012-02-20 08:51:36 +01:00
|
|
|
token = xml_tag_content;
|
2007-08-07 05:27:08 +02:00
|
|
|
val_start = 0;
|
|
|
|
callback(token, start, val_start);
|
2012-02-27 08:02:11 +01:00
|
|
|
*i = tmp;
|
2007-08-07 05:27:08 +02:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
++i;
|
2012-01-13 13:06:50 +01:00
|
|
|
for (; i != tag_end && is_space(*i); ++i);
|
2007-08-07 05:27:08 +02:00
|
|
|
// check for parse error (values must be quoted)
|
|
|
|
if (i == tag_end || (*i != '\'' && *i != '\"'))
|
|
|
|
{
|
|
|
|
token = xml_parse_error;
|
|
|
|
val_start = 0;
|
|
|
|
start = "unquoted attribute value";
|
|
|
|
callback(token, start, val_start);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
char quote = *i;
|
|
|
|
++i;
|
2007-08-07 03:32:38 +02:00
|
|
|
val_start = i;
|
2007-08-07 05:27:08 +02:00
|
|
|
for (; i != tag_end && *i != quote; ++i);
|
|
|
|
// parse error (missing end quote)
|
|
|
|
if (i == tag_end)
|
|
|
|
{
|
|
|
|
token = xml_parse_error;
|
|
|
|
val_start = 0;
|
|
|
|
start = "missing end quote on attribute";
|
|
|
|
callback(token, start, val_start);
|
|
|
|
break;
|
|
|
|
}
|
2007-08-07 03:32:38 +02:00
|
|
|
save = *i;
|
|
|
|
*i = 0;
|
2007-08-07 05:27:08 +02:00
|
|
|
*name_end = 0;
|
2007-08-07 03:32:38 +02:00
|
|
|
token = xml_attribute;
|
|
|
|
callback(token, start, val_start);
|
2007-08-07 05:27:08 +02:00
|
|
|
*name_end = '=';
|
2007-08-07 03:32:38 +02:00
|
|
|
*i = save;
|
2007-03-27 09:04:31 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|