Added support for XML declarations, comments, quoted attribute values, and parse error messages in the XML parser

This commit is contained in:
Arvid Norberg 2007-08-07 03:27:08 +00:00
parent 9f08256dff
commit cd05e4e67c
2 changed files with 105 additions and 18 deletions

View File

@ -37,11 +37,14 @@ namespace libtorrent
{
// Token types passed as the first argument to the xml_parse() callback.
// NOTE(review): this view shows two enumerator lists back to back (one with
// explicit values, one without); presumably the numbered list is the
// pre-change version and the unnumbered list replaces it -- confirm against
// the repository.
enum
{
xml_start_tag = 0,
xml_end_tag = 1,
xml_empty_tag = 2,
xml_string = 3,
xml_attribute = 4
xml_start_tag, // opening tag; str is the tag name
xml_end_tag, // closing tag; str is the tag name
xml_empty_tag, // self-closing tag (<name ... />); str is the tag name
xml_declaration_tag, // <? ... ?> declaration; str is the name after the leading '?'
xml_string, // text found between tags; str is the text
xml_attribute, // attribute; str is the name, str2 the value without its quotes
xml_comment, // <!-- ... --> comment; str is the comment text
xml_parse_error // malformed input; str is a human-readable error message
};
// callback(int type, char const* str, char const* str2)
@ -84,7 +87,13 @@ namespace libtorrent
for (; p != end && *p != '>'; ++p);
// parse error
if (p == end) break;
if (p == end)
{
token = xml_parse_error;
start = "unexpected end of file";
callback(token, start, val_start);
break;
}
assert(*p == '>');
// save the character that terminated the tag name
@ -107,6 +116,24 @@ namespace libtorrent
*(p-1) = '/';
tag_end = p - 1;
}
else if (*start == '?' && *(p-1) == '?')
{
*(p-1) = 0;
++start;
token = xml_declaration_tag;
callback(token, start, val_start);
*(p-1) = '?';
tag_end = p - 1;
}
else if (start + 5 < p && memcmp(start, "!--", 3) == 0 && memcmp(p-2, "--", 2) == 0)
{
start += 3;
*(p-2) = 0;
token = xml_comment;
callback(token, start, val_start);
*(p-2) = '-';
tag_end = p - 2;
}
else
{
token = xml_start_tag;
@ -116,24 +143,59 @@ namespace libtorrent
*tag_name_end = save;
// parse attributes
start = tag_name_end;
for (char* i = tag_name_end; i < tag_end; ++i)
{
if (*i != '=') continue;
assert(*start == ' ');
++start;
// find start of attribute name
for (; i != tag_end && (*i == ' ' || *i == '\t'); ++i);
if (i == tag_end) break;
start = i;
// find end of attribute name
for (; i != tag_end && *i != '=' && *i != ' ' && *i != '\t'; ++i);
char* name_end = i;
// look for equality sign
for (; i != tag_end && *i != '='; ++i);
if (i == tag_end)
{
token = xml_parse_error;
val_start = 0;
start = "garbage inside element brackets";
callback(token, start, val_start);
break;
}
++i;
for (; i != tag_end && (*i == ' ' || *i == '\t'); ++i);
// check for parse error (values must be quoted)
if (i == tag_end || (*i != '\'' && *i != '\"'))
{
token = xml_parse_error;
val_start = 0;
start = "unquoted attribute value";
callback(token, start, val_start);
break;
}
char quote = *i;
++i;
val_start = i;
for (; i != tag_end && *i != ' '; ++i);
for (; i != tag_end && *i != quote; ++i);
// parse error (missing end quote)
if (i == tag_end)
{
token = xml_parse_error;
val_start = 0;
start = "missing end quote on attribute";
callback(token, start, val_start);
break;
}
save = *i;
*i = 0;
const_cast<char&>(*val_start) = 0;
++val_start;
*name_end = 0;
token = xml_attribute;
callback(token, start, val_start);
--val_start;
const_cast<char&>(*val_start) = '=';
*name_end = '=';
*i = save;
start = i;
}
}

View File

@ -34,15 +34,24 @@ void parser_callback(std::string& out, int token, char const* s, char const* val
case xml_start_tag: out += "B"; break;
case xml_end_tag: out += "F"; break;
case xml_empty_tag: out += "E"; break;
case xml_declaration_tag: out += "D"; break;
case xml_comment: out += "C"; break;
case xml_string: out += "S"; break;
case xml_attribute: out += "A"; break;
case xml_parse_error: out += "P"; break;
default: TEST_CHECK(false);
}
out += s;
if (token == xml_attribute)
{
TEST_CHECK(val != 0);
out += "V";
out += val;
}
else
{
TEST_CHECK(val == 0);
}
}
int test_main()
@ -127,13 +136,29 @@ int test_main()
std::cerr << out1 << std::endl;
TEST_CHECK(out1 == "BaSfooEbSbarFa");
char xml2[] = "<c x=1 y=3/><d foo=bar></d boo=foo>";
char xml2[] = "<?xml version = \"1.0\"?><c x=\"1\" \t y=\"3\"/><d foo='bar'></d boo='foo'><!--comment-->";
std::string out2;
xml_parse(xml2, xml2 + sizeof(xml2) - 1, bind(&parser_callback
, boost::ref(out2), _1, _2, _3));
std::cerr << out2 << std::endl;
TEST_CHECK(out2 == "EcAxV1AyV3BdAfooVbarFdAbooVfoo");
TEST_CHECK(out2 == "DxmlAversionV1.0EcAxV1AyV3BdAfooVbarFdAbooVfooCcomment");
char xml3[] = "<a f=1>foo</a f='b>";
std::string out3;
xml_parse(xml3, xml3 + sizeof(xml3) - 1, bind(&parser_callback
, boost::ref(out3), _1, _2, _3));
std::cerr << out3 << std::endl;
TEST_CHECK(out3 == "BaPunquoted attribute valueSfooFaPmissing end quote on attribute");
char xml4[] = "<a f>foo</a v >";
std::string out4;
xml_parse(xml4, xml4 + sizeof(xml4) - 1, bind(&parser_callback
, boost::ref(out4), _1, _2, _3));
std::cerr << out4 << std::endl;
TEST_CHECK(out4 == "BaPgarbage inside element bracketsSfooFaPgarbage inside element brackets");
return 0;
}