xmllite: Initial support for start tag parsing.

This commit is contained in:
Nikolay Sivov 2013-01-18 10:04:27 +04:00 committed by Alexandre Julliard
parent 53e25cb4f6
commit 61b4673827
2 changed files with 197 additions and 10 deletions

View File

@ -51,7 +51,8 @@ typedef enum
XmlReadInState_Misc_DTD, XmlReadInState_Misc_DTD,
XmlReadInState_DTD, XmlReadInState_DTD,
XmlReadInState_DTD_Misc, XmlReadInState_DTD_Misc,
XmlReadInState_Element XmlReadInState_Element,
XmlReadInState_Content
} XmlReaderInternalState; } XmlReaderInternalState;
typedef enum typedef enum
@ -448,11 +449,28 @@ static void readerinput_grow(xmlreaderinput *readerinput, int length)
} }
} }
/* Heuristic check whether the raw (still encoded) input looks like UTF-8.
 *
 * Returns non-zero when the encoded buffer starts with "<?" or "<!", or with
 * '<' followed by either a non-zero ASCII byte or a lead byte of a 2-, 3- or
 * 4-byte UTF-8 sequence; returns zero otherwise.
 *
 * The first two bytes of the buffer are read unconditionally.
 * NOTE(review): no length check is done here - confirm callers only use this
 * once the first chunk of input has been buffered. */
static inline int readerinput_is_utf8(xmlreaderinput *readerinput)
{
    static const char startA[] = {'<','?'};
    static const char commentA[] = {'<','!'};
    encoded_buffer *buffer = &readerinput->buffer->encoded;
    /* Look at the bytes as unsigned values: right-shifting a negative plain
       char keeps the sign, so lead bytes (>= 0x80) would never compare equal
       to the bit patterns tested below. */
    const unsigned char *ptr = (const unsigned char*)buffer->data;

    if (!memcmp(buffer->data, startA, sizeof(startA)) ||
        !memcmp(buffer->data, commentA, sizeof(commentA)))
        return 1;

    /* anything else has to start with '<' to be considered */
    if (ptr[0] != '<')
        return 0;

    /* test start byte of the character following '<' */
    return (ptr[1] && (ptr[1] <= 0x7f)) ||
           (ptr[1] >> 5) == 0x6  || /* 110xxxxx, 2 bytes */
           (ptr[1] >> 4) == 0xe  || /* 1110xxxx, 3 bytes */
           (ptr[1] >> 3) == 0x1e;   /* 11110xxx, 4 bytes */
}
static HRESULT readerinput_detectencoding(xmlreaderinput *readerinput, xml_encoding *enc) static HRESULT readerinput_detectencoding(xmlreaderinput *readerinput, xml_encoding *enc)
{ {
encoded_buffer *buffer = &readerinput->buffer->encoded; encoded_buffer *buffer = &readerinput->buffer->encoded;
static char startA[] = {'<','?'};
static char commentA[] = {'<','!'};
static WCHAR startW[] = {'<','?'}; static WCHAR startW[] = {'<','?'};
static WCHAR commentW[] = {'<','!'}; static WCHAR commentW[] = {'<','!'};
static char utf8bom[] = {0xef,0xbb,0xbf}; static char utf8bom[] = {0xef,0xbb,0xbf};
@ -464,8 +482,7 @@ static HRESULT readerinput_detectencoding(xmlreaderinput *readerinput, xml_encod
/* try start symbols if we have enough data to do that, input buffer should contain /* try start symbols if we have enough data to do that, input buffer should contain
first chunk already */ first chunk already */
if (!memcmp(buffer->data, startA, sizeof(startA)) || if (readerinput_is_utf8(readerinput))
!memcmp(buffer->data, commentA, sizeof(commentA)))
*enc = XmlEncoding_UTF8; *enc = XmlEncoding_UTF8;
else if (!memcmp(buffer->data, startW, sizeof(startW)) || else if (!memcmp(buffer->data, startW, sizeof(startW)) ||
!memcmp(buffer->data, commentW, sizeof(commentW))) !memcmp(buffer->data, commentW, sizeof(commentW)))
@ -987,9 +1004,10 @@ static inline int is_namestartchar(WCHAR ch)
(ch >= 0xfdf0 && ch <= 0xfffd); (ch >= 0xfdf0 && ch <= 0xfffd);
} }
static inline int is_namechar(WCHAR ch) /* [4 NS] NCName ::= Name - (Char* ':' Char*) */
static inline int is_ncnamechar(WCHAR ch)
{ {
return (ch == ':') || (ch >= 'A' && ch <= 'Z') || return (ch >= 'A' && ch <= 'Z') ||
(ch == '_') || (ch >= 'a' && ch <= 'z') || (ch == '_') || (ch >= 'a' && ch <= 'z') ||
(ch == '-') || (ch == '.') || (ch == '-') || (ch == '.') ||
(ch >= '0' && ch <= '9') || (ch >= '0' && ch <= '9') ||
@ -1011,6 +1029,11 @@ static inline int is_namechar(WCHAR ch)
(ch >= 0xfdf0 && ch <= 0xfffd); (ch >= 0xfdf0 && ch <= 0xfffd);
} }
/* Name character test: accepts ':' in addition to every character
   accepted by is_ncnamechar(). Returns 1 on match, 0 otherwise. */
static inline int is_namechar(WCHAR ch)
{
    if (ch == ':')
        return 1;

    return is_ncnamechar(ch) != 0;
}
/* [4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | /* [4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] |
[#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] |
[#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF] [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
@ -1316,11 +1339,106 @@ static HRESULT reader_parse_dtd(xmlreader *reader)
return S_OK; return S_OK;
} }
/* [7 NS]  QName ::= PrefixedName | UnprefixedName
   [8 NS]  PrefixedName ::= Prefix ':' LocalPart
   [9 NS]  UnprefixedName ::= LocalPart
   [10 NS] Prefix ::= NCName
   [11 NS] LocalPart ::= NCName

   Consumes a qualified name at the current reader position.

   On success returns S_OK and fills in:
     prefix - part before ':' (str is NULL and len is 0 when there is no ':'),
     local  - part after ':', or the whole name when unprefixed,
     qname  - the full name, including prefix and ':' if present.
   Returns NC_E_QNAMECHARACTER when the first character is not accepted by
   is_ncnamechar(). */
static HRESULT reader_parse_qname(xmlreader *reader, strval *prefix, strval *local, strval *qname)
{
    WCHAR *name_start = reader_get_cur(reader);
    WCHAR *cur = name_start;

    /* at least one name character is required */
    if (!is_ncnamechar(*cur))
        return NC_E_QNAMECHARACTER;

    /* first NCName - either the prefix or an unprefixed local name */
    do
    {
        reader_skipn(reader, 1);
        cur = reader_get_cur(reader);
    } while (is_ncnamechar(*cur));

    if (*cur != ':')
    {
        /* plain NCName, no prefix */
        prefix->str = NULL;
        prefix->len = 0;
    }
    else
    {
        /* got a qualified name, what was read so far is the prefix */
        prefix->str = name_start;
        prefix->len = cur - name_start;

        /* step over ':' and read the local part */
        reader_skipn(reader, 1);
        name_start = cur = reader_get_cur(reader);

        while (is_ncnamechar(*cur))
        {
            reader_skipn(reader, 1);
            cur = reader_get_cur(reader);
        }
    }

    local->str = name_start;
    local->len = cur - name_start;

    if (prefix->len)
        TRACE("qname %s:%s\n", debugstr_wn(prefix->str, prefix->len), debugstr_wn(local->str, local->len));
    else
        TRACE("ncname %s\n", debugstr_wn(local->str, local->len));

    /* full name starts at the prefix when there is one */
    if (prefix->str)
        qname->str = prefix->str;
    else
        qname->str = local->str;

    /* count ':' too */
    qname->len = (prefix->len ? prefix->len + 1 : 0) + local->len;

    return S_OK;
}
/* [12 NS] STag ::= '<' QName (S Attribute)* S? '>'
   [14 NS] EmptyElemTag ::= '<' QName (S Attribute)* S? '/>'

   Skips one character (the '<'), parses the element name into prefix, local
   and qname, then skips whitespace. Returns S_OK when reader_cmp() reports
   that "/>" follows; any other continuation is not handled yet and yields
   E_NOTIMPL. Name parsing failures are returned unchanged. */
static HRESULT reader_parse_stag(xmlreader *reader, strval *prefix, strval *local, strval *qname)
{
    static const WCHAR empty_closeW[] = {'/','>',0};
    HRESULT hr;

    /* skip '<' */
    reader_skipn(reader, 1);

    hr = reader_parse_qname(reader, prefix, local, qname);
    if (FAILED(hr))
        return hr;

    reader_skipspaces(reader);

    if (reader_cmp(reader, empty_closeW) != 0)
    {
        FIXME("only empty elements without attributes supported\n");
        return E_NOTIMPL;
    }

    return S_OK;
}
/* [39] element ::= EmptyElemTag | STag content ETag */ /* [39] element ::= EmptyElemTag | STag content ETag */
static HRESULT reader_parse_element(xmlreader *reader) static HRESULT reader_parse_element(xmlreader *reader)
{ {
FIXME("element parsing not implemented\n"); static const WCHAR ltW[] = {'<',0};
return E_NOTIMPL; strval qname, prefix, local;
HRESULT hr;
/* check if we are really on element */
if (reader_cmp(reader, ltW)) return S_FALSE;
reader_shrink(reader);
/* this handles empty elements too */
hr = reader_parse_stag(reader, &prefix, &local, &qname);
if (FAILED(hr)) return hr;
/* FIXME: need to check for defined namespace to reject invalid prefix,
currently reject all prefixes */
if (prefix.len) return NC_E_UNDECLAREDPREFIX;
reader->nodetype = XmlNodeType_Element;
reader_set_strvalue(reader, StringValue_LocalName, &local);
reader_set_strvalue(reader, StringValue_QualifiedName, &qname);
FIXME("element content parsing not implemented\n");
return hr;
} }
static HRESULT reader_parse_nextnode(xmlreader *reader) static HRESULT reader_parse_nextnode(xmlreader *reader)
@ -1389,7 +1507,9 @@ static HRESULT reader_parse_nextnode(xmlreader *reader)
case XmlReadInState_Element: case XmlReadInState_Element:
hr = reader_parse_element(reader); hr = reader_parse_element(reader);
if (FAILED(hr)) return hr; if (FAILED(hr)) return hr;
break;
reader->instate = XmlReadInState_Content;
return hr;
default: default:
FIXME("internal state %d not handled\n", reader->instate); FIXME("internal state %d not handled\n", reader->instate);
return E_NOTIMPL; return E_NOTIMPL;

View File

@ -1022,6 +1022,72 @@ todo_wine {
IXmlReader_Release(reader); IXmlReader_Release(reader);
} }
/* Inputs for test_read_element(). Each row gives the XML text to parse, the
   qualified name expected from IXmlReader_GetQualifiedName() and, as the last
   initializer, the HRESULT expected from the first IXmlReader_Read() call.
   NOTE(review): the third column presumably is the expected node value;
   struct test_entry is declared elsewhere - confirm.
   The table is terminated by an entry with a NULL xml pointer. */
static struct test_entry element_tests[] = {
{ "<a/>", "a", "", S_OK },
{ "<a />", "a", "", S_OK },
/* prefixed names are rejected, see expected NC_E_UNDECLAREDPREFIX */
{ "<a:b/>", "a:b", "", NC_E_UNDECLAREDPREFIX },
/* name can't start with ':' or whitespace */
{ "<:a/>", NULL, NULL, NC_E_QNAMECHARACTER },
{ "< a/>", NULL, NULL, NC_E_QNAMECHARACTER },
{ NULL }
};
/* Runs every element_tests entry through a single reader instance: sets the
   entry text as input, performs one Read() and compares the returned HRESULT
   with the expected one. For successful reads the node type, qualified name
   and (empty) value are checked as well. */
static void test_read_element(void)
{
    struct test_entry *entry;
    IXmlReader *reader;
    HRESULT hr;

    hr = pCreateXmlReader(&IID_IXmlReader, (void**)&reader, NULL);
    ok(hr == S_OK, "S_OK, got %08x\n", hr);

    for (entry = element_tests; entry->xml; entry++)
    {
        XmlNodeType type = XmlNodeType_None;
        IStream *stream;

        /* terminating null is passed as part of the stream data */
        stream = create_stream_on_data(entry->xml, strlen(entry->xml)+1);
        hr = IXmlReader_SetInput(reader, (IUnknown*)stream);
        ok(hr == S_OK, "got %08x\n", hr);

        hr = IXmlReader_Read(reader, &type);
        if (entry->hr_broken)
        {
            ok(hr == entry->hr || broken(hr == entry->hr_broken), "got %08x for %s\n", hr, entry->xml);
        }
        else
        {
            ok(hr == entry->hr, "got %08x for %s\n", hr, entry->xml);
        }

        if (hr == S_OK)
        {
            const WCHAR *str = NULL;
            WCHAR *str_exp;
            UINT len = 0;

            ok(type == XmlNodeType_Element, "got %d for %s\n", type, entry->xml);

            /* qualified name */
            hr = IXmlReader_GetQualifiedName(reader, &str, &len);
            ok(hr == S_OK, "got 0x%08x\n", hr);
            ok(len == strlen(entry->name), "got %u\n", len);
            str_exp = a2w(entry->name);
            ok(!lstrcmpW(str, str_exp), "got %s\n", wine_dbgstr_w(str));
            free_str(str_exp);

            /* value, start from non-zero length to see it reset */
            len = 1;
            str = NULL;
            hr = IXmlReader_GetValue(reader, &str, &len);
            ok(hr == S_OK, "got 0x%08x\n", hr);
            ok(len == 0, "got %u\n", len);
            ok(*str == 0, "got %s\n", wine_dbgstr_w(str));
        }

        IStream_Release(stream);
    }

    IXmlReader_Release(reader);
}
START_TEST(reader) START_TEST(reader)
{ {
HRESULT r; HRESULT r;
@ -1041,6 +1107,7 @@ START_TEST(reader)
test_read_comment(); test_read_comment();
test_read_pi(); test_read_pi();
test_read_dtd(); test_read_dtd();
test_read_element();
test_read_full(); test_read_full();
test_read_xmldeclaration(); test_read_xmldeclaration();