xmllite: Initial support for start tag parsing.
This commit is contained in:
parent
53e25cb4f6
commit
61b4673827
|
@ -51,7 +51,8 @@ typedef enum
|
|||
XmlReadInState_Misc_DTD,
|
||||
XmlReadInState_DTD,
|
||||
XmlReadInState_DTD_Misc,
|
||||
XmlReadInState_Element
|
||||
XmlReadInState_Element,
|
||||
XmlReadInState_Content
|
||||
} XmlReaderInternalState;
|
||||
|
||||
typedef enum
|
||||
|
@ -448,11 +449,28 @@ static void readerinput_grow(xmlreaderinput *readerinput, int length)
|
|||
}
|
||||
}
|
||||
|
||||
/* Heuristic check of the raw (still encoded) input: returns non-zero when the
   data starts with "<?" or "<!", or with '<' followed by a byte that is either
   a non-zero ASCII character or a UTF-8 multi-byte lead byte.
   NOTE(review): reads the first two bytes unconditionally, so this assumes the
   caller has already buffered at least two bytes - confirm against caller. */
static inline int readerinput_is_utf8(xmlreaderinput *readerinput)
{
    static const char startA[] = {'<','?'};
    static const char commentA[] = {'<','!'};
    encoded_buffer *buffer = &readerinput->buffer->encoded;
    /* Always inspect bytes through an unsigned view: if the underlying char type
       is signed, right-shifting a byte >= 0x80 yields a negative value on common
       compilers, so the lead byte patterns below would never match. */
    const unsigned char *ptr = (const unsigned char*)buffer->data;

    return !memcmp(buffer->data, startA, sizeof(startA)) ||
           !memcmp(buffer->data, commentA, sizeof(commentA)) ||
           /* test start byte */
           (ptr[0] == '<' &&
            (
             (ptr[1] && (ptr[1] <= 0x7f)) ||
             (ptr[1] >> 5) == 0x6  || /* 2 bytes, 110xxxxx */
             (ptr[1] >> 4) == 0xe  || /* 3 bytes, 1110xxxx */
             (ptr[1] >> 3) == 0x1e)   /* 4 bytes, 11110xxx */
           );
}
|
||||
|
||||
static HRESULT readerinput_detectencoding(xmlreaderinput *readerinput, xml_encoding *enc)
|
||||
{
|
||||
encoded_buffer *buffer = &readerinput->buffer->encoded;
|
||||
static char startA[] = {'<','?'};
|
||||
static char commentA[] = {'<','!'};
|
||||
static WCHAR startW[] = {'<','?'};
|
||||
static WCHAR commentW[] = {'<','!'};
|
||||
static char utf8bom[] = {0xef,0xbb,0xbf};
|
||||
|
@ -464,8 +482,7 @@ static HRESULT readerinput_detectencoding(xmlreaderinput *readerinput, xml_encod
|
|||
|
||||
/* try start symbols if we have enough data to do that, input buffer should contain
|
||||
first chunk already */
|
||||
if (!memcmp(buffer->data, startA, sizeof(startA)) ||
|
||||
!memcmp(buffer->data, commentA, sizeof(commentA)))
|
||||
if (readerinput_is_utf8(readerinput))
|
||||
*enc = XmlEncoding_UTF8;
|
||||
else if (!memcmp(buffer->data, startW, sizeof(startW)) ||
|
||||
!memcmp(buffer->data, commentW, sizeof(commentW)))
|
||||
|
@ -987,9 +1004,10 @@ static inline int is_namestartchar(WCHAR ch)
|
|||
(ch >= 0xfdf0 && ch <= 0xfffd);
|
||||
}
|
||||
|
||||
static inline int is_namechar(WCHAR ch)
|
||||
/* [4 NS] NCName ::= Name - (Char* ':' Char*) */
|
||||
static inline int is_ncnamechar(WCHAR ch)
|
||||
{
|
||||
return (ch == ':') || (ch >= 'A' && ch <= 'Z') ||
|
||||
return (ch >= 'A' && ch <= 'Z') ||
|
||||
(ch == '_') || (ch >= 'a' && ch <= 'z') ||
|
||||
(ch == '-') || (ch == '.') ||
|
||||
(ch >= '0' && ch <= '9') ||
|
||||
|
@ -1011,6 +1029,11 @@ static inline int is_namechar(WCHAR ch)
|
|||
(ch >= 0xfdf0 && ch <= 0xfffd);
|
||||
}
|
||||
|
||||
/* Name character test: accepts ':' in addition to every character
   accepted by is_ncnamechar(). Returns 1 on match, 0 otherwise. */
static inline int is_namechar(WCHAR ch)
{
    if (ch == ':')
        return 1;

    return is_ncnamechar(ch) != 0;
}
|
||||
|
||||
/* [4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] |
|
||||
[#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] |
|
||||
[#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
|
||||
|
@ -1316,11 +1339,106 @@ static HRESULT reader_parse_dtd(xmlreader *reader)
|
|||
return S_OK;
|
||||
}
|
||||
|
||||
/* [7 NS] QName ::= PrefixedName | UnprefixedName
|
||||
[8 NS] PrefixedName ::= Prefix ':' LocalPart
|
||||
[9 NS] UnprefixedName ::= LocalPart
|
||||
[10 NS] Prefix ::= NCName
|
||||
[11 NS] LocalPart ::= NCName */
|
||||
/* Parses a qualified name at the current reader position.

   On success 'prefix' describes the part before ':' (NULL/0 when the name has
   no prefix), 'local' the part after it, and 'qname' the whole name including
   the ':' separator. The returned strings point into the reader's buffer.

   Returns NC_E_QNAMECHARACTER when the first character is not accepted by
   is_ncnamechar(), S_OK otherwise. */
static HRESULT reader_parse_qname(xmlreader *reader, strval *prefix, strval *local, strval *qname)
{
    WCHAR *name_start = reader_get_cur(reader);
    WCHAR *cur = name_start;

    if (!is_ncnamechar(*cur)) return NC_E_QNAMECHARACTER;

    /* first character is known to be valid, consume the leading run of name characters */
    do
    {
        reader_skipn(reader, 1);
        cur = reader_get_cur(reader);
    } while (is_ncnamechar(*cur));

    if (*cur != ':')
    {
        /* unprefixed name, the run consumed above is the local part */
        prefix->str = NULL;
        prefix->len = 0;
    }
    else
    {
        /* got a qualified name, the run consumed above is the prefix */
        prefix->str = name_start;
        prefix->len = cur-name_start;

        /* step over ':' and consume the local part */
        reader_skipn(reader, 1);
        cur = reader_get_cur(reader);
        name_start = cur;

        for (; is_ncnamechar(*cur); cur = reader_get_cur(reader))
            reader_skipn(reader, 1);
    }

    local->str = name_start;
    local->len = cur-name_start;

    if (prefix->len)
        TRACE("qname %s:%s\n", debugstr_wn(prefix->str, prefix->len), debugstr_wn(local->str, local->len));
    else
        TRACE("ncname %s\n", debugstr_wn(local->str, local->len));

    if (prefix->str)
        qname->str = prefix->str;
    else
        qname->str = local->str;

    /* a prefixed name also spans the ':' separator */
    if (prefix->len)
        qname->len = prefix->len + 1 + local->len;
    else
        qname->len = local->len;

    return S_OK;
}
|
||||
|
||||
/* [12 NS] STag ::= '<' QName (S Attribute)* S? '>'
|
||||
[14 NS] EmptyElemTag ::= '<' QName (S Attribute)* S? '/>' */
|
||||
/* Parses a start tag beginning at '<'; the current position is expected to be
   on that character. Only empty element tags without attributes ("<name/>",
   optionally with whitespace before "/>") are supported so far.

   'prefix', 'local' and 'qname' receive the parsed name parts, see
   reader_parse_qname().

   Returns S_OK for a supported empty element tag, with the whole tag consumed;
   the failure code of reader_parse_qname() for an invalid name; E_NOTIMPL for
   anything else following the name. */
static HRESULT reader_parse_stag(xmlreader *reader, strval *prefix, strval *local, strval *qname)
{
    static const WCHAR endW[] = {'/','>',0};
    HRESULT hr;

    /* skip '<' */
    reader_skipn(reader, 1);

    hr = reader_parse_qname(reader, prefix, local, qname);
    if (FAILED(hr)) return hr;

    reader_skipspaces(reader);

    if (!reader_cmp(reader, endW))
    {
        /* skip '/>' so the accepted tag is fully consumed and parsing
           can continue right after it */
        reader_skipn(reader, 2);
        return S_OK;
    }

    FIXME("only empty elements without attributes supported\n");
    return E_NOTIMPL;
}
|
||||
|
||||
/* [39] element ::= EmptyElemTag | STag content ETag */
|
||||
/* Parses an element at the current position and makes it the current node.

   Returns S_FALSE when the input is not positioned on '<', the failure code of
   reader_parse_stag() when the tag can't be parsed, NC_E_UNDECLAREDPREFIX for
   any prefixed name (namespace declarations are not tracked yet), otherwise
   the success code returned by reader_parse_stag(). */
static HRESULT reader_parse_element(xmlreader *reader)
{
    static const WCHAR ltW[] = {'<',0};
    strval qname, prefix, local;
    HRESULT hr;

    /* check if we are really on element */
    if (reader_cmp(reader, ltW)) return S_FALSE;
    reader_shrink(reader);

    /* this handles empty elements too */
    hr = reader_parse_stag(reader, &prefix, &local, &qname);
    if (FAILED(hr)) return hr;

    /* FIXME: need to check for defined namespace to reject invalid prefix,
       currently reject all prefixes */
    if (prefix.len) return NC_E_UNDECLAREDPREFIX;

    reader->nodetype = XmlNodeType_Element;
    reader_set_strvalue(reader, StringValue_LocalName, &local);
    reader_set_strvalue(reader, StringValue_QualifiedName, &qname);

    FIXME("element content parsing not implemented\n");
    return hr;
}
|
||||
|
||||
static HRESULT reader_parse_nextnode(xmlreader *reader)
|
||||
|
@ -1389,7 +1507,9 @@ static HRESULT reader_parse_nextnode(xmlreader *reader)
|
|||
case XmlReadInState_Element:
|
||||
hr = reader_parse_element(reader);
|
||||
if (FAILED(hr)) return hr;
|
||||
break;
|
||||
|
||||
reader->instate = XmlReadInState_Content;
|
||||
return hr;
|
||||
default:
|
||||
FIXME("internal state %d not handled\n", reader->instate);
|
||||
return E_NOTIMPL;
|
||||
|
|
|
@ -1022,6 +1022,72 @@ todo_wine {
|
|||
IXmlReader_Release(reader);
|
||||
}
|
||||
|
||||
static struct test_entry element_tests[] = {
|
||||
{ "<a/>", "a", "", S_OK },
|
||||
{ "<a />", "a", "", S_OK },
|
||||
{ "<a:b/>", "a:b", "", NC_E_UNDECLAREDPREFIX },
|
||||
{ "<:a/>", NULL, NULL, NC_E_QNAMECHARACTER },
|
||||
{ "< a/>", NULL, NULL, NC_E_QNAMECHARACTER },
|
||||
{ NULL }
|
||||
};
|
||||
|
||||
static void test_read_element(void)
|
||||
{
|
||||
struct test_entry *test = element_tests;
|
||||
IXmlReader *reader;
|
||||
HRESULT hr;
|
||||
|
||||
hr = pCreateXmlReader(&IID_IXmlReader, (void**)&reader, NULL);
|
||||
ok(hr == S_OK, "S_OK, got %08x\n", hr);
|
||||
|
||||
while (test->xml)
|
||||
{
|
||||
XmlNodeType type;
|
||||
IStream *stream;
|
||||
|
||||
stream = create_stream_on_data(test->xml, strlen(test->xml)+1);
|
||||
hr = IXmlReader_SetInput(reader, (IUnknown*)stream);
|
||||
ok(hr == S_OK, "got %08x\n", hr);
|
||||
|
||||
type = XmlNodeType_None;
|
||||
hr = IXmlReader_Read(reader, &type);
|
||||
if (test->hr_broken)
|
||||
ok(hr == test->hr || broken(hr == test->hr_broken), "got %08x for %s\n", hr, test->xml);
|
||||
else
|
||||
ok(hr == test->hr, "got %08x for %s\n", hr, test->xml);
|
||||
if (hr == S_OK)
|
||||
{
|
||||
const WCHAR *str;
|
||||
WCHAR *str_exp;
|
||||
UINT len;
|
||||
|
||||
ok(type == XmlNodeType_Element, "got %d for %s\n", type, test->xml);
|
||||
|
||||
len = 0;
|
||||
str = NULL;
|
||||
hr = IXmlReader_GetQualifiedName(reader, &str, &len);
|
||||
ok(hr == S_OK, "got 0x%08x\n", hr);
|
||||
ok(len == strlen(test->name), "got %u\n", len);
|
||||
str_exp = a2w(test->name);
|
||||
ok(!lstrcmpW(str, str_exp), "got %s\n", wine_dbgstr_w(str));
|
||||
free_str(str_exp);
|
||||
|
||||
/* value */
|
||||
len = 1;
|
||||
str = NULL;
|
||||
hr = IXmlReader_GetValue(reader, &str, &len);
|
||||
ok(hr == S_OK, "got 0x%08x\n", hr);
|
||||
ok(len == 0, "got %u\n", len);
|
||||
ok(*str == 0, "got %s\n", wine_dbgstr_w(str));
|
||||
}
|
||||
|
||||
IStream_Release(stream);
|
||||
test++;
|
||||
}
|
||||
|
||||
IXmlReader_Release(reader);
|
||||
}
|
||||
|
||||
START_TEST(reader)
|
||||
{
|
||||
HRESULT r;
|
||||
|
@ -1041,6 +1107,7 @@ START_TEST(reader)
|
|||
test_read_comment();
|
||||
test_read_pi();
|
||||
test_read_dtd();
|
||||
test_read_element();
|
||||
test_read_full();
|
||||
test_read_xmldeclaration();
|
||||
|
||||
|
|
Loading…
Reference in New Issue