xmllite: Track internal parser state to select expected node type.
This commit is contained in:
parent
ef56b7a448
commit
61483a581e
@ -44,6 +44,14 @@ typedef enum
|
|||||||
XmlEncoding_Unknown
|
XmlEncoding_Unknown
|
||||||
} xml_encoding;
|
} xml_encoding;
|
||||||
|
|
||||||
|
/* Internal parser position, kept separately from the public XmlReadState.
   reader_parse_nextnode() switches on it to decide what to parse next. */
typedef enum
{
    XmlReadInState_Initial = 0, /* fresh input: set by CreateXmlReader() and SetInput() */
    XmlReadInState_XmlDecl,     /* declared, but not referenced by the code visible here */
    XmlReadInState_Misc_DTD,    /* Misc items (comments, PIs, whitespace) ahead of the DTD */
    XmlReadInState_DTD          /* document type declaration; parsing is not supported yet */
} XmlReaderInternalState;
|
||||||
|
|
||||||
static const WCHAR utf16W[] = {'U','T','F','-','1','6',0};
|
static const WCHAR utf16W[] = {'U','T','F','-','1','6',0};
|
||||||
static const WCHAR utf8W[] = {'U','T','F','-','8',0};
|
static const WCHAR utf8W[] = {'U','T','F','-','8',0};
|
||||||
|
|
||||||
@ -109,6 +117,7 @@ typedef struct _xmlreader
|
|||||||
xmlreaderinput *input;
|
xmlreaderinput *input;
|
||||||
IMalloc *imalloc;
|
IMalloc *imalloc;
|
||||||
XmlReadState state;
|
XmlReadState state;
|
||||||
|
XmlReaderInternalState instate;
|
||||||
XmlNodeType nodetype;
|
XmlNodeType nodetype;
|
||||||
DtdProcessing dtdmode;
|
DtdProcessing dtdmode;
|
||||||
UINT line, pos; /* reader position in XML stream */
|
UINT line, pos; /* reader position in XML stream */
|
||||||
@ -457,6 +466,7 @@ static void readerinput_switchencoding(xmlreaderinput *readerinput, xml_encoding
|
|||||||
encoded_buffer *dest = &readerinput->buffer->utf16;
|
encoded_buffer *dest = &readerinput->buffer->utf16;
|
||||||
int len, dest_len;
|
int len, dest_len;
|
||||||
HRESULT hr;
|
HRESULT hr;
|
||||||
|
WCHAR *ptr;
|
||||||
UINT cp;
|
UINT cp;
|
||||||
|
|
||||||
hr = get_code_page(enc, &cp);
|
hr = get_code_page(enc, &cp);
|
||||||
@ -477,8 +487,9 @@ static void readerinput_switchencoding(xmlreaderinput *readerinput, xml_encoding
|
|||||||
|
|
||||||
dest_len = MultiByteToWideChar(cp, 0, src->cur, len, NULL, 0);
|
dest_len = MultiByteToWideChar(cp, 0, src->cur, len, NULL, 0);
|
||||||
readerinput_grow(readerinput, dest_len);
|
readerinput_grow(readerinput, dest_len);
|
||||||
MultiByteToWideChar(cp, 0, src->cur, len, (WCHAR*)dest->data, dest_len);
|
ptr = (WCHAR*)dest->data;
|
||||||
dest->data[dest_len] = 0;
|
MultiByteToWideChar(cp, 0, src->cur, len, ptr, dest_len);
|
||||||
|
ptr[dest_len] = 0;
|
||||||
readerinput->buffer->code_page = cp;
|
readerinput->buffer->code_page = cp;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -515,13 +526,18 @@ static void reader_skipn(xmlreader *reader, int n)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Returns 1 when ch is one of the characters allowed by production
   [3] S (#x20, #x9, #xD, #xA), 0 otherwise. */
static inline int is_wchar_space(WCHAR ch)
{
    switch (ch)
    {
    case ' ':
    case '\t':
    case '\r':
    case '\n':
        return 1;
    default:
        return 0;
    }
}
|
||||||
|
|
||||||
/* [3] S ::= (#x20 | #x9 | #xD | #xA)+ */
|
/* [3] S ::= (#x20 | #x9 | #xD | #xA)+ */
|
||||||
static int reader_skipspaces(xmlreader *reader)
|
static int reader_skipspaces(xmlreader *reader)
|
||||||
{
|
{
|
||||||
encoded_buffer *buffer = &reader->input->buffer->utf16;
|
encoded_buffer *buffer = &reader->input->buffer->utf16;
|
||||||
const WCHAR *ptr = reader_get_cur(reader), *start = ptr;
|
const WCHAR *ptr = reader_get_cur(reader), *start = ptr;
|
||||||
|
|
||||||
while (*ptr == ' ' || *ptr == '\t' || *ptr == '\r' || *ptr == '\n')
|
while (is_wchar_space(*ptr))
|
||||||
{
|
{
|
||||||
buffer->cur += sizeof(WCHAR);
|
buffer->cur += sizeof(WCHAR);
|
||||||
if (*ptr == '\r')
|
if (*ptr == '\r')
|
||||||
@ -753,9 +769,105 @@ static HRESULT reader_parse_xmldecl(xmlreader *reader)
|
|||||||
if (reader_cmp(reader, declcloseW)) return WC_E_XMLDECL;
|
if (reader_cmp(reader, declcloseW)) return WC_E_XMLDECL;
|
||||||
reader_skipn(reader, 2);
|
reader_skipn(reader, 2);
|
||||||
|
|
||||||
|
reader->nodetype = XmlNodeType_XmlDeclaration;
|
||||||
|
|
||||||
return S_OK;
|
return S_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' */
|
||||||
|
static HRESULT reader_parse_comment(xmlreader *reader)
|
||||||
|
{
|
||||||
|
FIXME("comments not supported\n");
|
||||||
|
return E_NOTIMPL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' */
|
||||||
|
static HRESULT reader_parse_pi(xmlreader *reader)
|
||||||
|
{
|
||||||
|
FIXME("PI not supported\n");
|
||||||
|
return E_NOTIMPL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* [27] Misc ::= Comment | PI | S */
|
||||||
|
static HRESULT reader_parse_misc(xmlreader *reader)
|
||||||
|
{
|
||||||
|
HRESULT hr = S_FALSE;
|
||||||
|
|
||||||
|
while (1)
|
||||||
|
{
|
||||||
|
static const WCHAR commentW[] = {'<','!','-','-',0};
|
||||||
|
static const WCHAR piW[] = {'<','?',0};
|
||||||
|
const WCHAR *cur = reader_get_cur(reader);
|
||||||
|
|
||||||
|
if (is_wchar_space(*cur))
|
||||||
|
reader_skipspaces(reader);
|
||||||
|
else if (!reader_cmp(reader, commentW))
|
||||||
|
hr = reader_parse_comment(reader);
|
||||||
|
else if (!reader_cmp(reader, piW))
|
||||||
|
hr = reader_parse_pi(reader);
|
||||||
|
else
|
||||||
|
break;
|
||||||
|
|
||||||
|
if (FAILED(hr)) return hr;
|
||||||
|
cur = reader_get_cur(reader);
|
||||||
|
}
|
||||||
|
|
||||||
|
return hr;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Advances the parser by one node, driven by reader->instate.
 *
 *   Initial  - grows the raw input buffer, detects the stream encoding,
 *              converts the input and tries to parse the XML declaration;
 *              then moves on to Misc_DTD. Returns S_OK right away when
 *              reader_parse_xmldecl() returned S_OK, otherwise keeps going
 *              with the next state.
 *   Misc_DTD - parses Misc items; when reader_parse_misc() reports S_FALSE
 *              (nothing parsed) moves on to DTD, otherwise returns its result.
 *   DTD      - not supported, returns E_NOTIMPL.
 *
 * Any failure from a helper is returned unchanged. S_OK is only returned
 * when one of the node parsers actually succeeded, so that the caller never
 * reports a stale node type for a node that was not parsed.
 */
static HRESULT reader_parse_nextnode(xmlreader *reader)
{
    HRESULT hr;

    while (1)
    {
        switch (reader->instate)
        {
        /* if it's a first call for a new input we need to detect stream encoding */
        case XmlReadInState_Initial:
            {
                xml_encoding enc;

                hr = readerinput_growraw(reader->input);
                if (FAILED(hr)) return hr;

                /* try to detect encoding by BOM or data and set input code page */
                hr = readerinput_detectencoding(reader->input, &enc);
                TRACE("detected encoding %s, 0x%08x\n", debugstr_w(xml_encoding_map[enc].name), hr);
                if (FAILED(hr)) return hr;

                /* always switch the first time because we have to put something in */
                readerinput_switchencoding(reader->input, enc);

                /* parse xml declaration */
                hr = reader_parse_xmldecl(reader);
                if (FAILED(hr)) return hr;

                reader->instate = XmlReadInState_Misc_DTD;
                if (hr == S_OK) return hr;
            }
            break;
        case XmlReadInState_Misc_DTD:
            hr = reader_parse_misc(reader);
            if (FAILED(hr)) return hr;

            /* S_FALSE means no Misc node was found: nothing to report yet,
               continue with the DTD state instead of claiming success */
            if (hr != S_FALSE) return hr;
            reader->instate = XmlReadInState_DTD;
            break;
        case XmlReadInState_DTD:
            FIXME("DTD parsing not supported\n");
            return E_NOTIMPL;
        default:
            FIXME("internal state %d not handled\n", reader->instate);
            return E_NOTIMPL;
        }
    }
}
|
||||||
|
|
||||||
static HRESULT WINAPI xmlreader_QueryInterface(IXmlReader *iface, REFIID riid, void** ppvObject)
|
static HRESULT WINAPI xmlreader_QueryInterface(IXmlReader *iface, REFIID riid, void** ppvObject)
|
||||||
{
|
{
|
||||||
xmlreader *This = impl_from_IXmlReader(iface);
|
xmlreader *This = impl_from_IXmlReader(iface);
|
||||||
@ -844,7 +956,10 @@ static HRESULT WINAPI xmlreader_SetInput(IXmlReader* iface, IUnknown *input)
|
|||||||
/* set stream for supplied IXmlReaderInput */
|
/* set stream for supplied IXmlReaderInput */
|
||||||
hr = readerinput_query_for_stream(This->input);
|
hr = readerinput_query_for_stream(This->input);
|
||||||
if (hr == S_OK)
|
if (hr == S_OK)
|
||||||
|
{
|
||||||
This->state = XmlReadState_Initial;
|
This->state = XmlReadState_Initial;
|
||||||
|
This->instate = XmlReadInState_Initial;
|
||||||
|
}
|
||||||
|
|
||||||
return hr;
|
return hr;
|
||||||
}
|
}
|
||||||
@ -893,44 +1008,22 @@ static HRESULT WINAPI xmlreader_SetProperty(IXmlReader* iface, UINT property, LO
|
|||||||
return S_OK;
|
return S_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
 * IXmlReader::Read implementation.
 *
 * Returns S_FALSE without parsing when the reader state is Closed.
 * Otherwise asks reader_parse_nextnode() for the next node and returns its
 * result. The reader state becomes Interactive once the stored node type
 * changes from XmlNodeType_None to something else.
 *
 * nodetype receives the current node type on S_OK only; it may be NULL, in
 * which case the node type is simply not returned.
 */
static HRESULT WINAPI xmlreader_Read(IXmlReader* iface, XmlNodeType *nodetype)
{
    xmlreader *This = impl_from_IXmlReader(iface);
    XmlNodeType oldtype = This->nodetype;
    HRESULT hr;

    FIXME("(%p)->(%p): stub\n", This, nodetype);

    if (This->state == XmlReadState_Closed) return S_FALSE;

    hr = reader_parse_nextnode(This);
    if (oldtype == XmlNodeType_None && This->nodetype != oldtype)
        This->state = XmlReadState_Interactive;

    /* callers are allowed to pass NULL when they don't need the type */
    if (hr == S_OK && nodetype) *nodetype = This->nodetype;

    return hr;
}
|
||||||
|
|
||||||
static HRESULT WINAPI xmlreader_GetNodeType(IXmlReader* iface, XmlNodeType *node_type)
|
static HRESULT WINAPI xmlreader_GetNodeType(IXmlReader* iface, XmlNodeType *node_type)
|
||||||
@ -1223,6 +1316,7 @@ HRESULT WINAPI CreateXmlReader(REFIID riid, void **obj, IMalloc *imalloc)
|
|||||||
reader->ref = 1;
|
reader->ref = 1;
|
||||||
reader->input = NULL;
|
reader->input = NULL;
|
||||||
reader->state = XmlReadState_Closed;
|
reader->state = XmlReadState_Closed;
|
||||||
|
reader->instate = XmlReadInState_Initial;
|
||||||
reader->dtdmode = DtdProcessing_Prohibit;
|
reader->dtdmode = DtdProcessing_Prohibit;
|
||||||
reader->line = reader->pos = 0;
|
reader->line = reader->pos = 0;
|
||||||
reader->imalloc = imalloc;
|
reader->imalloc = imalloc;
|
||||||
|
@ -497,6 +497,7 @@ static void test_readerinput(void)
|
|||||||
IStream_Release(stream);
|
IStream_Release(stream);
|
||||||
|
|
||||||
/* test input interface selection sequence */
|
/* test input interface selection sequence */
|
||||||
|
input = NULL;
|
||||||
hr = testinput_createinstance((void**)&input);
|
hr = testinput_createinstance((void**)&input);
|
||||||
ok(hr == S_OK, "Expected S_OK, got %08x\n", hr);
|
ok(hr == S_OK, "Expected S_OK, got %08x\n", hr);
|
||||||
|
|
||||||
@ -688,10 +689,70 @@ todo_wine {
|
|||||||
ok(hr == S_OK, "got %08x\n", hr);
|
ok(hr == S_OK, "got %08x\n", hr);
|
||||||
ok(type == XmlNodeType_XmlDeclaration, "got %d\n", type);
|
ok(type == XmlNodeType_XmlDeclaration, "got %d\n", type);
|
||||||
|
|
||||||
|
type = XmlNodeType_XmlDeclaration;
|
||||||
|
hr = IXmlReader_Read(reader, &type);
|
||||||
|
/* newer versions return a syntax error here because the document is incomplete,
|
||||||
|
which makes more sense than an invalid character error */
|
||||||
|
todo_wine {
|
||||||
|
ok(hr == WC_E_SYNTAX || broken(hr == WC_E_XMLCHARACTER), "got 0x%08x\n", hr);
|
||||||
|
ok(type == XmlNodeType_None, "got %d\n", type);
|
||||||
|
}
|
||||||
IStream_Release(stream);
|
IStream_Release(stream);
|
||||||
IXmlReader_Release(reader);
|
IXmlReader_Release(reader);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static const char xml_comment[] = "\xef\xbb\xbf<!-- comment -->";
|
||||||
|
static const char xml_comment1[] = "\xef\xbb\xbf<!-- - comment-->";
|
||||||
|
static const char xml_comment2[] = "\xef\xbb\xbf<!-- -- comment-->";
|
||||||
|
|
||||||
|
static void test_read_comment(void)
|
||||||
|
{
|
||||||
|
HRESULT hr;
|
||||||
|
IStream *stream;
|
||||||
|
IXmlReader *reader;
|
||||||
|
XmlNodeType type;
|
||||||
|
|
||||||
|
hr = pCreateXmlReader(&IID_IXmlReader, (void**)&reader, NULL);
|
||||||
|
ok(hr == S_OK, "S_OK, got %08x\n", hr);
|
||||||
|
|
||||||
|
stream = create_stream_on_data(xml_comment, sizeof(xml_comment));
|
||||||
|
hr = IXmlReader_SetInput(reader, (IUnknown*)stream);
|
||||||
|
ok(hr == S_OK, "got %08x\n", hr);
|
||||||
|
|
||||||
|
type = XmlNodeType_None;
|
||||||
|
hr = IXmlReader_Read(reader, &type);
|
||||||
|
todo_wine {
|
||||||
|
ok(hr == S_OK, "got %08x\n", hr);
|
||||||
|
ok(type == XmlNodeType_Comment, "got %d\n", type);
|
||||||
|
}
|
||||||
|
IStream_Release(stream);
|
||||||
|
|
||||||
|
stream = create_stream_on_data(xml_comment1, sizeof(xml_comment1));
|
||||||
|
hr = IXmlReader_SetInput(reader, (IUnknown*)stream);
|
||||||
|
ok(hr == S_OK, "got %08x\n", hr);
|
||||||
|
|
||||||
|
type = XmlNodeType_None;
|
||||||
|
hr = IXmlReader_Read(reader, &type);
|
||||||
|
todo_wine {
|
||||||
|
ok(hr == S_OK, "got %08x\n", hr);
|
||||||
|
ok(type == XmlNodeType_Comment, "got %d\n", type);
|
||||||
|
}
|
||||||
|
IStream_Release(stream);
|
||||||
|
|
||||||
|
stream = create_stream_on_data(xml_comment2, sizeof(xml_comment2));
|
||||||
|
hr = IXmlReader_SetInput(reader, (IUnknown*)stream);
|
||||||
|
ok(hr == S_OK, "got %08x\n", hr);
|
||||||
|
|
||||||
|
type = XmlNodeType_None;
|
||||||
|
hr = IXmlReader_Read(reader, &type);
|
||||||
|
todo_wine
|
||||||
|
ok(hr == WC_E_COMMENT || broken(hr == WC_E_GREATERTHAN), "got %08x\n", hr);
|
||||||
|
ok(type == XmlNodeType_None, "got %d\n", type);
|
||||||
|
IStream_Release(stream);
|
||||||
|
|
||||||
|
IXmlReader_Release(reader);
|
||||||
|
}
|
||||||
|
|
||||||
START_TEST(reader)
|
START_TEST(reader)
|
||||||
{
|
{
|
||||||
HRESULT r;
|
HRESULT r;
|
||||||
@ -708,6 +769,7 @@ START_TEST(reader)
|
|||||||
test_reader_create();
|
test_reader_create();
|
||||||
test_readerinput();
|
test_readerinput();
|
||||||
test_reader_state();
|
test_reader_state();
|
||||||
|
test_read_comment();
|
||||||
test_read_xmldeclaration();
|
test_read_xmldeclaration();
|
||||||
|
|
||||||
CoUninitialize();
|
CoUninitialize();
|
||||||
|
Loading…
x
Reference in New Issue
Block a user