From 61483a581e647676eb96ebf84d33e94e6941f424 Mon Sep 17 00:00:00 2001 From: Nikolay Sivov Date: Wed, 19 Dec 2012 09:30:44 +0400 Subject: [PATCH] xmllite: Track internal parser state to select expected node type. --- dlls/xmllite/reader.c | 162 ++++++++++++++++++++++++++++-------- dlls/xmllite/tests/reader.c | 62 ++++++++++++++ 2 files changed, 190 insertions(+), 34 deletions(-) diff --git a/dlls/xmllite/reader.c b/dlls/xmllite/reader.c index 8ec2eae4331..8fe5ee31db5 100644 --- a/dlls/xmllite/reader.c +++ b/dlls/xmllite/reader.c @@ -44,6 +44,14 @@ typedef enum XmlEncoding_Unknown } xml_encoding; +typedef enum +{ + XmlReadInState_Initial, + XmlReadInState_XmlDecl, + XmlReadInState_Misc_DTD, + XmlReadInState_DTD +} XmlReaderInternalState; + static const WCHAR utf16W[] = {'U','T','F','-','1','6',0}; static const WCHAR utf8W[] = {'U','T','F','-','8',0}; @@ -109,6 +117,7 @@ typedef struct _xmlreader xmlreaderinput *input; IMalloc *imalloc; XmlReadState state; + XmlReaderInternalState instate; XmlNodeType nodetype; DtdProcessing dtdmode; UINT line, pos; /* reader position in XML stream */ @@ -457,6 +466,7 @@ static void readerinput_switchencoding(xmlreaderinput *readerinput, xml_encoding encoded_buffer *dest = &readerinput->buffer->utf16; int len, dest_len; HRESULT hr; + WCHAR *ptr; UINT cp; hr = get_code_page(enc, &cp); @@ -477,8 +487,9 @@ static void readerinput_switchencoding(xmlreaderinput *readerinput, xml_encoding dest_len = MultiByteToWideChar(cp, 0, src->cur, len, NULL, 0); readerinput_grow(readerinput, dest_len); - MultiByteToWideChar(cp, 0, src->cur, len, (WCHAR*)dest->data, dest_len); - dest->data[dest_len] = 0; + ptr = (WCHAR*)dest->data; + MultiByteToWideChar(cp, 0, src->cur, len, ptr, dest_len); + ptr[dest_len] = 0; readerinput->buffer->code_page = cp; } @@ -515,13 +526,18 @@ static void reader_skipn(xmlreader *reader, int n) } } +static inline int is_wchar_space(WCHAR ch) +{ + return ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n'; +} + /* [3] S ::= 
(#x20 | #x9 | #xD | #xA)+ */ static int reader_skipspaces(xmlreader *reader) { encoded_buffer *buffer = &reader->input->buffer->utf16; const WCHAR *ptr = reader_get_cur(reader), *start = ptr; - while (*ptr == ' ' || *ptr == '\t' || *ptr == '\r' || *ptr == '\n') + while (is_wchar_space(*ptr)) { buffer->cur += sizeof(WCHAR); if (*ptr == '\r') @@ -753,9 +769,105 @@ static HRESULT reader_parse_xmldecl(xmlreader *reader) if (reader_cmp(reader, declcloseW)) return WC_E_XMLDECL; reader_skipn(reader, 2); + reader->nodetype = XmlNodeType_XmlDeclaration; + return S_OK; } +/* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' */ +static HRESULT reader_parse_comment(xmlreader *reader) +{ + FIXME("comments not supported\n"); + return E_NOTIMPL; +} + +/* [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' */ +static HRESULT reader_parse_pi(xmlreader *reader) +{ + FIXME("PI not supported\n"); + return E_NOTIMPL; +} + +/* [27] Misc ::= Comment | PI | S */ +static HRESULT reader_parse_misc(xmlreader *reader) +{ + HRESULT hr = S_FALSE; + + while (1) + { + static const WCHAR commentW[] = {'<','!','-','-',0}; + static const WCHAR piW[] = {'<','?',0}; + const WCHAR *cur = reader_get_cur(reader); + + if (is_wchar_space(*cur)) + reader_skipspaces(reader); + else if (!reader_cmp(reader, commentW)) + hr = reader_parse_comment(reader); + else if (!reader_cmp(reader, piW)) + hr = reader_parse_pi(reader); + else + break; + + if (FAILED(hr)) return hr; + cur = reader_get_cur(reader); + } + + return hr; +} + +static HRESULT reader_parse_nextnode(xmlreader *reader) +{ + HRESULT hr; + + while (1) + { + switch (reader->instate) + { + /* if it's a first call for a new input we need to detect stream encoding */ + case XmlReadInState_Initial: + { + xml_encoding enc; + + hr = readerinput_growraw(reader->input); + if (FAILED(hr)) return hr; + + /* try to detect encoding by BOM or data and set input code page */ + hr = readerinput_detectencoding(reader->input, &enc); + TRACE("detected encoding %s, 0x%08x\n", debugstr_w(xml_encoding_map[enc].name), hr); + 
if (FAILED(hr)) return hr; + + /* always switch first time cause we have to put something in */ + readerinput_switchencoding(reader->input, enc); + + /* parse xml declaration */ + hr = reader_parse_xmldecl(reader); + if (FAILED(hr)) return hr; + + reader->instate = XmlReadInState_Misc_DTD; + if (hr == S_OK) return hr; + } + break; + case XmlReadInState_Misc_DTD: + hr = reader_parse_misc(reader); + if (FAILED(hr)) return hr; + if (hr == S_FALSE) + { + reader->instate = XmlReadInState_DTD; + return S_OK; + } + break; + case XmlReadInState_DTD: + FIXME("DTD parsing not supported\n"); + return E_NOTIMPL; + default: + FIXME("internal state %d not handled\n", reader->instate); + return E_NOTIMPL; + } + } + + return E_NOTIMPL; +} + static HRESULT WINAPI xmlreader_QueryInterface(IXmlReader *iface, REFIID riid, void** ppvObject) { xmlreader *This = impl_from_IXmlReader(iface); @@ -844,7 +956,10 @@ static HRESULT WINAPI xmlreader_SetInput(IXmlReader* iface, IUnknown *input) /* set stream for supplied IXmlReaderInput */ hr = readerinput_query_for_stream(This->input); if (hr == S_OK) + { This->state = XmlReadState_Initial; + This->instate = XmlReadInState_Initial; + } return hr; } @@ -893,44 +1008,22 @@ static HRESULT WINAPI xmlreader_SetProperty(IXmlReader* iface, UINT property, LO return S_OK; } -static HRESULT WINAPI xmlreader_Read(IXmlReader* iface, XmlNodeType *node_type) +static HRESULT WINAPI xmlreader_Read(IXmlReader* iface, XmlNodeType *nodetype) { xmlreader *This = impl_from_IXmlReader(iface); + XmlNodeType oldtype = This->nodetype; + HRESULT hr; - FIXME("(%p)->(%p): stub\n", This, node_type); + FIXME("(%p)->(%p): stub\n", This, nodetype); if (This->state == XmlReadState_Closed) return S_FALSE; - /* if it's a first call for a new input we need to detect stream encoding */ - if (This->state == XmlReadState_Initial) - { - xml_encoding enc; - HRESULT hr; + hr = reader_parse_nextnode(This); + if (oldtype == XmlNodeType_None && This->nodetype != oldtype) + This->state = 
XmlReadState_Interactive; + if (hr == S_OK) *nodetype = This->nodetype; - hr = readerinput_growraw(This->input); - if (FAILED(hr)) return hr; - - /* try to detect encoding by BOM or data and set input code page */ - hr = readerinput_detectencoding(This->input, &enc); - TRACE("detected encoding %s, 0x%08x\n", debugstr_w(xml_encoding_map[enc].name), hr); - if (FAILED(hr)) return hr; - - /* always switch first time cause we have to put something in */ - readerinput_switchencoding(This->input, enc); - - /* parse xml declaration */ - hr = reader_parse_xmldecl(This); - if (FAILED(hr)) return hr; - - if (hr == S_OK) - { - This->state = XmlReadState_Interactive; - This->nodetype = *node_type = XmlNodeType_XmlDeclaration; - return S_OK; - } - } - - return E_NOTIMPL; + return hr; } static HRESULT WINAPI xmlreader_GetNodeType(IXmlReader* iface, XmlNodeType *node_type) @@ -1223,6 +1316,7 @@ HRESULT WINAPI CreateXmlReader(REFIID riid, void **obj, IMalloc *imalloc) reader->ref = 1; reader->input = NULL; reader->state = XmlReadState_Closed; + reader->instate = XmlReadInState_Initial; reader->dtdmode = DtdProcessing_Prohibit; reader->line = reader->pos = 0; reader->imalloc = imalloc; diff --git a/dlls/xmllite/tests/reader.c b/dlls/xmllite/tests/reader.c index 852f3d42836..ed66dea70e5 100644 --- a/dlls/xmllite/tests/reader.c +++ b/dlls/xmllite/tests/reader.c @@ -497,6 +497,7 @@ static void test_readerinput(void) IStream_Release(stream); /* test input interface selection sequence */ + input = NULL; hr = testinput_createinstance((void**)&input); ok(hr == S_OK, "Expected S_OK, got %08x\n", hr); @@ -688,10 +689,70 @@ todo_wine { ok(hr == S_OK, "got %08x\n", hr); ok(type == XmlNodeType_XmlDeclaration, "got %d\n", type); + type = XmlNodeType_XmlDeclaration; + hr = IXmlReader_Read(reader, &type); + /* newer versions return syntax error here cause document is incomplete, + it makes more sense than invalid char error */ +todo_wine { + ok(hr == WC_E_SYNTAX || broken(hr == WC_E_XMLCHARACTER), 
"got 0x%08x\n", hr); + ok(type == XmlNodeType_None, "got %d\n", type); +} IStream_Release(stream); IXmlReader_Release(reader); } +static const char xml_comment[] = "\xef\xbb\xbf"; +static const char xml_comment1[] = "\xef\xbb\xbf"; +static const char xml_comment2[] = "\xef\xbb\xbf"; + +static void test_read_comment(void) +{ + HRESULT hr; + IStream *stream; + IXmlReader *reader; + XmlNodeType type; + + hr = pCreateXmlReader(&IID_IXmlReader, (void**)&reader, NULL); + ok(hr == S_OK, "S_OK, got %08x\n", hr); + + stream = create_stream_on_data(xml_comment, sizeof(xml_comment)); + hr = IXmlReader_SetInput(reader, (IUnknown*)stream); + ok(hr == S_OK, "got %08x\n", hr); + + type = XmlNodeType_None; + hr = IXmlReader_Read(reader, &type); +todo_wine { + ok(hr == S_OK, "got %08x\n", hr); + ok(type == XmlNodeType_Comment, "got %d\n", type); +} + IStream_Release(stream); + + stream = create_stream_on_data(xml_comment1, sizeof(xml_comment1)); + hr = IXmlReader_SetInput(reader, (IUnknown*)stream); + ok(hr == S_OK, "got %08x\n", hr); + + type = XmlNodeType_None; + hr = IXmlReader_Read(reader, &type); +todo_wine { + ok(hr == S_OK, "got %08x\n", hr); + ok(type == XmlNodeType_Comment, "got %d\n", type); +} + IStream_Release(stream); + + stream = create_stream_on_data(xml_comment2, sizeof(xml_comment2)); + hr = IXmlReader_SetInput(reader, (IUnknown*)stream); + ok(hr == S_OK, "got %08x\n", hr); + + type = XmlNodeType_None; + hr = IXmlReader_Read(reader, &type); +todo_wine + ok(hr == WC_E_COMMENT || broken(hr == WC_E_GREATERTHAN), "got %08x\n", hr); + ok(type == XmlNodeType_None, "got %d\n", type); + IStream_Release(stream); + + IXmlReader_Release(reader); +} + START_TEST(reader) { HRESULT r; @@ -708,6 +769,7 @@ START_TEST(reader) test_reader_create(); test_readerinput(); test_reader_state(); + test_read_comment(); test_read_xmldeclaration(); CoUninitialize();