From 6874bf3c4ed75c0acb2b1f15fb00278656b44200 Mon Sep 17 00:00:00 2001 From: Nikolay Sivov Date: Wed, 13 Mar 2013 15:04:59 +0400 Subject: [PATCH] msxml3: Process CDATA section before reporting it. --- dlls/msxml3/saxreader.c | 161 +++++++++++++++++++------------ dlls/msxml3/tests/saxreader.c | 175 +++++++++++++++++++++++++--------- 2 files changed, 228 insertions(+), 108 deletions(-) diff --git a/dlls/msxml3/saxreader.c b/dlls/msxml3/saxreader.c index 6c0f2ca7a32..5475e1a408e 100644 --- a/dlls/msxml3/saxreader.c +++ b/dlls/msxml3/saxreader.c @@ -1760,33 +1760,73 @@ static void libxmlFatalError(void *ctx, const char *msg, ...) This->ret = E_FAIL; } -static void libxmlCDataBlock(void *ctx, const xmlChar *value, int len) +/* The only reason this helper exists is that CDATA section are reported by chunks, + newlines are used as delimiter. More than that, reader even alters input data before reporting. + + This helper should be called for substring with trailing newlines. +*/ +static BSTR saxreader_get_cdata_chunk(const xmlChar *str, int len) { - saxlocator *This = ctx; - struct saxcontenthandler_iface *content = saxreader_get_contenthandler(This->saxreader); - struct saxlexicalhandler_iface *lexical = saxreader_get_lexicalhandler(This->saxreader); - HRESULT hr = S_OK; - xmlChar *beg = (xmlChar*)This->pParserCtxt->input->cur-len; - xmlChar *cur, *end; - int realLen; - BSTR Chars; - BOOL lastEvent = FALSE, change; + BSTR bstr = bstr_from_xmlCharN(str, len), ret; + WCHAR *ptr; - update_position(This, FALSE); - while(beg-9>=This->pParserCtxt->input->base - && memcmp(beg-9, "= bstr) + ptr--; + + while (*++ptr) { - if(*beg=='\n' || (*beg=='\r' && *(beg+1)!='\n')) - This->line--; - beg--; + /* replace returns as: + + - "\r" -> "\n" + - "\r\r" -> "\r" + - "\r\n" -> "\n" + */ + if (*ptr == '\r') + { + if (*(ptr+1) == '\r' || *(ptr+1) == '\n') + { + /* shift tail */ + memmove(ptr, ptr+1, len-- - (ptr-bstr)); + } + else + *ptr = '\n'; + } } - This->column = 0; - for(; beg>=This->pParserCtxt->input->base && *beg!='\n' && *beg!='\r'; beg--) - This->column++; - if (saxreader_has_handler(This, SAXLexicalHandler)) + ret = SysAllocStringLen(bstr, len); + SysFreeString(bstr); + return ret; +} + +static HRESULT saxreader_saxcharacters(saxlocator *locator, BSTR chars) +{ + struct saxcontenthandler_iface *content = saxreader_get_contenthandler(locator->saxreader); + HRESULT hr; + + if (!saxreader_has_handler(locator, SAXContentHandler)) return S_OK; + + if (locator->vbInterface) + hr = IVBSAXContentHandler_characters(content->vbhandler, &chars); + else + hr = ISAXContentHandler_characters(content->handler, chars, SysStringLen(chars)); + + return hr; +} + +static void libxml_cdatablock(void *ctx, const xmlChar *value, int len) +{ + const xmlChar *start, *end; + saxlocator *locator = ctx; + struct saxlexicalhandler_iface *lexical = saxreader_get_lexicalhandler(locator->saxreader); + HRESULT hr = S_OK; + BSTR chars; + int i; + + update_position(locator, FALSE); + if (saxreader_has_handler(locator, SAXLexicalHandler)) { - if (This->vbInterface) + if (locator->vbInterface) hr = IVBSAXLexicalHandler_startCDATA(lexical->vbhandler); else hr = ISAXLexicalHandler_startCDATA(lexical->handler); @@ -1794,61 +1834,60 @@ static void libxmlCDataBlock(void *ctx, const xmlChar *value, int len) if(FAILED(hr)) { - format_error_message_from_id(This, hr); + format_error_message_from_id(locator, hr); return; } - realLen = This->pParserCtxt->input->cur-beg-3; - cur = beg; - end = beg; + start = value; + end = NULL; + i = 0; - while(1) + while (i < len) { - while(end-begsaxreader->pool, cur, end-cur+1); - if (This->vbInterface) - hr = IVBSAXContentHandler_characters(content->vbhandler, &Chars); - else - hr = ISAXContentHandler_characters(content->handler, Chars, SysStringLen(Chars)); - } - - if(change) *end = '\r'; - - if(lastEvent) - break; - - This->column += end-cur+2; - end += 2; - cur = end; + i++; + locator->column++; } - if (saxreader_has_handler(This, SAXLexicalHandler)) + /* no newline chars (or last chunk) report as a whole */ + if (!end && start == value) { - if (This->vbInterface) + /* report */ + chars = bstr_from_xmlCharN(start, len-(start-value)); + TRACE("(%s)\n", debugstr_w(chars)); + hr = saxreader_saxcharacters(locator, chars); + SysFreeString(chars); + } + + if (saxreader_has_handler(locator, SAXLexicalHandler)) + { + if (locator->vbInterface) hr = IVBSAXLexicalHandler_endCDATA(lexical->vbhandler); else hr = ISAXLexicalHandler_endCDATA(lexical->handler); } if(FAILED(hr)) - format_error_message_from_id(This, hr); - - This->column += 4+end-cur; + format_error_message_from_id(locator, hr); } static xmlParserInputPtr libxmlresolveentity(void *ctx, const xmlChar *publicid, const xmlChar *systemid) @@ -3259,7 +3298,7 @@ HRESULT SAXXMLReader_create(MSXML_VERSION version, IUnknown *outer, LPVOID *ppOb reader->sax.comment = libxmlComment; reader->sax.error = libxmlFatalError; reader->sax.fatalError = libxmlFatalError; - reader->sax.cdataBlock = libxmlCDataBlock; + reader->sax.cdataBlock = libxml_cdatablock; reader->sax.resolveEntity = libxmlresolveentity; *ppObj = &reader->IVBSAXXMLReader_iface; diff --git a/dlls/msxml3/tests/saxreader.c b/dlls/msxml3/tests/saxreader.c index da662bb9249..df0aa47a45d 100644 --- a/dlls/msxml3/tests/saxreader.c +++ b/dlls/msxml3/tests/saxreader.c @@ -578,7 +578,14 @@ static const char test_attributes[] = static const char test_cdata_xml[] = "" -""; +""; + +static const char test2_cdata_xml[] = +"" +""; + +static const char test3_cdata_xml[] = +""; static struct call_entry content_handler_test1[] = { { CH_PUTDOCUMENTLOCATOR, 0, 0, S_OK }, @@ -911,9 +918,36 @@ static struct call_entry cdata_test[] = { { LH_STARTCDATA, 1, 35, S_OK }, { CH_CHARACTERS, 1, 35, S_OK, "Some \n" }, { CH_CHARACTERS, 1, 42, S_OK, "text\n\n" }, - { CH_CHARACTERS, 4, 1, S_OK, "data\n" }, - { LH_ENDCDATA, 4, 1, S_OK }, - { CH_ENDELEMENT, 4, 6, S_OK, "", "a", "a" }, + { CH_CHARACTERS, 1, 49, S_OK, "data\n\n" }, + { LH_ENDCDATA, 1, 49, S_OK }, + { CH_ENDELEMENT, 6, 6, S_OK, "", "a", "a" }, + { CH_ENDDOCUMENT, 0, 0, S_OK }, + { CH_ENDTEST } +}; + +static struct call_entry cdata_test2[] = { + { CH_PUTDOCUMENTLOCATOR, 0, 0, S_OK }, + { CH_STARTDOCUMENT, 0, 0, S_OK }, + { CH_STARTELEMENT, 1, 26, S_OK, "", "a", "a" }, + { LH_STARTCDATA, 1, 35, S_OK }, + { CH_CHARACTERS, 1, 35, S_OK, "\n\n" }, + { CH_CHARACTERS, 1, 38, S_OK, "Some \n" }, + { CH_CHARACTERS, 1, 45, S_OK, "text\n\n" }, + { CH_CHARACTERS, 1, 52, S_OK, "data\n\n" }, + { LH_ENDCDATA, 1, 52, S_OK }, + { CH_ENDELEMENT, 8, 6, S_OK, "", "a", "a" }, + { CH_ENDDOCUMENT, 0, 0, S_OK }, + { CH_ENDTEST } +}; + +static struct call_entry cdata_test3[] = { + { CH_PUTDOCUMENTLOCATOR, 0, 0, S_OK }, + { CH_STARTDOCUMENT, 0, 0, S_OK }, + { CH_STARTELEMENT, 1, 26, S_OK, "", "a", "a" }, + { LH_STARTCDATA, 1, 35, S_OK }, + { CH_CHARACTERS, 1, 35, S_OK, "Some text data" }, + { LH_ENDCDATA, 1, 35, S_OK }, + { CH_ENDELEMENT, 1, 54, S_OK, "", "a", "a" }, { CH_ENDDOCUMENT, 0, 0, S_OK }, { CH_ENDTEST } }; @@ -928,10 +962,40 @@ static struct call_entry cdata_test_alt[] = { { CH_CHARACTERS, 2, 0, S_OK, "\n" }, { CH_CHARACTERS, 3, 1, S_OK, "text\n" }, { CH_CHARACTERS, 4, 0, S_OK, "\n" }, - { CH_CHARACTERS, 5, 3, S_OK, "data\n" }, - { LH_ENDCDATA, 5, 3, S_OK }, - { CH_ENDELEMENT, 5, 7, S_OK, "", "a", "a" }, - { CH_ENDDOCUMENT, 5, 7, S_OK }, + { CH_CHARACTERS, 6, 3, S_OK, "data\n\n" }, + { LH_ENDCDATA, 6, 3, S_OK }, + { CH_ENDELEMENT, 6, 7, S_OK, "", "a", "a" }, + { CH_ENDDOCUMENT, 6, 7, S_OK }, + { CH_ENDTEST } +}; + +static struct call_entry cdata_test2_alt[] = { + { CH_PUTDOCUMENTLOCATOR, 1, 0, S_OK }, + { CH_STARTDOCUMENT, 1, 22, S_OK }, + { CH_STARTELEMENT, 1, 25, S_OK, "", "a", "a" }, + { LH_STARTCDATA, 1, 34, S_OK }, + { CH_CHARACTERS, 2, 1, S_OK, "\n" }, + { CH_CHARACTERS, 3, 0, S_OK, "\n" }, + { CH_CHARACTERS, 3, 6, S_OK, "Some " }, + { CH_CHARACTERS, 4, 0, S_OK, "\n" }, + { CH_CHARACTERS, 5, 1, S_OK, "text\n" }, + { CH_CHARACTERS, 6, 0, S_OK, "\n" }, + { CH_CHARACTERS, 8, 3, S_OK, "data\n\n" }, + { LH_ENDCDATA, 8, 3, S_OK }, + { CH_ENDELEMENT, 8, 7, S_OK, "", "a", "a" }, + { CH_ENDDOCUMENT, 8, 7, S_OK }, + { CH_ENDTEST } +}; + +static struct call_entry cdata_test3_alt[] = { + { CH_PUTDOCUMENTLOCATOR, 1, 0, S_OK }, + { CH_STARTDOCUMENT, 1, 22, S_OK }, + { CH_STARTELEMENT, 1, 25, S_OK, "", "a", "a" }, + { LH_STARTCDATA, 1, 34, S_OK }, + { CH_CHARACTERS, 1, 51, S_OK, "Some text data" }, + { LH_ENDCDATA, 1, 51, S_OK }, + { CH_ENDELEMENT, 1, 55, S_OK, "", "a", "a" }, + { CH_ENDDOCUMENT, 1, 55, S_OK }, { CH_ENDTEST } }; @@ -1934,6 +1998,24 @@ static struct msxmlsupported_data_t reader_support_data[] = static struct saxlexicalhandler lexicalhandler; static struct saxdeclhandler declhandler; +static IStream *create_test_stream(const char *data, int len) +{ + ULARGE_INTEGER size; + LARGE_INTEGER pos; + IStream *stream; + ULONG written; + + if (len == -1) len = strlen(data); + CreateStreamOnHGlobal(NULL, TRUE, &stream); + size.QuadPart = len; + IStream_SetSize(stream, size); + IStream_Write(stream, data, len, &written); + pos.QuadPart = 0; + IStream_Seek(stream, pos, STREAM_SEEK_SET, NULL); + + return stream; +} + static void test_saxreader(void) { const struct msxmlsupported_data_t *table = reader_support_data; @@ -1946,8 +2028,6 @@ static void test_saxreader(void) SAFEARRAYBOUND SADim[1]; char *ptr = NULL; IStream *stream; - ULARGE_INTEGER size; - LARGE_INTEGER pos; ULONG written; HANDLE file; static const CHAR testXmlA[] = "test.xml"; @@ -2041,12 +2121,7 @@ static void test_saxreader(void) SafeArrayDestroy(sa); - CreateStreamOnHGlobal(NULL, TRUE, &stream); - size.QuadPart = strlen(testXML); - IStream_SetSize(stream, size); - IStream_Write(stream, testXML, strlen(testXML), &written); - pos.QuadPart = 0; - IStream_Seek(stream, pos, STREAM_SEEK_SET, NULL); + stream = create_test_stream(testXML, -1); V_VT(&var) = VT_UNKNOWN; V_UNKNOWN(&var) = (IUnknown*)stream; @@ -2057,12 +2132,7 @@ static void test_saxreader(void) IStream_Release(stream); - CreateStreamOnHGlobal(NULL, TRUE, &stream); - size.QuadPart = strlen(test_attributes); - IStream_SetSize(stream, size); - IStream_Write(stream, test_attributes, strlen(test_attributes), &written); - pos.QuadPart = 0; - IStream_Seek(stream, pos, STREAM_SEEK_SET, NULL); + stream = create_test_stream(test_attributes, -1); V_VT(&var) = VT_UNKNOWN; V_UNKNOWN(&var) = (IUnknown*)stream; @@ -2200,12 +2270,7 @@ static void test_saxreader(void) hr = ISAXXMLReader_putFeature(reader, _bstr_("http://xml.org/sax/features/namespaces"), VARIANT_FALSE); EXPECT_HR(hr, S_OK); - CreateStreamOnHGlobal(NULL, TRUE, &stream); - size.QuadPart = strlen(test_attributes); - IStream_SetSize(stream, size); - IStream_Write(stream, test_attributes, strlen(test_attributes), &written); - pos.QuadPart = 0; - IStream_Seek(stream, pos, STREAM_SEEK_SET, NULL); + stream = create_test_stream(test_attributes, -1); V_VT(&var) = VT_UNKNOWN; V_UNKNOWN(&var) = (IUnknown*)stream; @@ -2228,12 +2293,7 @@ static void test_saxreader(void) hr = ISAXXMLReader_putFeature(reader, _bstr_("http://xml.org/sax/features/namespace-prefixes"), VARIANT_FALSE); EXPECT_HR(hr, S_OK); - CreateStreamOnHGlobal(NULL, TRUE, &stream); - size.QuadPart = strlen(test_attributes); - IStream_SetSize(stream, size); - IStream_Write(stream, test_attributes, strlen(test_attributes), &written); - pos.QuadPart = 0; - IStream_Seek(stream, pos, STREAM_SEEK_SET, NULL); + stream = create_test_stream(test_attributes, -1); V_VT(&var) = VT_UNKNOWN; V_UNKNOWN(&var) = (IUnknown*)stream; @@ -2254,12 +2314,7 @@ static void test_saxreader(void) EXPECT_HR(hr, S_OK); /* attribute normalization */ - CreateStreamOnHGlobal(NULL, TRUE, &stream); - size.QuadPart = strlen(attribute_normalize); - IStream_SetSize(stream, size); - IStream_Write(stream, attribute_normalize, strlen(attribute_normalize), &written); - pos.QuadPart = 0; - IStream_Seek(stream, pos, STREAM_SEEK_SET, NULL); + stream = create_test_stream(attribute_normalize, -1); V_VT(&var) = VT_UNKNOWN; V_UNKNOWN(&var) = (IUnknown*)stream; @@ -2285,13 +2340,6 @@ static void test_saxreader(void) ok(hr == S_OK || broken(hr == E_FAIL), "got 0x%08x\n", hr); /* CDATA sections */ - CreateStreamOnHGlobal(NULL, TRUE, &stream); - size.QuadPart = strlen(test_cdata_xml); - IStream_SetSize(stream, size); - IStream_Write(stream, test_cdata_xml, strlen(test_cdata_xml), &written); - pos.QuadPart = 0; - IStream_Seek(stream, pos, STREAM_SEEK_SET, NULL); - init_saxlexicalhandler(&lexicalhandler, S_OK); V_VT(&var) = VT_UNKNOWN; @@ -2299,6 +2347,7 @@ static void test_saxreader(void) hr = ISAXXMLReader_putProperty(reader, _bstr_("http://xml.org/sax/properties/lexical-handler"), var); ok(hr == S_OK, "got 0x%08x\n", hr); + stream = create_test_stream(test_cdata_xml, -1); V_VT(&var) = VT_UNKNOWN; V_UNKNOWN(&var) = (IUnknown*)stream; @@ -2312,6 +2361,38 @@ static void test_saxreader(void) ok(hr == S_OK, "got 0x%08x\n", hr); ok_sequence(sequences, CONTENT_HANDLER_INDEX, test_seq, "cdata test", TRUE); + /* 2. CDATA sections */ + stream = create_test_stream(test2_cdata_xml, -1); + V_VT(&var) = VT_UNKNOWN; + V_UNKNOWN(&var) = (IUnknown*)stream; + + if (IsEqualGUID(table->clsid, &CLSID_SAXXMLReader60)) + test_seq = cdata_test2_alt; + else + test_seq = cdata_test2; + + set_expected_seq(test_seq); + hr = ISAXXMLReader_parse(reader, var); + ok(hr == S_OK, "got 0x%08x\n", hr); + ok_sequence(sequences, CONTENT_HANDLER_INDEX, test_seq, "cdata test 2", TRUE); + + IStream_Release(stream); + + /* 3. CDATA sections */ + stream = create_test_stream(test3_cdata_xml, -1); + V_VT(&var) = VT_UNKNOWN; + V_UNKNOWN(&var) = (IUnknown*)stream; + + if (IsEqualGUID(table->clsid, &CLSID_SAXXMLReader60)) + test_seq = cdata_test3_alt; + else + test_seq = cdata_test3; + + set_expected_seq(test_seq); + hr = ISAXXMLReader_parse(reader, var); + ok(hr == S_OK, "got 0x%08x\n", hr); + ok_sequence(sequences, CONTENT_HANDLER_INDEX, test_seq, "cdata test 3", TRUE); + IStream_Release(stream); ISAXXMLReader_Release(reader);