msxml3: Process CDATA section before reporting it.

This commit is contained in:
Nikolay Sivov 2013-03-13 15:04:59 +04:00 committed by Alexandre Julliard
parent 33aa647e64
commit 6874bf3c4e
2 changed files with 228 additions and 108 deletions

View File

@ -1760,33 +1760,73 @@ static void libxmlFatalError(void *ctx, const char *msg, ...)
This->ret = E_FAIL; This->ret = E_FAIL;
} }
static void libxmlCDataBlock(void *ctx, const xmlChar *value, int len) /* The only reason this helper exists is that CDATA sections are reported in chunks,
{ with newlines used as delimiters. More than that, the reader even alters input data before reporting.
saxlocator *This = ctx;
struct saxcontenthandler_iface *content = saxreader_get_contenthandler(This->saxreader);
struct saxlexicalhandler_iface *lexical = saxreader_get_lexicalhandler(This->saxreader);
HRESULT hr = S_OK;
xmlChar *beg = (xmlChar*)This->pParserCtxt->input->cur-len;
xmlChar *cur, *end;
int realLen;
BSTR Chars;
BOOL lastEvent = FALSE, change;
update_position(This, FALSE); This helper should be called for a substring with trailing newlines.
while(beg-9>=This->pParserCtxt->input->base */
&& memcmp(beg-9, "<![CDATA[", sizeof(char[9]))) static BSTR saxreader_get_cdata_chunk(const xmlChar *str, int len)
{ {
if(*beg=='\n' || (*beg=='\r' && *(beg+1)!='\n')) BSTR bstr = bstr_from_xmlCharN(str, len), ret;
This->line--; WCHAR *ptr;
beg--;
ptr = bstr + len - 1;
while ((*ptr == '\r' || *ptr == '\n') && ptr >= bstr)
ptr--;
while (*++ptr)
{
/* replace returns as:
- "\r<char>" -> "\n<char>"
- "\r\r" -> "\r"
- "\r\n" -> "\n"
*/
if (*ptr == '\r')
{
if (*(ptr+1) == '\r' || *(ptr+1) == '\n')
{
/* shift tail */
memmove(ptr, ptr+1, len-- - (ptr-bstr));
}
else
*ptr = '\n';
}
} }
This->column = 0;
for(; beg>=This->pParserCtxt->input->base && *beg!='\n' && *beg!='\r'; beg--)
This->column++;
if (saxreader_has_handler(This, SAXLexicalHandler)) ret = SysAllocStringLen(bstr, len);
SysFreeString(bstr);
return ret;
}
static HRESULT saxreader_saxcharacters(saxlocator *locator, BSTR chars)
{ {
if (This->vbInterface) struct saxcontenthandler_iface *content = saxreader_get_contenthandler(locator->saxreader);
HRESULT hr;
if (!saxreader_has_handler(locator, SAXContentHandler)) return S_OK;
if (locator->vbInterface)
hr = IVBSAXContentHandler_characters(content->vbhandler, &chars);
else
hr = ISAXContentHandler_characters(content->handler, chars, SysStringLen(chars));
return hr;
}
static void libxml_cdatablock(void *ctx, const xmlChar *value, int len)
{
const xmlChar *start, *end;
saxlocator *locator = ctx;
struct saxlexicalhandler_iface *lexical = saxreader_get_lexicalhandler(locator->saxreader);
HRESULT hr = S_OK;
BSTR chars;
int i;
update_position(locator, FALSE);
if (saxreader_has_handler(locator, SAXLexicalHandler))
{
if (locator->vbInterface)
hr = IVBSAXLexicalHandler_startCDATA(lexical->vbhandler); hr = IVBSAXLexicalHandler_startCDATA(lexical->vbhandler);
else else
hr = ISAXLexicalHandler_startCDATA(lexical->handler); hr = ISAXLexicalHandler_startCDATA(lexical->handler);
@ -1794,61 +1834,60 @@ static void libxmlCDataBlock(void *ctx, const xmlChar *value, int len)
if(FAILED(hr)) if(FAILED(hr))
{ {
format_error_message_from_id(This, hr); format_error_message_from_id(locator, hr);
return; return;
} }
realLen = This->pParserCtxt->input->cur-beg-3; start = value;
cur = beg; end = NULL;
end = beg; i = 0;
while(1) while (i < len)
{ {
while(end-beg<realLen && *end!='\r') end++; /* scan for newlines */
if(end-beg==realLen) if (value[i] == '\r' || value[i] == '\n')
{ {
end--; /* skip newlines/linefeeds */
lastEvent = TRUE; while (i < len)
{
if (value[i] != '\r' && value[i] != '\n') break;
i++;
} }
else if(end-beg==realLen-1 && *end=='\r' && *(end+1)=='\n') end = &value[i];
lastEvent = TRUE;
if(*end == '\r') change = TRUE; /* report */
else change = FALSE; chars = saxreader_get_cdata_chunk(start, end-start);
TRACE("(chunk %s)\n", debugstr_w(chars));
hr = saxreader_saxcharacters(locator, chars);
SysFreeString(chars);
if(change) *end = '\n'; start = &value[i];
end = NULL;
if (saxreader_has_handler(This, SAXContentHandler)) }
{ i++;
Chars = pooled_bstr_from_xmlCharN(&This->saxreader->pool, cur, end-cur+1); locator->column++;
if (This->vbInterface)
hr = IVBSAXContentHandler_characters(content->vbhandler, &Chars);
else
hr = ISAXContentHandler_characters(content->handler, Chars, SysStringLen(Chars));
} }
if(change) *end = '\r'; /* no newline chars (or last chunk) report as a whole */
if (!end && start == value)
if(lastEvent) {
break; /* report */
chars = bstr_from_xmlCharN(start, len-(start-value));
This->column += end-cur+2; TRACE("(%s)\n", debugstr_w(chars));
end += 2; hr = saxreader_saxcharacters(locator, chars);
cur = end; SysFreeString(chars);
} }
if (saxreader_has_handler(This, SAXLexicalHandler)) if (saxreader_has_handler(locator, SAXLexicalHandler))
{ {
if (This->vbInterface) if (locator->vbInterface)
hr = IVBSAXLexicalHandler_endCDATA(lexical->vbhandler); hr = IVBSAXLexicalHandler_endCDATA(lexical->vbhandler);
else else
hr = ISAXLexicalHandler_endCDATA(lexical->handler); hr = ISAXLexicalHandler_endCDATA(lexical->handler);
} }
if(FAILED(hr)) if(FAILED(hr))
format_error_message_from_id(This, hr); format_error_message_from_id(locator, hr);
This->column += 4+end-cur;
} }
static xmlParserInputPtr libxmlresolveentity(void *ctx, const xmlChar *publicid, const xmlChar *systemid) static xmlParserInputPtr libxmlresolveentity(void *ctx, const xmlChar *publicid, const xmlChar *systemid)
@ -3259,7 +3298,7 @@ HRESULT SAXXMLReader_create(MSXML_VERSION version, IUnknown *outer, LPVOID *ppOb
reader->sax.comment = libxmlComment; reader->sax.comment = libxmlComment;
reader->sax.error = libxmlFatalError; reader->sax.error = libxmlFatalError;
reader->sax.fatalError = libxmlFatalError; reader->sax.fatalError = libxmlFatalError;
reader->sax.cdataBlock = libxmlCDataBlock; reader->sax.cdataBlock = libxml_cdatablock;
reader->sax.resolveEntity = libxmlresolveentity; reader->sax.resolveEntity = libxmlresolveentity;
*ppObj = &reader->IVBSAXXMLReader_iface; *ppObj = &reader->IVBSAXXMLReader_iface;

View File

@ -578,7 +578,14 @@ static const char test_attributes[] =
static const char test_cdata_xml[] = static const char test_cdata_xml[] =
"<?xml version=\"1.0\" ?>" "<?xml version=\"1.0\" ?>"
"<a><![CDATA[Some \r\ntext\n\rdata\n]]></a>"; "<a><![CDATA[Some \r\ntext\n\r\ndata\n\n]]></a>";
/* CDATA fixture whose section starts with newlines ("\n\r\n") and mixes
 * "\r\n" and bare "\n" line ends; fed to the reader in the "cdata test 2" case. */
static const char test2_cdata_xml[] =
    "<?xml version=\"1.0\" ?>"
    "<a>"
    "<![CDATA["
    "\n\r\n"
    "Some \r\n"
    "text\n\r\n"
    "data\n\n"
    "]]>"
    "</a>";
/* CDATA fixture with no newline characters inside the section;
 * fed to the reader in the "cdata test 3" case. */
static const char test3_cdata_xml[] =
    "<?xml version=\"1.0\" ?>"
    "<a><![CDATA[Some text data]]></a>";
static struct call_entry content_handler_test1[] = { static struct call_entry content_handler_test1[] = {
{ CH_PUTDOCUMENTLOCATOR, 0, 0, S_OK }, { CH_PUTDOCUMENTLOCATOR, 0, 0, S_OK },
@ -911,9 +918,36 @@ static struct call_entry cdata_test[] = {
{ LH_STARTCDATA, 1, 35, S_OK }, { LH_STARTCDATA, 1, 35, S_OK },
{ CH_CHARACTERS, 1, 35, S_OK, "Some \n" }, { CH_CHARACTERS, 1, 35, S_OK, "Some \n" },
{ CH_CHARACTERS, 1, 42, S_OK, "text\n\n" }, { CH_CHARACTERS, 1, 42, S_OK, "text\n\n" },
{ CH_CHARACTERS, 4, 1, S_OK, "data\n" }, { CH_CHARACTERS, 1, 49, S_OK, "data\n\n" },
{ LH_ENDCDATA, 4, 1, S_OK }, { LH_ENDCDATA, 1, 49, S_OK },
{ CH_ENDELEMENT, 4, 6, S_OK, "", "a", "a" }, { CH_ENDELEMENT, 6, 6, S_OK, "", "a", "a" },
{ CH_ENDDOCUMENT, 0, 0, S_OK },
{ CH_ENDTEST }
};
/* Expected handler call sequence for parsing test2_cdata_xml. test_saxreader()
 * uses this table for every reader class except CLSID_SAXXMLReader60, which
 * gets cdata_test2_alt instead.
 * NOTE(review): entries look like { event id, line, column, HRESULT, string args... };
 * confirm against struct call_entry, which is declared outside this view. */
static struct call_entry cdata_test2[] = {
{ CH_PUTDOCUMENTLOCATOR, 0, 0, S_OK },
{ CH_STARTDOCUMENT, 0, 0, S_OK },
{ CH_STARTELEMENT, 1, 26, S_OK, "", "a", "a" },
{ LH_STARTCDATA, 1, 35, S_OK },
/* the leading "\n\r\n" of the section is expected as one chunk, with "\r\n" collapsed to "\n" */
{ CH_CHARACTERS, 1, 35, S_OK, "\n\n" },
/* each following chunk is text plus its trailing newline run */
{ CH_CHARACTERS, 1, 38, S_OK, "Some \n" },
{ CH_CHARACTERS, 1, 45, S_OK, "text\n\n" },
{ CH_CHARACTERS, 1, 52, S_OK, "data\n\n" },
{ LH_ENDCDATA, 1, 52, S_OK },
{ CH_ENDELEMENT, 8, 6, S_OK, "", "a", "a" },
{ CH_ENDDOCUMENT, 0, 0, S_OK },
/* CH_ENDTEST is the last entry of the table */
{ CH_ENDTEST }
};
static struct call_entry cdata_test3[] = {
{ CH_PUTDOCUMENTLOCATOR, 0, 0, S_OK },
{ CH_STARTDOCUMENT, 0, 0, S_OK },
{ CH_STARTELEMENT, 1, 26, S_OK, "", "a", "a" },
{ LH_STARTCDATA, 1, 35, S_OK },
{ CH_CHARACTERS, 1, 35, S_OK, "Some text data" },
{ LH_ENDCDATA, 1, 35, S_OK },
{ CH_ENDELEMENT, 1, 54, S_OK, "", "a", "a" },
{ CH_ENDDOCUMENT, 0, 0, S_OK }, { CH_ENDDOCUMENT, 0, 0, S_OK },
{ CH_ENDTEST } { CH_ENDTEST }
}; };
@ -928,10 +962,40 @@ static struct call_entry cdata_test_alt[] = {
{ CH_CHARACTERS, 2, 0, S_OK, "\n" }, { CH_CHARACTERS, 2, 0, S_OK, "\n" },
{ CH_CHARACTERS, 3, 1, S_OK, "text\n" }, { CH_CHARACTERS, 3, 1, S_OK, "text\n" },
{ CH_CHARACTERS, 4, 0, S_OK, "\n" }, { CH_CHARACTERS, 4, 0, S_OK, "\n" },
{ CH_CHARACTERS, 5, 3, S_OK, "data\n" }, { CH_CHARACTERS, 6, 3, S_OK, "data\n\n" },
{ LH_ENDCDATA, 5, 3, S_OK }, { LH_ENDCDATA, 6, 3, S_OK },
{ CH_ENDELEMENT, 5, 7, S_OK, "", "a", "a" }, { CH_ENDELEMENT, 6, 7, S_OK, "", "a", "a" },
{ CH_ENDDOCUMENT, 5, 7, S_OK }, { CH_ENDDOCUMENT, 6, 7, S_OK },
{ CH_ENDTEST }
};
/* Expected handler call sequence for parsing test2_cdata_xml when the reader
 * class is CLSID_SAXXMLReader60 (chosen in test_saxreader()).
 * Unlike cdata_test2, the expected chunks here split text from most newlines
 * and the second numeric field grows through the section.
 * NOTE(review): entries look like { event id, line, column, HRESULT, string args... };
 * confirm against struct call_entry, which is declared outside this view. */
static struct call_entry cdata_test2_alt[] = {
{ CH_PUTDOCUMENTLOCATOR, 1, 0, S_OK },
{ CH_STARTDOCUMENT, 1, 22, S_OK },
{ CH_STARTELEMENT, 1, 25, S_OK, "", "a", "a" },
{ LH_STARTCDATA, 1, 34, S_OK },
/* leading "\n" and "\r\n" of the section are expected as two separate "\n" chunks */
{ CH_CHARACTERS, 2, 1, S_OK, "\n" },
{ CH_CHARACTERS, 3, 0, S_OK, "\n" },
{ CH_CHARACTERS, 3, 6, S_OK, "Some " },
{ CH_CHARACTERS, 4, 0, S_OK, "\n" },
{ CH_CHARACTERS, 5, 1, S_OK, "text\n" },
{ CH_CHARACTERS, 6, 0, S_OK, "\n" },
/* the final chunk keeps both trailing newlines together with the text */
{ CH_CHARACTERS, 8, 3, S_OK, "data\n\n" },
{ LH_ENDCDATA, 8, 3, S_OK },
{ CH_ENDELEMENT, 8, 7, S_OK, "", "a", "a" },
{ CH_ENDDOCUMENT, 8, 7, S_OK },
/* CH_ENDTEST is the last entry of the table */
{ CH_ENDTEST }
};
static struct call_entry cdata_test3_alt[] = {
{ CH_PUTDOCUMENTLOCATOR, 1, 0, S_OK },
{ CH_STARTDOCUMENT, 1, 22, S_OK },
{ CH_STARTELEMENT, 1, 25, S_OK, "", "a", "a" },
{ LH_STARTCDATA, 1, 34, S_OK },
{ CH_CHARACTERS, 1, 51, S_OK, "Some text data" },
{ LH_ENDCDATA, 1, 51, S_OK },
{ CH_ENDELEMENT, 1, 55, S_OK, "", "a", "a" },
{ CH_ENDDOCUMENT, 1, 55, S_OK },
{ CH_ENDTEST } { CH_ENDTEST }
}; };
@ -1934,6 +1998,24 @@ static struct msxmlsupported_data_t reader_support_data[] =
static struct saxlexicalhandler lexicalhandler; static struct saxlexicalhandler lexicalhandler;
static struct saxdeclhandler declhandler; static struct saxdeclhandler declhandler;
/* Creates a stream with CreateStreamOnHGlobal(), writes `len` bytes of `data`
 * into it and seeks back to offset 0 before returning it.
 *
 * data: bytes to store in the stream.
 * len:  number of bytes to write; -1 means use strlen(data).
 *
 * Returns the new stream; callers in this file release it with IStream_Release().
 * Return values of the stream calls are not checked here. */
static IStream *create_test_stream(const char *data, int len)
{
    IStream *result;
    ULARGE_INTEGER total;
    LARGE_INTEGER origin;
    ULONG count;

    /* -1 asks for the length to be measured from the NUL-terminated input */
    if (len == -1)
        len = strlen(data);

    total.QuadPart = len;
    origin.QuadPart = 0;

    CreateStreamOnHGlobal(NULL, TRUE, &result);
    IStream_SetSize(result, total);
    IStream_Write(result, data, len, &count);

    /* rewind so that a subsequent read starts at the first byte written */
    IStream_Seek(result, origin, STREAM_SEEK_SET, NULL);

    return result;
}
static void test_saxreader(void) static void test_saxreader(void)
{ {
const struct msxmlsupported_data_t *table = reader_support_data; const struct msxmlsupported_data_t *table = reader_support_data;
@ -1946,8 +2028,6 @@ static void test_saxreader(void)
SAFEARRAYBOUND SADim[1]; SAFEARRAYBOUND SADim[1];
char *ptr = NULL; char *ptr = NULL;
IStream *stream; IStream *stream;
ULARGE_INTEGER size;
LARGE_INTEGER pos;
ULONG written; ULONG written;
HANDLE file; HANDLE file;
static const CHAR testXmlA[] = "test.xml"; static const CHAR testXmlA[] = "test.xml";
@ -2041,12 +2121,7 @@ static void test_saxreader(void)
SafeArrayDestroy(sa); SafeArrayDestroy(sa);
CreateStreamOnHGlobal(NULL, TRUE, &stream); stream = create_test_stream(testXML, -1);
size.QuadPart = strlen(testXML);
IStream_SetSize(stream, size);
IStream_Write(stream, testXML, strlen(testXML), &written);
pos.QuadPart = 0;
IStream_Seek(stream, pos, STREAM_SEEK_SET, NULL);
V_VT(&var) = VT_UNKNOWN; V_VT(&var) = VT_UNKNOWN;
V_UNKNOWN(&var) = (IUnknown*)stream; V_UNKNOWN(&var) = (IUnknown*)stream;
@ -2057,12 +2132,7 @@ static void test_saxreader(void)
IStream_Release(stream); IStream_Release(stream);
CreateStreamOnHGlobal(NULL, TRUE, &stream); stream = create_test_stream(test_attributes, -1);
size.QuadPart = strlen(test_attributes);
IStream_SetSize(stream, size);
IStream_Write(stream, test_attributes, strlen(test_attributes), &written);
pos.QuadPart = 0;
IStream_Seek(stream, pos, STREAM_SEEK_SET, NULL);
V_VT(&var) = VT_UNKNOWN; V_VT(&var) = VT_UNKNOWN;
V_UNKNOWN(&var) = (IUnknown*)stream; V_UNKNOWN(&var) = (IUnknown*)stream;
@ -2200,12 +2270,7 @@ static void test_saxreader(void)
hr = ISAXXMLReader_putFeature(reader, _bstr_("http://xml.org/sax/features/namespaces"), VARIANT_FALSE); hr = ISAXXMLReader_putFeature(reader, _bstr_("http://xml.org/sax/features/namespaces"), VARIANT_FALSE);
EXPECT_HR(hr, S_OK); EXPECT_HR(hr, S_OK);
CreateStreamOnHGlobal(NULL, TRUE, &stream); stream = create_test_stream(test_attributes, -1);
size.QuadPart = strlen(test_attributes);
IStream_SetSize(stream, size);
IStream_Write(stream, test_attributes, strlen(test_attributes), &written);
pos.QuadPart = 0;
IStream_Seek(stream, pos, STREAM_SEEK_SET, NULL);
V_VT(&var) = VT_UNKNOWN; V_VT(&var) = VT_UNKNOWN;
V_UNKNOWN(&var) = (IUnknown*)stream; V_UNKNOWN(&var) = (IUnknown*)stream;
@ -2228,12 +2293,7 @@ static void test_saxreader(void)
hr = ISAXXMLReader_putFeature(reader, _bstr_("http://xml.org/sax/features/namespace-prefixes"), VARIANT_FALSE); hr = ISAXXMLReader_putFeature(reader, _bstr_("http://xml.org/sax/features/namespace-prefixes"), VARIANT_FALSE);
EXPECT_HR(hr, S_OK); EXPECT_HR(hr, S_OK);
CreateStreamOnHGlobal(NULL, TRUE, &stream); stream = create_test_stream(test_attributes, -1);
size.QuadPart = strlen(test_attributes);
IStream_SetSize(stream, size);
IStream_Write(stream, test_attributes, strlen(test_attributes), &written);
pos.QuadPart = 0;
IStream_Seek(stream, pos, STREAM_SEEK_SET, NULL);
V_VT(&var) = VT_UNKNOWN; V_VT(&var) = VT_UNKNOWN;
V_UNKNOWN(&var) = (IUnknown*)stream; V_UNKNOWN(&var) = (IUnknown*)stream;
@ -2254,12 +2314,7 @@ static void test_saxreader(void)
EXPECT_HR(hr, S_OK); EXPECT_HR(hr, S_OK);
/* attribute normalization */ /* attribute normalization */
CreateStreamOnHGlobal(NULL, TRUE, &stream); stream = create_test_stream(attribute_normalize, -1);
size.QuadPart = strlen(attribute_normalize);
IStream_SetSize(stream, size);
IStream_Write(stream, attribute_normalize, strlen(attribute_normalize), &written);
pos.QuadPart = 0;
IStream_Seek(stream, pos, STREAM_SEEK_SET, NULL);
V_VT(&var) = VT_UNKNOWN; V_VT(&var) = VT_UNKNOWN;
V_UNKNOWN(&var) = (IUnknown*)stream; V_UNKNOWN(&var) = (IUnknown*)stream;
@ -2285,13 +2340,6 @@ static void test_saxreader(void)
ok(hr == S_OK || broken(hr == E_FAIL), "got 0x%08x\n", hr); ok(hr == S_OK || broken(hr == E_FAIL), "got 0x%08x\n", hr);
/* CDATA sections */ /* CDATA sections */
CreateStreamOnHGlobal(NULL, TRUE, &stream);
size.QuadPart = strlen(test_cdata_xml);
IStream_SetSize(stream, size);
IStream_Write(stream, test_cdata_xml, strlen(test_cdata_xml), &written);
pos.QuadPart = 0;
IStream_Seek(stream, pos, STREAM_SEEK_SET, NULL);
init_saxlexicalhandler(&lexicalhandler, S_OK); init_saxlexicalhandler(&lexicalhandler, S_OK);
V_VT(&var) = VT_UNKNOWN; V_VT(&var) = VT_UNKNOWN;
@ -2299,6 +2347,7 @@ static void test_saxreader(void)
hr = ISAXXMLReader_putProperty(reader, _bstr_("http://xml.org/sax/properties/lexical-handler"), var); hr = ISAXXMLReader_putProperty(reader, _bstr_("http://xml.org/sax/properties/lexical-handler"), var);
ok(hr == S_OK, "got 0x%08x\n", hr); ok(hr == S_OK, "got 0x%08x\n", hr);
stream = create_test_stream(test_cdata_xml, -1);
V_VT(&var) = VT_UNKNOWN; V_VT(&var) = VT_UNKNOWN;
V_UNKNOWN(&var) = (IUnknown*)stream; V_UNKNOWN(&var) = (IUnknown*)stream;
@ -2312,6 +2361,38 @@ static void test_saxreader(void)
ok(hr == S_OK, "got 0x%08x\n", hr); ok(hr == S_OK, "got 0x%08x\n", hr);
ok_sequence(sequences, CONTENT_HANDLER_INDEX, test_seq, "cdata test", TRUE); ok_sequence(sequences, CONTENT_HANDLER_INDEX, test_seq, "cdata test", TRUE);
/* 2. CDATA sections */
stream = create_test_stream(test2_cdata_xml, -1);
V_VT(&var) = VT_UNKNOWN;
V_UNKNOWN(&var) = (IUnknown*)stream;
if (IsEqualGUID(table->clsid, &CLSID_SAXXMLReader60))
test_seq = cdata_test2_alt;
else
test_seq = cdata_test2;
set_expected_seq(test_seq);
hr = ISAXXMLReader_parse(reader, var);
ok(hr == S_OK, "got 0x%08x\n", hr);
ok_sequence(sequences, CONTENT_HANDLER_INDEX, test_seq, "cdata test 2", TRUE);
IStream_Release(stream);
/* 3. CDATA sections */
stream = create_test_stream(test3_cdata_xml, -1);
V_VT(&var) = VT_UNKNOWN;
V_UNKNOWN(&var) = (IUnknown*)stream;
if (IsEqualGUID(table->clsid, &CLSID_SAXXMLReader60))
test_seq = cdata_test3_alt;
else
test_seq = cdata_test3;
set_expected_seq(test_seq);
hr = ISAXXMLReader_parse(reader, var);
ok(hr == S_OK, "got 0x%08x\n", hr);
ok_sequence(sequences, CONTENT_HANDLER_INDEX, test_seq, "cdata test 3", TRUE);
IStream_Release(stream); IStream_Release(stream);
ISAXXMLReader_Release(reader); ISAXXMLReader_Release(reader);