msxml3: Process CDATA section before reporting it.

This commit is contained in:
Nikolay Sivov 2013-03-13 15:04:59 +04:00 committed by Alexandre Julliard
parent 33aa647e64
commit 6874bf3c4e
2 changed files with 228 additions and 108 deletions

View File

@ -1760,33 +1760,73 @@ static void libxmlFatalError(void *ctx, const char *msg, ...)
This->ret = E_FAIL;
}
static void libxmlCDataBlock(void *ctx, const xmlChar *value, int len)
/* The only reason this helper exists is that CDATA sections are reported in chunks,
with newlines used as delimiters. Moreover, the reader even alters the input data before reporting it.
This helper should be called for a substring with trailing newlines.
*/
static BSTR saxreader_get_cdata_chunk(const xmlChar *str, int len)
{
saxlocator *This = ctx;
struct saxcontenthandler_iface *content = saxreader_get_contenthandler(This->saxreader);
struct saxlexicalhandler_iface *lexical = saxreader_get_lexicalhandler(This->saxreader);
HRESULT hr = S_OK;
xmlChar *beg = (xmlChar*)This->pParserCtxt->input->cur-len;
xmlChar *cur, *end;
int realLen;
BSTR Chars;
BOOL lastEvent = FALSE, change;
BSTR bstr = bstr_from_xmlCharN(str, len), ret;
WCHAR *ptr;
update_position(This, FALSE);
while(beg-9>=This->pParserCtxt->input->base
&& memcmp(beg-9, "<![CDATA[", sizeof(char[9])))
ptr = bstr + len - 1;
while ((*ptr == '\r' || *ptr == '\n') && ptr >= bstr)
ptr--;
while (*++ptr)
{
if(*beg=='\n' || (*beg=='\r' && *(beg+1)!='\n'))
This->line--;
beg--;
/* replace returns as:
- "\r<char>" -> "\n<char>"
- "\r\r" -> "\r"
- "\r\n" -> "\n"
*/
if (*ptr == '\r')
{
if (*(ptr+1) == '\r' || *(ptr+1) == '\n')
{
/* shift tail */
memmove(ptr, ptr+1, len-- - (ptr-bstr));
}
else
*ptr = '\n';
}
}
This->column = 0;
for(; beg>=This->pParserCtxt->input->base && *beg!='\n' && *beg!='\r'; beg--)
This->column++;
if (saxreader_has_handler(This, SAXLexicalHandler))
ret = SysAllocStringLen(bstr, len);
SysFreeString(bstr);
return ret;
}
/* Forwards a chunk of character data to the content handler registered on the
   locator's reader.

   Returns S_OK without calling anything when no content handler is set;
   otherwise returns whatever the handler's characters() method returns.
   The VB flavour receives the BSTR by reference, the C flavour receives the
   string together with its SysStringLen() length. */
static HRESULT saxreader_saxcharacters(saxlocator *locator, BSTR chars)
{
    struct saxcontenthandler_iface *handler_iface = saxreader_get_contenthandler(locator->saxreader);

    if (saxreader_has_handler(locator, SAXContentHandler))
    {
        if (locator->vbInterface)
            return IVBSAXContentHandler_characters(handler_iface->vbhandler, &chars);

        return ISAXContentHandler_characters(handler_iface->handler, chars, SysStringLen(chars));
    }

    /* nothing registered, so there is nothing to report */
    return S_OK;
}
static void libxml_cdatablock(void *ctx, const xmlChar *value, int len)
{
const xmlChar *start, *end;
saxlocator *locator = ctx;
struct saxlexicalhandler_iface *lexical = saxreader_get_lexicalhandler(locator->saxreader);
HRESULT hr = S_OK;
BSTR chars;
int i;
update_position(locator, FALSE);
if (saxreader_has_handler(locator, SAXLexicalHandler))
{
if (This->vbInterface)
if (locator->vbInterface)
hr = IVBSAXLexicalHandler_startCDATA(lexical->vbhandler);
else
hr = ISAXLexicalHandler_startCDATA(lexical->handler);
@ -1794,61 +1834,60 @@ static void libxmlCDataBlock(void *ctx, const xmlChar *value, int len)
if(FAILED(hr))
{
format_error_message_from_id(This, hr);
format_error_message_from_id(locator, hr);
return;
}
realLen = This->pParserCtxt->input->cur-beg-3;
cur = beg;
end = beg;
start = value;
end = NULL;
i = 0;
while(1)
while (i < len)
{
while(end-beg<realLen && *end!='\r') end++;
if(end-beg==realLen)
/* scan for newlines */
if (value[i] == '\r' || value[i] == '\n')
{
end--;
lastEvent = TRUE;
/* skip newlines/linefeeds */
while (i < len)
{
if (value[i] != '\r' && value[i] != '\n') break;
i++;
}
end = &value[i];
/* report */
chars = saxreader_get_cdata_chunk(start, end-start);
TRACE("(chunk %s)\n", debugstr_w(chars));
hr = saxreader_saxcharacters(locator, chars);
SysFreeString(chars);
start = &value[i];
end = NULL;
}
else if(end-beg==realLen-1 && *end=='\r' && *(end+1)=='\n')
lastEvent = TRUE;
if(*end == '\r') change = TRUE;
else change = FALSE;
if(change) *end = '\n';
if (saxreader_has_handler(This, SAXContentHandler))
{
Chars = pooled_bstr_from_xmlCharN(&This->saxreader->pool, cur, end-cur+1);
if (This->vbInterface)
hr = IVBSAXContentHandler_characters(content->vbhandler, &Chars);
else
hr = ISAXContentHandler_characters(content->handler, Chars, SysStringLen(Chars));
}
if(change) *end = '\r';
if(lastEvent)
break;
This->column += end-cur+2;
end += 2;
cur = end;
i++;
locator->column++;
}
if (saxreader_has_handler(This, SAXLexicalHandler))
/* no newline chars (or last chunk) report as a whole */
if (!end && start == value)
{
if (This->vbInterface)
/* report */
chars = bstr_from_xmlCharN(start, len-(start-value));
TRACE("(%s)\n", debugstr_w(chars));
hr = saxreader_saxcharacters(locator, chars);
SysFreeString(chars);
}
if (saxreader_has_handler(locator, SAXLexicalHandler))
{
if (locator->vbInterface)
hr = IVBSAXLexicalHandler_endCDATA(lexical->vbhandler);
else
hr = ISAXLexicalHandler_endCDATA(lexical->handler);
}
if(FAILED(hr))
format_error_message_from_id(This, hr);
This->column += 4+end-cur;
format_error_message_from_id(locator, hr);
}
static xmlParserInputPtr libxmlresolveentity(void *ctx, const xmlChar *publicid, const xmlChar *systemid)
@ -3259,7 +3298,7 @@ HRESULT SAXXMLReader_create(MSXML_VERSION version, IUnknown *outer, LPVOID *ppOb
reader->sax.comment = libxmlComment;
reader->sax.error = libxmlFatalError;
reader->sax.fatalError = libxmlFatalError;
reader->sax.cdataBlock = libxmlCDataBlock;
reader->sax.cdataBlock = libxml_cdatablock;
reader->sax.resolveEntity = libxmlresolveentity;
*ppObj = &reader->IVBSAXXMLReader_iface;

View File

@ -578,7 +578,14 @@ static const char test_attributes[] =
static const char test_cdata_xml[] =
"<?xml version=\"1.0\" ?>"
"<a><![CDATA[Some \r\ntext\n\rdata\n]]></a>";
"<a><![CDATA[Some \r\ntext\n\r\ndata\n\n]]></a>";
/* CDATA input whose section starts with newline characters ("\n\r\n") and then
   mixes "\r\n", "\n" and "\n\n" line endings; test_saxreader parses it as the
   second CDATA case. */
static const char test2_cdata_xml[] =
"<?xml version=\"1.0\" ?>"
"<a><![CDATA[\n\r\nSome \r\ntext\n\r\ndata\n\n]]></a>";
/* CDATA input with no newline characters inside the section; test_saxreader
   parses it as the third CDATA case. */
static const char test3_cdata_xml[] =
"<?xml version=\"1.0\" ?><a><![CDATA[Some text data]]></a>";
static struct call_entry content_handler_test1[] = {
{ CH_PUTDOCUMENTLOCATOR, 0, 0, S_OK },
@ -911,9 +918,36 @@ static struct call_entry cdata_test[] = {
{ LH_STARTCDATA, 1, 35, S_OK },
{ CH_CHARACTERS, 1, 35, S_OK, "Some \n" },
{ CH_CHARACTERS, 1, 42, S_OK, "text\n\n" },
{ CH_CHARACTERS, 4, 1, S_OK, "data\n" },
{ LH_ENDCDATA, 4, 1, S_OK },
{ CH_ENDELEMENT, 4, 6, S_OK, "", "a", "a" },
{ CH_CHARACTERS, 1, 49, S_OK, "data\n\n" },
{ LH_ENDCDATA, 1, 49, S_OK },
{ CH_ENDELEMENT, 6, 6, S_OK, "", "a", "a" },
{ CH_ENDDOCUMENT, 0, 0, S_OK },
{ CH_ENDTEST }
};
/* Expected handler call sequence for parsing test2_cdata_xml; test_saxreader
   selects this table when the reader class is not CLSID_SAXXMLReader60.
   NOTE(review): the two integers after the event id are presumably the expected
   line and column - confirm against struct call_entry. */
static struct call_entry cdata_test2[] = {
{ CH_PUTDOCUMENTLOCATOR, 0, 0, S_OK },
{ CH_STARTDOCUMENT, 0, 0, S_OK },
{ CH_STARTELEMENT, 1, 26, S_OK, "", "a", "a" },
{ LH_STARTCDATA, 1, 35, S_OK },
/* the leading "\n\r\n" of the input is expected as a single "\n\n" chunk */
{ CH_CHARACTERS, 1, 35, S_OK, "\n\n" },
/* each following chunk is text plus its trailing newlines, with "\r\n" expected as "\n" */
{ CH_CHARACTERS, 1, 38, S_OK, "Some \n" },
{ CH_CHARACTERS, 1, 45, S_OK, "text\n\n" },
{ CH_CHARACTERS, 1, 52, S_OK, "data\n\n" },
{ LH_ENDCDATA, 1, 52, S_OK },
{ CH_ENDELEMENT, 8, 6, S_OK, "", "a", "a" },
{ CH_ENDDOCUMENT, 0, 0, S_OK },
{ CH_ENDTEST }
};
/* Expected handler call sequence for parsing test3_cdata_xml; test_saxreader
   selects this table when the reader class is not CLSID_SAXXMLReader60.
   The CDATA text contains no newlines, so a single characters entry is expected. */
static struct call_entry cdata_test3[] = {
{ CH_PUTDOCUMENTLOCATOR, 0, 0, S_OK },
{ CH_STARTDOCUMENT, 0, 0, S_OK },
{ CH_STARTELEMENT, 1, 26, S_OK, "", "a", "a" },
{ LH_STARTCDATA, 1, 35, S_OK },
{ CH_CHARACTERS, 1, 35, S_OK, "Some text data" },
{ LH_ENDCDATA, 1, 35, S_OK },
{ CH_ENDELEMENT, 1, 54, S_OK, "", "a", "a" },
{ CH_ENDDOCUMENT, 0, 0, S_OK },
{ CH_ENDTEST }
};
@ -928,10 +962,40 @@ static struct call_entry cdata_test_alt[] = {
{ CH_CHARACTERS, 2, 0, S_OK, "\n" },
{ CH_CHARACTERS, 3, 1, S_OK, "text\n" },
{ CH_CHARACTERS, 4, 0, S_OK, "\n" },
{ CH_CHARACTERS, 5, 3, S_OK, "data\n" },
{ LH_ENDCDATA, 5, 3, S_OK },
{ CH_ENDELEMENT, 5, 7, S_OK, "", "a", "a" },
{ CH_ENDDOCUMENT, 5, 7, S_OK },
{ CH_CHARACTERS, 6, 3, S_OK, "data\n\n" },
{ LH_ENDCDATA, 6, 3, S_OK },
{ CH_ENDELEMENT, 6, 7, S_OK, "", "a", "a" },
{ CH_ENDDOCUMENT, 6, 7, S_OK },
{ CH_ENDTEST }
};
/* Expected handler call sequence for parsing test2_cdata_xml; test_saxreader
   selects this table when the reader class is CLSID_SAXXMLReader60.
   Compared with cdata_test2, the text and most newlines are expected as
   separate characters entries, and the positions differ. */
static struct call_entry cdata_test2_alt[] = {
{ CH_PUTDOCUMENTLOCATOR, 1, 0, S_OK },
{ CH_STARTDOCUMENT, 1, 22, S_OK },
{ CH_STARTELEMENT, 1, 25, S_OK, "", "a", "a" },
{ LH_STARTCDATA, 1, 34, S_OK },
/* the leading "\n\r\n" of the input is expected as two separate "\n" entries */
{ CH_CHARACTERS, 2, 1, S_OK, "\n" },
{ CH_CHARACTERS, 3, 0, S_OK, "\n" },
{ CH_CHARACTERS, 3, 6, S_OK, "Some " },
{ CH_CHARACTERS, 4, 0, S_OK, "\n" },
{ CH_CHARACTERS, 5, 1, S_OK, "text\n" },
{ CH_CHARACTERS, 6, 0, S_OK, "\n" },
/* the final "data\n\n" is expected as one entry, trailing newlines included */
{ CH_CHARACTERS, 8, 3, S_OK, "data\n\n" },
{ LH_ENDCDATA, 8, 3, S_OK },
{ CH_ENDELEMENT, 8, 7, S_OK, "", "a", "a" },
{ CH_ENDDOCUMENT, 8, 7, S_OK },
{ CH_ENDTEST }
};
/* Expected handler call sequence for parsing test3_cdata_xml; test_saxreader
   selects this table when the reader class is CLSID_SAXXMLReader60.
   The CDATA text contains no newlines, so a single characters entry is expected. */
static struct call_entry cdata_test3_alt[] = {
{ CH_PUTDOCUMENTLOCATOR, 1, 0, S_OK },
{ CH_STARTDOCUMENT, 1, 22, S_OK },
{ CH_STARTELEMENT, 1, 25, S_OK, "", "a", "a" },
{ LH_STARTCDATA, 1, 34, S_OK },
{ CH_CHARACTERS, 1, 51, S_OK, "Some text data" },
{ LH_ENDCDATA, 1, 51, S_OK },
{ CH_ENDELEMENT, 1, 55, S_OK, "", "a", "a" },
{ CH_ENDDOCUMENT, 1, 55, S_OK },
{ CH_ENDTEST }
};
@ -1934,6 +1998,24 @@ static struct msxmlsupported_data_t reader_support_data[] =
static struct saxlexicalhandler lexicalhandler;
static struct saxdeclhandler declhandler;
/* Creates an HGLOBAL-backed stream, fills it with the first len bytes of data
   and seeks it back to the start so it is ready to be read.

   data - bytes to store in the stream
   len  - number of bytes to write, or -1 to use strlen(data)

   Returns the new stream; the caller releases it. Results of the individual
   stream calls are not checked. */
static IStream *create_test_stream(const char *data, int len)
{
    LARGE_INTEGER pos;
    ULARGE_INTEGER size;
    ULONG written;
    IStream *stream;

    /* -1 means "treat data as a NUL-terminated string" */
    if (len == -1)
        len = strlen(data);

    CreateStreamOnHGlobal(NULL, TRUE, &stream);

    /* size the stream first, then copy the payload in */
    size.QuadPart = len;
    IStream_SetSize(stream, size);
    IStream_Write(stream, data, len, &written);

    /* seek back to the start so a reader sees the data from the beginning */
    pos.QuadPart = 0;
    IStream_Seek(stream, pos, STREAM_SEEK_SET, NULL);

    return stream;
}
static void test_saxreader(void)
{
const struct msxmlsupported_data_t *table = reader_support_data;
@ -1946,8 +2028,6 @@ static void test_saxreader(void)
SAFEARRAYBOUND SADim[1];
char *ptr = NULL;
IStream *stream;
ULARGE_INTEGER size;
LARGE_INTEGER pos;
ULONG written;
HANDLE file;
static const CHAR testXmlA[] = "test.xml";
@ -2041,12 +2121,7 @@ static void test_saxreader(void)
SafeArrayDestroy(sa);
CreateStreamOnHGlobal(NULL, TRUE, &stream);
size.QuadPart = strlen(testXML);
IStream_SetSize(stream, size);
IStream_Write(stream, testXML, strlen(testXML), &written);
pos.QuadPart = 0;
IStream_Seek(stream, pos, STREAM_SEEK_SET, NULL);
stream = create_test_stream(testXML, -1);
V_VT(&var) = VT_UNKNOWN;
V_UNKNOWN(&var) = (IUnknown*)stream;
@ -2057,12 +2132,7 @@ static void test_saxreader(void)
IStream_Release(stream);
CreateStreamOnHGlobal(NULL, TRUE, &stream);
size.QuadPart = strlen(test_attributes);
IStream_SetSize(stream, size);
IStream_Write(stream, test_attributes, strlen(test_attributes), &written);
pos.QuadPart = 0;
IStream_Seek(stream, pos, STREAM_SEEK_SET, NULL);
stream = create_test_stream(test_attributes, -1);
V_VT(&var) = VT_UNKNOWN;
V_UNKNOWN(&var) = (IUnknown*)stream;
@ -2200,12 +2270,7 @@ static void test_saxreader(void)
hr = ISAXXMLReader_putFeature(reader, _bstr_("http://xml.org/sax/features/namespaces"), VARIANT_FALSE);
EXPECT_HR(hr, S_OK);
CreateStreamOnHGlobal(NULL, TRUE, &stream);
size.QuadPart = strlen(test_attributes);
IStream_SetSize(stream, size);
IStream_Write(stream, test_attributes, strlen(test_attributes), &written);
pos.QuadPart = 0;
IStream_Seek(stream, pos, STREAM_SEEK_SET, NULL);
stream = create_test_stream(test_attributes, -1);
V_VT(&var) = VT_UNKNOWN;
V_UNKNOWN(&var) = (IUnknown*)stream;
@ -2228,12 +2293,7 @@ static void test_saxreader(void)
hr = ISAXXMLReader_putFeature(reader, _bstr_("http://xml.org/sax/features/namespace-prefixes"), VARIANT_FALSE);
EXPECT_HR(hr, S_OK);
CreateStreamOnHGlobal(NULL, TRUE, &stream);
size.QuadPart = strlen(test_attributes);
IStream_SetSize(stream, size);
IStream_Write(stream, test_attributes, strlen(test_attributes), &written);
pos.QuadPart = 0;
IStream_Seek(stream, pos, STREAM_SEEK_SET, NULL);
stream = create_test_stream(test_attributes, -1);
V_VT(&var) = VT_UNKNOWN;
V_UNKNOWN(&var) = (IUnknown*)stream;
@ -2254,12 +2314,7 @@ static void test_saxreader(void)
EXPECT_HR(hr, S_OK);
/* attribute normalization */
CreateStreamOnHGlobal(NULL, TRUE, &stream);
size.QuadPart = strlen(attribute_normalize);
IStream_SetSize(stream, size);
IStream_Write(stream, attribute_normalize, strlen(attribute_normalize), &written);
pos.QuadPart = 0;
IStream_Seek(stream, pos, STREAM_SEEK_SET, NULL);
stream = create_test_stream(attribute_normalize, -1);
V_VT(&var) = VT_UNKNOWN;
V_UNKNOWN(&var) = (IUnknown*)stream;
@ -2285,13 +2340,6 @@ static void test_saxreader(void)
ok(hr == S_OK || broken(hr == E_FAIL), "got 0x%08x\n", hr);
/* CDATA sections */
CreateStreamOnHGlobal(NULL, TRUE, &stream);
size.QuadPart = strlen(test_cdata_xml);
IStream_SetSize(stream, size);
IStream_Write(stream, test_cdata_xml, strlen(test_cdata_xml), &written);
pos.QuadPart = 0;
IStream_Seek(stream, pos, STREAM_SEEK_SET, NULL);
init_saxlexicalhandler(&lexicalhandler, S_OK);
V_VT(&var) = VT_UNKNOWN;
@ -2299,6 +2347,7 @@ static void test_saxreader(void)
hr = ISAXXMLReader_putProperty(reader, _bstr_("http://xml.org/sax/properties/lexical-handler"), var);
ok(hr == S_OK, "got 0x%08x\n", hr);
stream = create_test_stream(test_cdata_xml, -1);
V_VT(&var) = VT_UNKNOWN;
V_UNKNOWN(&var) = (IUnknown*)stream;
@ -2312,6 +2361,38 @@ static void test_saxreader(void)
ok(hr == S_OK, "got 0x%08x\n", hr);
ok_sequence(sequences, CONTENT_HANDLER_INDEX, test_seq, "cdata test", TRUE);
/* 2. CDATA sections */
stream = create_test_stream(test2_cdata_xml, -1);
V_VT(&var) = VT_UNKNOWN;
V_UNKNOWN(&var) = (IUnknown*)stream;
if (IsEqualGUID(table->clsid, &CLSID_SAXXMLReader60))
test_seq = cdata_test2_alt;
else
test_seq = cdata_test2;
set_expected_seq(test_seq);
hr = ISAXXMLReader_parse(reader, var);
ok(hr == S_OK, "got 0x%08x\n", hr);
ok_sequence(sequences, CONTENT_HANDLER_INDEX, test_seq, "cdata test 2", TRUE);
IStream_Release(stream);
/* 3. CDATA sections */
stream = create_test_stream(test3_cdata_xml, -1);
V_VT(&var) = VT_UNKNOWN;
V_UNKNOWN(&var) = (IUnknown*)stream;
if (IsEqualGUID(table->clsid, &CLSID_SAXXMLReader60))
test_seq = cdata_test3_alt;
else
test_seq = cdata_test3;
set_expected_seq(test_seq);
hr = ISAXXMLReader_parse(reader, var);
ok(hr == S_OK, "got 0x%08x\n", hr);
ok_sequence(sequences, CONTENT_HANDLER_INDEX, test_seq, "cdata test 3", TRUE);
IStream_Release(stream);
ISAXXMLReader_Release(reader);