From 221f9efe27fe786d4c2ee9c8d76ea9d8a75fc279 Mon Sep 17 00:00:00 2001 From: Nikolay Sivov Date: Sat, 21 Apr 2012 12:32:15 +0400 Subject: [PATCH] msxml3: Switch parser encoding manually when it won't be able to detect it (UTF-16 case). --- dlls/msxml3/saxreader.c | 30 ++++++++++++++++++++++++++---- dlls/msxml3/tests/saxreader.c | 22 ++++++++++++++++------ 2 files changed, 42 insertions(+), 10 deletions(-) diff --git a/dlls/msxml3/saxreader.c b/dlls/msxml3/saxreader.c index a5b7a32b86a..913eeaf7952 100644 --- a/dlls/msxml3/saxreader.c +++ b/dlls/msxml3/saxreader.c @@ -2144,7 +2144,7 @@ static HRESULT internal_parseBuffer(saxreader *This, const char *buffer, int siz HRESULT hr; hr = SAXLocator_create(This, &locator, vbInterface); - if(FAILED(hr)) + if (FAILED(hr)) return hr; if (size >= 4) @@ -2163,22 +2163,44 @@ static HRESULT internal_parseBuffer(saxreader *This, const char *buffer, int siz } } + /* if libxml2 detection failed try to guess */ + if (encoding == XML_CHAR_ENCODING_NONE) + { + const WCHAR *ptr = (WCHAR*)buffer; + /* xml declaration with possibly specified encoding will still be handled by parser */ + if ((size >= 2) && *ptr == '<' && ptr[1] != '?') + { + enc_name = (xmlChar*)xmlGetCharEncodingName(XML_CHAR_ENCODING_UTF16LE); + encoding = XML_CHAR_ENCODING_UTF16LE; + } + } + else if (encoding == XML_CHAR_ENCODING_UTF8) + enc_name = (xmlChar*)xmlGetCharEncodingName(encoding); + else + enc_name = NULL; + locator->pParserCtxt = xmlCreateMemoryParserCtxt(buffer, size); - if(!locator->pParserCtxt) + if (!locator->pParserCtxt) { ISAXLocator_Release(&locator->ISAXLocator_iface); return E_FAIL; } - if (encoding == XML_CHAR_ENCODING_UTF8) + if (enc_name) + { locator->pParserCtxt->encoding = xmlStrdup(enc_name); + if (encoding == XML_CHAR_ENCODING_UTF16LE) { + TRACE("switching to %s\n", enc_name); + xmlSwitchEncoding(locator->pParserCtxt, encoding); + } + } xmlFree(locator->pParserCtxt->sax); locator->pParserCtxt->sax = &locator->saxreader->sax; 
locator->pParserCtxt->userData = locator; This->isParsing = TRUE; - if(xmlParseDocument(locator->pParserCtxt)==-1 && locator->ret==S_OK) + if(xmlParseDocument(locator->pParserCtxt) == -1 && locator->ret == S_OK) hr = E_FAIL; else hr = locator->ret; diff --git a/dlls/msxml3/tests/saxreader.c b/dlls/msxml3/tests/saxreader.c index 00a355370ef..5db2d113202 100644 --- a/dlls/msxml3/tests/saxreader.c +++ b/dlls/msxml3/tests/saxreader.c @@ -2317,18 +2317,20 @@ static const struct enc_test_entry_t encoding_test_data[] = { { 0 } }; -static void test_encoding(void) +static void test_saxreader_encoding(void) { const struct enc_test_entry_t *entry = encoding_test_data; static const WCHAR testXmlW[] = {'t','e','s','t','.','x','m','l',0}; static const CHAR testXmlA[] = "test.xml"; - ISAXXMLReader *reader; - DWORD written; - HANDLE file; - HRESULT hr; while (entry->guid) { + ISAXXMLReader *reader; + VARIANT input; + DWORD written; + HANDLE file; + HRESULT hr; + hr = CoCreateInstance(entry->guid, NULL, CLSCTX_INPROC_SERVER, &IID_ISAXXMLReader, (void**)&reader); if (hr != S_OK) { @@ -2349,8 +2351,16 @@ static void test_encoding(void) ok(hr == entry->hr, "Expected 0x%08x, got 0x%08x. CLSID %s\n", entry->hr, hr, entry->clsid); DeleteFileA(testXmlA); + + /* try BSTR input with no BOM or '<?xml' instruction */ + V_VT(&input) = VT_BSTR; + V_BSTR(&input) = _bstr_("<element></element>"); + hr = ISAXXMLReader_parse(reader, input); + EXPECT_HR(hr, S_OK); + ISAXXMLReader_Release(reader); + free_bstrs(); entry++; } } @@ -4474,7 +4484,7 @@ START_TEST(saxreader) test_saxreader(); test_saxreader_properties(); test_saxreader_features(); - test_encoding(); + test_saxreader_encoding(); test_dispex(); /* MXXMLWriter tests */