From 678fbc172c5f54e6417f0fc1336a0a3b444b444c Mon Sep 17 00:00:00 2001 From: Nikolay Sivov Date: Sat, 30 Oct 2010 18:49:43 +0400 Subject: [PATCH] msxml3: Hint parser to use UTF-8 if it's specified as BOM. --- dlls/msxml3/saxreader.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/dlls/msxml3/saxreader.c b/dlls/msxml3/saxreader.c index a1df5c7b99f..7d8caf4876d 100644 --- a/dlls/msxml3/saxreader.c +++ b/dlls/msxml3/saxreader.c @@ -1770,6 +1770,8 @@ static HRESULT SAXLocator_create(saxreader *reader, saxlocator **ppsaxlocator, B /*** SAXXMLReader internal functions ***/ static HRESULT internal_parseBuffer(saxreader *This, const char *buffer, int size, BOOL vbInterface) { + xmlCharEncoding encoding = XML_CHAR_ENCODING_NONE; + xmlChar *enc_name = NULL; saxlocator *locator; HRESULT hr; @@ -1777,6 +1779,22 @@ static HRESULT internal_parseBuffer(saxreader *This, const char *buffer, int siz if(FAILED(hr)) return hr; + if (size >= 4) + { + const unsigned char *buff = (unsigned char*)buffer; + + encoding = xmlDetectCharEncoding((xmlChar*)buffer, 4); + enc_name = (xmlChar*)xmlGetCharEncodingName(encoding); + TRACE("detected encoding: %s\n", enc_name); + /* skip BOM, parser won't switch encodings and so won't skip it on its own */ + if ((encoding == XML_CHAR_ENCODING_UTF8) && + buff[0] == 0xEF && buff[1] == 0xBB && buff[2] == 0xBF) + { + buffer += 3; + size -= 3; + } + } + locator->pParserCtxt = xmlCreateMemoryParserCtxt(buffer, size); if(!locator->pParserCtxt) { @@ -1784,6 +1802,9 @@ static HRESULT internal_parseBuffer(saxreader *This, const char *buffer, int siz return E_FAIL; } + if (encoding == XML_CHAR_ENCODING_UTF8) + locator->pParserCtxt->encoding = xmlStrdup(enc_name); + xmlFree(locator->pParserCtxt->sax); locator->pParserCtxt->sax = &locator->saxreader->sax; locator->pParserCtxt->userData = locator;