msxml3: Don't use libxml2 encoding helpers.

This commit is contained in:
Nikolay Sivov 2011-12-16 02:36:53 +03:00 committed by Alexandre Julliard
parent d481bf61dc
commit 35c7c69429
2 changed files with 115 additions and 87 deletions

View File

@ -42,11 +42,22 @@ WINE_DEFAULT_DEBUG_CHANNEL(msxml);
#ifdef HAVE_LIBXML2
static const WCHAR utf16W[] = {'U','T','F','-','1','6',0};
static const WCHAR utf8W[] = {'U','T','F','-','8',0};
static const char crlfA[] = "\r\n";
static const WCHAR emptyW[] = {0};
/* Output encodings recognized by the writer.
 * parse_encoding_name() maps a case-insensitive "UTF-8"/"UTF-16" name to the
 * matching value and returns XmlEncoding_Unknown for anything else;
 * mxwriter_put_encoding() rejects XmlEncoding_Unknown with E_INVALIDARG.
 * get_code_page() translates XmlEncoding_UTF8 to CP_UTF8 and
 * XmlEncoding_UTF16 to ~0. */
typedef enum
{
XmlEncoding_UTF8,
XmlEncoding_UTF16,
XmlEncoding_Unknown
} xml_encoding;
/* Destination selector passed to write_output_buffer_mode().
 * write_prolog_buffer() uses OutputBuffer_Native for text that goes to the
 * WCHAR (UTF-16) buffer and OutputBuffer_Encoded for text that goes to the
 * encoded buffer.
 * NOTE(review): OutputBuffer_Both presumably targets both buffers at once;
 * confirm against write_output_buffer_mode(). */
typedef enum
{
OutputBuffer_Native = 0x001,
OutputBuffer_Encoded = 0x010,
OutputBuffer_Both = 0x100
} output_mode;
typedef enum
{
MXWriter_BOM = 0,
@ -55,7 +66,7 @@ typedef enum
MXWriter_OmitXmlDecl,
MXWriter_Standalone,
MXWriter_LastProp
} MXWRITER_PROPS;
} mxwriter_prop;
typedef struct
{
@ -82,9 +93,12 @@ typedef struct
VARIANT_BOOL props[MXWriter_LastProp];
BOOL prop_changed;
xmlCharEncoding encoding;
BSTR version;
BSTR encoding; /* exact property value */
xml_encoding xml_enc;
/* contains a pending (or not closed yet) element name or NULL if
we don't have to close */
BSTR element;
@ -95,20 +109,12 @@ typedef struct
output_buffer *buffer;
} mxwriter;
static const WCHAR *get_encoding_name(xmlCharEncoding encoding)
static xml_encoding parse_encoding_name(const WCHAR *encoding)
{
static const WCHAR unkW[] = {'u','n','k','n','o','w','n',0};
switch (encoding)
{
case XML_CHAR_ENCODING_UTF8:
return utf8W;
case XML_CHAR_ENCODING_UTF16LE:
return utf16W;
default:
FIXME("unsupported encoding %d\n", encoding);
return unkW;
}
static const WCHAR utf8W[] = {'U','T','F','-','8',0};
if (!strcmpiW(encoding, utf8W)) return XmlEncoding_UTF8;
if (!strcmpiW(encoding, utf16W)) return XmlEncoding_UTF16;
return XmlEncoding_Unknown;
}
static HRESULT init_encoded_buffer(encoded_buffer *buffer)
@ -129,14 +135,14 @@ static void free_encoded_buffer(encoded_buffer *buffer)
heap_free(buffer->data);
}
static HRESULT get_code_page(xmlCharEncoding encoding, UINT *cp)
static HRESULT get_code_page(xml_encoding encoding, UINT *cp)
{
switch (encoding)
{
case XML_CHAR_ENCODING_UTF8:
case XmlEncoding_UTF8:
*cp = CP_UTF8;
break;
case XML_CHAR_ENCODING_UTF16LE:
case XmlEncoding_UTF16:
*cp = ~0;
break;
default:
@ -147,7 +153,7 @@ static HRESULT get_code_page(xmlCharEncoding encoding, UINT *cp)
return S_OK;
}
static HRESULT alloc_output_buffer(xmlCharEncoding encoding, output_buffer **buffer)
static HRESULT alloc_output_buffer(xml_encoding encoding, output_buffer **buffer)
{
output_buffer *ret;
HRESULT hr;
@ -201,12 +207,6 @@ static void grow_buffer(encoded_buffer *buffer, int length)
}
}
typedef enum {
OutputBuffer_Native = 0x001,
OutputBuffer_Encoded = 0x010,
OutputBuffer_Both = 0x100
} output_mode;
static HRESULT write_output_buffer_mode(output_buffer *buffer, output_mode mode, const WCHAR *data, int len)
{
int length;
@ -256,29 +256,7 @@ static void close_output_buffer(mxwriter *This)
heap_free(This->buffer->encoded.data);
init_encoded_buffer(&This->buffer->utf16);
init_encoded_buffer(&This->buffer->encoded);
get_code_page(This->encoding, &This->buffer->code_page);
}
static HRESULT bstr_from_xmlCharEncoding(xmlCharEncoding enc, BSTR *encoding)
{
const char *encodingA;
if (enc != XML_CHAR_ENCODING_UTF16LE && enc != XML_CHAR_ENCODING_UTF8) {
FIXME("Unsupported xmlCharEncoding: %d\n", enc);
*encoding = NULL;
return E_NOTIMPL;
}
encodingA = xmlGetCharEncodingName(enc);
if (encodingA) {
DWORD len = MultiByteToWideChar(CP_ACP, 0, encodingA, -1, NULL, 0);
*encoding = SysAllocStringLen(NULL, len-1);
if(*encoding)
MultiByteToWideChar( CP_ACP, 0, encodingA, -1, *encoding, len);
} else
*encoding = SysAllocStringLen(NULL, 0);
return *encoding ? S_OK : E_OUTOFMEMORY;
get_code_page(This->xml_enc, &This->buffer->code_page);
}
/* escapes special characters like:
@ -366,7 +344,7 @@ static void write_prolog_buffer(const mxwriter *This)
/* always write UTF-16 to WCHAR buffer */
write_output_buffer_mode(This->buffer, OutputBuffer_Native, utf16W, sizeof(utf16W)/sizeof(WCHAR) - 1);
write_output_buffer_mode(This->buffer, OutputBuffer_Encoded, get_encoding_name(This->encoding), -1);
write_output_buffer_mode(This->buffer, OutputBuffer_Encoded, This->encoding, -1);
write_output_buffer(This->buffer, quotW, 1);
/* standalone */
@ -394,7 +372,7 @@ static HRESULT write_data_to_stream(mxwriter *This)
/* The xmlOutputBuffer doesn't copy its contents from its 'buffer' to the
* 'conv' buffer when UTF8 encoding is used.
*/
if (This->encoding != XML_CHAR_ENCODING_UTF16LE)
if (This->xml_enc != XmlEncoding_UTF16)
buffer = &This->buffer->encoded;
else
buffer = &This->buffer->utf16;
@ -402,7 +380,7 @@ static HRESULT write_data_to_stream(mxwriter *This)
if (This->dest_written > buffer->written) {
ERR("Failed sanity check! Not sure what to do... (%d > %d)\n", This->dest_written, buffer->written);
return E_FAIL;
} else if (This->dest_written == buffer->written && This->encoding != XML_CHAR_ENCODING_UTF8)
} else if (This->dest_written == buffer->written && This->xml_enc != XmlEncoding_UTF8)
/* Windows seems to make an empty write call when the encoding is UTF-8 and
* all the data has been written to the stream. It doesn't seem to make this call
* for any other encodings.
@ -455,14 +433,14 @@ static inline void reset_output_buffer(mxwriter *This)
This->dest_written = 0;
}
static HRESULT writer_set_property(mxwriter *writer, MXWRITER_PROPS property, VARIANT_BOOL value)
static HRESULT writer_set_property(mxwriter *writer, mxwriter_prop property, VARIANT_BOOL value)
{
writer->props[property] = value;
writer->prop_changed = TRUE;
return S_OK;
}
static HRESULT writer_get_property(const mxwriter *writer, MXWRITER_PROPS property, VARIANT_BOOL *value)
static HRESULT writer_get_property(const mxwriter *writer, mxwriter_prop property, VARIANT_BOOL *value)
{
if (!value) return E_POINTER;
*value = writer->props[property];
@ -537,6 +515,7 @@ static ULONG WINAPI mxwriter_Release(IMXWriter *iface)
if (This->dest) IStream_Release(This->dest);
SysFreeString(This->version);
SysFreeString(This->encoding);
SysFreeString(This->element);
release_dispex(&This->dispex);
@ -655,35 +634,28 @@ static HRESULT WINAPI mxwriter_get_output(IMXWriter *iface, VARIANT *dest)
static HRESULT WINAPI mxwriter_put_encoding(IMXWriter *iface, BSTR encoding)
{
mxwriter *This = impl_from_IMXWriter( iface );
xml_encoding enc;
HRESULT hr;
TRACE("(%p)->(%s)\n", This, debugstr_w(encoding));
/* FIXME: filter all supported encodings */
if (!strcmpW(encoding, utf16W) || !strcmpW(encoding, utf8W))
{
HRESULT hr;
LPSTR enc;
hr = flush_output_buffer(This);
if (FAILED(hr))
return hr;
enc = heap_strdupWtoA(encoding);
if (!enc)
return E_OUTOFMEMORY;
This->encoding = xmlParseCharEncoding(enc);
heap_free(enc);
TRACE("got encoding %d\n", This->encoding);
reset_output_buffer(This);
return S_OK;
}
else
enc = parse_encoding_name(encoding);
if (enc == XmlEncoding_Unknown)
{
FIXME("unsupported encoding %s\n", debugstr_w(encoding));
return E_INVALIDARG;
}
hr = flush_output_buffer(This);
if (FAILED(hr))
return hr;
SysReAllocString(&This->encoding, encoding);
This->xml_enc = enc;
TRACE("got encoding %d\n", This->xml_enc);
reset_output_buffer(This);
return S_OK;
}
static HRESULT WINAPI mxwriter_get_encoding(IMXWriter *iface, BSTR *encoding)
@ -694,7 +666,10 @@ static HRESULT WINAPI mxwriter_get_encoding(IMXWriter *iface, BSTR *encoding)
if (!encoding) return E_POINTER;
return bstr_from_xmlCharEncoding(This->encoding, encoding);
*encoding = SysAllocString(This->encoding);
if (!*encoding) return E_OUTOFMEMORY;
return S_OK;
}
static HRESULT WINAPI mxwriter_put_byteOrderMark(IMXWriter *iface, VARIANT_BOOL value)
@ -888,8 +863,8 @@ static HRESULT WINAPI mxwriter_saxcontent_startDocument(ISAXContentHandler *ifac
write_prolog_buffer(This);
if (This->dest && This->encoding == XML_CHAR_ENCODING_UTF16LE) {
static const CHAR utf16BOM[] = {0xff,0xfe};
if (This->dest && This->xml_enc == XmlEncoding_UTF16) {
static const char utf16BOM[] = {0xff,0xfe};
if (This->props[MXWriter_BOM] == VARIANT_TRUE)
/* Windows passes a NULL pointer as the pcbWritten parameter and
@ -1144,16 +1119,18 @@ HRESULT MXWriter_create(MSXML_VERSION version, IUnknown *outer, void **ppObj)
This->props[MXWriter_OmitXmlDecl] = VARIANT_FALSE;
This->props[MXWriter_Standalone] = VARIANT_FALSE;
This->prop_changed = FALSE;
This->encoding = xmlParseCharEncoding("UTF-16");
This->encoding = SysAllocString(utf16W);
This->version = SysAllocString(version10W);
This->xml_enc = XmlEncoding_UTF16;
This->element = NULL;
This->dest = NULL;
This->dest_written = 0;
hr = alloc_output_buffer(This->encoding, &This->buffer);
hr = alloc_output_buffer(This->xml_enc, &This->buffer);
if (hr != S_OK) {
SysFreeString(This->encoding);
SysFreeString(This->version);
heap_free(This);
return hr;

View File

@ -116,6 +116,9 @@ static const CHAR szUtf16BOM[] = {0xff, 0xfe};
static const CHAR szUtf8XML[] =
"<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\r\n";
static const char utf8xml2[] =
"<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"no\"?>\r\n";
static const CHAR szTestXML[] =
"<?xml version=\"1.0\" ?>\n"
"<BankAccount>\n"
@ -1758,10 +1761,12 @@ static void test_mxwriter_properties(void)
static const WCHAR utf16W[] = {'U','T','F','-','1','6',0};
static const WCHAR emptyW[] = {0};
static const WCHAR testW[] = {'t','e','s','t',0};
ISAXContentHandler *content;
IMXWriter *writer;
VARIANT_BOOL b;
HRESULT hr;
BSTR str, str2;
VARIANT dest;
test_mxwriter_default_properties(mxwriter_default_props);
@ -1794,12 +1799,12 @@ static void test_mxwriter_properties(void)
ok(b == VARIANT_TRUE, "got %d\n", b);
hr = IMXWriter_get_encoding(writer, NULL);
ok(hr == E_POINTER, "got %08x\n", hr);
EXPECT_HR(hr, E_POINTER);
/* UTF-16 is a default setting apparently */
str = (void*)0xdeadbeef;
hr = IMXWriter_get_encoding(writer, &str);
ok(hr == S_OK, "got %08x\n", hr);
EXPECT_HR(hr, S_OK);
ok(lstrcmpW(str, utf16W) == 0, "expected empty string, got %s\n", wine_dbgstr_w(str));
str2 = (void*)0xdeadbeef;
@ -1818,8 +1823,8 @@ static void test_mxwriter_properties(void)
str = (void*)0xdeadbeef;
hr = IMXWriter_get_encoding(writer, &str);
ok(hr == S_OK, "got %08x\n", hr);
ok(!lstrcmpW(str, emptyW) == 0, "expected empty string, got %s\n", wine_dbgstr_w(str));
EXPECT_HR(hr, S_OK);
ok(!lstrcmpW(str, _bstr_("UTF-16")), "got %s\n", wine_dbgstr_w(str));
SysFreeString(str);
/* invalid encoding name */
@ -1828,6 +1833,41 @@ static void test_mxwriter_properties(void)
ok(hr == E_INVALIDARG, "got %08x\n", hr);
SysFreeString(str);
/* test case sensitivity */
hr = IMXWriter_put_encoding(writer, _bstr_("utf-8"));
EXPECT_HR(hr, S_OK);
str = (void*)0xdeadbeef;
hr = IMXWriter_get_encoding(writer, &str);
EXPECT_HR(hr, S_OK);
ok(!lstrcmpW(str, _bstr_("utf-8")), "got %s\n", wine_dbgstr_w(str));
SysFreeString(str);
hr = IMXWriter_put_encoding(writer, _bstr_("uTf-16"));
EXPECT_HR(hr, S_OK);
str = (void*)0xdeadbeef;
hr = IMXWriter_get_encoding(writer, &str);
EXPECT_HR(hr, S_OK);
ok(!lstrcmpW(str, _bstr_("uTf-16")), "got %s\n", wine_dbgstr_w(str));
SysFreeString(str);
/* how it affects document creation */
hr = IMXWriter_QueryInterface(writer, &IID_ISAXContentHandler, (void**)&content);
EXPECT_HR(hr, S_OK);
hr = ISAXContentHandler_startDocument(content);
EXPECT_HR(hr, S_OK);
hr = ISAXContentHandler_endDocument(content);
EXPECT_HR(hr, S_OK);
V_VT(&dest) = VT_EMPTY;
hr = IMXWriter_get_output(writer, &dest);
EXPECT_HR(hr, S_OK);
ok(V_VT(&dest) == VT_BSTR, "got %d\n", V_VT(&dest));
ok(!lstrcmpW(_bstr_("<?xml version=\"1.0\" encoding=\"UTF-16\" standalone=\"yes\"?>\r\n"),
V_BSTR(&dest)), "got wrong content %s\n", wine_dbgstr_w(V_BSTR(&dest)));
VariantClear(&dest);
ISAXContentHandler_Release(content);
hr = IMXWriter_get_version(writer, NULL);
ok(hr == E_POINTER, "got %08x\n", hr);
/* default version is 'surprisingly' 1.0 */
@ -2450,6 +2490,17 @@ static const mxwriter_stream_test mxwriter_stream_tests[] = {
{TRUE}
}
},
{
VARIANT_TRUE,"utf-8",
{
{FALSE,(const BYTE*)utf8xml2,sizeof(utf8xml2)-1},
/* For some reason Windows makes an empty write call when UTF-8 encoding is used
* and the writer is released.
*/
{FALSE,NULL,0},
{TRUE}
}
},
{
VARIANT_TRUE,"UTF-16",
{