diff --git a/dlls/webservices/reader.c b/dlls/webservices/reader.c index eb89af451e8..8283e7a7a0f 100644 --- a/dlls/webservices/reader.c +++ b/dlls/webservices/reader.c @@ -1008,11 +1008,129 @@ static HRESULT parse_name( const unsigned char *str, unsigned int len, return S_OK; } +static int codepoint_to_utf8( int cp, unsigned char *dst ) /* Encodes code point cp as UTF-8 into dst and returns the number of bytes written (1 to 4), or -1 without writing for 0xd800-0xdfff, 0xfffe and 0xffff. The size of dst is not checked here. */ +{ + if (cp < 0x80) /* one byte. NOTE(review): cp == 0 also takes this branch and is written as a NUL byte - confirm that &#x0; is meant to be accepted */ + { + *dst = cp; + return 1; + } + if (cp < 0x800) /* two bytes: 110xxxxx 10xxxxxx */ + { + dst[1] = 0x80 | (cp & 0x3f); + cp >>= 6; + dst[0] = 0xc0 | cp; + return 2; + } + if ((cp >= 0xd800 && cp <= 0xdfff) || cp == 0xfffe || cp == 0xffff) return -1; + if (cp < 0x10000) /* three bytes: 1110xxxx 10xxxxxx 10xxxxxx */ + { + dst[2] = 0x80 | (cp & 0x3f); + cp >>= 6; + dst[1] = 0x80 | (cp & 0x3f); + cp >>= 6; + dst[0] = 0xe0 | cp; + return 3; + } + dst[3] = 0x80 | (cp & 0x3f); /* four bytes. NOTE(review): no upper bound is applied to cp here; the caller visible in this patch passes at most 5 hex digits (0xfffff) */ + cp >>= 6; + dst[2] = 0x80 | (cp & 0x3f); + cp >>= 6; + dst[1] = 0x80 | (cp & 0x3f); + cp >>= 6; + dst[0] = 0xf0 | cp; + return 4; +} + +static HRESULT decode_text( const unsigned char *str, ULONG len, unsigned char *ret, ULONG *ret_len ) /* Copies len bytes from str to ret while replacing &lt; &gt; &quot; &amp; &apos; and hexadecimal references of the form &#x...; (1 to 5 digits) by the character they denote; *ret_len receives the number of bytes written. Any other sequence starting with & yields WS_E_INVALID_FORMAT, in which case *ret_len holds the count reached so far. ret is not bounds-checked: every accepted sequence is longer than its replacement, so len bytes of room suffice. */ +{ + const unsigned char *p = str; + unsigned char *q = ret; + + *ret_len = 0; + while (len) + { + if (*p == '&') + { + p++; len--; + if (!len) return WS_E_INVALID_FORMAT; + + if (len >= 3 && !memcmp( p, "lt;", 3 )) + { + *q++ = '<'; + p += 3; + len -= 3; + } + else if (len >= 3 && !memcmp( p, "gt;", 3 )) + { + *q++ = '>'; + p += 3; + len -= 3; + } + else if (len >= 5 && !memcmp( p, "quot;", 5 )) + { + *q++ = '"'; + p += 5; + len -= 5; + } + else if (len >= 4 && !memcmp( p, "amp;", 4 )) + { + *q++ = '&'; + p += 4; + len -= 4; + } + else if (len >= 5 && !memcmp( p, "apos;", 5 )) + { + *q++ = '\''; + p += 5; + len -= 5; + } + else if (*p == '#') + { + ULONG start, nb_digits, i; + int len_utf8, cp = 0; + + p++; len--; + if (!len || *p != 'x') return WS_E_INVALID_FORMAT; /* only the hexadecimal form is accepted; a decimal reference such as &#10; is rejected here */ + p++; len--; + + start = len; + while (len && isxdigit( *p )) { p++; len--; }; + if (!len) return WS_E_INVALID_FORMAT; /* input ended before any terminator */ + + p -= nb_digits = start - len; /* rewind p to the first digit */ + if (!nb_digits || nb_digits > 5 || p[nb_digits] != ';') return 
WS_E_INVALID_FORMAT; + for (i = 0; i < nb_digits; i++) + { + cp *= 16; + if (*p >= '0' && *p <= '9') cp += *p - '0'; + else if (*p >= 'a' && *p <= 'f') cp += *p - 'a' + 10; + else cp += *p - 'A' + 10; /* the isxdigit scan above leaves only A-F for this branch */ + p++; + } + p++; len--; /* consume the terminating semicolon verified above */ + if ((len_utf8 = codepoint_to_utf8( cp, q )) < 0) return WS_E_INVALID_FORMAT; + *ret_len += len_utf8; + q += len_utf8; + continue; /* *ret_len was already advanced by len_utf8, so skip the common increment */ + } + else return WS_E_INVALID_FORMAT; + } + else + { + *q++ = *p++; + len--; + } + *ret_len += 1; /* literal bytes and named entities each produce exactly one byte */ + } + return S_OK; +} + static HRESULT read_attribute( struct reader *reader, WS_XML_ATTRIBUTE **ret ) { static const WS_XML_STRING xmlns = {5, (BYTE *)"xmlns"}; WS_XML_ATTRIBUTE *attr; - WS_XML_UTF8_TEXT *text; + WS_XML_UTF8_TEXT *text = NULL; /* NULL until allocated, since the error path now passes it to heap_free */ unsigned int len = 0, ch, skip, quote; const unsigned char *start; WS_XML_STRING *prefix, *localname; @@ -1083,7 +1201,11 @@ static HRESULT read_attribute( struct reader *reader, WS_XML_ATTRIBUTE **ret ) if ((hr = bind_prefix( reader, attr->prefix, attr->ns )) != S_OK) goto error; if (!(text = alloc_utf8_text( NULL, 0 ))) goto error; } - else if (!(text = alloc_utf8_text( start, len ))) goto error; + else + { + if (!(text = alloc_utf8_text( NULL, len ))) goto error; /* NOTE(review): hr is not assigned on this path - confirm it already holds a failure code */ + if ((hr = decode_text( start, len, text->value.bytes, &text->value.length )) != S_OK) goto error; /* decode straight into the new buffer; decode_text stores the resulting length */ + } attr->value = &text->text; attr->singleQuote = (quote == '\''); @@ -1092,6 +1214,7 @@ static HRESULT read_attribute( struct reader *reader, WS_XML_ATTRIBUTE **ret ) return S_OK; error: + heap_free( text ); /* text is not yet attached to attr when decode_text fails, so it is released separately */ free_attribute( attr ); return hr; } @@ -1207,6 +1330,7 @@ static HRESULT read_text( struct reader *reader ) struct node *node, *parent; WS_XML_TEXT_NODE *text; WS_XML_UTF8_TEXT *utf8; + HRESULT hr; start = read_current_ptr( reader ); for (;;) @@ -1222,11 +1346,17 @@ static HRESULT read_text( struct reader *reader ) if (!(node = alloc_node( WS_XML_NODE_TYPE_TEXT ))) return E_OUTOFMEMORY; text = (WS_XML_TEXT_NODE *)node; - if (!(utf8 = alloc_utf8_text( start, len ))) + if (!(utf8 = alloc_utf8_text( NULL, len ))) 
{ heap_free( node ); return E_OUTOFMEMORY; } + if ((hr = decode_text( start, len, utf8->value.bytes, &utf8->value.length )) != S_OK) /* on a malformed entity release both allocations and return the error */ + { + heap_free( utf8 ); + heap_free( node ); + return hr; + } text->text = &utf8->text; read_insert_node( reader, parent, node ); diff --git a/dlls/webservices/tests/reader.c b/dlls/webservices/tests/reader.c index 5c4770c2dd9..77f79649707 100644 --- a/dlls/webservices/tests/reader.c +++ b/dlls/webservices/tests/reader.c @@ -3542,6 +3542,120 @@ static void test_WsSetReaderPosition(void) WsFreeHeap( heap ); } +static void test_entities(void) /* Feeds each tests[] input to a reader, checks the HRESULT of WsReadNode and, on success, the decoded UTF-8 text; then checks an attribute value expected to decode to two newlines. NOTE(review): the str1..str27 literals and the final set_input argument look as if their markup and entity text was lost in extraction - verify them against the upstream patch. */ +{ + static const char str1[] = " "; + static const char str2[] = " "; + static const char str3[] = " "; + static const char str4[] = ""; + static const char str5[] = "򪪪"; + static const char str6[] = "&1"; + static const char str7[] = "&1;"; + static const char str8[] = "&1111;"; + static const char str9[] = "&11111;"; + static const char str10[] = "<"; + static const char str11[] = ">"; + static const char str12[] = """; + static const char str13[] = "&"; + static const char str14[] = "'"; + static const char str15[] = "&sopa;"; + static const char str16[] = "&#;"; + static const char str17[] = "&;"; + static const char str18[] = "&&"; + static const char str19[] = "&"; + static const char str20[] = ""; + static const char str21[] = ""; + static const char str22[] = ""; + static const char str23[] = ""; + static const char str24[] = ""; + static const char str25[] = ""; + static const char str26[] = "￿"; + static const char str27[] = "<"; + static const char res4[] = {0xea, 0xaa, 0xaa, 0x00}; /* UTF-8 for U+AAAA */ + static const char res5[] = {0xf2, 0xaa, 0xaa, 0xaa, 0x00}; /* UTF-8 for U+AAAAA */ + static const char res21[] = {0xed, 0x9f, 0xbf, 0x00}; /* UTF-8 for U+D7FF, the last code point below the rejected 0xd800-0xdfff range */ + static const char res24[] = {0xee, 0x80, 0x80, 0x00}; /* UTF-8 for U+E000, the first code point above the rejected 0xd800-0xdfff range */ + static const struct + { + const char *str; + HRESULT hr; + const char *res; + } + tests[] = + { + { str1, WS_E_INVALID_FORMAT }, + { str2, S_OK, "\n" }, + { str3, S_OK, "\n" }, + { str4, S_OK, res4 }, + { str5, S_OK, res5 }, + { str6, 
WS_E_INVALID_FORMAT }, + { str7, WS_E_INVALID_FORMAT }, + { str8, WS_E_INVALID_FORMAT }, + { str9, WS_E_INVALID_FORMAT }, + { str10, S_OK, "<" }, + { str11, S_OK, ">" }, + { str12, S_OK, "\"" }, + { str13, S_OK, "&" }, + { str14, S_OK, "'" }, + { str15, WS_E_INVALID_FORMAT }, + { str16, WS_E_INVALID_FORMAT }, + { str17, WS_E_INVALID_FORMAT }, + { str18, WS_E_INVALID_FORMAT }, + { str19, WS_E_INVALID_FORMAT }, + { str20, WS_E_INVALID_FORMAT }, + { str21, S_OK, res21 }, + { str22, WS_E_INVALID_FORMAT }, + { str23, WS_E_INVALID_FORMAT }, + { str24, S_OK, res24 }, + { str25, WS_E_INVALID_FORMAT }, + { str26, WS_E_INVALID_FORMAT }, + { str27, WS_E_INVALID_FORMAT }, + }; + HRESULT hr; + WS_XML_READER *reader; + const WS_XML_NODE *node; + const WS_XML_UTF8_TEXT *utf8; + ULONG i; + + hr = WsCreateReader( NULL, 0, &reader, NULL ) ; + ok( hr == S_OK, "got %08x\n", hr ); + + for (i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) + { + hr = set_input( reader, tests[i].str, strlen(tests[i].str) ); + ok( hr == S_OK, "%u: got %08x\n", i, hr ); + + hr = WsReadToStartElement( reader, NULL, NULL, NULL, NULL ); + ok( hr == S_OK, "%u: got %08x\n", i, hr ); + + hr = WsReadNode( reader, NULL ); + ok( hr == tests[i].hr, "%u: got %08x\n", i, hr ); + if (hr != S_OK) continue; /* no text node to inspect after a failed read */ + + hr = WsGetReaderNode( reader, &node, NULL ); + ok( hr == S_OK, "%u: got %08x\n", i, hr ); + + utf8 = (const WS_XML_UTF8_TEXT *)((const WS_XML_TEXT_NODE *)node)->text; + ok( utf8->value.length == strlen(tests[i].res), "%u: got %u\n", i, utf8->value.length ); + ok( !memcmp( utf8->value.bytes, tests[i].res, strlen(tests[i].res) ), "%u: wrong data\n", i ); + } + + hr = set_input( reader, "", sizeof("") - 1 ); + ok( hr == S_OK, "got %08x\n", hr ); + + hr = WsReadToStartElement( reader, NULL, NULL, NULL, NULL ); + ok( hr == S_OK, "got %08x\n", hr ); + + hr = WsGetReaderNode( reader, &node, NULL ); + ok( hr == S_OK, "got %08x\n", hr ); + + utf8 = (const WS_XML_UTF8_TEXT *)((const WS_XML_ELEMENT_NODE 
*)node)->attributes[0]->value; + ok( utf8->value.length == 2, "got %u\n", utf8->value.length ); + ok( !memcmp( utf8->value.bytes, "\n\n", 2 ), "wrong data\n" ); + + WsFreeReader( reader ); +} + START_TEST(reader) { test_WsCreateError(); @@ -3576,4 +3690,5 @@ START_TEST(reader) test_WsResetError(); test_WsGetReaderPosition(); test_WsSetReaderPosition(); + test_entities(); }