webservices: Decode XML entities.
Signed-off-by: Hans Leidekker <hans@codeweavers.com> Signed-off-by: Alexandre Julliard <julliard@winehq.org>
This commit is contained in:
parent
f01a209e2b
commit
f449b1324f
|
@ -1008,11 +1008,129 @@ static HRESULT parse_name( const unsigned char *str, unsigned int len,
|
||||||
return S_OK;
|
return S_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Encode a Unicode code point as UTF-8.
 *
 * cp   code point to encode
 * dst  output buffer; up to 4 bytes are written, no terminator is added
 *
 * Returns the number of bytes written (1 to 4), or -1 if cp is not a
 * character that may appear in XML text: zero, a negative value, a UTF-16
 * surrogate (0xd800-0xdfff), 0xfffe, 0xffff, or anything above 0x10ffff.
 * Nothing is written to dst when -1 is returned.
 */
static int codepoint_to_utf8( int cp, unsigned char *dst )
{
    /* U+0000 is not a legal XML character, and negative values or values
     * beyond the Unicode range cannot be represented in UTF-8 at all */
    if (cp <= 0 || cp > 0x10ffff) return -1;

    if (cp < 0x80)
    {
        *dst = cp;
        return 1;
    }
    if (cp < 0x800)
    {
        dst[1] = 0x80 | (cp & 0x3f);
        cp >>= 6;
        dst[0] = 0xc0 | cp;
        return 2;
    }
    /* surrogates and the two BMP non-characters are rejected */
    if ((cp >= 0xd800 && cp <= 0xdfff) || cp == 0xfffe || cp == 0xffff) return -1;
    if (cp < 0x10000)
    {
        dst[2] = 0x80 | (cp & 0x3f);
        cp >>= 6;
        dst[1] = 0x80 | (cp & 0x3f);
        cp >>= 6;
        dst[0] = 0xe0 | cp;
        return 3;
    }
    /* continuation bytes are filled from the least significant 6 bits up */
    dst[3] = 0x80 | (cp & 0x3f);
    cp >>= 6;
    dst[2] = 0x80 | (cp & 0x3f);
    cp >>= 6;
    dst[1] = 0x80 | (cp & 0x3f);
    cp >>= 6;
    dst[0] = 0xf0 | cp;
    return 4;
}
|
||||||
|
|
||||||
|
/* Copy XML character data from str to ret, replacing the five predefined
 * entities (&lt; &gt; &quot; &amp; &apos;) and hexadecimal character
 * references of the form &#xH...; (1 to 5 hex digits) with the characters
 * they denote, encoded as UTF-8.
 *
 * str      input bytes
 * len      number of input bytes
 * ret      output buffer; every construct emits no more bytes than it
 *          consumes, so len bytes of space are sufficient
 * ret_len  receives the number of bytes written to ret
 *
 * Returns S_OK on success.  Returns WS_E_INVALID_FORMAT for a '&' that does
 * not start one of the recognized constructs, for a truncated or malformed
 * reference, or for a code point rejected by codepoint_to_utf8(); in that
 * case ret and *ret_len reflect only the part decoded so far.
 */
static HRESULT decode_text( const unsigned char *str, ULONG len, unsigned char *ret, ULONG *ret_len )
{
    static const struct
    {
        const char   *name;     /* entity name including the closing ';' */
        ULONG         name_len;
        unsigned char ch;       /* replacement character */
    }
    entities[] =
    {
        { "lt;",   3, '<'  },
        { "gt;",   3, '>'  },
        { "quot;", 5, '"'  },
        { "amp;",  4, '&'  },
        { "apos;", 5, '\'' },
    };
    const ULONG nb_entities = sizeof(entities) / sizeof(entities[0]);
    const unsigned char *src = str;
    unsigned char *dst = ret;

    *ret_len = 0;
    while (len)
    {
        ULONG idx, nb_digits;
        int size, cp;

        /* ordinary byte: copy it through unchanged */
        if (*src != '&')
        {
            *dst++ = *src++;
            len--;
            *ret_len += 1;
            continue;
        }

        /* step over the '&'; something has to follow it */
        src++; len--;
        if (!len) return WS_E_INVALID_FORMAT;

        /* predefined named entities */
        for (idx = 0; idx < nb_entities; idx++)
        {
            if (len >= entities[idx].name_len &&
                !memcmp( src, entities[idx].name, entities[idx].name_len )) break;
        }
        if (idx < nb_entities)
        {
            *dst++ = entities[idx].ch;
            src += entities[idx].name_len;
            len -= entities[idx].name_len;
            *ret_len += 1;
            continue;
        }

        /* anything else must be a hexadecimal character reference */
        if (*src != '#') return WS_E_INVALID_FORMAT;
        src++; len--;
        if (!len || *src != 'x') return WS_E_INVALID_FORMAT;
        src++; len--;

        /* count the hex digits; running off the end of the input means the
         * terminating ';' is missing */
        nb_digits = 0;
        while (nb_digits < len && isxdigit( src[nb_digits] )) nb_digits++;
        if (nb_digits == len) return WS_E_INVALID_FORMAT;
        if (!nb_digits || nb_digits > 5 || src[nb_digits] != ';') return WS_E_INVALID_FORMAT;

        cp = 0;
        for (idx = 0; idx < nb_digits; idx++)
        {
            unsigned char digit = src[idx];

            cp *= 16;
            if (digit >= '0' && digit <= '9') cp += digit - '0';
            else if (digit >= 'a' && digit <= 'f') cp += digit - 'a' + 10;
            else cp += digit - 'A' + 10;
        }

        /* consume the digits and the ';' */
        src += nb_digits + 1;
        len -= nb_digits + 1;

        if ((size = codepoint_to_utf8( cp, dst )) < 0) return WS_E_INVALID_FORMAT;
        *ret_len += size;
        dst += size;
    }
    return S_OK;
}
|
||||||
|
|
||||||
static HRESULT read_attribute( struct reader *reader, WS_XML_ATTRIBUTE **ret )
|
static HRESULT read_attribute( struct reader *reader, WS_XML_ATTRIBUTE **ret )
|
||||||
{
|
{
|
||||||
static const WS_XML_STRING xmlns = {5, (BYTE *)"xmlns"};
|
static const WS_XML_STRING xmlns = {5, (BYTE *)"xmlns"};
|
||||||
WS_XML_ATTRIBUTE *attr;
|
WS_XML_ATTRIBUTE *attr;
|
||||||
WS_XML_UTF8_TEXT *text;
|
WS_XML_UTF8_TEXT *text = NULL;
|
||||||
unsigned int len = 0, ch, skip, quote;
|
unsigned int len = 0, ch, skip, quote;
|
||||||
const unsigned char *start;
|
const unsigned char *start;
|
||||||
WS_XML_STRING *prefix, *localname;
|
WS_XML_STRING *prefix, *localname;
|
||||||
|
@ -1083,7 +1201,11 @@ static HRESULT read_attribute( struct reader *reader, WS_XML_ATTRIBUTE **ret )
|
||||||
if ((hr = bind_prefix( reader, attr->prefix, attr->ns )) != S_OK) goto error;
|
if ((hr = bind_prefix( reader, attr->prefix, attr->ns )) != S_OK) goto error;
|
||||||
if (!(text = alloc_utf8_text( NULL, 0 ))) goto error;
|
if (!(text = alloc_utf8_text( NULL, 0 ))) goto error;
|
||||||
}
|
}
|
||||||
else if (!(text = alloc_utf8_text( start, len ))) goto error;
|
else
|
||||||
|
{
|
||||||
|
if (!(text = alloc_utf8_text( NULL, len ))) goto error;
|
||||||
|
if ((hr = decode_text( start, len, text->value.bytes, &text->value.length )) != S_OK) goto error;
|
||||||
|
}
|
||||||
|
|
||||||
attr->value = &text->text;
|
attr->value = &text->text;
|
||||||
attr->singleQuote = (quote == '\'');
|
attr->singleQuote = (quote == '\'');
|
||||||
|
@ -1092,6 +1214,7 @@ static HRESULT read_attribute( struct reader *reader, WS_XML_ATTRIBUTE **ret )
|
||||||
return S_OK;
|
return S_OK;
|
||||||
|
|
||||||
error:
|
error:
|
||||||
|
heap_free( text );
|
||||||
free_attribute( attr );
|
free_attribute( attr );
|
||||||
return hr;
|
return hr;
|
||||||
}
|
}
|
||||||
|
@ -1207,6 +1330,7 @@ static HRESULT read_text( struct reader *reader )
|
||||||
struct node *node, *parent;
|
struct node *node, *parent;
|
||||||
WS_XML_TEXT_NODE *text;
|
WS_XML_TEXT_NODE *text;
|
||||||
WS_XML_UTF8_TEXT *utf8;
|
WS_XML_UTF8_TEXT *utf8;
|
||||||
|
HRESULT hr;
|
||||||
|
|
||||||
start = read_current_ptr( reader );
|
start = read_current_ptr( reader );
|
||||||
for (;;)
|
for (;;)
|
||||||
|
@ -1222,11 +1346,17 @@ static HRESULT read_text( struct reader *reader )
|
||||||
|
|
||||||
if (!(node = alloc_node( WS_XML_NODE_TYPE_TEXT ))) return E_OUTOFMEMORY;
|
if (!(node = alloc_node( WS_XML_NODE_TYPE_TEXT ))) return E_OUTOFMEMORY;
|
||||||
text = (WS_XML_TEXT_NODE *)node;
|
text = (WS_XML_TEXT_NODE *)node;
|
||||||
if (!(utf8 = alloc_utf8_text( start, len )))
|
if (!(utf8 = alloc_utf8_text( NULL, len )))
|
||||||
{
|
{
|
||||||
heap_free( node );
|
heap_free( node );
|
||||||
return E_OUTOFMEMORY;
|
return E_OUTOFMEMORY;
|
||||||
}
|
}
|
||||||
|
if ((hr = decode_text( start, len, utf8->value.bytes, &utf8->value.length )) != S_OK)
|
||||||
|
{
|
||||||
|
heap_free( utf8 );
|
||||||
|
heap_free( node );
|
||||||
|
return hr;
|
||||||
|
}
|
||||||
text->text = &utf8->text;
|
text->text = &utf8->text;
|
||||||
|
|
||||||
read_insert_node( reader, parent, node );
|
read_insert_node( reader, parent, node );
|
||||||
|
|
|
@ -3542,6 +3542,120 @@ static void test_WsSetReaderPosition(void)
|
||||||
WsFreeHeap( heap );
|
WsFreeHeap( heap );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void test_entities(void)
|
||||||
|
{
|
||||||
|
static const char str1[] = "<t>
</t>";
|
||||||
|
static const char str2[] = "<t>
</t>";
|
||||||
|
static const char str3[] = "<t>
</t>";
|
||||||
|
static const char str4[] = "<t>ꪪ</t>";
|
||||||
|
static const char str5[] = "<t>򪪪</t>";
|
||||||
|
static const char str6[] = "<t>&1</t>";
|
||||||
|
static const char str7[] = "<t>&1;</t>";
|
||||||
|
static const char str8[] = "<t>&1111;</t>";
|
||||||
|
static const char str9[] = "<t>&11111;</t>";
|
||||||
|
static const char str10[] = "<t><</t>";
|
||||||
|
static const char str11[] = "<t>></t>";
|
||||||
|
static const char str12[] = "<t>"</t>";
|
||||||
|
static const char str13[] = "<t>&</t>";
|
||||||
|
static const char str14[] = "<t>'</t>";
|
||||||
|
static const char str15[] = "<t>&sopa;</t>";
|
||||||
|
static const char str16[] = "<t>&#;</t>";
|
||||||
|
static const char str17[] = "<t>&;</t>";
|
||||||
|
static const char str18[] = "<t>&&</t>";
|
||||||
|
static const char str19[] = "<t>&</t>";
|
||||||
|
static const char str20[] = "<t>�</t>";
|
||||||
|
static const char str21[] = "<t>퟿</t>";
|
||||||
|
static const char str22[] = "<t>�</t>";
|
||||||
|
static const char str23[] = "<t>�</t>";
|
||||||
|
static const char str24[] = "<t></t>";
|
||||||
|
static const char str25[] = "<t></t>";
|
||||||
|
static const char str26[] = "<t></t>";
|
||||||
|
static const char str27[] = "<t><</t>";
|
||||||
|
static const char res4[] = {0xea, 0xaa, 0xaa, 0x00};
|
||||||
|
static const char res5[] = {0xf2, 0xaa, 0xaa, 0xaa, 0x00};
|
||||||
|
static const char res21[] = {0xed, 0x9f, 0xbf, 0x00};
|
||||||
|
static const char res24[] = {0xee, 0x80, 0x80, 0x00};
|
||||||
|
static const struct
|
||||||
|
{
|
||||||
|
const char *str;
|
||||||
|
HRESULT hr;
|
||||||
|
const char *res;
|
||||||
|
}
|
||||||
|
tests[] =
|
||||||
|
{
|
||||||
|
{ str1, WS_E_INVALID_FORMAT },
|
||||||
|
{ str2, S_OK, "\n" },
|
||||||
|
{ str3, S_OK, "\n" },
|
||||||
|
{ str4, S_OK, res4 },
|
||||||
|
{ str5, S_OK, res5 },
|
||||||
|
{ str6, WS_E_INVALID_FORMAT },
|
||||||
|
{ str7, WS_E_INVALID_FORMAT },
|
||||||
|
{ str8, WS_E_INVALID_FORMAT },
|
||||||
|
{ str9, WS_E_INVALID_FORMAT },
|
||||||
|
{ str10, S_OK, "<" },
|
||||||
|
{ str11, S_OK, ">" },
|
||||||
|
{ str12, S_OK, "\"" },
|
||||||
|
{ str13, S_OK, "&" },
|
||||||
|
{ str14, S_OK, "'" },
|
||||||
|
{ str15, WS_E_INVALID_FORMAT },
|
||||||
|
{ str16, WS_E_INVALID_FORMAT },
|
||||||
|
{ str17, WS_E_INVALID_FORMAT },
|
||||||
|
{ str18, WS_E_INVALID_FORMAT },
|
||||||
|
{ str19, WS_E_INVALID_FORMAT },
|
||||||
|
{ str20, WS_E_INVALID_FORMAT },
|
||||||
|
{ str21, S_OK, res21 },
|
||||||
|
{ str22, WS_E_INVALID_FORMAT },
|
||||||
|
{ str23, WS_E_INVALID_FORMAT },
|
||||||
|
{ str24, S_OK, res24 },
|
||||||
|
{ str25, WS_E_INVALID_FORMAT },
|
||||||
|
{ str26, WS_E_INVALID_FORMAT },
|
||||||
|
{ str27, WS_E_INVALID_FORMAT },
|
||||||
|
};
|
||||||
|
HRESULT hr;
|
||||||
|
WS_XML_READER *reader;
|
||||||
|
const WS_XML_NODE *node;
|
||||||
|
const WS_XML_UTF8_TEXT *utf8;
|
||||||
|
ULONG i;
|
||||||
|
|
||||||
|
hr = WsCreateReader( NULL, 0, &reader, NULL ) ;
|
||||||
|
ok( hr == S_OK, "got %08x\n", hr );
|
||||||
|
|
||||||
|
for (i = 0; i < sizeof(tests)/sizeof(tests[0]); i++)
|
||||||
|
{
|
||||||
|
hr = set_input( reader, tests[i].str, strlen(tests[i].str) );
|
||||||
|
ok( hr == S_OK, "%u: got %08x\n", i, hr );
|
||||||
|
|
||||||
|
hr = WsReadToStartElement( reader, NULL, NULL, NULL, NULL );
|
||||||
|
ok( hr == S_OK, "%u: got %08x\n", i, hr );
|
||||||
|
|
||||||
|
hr = WsReadNode( reader, NULL );
|
||||||
|
ok( hr == tests[i].hr, "%u: got %08x\n", i, hr );
|
||||||
|
if (hr != S_OK) continue;
|
||||||
|
|
||||||
|
hr = WsGetReaderNode( reader, &node, NULL );
|
||||||
|
ok( hr == S_OK, "%u: got %08x\n", i, hr );
|
||||||
|
|
||||||
|
utf8 = (const WS_XML_UTF8_TEXT *)((const WS_XML_TEXT_NODE *)node)->text;
|
||||||
|
ok( utf8->value.length == strlen(tests[i].res), "%u: got %u\n", i, utf8->value.length );
|
||||||
|
ok( !memcmp( utf8->value.bytes, tests[i].res, strlen(tests[i].res) ), "%u: wrong data\n", i );
|
||||||
|
}
|
||||||
|
|
||||||
|
hr = set_input( reader, "<t a='

'/>", sizeof("<t a='

'/>") - 1 );
|
||||||
|
ok( hr == S_OK, "got %08x\n", hr );
|
||||||
|
|
||||||
|
hr = WsReadToStartElement( reader, NULL, NULL, NULL, NULL );
|
||||||
|
ok( hr == S_OK, "got %08x\n", hr );
|
||||||
|
|
||||||
|
hr = WsGetReaderNode( reader, &node, NULL );
|
||||||
|
ok( hr == S_OK, "got %08x\n", hr );
|
||||||
|
|
||||||
|
utf8 = (const WS_XML_UTF8_TEXT *)((const WS_XML_ELEMENT_NODE *)node)->attributes[0]->value;
|
||||||
|
ok( utf8->value.length == 2, "got %u\n", utf8->value.length );
|
||||||
|
ok( !memcmp( utf8->value.bytes, "\n\n", 2 ), "wrong data\n" );
|
||||||
|
|
||||||
|
WsFreeReader( reader );
|
||||||
|
}
|
||||||
|
|
||||||
START_TEST(reader)
|
START_TEST(reader)
|
||||||
{
|
{
|
||||||
test_WsCreateError();
|
test_WsCreateError();
|
||||||
|
@ -3576,4 +3690,5 @@ START_TEST(reader)
|
||||||
test_WsResetError();
|
test_WsResetError();
|
||||||
test_WsGetReaderPosition();
|
test_WsGetReaderPosition();
|
||||||
test_WsSetReaderPosition();
|
test_WsSetReaderPosition();
|
||||||
|
test_entities();
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue