/* * IXmlReader implementation * * Copyright 2010, 2012-2013 Nikolay Sivov * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA */ #define COBJMACROS #include #include #include "windef.h" #include "winbase.h" #include "initguid.h" #include "objbase.h" #include "xmllite.h" #include "xmllite_private.h" #include "wine/debug.h" #include "wine/list.h" #include "wine/unicode.h" WINE_DEFAULT_DEBUG_CHANNEL(xmllite); /* not defined in public headers */ DEFINE_GUID(IID_IXmlReaderInput, 0x0b3ccc9b, 0x9214, 0x428b, 0xa2, 0xae, 0xef, 0x3a, 0xa8, 0x71, 0xaf, 0xda); typedef enum { XmlEncoding_UTF16, XmlEncoding_UTF8, XmlEncoding_Unknown } xml_encoding; typedef enum { XmlReadInState_Initial, XmlReadInState_XmlDecl, XmlReadInState_Misc_DTD, XmlReadInState_DTD, XmlReadInState_DTD_Misc, XmlReadInState_Element, XmlReadInState_Content, XmlReadInState_MiscEnd, /* optional Misc at the end of a document */ XmlReadInState_Eof } XmlReaderInternalState; /* This state denotes where parsing was interrupted by input problem. Reader resumes parsing using this information. 
*/
typedef enum
{
    XmlReadResumeState_Initial,
    XmlReadResumeState_PITarget,
    XmlReadResumeState_PIBody,
    XmlReadResumeState_CDATA,
    XmlReadResumeState_Comment,
    XmlReadResumeState_STag,
    XmlReadResumeState_CharData,
    XmlReadResumeState_Whitespace
} XmlReaderResumeState;

/* saved pointer index to resume from particular input position;
   XmlReadResume_Last is the size of xmlreader.resume[] */
typedef enum
{
    XmlReadResume_Name,  /* PITarget, name for NCName, prefix for QName */
    XmlReadResume_Local, /* local for QName */
    XmlReadResume_Body,  /* PI body, comment text, CDATA text, CharData text */
    XmlReadResume_Last
} XmlReaderResume;

/* index into xmlreader.strvalues[]; StringValue_Last is the array size */
typedef enum
{
    StringValue_LocalName,
    StringValue_Prefix,
    StringValue_QualifiedName,
    StringValue_Value,
    StringValue_Last
} XmlReaderStringValue;

/* null-terminated wide string constants used while parsing */
static const WCHAR utf16W[] = {'U','T','F','-','1','6',0};
static const WCHAR utf8W[] = {'U','T','F','-','8',0};

static const WCHAR dblquoteW[] = {'\"',0};
static const WCHAR quoteW[] = {'\'',0};
static const WCHAR ltW[] = {'<',0};
static const WCHAR gtW[] = {'>',0};
static const WCHAR commentW[] = {'<','!','-','-',0};
static const WCHAR piW[] = {'<','?',0};

/* Returns a printable name for a node type, for trace output.
   type_names[] is indexed directly by the XmlNodeType value, empty strings
   fill the values that have no name in this table. */
static const char *debugstr_nodetype(XmlNodeType nodetype)
{
    static const char * const type_names[] =
    {
        "None",
        "Element",
        "Attribute",
        "Text",
        "CDATA",
        "",
        "",
        "ProcessingInstruction",
        "Comment",
        "",
        "DocumentType",
        "",
        "",
        "Whitespace",
        "",
        "EndElement",
        "",
        "XmlDeclaration"
    };

    /* values past the last known type are formatted numerically */
    if (nodetype > _XmlNodeType_Last)
        return wine_dbg_sprintf("unknown type=%d", nodetype);

    return type_names[nodetype];
}

/* Returns a printable name for a reader property, for trace output.
   prop_names[] is indexed directly by the XmlReaderProperty value. */
static const char *debugstr_prop(XmlReaderProperty prop)
{
    static const char * const prop_names[] =
    {
        "MultiLanguage",
        "ConformanceLevel",
        "RandomAccess",
        "XmlResolver",
        "DtdProcessing",
        "ReadState",
        "MaxElementDepth",
        "MaxEntityExpansion"
    };

    /* values past the last known property are formatted numerically */
    if (prop > _XmlReaderProperty_Last)
        return wine_dbg_sprintf("unknown property=%d", prop);

    return prop_names[prop];
}

/* one supported encoding: its name, internal id and the code page to convert from */
struct xml_encoding_data
{
    const WCHAR *name;
    xml_encoding enc;
    UINT cp;
};

/* This table is indexed by xml_encoding value in get_code_page() and is
   binary-searched by name in parse_encoding_name(), so entries have to stay
   in enum order and sorted by name at the same time.
   Code page ~0 is what reader_more() tests for to copy data without conversion. */
static const struct xml_encoding_data xml_encoding_map[] = {
    { utf16W, XmlEncoding_UTF16, ~0 },
    { utf8W,  XmlEncoding_UTF8,  CP_UTF8 }
};

/* Growable data buffer, used both for raw stream bytes and for converted UTF-16 text. */
typedef struct
{
    char *data;             /* buffer memory */
    UINT cur;               /* current position: a byte offset for the raw buffer,
                               a WCHAR index for the UTF-16 buffer */
    unsigned int allocated; /* allocated size in bytes */
    unsigned int written;   /* amount of valid data in bytes */
} encoded_buffer;

typedef struct input_buffer input_buffer;

typedef struct
{
    IXmlReaderInput IXmlReaderInput_iface;
    LONG ref;
    /* reference passed on IXmlReaderInput creation, is kept when input is created */
    IUnknown *input;
    IMalloc *imalloc;       /* optional allocator, process heap is used when it's NULL */
    xml_encoding encoding;
    BOOL hint;
    WCHAR *baseuri;
    /* stream reference set after SetInput() call from reader,
       stored as sequential stream, cause currently
       optimizations possible with IStream aren't implemented */
    ISequentialStream *stream;
    input_buffer *buffer;
    unsigned int pending : 1; /* set by readerinput_growraw() when last read returned E_PENDING */
} xmlreaderinput;

static const struct IUnknownVtbl xmlreaderinputvtbl;

/* Structure to hold parsed string of specific length.

   Reader stores node value as 'start' pointer, on request
   a null-terminated version of it is allocated.

   To init a strval variable use reader_init_strvalue(),
   to set strval as a reader value use reader_set_strvalue().
*/
typedef struct
{
    WCHAR *str;   /* allocated null-terminated string */
    UINT   len;   /* length in WCHARs, altered after ReadValueChunk */
    UINT   start; /* input position where value starts */
} strval;

/* Shared empty value. Helpers below compare 'str' against strval_empty.str
   by pointer to tell it apart from allocated strings, so it's never freed. */
static WCHAR emptyW[] = {0};
static const strval strval_empty = { emptyW };

/* attribute of a current node, linked into xmlreader.attrs */
struct attribute
{
    struct list entry;
    strval localname;
    strval value;
};

/* open element, linked into xmlreader.elements with the innermost one at list head */
struct element
{
    struct list entry;
    strval qname;
    strval localname;
};

typedef struct
{
    IXmlReader IXmlReader_iface;
    LONG ref;
    xmlreaderinput *input;
    IMalloc *imalloc;            /* optional allocator, process heap is used when it's NULL */
    XmlReadState state;
    XmlReaderInternalState instate;
    XmlReaderResumeState resumestate;
    XmlNodeType nodetype;
    DtdProcessing dtdmode;
    UINT line, pos;           /* reader position in XML stream */
    struct list attrs;        /* attributes list for current node */
    struct attribute *attr;   /* current attribute */
    UINT attr_count;
    struct list elements;
    strval strvalues[StringValue_Last]; /* indexed by XmlReaderStringValue */
    UINT depth;
    UINT max_depth;
    BOOL empty_element;
    UINT resume[XmlReadResume_Last]; /* offsets used to resume reader */
} xmlreader;

/* pair of buffers owned by reader input: raw stream data and its UTF-16 conversion */
struct input_buffer
{
    encoded_buffer utf16;
    encoded_buffer encoded;
    UINT code_page;
    xmlreaderinput *input;
};

/* maps interface pointer back to implementation object */
static inline xmlreader *impl_from_IXmlReader(IXmlReader *iface)
{
    return CONTAINING_RECORD(iface, xmlreader, IXmlReader_iface);
}

/* maps interface pointer back to implementation object */
static inline xmlreaderinput *impl_from_IXmlReaderInput(IXmlReaderInput *iface)
{
    return CONTAINING_RECORD(iface, xmlreaderinput, IXmlReaderInput_iface);
}

/* Generic allocation helpers, supplied IMalloc is used when it's not NULL,
   heap_* functions otherwise. */
static inline void *m_alloc(IMalloc *imalloc, size_t len)
{
    if (imalloc)
        return IMalloc_Alloc(imalloc, len);
    else
        return heap_alloc(len);
}

static inline void *m_realloc(IMalloc *imalloc, void *mem, size_t len)
{
    if (imalloc)
        return IMalloc_Realloc(imalloc, mem, len);
    else
        return heap_realloc(mem, len);
}

static inline void m_free(IMalloc *imalloc, void *mem)
{
    if (imalloc)
        IMalloc_Free(imalloc, mem);
    else
        heap_free(mem);
}

/* reader memory allocation functions */
static inline void *reader_alloc(xmlreader *reader, size_t len)
{
    return m_alloc(reader->imalloc, len);
}

static inline void reader_free(xmlreader *reader, void *mem)
{
    m_free(reader->imalloc, mem);
}

/* Just return pointer from offset, no attempt to read more. */
static inline WCHAR *reader_get_ptr2(const xmlreader *reader, UINT offset)
{
    encoded_buffer *buffer = &reader->input->buffer->utf16;
    return (WCHAR*)buffer->data + offset;
}

/* Returns string data for a value: explicit 'str' pointer if it's set,
   otherwise a pointer into UTF-16 buffer at 'start' offset. */
static inline WCHAR *reader_get_strptr(const xmlreader *reader, const strval *v)
{
    return v->str ? v->str : reader_get_ptr2(reader, v->start);
}

/* Copies 'src' to 'dest'. Unless source is the shared empty string, 'dest'
   gets its own allocated null-terminated copy with 'start' reset to 0.
   Returns E_OUTOFMEMORY on allocation failure, S_OK otherwise. */
static HRESULT reader_strvaldup(xmlreader *reader, const strval *src, strval *dest)
{
    *dest = *src;

    if (src->str != strval_empty.str)
    {
        dest->str = reader_alloc(reader, (dest->len+1)*sizeof(WCHAR));
        if (!dest->str) return E_OUTOFMEMORY;
        memcpy(dest->str, reader_get_strptr(reader, src), dest->len*sizeof(WCHAR));
        dest->str[dest->len] = 0;
        dest->start = 0;
    }

    return S_OK;
}

/* reader input memory allocation functions */
static inline void *readerinput_alloc(xmlreaderinput *input, size_t len)
{
    return m_alloc(input->imalloc, len);
}

static inline void *readerinput_realloc(xmlreaderinput *input, void *mem, size_t len)
{
    return m_realloc(input->imalloc, mem, len);
}

static inline void readerinput_free(xmlreaderinput *input, void *mem)
{
    m_free(input->imalloc, mem);
}

/* Duplicates a null-terminated string with input allocator,
   returns NULL for NULL source or on allocation failure. */
static inline WCHAR *readerinput_strdupW(xmlreaderinput *input, const WCHAR *str)
{
    LPWSTR ret = NULL;

    if(str) {
        DWORD size;

        /* size in bytes, terminator included */
        size = (strlenW(str)+1)*sizeof(WCHAR);
        ret = readerinput_alloc(input, size);
        if (ret) memcpy(ret, str, size);
    }

    return ret;
}

/* Frees all attribute entries and resets the list. Only list nodes are
   released here, reader_add_attr() stores values without copying them. */
static void reader_clear_attrs(xmlreader *reader)
{
    struct attribute *attr, *attr2;
    LIST_FOR_EACH_ENTRY_SAFE(attr, attr2, &reader->attrs, struct attribute, entry)
    {
        reader_free(reader, attr);
    }
    list_init(&reader->attrs);
    reader->attr_count = 0;
}

/* attribute data holds pointers to buffer data, so buffer shrink is not possible
   while we are on a node with attributes */
static HRESULT reader_add_attr(xmlreader *reader, strval *localname, strval *value)
{
    struct attribute *attr;

    attr = reader_alloc(reader, sizeof(*attr));
    if (!attr) return E_OUTOFMEMORY;

    /* shallow copies, no string data is duplicated */
    attr->localname = *localname;
    attr->value = *value;
    list_add_tail(&reader->attrs, &attr->entry);
    reader->attr_count++;

    return S_OK;
}

/* This one frees stored string value if needed */
static void reader_free_strvalued(xmlreader *reader, strval *v)
{
    if (v->str != strval_empty.str)
    {
        reader_free(reader, v->str);
        *v = strval_empty;
    }
}

/* returns length in WCHARs from 'start' to current buffer offset */
static inline UINT reader_get_len(const xmlreader *reader, UINT start)
{
    return reader->input->buffer->utf16.cur - start;
}

/* initializes a value that refers to UTF-16 buffer data by offset, no string pointer is set */
static inline void reader_init_strvalue(UINT start, UINT len, strval *v)
{
    v->start = start;
    v->len = len;
    v->str = NULL;
}

/* formats string value for trace output */
static inline const char* debug_strval(const xmlreader *reader, const strval *v)
{
    return debugstr_wn(reader_get_strptr(reader, v), v->len);
}

/* used to initialize from constant string */
static inline void reader_init_cstrvalue(WCHAR *str, UINT len, strval *v)
{
    v->start = 0;
    v->len = len;
    v->str = str;
}

/* frees one of reader's own string values */
static void reader_free_strvalue(xmlreader *reader, XmlReaderStringValue type)
{
    reader_free_strvalued(reader, &reader->strvalues[type]);
}

/* frees all of reader's own string values */
static void reader_free_strvalues(xmlreader *reader)
{
    int type;

    for (type = 0; type < StringValue_Last; type++)
        reader_free_strvalue(reader, type);
}

/* This helper should only be used to test if strings are the same,
   it doesn't try to sort.
*/ static inline int strval_eq(const xmlreader *reader, const strval *str1, const strval *str2) { if (str1->len != str2->len) return 0; return !memcmp(reader_get_strptr(reader, str1), reader_get_strptr(reader, str2), str1->len*sizeof(WCHAR)); } static void reader_clear_elements(xmlreader *reader) { struct element *elem, *elem2; LIST_FOR_EACH_ENTRY_SAFE(elem, elem2, &reader->elements, struct element, entry) { reader_free_strvalued(reader, &elem->qname); reader_free(reader, elem); } list_init(&reader->elements); reader->empty_element = FALSE; } static HRESULT reader_inc_depth(xmlreader *reader) { if (++reader->depth > reader->max_depth) return SC_E_MAXELEMENTDEPTH; return S_OK; } static void reader_dec_depth(xmlreader *reader) { if (reader->depth > 1) reader->depth--; } static HRESULT reader_push_element(xmlreader *reader, strval *qname, strval *localname) { struct element *elem; HRESULT hr; elem = reader_alloc(reader, sizeof(*elem)); if (!elem) return E_OUTOFMEMORY; hr = reader_strvaldup(reader, qname, &elem->qname); if (FAILED(hr)) { reader_free(reader, elem); return hr; } hr = reader_strvaldup(reader, localname, &elem->localname); if (FAILED(hr)) { reader_free_strvalued(reader, &elem->qname); reader_free(reader, elem); return hr; } if (!list_empty(&reader->elements)) { hr = reader_inc_depth(reader); if (FAILED(hr)) { reader_free(reader, elem); return hr; } } list_add_head(&reader->elements, &elem->entry); reader->empty_element = FALSE; return hr; } static void reader_pop_element(xmlreader *reader) { struct element *elem = LIST_ENTRY(list_head(&reader->elements), struct element, entry); if (elem) { list_remove(&elem->entry); reader_free_strvalued(reader, &elem->qname); reader_free_strvalued(reader, &elem->localname); reader_free(reader, elem); reader_dec_depth(reader); } } /* Always make a copy, cause strings are supposed to be null terminated. Null pointer for 'value' means node value is to be determined. 
*/ static void reader_set_strvalue(xmlreader *reader, XmlReaderStringValue type, const strval *value) { strval *v = &reader->strvalues[type]; reader_free_strvalue(reader, type); if (!value) { v->str = NULL; v->start = 0; v->len = 0; return; } if (value->str == strval_empty.str) *v = *value; else { if (type == StringValue_Value) { /* defer allocation for value string */ v->str = NULL; v->start = value->start; v->len = value->len; } else { v->str = reader_alloc(reader, (value->len + 1)*sizeof(WCHAR)); memcpy(v->str, reader_get_strptr(reader, value), value->len*sizeof(WCHAR)); v->str[value->len] = 0; v->len = value->len; } } } static inline int is_reader_pending(xmlreader *reader) { return reader->input->pending; } static HRESULT init_encoded_buffer(xmlreaderinput *input, encoded_buffer *buffer) { const int initial_len = 0x2000; buffer->data = readerinput_alloc(input, initial_len); if (!buffer->data) return E_OUTOFMEMORY; memset(buffer->data, 0, 4); buffer->cur = 0; buffer->allocated = initial_len; buffer->written = 0; return S_OK; } static void free_encoded_buffer(xmlreaderinput *input, encoded_buffer *buffer) { readerinput_free(input, buffer->data); } static HRESULT get_code_page(xml_encoding encoding, UINT *cp) { if (encoding == XmlEncoding_Unknown) { FIXME("unsupported encoding %d\n", encoding); return E_NOTIMPL; } *cp = xml_encoding_map[encoding].cp; return S_OK; } static xml_encoding parse_encoding_name(const WCHAR *name, int len) { int min, max, n, c; if (!name) return XmlEncoding_Unknown; min = 0; max = sizeof(xml_encoding_map)/sizeof(struct xml_encoding_data) - 1; while (min <= max) { n = (min+max)/2; if (len != -1) c = strncmpiW(xml_encoding_map[n].name, name, len); else c = strcmpiW(xml_encoding_map[n].name, name); if (!c) return xml_encoding_map[n].enc; if (c > 0) max = n-1; else min = n+1; } return XmlEncoding_Unknown; } static HRESULT alloc_input_buffer(xmlreaderinput *input) { input_buffer *buffer; HRESULT hr; input->buffer = NULL; buffer = 
readerinput_alloc(input, sizeof(*buffer)); if (!buffer) return E_OUTOFMEMORY; buffer->input = input; buffer->code_page = ~0; /* code page is unknown at this point */ hr = init_encoded_buffer(input, &buffer->utf16); if (hr != S_OK) { readerinput_free(input, buffer); return hr; } hr = init_encoded_buffer(input, &buffer->encoded); if (hr != S_OK) { free_encoded_buffer(input, &buffer->utf16); readerinput_free(input, buffer); return hr; } input->buffer = buffer; return S_OK; } static void free_input_buffer(input_buffer *buffer) { free_encoded_buffer(buffer->input, &buffer->encoded); free_encoded_buffer(buffer->input, &buffer->utf16); readerinput_free(buffer->input, buffer); } static void readerinput_release_stream(xmlreaderinput *readerinput) { if (readerinput->stream) { ISequentialStream_Release(readerinput->stream); readerinput->stream = NULL; } } /* Queries already stored interface for IStream/ISequentialStream. Interface supplied on creation will be overwritten */ static HRESULT readerinput_query_for_stream(xmlreaderinput *readerinput) { HRESULT hr; readerinput_release_stream(readerinput); hr = IUnknown_QueryInterface(readerinput->input, &IID_IStream, (void**)&readerinput->stream); if (hr != S_OK) hr = IUnknown_QueryInterface(readerinput->input, &IID_ISequentialStream, (void**)&readerinput->stream); return hr; } /* reads a chunk to raw buffer */ static HRESULT readerinput_growraw(xmlreaderinput *readerinput) { encoded_buffer *buffer = &readerinput->buffer->encoded; /* to make sure aligned length won't exceed allocated length */ ULONG len = buffer->allocated - buffer->written - 4; ULONG read; HRESULT hr; /* always try to get aligned to 4 bytes, so the only case we can get partially read characters is variable width encodings like UTF-8 */ len = (len + 3) & ~3; /* try to use allocated space or grow */ if (buffer->allocated - buffer->written < len) { buffer->allocated *= 2; buffer->data = readerinput_realloc(readerinput, buffer->data, buffer->allocated); len = 
buffer->allocated - buffer->written; } read = 0; hr = ISequentialStream_Read(readerinput->stream, buffer->data + buffer->written, len, &read); TRACE("written=%d, alloc=%d, requested=%d, read=%d, ret=0x%08x\n", buffer->written, buffer->allocated, len, read, hr); readerinput->pending = hr == E_PENDING; if (FAILED(hr)) return hr; buffer->written += read; return hr; } /* grows UTF-16 buffer so it has at least 'length' WCHAR chars free on return */ static void readerinput_grow(xmlreaderinput *readerinput, int length) { encoded_buffer *buffer = &readerinput->buffer->utf16; length *= sizeof(WCHAR); /* grow if needed, plus 4 bytes to be sure null terminator will fit in */ if (buffer->allocated < buffer->written + length + 4) { int grown_size = max(2*buffer->allocated, buffer->allocated + length); buffer->data = readerinput_realloc(readerinput, buffer->data, grown_size); buffer->allocated = grown_size; } } static inline BOOL readerinput_is_utf8(xmlreaderinput *readerinput) { static const char startA[] = {'<','?'}; static const char commentA[] = {'<','!'}; encoded_buffer *buffer = &readerinput->buffer->encoded; unsigned char *ptr = (unsigned char*)buffer->data; return !memcmp(buffer->data, startA, sizeof(startA)) || !memcmp(buffer->data, commentA, sizeof(commentA)) || /* test start byte */ (ptr[0] == '<' && ( (ptr[1] && (ptr[1] <= 0x7f)) || (buffer->data[1] >> 5) == 0x6 || /* 2 bytes */ (buffer->data[1] >> 4) == 0xe || /* 3 bytes */ (buffer->data[1] >> 3) == 0x1e) /* 4 bytes */ ); } static HRESULT readerinput_detectencoding(xmlreaderinput *readerinput, xml_encoding *enc) { encoded_buffer *buffer = &readerinput->buffer->encoded; static const WCHAR startW[] = {'<','?'}; static const WCHAR commentW[] = {'<','!'}; static const char utf8bom[] = {0xef,0xbb,0xbf}; static const char utf16lebom[] = {0xff,0xfe}; *enc = XmlEncoding_Unknown; if (buffer->written <= 3) { HRESULT hr = readerinput_growraw(readerinput); if (FAILED(hr)) return hr; if (buffer->written <= 3) return 
MX_E_INPUTEND; } /* try start symbols if we have enough data to do that, input buffer should contain first chunk already */ if (readerinput_is_utf8(readerinput)) *enc = XmlEncoding_UTF8; else if (!memcmp(buffer->data, startW, sizeof(startW)) || !memcmp(buffer->data, commentW, sizeof(commentW))) *enc = XmlEncoding_UTF16; /* try with BOM now */ else if (!memcmp(buffer->data, utf8bom, sizeof(utf8bom))) { buffer->cur += sizeof(utf8bom); *enc = XmlEncoding_UTF8; } else if (!memcmp(buffer->data, utf16lebom, sizeof(utf16lebom))) { buffer->cur += sizeof(utf16lebom); *enc = XmlEncoding_UTF16; } return S_OK; } static int readerinput_get_utf8_convlen(xmlreaderinput *readerinput) { encoded_buffer *buffer = &readerinput->buffer->encoded; int len = buffer->written; /* complete single byte char */ if (!(buffer->data[len-1] & 0x80)) return len; /* find start byte of multibyte char */ while (--len && !(buffer->data[len] & 0xc0)) ; return len; } /* Returns byte length of complete char sequence for buffer code page, it's relative to current buffer position which is currently used for BOM handling only. */ static int readerinput_get_convlen(xmlreaderinput *readerinput) { encoded_buffer *buffer = &readerinput->buffer->encoded; int len; if (readerinput->buffer->code_page == CP_UTF8) len = readerinput_get_utf8_convlen(readerinput); else len = buffer->written; TRACE("%d\n", len - buffer->cur); return len - buffer->cur; } /* It's possible that raw buffer has some leftovers from last conversion - some char sequence that doesn't represent a full code point. Length argument should be calculated with readerinput_get_convlen(), if it's -1 it will be calculated here. 
*/ static void readerinput_shrinkraw(xmlreaderinput *readerinput, int len) { encoded_buffer *buffer = &readerinput->buffer->encoded; if (len == -1) len = readerinput_get_convlen(readerinput); memmove(buffer->data, buffer->data + buffer->cur + (buffer->written - len), len); /* everything below cur is lost too */ buffer->written -= len + buffer->cur; /* after this point we don't need cur offset really, it's used only to mark where actual data begins when first chunk is read */ buffer->cur = 0; } /* note that raw buffer content is kept */ static void readerinput_switchencoding(xmlreaderinput *readerinput, xml_encoding enc) { encoded_buffer *src = &readerinput->buffer->encoded; encoded_buffer *dest = &readerinput->buffer->utf16; int len, dest_len; HRESULT hr; WCHAR *ptr; UINT cp; hr = get_code_page(enc, &cp); if (FAILED(hr)) return; readerinput->buffer->code_page = cp; len = readerinput_get_convlen(readerinput); TRACE("switching to cp %d\n", cp); /* just copy in this case */ if (enc == XmlEncoding_UTF16) { readerinput_grow(readerinput, len); memcpy(dest->data, src->data + src->cur, len); dest->written += len*sizeof(WCHAR); return; } dest_len = MultiByteToWideChar(cp, 0, src->data + src->cur, len, NULL, 0); readerinput_grow(readerinput, dest_len); ptr = (WCHAR*)dest->data; MultiByteToWideChar(cp, 0, src->data + src->cur, len, ptr, dest_len); ptr[dest_len] = 0; dest->written += dest_len*sizeof(WCHAR); } /* shrinks parsed data a buffer begins with */ static void reader_shrink(xmlreader *reader) { encoded_buffer *buffer = &reader->input->buffer->utf16; /* avoid to move too often using threshold shrink length */ if (buffer->cur*sizeof(WCHAR) > buffer->written / 2) { buffer->written -= buffer->cur*sizeof(WCHAR); memmove(buffer->data, (WCHAR*)buffer->data + buffer->cur, buffer->written); buffer->cur = 0; *(WCHAR*)&buffer->data[buffer->written] = 0; } } /* This is a normal way for reader to get new data converted from raw buffer to utf16 buffer. 
It won't attempt to shrink but will grow destination buffer if needed */ static HRESULT reader_more(xmlreader *reader) { xmlreaderinput *readerinput = reader->input; encoded_buffer *src = &readerinput->buffer->encoded; encoded_buffer *dest = &readerinput->buffer->utf16; UINT cp = readerinput->buffer->code_page; int len, dest_len; HRESULT hr; WCHAR *ptr; /* get some raw data from stream first */ hr = readerinput_growraw(readerinput); len = readerinput_get_convlen(readerinput); /* just copy for UTF-16 case */ if (cp == ~0) { readerinput_grow(readerinput, len); memcpy(dest->data + dest->written, src->data + src->cur, len); dest->written += len*sizeof(WCHAR); return hr; } dest_len = MultiByteToWideChar(cp, 0, src->data + src->cur, len, NULL, 0); readerinput_grow(readerinput, dest_len); ptr = (WCHAR*)(dest->data + dest->written); MultiByteToWideChar(cp, 0, src->data + src->cur, len, ptr, dest_len); ptr[dest_len] = 0; dest->written += dest_len*sizeof(WCHAR); /* get rid of processed data */ readerinput_shrinkraw(readerinput, len); return hr; } static inline UINT reader_get_cur(xmlreader *reader) { return reader->input->buffer->utf16.cur; } static inline WCHAR *reader_get_ptr(xmlreader *reader) { encoded_buffer *buffer = &reader->input->buffer->utf16; WCHAR *ptr = (WCHAR*)buffer->data + buffer->cur; if (!*ptr) reader_more(reader); return (WCHAR*)buffer->data + buffer->cur; } static int reader_cmp(xmlreader *reader, const WCHAR *str) { const WCHAR *ptr = reader_get_ptr(reader); return strncmpW(str, ptr, strlenW(str)); } /* moves cursor n WCHARs forward */ static void reader_skipn(xmlreader *reader, int n) { encoded_buffer *buffer = &reader->input->buffer->utf16; const WCHAR *ptr = reader_get_ptr(reader); while (*ptr++ && n--) { buffer->cur++; reader->pos++; } } static inline BOOL is_wchar_space(WCHAR ch) { return ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n'; } /* [3] S ::= (#x20 | #x9 | #xD | #xA)+ */ static int reader_skipspaces(xmlreader *reader) { encoded_buffer 
*buffer = &reader->input->buffer->utf16; const WCHAR *ptr = reader_get_ptr(reader); UINT start = reader_get_cur(reader); while (is_wchar_space(*ptr)) { if (*ptr == '\r') reader->pos = 0; else if (*ptr == '\n') { reader->line++; reader->pos = 0; } else reader->pos++; buffer->cur++; ptr = reader_get_ptr(reader); } return reader_get_cur(reader) - start; } /* [26] VersionNum ::= '1.' [0-9]+ */ static HRESULT reader_parse_versionnum(xmlreader *reader, strval *val) { static const WCHAR onedotW[] = {'1','.',0}; WCHAR *ptr, *ptr2; UINT start; if (reader_cmp(reader, onedotW)) return WC_E_XMLDECL; start = reader_get_cur(reader); /* skip "1." */ reader_skipn(reader, 2); ptr2 = ptr = reader_get_ptr(reader); while (*ptr >= '0' && *ptr <= '9') { reader_skipn(reader, 1); ptr = reader_get_ptr(reader); } if (ptr2 == ptr) return WC_E_DIGIT; reader_init_strvalue(start, reader_get_cur(reader)-start, val); TRACE("version=%s\n", debug_strval(reader, val)); return S_OK; } /* [25] Eq ::= S? '=' S? */ static HRESULT reader_parse_eq(xmlreader *reader) { static const WCHAR eqW[] = {'=',0}; reader_skipspaces(reader); if (reader_cmp(reader, eqW)) return WC_E_EQUAL; /* skip '=' */ reader_skipn(reader, 1); reader_skipspaces(reader); return S_OK; } /* [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"') */ static HRESULT reader_parse_versioninfo(xmlreader *reader) { static const WCHAR versionW[] = {'v','e','r','s','i','o','n',0}; strval val, name; HRESULT hr; if (!reader_skipspaces(reader)) return WC_E_WHITESPACE; if (reader_cmp(reader, versionW)) return WC_E_XMLDECL; reader_init_strvalue(reader_get_cur(reader), 7, &name); /* skip 'version' */ reader_skipn(reader, 7); hr = reader_parse_eq(reader); if (FAILED(hr)) return hr; if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW)) return WC_E_QUOTE; /* skip "'"|'"' */ reader_skipn(reader, 1); hr = reader_parse_versionnum(reader, &val); if (FAILED(hr)) return hr; if (reader_cmp(reader, quoteW) && reader_cmp(reader, 
dblquoteW)) return WC_E_QUOTE; /* skip "'"|'"' */ reader_skipn(reader, 1); return reader_add_attr(reader, &name, &val); } /* ([A-Za-z0-9._] | '-') */ static inline BOOL is_wchar_encname(WCHAR ch) { return ((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') || (ch >= '0' && ch <= '9') || (ch == '.') || (ch == '_') || (ch == '-')); } /* [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* */ static HRESULT reader_parse_encname(xmlreader *reader, strval *val) { WCHAR *start = reader_get_ptr(reader), *ptr; xml_encoding enc; int len; if ((*start < 'A' || *start > 'Z') && (*start < 'a' || *start > 'z')) return WC_E_ENCNAME; val->start = reader_get_cur(reader); ptr = start; while (is_wchar_encname(*++ptr)) ; len = ptr - start; enc = parse_encoding_name(start, len); TRACE("encoding name %s\n", debugstr_wn(start, len)); val->str = start; val->len = len; if (enc == XmlEncoding_Unknown) return WC_E_ENCNAME; /* skip encoding name */ reader_skipn(reader, len); return S_OK; } /* [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) */ static HRESULT reader_parse_encdecl(xmlreader *reader) { static const WCHAR encodingW[] = {'e','n','c','o','d','i','n','g',0}; strval name, val; HRESULT hr; if (!reader_skipspaces(reader)) return S_FALSE; if (reader_cmp(reader, encodingW)) return S_FALSE; name.str = reader_get_ptr(reader); name.start = reader_get_cur(reader); name.len = 8; /* skip 'encoding' */ reader_skipn(reader, 8); hr = reader_parse_eq(reader); if (FAILED(hr)) return hr; if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW)) return WC_E_QUOTE; /* skip "'"|'"' */ reader_skipn(reader, 1); hr = reader_parse_encname(reader, &val); if (FAILED(hr)) return hr; if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW)) return WC_E_QUOTE; /* skip "'"|'"' */ reader_skipn(reader, 1); return reader_add_attr(reader, &name, &val); } /* [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')) */ static HRESULT 
reader_parse_sddecl(xmlreader *reader) { static const WCHAR standaloneW[] = {'s','t','a','n','d','a','l','o','n','e',0}; static const WCHAR yesW[] = {'y','e','s',0}; static const WCHAR noW[] = {'n','o',0}; strval name, val; UINT start; HRESULT hr; if (!reader_skipspaces(reader)) return S_FALSE; if (reader_cmp(reader, standaloneW)) return S_FALSE; reader_init_strvalue(reader_get_cur(reader), 10, &name); /* skip 'standalone' */ reader_skipn(reader, 10); hr = reader_parse_eq(reader); if (FAILED(hr)) return hr; if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW)) return WC_E_QUOTE; /* skip "'"|'"' */ reader_skipn(reader, 1); if (reader_cmp(reader, yesW) && reader_cmp(reader, noW)) return WC_E_XMLDECL; start = reader_get_cur(reader); /* skip 'yes'|'no' */ reader_skipn(reader, reader_cmp(reader, yesW) ? 2 : 3); reader_init_strvalue(start, reader_get_cur(reader)-start, &val); TRACE("standalone=%s\n", debug_strval(reader, &val)); if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW)) return WC_E_QUOTE; /* skip "'"|'"' */ reader_skipn(reader, 1); return reader_add_attr(reader, &name, &val); } /* [23] XMLDecl ::= '' */ static HRESULT reader_parse_xmldecl(xmlreader *reader) { static const WCHAR xmldeclW[] = {'<','?','x','m','l',' ',0}; static const WCHAR declcloseW[] = {'?','>',0}; HRESULT hr; /* check if we have "nodetype = XmlNodeType_XmlDeclaration; reader_set_strvalue(reader, StringValue_LocalName, &strval_empty); reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty); reader_set_strvalue(reader, StringValue_Value, &strval_empty); return S_OK; } /* [15] Comment ::= '' */ static HRESULT reader_parse_comment(xmlreader *reader) { WCHAR *ptr; UINT start; if (reader->resumestate == XmlReadResumeState_Comment) { start = reader->resume[XmlReadResume_Body]; ptr = reader_get_ptr(reader); } else { /* skip '