Sweden-Number/dlls/dwrite/analyzer.c

896 lines
33 KiB
C
Raw Normal View History

/*
* Text analyzer
*
* Copyright 2011 Aric Stewart for CodeWeavers
* Copyright 2012, 2014 Nikolay Sivov for CodeWeavers
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
*/
#define COBJMACROS
#include "dwrite.h"
#include "dwrite_private.h"
WINE_DEFAULT_DEBUG_CHANNEL(dwrite);
extern const unsigned short wine_linebreak_table[];
enum scriptcode {
Script_Arabic = 0,
Script_Armenian = 1,
Script_Balinese = 2,
Script_Bengali = 3,
Script_Buginese = 6,
Script_Canadian = 8,
Script_Cherokee = 11,
Script_Controls = 12,
Script_Coptic = 13,
Script_Cyrillic = 16,
Script_Devanagari = 18,
Script_Ethiopic = 19,
Script_Georgian = 20,
Script_Glagolitic = 22,
Script_Greek = 23,
Script_Gujarati = 24,
Script_Gurmukhi = 25,
Script_Hangul = 27,
Script_Hebrew = 29,
Script_Kannada = 32,
Script_Khmer = 36,
Script_Lao = 37,
Script_Latin = 38,
Script_Lepcha = 39,
Script_Limbu = 40,
Script_Malayalam = 44,
Script_Mongolian = 45,
Script_Myanmar = 46,
Script_New_TaiLue = 47,
Script_NKo = 48,
Script_Ogham = 49,
Script_OlChiki = 50,
Script_Oriya = 53,
Script_Runic = 58,
Script_Sinhala = 61,
Script_Sundanese = 62,
Script_Syriac = 64,
Script_TaiLe = 67,
Script_Tamil = 68,
Script_Telugu = 69,
Script_Thaana = 70,
Script_Thai = 71,
Script_Tibetan = 72,
Script_Tifinagh = 73,
Script_Symbol = 77,
Script_Unknown = (UINT16)-1
};
struct script_range {
UINT16 script;
DWORD first;
DWORD last;
};
static const struct script_range script_ranges[] = {
/* C0 Controls: U+0000U+001F */
/* ASCII punctuation and symbols: U+0020U+002F */
/* ASCII digits: U+0030U+0039 */
/* ASCII punctuation and symbols: U+003AU+0040 */
{ Script_Symbol, 0x00, 0x040 },
/* Latin uppercase: U+0041U+005A */
{ Script_Latin, 0x41, 0x5a },
/* ASCII punctuation and symbols: U+005BU+0060 */
{ Script_Symbol, 0x5b, 0x060 },
/* Latin lowercase: U+0061U+007A */
{ Script_Latin, 0x61, 0x7a },
/* ASCII punctuation and symbols, control char DEL: U+007BU+007F */
{ Script_Symbol, 0x7b, 0x7f },
/* C1 Controls: U+0080U+009F */
{ Script_Controls, 0x80, 0x9f },
/* Latin-1 Supplement: U+00A0U+00FF */
/* Latin Extended-A: U+0100U+017F */
/* Latin Extended-B: U+0180U+024F */
/* IPA Extensions: U+0250U+02AF */
/* Spacing Modifier Letters: U+02B0U+02FF */
{ Script_Latin, 0xa0, 0x2ff },
/* Combining Diacritical Marks: U+0300U+036F */
{ Script_Symbol, 0x300, 0x36f },
/* Greek: U+0370U+03E1 */
{ Script_Greek, 0x370, 0x3e1 },
/* Coptic: U+03E2U+03Ef */
{ Script_Coptic, 0x3e2, 0x3ef },
/* Greek: U+03F0U+03FF */
{ Script_Greek, 0x3f0, 0x3ff },
/* Cyrillic: U+0400U+04FF */
/* Cyrillic Supplement: U+0500U+052F */
/* Cyrillic Supplement range is incomplete cause it's based on Unicode 5.2
that doesn't define some Abkhaz and Azerbaijani letters, we support Unicode 6.0 range here */
{ Script_Cyrillic, 0x400, 0x52f },
/* Armenian: U+0530U+058F */
{ Script_Armenian, 0x530, 0x58f },
/* Hebrew: U+0590U+05FF */
{ Script_Hebrew, 0x590, 0x5ff },
/* Arabic: U+0600U+06FF */
{ Script_Arabic, 0x600, 0x6ff },
/* Syriac: U+0600U+06FF */
{ Script_Syriac, 0x700, 0x74f },
/* Arabic Supplement: U+0750U+077F */
{ Script_Arabic, 0x750, 0x77f },
/* Thaana: U+0780U+07BF */
{ Script_Thaana, 0x780, 0x7bf },
/* N'Ko: U+07C0U+07FF */
{ Script_NKo, 0x7c0, 0x7ff },
/* Devanagari: U+0900U+097F */
{ Script_Devanagari, 0x900, 0x97f },
/* Bengali: U+0980U+09FF */
{ Script_Bengali, 0x980, 0x9ff },
/* Gurmukhi: U+0A00U+0A7F */
{ Script_Gurmukhi, 0xa00, 0xa7f },
/* Gujarati: U+0A80U+0AFF */
{ Script_Gujarati, 0xa80, 0xaff },
/* Oriya: U+0B00U+0B7F */
{ Script_Oriya, 0xb00, 0xb7f },
/* Tamil: U+0B80U+0BFF */
{ Script_Tamil, 0xb80, 0xbff },
/* Telugu: U+0C00U+0C7F */
{ Script_Telugu, 0xc00, 0xc7f },
/* Kannada: U+0C80U+0CFF */
{ Script_Kannada, 0xc80, 0xcff },
/* Malayalam: U+0D00U+0D7F */
{ Script_Malayalam, 0xd00, 0xd7f },
/* Sinhala: U+0D80U+0DFF */
{ Script_Sinhala, 0xd80, 0xdff },
/* Thai: U+0E00U+0E7F */
{ Script_Thai, 0xe00, 0xe7f },
/* Lao: U+0E80U+0EFF */
{ Script_Lao, 0xe80, 0xeff },
/* Tibetan: U+0F00U+0FFF */
{ Script_Tibetan, 0xf00, 0xfff },
/* Myanmar: U+1000U+109F */
{ Script_Myanmar, 0x1000, 0x109f },
/* Georgian: U+10A0U+10FF */
{ Script_Georgian, 0x10a0, 0x10ff },
/* Hangul Jamo: U+1100U+11FF */
{ Script_Hangul, 0x1100, 0x11ff },
/* Ethiopic: U+1200U+137F */
/* Ethiopic Extensions: U+1380U+139F */
{ Script_Ethiopic, 0x1200, 0x139f },
/* Cherokee: U+13A0U+13FF */
{ Script_Cherokee, 0x13a0, 0x13ff },
/* Canadian Aboriginal Syllabics: U+1400U+167F */
{ Script_Canadian, 0x1400, 0x167f },
/* Ogham: U+1680U+169F */
{ Script_Ogham, 0x1680, 0x169f },
/* Runic: U+16A0U+16F0 */
{ Script_Runic, 0x16a0, 0x16f0 },
/* Khmer: U+1780U+17FF */
{ Script_Khmer, 0x1780, 0x17ff },
/* Mongolian: U+1800U+18AF */
{ Script_Mongolian, 0x1800, 0x18af },
/* Limbu: U+1900U+194F */
{ Script_Limbu, 0x1900, 0x194f },
/* Tai Le: U+1950U+197F */
{ Script_TaiLe, 0x1950, 0x197f },
/* New Tai Lue: U+1980U+19DF */
{ Script_New_TaiLue, 0x1980, 0x19df },
/* Khmer Symbols: U+19E0U+19FF */
{ Script_Khmer, 0x19e0, 0x19ff },
/* Buginese: U+1A00U+1A1F */
{ Script_Buginese, 0x1a00, 0x1a1f },
/* Tai Tham: U+1A20U+1AAF */
{ Script_Symbol, 0x1a20, 0x1aaf },
/* Balinese: U+1B00U+1B7F */
{ Script_Balinese, 0x1b00, 0x1b7f },
/* Sundanese: U+1B80U+1BBF */
{ Script_Sundanese, 0x1b80, 0x1bbf },
/* Batak: U+1BC0U+1BFF */
{ Script_Symbol, 0x1bc0, 0x1bff },
/* Lepcha: U+1C00U+1C4F */
{ Script_Lepcha, 0x1c00, 0x1c4f },
/* Ol Chiki: U+1C50U+1C7F */
{ Script_OlChiki, 0x1c50, 0x1c7f },
/* Sundanese Supplement: U+1CC0U+1CCF */
{ Script_Symbol, 0x1cc0, 0x1ccf },
/* Vedic Extensions: U+1CD0-U+1CFF */
{ Script_Devanagari, 0x1cd0, 0x1cff },
/* Phonetic Extensions: U+1D00U+1DBF */
{ Script_Latin, 0x1d00, 0x1dbf },
/* Combining Diacritical Marks Supplement: U+1DC0U+1DFF */
{ Script_Symbol, 0x1dc0, 0x1dff },
/* Latin Extended Additional: U+1E00U+1EFF */
{ Script_Latin, 0x1e00, 0x1eff },
/* Greek Extended: U+1F00U+1F00 */
{ Script_Greek, 0x1f00, 0x1fff },
/* General Punctuation: U+2000U+206f */
/* Superscripts and Subscripts: U+2070U+209f */
/* Currency Symbols: U+20A0U+20CF */
/* Combining Diacritical Marks for Symbols: U+20D0U+20FF */
/* Letterlike Symbols: U+2100U+214F */
/* Number Forms: U+2150U+218F */
/* Arrows: U+2190U+21FF */
/* Mathematical Operators: U+2200U+22FF */
/* Miscellaneous Technical: U+2300U+23FF */
/* Control Pictures: U+2400U+243F */
/* Optical Character Recognition: U+2440U+245F */
/* Enclosed Alphanumerics: U+2460U+24FF */
/* Box Drawing: U+2500U+25FF */
/* Block Elements: U+2580U+259F */
/* Geometric Shapes: U+25A0U+25FF */
/* Miscellaneous Symbols: U+2600U+26FF */
/* Dingbats: U+2700U+27BF */
/* Miscellaneous Mathematical Symbols-A: U+27C0U+27EF */
/* Supplemental Arrows-A: U+27F0U+27FF */
/* Braille Patterns: U+2800U+28FF */
/* Supplemental Arrows-B: U+2900U+297F */
/* Miscellaneous Mathematical Symbols-B: U+2980U+29FF */
/* Supplemental Mathematical Operators: U+2A00U+2AFF */
/* Miscellaneous Symbols and Arrows: U+2B00U+2BFF */
{ Script_Symbol, 0x2000, 0x2bff },
/* Glagolitic: U+2C00U+2C5F */
{ Script_Glagolitic, 0x2c00, 0x2c5f },
/* Latin Extended-C: U+2C60U+2C7F */
{ Script_Latin, 0x2c60, 0x2c7f },
/* Coptic: U+2C80U+2CFF */
{ Script_Coptic, 0x2c80, 0x2cff },
/* Georgian Supplement: U+2D00U+2D2F */
{ Script_Georgian, 0x2d00, 0x2d2f },
/* Tifinagh: U+2D30U+2D7F */
{ Script_Tifinagh, 0x2d30, 0x2d7f },
/* unsupported range */
{ Script_Unknown }
};
static UINT16 get_char_script( WCHAR c )
{
DWORD ch = c;
unsigned int i;
for (i = 0; i < sizeof(script_ranges)/sizeof(struct script_range); i++)
{
const struct script_range *range = &script_ranges[i];
if (range->script == Script_Unknown || (range->first <= ch && range->last >= ch))
return range->script;
}
return Script_Unknown;
}
static HRESULT analyze_script(const WCHAR *text, UINT32 len, IDWriteTextAnalysisSink *sink)
{
DWRITE_SCRIPT_ANALYSIS sa;
UINT32 pos, i, length;
if (!len) return S_OK;
sa.script = get_char_script(*text);
pos = 0;
length = 1;
for (i = 1; i < len; i++)
{
UINT16 script = get_char_script(text[i]);
/* Script_Latin_Symb script type is ignored when preceded or followed by another script */
if (sa.script == Script_Symbol) sa.script = script;
if (script == Script_Symbol) script = sa.script;
/* this is a length of a sequence to be reported next */
if (sa.script == script) length++;
if (sa.script != script)
{
HRESULT hr;
sa.shapes = sa.script != Script_Controls ? DWRITE_SCRIPT_SHAPES_DEFAULT : DWRITE_SCRIPT_SHAPES_NO_VISUAL;
hr = IDWriteTextAnalysisSink_SetScriptAnalysis(sink, pos, length, &sa);
if (FAILED(hr)) return hr;
pos = i;
length = 1;
sa.script = script;
}
}
/* 1 length case or normal completion call */
sa.shapes = sa.script != Script_Controls ? DWRITE_SCRIPT_SHAPES_DEFAULT : DWRITE_SCRIPT_SHAPES_NO_VISUAL;
return IDWriteTextAnalysisSink_SetScriptAnalysis(sink, pos, length, &sa);
}
struct linebreaking_state {
DWRITE_LINE_BREAKPOINT *breakpoints;
UINT32 count;
};
enum BreakConditionLocation {
BreakConditionBefore,
BreakConditionAfter
};
enum linebreaking_classes {
b_BK = 1,
b_CR,
b_LF,
b_CM,
b_SG,
b_GL,
b_CB,
b_SP,
b_ZW,
b_NL,
b_WJ,
b_JL,
b_JV,
b_JT,
b_H2,
b_H3,
b_XX,
b_OP,
b_CL,
b_CP,
b_QU,
b_NS,
b_EX,
b_SY,
b_IS,
b_PR,
b_PO,
b_NU,
b_AL,
b_ID,
b_IN,
b_HY,
b_BB,
b_BA,
b_SA,
b_AI,
b_B2,
b_HL,
b_CJ,
b_RI
};
/* "Can break" is a weak condition, stronger "may not break" and "must break" override it. Initially all conditions are
set to "can break" and could only be changed once. */
static inline void set_break_condition(UINT32 pos, enum BreakConditionLocation location, DWRITE_BREAK_CONDITION condition,
struct linebreaking_state *state)
{
if (location == BreakConditionBefore) {
if (state->breakpoints[pos].breakConditionBefore != DWRITE_BREAK_CONDITION_CAN_BREAK)
return;
state->breakpoints[pos].breakConditionBefore = condition;
if (pos > 0)
state->breakpoints[pos-1].breakConditionAfter = condition;
}
else {
if (state->breakpoints[pos].breakConditionAfter != DWRITE_BREAK_CONDITION_CAN_BREAK)
return;
state->breakpoints[pos].breakConditionAfter = condition;
if (pos + 1 < state->count)
state->breakpoints[pos+1].breakConditionBefore = condition;
}
}
static inline WCHAR get_table_entry(const unsigned short *table, WCHAR ch)
{
return table[table[table[ch >> 8] + ((ch >> 4) & 0x0f)] + (ch & 0xf)];
}
static HRESULT analyze_linebreaks(const WCHAR *text, UINT32 count, DWRITE_LINE_BREAKPOINT *breakpoints)
{
struct linebreaking_state state;
short *break_class;
int i, j;
break_class = heap_alloc(count*sizeof(short));
if (!break_class)
return E_OUTOFMEMORY;
state.breakpoints = breakpoints;
state.count = count;
/* LB31 - allow breaks everywhere. It will be overridden if needed as
other rules dictate. */
for (i = 0; i < count; i++)
{
break_class[i] = get_table_entry(wine_linebreak_table, text[i]);
breakpoints[i].breakConditionBefore = DWRITE_BREAK_CONDITION_CAN_BREAK;
breakpoints[i].breakConditionAfter = DWRITE_BREAK_CONDITION_CAN_BREAK;
breakpoints[i].isWhitespace = break_class[i] == b_BK || break_class[i] == b_ZW || break_class[i] == b_SP || isspaceW(text[i]);
breakpoints[i].isSoftHyphen = FALSE;
breakpoints[i].padding = 0;
/* LB1 - resolve some classes. TODO: use external algorithms for these classes. */
switch (break_class[i])
{
case b_AI:
case b_SA:
case b_SG:
case b_XX:
break_class[i] = b_AL;
break;
case b_CJ:
break_class[i] = b_NS;
break;
}
}
/* LB2 - never break at the start */
set_break_condition(0, BreakConditionBefore, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
/* LB3 - always break at the end. This one is ignored. */
for (i = 0; i < count; i++)
{
switch (break_class[i])
{
/* LB4 - LB6 */
case b_CR:
/* LB5 - don't break CR x LF */
if (i < count-1 && break_class[i+1] == b_LF)
{
set_break_condition(i, BreakConditionBefore, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
break;
}
case b_LF:
case b_NL:
case b_BK:
/* LB4 - LB5 - always break after hard breaks */
set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MUST_BREAK, &state);
/* LB6 - do not break before hard breaks */
set_break_condition(i, BreakConditionBefore, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
break;
/* LB7 - do not break before spaces */
case b_SP:
set_break_condition(i, BreakConditionBefore, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
break;
case b_ZW:
set_break_condition(i, BreakConditionBefore, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
/* LB8 - break before character after zero-width space, skip spaces inbetween */
while (i < count-1 && break_class[i+1] == b_SP)
i++;
set_break_condition(i, BreakConditionBefore, DWRITE_BREAK_CONDITION_CAN_BREAK, &state);
break;
}
}
/* LB9 - LB10 */
for (i = 0; i < count; i++)
{
if (break_class[i] == b_CM)
{
if (i > 0)
{
switch (break_class[i-1])
{
case b_SP:
case b_BK:
case b_CR:
case b_LF:
case b_NL:
case b_ZW:
break_class[i] = b_AL;
break;
default:
break_class[i] = break_class[i-1];
}
}
else break_class[i] = b_AL;
}
}
for (i = 0; i < count; i++)
{
switch (break_class[i])
{
/* LB11 - don't break before and after word joiner */
case b_WJ:
set_break_condition(i, BreakConditionBefore, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
break;
/* LB12 - don't break after glue */
case b_GL:
set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
/* LB12a */
if (i > 0)
{
if (break_class[i-1] != b_SP && break_class[i-1] != b_BA && break_class[i-1] != b_HY)
set_break_condition(i, BreakConditionBefore, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
}
break;
/* LB13 */
case b_CL:
case b_CP:
case b_EX:
case b_IS:
case b_SY:
set_break_condition(i, BreakConditionBefore, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
break;
/* LB14 */
case b_OP:
set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
while (i < count-1 && break_class[i+1] == b_SP) {
set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
i++;
}
break;
/* LB15 */
case b_QU:
j = i+1;
while (j < count-1 && break_class[j] == b_SP)
j++;
if (break_class[j] == b_OP)
for (; j > i; j--)
set_break_condition(j, BreakConditionBefore, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
break;
/* LB16 */
case b_NS:
j = i-1;
while(j > 0 && break_class[j] == b_SP)
j--;
if (break_class[j] == b_CL || break_class[j] == b_CP)
for (j++; j <= i; j++)
set_break_condition(j, BreakConditionBefore, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
break;
/* LB17 */
case b_B2:
j = i+1;
while (j < count && break_class[j] == b_SP)
j++;
if (break_class[j] == b_B2)
for (; j > i; j--)
set_break_condition(j, BreakConditionBefore, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
break;
}
}
for (i = 0; i < count; i++)
{
switch(break_class[i])
{
/* LB18 - break is allowed after space */
case b_SP:
set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_CAN_BREAK, &state);
break;
/* LB19 - don't break before or after quotation mark */
case b_QU:
set_break_condition(i, BreakConditionBefore, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
break;
/* LB20 */
case b_CB:
set_break_condition(i, BreakConditionBefore, DWRITE_BREAK_CONDITION_CAN_BREAK, &state);
set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_CAN_BREAK, &state);
break;
/* LB21 */
case b_BA:
case b_HY:
case b_NS:
set_break_condition(i, BreakConditionBefore, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
break;
case b_BB:
set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
break;
/* LB21a */
case b_HL:
if (i < count-2)
switch (break_class[i+1])
{
case b_HY:
case b_BA:
set_break_condition(i+1, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
}
break;
/* LB22 */
case b_IN:
if (i > 0)
{
switch (break_class[i-1])
{
case b_AL:
case b_HL:
case b_ID:
case b_IN:
case b_NU:
set_break_condition(i, BreakConditionBefore, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
}
}
break;
}
if (i < count-1)
{
/* LB23 */
if ((break_class[i] == b_ID && break_class[i+1] == b_PO) ||
(break_class[i] == b_AL && break_class[i+1] == b_NU) ||
(break_class[i] == b_HL && break_class[i+1] == b_NU) ||
(break_class[i] == b_NU && break_class[i+1] == b_AL) ||
(break_class[i] == b_NU && break_class[i+1] == b_HL))
set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
/* LB24 */
if ((break_class[i] == b_PR && break_class[i+1] == b_ID) ||
(break_class[i] == b_PR && break_class[i+1] == b_AL) ||
(break_class[i] == b_PR && break_class[i+1] == b_HL) ||
(break_class[i] == b_PO && break_class[i+1] == b_AL) ||
(break_class[i] == b_PO && break_class[i+1] == b_HL))
set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
/* LB25 */
if ((break_class[i] == b_CL && break_class[i+1] == b_PO) ||
(break_class[i] == b_CP && break_class[i+1] == b_PO) ||
(break_class[i] == b_CL && break_class[i+1] == b_PR) ||
(break_class[i] == b_CP && break_class[i+1] == b_PR) ||
(break_class[i] == b_NU && break_class[i+1] == b_PO) ||
(break_class[i] == b_NU && break_class[i+1] == b_PR) ||
(break_class[i] == b_PO && break_class[i+1] == b_OP) ||
(break_class[i] == b_PO && break_class[i+1] == b_NU) ||
(break_class[i] == b_PR && break_class[i+1] == b_OP) ||
(break_class[i] == b_PR && break_class[i+1] == b_NU) ||
(break_class[i] == b_HY && break_class[i+1] == b_NU) ||
(break_class[i] == b_IS && break_class[i+1] == b_NU) ||
(break_class[i] == b_NU && break_class[i+1] == b_NU) ||
(break_class[i] == b_SY && break_class[i+1] == b_NU))
set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
/* LB26 */
if (break_class[i] == b_JL)
{
switch (break_class[i+1])
{
case b_JL:
case b_JV:
case b_H2:
case b_H3:
set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
}
}
if ((break_class[i] == b_JV || break_class[i] == b_H2) &&
(break_class[i+1] == b_JV || break_class[i+1] == b_JT))
set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
if ((break_class[i] == b_JT || break_class[i] == b_H3) &&
break_class[i+1] == b_JT)
set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
/* LB27 */
switch (break_class[i])
{
case b_JL:
case b_JV:
case b_JT:
case b_H2:
case b_H3:
if (break_class[i+1] == b_IN || break_class[i+1] == b_PO)
set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
}
if (break_class[i] == b_PO)
{
switch (break_class[i+1])
{
case b_JL:
case b_JV:
case b_JT:
case b_H2:
case b_H3:
set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
}
}
/* LB28 */
if ((break_class[i] == b_AL && break_class[i+1] == b_AL) ||
(break_class[i] == b_AL && break_class[i+1] == b_HL) ||
(break_class[i] == b_HL && break_class[i+1] == b_AL) ||
(break_class[i] == b_HL && break_class[i+1] == b_HL))
set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
/* LB29 */
if ((break_class[i] == b_IS && break_class[i+1] == b_AL) ||
(break_class[i] == b_IS && break_class[i+1] == b_HL))
set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
/* LB30 */
if ((break_class[i] == b_AL || break_class[i] == b_HL || break_class[i] == b_NU) &&
break_class[i+1] == b_OP)
set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
if (break_class[i] == b_CP &&
(break_class[i+1] == b_AL || break_class[i] == b_HL || break_class[i] == b_NU))
set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
/* LB30a */
if (break_class[i] == b_RI && break_class[i+1] == b_RI)
set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
}
}
heap_free(break_class);
return S_OK;
}
static HRESULT WINAPI dwritetextanalyzer_QueryInterface(IDWriteTextAnalyzer *iface, REFIID riid, void **obj)
{
TRACE("(%s %p)\n", debugstr_guid(riid), obj);
if (IsEqualIID(riid, &IID_IUnknown) || IsEqualIID(riid, &IID_IDWriteTextAnalyzer))
{
*obj = iface;
return S_OK;
}
*obj = NULL;
return E_NOINTERFACE;
}
static ULONG WINAPI dwritetextanalyzer_AddRef(IDWriteTextAnalyzer *iface)
{
return 2;
}
static ULONG WINAPI dwritetextanalyzer_Release(IDWriteTextAnalyzer *iface)
{
return 1;
}
static HRESULT WINAPI dwritetextanalyzer_AnalyzeScript(IDWriteTextAnalyzer *iface,
IDWriteTextAnalysisSource* source, UINT32 position, UINT32 length, IDWriteTextAnalysisSink* sink)
{
const WCHAR *text;
HRESULT hr;
UINT32 len;
TRACE("(%p %u %u %p)\n", source, position, length, sink);
hr = IDWriteTextAnalysisSource_GetTextAtPosition(source, position, &text, &len);
if (FAILED(hr)) return hr;
return analyze_script(text, len, sink);
}
static HRESULT WINAPI dwritetextanalyzer_AnalyzeBidi(IDWriteTextAnalyzer *iface,
IDWriteTextAnalysisSource* source, UINT32 position, UINT32 length, IDWriteTextAnalysisSink* sink)
{
FIXME("(%p %u %u %p): stub\n", source, position, length, sink);
return E_NOTIMPL;
}
static HRESULT WINAPI dwritetextanalyzer_AnalyzeNumberSubstitution(IDWriteTextAnalyzer *iface,
IDWriteTextAnalysisSource* source, UINT32 position, UINT32 length, IDWriteTextAnalysisSink* sink)
{
FIXME("(%p %u %u %p): stub\n", source, position, length, sink);
return E_NOTIMPL;
}
static HRESULT WINAPI dwritetextanalyzer_AnalyzeLineBreakpoints(IDWriteTextAnalyzer *iface,
IDWriteTextAnalysisSource* source, UINT32 position, UINT32 length, IDWriteTextAnalysisSink* sink)
{
DWRITE_LINE_BREAKPOINT *breakpoints = NULL;
WCHAR *buff = NULL;
const WCHAR *text;
HRESULT hr;
UINT32 len;
TRACE("(%p %u %u %p)\n", source, position, length, sink);
if (length == 0)
return S_OK;
/* get some, check for length */
text = NULL;
len = 0;
hr = IDWriteTextAnalysisSource_GetTextAtPosition(source, position, &text, &len);
if (FAILED(hr)) return hr;
if (len < length) {
UINT32 read;
buff = heap_alloc(length*sizeof(WCHAR));
if (!buff)
return E_OUTOFMEMORY;
memcpy(buff, text, len*sizeof(WCHAR));
read = len;
while (read < length && text) {
text = NULL;
len = 0;
hr = IDWriteTextAnalysisSource_GetTextAtPosition(source, read, &text, &len);
if (FAILED(hr))
goto done;
memcpy(&buff[read], text, min(len, length-read)*sizeof(WCHAR));
read += len;
}
text = buff;
}
breakpoints = heap_alloc(length*sizeof(*breakpoints));
if (!breakpoints) {
hr = E_OUTOFMEMORY;
goto done;
}
hr = analyze_linebreaks(text, length, breakpoints);
if (FAILED(hr))
goto done;
hr = IDWriteTextAnalysisSink_SetLineBreakpoints(sink, position, length, breakpoints);
done:
heap_free(breakpoints);
heap_free(buff);
return hr;
}
static HRESULT WINAPI dwritetextanalyzer_GetGlyphs(IDWriteTextAnalyzer *iface,
WCHAR const* text, UINT32 length, IDWriteFontFace* font_face, BOOL is_sideways,
BOOL is_rtl, DWRITE_SCRIPT_ANALYSIS const* analysis, WCHAR const* locale,
IDWriteNumberSubstitution* substitution, DWRITE_TYPOGRAPHIC_FEATURES const** features,
UINT32 const* feature_range_len, UINT32 feature_ranges, UINT32 max_glyph_count,
UINT16* clustermap, DWRITE_SHAPING_TEXT_PROPERTIES* text_props, UINT16* glyph_indices,
DWRITE_SHAPING_GLYPH_PROPERTIES* glyph_props, UINT32* actual_glyph_count)
{
FIXME("(%s:%u %p %d %d %p %s %p %p %p %u %u %p %p %p %p %p): stub\n", debugstr_wn(text, length),
length, font_face, is_sideways, is_rtl, analysis, debugstr_w(locale), substitution, features, feature_range_len,
feature_ranges, max_glyph_count, clustermap, text_props, glyph_indices, glyph_props, actual_glyph_count);
return E_NOTIMPL;
}
static HRESULT WINAPI dwritetextanalyzer_GetGlyphPlacements(IDWriteTextAnalyzer *iface,
WCHAR const* text, UINT16 const* clustermap, DWRITE_SHAPING_TEXT_PROPERTIES* props,
UINT32 text_len, UINT16 const* glyph_indices, DWRITE_SHAPING_GLYPH_PROPERTIES const* glyph_props,
UINT32 glyph_count, IDWriteFontFace * font_face, FLOAT fontEmSize, BOOL is_sideways, BOOL is_rtl,
DWRITE_SCRIPT_ANALYSIS const* analysis, WCHAR const* locale, DWRITE_TYPOGRAPHIC_FEATURES const** features,
UINT32 const* feature_range_len, UINT32 feature_ranges, FLOAT* glyph_advances, DWRITE_GLYPH_OFFSET* glyph_offsets)
{
FIXME("(%s %p %p %u %p %p %u %p %f %d %d %p %s %p %p %u %p %p): stub\n", debugstr_w(text),
clustermap, props, text_len, glyph_indices, glyph_props, glyph_count, font_face, fontEmSize, is_sideways,
is_rtl, analysis, debugstr_w(locale), features, feature_range_len, feature_ranges, glyph_advances, glyph_offsets);
return E_NOTIMPL;
}
static HRESULT WINAPI dwritetextanalyzer_GetGdiCompatibleGlyphPlacements(IDWriteTextAnalyzer *iface,
WCHAR const* text, UINT16 const* clustermap, DWRITE_SHAPING_TEXT_PROPERTIES* props,
UINT32 text_len, UINT16 const* glyph_indices, DWRITE_SHAPING_GLYPH_PROPERTIES const* glyph_props,
UINT32 glyph_count, IDWriteFontFace * font_face, FLOAT fontEmSize, FLOAT pixels_per_dip,
DWRITE_MATRIX const* transform, BOOL use_gdi_natural, BOOL is_sideways, BOOL is_rtl,
DWRITE_SCRIPT_ANALYSIS const* analysis, WCHAR const* locale, DWRITE_TYPOGRAPHIC_FEATURES const** features,
UINT32 const* feature_range_lengths, UINT32 feature_ranges, FLOAT* glyph_advances, DWRITE_GLYPH_OFFSET* glyph_offsets)
{
FIXME("(%s %p %p %u %p %p %u %p %f %f %p %d %d %d %p %s %p %p %u %p %p): stub\n", debugstr_w(text),
clustermap, props, text_len, glyph_indices, glyph_props, glyph_count, font_face, fontEmSize, pixels_per_dip,
transform, use_gdi_natural, is_sideways, is_rtl, analysis, debugstr_w(locale), features, feature_range_lengths,
feature_ranges, glyph_advances, glyph_offsets);
return E_NOTIMPL;
}
static const struct IDWriteTextAnalyzerVtbl textanalyzervtbl = {
dwritetextanalyzer_QueryInterface,
dwritetextanalyzer_AddRef,
dwritetextanalyzer_Release,
dwritetextanalyzer_AnalyzeScript,
dwritetextanalyzer_AnalyzeBidi,
dwritetextanalyzer_AnalyzeNumberSubstitution,
dwritetextanalyzer_AnalyzeLineBreakpoints,
dwritetextanalyzer_GetGlyphs,
dwritetextanalyzer_GetGlyphPlacements,
dwritetextanalyzer_GetGdiCompatibleGlyphPlacements
};
static IDWriteTextAnalyzer textanalyzer = { &textanalyzervtbl };
HRESULT get_textanalyzer(IDWriteTextAnalyzer **ret)
{
*ret = &textanalyzer;
return S_OK;
}