Sweden-Number/dlls/dwrite/analyzer.c

896 lines
33 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/*
* Text analyzer
*
* Copyright 2011 Aric Stewart for CodeWeavers
* Copyright 2012, 2014 Nikolay Sivov for CodeWeavers
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
*/
#define COBJMACROS
#include "dwrite.h"
#include "dwrite_private.h"
WINE_DEFAULT_DEBUG_CHANNEL(dwrite);
extern const unsigned short wine_linebreak_table[];
/* Script identifiers produced by get_char_script() and reported through
DWRITE_SCRIPT_ANALYSIS.script. The numbering is sparse; the values are kept
exactly as they are (presumably they mirror the ids used by the native
implementation - TODO confirm).
Special values used by this file:
- Script_Controls: runs of this script are reported with DWRITE_SCRIPT_SHAPES_NO_VISUAL;
- Script_Symbol: neutral characters that analyze_script() merges into the neighbouring run;
- Script_Unknown: result for unsupported characters, also the terminator of script_ranges[]. */
enum scriptcode {
Script_Arabic = 0,
Script_Armenian = 1,
Script_Balinese = 2,
Script_Bengali = 3,
Script_Buginese = 6,
Script_Canadian = 8,
Script_Cherokee = 11,
Script_Controls = 12,
Script_Coptic = 13,
Script_Cyrillic = 16,
Script_Devanagari = 18,
Script_Ethiopic = 19,
Script_Georgian = 20,
Script_Glagolitic = 22,
Script_Greek = 23,
Script_Gujarati = 24,
Script_Gurmukhi = 25,
Script_Hangul = 27,
Script_Hebrew = 29,
Script_Kannada = 32,
Script_Khmer = 36,
Script_Lao = 37,
Script_Latin = 38,
Script_Lepcha = 39,
Script_Limbu = 40,
Script_Malayalam = 44,
Script_Mongolian = 45,
Script_Myanmar = 46,
Script_New_TaiLue = 47,
Script_NKo = 48,
Script_Ogham = 49,
Script_OlChiki = 50,
Script_Oriya = 53,
Script_Runic = 58,
Script_Sinhala = 61,
Script_Sundanese = 62,
Script_Syriac = 64,
Script_TaiLe = 67,
Script_Tamil = 68,
Script_Telugu = 69,
Script_Thaana = 70,
Script_Thai = 71,
Script_Tibetan = 72,
Script_Tifinagh = 73,
Script_Symbol = 77,
/* all bits set in a UINT16, so it fits the 16-bit script field */
Script_Unknown = (UINT16)-1
};
/* One entry of the script lookup table: every code unit in the inclusive
range [first, last] is assigned to 'script'. */
struct script_range {
UINT16 script; /* enum scriptcode value */
DWORD first;   /* first code unit of the range, inclusive */
DWORD last;    /* last code unit of the range, inclusive */
};
/* Script lookup table scanned linearly by get_char_script(). The comments name
the Unicode blocks each entry covers. The final { Script_Unknown } entry ends the
scan, so any character not covered by an earlier entry is reported as
Script_Unknown. */
static const struct script_range script_ranges[] = {
/* C0 Controls: U+0000-U+001F */
/* ASCII punctuation and symbols: U+0020-U+002F */
/* ASCII digits: U+0030-U+0039 */
/* ASCII punctuation and symbols: U+003A-U+0040 */
{ Script_Symbol, 0x00, 0x040 },
/* Latin uppercase: U+0041-U+005A */
{ Script_Latin, 0x41, 0x5a },
/* ASCII punctuation and symbols: U+005B-U+0060 */
{ Script_Symbol, 0x5b, 0x060 },
/* Latin lowercase: U+0061-U+007A */
{ Script_Latin, 0x61, 0x7a },
/* ASCII punctuation and symbols, control char DEL: U+007B-U+007F */
{ Script_Symbol, 0x7b, 0x7f },
/* C1 Controls: U+0080-U+009F */
{ Script_Controls, 0x80, 0x9f },
/* Latin-1 Supplement: U+00A0-U+00FF */
/* Latin Extended-A: U+0100-U+017F */
/* Latin Extended-B: U+0180-U+024F */
/* IPA Extensions: U+0250-U+02AF */
/* Spacing Modifier Letters: U+02B0-U+02FF */
{ Script_Latin, 0xa0, 0x2ff },
/* Combining Diacritical Marks: U+0300-U+036F */
{ Script_Symbol, 0x300, 0x36f },
/* Greek: U+0370-U+03E1 */
{ Script_Greek, 0x370, 0x3e1 },
/* Coptic: U+03E2-U+03EF */
{ Script_Coptic, 0x3e2, 0x3ef },
/* Greek: U+03F0-U+03FF */
{ Script_Greek, 0x3f0, 0x3ff },
/* Cyrillic: U+0400-U+04FF */
/* Cyrillic Supplement: U+0500-U+052F */
/* The Cyrillic Supplement range is incomplete in Unicode 5.2, which does not
define some Abkhaz and Azerbaijani letters; the Unicode 6.0 range is used here. */
{ Script_Cyrillic, 0x400, 0x52f },
/* Armenian: U+0530-U+058F */
{ Script_Armenian, 0x530, 0x58f },
/* Hebrew: U+0590-U+05FF */
{ Script_Hebrew, 0x590, 0x5ff },
/* Arabic: U+0600-U+06FF */
{ Script_Arabic, 0x600, 0x6ff },
/* Syriac: U+0700-U+074F */
{ Script_Syriac, 0x700, 0x74f },
/* Arabic Supplement: U+0750-U+077F */
{ Script_Arabic, 0x750, 0x77f },
/* Thaana: U+0780-U+07BF */
{ Script_Thaana, 0x780, 0x7bf },
/* N'Ko: U+07C0-U+07FF */
{ Script_NKo, 0x7c0, 0x7ff },
/* Devanagari: U+0900-U+097F */
{ Script_Devanagari, 0x900, 0x97f },
/* Bengali: U+0980-U+09FF */
{ Script_Bengali, 0x980, 0x9ff },
/* Gurmukhi: U+0A00-U+0A7F */
{ Script_Gurmukhi, 0xa00, 0xa7f },
/* Gujarati: U+0A80-U+0AFF */
{ Script_Gujarati, 0xa80, 0xaff },
/* Oriya: U+0B00-U+0B7F */
{ Script_Oriya, 0xb00, 0xb7f },
/* Tamil: U+0B80-U+0BFF */
{ Script_Tamil, 0xb80, 0xbff },
/* Telugu: U+0C00-U+0C7F */
{ Script_Telugu, 0xc00, 0xc7f },
/* Kannada: U+0C80-U+0CFF */
{ Script_Kannada, 0xc80, 0xcff },
/* Malayalam: U+0D00-U+0D7F */
{ Script_Malayalam, 0xd00, 0xd7f },
/* Sinhala: U+0D80-U+0DFF */
{ Script_Sinhala, 0xd80, 0xdff },
/* Thai: U+0E00-U+0E7F */
{ Script_Thai, 0xe00, 0xe7f },
/* Lao: U+0E80-U+0EFF */
{ Script_Lao, 0xe80, 0xeff },
/* Tibetan: U+0F00-U+0FFF */
{ Script_Tibetan, 0xf00, 0xfff },
/* Myanmar: U+1000-U+109F */
{ Script_Myanmar, 0x1000, 0x109f },
/* Georgian: U+10A0-U+10FF */
{ Script_Georgian, 0x10a0, 0x10ff },
/* Hangul Jamo: U+1100-U+11FF */
{ Script_Hangul, 0x1100, 0x11ff },
/* Ethiopic: U+1200-U+137F */
/* Ethiopic Extensions: U+1380-U+139F */
{ Script_Ethiopic, 0x1200, 0x139f },
/* Cherokee: U+13A0-U+13FF */
{ Script_Cherokee, 0x13a0, 0x13ff },
/* Canadian Aboriginal Syllabics: U+1400-U+167F */
{ Script_Canadian, 0x1400, 0x167f },
/* Ogham: U+1680-U+169F */
{ Script_Ogham, 0x1680, 0x169f },
/* Runic: U+16A0-U+16F0 */
{ Script_Runic, 0x16a0, 0x16f0 },
/* Khmer: U+1780-U+17FF */
{ Script_Khmer, 0x1780, 0x17ff },
/* Mongolian: U+1800-U+18AF */
{ Script_Mongolian, 0x1800, 0x18af },
/* Limbu: U+1900-U+194F */
{ Script_Limbu, 0x1900, 0x194f },
/* Tai Le: U+1950-U+197F */
{ Script_TaiLe, 0x1950, 0x197f },
/* New Tai Lue: U+1980-U+19DF */
{ Script_New_TaiLue, 0x1980, 0x19df },
/* Khmer Symbols: U+19E0-U+19FF */
{ Script_Khmer, 0x19e0, 0x19ff },
/* Buginese: U+1A00-U+1A1F */
{ Script_Buginese, 0x1a00, 0x1a1f },
/* Tai Tham: U+1A20-U+1AAF */
{ Script_Symbol, 0x1a20, 0x1aaf },
/* Balinese: U+1B00-U+1B7F */
{ Script_Balinese, 0x1b00, 0x1b7f },
/* Sundanese: U+1B80-U+1BBF */
{ Script_Sundanese, 0x1b80, 0x1bbf },
/* Batak: U+1BC0-U+1BFF */
{ Script_Symbol, 0x1bc0, 0x1bff },
/* Lepcha: U+1C00-U+1C4F */
{ Script_Lepcha, 0x1c00, 0x1c4f },
/* Ol Chiki: U+1C50-U+1C7F */
{ Script_OlChiki, 0x1c50, 0x1c7f },
/* Sundanese Supplement: U+1CC0-U+1CCF */
{ Script_Symbol, 0x1cc0, 0x1ccf },
/* Vedic Extensions: U+1CD0-U+1CFF */
{ Script_Devanagari, 0x1cd0, 0x1cff },
/* Phonetic Extensions: U+1D00-U+1DBF */
{ Script_Latin, 0x1d00, 0x1dbf },
/* Combining Diacritical Marks Supplement: U+1DC0-U+1DFF */
{ Script_Symbol, 0x1dc0, 0x1dff },
/* Latin Extended Additional: U+1E00-U+1EFF */
{ Script_Latin, 0x1e00, 0x1eff },
/* Greek Extended: U+1F00-U+1FFF */
{ Script_Greek, 0x1f00, 0x1fff },
/* General Punctuation: U+2000-U+206F */
/* Superscripts and Subscripts: U+2070-U+209F */
/* Currency Symbols: U+20A0-U+20CF */
/* Combining Diacritical Marks for Symbols: U+20D0-U+20FF */
/* Letterlike Symbols: U+2100-U+214F */
/* Number Forms: U+2150-U+218F */
/* Arrows: U+2190-U+21FF */
/* Mathematical Operators: U+2200-U+22FF */
/* Miscellaneous Technical: U+2300-U+23FF */
/* Control Pictures: U+2400-U+243F */
/* Optical Character Recognition: U+2440-U+245F */
/* Enclosed Alphanumerics: U+2460-U+24FF */
/* Box Drawing: U+2500-U+257F */
/* Block Elements: U+2580-U+259F */
/* Geometric Shapes: U+25A0-U+25FF */
/* Miscellaneous Symbols: U+2600-U+26FF */
/* Dingbats: U+2700-U+27BF */
/* Miscellaneous Mathematical Symbols-A: U+27C0-U+27EF */
/* Supplemental Arrows-A: U+27F0-U+27FF */
/* Braille Patterns: U+2800-U+28FF */
/* Supplemental Arrows-B: U+2900-U+297F */
/* Miscellaneous Mathematical Symbols-B: U+2980-U+29FF */
/* Supplemental Mathematical Operators: U+2A00-U+2AFF */
/* Miscellaneous Symbols and Arrows: U+2B00-U+2BFF */
{ Script_Symbol, 0x2000, 0x2bff },
/* Glagolitic: U+2C00-U+2C5F */
{ Script_Glagolitic, 0x2c00, 0x2c5f },
/* Latin Extended-C: U+2C60-U+2C7F */
{ Script_Latin, 0x2c60, 0x2c7f },
/* Coptic: U+2C80-U+2CFF */
{ Script_Coptic, 0x2c80, 0x2cff },
/* Georgian Supplement: U+2D00-U+2D2F */
{ Script_Georgian, 0x2d00, 0x2d2f },
/* Tifinagh: U+2D30-U+2D7F */
{ Script_Tifinagh, 0x2d30, 0x2d7f },
/* unsupported range; terminates the lookup in get_char_script() */
{ Script_Unknown }
};
/* Returns the script id of a single UTF-16 code unit.
 *
 * script_ranges[] is walked front to back; the first entry whose inclusive
 * [first, last] range contains the character wins. Reaching the Script_Unknown
 * terminator entry (or the end of the array) yields Script_Unknown. */
static UINT16 get_char_script( WCHAR c )
{
    const struct script_range *entry = script_ranges;
    const struct script_range *end = script_ranges + sizeof(script_ranges)/sizeof(struct script_range);
    const DWORD codepoint = c;

    for (; entry != end; entry++)
    {
        /* terminator entry: nothing matched before it */
        if (entry->script == Script_Unknown)
            return entry->script;

        if (codepoint >= entry->first && codepoint <= entry->last)
            return entry->script;
    }

    return Script_Unknown;
}
/* Splits 'text' (len UTF-16 code units) into runs of equal script and reports
 * each run to the sink with IDWriteTextAnalysisSink_SetScriptAnalysis().
 *
 * Run positions are relative to the start of 'text' (the first run starts at 0).
 * Script_Symbol characters are neutral: they take the script of the run they
 * follow, and a run that has only seen symbols so far takes the script of the
 * first non-symbol character. Runs of Script_Controls are reported with
 * DWRITE_SCRIPT_SHAPES_NO_VISUAL, everything else with DWRITE_SCRIPT_SHAPES_DEFAULT.
 *
 * Returns S_OK for empty input, otherwise the first failure returned by the sink
 * or the result of the final sink call. */
static HRESULT analyze_script(const WCHAR *text, UINT32 len, IDWriteTextAnalysisSink *sink)
{
    DWRITE_SCRIPT_ANALYSIS sa;
    UINT32 run_start = 0, run_length = 1, idx;

    if (!len) return S_OK;

    sa.script = get_char_script(*text);

    for (idx = 1; idx < len; idx++)
    {
        UINT16 next = get_char_script(text[idx]);
        HRESULT hr;

        /* symbols are absorbed by whichever script precedes or follows them */
        if (sa.script == Script_Symbol) sa.script = next;
        if (next == Script_Symbol) next = sa.script;

        if (sa.script == next)
        {
            /* still inside the run that will be reported next */
            run_length++;
            continue;
        }

        /* script changed: flush the finished run and start a new one here */
        if (sa.script != Script_Controls)
            sa.shapes = DWRITE_SCRIPT_SHAPES_DEFAULT;
        else
            sa.shapes = DWRITE_SCRIPT_SHAPES_NO_VISUAL;

        hr = IDWriteTextAnalysisSink_SetScriptAnalysis(sink, run_start, run_length, &sa);
        if (FAILED(hr)) return hr;

        run_start = idx;
        run_length = 1;
        sa.script = next;
    }

    /* single-character input or the trailing run */
    if (sa.script != Script_Controls)
        sa.shapes = DWRITE_SCRIPT_SHAPES_DEFAULT;
    else
        sa.shapes = DWRITE_SCRIPT_SHAPES_NO_VISUAL;

    return IDWriteTextAnalysisSink_SetScriptAnalysis(sink, run_start, run_length, &sa);
}
/* Output array shared by the line breaking rules; passed to set_break_condition(). */
struct linebreaking_state {
DWRITE_LINE_BREAKPOINT *breakpoints; /* one entry per analyzed character */
UINT32 count;                        /* number of entries in 'breakpoints' */
};
/* Selects which side of a character set_break_condition() updates. */
enum BreakConditionLocation {
BreakConditionBefore, /* breakConditionBefore of the character */
BreakConditionAfter   /* breakConditionAfter of the character */
};
/* Line breaking classes. analyze_linebreaks() compares these constants with
the values read from wine_linebreak_table, so the numbering (starting at 1)
has to stay in step with that table. The suffixes look like the class
abbreviations of the Unicode line breaking algorithm (UAX #14) - TODO confirm
against the table generator. */
enum linebreaking_classes {
b_BK = 1,
b_CR,
b_LF,
b_CM,
b_SG,
b_GL,
b_CB,
b_SP,
b_ZW,
b_NL,
b_WJ,
b_JL,
b_JV,
b_JT,
b_H2,
b_H3,
b_XX,
b_OP,
b_CL,
b_CP,
b_QU,
b_NS,
b_EX,
b_SY,
b_IS,
b_PR,
b_PO,
b_NU,
b_AL,
b_ID,
b_IN,
b_HY,
b_BB,
b_BA,
b_SA,
b_AI,
b_B2,
b_HL,
b_CJ,
b_RI
};
/* Records a break condition on one side of the character at 'pos'.
 *
 * DWRITE_BREAK_CONDITION_CAN_BREAK is the weak initial value. A side that still
 * holds it takes the new condition; a side that already holds anything else
 * ("may not break", "must break") is left untouched, so the first strong
 * condition wins. The facing side of the neighbouring character (the "after" of
 * pos-1, or the "before" of pos+1) receives the same value when that neighbour
 * exists. */
static inline void set_break_condition(UINT32 pos, enum BreakConditionLocation location, DWRITE_BREAK_CONDITION condition,
    struct linebreaking_state *state)
{
    DWRITE_LINE_BREAKPOINT *cur = &state->breakpoints[pos];

    if (location == BreakConditionBefore)
    {
        if (cur->breakConditionBefore == DWRITE_BREAK_CONDITION_CAN_BREAK)
        {
            cur->breakConditionBefore = condition;
            /* mirror onto the previous character, if there is one */
            if (pos > 0)
                cur[-1].breakConditionAfter = condition;
        }
        return;
    }

    if (cur->breakConditionAfter == DWRITE_BREAK_CONDITION_CAN_BREAK)
    {
        cur->breakConditionAfter = condition;
        /* mirror onto the next character, if there is one */
        if (pos + 1 < state->count)
            cur[1].breakConditionBefore = condition;
    }
}
/* Looks up 'ch' in a three-level table stored in a single array: the high byte
 * selects a second-level offset, the next four bits select a third-level
 * offset, and the low four bits select the final entry. */
static inline WCHAR get_table_entry(const unsigned short *table, WCHAR ch)
{
    const unsigned int high_byte = ch >> 8;
    const unsigned int mid_nibble = (ch >> 4) & 0x0f;
    const unsigned int low_nibble = ch & 0xf;

    const unsigned int level2 = table[high_byte] + mid_nibble;
    const unsigned int level3 = table[level2] + low_nibble;

    return table[level3];
}
/* Computes line breaking opportunities for 'count' UTF-16 code units of 'text'
 * and stores them in 'breakpoints' (which must have room for 'count' entries).
 *
 * Each character is classified through wine_linebreak_table, every position
 * starts out as "can break", and the rules below (numbered after the Unicode
 * line breaking algorithm, UAX #14) then tighten individual positions through
 * set_break_condition(). Because the first non-default condition wins, the order
 * of the passes matters.
 *
 * Returns S_OK on success (including count == 0, where nothing is written) or
 * E_OUTOFMEMORY if the temporary class array cannot be allocated. */
static HRESULT analyze_linebreaks(const WCHAR *text, UINT32 count, DWRITE_LINE_BREAKPOINT *breakpoints)
{
    struct linebreaking_state state;
    short *break_class;
    UINT32 i, j;

    /* Nothing to analyze. Returning here also keeps the LB2 call below from
       touching breakpoints[0] of an empty array. */
    if (!count)
        return S_OK;

    break_class = heap_alloc(count*sizeof(short));
    if (!break_class)
        return E_OUTOFMEMORY;

    state.breakpoints = breakpoints;
    state.count = count;

    /* LB31 - allow breaks everywhere. It will be overridden if needed as
       other rules dictate. */
    for (i = 0; i < count; i++)
    {
        break_class[i] = get_table_entry(wine_linebreak_table, text[i]);

        breakpoints[i].breakConditionBefore = DWRITE_BREAK_CONDITION_CAN_BREAK;
        breakpoints[i].breakConditionAfter = DWRITE_BREAK_CONDITION_CAN_BREAK;
        breakpoints[i].isWhitespace = break_class[i] == b_BK || break_class[i] == b_ZW || break_class[i] == b_SP || isspaceW(text[i]);
        breakpoints[i].isSoftHyphen = FALSE;
        breakpoints[i].padding = 0;

        /* LB1 - resolve some classes. TODO: use external algorithms for these classes. */
        switch (break_class[i])
        {
            case b_AI:
            case b_SA:
            case b_SG:
            case b_XX:
                break_class[i] = b_AL;
                break;
            case b_CJ:
                break_class[i] = b_NS;
                break;
        }
    }

    /* LB2 - never break at the start */
    set_break_condition(0, BreakConditionBefore, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);

    /* LB3 - always break at the end. This one is ignored. */

    for (i = 0; i < count; i++)
    {
        switch (break_class[i])
        {
            /* LB4 - LB6 */
            case b_CR:
                /* LB5 - don't break CR x LF */
                if (i + 1 < count && break_class[i+1] == b_LF)
                {
                    set_break_condition(i, BreakConditionBefore, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
                    set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
                    break;
                }
                /* a lone CR is a hard break: fall through */
            case b_LF:
            case b_NL:
            case b_BK:
                /* LB4 - LB5 - always break after hard breaks */
                set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MUST_BREAK, &state);
                /* LB6 - do not break before hard breaks */
                set_break_condition(i, BreakConditionBefore, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
                break;
            /* LB7 - do not break before spaces */
            case b_SP:
                set_break_condition(i, BreakConditionBefore, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
                break;
            case b_ZW:
                set_break_condition(i, BreakConditionBefore, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
                /* LB8 - break before character after zero-width space, skip spaces inbetween.
                   Advancing the loop index means the skipped spaces do not get the LB7 treatment. */
                while (i + 1 < count && break_class[i+1] == b_SP)
                    i++;
                set_break_condition(i, BreakConditionBefore, DWRITE_BREAK_CONDITION_CAN_BREAK, &state);
                break;
        }
    }

    /* LB9 - LB10: a combining mark takes the class of the character it follows,
       or becomes AL at the start of text or after a space/hard break/ZW. */
    for (i = 0; i < count; i++)
    {
        if (break_class[i] != b_CM)
            continue;

        if (i > 0)
        {
            switch (break_class[i-1])
            {
                case b_SP:
                case b_BK:
                case b_CR:
                case b_LF:
                case b_NL:
                case b_ZW:
                    break_class[i] = b_AL;
                    break;
                default:
                    break_class[i] = break_class[i-1];
            }
        }
        else break_class[i] = b_AL;
    }

    for (i = 0; i < count; i++)
    {
        switch (break_class[i])
        {
            /* LB11 - don't break before and after word joiner */
            case b_WJ:
                set_break_condition(i, BreakConditionBefore, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
                set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
                break;
            /* LB12 - don't break after glue */
            case b_GL:
                set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
                /* LB12a */
                if (i > 0)
                {
                    if (break_class[i-1] != b_SP && break_class[i-1] != b_BA && break_class[i-1] != b_HY)
                        set_break_condition(i, BreakConditionBefore, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
                }
                break;
            /* LB13 */
            case b_CL:
            case b_CP:
            case b_EX:
            case b_IS:
            case b_SY:
                set_break_condition(i, BreakConditionBefore, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
                break;
            /* LB14 - do not break after OP, even after spaces. The position after
               the last space of the run is included. */
            case b_OP:
                j = i;
                set_break_condition(j, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
                while (j + 1 < count && break_class[j+1] == b_SP)
                {
                    j++;
                    set_break_condition(j, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
                }
                break;
            /* LB15 - QU SP* x OP */
            case b_QU:
                j = i + 1;
                while (j < count && break_class[j] == b_SP)
                    j++;
                /* j == count means the text ended before anything but spaces was found */
                if (j < count && break_class[j] == b_OP)
                    for (; j > i; j--)
                        set_break_condition(j, BreakConditionBefore, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
                break;
            /* LB16 - (CL | CP) SP* x NS */
            case b_NS:
                /* nothing precedes the first character */
                if (i == 0)
                    break;
                j = i - 1;
                while (j > 0 && break_class[j] == b_SP)
                    j--;
                if (break_class[j] == b_CL || break_class[j] == b_CP)
                    for (j++; j <= i; j++)
                        set_break_condition(j, BreakConditionBefore, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
                break;
            /* LB17 - B2 SP* x B2 */
            case b_B2:
                j = i + 1;
                while (j < count && break_class[j] == b_SP)
                    j++;
                if (j < count && break_class[j] == b_B2)
                    for (; j > i; j--)
                        set_break_condition(j, BreakConditionBefore, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
                break;
        }
    }

    for (i = 0; i < count; i++)
    {
        switch (break_class[i])
        {
            /* LB18 - break is allowed after space */
            case b_SP:
                set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_CAN_BREAK, &state);
                break;
            /* LB19 - don't break before or after quotation mark */
            case b_QU:
                set_break_condition(i, BreakConditionBefore, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
                set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
                break;
            /* LB20 */
            case b_CB:
                set_break_condition(i, BreakConditionBefore, DWRITE_BREAK_CONDITION_CAN_BREAK, &state);
                set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_CAN_BREAK, &state);
                break;
            /* LB21 */
            case b_BA:
            case b_HY:
            case b_NS:
                set_break_condition(i, BreakConditionBefore, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
                break;
            case b_BB:
                set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
                break;
            /* LB21a - only relevant when a character follows the HY/BA */
            case b_HL:
                if (i + 2 < count)
                    switch (break_class[i+1])
                    {
                        case b_HY:
                        case b_BA:
                            set_break_condition(i+1, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
                    }
                break;
            /* LB22 */
            case b_IN:
                if (i > 0)
                {
                    switch (break_class[i-1])
                    {
                        case b_AL:
                        case b_HL:
                        case b_ID:
                        case b_IN:
                        case b_NU:
                            set_break_condition(i, BreakConditionBefore, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
                    }
                }
                break;
        }

        /* the remaining rules look at the pair (i, i+1) */
        if (i + 1 < count)
        {
            /* LB23 */
            if ((break_class[i] == b_ID && break_class[i+1] == b_PO) ||
                (break_class[i] == b_AL && break_class[i+1] == b_NU) ||
                (break_class[i] == b_HL && break_class[i+1] == b_NU) ||
                (break_class[i] == b_NU && break_class[i+1] == b_AL) ||
                (break_class[i] == b_NU && break_class[i+1] == b_HL))
                set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
            /* LB24 */
            if ((break_class[i] == b_PR && break_class[i+1] == b_ID) ||
                (break_class[i] == b_PR && break_class[i+1] == b_AL) ||
                (break_class[i] == b_PR && break_class[i+1] == b_HL) ||
                (break_class[i] == b_PO && break_class[i+1] == b_AL) ||
                (break_class[i] == b_PO && break_class[i+1] == b_HL))
                set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
            /* LB25 */
            if ((break_class[i] == b_CL && break_class[i+1] == b_PO) ||
                (break_class[i] == b_CP && break_class[i+1] == b_PO) ||
                (break_class[i] == b_CL && break_class[i+1] == b_PR) ||
                (break_class[i] == b_CP && break_class[i+1] == b_PR) ||
                (break_class[i] == b_NU && break_class[i+1] == b_PO) ||
                (break_class[i] == b_NU && break_class[i+1] == b_PR) ||
                (break_class[i] == b_PO && break_class[i+1] == b_OP) ||
                (break_class[i] == b_PO && break_class[i+1] == b_NU) ||
                (break_class[i] == b_PR && break_class[i+1] == b_OP) ||
                (break_class[i] == b_PR && break_class[i+1] == b_NU) ||
                (break_class[i] == b_HY && break_class[i+1] == b_NU) ||
                (break_class[i] == b_IS && break_class[i+1] == b_NU) ||
                (break_class[i] == b_NU && break_class[i+1] == b_NU) ||
                (break_class[i] == b_SY && break_class[i+1] == b_NU))
                set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
            /* LB26 */
            if (break_class[i] == b_JL)
            {
                switch (break_class[i+1])
                {
                    case b_JL:
                    case b_JV:
                    case b_H2:
                    case b_H3:
                        set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
                }
            }
            if ((break_class[i] == b_JV || break_class[i] == b_H2) &&
                (break_class[i+1] == b_JV || break_class[i+1] == b_JT))
                set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
            if ((break_class[i] == b_JT || break_class[i] == b_H3) &&
                 break_class[i+1] == b_JT)
                set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
            /* LB27 */
            switch (break_class[i])
            {
                case b_JL:
                case b_JV:
                case b_JT:
                case b_H2:
                case b_H3:
                    if (break_class[i+1] == b_IN || break_class[i+1] == b_PO)
                        set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
            }
            if (break_class[i] == b_PO)
            {
                switch (break_class[i+1])
                {
                    case b_JL:
                    case b_JV:
                    case b_JT:
                    case b_H2:
                    case b_H3:
                        set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
                }
            }
            /* LB28 */
            if ((break_class[i] == b_AL && break_class[i+1] == b_AL) ||
                (break_class[i] == b_AL && break_class[i+1] == b_HL) ||
                (break_class[i] == b_HL && break_class[i+1] == b_AL) ||
                (break_class[i] == b_HL && break_class[i+1] == b_HL))
                set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
            /* LB29 */
            if ((break_class[i] == b_IS && break_class[i+1] == b_AL) ||
                (break_class[i] == b_IS && break_class[i+1] == b_HL))
                set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
            /* LB30 - (AL | HL | NU) x OP and CP x (AL | HL | NU) */
            if ((break_class[i] == b_AL || break_class[i] == b_HL || break_class[i] == b_NU) &&
                 break_class[i+1] == b_OP)
                set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
            if (break_class[i] == b_CP &&
               (break_class[i+1] == b_AL || break_class[i+1] == b_HL || break_class[i+1] == b_NU))
                set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
            /* LB30a */
            if (break_class[i] == b_RI && break_class[i+1] == b_RI)
                set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
        }
    }

    heap_free(break_class);
    return S_OK;
}
/* IUnknown::QueryInterface for the text analyzer.
 *
 * Hands back 'iface' itself for IID_IUnknown and IID_IDWriteTextAnalyzer;
 * any other interface id stores NULL in *obj and fails with E_NOINTERFACE. */
static HRESULT WINAPI dwritetextanalyzer_QueryInterface(IDWriteTextAnalyzer *iface, REFIID riid, void **obj)
{
    TRACE("(%s %p)\n", debugstr_guid(riid), obj);

    if (!IsEqualIID(riid, &IID_IUnknown) && !IsEqualIID(riid, &IID_IDWriteTextAnalyzer))
    {
        *obj = NULL;
        return E_NOINTERFACE;
    }

    *obj = iface;
    return S_OK;
}
/* IUnknown::AddRef. No reference count is kept; a constant 2 is reported. */
static ULONG WINAPI dwritetextanalyzer_AddRef(IDWriteTextAnalyzer *iface)
{
    const ULONG reported_refcount = 2;

    return reported_refcount;
}
/* IUnknown::Release. Nothing is counted or freed; a constant 1 is reported. */
static ULONG WINAPI dwritetextanalyzer_Release(IDWriteTextAnalyzer *iface)
{
    const ULONG reported_refcount = 1;

    return reported_refcount;
}
/* IDWriteTextAnalyzer::AnalyzeScript.
 *
 * Fetches the text at 'position' from the analysis source and reports its
 * script runs to 'sink' through analyze_script(). At most 'length' characters
 * are analyzed, even if the source hands back a longer block.
 *
 * Returns the failure from GetTextAtPosition() or from the sink, S_OK otherwise
 * (including when there is nothing to analyze).
 *
 * NOTE(review): only the first block returned by the source is analyzed, and
 * analyze_script() reports run positions starting at 0 rather than at
 * 'position' - both look incomplete for position != 0 or chunked sources;
 * confirm against the expected sink contract. */
static HRESULT WINAPI dwritetextanalyzer_AnalyzeScript(IDWriteTextAnalyzer *iface,
    IDWriteTextAnalysisSource* source, UINT32 position, UINT32 length, IDWriteTextAnalysisSink* sink)
{
    const WCHAR *text = NULL;
    UINT32 len = 0;
    HRESULT hr;

    TRACE("(%p %u %u %p)\n", source, position, length, sink);

    hr = IDWriteTextAnalysisSource_GetTextAtPosition(source, position, &text, &len);
    if (FAILED(hr)) return hr;

    /* no text at this position */
    if (!text || !len)
        return S_OK;

    /* never look past the range the caller asked for */
    if (len > length)
        len = length;

    return analyze_script(text, len, sink);
}
/* IDWriteTextAnalyzer::AnalyzeBidi - not implemented.
 * Logs the call and returns E_NOTIMPL without touching the sink. */
static HRESULT WINAPI dwritetextanalyzer_AnalyzeBidi(IDWriteTextAnalyzer *iface,
    IDWriteTextAnalysisSource* source, UINT32 position, UINT32 length, IDWriteTextAnalysisSink* sink)
{
    FIXME("(%p %u %u %p): stub\n",
          source, position, length, sink);

    return E_NOTIMPL;
}
/* IDWriteTextAnalyzer::AnalyzeNumberSubstitution - not implemented.
 * Logs the call and returns E_NOTIMPL without touching the sink. */
static HRESULT WINAPI dwritetextanalyzer_AnalyzeNumberSubstitution(IDWriteTextAnalyzer *iface,
    IDWriteTextAnalysisSource* source, UINT32 position, UINT32 length, IDWriteTextAnalysisSink* sink)
{
    FIXME("(%p %u %u %p): stub\n",
          source, position, length, sink);

    return E_NOTIMPL;
}
/* IDWriteTextAnalyzer::AnalyzeLineBreakpoints.
 *
 * Collects 'length' characters starting at 'position' from the analysis source,
 * runs analyze_linebreaks() on them and passes the resulting breakpoints to
 * IDWriteTextAnalysisSink_SetLineBreakpoints().
 *
 * If the first block returned by the source is shorter than 'length', the text
 * is assembled in a temporary buffer by asking the source for the following
 * positions. When the source runs out of text early, the rest of the buffer is
 * zero-filled so that the analysis never reads uninitialized memory.
 *
 * Returns S_OK for length == 0, E_OUTOFMEMORY on allocation failure, or the
 * result of the source/sink calls. */
static HRESULT WINAPI dwritetextanalyzer_AnalyzeLineBreakpoints(IDWriteTextAnalyzer *iface,
    IDWriteTextAnalysisSource* source, UINT32 position, UINT32 length, IDWriteTextAnalysisSink* sink)
{
    DWRITE_LINE_BREAKPOINT *breakpoints = NULL;
    WCHAR *buff = NULL;
    const WCHAR *text;
    HRESULT hr;
    UINT32 len;

    TRACE("(%p %u %u %p)\n", source, position, length, sink);

    if (length == 0)
        return S_OK;

    /* get some, check for length */
    text = NULL;
    len = 0;
    hr = IDWriteTextAnalysisSource_GetTextAtPosition(source, position, &text, &len);
    if (FAILED(hr)) return hr;

    /* a NULL block carries no characters, whatever length was reported */
    if (!text)
        len = 0;

    if (len < length) {
        UINT32 read;

        buff = heap_alloc(length*sizeof(WCHAR));
        if (!buff)
            return E_OUTOFMEMORY;

        if (len)
            memcpy(buff, text, len*sizeof(WCHAR));
        read = len;

        while (read < length && text) {
            text = NULL;
            len = 0;
            /* the next block starts right after what was already copied */
            hr = IDWriteTextAnalysisSource_GetTextAtPosition(source, position + read, &text, &len);
            if (FAILED(hr))
                goto done;

            /* end of text; an empty block would otherwise never advance 'read' */
            if (!text || !len)
                break;

            len = min(len, length-read);
            memcpy(&buff[read], text, len*sizeof(WCHAR));
            read += len;
        }

        /* source ended before 'length' characters were available */
        if (read < length)
            memset(&buff[read], 0, (length-read)*sizeof(WCHAR));

        text = buff;
    }

    breakpoints = heap_alloc(length*sizeof(*breakpoints));
    if (!breakpoints) {
        hr = E_OUTOFMEMORY;
        goto done;
    }

    hr = analyze_linebreaks(text, length, breakpoints);
    if (FAILED(hr))
        goto done;

    hr = IDWriteTextAnalysisSink_SetLineBreakpoints(sink, position, length, breakpoints);

done:
    heap_free(breakpoints);
    heap_free(buff);

    return hr;
}
/* IDWriteTextAnalyzer::GetGlyphs - not implemented.
 * Logs every argument ('text' is printed with its explicit length) and returns
 * E_NOTIMPL; none of the output parameters are written. */
static HRESULT WINAPI dwritetextanalyzer_GetGlyphs(IDWriteTextAnalyzer *iface,
    WCHAR const* text, UINT32 length, IDWriteFontFace* font_face, BOOL is_sideways,
    BOOL is_rtl, DWRITE_SCRIPT_ANALYSIS const* analysis, WCHAR const* locale,
    IDWriteNumberSubstitution* substitution, DWRITE_TYPOGRAPHIC_FEATURES const** features,
    UINT32 const* feature_range_len, UINT32 feature_ranges, UINT32 max_glyph_count,
    UINT16* clustermap, DWRITE_SHAPING_TEXT_PROPERTIES* text_props, UINT16* glyph_indices,
    DWRITE_SHAPING_GLYPH_PROPERTIES* glyph_props, UINT32* actual_glyph_count)
{
    FIXME("(%s:%u %p %d %d %p %s %p %p %p %u %u %p %p %p %p %p): stub\n",
          debugstr_wn(text, length), length,
          font_face, is_sideways, is_rtl,
          analysis, debugstr_w(locale), substitution,
          features, feature_range_len, feature_ranges,
          max_glyph_count, clustermap, text_props,
          glyph_indices, glyph_props, actual_glyph_count);

    return E_NOTIMPL;
}
/* IDWriteTextAnalyzer::GetGlyphPlacements - not implemented.
 * Logs every argument and returns E_NOTIMPL; none of the output parameters are
 * written. 'text' comes with an explicit length, so it is logged with
 * debugstr_wn() instead of being treated as a NUL-terminated string. */
static HRESULT WINAPI dwritetextanalyzer_GetGlyphPlacements(IDWriteTextAnalyzer *iface,
    WCHAR const* text, UINT16 const* clustermap, DWRITE_SHAPING_TEXT_PROPERTIES* props,
    UINT32 text_len, UINT16 const* glyph_indices, DWRITE_SHAPING_GLYPH_PROPERTIES const* glyph_props,
    UINT32 glyph_count, IDWriteFontFace * font_face, FLOAT fontEmSize, BOOL is_sideways, BOOL is_rtl,
    DWRITE_SCRIPT_ANALYSIS const* analysis, WCHAR const* locale, DWRITE_TYPOGRAPHIC_FEATURES const** features,
    UINT32 const* feature_range_len, UINT32 feature_ranges, FLOAT* glyph_advances, DWRITE_GLYPH_OFFSET* glyph_offsets)
{
    FIXME("(%s %p %p %u %p %p %u %p %f %d %d %p %s %p %p %u %p %p): stub\n", debugstr_wn(text, text_len),
        clustermap, props, text_len, glyph_indices, glyph_props, glyph_count, font_face, fontEmSize, is_sideways,
        is_rtl, analysis, debugstr_w(locale), features, feature_range_len, feature_ranges, glyph_advances, glyph_offsets);

    return E_NOTIMPL;
}
/* IDWriteTextAnalyzer::GetGdiCompatibleGlyphPlacements - not implemented.
 * Logs every argument and returns E_NOTIMPL; none of the output parameters are
 * written. 'text' comes with an explicit length, so it is logged with
 * debugstr_wn() instead of being treated as a NUL-terminated string. */
static HRESULT WINAPI dwritetextanalyzer_GetGdiCompatibleGlyphPlacements(IDWriteTextAnalyzer *iface,
    WCHAR const* text, UINT16 const* clustermap, DWRITE_SHAPING_TEXT_PROPERTIES* props,
    UINT32 text_len, UINT16 const* glyph_indices, DWRITE_SHAPING_GLYPH_PROPERTIES const* glyph_props,
    UINT32 glyph_count, IDWriteFontFace * font_face, FLOAT fontEmSize, FLOAT pixels_per_dip,
    DWRITE_MATRIX const* transform, BOOL use_gdi_natural, BOOL is_sideways, BOOL is_rtl,
    DWRITE_SCRIPT_ANALYSIS const* analysis, WCHAR const* locale, DWRITE_TYPOGRAPHIC_FEATURES const** features,
    UINT32 const* feature_range_lengths, UINT32 feature_ranges, FLOAT* glyph_advances, DWRITE_GLYPH_OFFSET* glyph_offsets)
{
    FIXME("(%s %p %p %u %p %p %u %p %f %f %p %d %d %d %p %s %p %p %u %p %p): stub\n", debugstr_wn(text, text_len),
        clustermap, props, text_len, glyph_indices, glyph_props, glyph_count, font_face, fontEmSize, pixels_per_dip,
        transform, use_gdi_natural, is_sideways, is_rtl, analysis, debugstr_w(locale), features, feature_range_lengths,
        feature_ranges, glyph_advances, glyph_offsets);

    return E_NOTIMPL;
}
/* Method table of the text analyzer object. The initializers are positional,
so their order has to follow the member order of struct IDWriteTextAnalyzerVtbl. */
static const struct IDWriteTextAnalyzerVtbl textanalyzervtbl = {
dwritetextanalyzer_QueryInterface,
dwritetextanalyzer_AddRef,
dwritetextanalyzer_Release,
dwritetextanalyzer_AnalyzeScript,
dwritetextanalyzer_AnalyzeBidi,
dwritetextanalyzer_AnalyzeNumberSubstitution,
dwritetextanalyzer_AnalyzeLineBreakpoints,
dwritetextanalyzer_GetGlyphs,
dwritetextanalyzer_GetGlyphPlacements,
dwritetextanalyzer_GetGdiCompatibleGlyphPlacements
};
/* The one statically allocated analyzer instance handed out by get_textanalyzer();
its AddRef/Release return constants, so it is never freed. */
static IDWriteTextAnalyzer textanalyzer = { &textanalyzervtbl };
/* Stores a pointer to the file-static text analyzer instance in *ret.
 * Nothing is allocated and the call always returns S_OK. */
HRESULT get_textanalyzer(IDWriteTextAnalyzer **ret)
{
    IDWriteTextAnalyzer *shared_instance = &textanalyzer;

    *ret = shared_instance;
    return S_OK;
}