Sweden-Number/dlls/dwrite/analyzer.c

/*
 *    Text analyzer
 *
 * Copyright 2011 Aric Stewart for CodeWeavers
 * Copyright 2012, 2014 Nikolay Sivov for CodeWeavers
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
 */

#define COBJMACROS

#include "dwrite.h"
#include "dwrite_private.h"

WINE_DEFAULT_DEBUG_CHANNEL(dwrite);

extern const unsigned short wine_linebreak_table[];

enum scriptcode {
    Script_Arabic = 0,
    Script_Armenian = 1,
    Script_Balinese = 2,
    Script_Bengali = 3,
    Script_Buginese = 6,
    Script_Canadian = 8,
    Script_Cherokee = 11,
    Script_Controls = 12,
    Script_Coptic = 13,
    Script_Cyrillic = 16,
    Script_Devanagari = 18,
    Script_Ethiopic = 19,
    Script_Georgian = 20,
    Script_Glagolitic = 22,
    Script_Greek = 23,
    Script_Gujarati = 24,
    Script_Gurmukhi = 25,
    Script_Hangul = 27,
    Script_Hebrew = 29,
    Script_Kannada = 32,
    Script_Khmer = 36,
    Script_Lao = 37,
    Script_Latin = 38,
    Script_Lepcha = 39,
    Script_Limbu = 40,
    Script_Malayalam = 44,
    Script_Mongolian = 45,
    Script_Myanmar = 46,
    Script_New_TaiLue = 47,
    Script_NKo = 48,
    Script_Ogham = 49,
    Script_OlChiki = 50,
    Script_Oriya = 53,
    Script_Runic = 58,
    Script_Sinhala = 61,
    Script_Sundanese = 62,
    Script_Syriac = 64,
    Script_TaiLe = 67,
    Script_Tamil = 68,
    Script_Telugu = 69,
    Script_Thaana = 70,
    Script_Thai = 71,
    Script_Tibetan = 72,
    Script_Tifinagh = 73,
    Script_Symbol = 77,
    Script_Unknown = (UINT16)-1
};

struct script_range {
    UINT16 script;
    DWORD first;
    DWORD last;
};

static const struct script_range script_ranges[] = {
    /* C0 Controls: U+0000–U+001F */
    /* ASCII punctuation and symbols: U+0020–U+002F */
    /* ASCII digits: U+0030–U+0039 */
    /* ASCII punctuation and symbols: U+003A–U+0040 */
    { Script_Symbol, 0x00, 0x040 },
    /* Latin uppercase: U+0041–U+005A */
    { Script_Latin, 0x41, 0x5a },
    /* ASCII punctuation and symbols: U+005B–U+0060 */
    { Script_Symbol, 0x5b, 0x060 },
    /* Latin lowercase: U+0061–U+007A */
    { Script_Latin, 0x61, 0x7a },
    /* ASCII punctuation and symbols, control char DEL: U+007B–U+007F */
    { Script_Symbol, 0x7b, 0x7f },
    /* C1 Controls: U+0080–U+009F */
    { Script_Controls, 0x80, 0x9f },
    /* Latin-1 Supplement: U+00A0–U+00FF */
    /* Latin Extended-A: U+0100–U+017F */
    /* Latin Extended-B: U+0180–U+024F */
    /* IPA Extensions: U+0250–U+02AF */
    /* Spacing Modifier Letters: U+02B0–U+02FF */
    { Script_Latin, 0xa0, 0x2ff },
    /* Combining Diacritical Marks: U+0300–U+036F */
    { Script_Symbol, 0x300, 0x36f },
    /* Greek: U+0370–U+03E1 */
    { Script_Greek, 0x370, 0x3e1 },
    /* Coptic: U+03E2–U+03Ef */
    { Script_Coptic, 0x3e2, 0x3ef },
    /* Greek: U+03F0–U+03FF */
    { Script_Greek, 0x3f0, 0x3ff },
    /* Cyrillic: U+0400–U+04FF */
    /* Cyrillic Supplement: U+0500–U+052F */
    /* Cyrillic Supplement range is incomplete cause it's based on Unicode 5.2
       that doesn't define some Abkhaz and Azerbaijani letters, we support Unicode 6.0 range here */
    { Script_Cyrillic, 0x400, 0x52f },
    /* Armenian: U+0530–U+058F */
    { Script_Armenian, 0x530, 0x58f },
    /* Hebrew: U+0590–U+05FF */
    { Script_Hebrew, 0x590, 0x5ff },
    /* Arabic: U+0600–U+06FF */
    { Script_Arabic, 0x600, 0x6ff },
    /* Syriac: U+0600–U+06FF */
    { Script_Syriac, 0x700, 0x74f },
    /* Arabic Supplement: U+0750–U+077F */
    { Script_Arabic, 0x750, 0x77f },
    /* Thaana: U+0780–U+07BF */
    { Script_Thaana, 0x780, 0x7bf },
    /* N'Ko: U+07C0–U+07FF */
    { Script_NKo, 0x7c0, 0x7ff },
    /* Devanagari: U+0900–U+097F */
    { Script_Devanagari, 0x900, 0x97f },
    /* Bengali: U+0980–U+09FF */
    { Script_Bengali, 0x980, 0x9ff },
    /* Gurmukhi: U+0A00–U+0A7F */
    { Script_Gurmukhi, 0xa00, 0xa7f },
    /* Gujarati: U+0A80–U+0AFF */
    { Script_Gujarati, 0xa80, 0xaff },
    /* Oriya: U+0B00–U+0B7F */
    { Script_Oriya, 0xb00, 0xb7f },
    /* Tamil: U+0B80–U+0BFF */
    { Script_Tamil, 0xb80, 0xbff },
    /* Telugu: U+0C00–U+0C7F */
    { Script_Telugu, 0xc00, 0xc7f },
    /* Kannada: U+0C80–U+0CFF */
    { Script_Kannada, 0xc80, 0xcff },
    /* Malayalam: U+0D00–U+0D7F */
    { Script_Malayalam, 0xd00, 0xd7f },
    /* Sinhala: U+0D80–U+0DFF */
    { Script_Sinhala, 0xd80, 0xdff },
    /* Thai: U+0E00–U+0E7F */
    { Script_Thai, 0xe00, 0xe7f },
    /* Lao: U+0E80–U+0EFF */
    { Script_Lao, 0xe80, 0xeff },
    /* Tibetan: U+0F00–U+0FFF */
    { Script_Tibetan, 0xf00, 0xfff },
    /* Myanmar: U+1000–U+109F */
    { Script_Myanmar, 0x1000, 0x109f },
    /* Georgian: U+10A0–U+10FF */
    { Script_Georgian, 0x10a0, 0x10ff },
    /* Hangul Jamo: U+1100–U+11FF */
    { Script_Hangul, 0x1100, 0x11ff },
    /* Ethiopic: U+1200–U+137F */
    /* Ethiopic Extensions: U+1380–U+139F */
    { Script_Ethiopic, 0x1200, 0x139f },
    /* Cherokee: U+13A0–U+13FF */
    { Script_Cherokee, 0x13a0, 0x13ff },
    /* Canadian Aboriginal Syllabics: U+1400–U+167F */
    { Script_Canadian, 0x1400, 0x167f },
    /* Ogham: U+1680–U+169F */
    { Script_Ogham, 0x1680, 0x169f },
    /* Runic: U+16A0–U+16F0 */
    { Script_Runic, 0x16a0, 0x16f0 },
    /* Khmer: U+1780–U+17FF */
    { Script_Khmer, 0x1780, 0x17ff },
    /* Mongolian: U+1800–U+18AF */
    { Script_Mongolian, 0x1800, 0x18af },
    /* Limbu: U+1900–U+194F */
    { Script_Limbu, 0x1900, 0x194f },
    /* Tai Le: U+1950–U+197F */
    { Script_TaiLe, 0x1950, 0x197f },
    /* New Tai Lue: U+1980–U+19DF */
    { Script_New_TaiLue, 0x1980, 0x19df },
    /* Khmer Symbols: U+19E0–U+19FF */
    { Script_Khmer, 0x19e0, 0x19ff },
    /* Buginese: U+1A00–U+1A1F */
    { Script_Buginese, 0x1a00, 0x1a1f },
    /* Tai Tham: U+1A20–U+1AAF */
    { Script_Symbol, 0x1a20, 0x1aaf },
    /* Balinese: U+1B00–U+1B7F */
    { Script_Balinese, 0x1b00, 0x1b7f },
    /* Sundanese: U+1B80–U+1BBF */
    { Script_Sundanese, 0x1b80, 0x1bbf },
    /* Batak: U+1BC0–U+1BFF */
    { Script_Symbol, 0x1bc0, 0x1bff },
    /* Lepcha: U+1C00–U+1C4F */
    { Script_Lepcha, 0x1c00, 0x1c4f },
    /* Ol Chiki: U+1C50–U+1C7F */
    { Script_OlChiki, 0x1c50, 0x1c7f },
    /* Sundanese Supplement: U+1CC0–U+1CCF */
    { Script_Symbol, 0x1cc0, 0x1ccf },
    /* Vedic Extensions: U+1CD0-U+1CFF */
    { Script_Devanagari, 0x1cd0, 0x1cff },
    /* Phonetic Extensions: U+1D00–U+1DBF */
    { Script_Latin, 0x1d00, 0x1dbf },
    /* Combining Diacritical Marks Supplement: U+1DC0–U+1DFF */
    { Script_Symbol, 0x1dc0, 0x1dff },
    /* Latin Extended Additional: U+1E00–U+1EFF */
    { Script_Latin, 0x1e00, 0x1eff },
    /* Greek Extended: U+1F00–U+1F00 */
    { Script_Greek, 0x1f00, 0x1fff },
    /* General Punctuation: U+2000–U+206f */
    /* Superscripts and Subscripts: U+2070–U+209f */
    /* Currency Symbols: U+20A0–U+20CF */
    /* Combining Diacritical Marks for Symbols: U+20D0–U+20FF */
    /* Letterlike Symbols: U+2100–U+214F */
    /* Number Forms: U+2150–U+218F */
    /* Arrows: U+2190–U+21FF */
    /* Mathematical Operators: U+2200–U+22FF */
    /* Miscellaneous Technical: U+2300–U+23FF */
    /* Control Pictures: U+2400–U+243F */
    /* Optical Character Recognition: U+2440–U+245F */
    /* Enclosed Alphanumerics: U+2460–U+24FF */
    /* Box Drawing: U+2500–U+25FF */
    /* Block Elements: U+2580–U+259F */
    /* Geometric Shapes: U+25A0–U+25FF */
    /* Miscellaneous Symbols: U+2600–U+26FF */
    /* Dingbats: U+2700–U+27BF */
    /* Miscellaneous Mathematical Symbols-A: U+27C0–U+27EF */
    /* Supplemental Arrows-A: U+27F0–U+27FF */
    /* Braille Patterns: U+2800–U+28FF */
    /* Supplemental Arrows-B: U+2900–U+297F */
    /* Miscellaneous Mathematical Symbols-B: U+2980–U+29FF */
    /* Supplemental Mathematical Operators: U+2A00–U+2AFF */
    /* Miscellaneous Symbols and Arrows: U+2B00–U+2BFF */
    { Script_Symbol, 0x2000, 0x2bff },
    /* Glagolitic: U+2C00–U+2C5F */
    { Script_Glagolitic, 0x2c00, 0x2c5f },
    /* Latin Extended-C: U+2C60–U+2C7F */
    { Script_Latin, 0x2c60, 0x2c7f },
    /* Coptic: U+2C80–U+2CFF */
    { Script_Coptic, 0x2c80, 0x2cff },
    /* Georgian Supplement: U+2D00–U+2D2F */
    { Script_Georgian, 0x2d00, 0x2d2f },
    /* Tifinagh: U+2D30–U+2D7F */
    { Script_Tifinagh, 0x2d30, 0x2d7f },
    /* unsupported range */
    { Script_Unknown }
};

static UINT16 get_char_script( WCHAR c )
{
    DWORD ch = c;
    unsigned int i;

    for (i = 0; i < sizeof(script_ranges)/sizeof(struct script_range); i++)
    {
        const struct script_range *range = &script_ranges[i];
        if (range->script == Script_Unknown || (range->first <= ch && range->last >= ch))
            return range->script;
    }

    return Script_Unknown;
}

static HRESULT analyze_script(const WCHAR *text, UINT32 len, IDWriteTextAnalysisSink *sink)
{
    DWRITE_SCRIPT_ANALYSIS sa;
    UINT32 pos, i, length;

    if (!len) return S_OK;

    sa.script = get_char_script(*text);

    pos = 0;
    length = 1;

    for (i = 1; i < len; i++)
    {
        UINT16 script = get_char_script(text[i]);

        /* Script_Latin_Symb script type is ignored when preceded or followed by another script */
        if (sa.script == Script_Symbol) sa.script = script;
        if (script    == Script_Symbol) script = sa.script;
        /* this is a length of a sequence to be reported next */
        if (sa.script == script) length++;

        if (sa.script != script)
        {
            HRESULT hr;

            sa.shapes = sa.script != Script_Controls ? DWRITE_SCRIPT_SHAPES_DEFAULT : DWRITE_SCRIPT_SHAPES_NO_VISUAL;
            hr = IDWriteTextAnalysisSink_SetScriptAnalysis(sink, pos, length, &sa);
            if (FAILED(hr)) return hr;
            pos = i;
            length = 1;
            sa.script = script;
        }
    }

    /* 1 length case or normal completion call */
    sa.shapes = sa.script != Script_Controls ? DWRITE_SCRIPT_SHAPES_DEFAULT : DWRITE_SCRIPT_SHAPES_NO_VISUAL;
    return IDWriteTextAnalysisSink_SetScriptAnalysis(sink, pos, length, &sa);
}

struct linebreaking_state {
    DWRITE_LINE_BREAKPOINT *breakpoints;
    UINT32 count;
};

enum BreakConditionLocation {
    BreakConditionBefore,
    BreakConditionAfter
};

enum linebreaking_classes {
    b_BK = 1,
    b_CR,
    b_LF,
    b_CM,
    b_SG,
    b_GL,
    b_CB,
    b_SP,
    b_ZW,
    b_NL,
    b_WJ,
    b_JL,
    b_JV,
    b_JT,
    b_H2,
    b_H3,
    b_XX,
    b_OP,
    b_CL,
    b_CP,
    b_QU,
    b_NS,
    b_EX,
    b_SY,
    b_IS,
    b_PR,
    b_PO,
    b_NU,
    b_AL,
    b_ID,
    b_IN,
    b_HY,
    b_BB,
    b_BA,
    b_SA,
    b_AI,
    b_B2,
    b_HL,
    b_CJ,
    b_RI
};

/* "Can break" is a weak condition, stronger "may not break" and "must break" override it. Initially all conditions are
    set to "can break" and could only be changed once. */
static inline void set_break_condition(UINT32 pos, enum BreakConditionLocation location, DWRITE_BREAK_CONDITION condition,
    struct linebreaking_state *state)
{
    if (location == BreakConditionBefore) {
        if (state->breakpoints[pos].breakConditionBefore != DWRITE_BREAK_CONDITION_CAN_BREAK)
            return;
        state->breakpoints[pos].breakConditionBefore = condition;
        if (pos > 0)
            state->breakpoints[pos-1].breakConditionAfter = condition;
    }
    else {
        if (state->breakpoints[pos].breakConditionAfter != DWRITE_BREAK_CONDITION_CAN_BREAK)
            return;
        state->breakpoints[pos].breakConditionAfter = condition;
        if (pos + 1 < state->count)
            state->breakpoints[pos+1].breakConditionBefore = condition;
    }
}

static inline WCHAR get_table_entry(const unsigned short *table, WCHAR ch)
{
    return table[table[table[ch >> 8] + ((ch >> 4) & 0x0f)] + (ch & 0xf)];
}

static HRESULT analyze_linebreaks(const WCHAR *text, UINT32 count, DWRITE_LINE_BREAKPOINT *breakpoints)
{
    struct linebreaking_state state;
    short *break_class;
    int i, j;

    break_class = heap_alloc(count*sizeof(short));
    if (!break_class)
        return E_OUTOFMEMORY;

    state.breakpoints = breakpoints;
    state.count = count;

    /* LB31 - allow breaks everywhere. It will be overridden if needed as
       other rules dictate. */
    for (i = 0; i < count; i++)
    {
        break_class[i] = get_table_entry(wine_linebreak_table, text[i]);

        breakpoints[i].breakConditionBefore = DWRITE_BREAK_CONDITION_CAN_BREAK;
        breakpoints[i].breakConditionAfter  = DWRITE_BREAK_CONDITION_CAN_BREAK;
        breakpoints[i].isWhitespace = break_class[i] == b_BK || break_class[i] == b_ZW || break_class[i] == b_SP || isspaceW(text[i]);
        breakpoints[i].isSoftHyphen = FALSE;
        breakpoints[i].padding = 0;

        /* LB1 - resolve some classes. TODO: use external algorithms for these classes. */
        switch (break_class[i])
        {
            case b_AI:
            case b_SA:
            case b_SG:
            case b_XX:
                break_class[i] = b_AL;
                break;
            case b_CJ:
                break_class[i] = b_NS;
                break;
        }
    }

    /* LB2 - never break at the start */
    set_break_condition(0, BreakConditionBefore, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
    /* LB3 - always break at the end. This one is ignored. */

    for (i = 0; i < count; i++)
    {
        switch (break_class[i])
        {
            /* LB4 - LB6 */
            case b_CR:
                /* LB5 - don't break CR x LF */
                if (i < count-1 && break_class[i+1] == b_LF)
                {
                    set_break_condition(i, BreakConditionBefore, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
                    set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
                    break;
                }
            case b_LF:
            case b_NL:
            case b_BK:
                /* LB4 - LB5 - always break after hard breaks */
                set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MUST_BREAK, &state);
                /* LB6 - do not break before hard breaks */
                set_break_condition(i, BreakConditionBefore, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
                break;
            /* LB7 - do not break before spaces */
            case b_SP:
                set_break_condition(i, BreakConditionBefore, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
                break;
            case b_ZW:
                set_break_condition(i, BreakConditionBefore, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
            /* LB8 - break before character after zero-width space, skip spaces inbetween */
                while (i < count-1 && break_class[i+1] == b_SP)
                    i++;
                set_break_condition(i, BreakConditionBefore, DWRITE_BREAK_CONDITION_CAN_BREAK, &state);
                break;
        }
    }

    /* LB9 - LB10 */
    for (i = 0; i < count; i++)
    {
        if (break_class[i] == b_CM)
        {
            if (i > 0)
            {
                switch (break_class[i-1])
                {
                    case b_SP:
                    case b_BK:
                    case b_CR:
                    case b_LF:
                    case b_NL:
                    case b_ZW:
                        break_class[i] = b_AL;
                        break;
                    default:
                        break_class[i] = break_class[i-1];
                }
            }
            else break_class[i] = b_AL;
        }
    }

    for (i = 0; i < count; i++)
    {
        switch (break_class[i])
        {
            /* LB11 - don't break before and after word joiner */
            case b_WJ:
                set_break_condition(i, BreakConditionBefore, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
                set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
                break;
            /* LB12 - don't break after glue */
            case b_GL:
                set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
            /* LB12a */
                if (i > 0)
                {
                    if (break_class[i-1] != b_SP && break_class[i-1] != b_BA && break_class[i-1] != b_HY)
                        set_break_condition(i, BreakConditionBefore, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
                }
                break;
            /* LB13 */
            case b_CL:
            case b_CP:
            case b_EX:
            case b_IS:
            case b_SY:
                set_break_condition(i, BreakConditionBefore, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
                break;
            /* LB14 */
            case b_OP:
                set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
                while (i < count-1 && break_class[i+1] == b_SP) {
                    set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
                    i++;
                }
                break;
            /* LB15 */
            case b_QU:
                j = i+1;
                while (j < count-1 && break_class[j] == b_SP)
                    j++;
                if (break_class[j] == b_OP)
                    for (; j > i; j--)
                        set_break_condition(j, BreakConditionBefore, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
                break;
            /* LB16 */
            case b_NS:
                j = i-1;
                while(j > 0 && break_class[j] == b_SP)
                    j--;
                if (break_class[j] == b_CL || break_class[j] == b_CP)
                    for (j++; j <= i; j++)
                        set_break_condition(j, BreakConditionBefore, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
                break;
            /* LB17 */
            case b_B2:
                j = i+1;
                while (j < count && break_class[j] == b_SP)
                    j++;
                if (break_class[j] == b_B2)
                    for (; j > i; j--)
                        set_break_condition(j, BreakConditionBefore, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
                break;
        }
    }

    for (i = 0; i < count; i++)
    {
        switch(break_class[i])
        {
            /* LB18 - break is allowed after space */
            case b_SP:
                set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_CAN_BREAK, &state);
                break;
            /* LB19 - don't break before or after quotation mark */
            case b_QU:
                set_break_condition(i, BreakConditionBefore, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
                set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
                break;
            /* LB20 */
            case b_CB:
                set_break_condition(i, BreakConditionBefore, DWRITE_BREAK_CONDITION_CAN_BREAK, &state);
                set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_CAN_BREAK, &state);
                break;
            /* LB21 */
            case b_BA:
            case b_HY:
            case b_NS:
                set_break_condition(i, BreakConditionBefore, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
                break;
            case b_BB:
                set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
                break;
            /* LB21a */
            case b_HL:
                if (i < count-2)
                    switch (break_class[i+1])
                    {
                    case b_HY:
                    case b_BA:
                        set_break_condition(i+1, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
                    }
                break;
            /* LB22 */
            case b_IN:
                if (i > 0)
                {
                    switch (break_class[i-1])
                    {
                        case b_AL:
                        case b_HL:
                        case b_ID:
                        case b_IN:
                        case b_NU:
                            set_break_condition(i, BreakConditionBefore, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
                    }
                }
                break;
        }

        if (i < count-1)
        {
            /* LB23 */
            if ((break_class[i] == b_ID && break_class[i+1] == b_PO) ||
                (break_class[i] == b_AL && break_class[i+1] == b_NU) ||
                (break_class[i] == b_HL && break_class[i+1] == b_NU) ||
                (break_class[i] == b_NU && break_class[i+1] == b_AL) ||
                (break_class[i] == b_NU && break_class[i+1] == b_HL))
                    set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
            /* LB24 */
            if ((break_class[i] == b_PR && break_class[i+1] == b_ID) ||
                (break_class[i] == b_PR && break_class[i+1] == b_AL) ||
                (break_class[i] == b_PR && break_class[i+1] == b_HL) ||
                (break_class[i] == b_PO && break_class[i+1] == b_AL) ||
                (break_class[i] == b_PO && break_class[i+1] == b_HL))
                    set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);

            /* LB25 */
            if ((break_class[i] == b_CL && break_class[i+1] == b_PO) ||
                (break_class[i] == b_CP && break_class[i+1] == b_PO) ||
                (break_class[i] == b_CL && break_class[i+1] == b_PR) ||
                (break_class[i] == b_CP && break_class[i+1] == b_PR) ||
                (break_class[i] == b_NU && break_class[i+1] == b_PO) ||
                (break_class[i] == b_NU && break_class[i+1] == b_PR) ||
                (break_class[i] == b_PO && break_class[i+1] == b_OP) ||
                (break_class[i] == b_PO && break_class[i+1] == b_NU) ||
                (break_class[i] == b_PR && break_class[i+1] == b_OP) ||
                (break_class[i] == b_PR && break_class[i+1] == b_NU) ||
                (break_class[i] == b_HY && break_class[i+1] == b_NU) ||
                (break_class[i] == b_IS && break_class[i+1] == b_NU) ||
                (break_class[i] == b_NU && break_class[i+1] == b_NU) ||
                (break_class[i] == b_SY && break_class[i+1] == b_NU))
                    set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);

            /* LB26 */
            if (break_class[i] == b_JL)
            {
                switch (break_class[i+1])
                {
                    case b_JL:
                    case b_JV:
                    case b_H2:
                    case b_H3:
                        set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
                }
            }
            if ((break_class[i] == b_JV || break_class[i] == b_H2) &&
                (break_class[i+1] == b_JV || break_class[i+1] == b_JT))
                    set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
            if ((break_class[i] == b_JT || break_class[i] == b_H3) &&
                 break_class[i+1] == b_JT)
                    set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);

            /* LB27 */
            switch (break_class[i])
            {
                case b_JL:
                case b_JV:
                case b_JT:
                case b_H2:
                case b_H3:
                    if (break_class[i+1] == b_IN || break_class[i+1] == b_PO)
                        set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
            }
            if (break_class[i] == b_PO)
            {
                switch (break_class[i+1])
                {
                    case b_JL:
                    case b_JV:
                    case b_JT:
                    case b_H2:
                    case b_H3:
                        set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
                }
            }

            /* LB28 */
            if ((break_class[i] == b_AL && break_class[i+1] == b_AL) ||
                (break_class[i] == b_AL && break_class[i+1] == b_HL) ||
                (break_class[i] == b_HL && break_class[i+1] == b_AL) ||
                (break_class[i] == b_HL && break_class[i+1] == b_HL))
                set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);

            /* LB29 */
            if ((break_class[i] == b_IS && break_class[i+1] == b_AL) ||
                (break_class[i] == b_IS && break_class[i+1] == b_HL))
                set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);

            /* LB30 */
            if ((break_class[i] == b_AL || break_class[i] == b_HL || break_class[i] == b_NU) &&
                 break_class[i+1] == b_OP)
                set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
            if (break_class[i] == b_CP &&
               (break_class[i+1] == b_AL || break_class[i] == b_HL || break_class[i] == b_NU))
                set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);

            /* LB30a */
            if (break_class[i] == b_RI && break_class[i+1] == b_RI)
                set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state);
        }
    }

    heap_free(break_class);
    return S_OK;
}

static HRESULT WINAPI dwritetextanalyzer_QueryInterface(IDWriteTextAnalyzer *iface, REFIID riid, void **obj)
{
    TRACE("(%s %p)\n", debugstr_guid(riid), obj);

    if (IsEqualIID(riid, &IID_IUnknown) || IsEqualIID(riid, &IID_IDWriteTextAnalyzer))
    {
        *obj = iface;
        return S_OK;
    }

    *obj = NULL;
    return E_NOINTERFACE;

}

static ULONG WINAPI dwritetextanalyzer_AddRef(IDWriteTextAnalyzer *iface)
{
    return 2;
}

static ULONG WINAPI dwritetextanalyzer_Release(IDWriteTextAnalyzer *iface)
{
    return 1;
}

static HRESULT WINAPI dwritetextanalyzer_AnalyzeScript(IDWriteTextAnalyzer *iface,
    IDWriteTextAnalysisSource* source, UINT32 position, UINT32 length, IDWriteTextAnalysisSink* sink)
{
    const WCHAR *text;
    HRESULT hr;
    UINT32 len;

    TRACE("(%p %u %u %p)\n", source, position, length, sink);

    hr = IDWriteTextAnalysisSource_GetTextAtPosition(source, position, &text, &len);
    if (FAILED(hr)) return hr;

    return analyze_script(text, len, sink);
}

static HRESULT WINAPI dwritetextanalyzer_AnalyzeBidi(IDWriteTextAnalyzer *iface,
    IDWriteTextAnalysisSource* source, UINT32 position, UINT32 length, IDWriteTextAnalysisSink* sink)
{
    FIXME("(%p %u %u %p): stub\n", source, position, length, sink);
    return E_NOTIMPL;
}

static HRESULT WINAPI dwritetextanalyzer_AnalyzeNumberSubstitution(IDWriteTextAnalyzer *iface,
    IDWriteTextAnalysisSource* source, UINT32 position, UINT32 length, IDWriteTextAnalysisSink* sink)
{
    FIXME("(%p %u %u %p): stub\n", source, position, length, sink);
    return E_NOTIMPL;
}

static HRESULT WINAPI dwritetextanalyzer_AnalyzeLineBreakpoints(IDWriteTextAnalyzer *iface,
    IDWriteTextAnalysisSource* source, UINT32 position, UINT32 length, IDWriteTextAnalysisSink* sink)
{
    DWRITE_LINE_BREAKPOINT *breakpoints = NULL;
    WCHAR *buff = NULL;
    const WCHAR *text;
    HRESULT hr;
    UINT32 len;

    TRACE("(%p %u %u %p)\n", source, position, length, sink);

    if (length == 0)
        return S_OK;

    /* get some, check for length */
    text = NULL;
    len = 0;
    hr = IDWriteTextAnalysisSource_GetTextAtPosition(source, position, &text, &len);
    if (FAILED(hr)) return hr;

    if (len < length) {
        UINT32 read;

        buff = heap_alloc(length*sizeof(WCHAR));
        if (!buff)
            return E_OUTOFMEMORY;
        memcpy(buff, text, len*sizeof(WCHAR));
        read = len;

        while (read < length && text) {
            text = NULL;
            len = 0;
            hr = IDWriteTextAnalysisSource_GetTextAtPosition(source, read, &text, &len);
            if (FAILED(hr))
                goto done;
            memcpy(&buff[read], text, min(len, length-read)*sizeof(WCHAR));
            read += len;
        }

        text = buff;
    }

    breakpoints = heap_alloc(length*sizeof(*breakpoints));
    if (!breakpoints) {
        hr = E_OUTOFMEMORY;
        goto done;
    }

    hr = analyze_linebreaks(text, length, breakpoints);
    if (FAILED(hr))
        goto done;

    hr = IDWriteTextAnalysisSink_SetLineBreakpoints(sink, position, length, breakpoints);

done:
    heap_free(breakpoints);
    heap_free(buff);

    return hr;
}

static HRESULT WINAPI dwritetextanalyzer_GetGlyphs(IDWriteTextAnalyzer *iface,
    WCHAR const* text, UINT32 length, IDWriteFontFace* font_face, BOOL is_sideways,
    BOOL is_rtl, DWRITE_SCRIPT_ANALYSIS const* analysis, WCHAR const* locale,
    IDWriteNumberSubstitution* substitution, DWRITE_TYPOGRAPHIC_FEATURES const** features,
    UINT32 const* feature_range_len, UINT32 feature_ranges, UINT32 max_glyph_count,
    UINT16* clustermap, DWRITE_SHAPING_TEXT_PROPERTIES* text_props, UINT16* glyph_indices,
    DWRITE_SHAPING_GLYPH_PROPERTIES* glyph_props, UINT32* actual_glyph_count)
{
    FIXME("(%s:%u %p %d %d %p %s %p %p %p %u %u %p %p %p %p %p): stub\n", debugstr_wn(text, length),
        length, font_face, is_sideways, is_rtl, analysis, debugstr_w(locale), substitution, features, feature_range_len,
        feature_ranges, max_glyph_count, clustermap, text_props, glyph_indices, glyph_props, actual_glyph_count);
    return E_NOTIMPL;
}

static HRESULT WINAPI dwritetextanalyzer_GetGlyphPlacements(IDWriteTextAnalyzer *iface,
    WCHAR const* text, UINT16 const* clustermap, DWRITE_SHAPING_TEXT_PROPERTIES* props,
    UINT32 text_len, UINT16 const* glyph_indices, DWRITE_SHAPING_GLYPH_PROPERTIES const* glyph_props,
    UINT32 glyph_count, IDWriteFontFace * font_face, FLOAT fontEmSize, BOOL is_sideways, BOOL is_rtl,
    DWRITE_SCRIPT_ANALYSIS const* analysis, WCHAR const* locale, DWRITE_TYPOGRAPHIC_FEATURES const** features,
    UINT32 const* feature_range_len, UINT32 feature_ranges, FLOAT* glyph_advances, DWRITE_GLYPH_OFFSET* glyph_offsets)
{
    FIXME("(%s %p %p %u %p %p %u %p %f %d %d %p %s %p %p %u %p %p): stub\n", debugstr_w(text),
        clustermap, props, text_len, glyph_indices, glyph_props, glyph_count, font_face, fontEmSize, is_sideways,
        is_rtl, analysis, debugstr_w(locale), features, feature_range_len, feature_ranges, glyph_advances, glyph_offsets);
    return E_NOTIMPL;
}

static HRESULT WINAPI dwritetextanalyzer_GetGdiCompatibleGlyphPlacements(IDWriteTextAnalyzer *iface,
    WCHAR const* text, UINT16 const* clustermap, DWRITE_SHAPING_TEXT_PROPERTIES* props,
    UINT32 text_len, UINT16 const* glyph_indices, DWRITE_SHAPING_GLYPH_PROPERTIES const* glyph_props,
    UINT32 glyph_count, IDWriteFontFace * font_face, FLOAT fontEmSize, FLOAT pixels_per_dip,
    DWRITE_MATRIX const* transform, BOOL use_gdi_natural, BOOL is_sideways, BOOL is_rtl,
    DWRITE_SCRIPT_ANALYSIS const* analysis, WCHAR const* locale, DWRITE_TYPOGRAPHIC_FEATURES const** features,
    UINT32 const* feature_range_lengths, UINT32 feature_ranges, FLOAT* glyph_advances, DWRITE_GLYPH_OFFSET* glyph_offsets)
{
    FIXME("(%s %p %p %u %p %p %u %p %f %f %p %d %d %d %p %s %p %p %u %p %p): stub\n", debugstr_w(text),
        clustermap, props, text_len, glyph_indices, glyph_props, glyph_count, font_face, fontEmSize, pixels_per_dip,
        transform, use_gdi_natural, is_sideways, is_rtl, analysis, debugstr_w(locale), features, feature_range_lengths,
        feature_ranges, glyph_advances, glyph_offsets);
    return E_NOTIMPL;
}

static const struct IDWriteTextAnalyzerVtbl textanalyzervtbl = {
    dwritetextanalyzer_QueryInterface,
    dwritetextanalyzer_AddRef,
    dwritetextanalyzer_Release,
    dwritetextanalyzer_AnalyzeScript,
    dwritetextanalyzer_AnalyzeBidi,
    dwritetextanalyzer_AnalyzeNumberSubstitution,
    dwritetextanalyzer_AnalyzeLineBreakpoints,
    dwritetextanalyzer_GetGlyphs,
    dwritetextanalyzer_GetGlyphPlacements,
    dwritetextanalyzer_GetGdiCompatibleGlyphPlacements
};

static IDWriteTextAnalyzer textanalyzer = { &textanalyzervtbl };

HRESULT get_textanalyzer(IDWriteTextAnalyzer **ret)
{
    *ret = &textanalyzer;
    return S_OK;
}