From 841202856276dfbc91a13de9fb5d51ab22a6db6d Mon Sep 17 00:00:00 2001
From: Aric Stewart <aric@codeweavers.com>
Date: Wed, 19 May 2010 09:16:45 -0500
Subject: [PATCH] usp10: Attempt to do shaping via the font GSUB table first.

Modern fonts expect this and the presentation form B glyphs are a fallback.
---
 dlls/usp10/shape.c          | 376 +++++++++++++++++++++++++++++++++++-
 dlls/usp10/usp10.c          |   1 +
 dlls/usp10/usp10_internal.h |   1 +
 3 files changed, 377 insertions(+), 1 deletion(-)

diff --git a/dlls/usp10/shape.c b/dlls/usp10/shape.c
index c743c040c1e..bccb1fdba9d 100644
--- a/dlls/usp10/shape.c
+++ b/dlls/usp10/shape.c
@@ -26,6 +26,7 @@
 #include "winuser.h"
 #include "winnls.h"
 #include "usp10.h"
+#include "winternl.h"
 
 #include "usp10_internal.h"
 
@@ -55,6 +56,371 @@ enum joined_forms {
     Xm
 };
 
+#ifdef WORDS_BIGENDIAN
+#define GET_BE_WORD(x) (x) /* big-endian build: the stored WORD is used as is */
+#else
+#define GET_BE_WORD(x) RtlUshortByteSwap(x) /* otherwise swap the two bytes of the stored WORD */
+#endif /* WORDS_BIGENDIAN */
+
+/* These are all structures needed for the GSUB table (MS_MAKE_TAG packs a four character tag, first character in the low byte) */
+#define MS_MAKE_TAG( _x1, _x2, _x3, _x4 ) \
+          ( ( (ULONG)(_x4) << 24 ) |     \
+            ( (ULONG)(_x3) << 16 ) |     \
+            ( (ULONG)(_x2) <<  8 ) |     \
+              (ULONG)(_x1)         )
+
+#define GSUB_TAG MS_MAKE_TAG('G', 'S', 'U', 'B')
+
+typedef struct { /* start of the GSUB table; WORD fields are read through GET_BE_WORD */
+    DWORD version; /* not read by the code in this file */
+    WORD ScriptList; /* byte offset of the GSUB_ScriptList from the start of this header */
+    WORD FeatureList; /* byte offset of the GSUB_FeatureList from the start of this header */
+    WORD LookupList; /* byte offset of the GSUB_LookupList from the start of this header */
+} GSUB_Header;
+
+typedef struct {
+    CHAR ScriptTag[4]; /* compared over 4 characters with strncmp */
+    WORD Script; /* byte offset of the GSUB_Script from the start of the GSUB_ScriptList */
+} GSUB_ScriptRecord;
+
+typedef struct {
+    WORD ScriptCount; /* number of ScriptRecord entries that are walked */
+    GSUB_ScriptRecord ScriptRecord[1]; /* declared [1], indexed up to ScriptCount */
+} GSUB_ScriptList;
+
+typedef struct {
+    CHAR LangSysTag[4]; /* compared over 4 characters with strncmp */
+    WORD LangSys; /* byte offset of the GSUB_LangSys from the start of the GSUB_Script */
+} GSUB_LangSysRecord;
+
+typedef struct {
+    WORD DefaultLangSys; /* byte offset of the default GSUB_LangSys from this table, 0 if none */
+    WORD LangSysCount; /* number of LangSysRecord entries that are walked */
+    GSUB_LangSysRecord LangSysRecord[1]; /* declared [1], indexed up to LangSysCount */
+} GSUB_Script;
+
+typedef struct {
+    WORD LookupOrder; /* Reserved */
+    WORD ReqFeatureIndex; /* not read by the code in this file */
+    WORD FeatureCount; /* number of FeatureIndex entries that are walked */
+    WORD FeatureIndex[1]; /* indices into GSUB_FeatureList.FeatureRecord; indexed up to FeatureCount */
+} GSUB_LangSys;
+
+typedef struct {
+    CHAR FeatureTag[4]; /* compared over 4 characters with strncmp */
+    WORD Feature; /* byte offset of the GSUB_Feature from the start of the GSUB_FeatureList */
+} GSUB_FeatureRecord;
+
+typedef struct {
+    WORD FeatureCount; /* presumably the number of FeatureRecord entries; not read in this file */
+    GSUB_FeatureRecord FeatureRecord[1]; /* declared [1], indexed by GSUB_LangSys.FeatureIndex values */
+} GSUB_FeatureList;
+
+typedef struct {
+    WORD FeatureParams; /* Reserved */
+    WORD LookupCount; /* number of LookupListIndex entries that are walked */
+    WORD LookupListIndex[1]; /* indices into GSUB_LookupList.Lookup; indexed up to LookupCount */
+} GSUB_Feature;
+
+typedef struct {
+    WORD LookupCount; /* presumably the number of Lookup entries; not read in this file */
+    WORD Lookup[1]; /* byte offsets of GSUB_LookupTable entries from the start of this list */
+} GSUB_LookupList;
+
+typedef struct {
+    WORD LookupType; /* only type 1 is handled by GSUB_apply_feature */
+    WORD LookupFlag; /* only traced, not interpreted */
+    WORD SubTableCount; /* number of SubTable entries that are walked */
+    WORD SubTable[1]; /* byte offsets of the subtables from the start of this lookup table */
+} GSUB_LookupTable;
+
+typedef struct {
+    WORD CoverageFormat; /* 1 for this layout */
+    WORD GlyphCount; /* number of GlyphArray entries that are searched */
+    WORD GlyphArray[1]; /* glyph ids; the position of a match is the coverage index */
+} GSUB_CoverageFormat1;
+
+typedef struct {
+    WORD Start; /* first glyph id of the range */
+    WORD End; /* last glyph id of the range, inclusive */
+    WORD StartCoverageIndex; /* coverage index assigned to Start */
+} GSUB_RangeRecord;
+
+typedef struct {
+    WORD CoverageFormat; /* 2 for this layout */
+    WORD RangeCount; /* number of RangeRecord entries that are searched */
+    GSUB_RangeRecord RangeRecord[1]; /* the search stops at the first Start above the glyph, so ascending order is assumed */
+} GSUB_CoverageFormat2;
+
+typedef struct {
+    WORD SubstFormat; /* = 1 */
+    WORD Coverage; /* byte offset of the coverage table from the start of this subtable */
+    WORD DeltaGlyphID; /* added to the id of a covered glyph */
+} GSUB_SingleSubstFormat1;
+
+typedef struct {
+    WORD SubstFormat; /* = 2 */
+    WORD Coverage; /* byte offset of the coverage table from the start of this subtable */
+    WORD GlyphCount; /* only traced by the code in this file */
+    WORD Substitute[1]; /* replacement glyph ids, indexed by coverage index */
+}GSUB_SingleSubstFormat2;
+
+/* GSUB feature tags looked up per character via context_shape[]; the orders of joined_forms and contextual_features need to line up */
+static const char* contextual_features[] =
+{
+    "isol",
+    "fina",
+    "init",
+    "medi"
+};
+
+static INT GSUB_is_glyph_covered(LPCVOID table , UINT glyph)
+{
+    /* Returns the coverage index of glyph in the coverage table, or -1 when the
+     * glyph is not covered or the table format is unknown. */
+    const GSUB_CoverageFormat1 *list = table;
+    const GSUB_CoverageFormat2 *ranges = table;
+    int total, n;
+
+    switch (GET_BE_WORD(list->CoverageFormat))
+    {
+    case 1:
+        /* flat glyph list: the position in the list is the coverage index */
+        total = GET_BE_WORD(list->GlyphCount);
+        TRACE("Coverage Format 1, %i glyphs\n",total);
+        for (n = 0; n < total; n++)
+        {
+            if (GET_BE_WORD(list->GlyphArray[n]) == glyph)
+                return n;
+        }
+        return -1;
+
+    case 2:
+        /* glyph ranges, each carrying the coverage index of its first glyph */
+        total = GET_BE_WORD(ranges->RangeCount);
+        TRACE("Coverage Format 2, %i ranges\n",total);
+        for (n = 0; n < total; n++)
+        {
+            const GSUB_RangeRecord *range = &ranges->RangeRecord[n];
+            const UINT first = GET_BE_WORD(range->Start);
+
+            /* give up at the first range that starts past the glyph */
+            if (glyph < first)
+                return -1;
+            if (glyph <= GET_BE_WORD(range->End))
+                return GET_BE_WORD(range->StartCoverageIndex) + glyph - first;
+        }
+        return -1;
+
+    default:
+        ERR("Unknown CoverageFormat %i\n",GET_BE_WORD(list->CoverageFormat));
+        return -1;
+    }
+}
+
+static const GSUB_Script* GSUB_get_script_table( const GSUB_Header* header, const char* tag)
+{
+    /* Returns the script table whose tag equals tag, else the font's default script
+     * table, else NULL.  Record offsets are relative to the start of the ScriptList. */
+    const GSUB_ScriptList *script;
+    const GSUB_Script *deflt = NULL;
+    int i, count;
+    script = (const GSUB_ScriptList*)((const BYTE*)header + GET_BE_WORD(header->ScriptList));
+    count = GET_BE_WORD(script->ScriptCount);
+    TRACE("%i scripts in this font\n",count);
+    for (i = 0; i < count; i++)
+    {
+        const char *rec_tag = script->ScriptRecord[i].ScriptTag;
+        const GSUB_Script *scr = (const GSUB_Script*)((const BYTE*)script + GET_BE_WORD(script->ScriptRecord[i].Script));
+
+        if (strncmp(rec_tag, tag,4)==0)
+            return scr;
+        /* the registered default script tag is 'DFLT'; 'dflt' is only used when no 'DFLT' record exists */
+        if (strncmp(rec_tag, "DFLT",4)==0 || (!deflt && strncmp(rec_tag, "dflt",4)==0))
+            deflt = scr;
+    }
+    return deflt;
+}
+
+static const GSUB_LangSys* GSUB_get_lang_table( const GSUB_Script* script, const char* tag)
+{
+    /* Looks up the language system tagged tag in a script table.  When no record
+     * matches, the script's default language system is returned, or NULL if the
+     * script has none (offset 0).  Offsets are relative to the script table. */
+    const BYTE *base = (const BYTE*)script;
+    const int default_offset = GET_BE_WORD(script->DefaultLangSys);
+    const int record_count = GET_BE_WORD(script->LangSysCount);
+    int n;
+
+    TRACE("Deflang %x, LangCount %i\n",default_offset, record_count);
+
+    for (n = 0; n < record_count; n++)
+    {
+        const GSUB_LangSysRecord *rec = &script->LangSysRecord[n];
+
+        if (strncmp(rec->LangSysTag,tag,4) != 0)
+            continue;
+        return (const GSUB_LangSys*)(base + GET_BE_WORD(rec->LangSys));
+    }
+    if (!default_offset)
+        return NULL;
+    return (const GSUB_LangSys*)(base + default_offset);
+}
+
+static const GSUB_Feature * GSUB_get_feature(const GSUB_Header *header, const GSUB_LangSys *lang, const char* tag)
+{
+    /* Returns the first feature listed by lang whose tag equals tag, or NULL.
+     * Feature offsets are relative to the start of the FeatureList. */
+    int i, count, total;
+    const GSUB_FeatureList *feature;
+    feature = (const GSUB_FeatureList*)((const BYTE*)header + GET_BE_WORD(header->FeatureList));
+    count = GET_BE_WORD(lang->FeatureCount);
+    total = GET_BE_WORD(feature->FeatureCount);
+    TRACE("%i features\n",count);
+    for (i = 0; i < count; i++)
+    {
+        int index = GET_BE_WORD(lang->FeatureIndex[i]);
+        /* skip indices outside the FeatureList instead of reading past its records */
+        if (index < total && strncmp(feature->FeatureRecord[index].FeatureTag,tag,4)==0)
+            return (const GSUB_Feature*)((const BYTE*)feature + GET_BE_WORD(feature->FeatureRecord[index].Feature));
+    }
+    return NULL;
+}
+
+static UINT GSUB_apply_feature(const GSUB_Header * header, const GSUB_Feature* feature, UINT glyph)
+{
+    /* Runs glyph through the lookups of feature, in order, and returns the resulting
+     * glyph.  Only single substitution lookups (type 1) are handled.  Lookup offsets
+     * are relative to the LookupList, subtable offsets to their lookup table and
+     * coverage offsets to their subtable. */
+    int i;
+    const GSUB_LookupList *lookup;
+    lookup = (const GSUB_LookupList*)((const BYTE*)header + GET_BE_WORD(header->LookupList));
+
+    TRACE("%i lookups\n", GET_BE_WORD(feature->LookupCount));
+    for (i = 0; i < GET_BE_WORD(feature->LookupCount); i++)
+    {
+        const GSUB_LookupTable *look;
+        int j;
+        int offset = GET_BE_WORD(lookup->Lookup[GET_BE_WORD(feature->LookupListIndex[i])]);
+        look = (const GSUB_LookupTable*)((const BYTE*)lookup + offset);
+        TRACE("type %i, flag %x, subtables %i\n",GET_BE_WORD(look->LookupType),GET_BE_WORD(look->LookupFlag),GET_BE_WORD(look->SubTableCount));
+        if (GET_BE_WORD(look->LookupType) != 1)
+        {
+            FIXME("We only handle SubType 1 (%i)\n",GET_BE_WORD(look->LookupType));
+            continue;
+        }
+
+        for (j = 0; j < GET_BE_WORD(look->SubTableCount); j++)
+        {
+            const GSUB_SingleSubstFormat1 *ssf1;
+            const BYTE *coverage;
+            ssf1 = (const GSUB_SingleSubstFormat1*)((const BYTE*)look + GET_BE_WORD(look->SubTable[j]));
+            coverage = (const BYTE*)ssf1 + GET_BE_WORD(ssf1->Coverage);
+            if (GET_BE_WORD(ssf1->SubstFormat) == 1)
+            {
+                TRACE("  subtype 1, delta %i\n", GET_BE_WORD(ssf1->DeltaGlyphID));
+                if (GSUB_is_glyph_covered(coverage, glyph) == -1)
+                    continue;
+                TRACE("  Glyph 0x%x ->",glyph);
+                /* Glyph IDs are 16 bits wide and the delta is added modulo 65536, so a
+                 * negative delta (stored as e.g. 0xffff) must wrap instead of carrying. */
+                glyph = (WORD)(glyph + GET_BE_WORD(ssf1->DeltaGlyphID));
+                TRACE(" 0x%x\n",glyph);
+            }
+            else
+            {
+                const GSUB_SingleSubstFormat2 *ssf2 = (const GSUB_SingleSubstFormat2 *)ssf1;
+                INT index;
+                TRACE("  subtype 2,  glyph count %i\n", GET_BE_WORD(ssf2->GlyphCount));
+                index = GSUB_is_glyph_covered(coverage, glyph);
+                TRACE("  Coverage index %i\n",index);
+                if (index == -1)
+                    continue;
+                TRACE("    Glyph is 0x%x ->",glyph);
+                glyph = GET_BE_WORD(ssf2->Substitute[index]);
+                TRACE("0x%x\n",glyph);
+            }
+            break; /* a lookup is finished once one of its subtables substituted the glyph */
+        }
+    }
+    return glyph;
+}
+
+static const char* get_opentype_script(HDC hdc)
+{
+    /* Picks an OpenType script tag from the charset GetTextCharsetInfo reports for hdc ("latn"
+     * when unlisted).  I am not sure if this is the correct way to generate our script tag */
+    static const struct { UINT charset; const char *tag; } script_map[] = {
+        { ANSI_CHARSET,        "latn" },
+        { BALTIC_CHARSET,      "latn" }, /* ?? */
+        { CHINESEBIG5_CHARSET, "hani" },
+        { EASTEUROPE_CHARSET,  "latn" }, /* ?? */
+        { GB2312_CHARSET,      "hani" },
+        { GREEK_CHARSET,       "grek" },
+        { HANGUL_CHARSET,      "hang" },
+        { RUSSIAN_CHARSET,     "cyrl" },
+        { SHIFTJIS_CHARSET,    "kana" },
+        { TURKISH_CHARSET,     "latn" }, /* ?? */
+        { VIETNAMESE_CHARSET,  "latn" },
+        { JOHAB_CHARSET,       "latn" }, /* ?? */
+        { ARABIC_CHARSET,      "arab" },
+        { HEBREW_CHARSET,      "hebr" },
+        { THAI_CHARSET,        "thai" },
+    };
+    const UINT charset = GetTextCharsetInfo(hdc, NULL, 0x0);
+    unsigned int n;
+    for (n = 0; n < sizeof(script_map)/sizeof(script_map[0]); n++)
+        if (script_map[n].charset == charset) return script_map[n].tag;
+    return "latn";
+}
+
+static WORD get_GSUB_feature_glyph(HDC hdc, void* GSUB_Table, UINT glyph, const char* feat)
+{
+    /* Substitutes glyph using feature feat of the given GSUB table, choosing the
+     * script through get_opentype_script(hdc).  The glyph comes back unchanged when
+     * there is no table or the script, language system or feature is missing. */
+    const GSUB_Header *gsub = GSUB_Table;
+    const GSUB_Script *scr;
+    const GSUB_LangSys *langsys;
+    const GSUB_Feature *found;
+
+    if (gsub == NULL)
+        return glyph;
+    scr = GSUB_get_script_table(gsub, get_opentype_script(hdc));
+    if (scr == NULL)
+    {
+        TRACE("Script not found\n");
+        return glyph;
+    }
+    langsys = GSUB_get_lang_table(scr, "xxxx"); /* Need to get Lang tag */
+    if (langsys == NULL)
+    {
+        TRACE("Language not found\n");
+        return glyph;
+    }
+    found = GSUB_get_feature(gsub, langsys, feat);
+    if (found == NULL)
+    {
+        TRACE("%s feature not found\n",feat);
+        return glyph;
+    }
+    return GSUB_apply_feature(gsub, found, glyph);
+}
+
+static VOID *load_gsub_table(HDC hdc)
+{
+    VOID* GSUB_Table; /* heap copy of the font's GSUB table; the caller stores and frees it */
+    int length = GetFontData(hdc, GSUB_TAG , 0, NULL, 0);
+    if (length == GDI_ERROR || !(GSUB_Table = HeapAlloc(GetProcessHeap(),0,length)))
+        return NULL; /* the font has no GSUB table, or the allocation failed */
+    if (GetFontData(hdc, GSUB_TAG , 0, GSUB_Table, length) != GDI_ERROR)
+        TRACE("Loaded GSUB table of %i bytes\n",length);
+    else /* the buffer was never filled in, so it must not be parsed as a table */
+    {   HeapFree(GetProcessHeap(),0,GSUB_Table); GSUB_Table = NULL; }
+    return GSUB_Table;
+}
+
 static CHAR neighbour_joining_type(int i, int delta, const CHAR* context_type, INT cchLen, SCRIPT_ANALYSIS *psa)
 {
     if (i + delta < 0)
@@ -113,6 +479,9 @@ void SHAPE_ShapeArabicGlyphs(HDC hdc, ScriptCache *psc, SCRIPT_ANALYSIS *psa, WC
         dirL = -1;
     }
 
+    if (!psc->GSUB_Table)
+        psc->GSUB_Table = load_gsub_table(hdc);
+
     context_type = HeapAlloc(GetProcessHeap(),0,cChars);
     context_shape = HeapAlloc(GetProcessHeap(),0,sizeof(INT) * cChars);
 
@@ -139,12 +508,17 @@ void SHAPE_ShapeArabicGlyphs(HDC hdc, ScriptCache *psc, SCRIPT_ANALYSIS *psa, WC
     {
         WORD newGlyph = pwOutGlyphs[i];
 
-        if (pwcChars[i] >= FIRST_ARABIC_CHAR && pwcChars[i] <= LAST_ARABIC_CHAR)
+        if (psc->GSUB_Table)
+            newGlyph = get_GSUB_feature_glyph(hdc, psc->GSUB_Table, pwOutGlyphs[i], contextual_features[context_shape[i]]);
+        if (newGlyph == pwOutGlyphs[i] && pwcChars[i] >= FIRST_ARABIC_CHAR && pwcChars[i] <= LAST_ARABIC_CHAR)
         {
+            /* fall back to presentation form B */
             WCHAR context_char = wine_shaping_forms[pwcChars[i] - FIRST_ARABIC_CHAR][context_shape[i]];
             if (context_char != pwcChars[i] && GetGlyphIndicesW(hdc, &context_char, 1, &newGlyph, 0) != GDI_ERROR && newGlyph != 0x0000)
                 pwOutGlyphs[i] = newGlyph;
         }
+        else if (newGlyph != pwOutGlyphs[i])
+            pwOutGlyphs[i] = newGlyph;
     }
 
     HeapFree(GetProcessHeap(),0,context_shape);
diff --git a/dlls/usp10/usp10.c b/dlls/usp10/usp10.c
index 70d37380f56..261b6343e6d 100644
--- a/dlls/usp10/usp10.c
+++ b/dlls/usp10/usp10.c
@@ -306,6 +306,7 @@ HRESULT WINAPI ScriptFreeCache(SCRIPT_CACHE *psc)
             heap_free(((ScriptCache *)*psc)->glyphs[i]);
             heap_free(((ScriptCache *)*psc)->widths[i]);
         }
+        heap_free(((ScriptCache *)*psc)->GSUB_Table);
         heap_free(*psc);
         *psc = NULL;
     }
diff --git a/dlls/usp10/usp10_internal.h b/dlls/usp10/usp10_internal.h
index dfe3460f326..203dd30e404 100644
--- a/dlls/usp10/usp10_internal.h
+++ b/dlls/usp10/usp10_internal.h
@@ -37,6 +37,7 @@ typedef struct {
     TEXTMETRICW tm;
     WORD *glyphs[GLYPH_MAX / GLYPH_BLOCK_SIZE];
     ABC *widths[GLYPH_MAX / GLYPH_BLOCK_SIZE];
+    LPVOID *GSUB_Table;
 } ScriptCache;
 
 #define odd(x) ((x) & 1)