usp10: Improve Sinhala shaping using Indic rules.

This commit is contained in:
Aric Stewart 2011-06-02 14:56:07 -05:00 committed by Alexandre Julliard
parent 301c6ef4e5
commit 397c3ac51f
4 changed files with 441 additions and 1 deletions

View File

@ -4,6 +4,7 @@ IMPORTS = gdi32
C_SRCS = \
bidi.c \
indic.c \
mirror.c \
shape.c \
shaping.c \

270
dlls/usp10/indic.c Normal file
View File

@ -0,0 +1,270 @@
/*
* Implementation of Indic Syllables for the Uniscribe Script Processor
*
* Copyright 2011 CodeWeavers, Aric Stewart
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
*
*/
#include "config.h"
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include "windef.h"
#include "winbase.h"
#include "winuser.h"
#include "wingdi.h"
#include "winnls.h"
#include "usp10.h"
#include "winternl.h"
#include "wine/debug.h"
#include "usp10_internal.h"
WINE_DEFAULT_DEBUG_CHANNEL(uniscribe);
/*
 * Trace helper: writes one short tag per character describing the lexical
 * class that f assigns to it, followed by a newline.  Nothing is classified
 * or printed unless the uniscribe trace channel is enabled.
 *
 * str    characters to describe
 * cChar  number of characters in str
 * f      lexical classifier applied to every character
 */
static void debug_output_string(LPCWSTR str, int cChar, lexical_function f)
{
    int pos;

    if (!TRACE_ON(uniscribe))
        return;

    for (pos = 0; pos < cChar; pos++)
    {
        const int type = f(str[pos]);

        if (type == lex_Consonant)           { TRACE("C"); }
        else if (type == lex_Ra)             { TRACE("Ra"); }
        else if (type == lex_Vowel)          { TRACE("V"); }
        else if (type == lex_Nukta)          { TRACE("N"); }
        else if (type == lex_Halant)         { TRACE("H"); }
        else if (type == lex_ZWNJ)           { TRACE("Zwnj"); }
        else if (type == lex_ZWJ)            { TRACE("Zwj"); }
        else if (type == lex_Mantra_post)    { TRACE("Mp"); }
        else if (type == lex_Mantra_above)   { TRACE("Ma"); }
        else if (type == lex_Mantra_below)   { TRACE("Mb"); }
        else if (type == lex_Mantra_pre)     { TRACE("Mm"); }
        else if (type == lex_Modifier)       { TRACE("Sm"); }
        else if (type == lex_Vedic)          { TRACE("Vd"); }
        else if (type == lex_Anudatta)       { TRACE("A"); }
        else if (type == lex_Composed_Vowel) { TRACE("t"); }
        else                                 { TRACE("X"); } /* any other class */
    }
    TRACE("\n");
}
/* Non-zero when type is one of the two consonant classes (lex_Ra or
 * lex_Consonant). */
static inline BOOL is_consonant( int type )
{
    const BOOL is_ra = (type == lex_Ra);
    const BOOL is_plain = (type == lex_Consonant);

    return (is_ra || is_plain);
}
/* Non-zero when type is any of the four positional mantra classes
 * (above, below, pre or post). */
static inline BOOL is_mantra( int type )
{
    return !(type != lex_Mantra_above &&
             type != lex_Mantra_below &&
             type != lex_Mantra_pre &&
             type != lex_Mantra_post);
}
/* Non-zero when type is one of the joiner classes (lex_ZWJ or lex_ZWNJ). */
static inline BOOL is_joiner( int type )
{
    const BOOL zwj = (type == lex_ZWJ);
    const BOOL zwnj = (type == lex_ZWNJ);

    return (zwj || zwnj);
}
/*
 * Try to match one consonant "header" starting at input[next]:
 *
 *     consonant [nukta] halant [joiner]      or
 *     consonant [nukta] joiner halant
 *
 * and require that another consonant follows it.
 *
 * input  text being parsed
 * cChar  number of characters in input
 * start  index where the current syllable began (not used here)
 * next   index of the candidate header consonant
 * lex    classifies a character into one of the lex_* values
 *
 * Returns the index of the consonant following the header, or -1 when no
 * complete header followed by a consonant is found.  Every read is checked
 * against cChar so the buffer is never examined past its end.
 */
static INT consonant_header(LPCWSTR input, INT cChar, INT start, INT next,
                            lexical_function lex)
{
    if (next >= cChar || !is_consonant( lex(input[next]) )) return -1;
    next++;
    if ((next < cChar) && lex(input[next]) == lex_Nukta)
        next++;

    /* The consonant (and optional nukta) ended the text: no header here. */
    if (next >= cChar) return -1;

    if (lex(input[next])==lex_Halant)
    {
        next++;
        if((next < cChar) && is_joiner( lex(input[next]) ))
            next++;
        if ((next < cChar) && is_consonant( lex(input[next]) ))
            return next;
    }
    /* joiner + halant needs two characters to be available */
    else if ((next+1 < cChar) && is_joiner( lex(input[next]) ) && lex(input[next+1])==lex_Halant)
    {
        next+=2;
        if ((next < cChar) && is_consonant( lex(input[next]) ))
            return next;
    }
    return -1;
}
/*
 * Parse a consonant-based syllable beginning at input[next].
 *
 * Leading consonant headers are consumed first; the consonant that follows
 * them (or the consonant at next when there are no headers) becomes the
 * main character.  After it the parser accepts, in order: an optional nukta,
 * an optional anudatta, then either "halant [joiner]" or
 * "mantra* [nukta] [halant]", then an optional modifier and an optional
 * vedic mark.
 *
 * input  text being parsed
 * cChar  number of characters in input
 * start  index where the current syllable began (only forwarded)
 * main   receives the index of the main consonant on success
 * next   index at which to start parsing
 * lex    classifies a character into one of the lex_* values
 *
 * Returns the index just past the syllable, or -1 when input[next] does not
 * begin a consonant syllable (including when next is already at the end of
 * the text).  *main is left untouched on failure.
 */
static INT parse_consonant_syllable(LPCWSTR input, INT cChar, INT start,
                                    INT *main, INT next, lexical_function lex)
{
    int check;
    int headers = 0;

    /* Nothing left to parse; this also keeps the reads below in bounds. */
    if (next >= cChar) return -1;

    do
    {
        check = consonant_header(input,cChar,start,next,lex);
        if (check != -1)
        {
            next = check;
            headers++;
        }
    } while (check != -1);

    /* After at least one header, next already points at a consonant. */
    if (headers || is_consonant( lex(input[next]) ))
    {
        *main = next;
        next++;
    }
    else
        return -1;

    if ((next < cChar) && lex(input[next]) == lex_Nukta)
        next++;
    if ((next < cChar) && lex(input[next]) == lex_Anudatta)
        next++;

    if ((next < cChar) && lex(input[next]) == lex_Halant)
    {
        next++;
        if((next < cChar) && is_joiner( lex(input[next]) ))
            next++;
    }
    else if (next < cChar)
    {
        while((next < cChar) && is_mantra( lex(input[next]) ))
            next++;
        if ((next < cChar) && lex(input[next]) == lex_Nukta)
            next++;
        if ((next < cChar) && lex(input[next]) == lex_Halant)
            next++;
    }

    if ((next < cChar) && lex(input[next]) == lex_Modifier)
        next++;
    if ((next < cChar) && lex(input[next]) == lex_Vedic)
        next++;
    return next;
}
/*
 * Parse the remainder of a vowel-based syllable; next is the index of the
 * character following the syllable's main character.
 *
 * Accepted, in order: an optional nukta; one of "joiner halant consonant",
 * "halant consonant" or "ZWJ consonant"; an optional run of mantras which
 * may be followed by a nukta and a halant; an optional modifier; an optional
 * vedic mark.
 *
 * input  text being parsed
 * cChar  number of characters in input
 * start  index where the current syllable began (not used here)
 * next   index at which to continue parsing
 * lex    classifies a character into one of the lex_* values
 *
 * Returns the index just past the syllable.  Each look-ahead is checked
 * against cChar for the furthest character it inspects, so nothing is read
 * beyond the end of the text.
 */
static INT parse_vowel_syllable(LPCWSTR input, INT cChar, INT start,
                                INT next, lexical_function lex)
{
    if ((next < cChar) && lex(input[next]) == lex_Nukta)
        next++;

    /* three-character look-ahead: joiner, halant, consonant */
    if ((next+2 < cChar) && is_joiner( lex(input[next]) ) && lex(input[next+1])==lex_Halant && is_consonant( lex(input[next+2]) ))
        next+=3;
    /* two-character look-aheads */
    else if ((next+1 < cChar) && lex(input[next])==lex_Halant && is_consonant( lex(input[next+1]) ))
        next+=2;
    else if ((next+1 < cChar) && lex(input[next])==lex_ZWJ && is_consonant( lex(input[next+1]) ))
        next+=2;

    if ((next < cChar) && is_mantra( lex(input[next]) ))
    {
        while((next < cChar) && is_mantra( lex(input[next]) ))
            next++;
        if ((next < cChar) && lex(input[next]) == lex_Nukta)
            next++;
        if ((next < cChar) && lex(input[next]) == lex_Halant)
            next++;
    }

    if ((next < cChar) && lex(input[next]) == lex_Modifier)
        next++;
    if ((next < cChar) && lex(input[next]) == lex_Vedic)
        next++;
    return next;
}
/*
 * Parse one syllable beginning at input[next] and report its main character.
 *
 * The syllable is treated as vowel-based when it begins with a vowel, or
 * with Ra + halant + vowel.  When next == start, an NBSP is accepted in the
 * place of that vowel.  Both Ra-prefixed forms are only tried when at least
 * one further character follows the three-character prefix.  Anything else
 * is handed to the consonant syllable parser.
 *
 * input  text being parsed
 * cChar  number of characters in input
 * start  index compared against next to enable the NBSP forms
 * main   receives the index of the syllable's main character on success
 * next   index of the first character of the syllable
 * lex    classifies a character into one of the lex_* values
 *
 * Returns the index just past the syllable, or -1 when no syllable starts at
 * next (including when next is at or beyond the end of the text).
 */
static INT Indic_process_next_syllable( LPCWSTR input, INT cChar, INT start, INT* main, INT next, lexical_function lex )
{
    /* No character left to classify: do not read past the buffer. */
    if (next >= cChar) return -1;

    if (lex(input[next])==lex_Vowel)
    {
        *main = next;
        return parse_vowel_syllable(input, cChar, start, next+1, lex);
    }
    else if ((cChar > next+3) && lex(input[next]) == lex_Ra && lex(input[next+1]) == lex_Halant && lex(input[next+2]) == lex_Vowel)
    {
        *main = next+2;
        return parse_vowel_syllable(input, cChar, start, next+3, lex);
    }
    else if (start == next && lex(input[next])==lex_NBSP)
    {
        *main = next;
        return parse_vowel_syllable(input, cChar, start, next+1, lex);
    }
    else if (start == next && (cChar > next+3) && lex(input[next]) == lex_Ra && lex(input[next+1]) == lex_Halant && lex(input[next+2]) == lex_NBSP)
    {
        *main = next+2;
        return parse_vowel_syllable(input, cChar, start, next+3, lex);
    }

    return parse_consonant_syllable(input, cChar, start, main, next, lex);
}
/*
 * Split input into syllables and let reorder_f rearrange each one in place.
 *
 * Runs of lex_Generic characters are skipped.  For every syllable that
 * parses, reorder_f is called with the index of its first character, the
 * index of its main character and the index of its last character
 * (inclusive).  When parsing fails, processing restarts at the next
 * lex_Generic character, or stops if there is none.
 *
 * input      characters to reorder (modified in place by reorder_f)
 * cChar      number of characters in input
 * lex        classifies a character into one of the lex_* values
 * reorder_f  performs the per-syllable reordering
 *
 * Both callbacks are required; if either is NULL an error is logged and the
 * text is left untouched.
 */
void Indic_ReorderCharacters( LPWSTR input, int cChar, lexical_function lex, reorder_function reorder_f)
{
    int index = 0;
    int next = 0;
    int center = 0;

    if (!lex || ! reorder_f)
    {
        ERR("Failure to have required functions\n");
        return;
    }

    debug_output_string(input, cChar, lex);
    while (next != -1)
    {
        while((next < cChar) && lex(input[next]) == lex_Generic)
            next++;
        index = next;

        /* Whole string consumed (or empty): stop before the parser is asked
         * to classify a character beyond the end of the buffer. */
        if (index >= cChar)
            break;

        next = Indic_process_next_syllable(input, cChar, 0, &center, index, lex);
        if (next != -1)
        {
            reorder_f(input, index, center, next-1, lex);
            index = next;
        }
        else
        {
            int i;
            TRACE("Processing failed at %i\n",index);
            /* Resynchronise on the next generic character, if any; when
             * none is found next stays -1 and the outer loop ends. */
            for (i = index; i < cChar; i++)
                if (lex(input[i])==lex_Generic)
                {
                    TRACE("Restart processing at %i\n",i);
                    next = i;
                    index = i;
                    break;
                }
        }
    }
    TRACE("Processed %i of %i characters\n",index,cChar);
}
/*
 * Pick the base consonant of the syllable input[start..end].
 *
 * If input[main] is already a consonant it is kept.  Otherwise the syllable
 * is searched from end back to start and the first consonant met (that is,
 * the last one in the syllable) is chosen.
 *
 * Returns the index of the chosen consonant, or main unchanged when the
 * syllable holds no consonant.
 */
int Indic_FindBaseConsonant(LPWSTR input, INT start, INT main, INT end, lexical_function lex)
{
    int pos;

    if (is_consonant( lex(input[main]) ))
        return main;

    for (pos = end; pos >= start; pos--)
    {
        if (is_consonant( lex(input[pos]) ))
            return pos;
    }

    return main;
}

View File

@ -43,6 +43,7 @@ typedef VOID (*ContextualShapingProc)(HDC, ScriptCache*, SCRIPT_ANALYSIS*,
static void ContextualShape_Arabic(HDC hdc, ScriptCache *psc, SCRIPT_ANALYSIS *psa, WCHAR* pwcChars, INT cChars, WORD* pwOutGlyphs, INT* pcGlyphs, INT cMaxGlyphs, WORD *pwLogClust);
static void ContextualShape_Syriac(HDC hdc, ScriptCache *psc, SCRIPT_ANALYSIS *psa, WCHAR* pwcChars, INT cChars, WORD* pwOutGlyphs, INT* pcGlyphs, INT cMaxGlyphs, WORD *pwLogClust);
static void ContextualShape_Phags_pa(HDC hdc, ScriptCache *psc, SCRIPT_ANALYSIS *psa, WCHAR* pwcChars, INT cChars, WORD* pwOutGlyphs, INT* pcGlyphs, INT cMaxGlyphs, WORD *pwLogClust);
static void ContextualShape_Sinhala(HDC hdc, ScriptCache *psc, SCRIPT_ANALYSIS *psa, WCHAR* pwcChars, INT cChars, WORD* pwOutGlyphs, INT* pcGlyphs, INT cMaxGlyphs, WORD *pwLogClust);
typedef VOID (*ShapeCharGlyphPropProc)( HDC , ScriptCache*, SCRIPT_ANALYSIS*, const WCHAR*, const INT, const WORD*, const INT, WORD*, SCRIPT_CHARPROP*, SCRIPT_GLYPHPROP*);
@ -279,6 +280,12 @@ typedef struct {
static INT GSUB_apply_lookup(const GSUB_LookupList* lookup, INT lookup_index, WORD *glyphs, INT glyph_index, INT write_dir, INT *glyph_count);
/* One entry of a vowel decomposition table: a single character and the
 * sequence of characters it is expanded into. */
typedef struct tagVowelComponents
{
/* character to be replaced; a table is terminated by an entry whose base is 0x0 */
WCHAR base;
/* replacement sequence: parts[0] overwrites the base, later non-zero
 * entries are inserted after it; a 0x0000 entry ends the sequence early */
WCHAR parts[3];
} VowelComponents;
/* the orders of joined_forms and contextual_features need to line up */
static const char* contextual_features[] =
{
@ -400,7 +407,7 @@ static const ScriptShapeData ShapingData[] =
{{ standard_features, 2}, NULL, "cyrl", "", NULL, NULL},
{{ standard_features, 2}, NULL, "armn", "", NULL, NULL},
{{ standard_features, 2}, NULL, "geor", "", NULL, NULL},
{{ sinhala_features, 7}, NULL, "sinh", "", NULL, NULL},
{{ sinhala_features, 7}, NULL, "sinh", "", ContextualShape_Sinhala, NULL},
{{ tibetan_features, 2}, NULL, "tibt", "", NULL, ShapeCharGlyphProp_Tibet},
{{ tibetan_features, 2}, NULL, "tibt", "", NULL, ShapeCharGlyphProp_Tibet},
{{ tibetan_features, 2}, NULL, "phag", "", ContextualShape_Phags_pa, ShapeCharGlyphProp_Thai},
@ -1493,6 +1500,160 @@ static void ContextualShape_Phags_pa(HDC hdc, ScriptCache *psc, SCRIPT_ANALYSIS
HeapFree(GetProcessHeap(),0,context_shape);
}
/*
 * Replace the character at index cWalk with replacements[0] and insert the
 * remaining replacement characters directly after it, shifting the tail of
 * the string up by one position for each inserted character.
 *
 * hdc           not used by this function
 * cWalk         index of the character to replace
 * pcChars       in/out: current string length; incremented once for every
 *               inserted character
 * pwOutChars    string buffer, modified in place.  No capacity is passed in,
 *               so the caller has to provide room for up to two additional
 *               characters.
 * replacements  up to three characters; a 0x0000 entry ends the sequence
 *               early
 */
static void ReplaceInsertChars(HDC hdc, INT cWalk, INT* pcChars, WCHAR *pwOutChars, const WCHAR *replacements)
{
    int i;

    /* Replace */
    pwOutChars[cWalk] = replacements[0];
    cWalk=cWalk+1;

    /* Insert */
    /* Test the index first: replacements[] only has three elements, so
     * replacements[3] must never be evaluated. */
    for (i = 1; i < 3 && replacements[i] != 0x0000; i++)
    {
        int j;
        /* open a one-character gap at cWalk */
        for (j = *pcChars; j > cWalk; j--)
            pwOutChars[j] = pwOutChars[j-1];
        *pcChars= *pcChars+1;
        pwOutChars[cWalk] = replacements[i];
        cWalk = cWalk+1;
    }
}
/*
 * Walk the string and expand every character that appears as a base in the
 * vowels table into its component sequence.
 *
 * hdc         only forwarded to ReplaceInsertChars
 * pwOutChars  string buffer, modified in place
 * pcChars     in/out: string length, grows as characters are inserted
 * vowels      decomposition table, terminated by an entry whose base is 0x0
 */
static void DecomposeVowels(HDC hdc, WCHAR *pwOutChars, INT *pcChars, const VowelComponents vowels[])
{
    int pos = 0;

    while (pos < *pcChars)
    {
        const VowelComponents *entry = vowels;

        /* stop at the first matching entry or at the table terminator */
        while (entry->base != 0x0 && pwOutChars[pos] != entry->base)
            entry++;

        if (entry->base != 0x0)
            ReplaceInsertChars(hdc, pos, pcChars, pwOutChars, entry->parts);

        pos++;
    }
}
/*
 * If the syllable pwChar[start..end] begins with Ra + halant and the main
 * character is not the first one, move that pair so that it ends at index
 * main: the characters between the pair and main (inclusive) slide down by
 * two places and Ra, halant are stored at main-1 and main.
 */
static void Reorder_Ra_follows_base(LPWSTR pwChar, INT start, INT main, INT end, lexical_function lexical)
{
    WORD Ra, H;
    int src;

    if (start == main || end <= start+1)
        return;
    if (lexical(pwChar[start]) != lex_Ra || lexical(pwChar[start+1]) != lex_Halant)
        return;

    Ra = pwChar[start];
    H = pwChar[start+1];

    TRACE("Doing reorder of Ra to %i\n",main);

    /* close the two-character gap left by the Ra + halant pair */
    for (src = start+2; src <= main; src++)
        pwChar[src-2] = pwChar[src];

    pwChar[main-1] = Ra;
    pwChar[main] = H;
}
/*
 * Move every character classified as lex_Mantra_pre that follows index main
 * (up to and including end) to index main, shifting the characters in
 * between up by one position.  main itself is not adjusted after a move, so
 * a later pre-base mantra is again inserted at the same index.
 */
static void Reorder_Mantra_precede_base(LPWSTR pwChar, INT start, INT main, INT end, lexical_function lexical)
{
    int pos;

    /* reorder Mantras */
    for (pos = main+1; pos <= end; pos++)
    {
        WCHAR c;
        int j;

        if (lexical(pwChar[pos]) != lex_Mantra_pre)
            continue;

        c = pwChar[pos];
        TRACE("Doing reorder of %x %x\n",c,pwChar[main]);

        /* rotate pwChar[main..pos] right by one */
        for (j = pos; j > main; j--)
            pwChar[j] = pwChar[j-1];
        pwChar[main] = c;
    }
}
/*
 * Per-syllable reordering used for Sinhala.  Single-character syllables and
 * syllables whose base turns out to be a vowel are left alone; otherwise a
 * leading Ra + halant is moved after the base and pre-base mantras are moved
 * in front of it.
 */
static void Reorder_Like_Sinhala(LPWSTR pwChar, INT start, INT main, INT end, lexical_function lexical)
{
    INT base;

    TRACE("Syllable (%i..%i..%i)\n",start,main,end);

    /* nothing to reorder in a one-character syllable */
    if (start == end && main == start)
        return;

    base = Indic_FindBaseConsonant(pwChar, start, main, end, lexical);
    if (lexical(pwChar[base]) != lex_Vowel)
    {
        Reorder_Ra_follows_base(pwChar, start, base, end, lexical);
        Reorder_Mantra_precede_base(pwChar, start, base, end, lexical);
    }
}
/*
 * Lexical classifier for the Sinhala shaper: maps a character to one of the
 * lex_* classes.  Individually listed code points are tested first, then the
 * ranges in table order (the first matching range wins); everything else is
 * lex_Generic.
 */
static int sinhala_lex(WCHAR c)
{
    static const struct
    {
        WCHAR first;
        WCHAR last;
        int type;
    } ranges[] =
    {
        {0x0D82, 0x0D83, lex_Modifier},
        {0x0D85, 0x0D96, lex_Vowel},
        {0x0D96, 0x0DC6, lex_Consonant},   /* 0x0D96 is claimed by the vowel range above */
        {0x0DD0, 0x0DD1, lex_Mantra_post},
        {0x0DD2, 0x0DD3, lex_Mantra_above},
        {0x0DD4, 0x0DD6, lex_Mantra_below},
        {0x0DDD, 0x0DDE, lex_Composed_Vowel},
        {0x0DF2, 0x0DF3, lex_Mantra_post},
    };
    unsigned int i;

    /* individually classified code points */
    if (c == 0x0DCA) return lex_Halant;
    if (c == 0x0DCF || c == 0x0DDF || c == 0x0DD8) return lex_Mantra_post;
    if (c == 0x0DD9 || c == 0x0DDB) return lex_Mantra_pre;
    if (c == 0x0DDA || c == 0x0DDC) return lex_Composed_Vowel;
    if (c == 0x200D) return lex_ZWJ;
    if (c == 0x200C) return lex_ZWNJ;
    if (c == 0x00A0) return lex_NBSP;

    for (i = 0; i < sizeof(ranges)/sizeof(ranges[0]); i++)
    {
        if (c >= ranges[i].first && c <= ranges[i].last)
            return ranges[i].type;
    }

    return lex_Generic;
}
/* Sinhala characters that are expanded into several characters before
 * reordering.  Each entry gives the character and its replacement sequence;
 * the entry with base 0x0000 terminates the table. */
static const VowelComponents Sinhala_vowels[] =
{
    /* base      parts                        */
    { 0x0DDA, { 0x0DD9, 0x0DCA, 0x0000 } },
    { 0x0DDC, { 0x0DD9, 0x0DCF, 0x0000 } },
    { 0x0DDD, { 0x0DD9, 0x0DCF, 0x0DCA } },
    { 0x0DDE, { 0x0DD9, 0x0DDF, 0x0000 } },
    { 0x0000, { 0x0000, 0x0000, 0x0000 } }
};
/*
 * Contextual shaping for Sinhala: decompose multi-part vowels, reorder the
 * characters inside each syllable, then look up glyphs for the result.
 *
 * hdc          device context used for the glyph lookup
 * psc, psa     not used by this function
 * pwcChars     input characters (not modified; a private copy is edited)
 * cChars       number of input characters
 * pwOutGlyphs  receives one glyph index per character of the processed string
 * pcGlyphs     in: must equal cChars; out: length of the processed string
 * cMaxGlyphs   capacity of pwOutGlyphs
 * pwLogClust   not used by this function
 *
 * On any failure (glyph/char count mismatch, allocation failure, or a
 * processed string that does not fit in pwOutGlyphs) an error is logged and
 * the output parameters are left exactly as the caller passed them.
 */
static void ContextualShape_Sinhala(HDC hdc, ScriptCache *psc, SCRIPT_ANALYSIS *psa, WCHAR* pwcChars, INT cChars, WORD* pwOutGlyphs, INT* pcGlyphs, INT cMaxGlyphs, WORD *pwLogClust)
{
    int cCount = cChars;
    WCHAR *input;

    if (*pcGlyphs != cChars)
    {
        ERR("Number of Glyphs and Chars need to match at the beginning\n");
        return;
    }

    /* Decomposition turns one character into at most three, so three times
     * the input length is always enough room. */
    input = HeapAlloc(GetProcessHeap(),0,sizeof(WCHAR) * (cChars * 3));
    if (!input)
    {
        ERR("Failed to allocate the working buffer\n");
        return;
    }
    memcpy(input, pwcChars, cChars * sizeof(WCHAR));

    /* Step 1: Decompose multi part vowels */
    DecomposeVowels(hdc, input, &cCount, Sinhala_vowels);

    TRACE("New double vowel expanded string %s (%i)\n",debugstr_wn(input,cCount),cCount);

    /* The expanded string yields cCount glyphs; never write more than the
     * caller's glyph buffer can hold. */
    if (cCount > cMaxGlyphs)
    {
        ERR("Expanded string needs %i glyphs but only %i are available\n",cCount,cMaxGlyphs);
        HeapFree(GetProcessHeap(),0,input);
        return;
    }

    /* Step 2: Reorder within Syllables */
    Indic_ReorderCharacters( input, cCount, sinhala_lex, Reorder_Like_Sinhala);
    TRACE("reordered string %s\n",debugstr_wn(input,cCount));

    /* Step 3: Get glyphs */
    GetGlyphIndicesW(hdc, input, cCount, pwOutGlyphs, 0);
    *pcGlyphs = cCount;

    HeapFree(GetProcessHeap(),0,input);
}
static void ShapeCharGlyphProp_Default( HDC hdc, ScriptCache* psc, SCRIPT_ANALYSIS* psa, const WCHAR* pwcChars, const INT cChars, const WORD* pwGlyphs, const INT cGlyphs, WORD* pwLogClust, SCRIPT_CHARPROP* pCharProp, SCRIPT_GLYPHPROP* pGlyphProp)
{
int i,k;

View File

@ -74,6 +74,11 @@ typedef struct {
OPENTYPE_TAG userLang;
} ScriptCache;
/* Lexical classes a lexical_function can assign to a character.  The order
 * of the enumerators fixes their numeric values. */
enum
{
    lex_Halant,
    lex_Composed_Vowel,
    lex_Mantra_post,
    lex_Mantra_pre,
    lex_Mantra_above,
    lex_Mantra_below,
    lex_ZWJ,
    lex_ZWNJ,
    lex_NBSP,
    lex_Modifier,
    lex_Vowel,
    lex_Consonant,
    lex_Generic,
    lex_Ra,
    lex_Vedic,
    lex_Anudatta,
    lex_Nukta
};

/* Classifies one character; the result is one of the lex_* values. */
typedef int (*lexical_function)(WCHAR c);

/* Reorders one syllable of pwChar in place.  start and end are the indices
 * of its first and last character, main is the index of its main character
 * and lex is the classifier in use. */
typedef void (*reorder_function)(LPWSTR pwChar, INT start, INT main, INT end, lexical_function lex);
#define odd(x) ((x) & 1)
BOOL BIDI_DetermineLevels( LPCWSTR lpString, INT uCount, const SCRIPT_STATE *s,
@ -86,3 +91,6 @@ void SHAPE_ContextualShaping(HDC hdc, ScriptCache *psc, SCRIPT_ANALYSIS *psa, WC
void SHAPE_ApplyDefaultOpentypeFeatures(HDC hdc, ScriptCache *psc, SCRIPT_ANALYSIS *psa, WORD* pwOutGlyphs, INT* pcGlyphs, INT cMaxGlyphs, INT cChars, WORD *pwLogClust) DECLSPEC_HIDDEN;
HRESULT SHAPE_CheckFontForRequiredFeatures(HDC hdc, ScriptCache *psc, SCRIPT_ANALYSIS *psa) DECLSPEC_HIDDEN;
void SHAPE_CharGlyphProp(HDC hdc, ScriptCache *psc, SCRIPT_ANALYSIS *psa, const WCHAR* pwcChars, const INT cChars, const WORD* pwGlyphs, const INT cGlyphs, WORD *pwLogClust, SCRIPT_CHARPROP *pCharProp, SCRIPT_GLYPHPROP *pGlyphProp) DECLSPEC_HIDDEN;
void Indic_ReorderCharacters( LPWSTR input, int cChars, lexical_function lexical_f, reorder_function reorder_f) DECLSPEC_HIDDEN;
int Indic_FindBaseConsonant(LPWSTR pwChar, INT start, INT main, INT end, lexical_function lex) DECLSPEC_HIDDEN;