usp10: Rewrite resolveExplicit for Unicode 6.3.

This commit is contained in:
Aric Stewart 2013-11-01 12:00:24 -05:00 committed by Alexandre Julliard
parent e50dc73b86
commit 2531d40b51
1 changed file with 257 additions and 87 deletions

View File

@ -57,7 +57,7 @@
WINE_DEFAULT_DEBUG_CHANNEL(bidi);
#define ASSERT(x) do { if (!(x)) FIXME("assert failed: %s\n", #x); } while(0)
#define MAX_LEVEL 61
#define MAX_DEPTH 125
/* HELPER FUNCTIONS AND DECLARATIONS */
@ -75,7 +75,7 @@ WINE_DEFAULT_DEBUG_CHANNEL(bidi);
enum directions
{
/* input types */
/* ON MUST be zero, code relies on ON = N = 0 */
/* ON MUST be zero, code relies on ON = NI = 0 */
ON = 0, /* Other Neutral */
L, /* Left Letter */
R, /* Right Letter */
@ -102,12 +102,53 @@ enum directions
LRE,
PDF,
LRI, /* Isolate formatting characters new with 6.3 */
RLI,
FSI,
PDI,
/* resolved types, also resolved directions */
N = ON, /* alias, where ON, WS and S are treated the same */
NI = ON, /* alias, where ON, WS, S and Isolates are treated the same */
};
/* Short printable names for the directional types, used by dump_types for
   trace output.  Each name is at most three characters plus the terminating
   NUL.  The order presumably mirrors enum directions -- keep the two in sync. */
static const char debug_type[][4] =
{
    /* neutral and strong types */
    "ON",   /* other neutral */
    "L",    /* left-to-right letter */
    "R",    /* right-to-left letter */

    /* weak types */
    "AN",   /* Arabic number */
    "EN",   /* European number */
    "AL",   /* Arabic letter (right-to-left) */
    "NSM",  /* non-spacing mark */
    "CS",   /* common separator */
    "ES",   /* European separator */
    "ET",   /* European terminator (post/prefix, e.g. $ and %) */

    /* boundary and whitespace types */
    "BN",   /* boundary neutral (type of RLE etc. after explicit levels) */
    "S",    /* segment separator (TAB); used only in L1 */
    "WS",   /* white space; used only in L1 */
    "B",    /* paragraph separator (also known as PS) */

    /* explicit embedding and override controls; used only in X1-X9 */
    "RLO",
    "RLE",
    "LRO",
    "LRE",
    "PDF",

    /* isolate formatting characters, new with Unicode 6.3 */
    "LRI",
    "RLI",
    "FSI",
    "PDI",
};
/* HELPER FUNCTIONS */
/* Trace one line made of HEADER and a colon, followed by the debug_type name
   of every entry in types[start] .. types[end - 1], each preceded by a space.
   Each value in that range is used directly as an index into debug_type. */
static inline void dump_types(const char* header, WORD *types, int start, int end)
{
    int pos = start;

    TRACE("%s:",header);
    while (pos < end)
    {
        TRACE(" %s",debug_type[types[pos]]);
        pos++;
    }
    TRACE("\n");
}
/* Convert the libwine information to the direction enum */
static void classify(LPCWSTR lpString, WORD *chartype, DWORD uCount, const SCRIPT_CONTROL *c)
{
@ -143,7 +184,7 @@ static void classify(LPCWSTR lpString, WORD *chartype, DWORD uCount, const SCRIP
switch (lpString[i])
{
case '-':
case '+': chartype[i] = N; break;
case '+': chartype[i] = NI; break;
case '/': chartype[i] = CS; break;
}
break;
@ -155,6 +196,10 @@ static void classify(LPCWSTR lpString, WORD *chartype, DWORD uCount, const SCRIP
case 0x202C: chartype[i] = PDF; break;
case 0x202D: chartype[i] = LRO; break;
case 0x202E: chartype[i] = RLO; break;
case 0x2066: chartype[i] = LRI; break;
case 0x2067: chartype[i] = RLI; break;
case 0x2068: chartype[i] = FSI; break;
case 0x2069: chartype[i] = PDI; break;
}
break;
}
@ -208,81 +253,204 @@ static WORD EmbeddingDirection(int level)
the outermost call. The nesting counter counts the recursion
depth and not the embedding level.
------------------------------------------------------------------------*/
/* One entry of the status stack that resolveExplicit maintains through the
   push_stack / pop_stack macros. */
typedef struct tagStackItem {
/* level stored by push_stack; resolveExplicit copies it into poutLevel[] */
int level;
/* NI when no override applies; resolveExplicit pushes R for RLO and L for LRO */
int override;
/* TRUE for entries pushed by RLI, LRI and FSI, FALSE for every other push */
BOOL isolate;
} StackItem;
static int resolveExplicit(int level, int dir, WORD *pcls, WORD *plevel, int cch, int nNest)
/*
 * Status-stack helpers for resolveExplicit.
 *
 * These macros are not self-contained: they expand to code that uses the
 * enclosing function's locals `stack`, `stack_top`, `overflow_isolate_count`
 * and `overflow_embedding_count`, so they can only be used where those names
 * are in scope.  The stack grows downward: pushing decrements stack_top.
 *
 * Every macro argument is parenthesised so that an expression argument
 * (for example a conditional expression) is evaluated as a single operand.
 */

/* Push an entry with level l, override o and isolate flag i. */
#define push_stack(l,o,i) \
do { stack_top--; \
stack[stack_top].level = (l); \
stack[stack_top].override = (o); \
stack[stack_top].isolate = (i);} while(0)

/* Drop the top entry.  The caller must make sure an entry remains. */
#define pop_stack() do { stack_top++; } while(0)

/* Non-zero when level x does not exceed MAX_DEPTH and no overflow isolate or
   overflow embedding is currently being counted. */
#define valid_level(x) ((x) <= MAX_DEPTH && overflow_isolate_count == 0 && overflow_embedding_count == 0)
/*
 * resolveExplicit
 *
 * Makes one pass over pclass[0 .. count-1] while maintaining a status stack
 * whose initial entry holds `level` with an NI override and isolate FALSE.
 * For every handled class the level found on top of the stack is stored in
 * poutLevel[i]; entries of class B or BN match no branch, so their
 * poutLevel[] slot is not written by this loop.
 *
 *   level      initial level placed on the stack
 *   pclass     in/out: RLI, LRI, FSI and PDI are rewritten to NI; other
 *              classes take the top-of-stack override when it is not NI;
 *              the final loop turns RLE, LRE, RLO, LRO and PDF into BN
 *   poutLevel  out: level per position, as described above
 *   count      number of entries in pclass and poutLevel
 *
 * NOTE(review): this listing also contains statements that use names this
 * signature does not declare (pcls, plevel, ich, cch, nNest, dir, cls) and a
 * `return ich;` in a void function.  They look like leftovers of the earlier
 * recursive implementation shown interleaved with the new one -- confirm
 * against the real file before relying on them.
 */
static void resolveExplicit(int level, WORD *pclass, WORD *poutLevel, int count)
{
/* always called with a valid nesting level
nesting levels are != embedding levels */
int nLastValid = nNest;
int ich = 0;
/* X1 */
int overflow_isolate_count = 0;
int overflow_embedding_count = 0;
int valid_isolate_count = 0;
int i;
/* check input values */
ASSERT(nNest >= 0 && level >= 0 && level <= MAX_LEVEL);
/* the stack grows downward: it starts at the last slot and push_stack
   decrements stack_top */
StackItem stack[MAX_DEPTH+2];
int stack_top = MAX_DEPTH+1;
/* process the text */
for (; ich < cch; ich++)
stack[stack_top].level = level;
stack[stack_top].override = NI;
stack[stack_top].isolate = FALSE;
for (i = 0; i < count; i++)
{
WORD cls = pcls[ich];
switch (cls)
/* X2 */
if (pclass[i] == RLE)
{
case LRO:
case LRE:
nNest++;
if (GreaterEven(level) <= MAX_LEVEL - (cls == LRO ? 2 : 0))
int least_odd = GreaterOdd(stack[stack_top].level);
/* the control itself gets the level in force before the push */
poutLevel[i] = stack[stack_top].level;
if (valid_level(least_odd))
push_stack(least_odd, NI, FALSE);
else if (overflow_isolate_count == 0)
overflow_embedding_count++;
}
/* X3 */
else if (pclass[i] == LRE)
{
int least_even = GreaterEven(stack[stack_top].level);
poutLevel[i] = stack[stack_top].level;
if (valid_level(least_even))
push_stack(least_even, NI, FALSE);
else if (overflow_isolate_count == 0)
overflow_embedding_count++;
}
/* X4 */
else if (pclass[i] == RLO)
{
int least_odd = GreaterOdd(stack[stack_top].level);
poutLevel[i] = stack[stack_top].level;
if (valid_level(least_odd))
push_stack(least_odd, R, FALSE);
else if (overflow_isolate_count == 0)
overflow_embedding_count++;
}
/* X5 */
else if (pclass[i] == LRO)
{
int least_even = GreaterEven(stack[stack_top].level);
poutLevel[i] = stack[stack_top].level;
if (valid_level(least_even))
push_stack(least_even, L, FALSE);
else if (overflow_isolate_count == 0)
overflow_embedding_count++;
}
/* X5a */
else if (pclass[i] == RLI)
{
int least_odd = GreaterOdd(stack[stack_top].level);
poutLevel[i] = stack[stack_top].level;
if (valid_level(least_odd))
{
plevel[ich] = GreaterEven(level);
pcls[ich] = BN;
ich += resolveExplicit(plevel[ich], (cls == LRE ? N : L),
&pcls[ich+1], &plevel[ich+1],
cch - (ich+1), nNest);
nNest--;
continue;
valid_isolate_count++;
push_stack(least_odd, NI, TRUE);
}
cls = pcls[ich] = BN;
break;
case RLO:
case RLE:
nNest++;
if (GreaterOdd(level) <= MAX_LEVEL - (cls == RLO ? 2 : 0))
else
overflow_isolate_count++;
/* the isolate initiator is rewritten to NI whether or not it was pushed */
pclass[i] = NI;
}
/* X5b */
else if (pclass[i] == LRI)
{
int least_even = GreaterEven(stack[stack_top].level);
poutLevel[i] = stack[stack_top].level;
if (valid_level(least_even))
{
plevel[ich] = GreaterOdd(level);
pcls[ich] = BN;
ich += resolveExplicit(plevel[ich], (cls == RLE ? N : R),
&pcls[ich+1], &plevel[ich+1],
cch - (ich+1), nNest);
nNest--;
continue;
valid_isolate_count++;
push_stack(least_even, NI, TRUE);
}
cls = pcls[ich] = BN;
break;
case PDF:
cls = pcls[ich] = BN;
if (nNest)
else
overflow_isolate_count++;
pclass[i] = NI;
}
/* X5c */
else if (pclass[i] == FSI)
{
int j;
int new_level = 0;
int skipping = 0;
poutLevel[i] = stack[stack_top].level;
/* look ahead for the first L, R or AL that is not inside a nested
   isolate; the scan stops at the PDI that closes this FSI.  new_level
   stays 0 when nothing is found */
for (j = i+1; j < count; j++)
{
if (nLastValid < nNest)
if (pclass[j] == LRI || pclass[j] == RLI || pclass[j] == FSI)
{
nNest--;
skipping++;
continue;
}
else if (pclass[j] == PDI)
{
if (skipping)
skipping --;
else
break;
continue;
}
if (skipping) continue;
if (pclass[j] == L)
{
new_level = 0;
break;
}
else if (pclass[j] == R || pclass[j] == AL)
{
new_level = 1;
break;
}
}
/* from here on the FSI is handled like an RLI (odd) or an LRI (even) */
if (odd(new_level))
{
int least_odd = GreaterOdd(stack[stack_top].level);
if (valid_level(least_odd))
{
valid_isolate_count++;
push_stack(least_odd, NI, TRUE);
}
else
{
cch = ich; /* break the loop, but complete body */
}
overflow_isolate_count++;
}
else
{
int least_even = GreaterEven(stack[stack_top].level);
if (valid_level(least_even))
{
valid_isolate_count++;
push_stack(least_even, NI, TRUE);
}
else
overflow_isolate_count++;
}
pclass[i] = NI;
}
/* Apply the override */
if (dir != N)
/* X6 */
else if (pclass[i] != B && pclass[i] != BN && pclass[i] != PDI && pclass[i] != PDF)
{
cls = dir;
poutLevel[i] = stack[stack_top].level;
if (stack[stack_top].override != NI)
pclass[i] = stack[stack_top].override;
}
plevel[ich] = level;
if (pcls[ich] != BN)
pcls[ich] = cls;
/* X6a */
else if (pclass[i] == PDI)
{
if (overflow_isolate_count) overflow_isolate_count--;
else if (!valid_isolate_count) {/* do nothing */}
else
{
overflow_embedding_count = 0;
/* discard entries above the nearest isolate entry, then that entry too */
while (!stack[stack_top].isolate) pop_stack();
pop_stack();
valid_isolate_count --;
}
/* the PDI takes the level that is on top after any pop */
poutLevel[i] = stack[stack_top].level;
pclass[i] = NI;
}
/* X7 */
else if (pclass[i] == PDF)
{
poutLevel[i] = stack[stack_top].level;
if (overflow_isolate_count) {/* do nothing */}
else if (overflow_embedding_count) overflow_embedding_count--;
/* stack_top < MAX_DEPTH+1 keeps the initial entry from being popped */
else if (!stack[stack_top].isolate && stack_top < (MAX_DEPTH+1))
pop_stack();
}
/* X8: Nothing */
}
return ich;
/* X9: Based on 5.2 Retaining Explicit Formatting Characters */
for (i = 0; i < count ; i++)
if (pclass[i] == RLE || pclass[i] == LRE || pclass[i] == RLO || pclass[i] == LRO || pclass[i] == PDF)
pclass[i] = BN;
}
/* RESOLVE WEAK TYPES */
@ -318,7 +486,7 @@ enum states /* possible states */
static const int stateWeak[][10] =
{
/* N, L, R, AN, EN, AL,NSM, CS, ES, ET */
/* NI, L, R, AN, EN, AL,NSM, CS, ES, ET */
/*xa*/ { ao, xl, xr, cn, cn, xa, xa, ao, ao, ao }, /* Arabic letter */
/*xr*/ { ro, xl, xr, ra, re, xa, xr, ro, ro, rt }, /* right letter */
/*xl*/ { lo, xl, xr, la, le, xa, xl, lo, lo, lt }, /* left letter */
@ -355,7 +523,7 @@ enum actions /* possible actions */
/* actions */
xxx = (XX << 4) + XX, /* no-op */
xIx = IX + xxx, /* increment run */
xxN = (XX << 4) + ON, /* set current to N */
xxN = (XX << 4) + ON, /* set current to NI */
xxE = (XX << 4) + EN, /* set current to EN */
xxA = (XX << 4) + AN, /* set current to AN */
xxR = (XX << 4) + R, /* set current to R */
@ -363,19 +531,19 @@ enum actions /* possible actions */
Nxx = (ON << 4) + 0xF, /* set run to neutral */
Axx = (AN << 4) + 0xF, /* set run to AN */
ExE = (EN << 4) + EN, /* set run to EN, set current to EN */
NIx = (ON << 4) + 0xF + IX, /* set run to N, increment */
NxN = (ON << 4) + ON, /* set run to N, set current to N */
NxR = (ON << 4) + R, /* set run to N, set current to R */
NxE = (ON << 4) + EN, /* set run to N, set current to EN */
NIx = (ON << 4) + 0xF + IX, /* set run to NI, increment */
NxN = (ON << 4) + ON, /* set run to NI, set current to NI */
NxR = (ON << 4) + R, /* set run to NI, set current to R */
NxE = (ON << 4) + EN, /* set run to NI, set current to EN */
AxA = (AN << 4) + AN, /* set run to AN, set current to AN */
NxL = (ON << 4) + L, /* set run to N, set current to L */
NxL = (ON << 4) + L, /* set run to NI, set current to L */
LxL = (L << 4) + L, /* set run to L, set current to L */
} ;
static const int actionWeak[][10] =
{
/* N, L, R, AN, EN, AL, NSM, CS, ES, ET */
/* NI, L, R, AN, EN, AL, NSM, CS, ES, ET */
/*xa*/ { xxx, xxx, xxx, xxx, xxA, xxR, xxR, xxN, xxN, xxN }, /* Arabic letter */
/*xr*/ { xxx, xxx, xxx, xxx, xxE, xxR, xxR, xxN, xxN, xIx }, /* right letter */
/*xl*/ { xxx, xxx, xxx, xxx, xxL, xxR, xxL, xxN, xxN, xIx }, /* left letter */
@ -425,7 +593,7 @@ static int GetResolvedType(int action)
Input classes are of three kinds
- Static Input Token, where the class of the token remains
unchanged on output (AN, L, N, R)
unchanged on output (AN, L, NI, R)
- Replaced Input Token, where the class of the token is
always replaced on output (AL, BN, NSM, CS, ES, ET)
- Conditional Input Token, where the class of the token is
@ -574,10 +742,10 @@ enum resolvestates
/* new temporary class */
r, /* R and characters resolved to R */
l, /* L and characters resolved to L */
rn, /* N preceded by right */
ln, /* N preceded by left */
rn, /* NI preceded by right */
ln, /* NI preceded by left */
a, /* AN preceded by left (the abbreviation 'la' is used up above) */
na, /* N preceded by a */
na, /* NI preceded by a */
} ;
@ -593,28 +761,28 @@ enum resolvestates
/* Action table read by resolveNeutrals as actionNeutrals[state][cls]: the row
   is selected by the current resolve state and the column by the input class,
   which resolveNeutrals asserts to be below 5.
   NOTE(review): the rn/ln rows and the na row each appear twice below,
   differing only in the N vs NI wording of their comments, so the table has
   more rows than states named in the row comments -- confirm which set is
   current. */
static const int actionNeutrals[][5] =
{
/* N, L, R, AN, EN = cls */
/* NI, L, R, AN, EN = cls */
{ In, 0, 0, 0, 0 }, /* r right */
{ In, 0, 0, 0, L }, /* l left */
{ In, En, Rn, Rn, Rn }, /* rn N preceded by right */
{ In, Ln, En, En, LnL}, /* ln N preceded by left */
{ In, En, Rn, Rn, Rn }, /* rn NI preceded by right */
{ In, Ln, En, En, LnL}, /* ln NI preceded by left */
{ In, 0, 0, 0, L }, /* a AN preceded by left */
{ In, En, Rn, Rn, En }, /* na N preceded by a */
{ In, En, Rn, Rn, En }, /* na NI preceded by a */
} ;
/* State table for neutral resolution; every entry names a resolve state
   (r, l, rn, ln, a, na).  Presumably indexed [state][cls] like
   actionNeutrals to give the next state -- the use site is not visible here,
   confirm there.
   NOTE(review): the rn/ln rows and the na row each appear twice below,
   differing only in the N vs NI wording of their comments -- confirm which
   set is current. */
static const int stateNeutrals[][5] =
{
/* N, L, R, AN, EN */
/* NI, L, R, AN, EN */
{ rn, l, r, r, r }, /* r right */
{ ln, l, r, a, l }, /* l left */
{ rn, l, r, r, r }, /* rn N preceded by right */
{ ln, l, r, a, l }, /* ln N preceded by left */
{ rn, l, r, r, r }, /* rn NI preceded by right */
{ ln, l, r, a, l }, /* ln NI preceded by left */
{ na, l, r, a, l }, /* a AN preceded by left */
{ na, l, r, a, l }, /* na N preceded by la */
{ na, l, r, a, l }, /* na NI preceded by la */
} ;
/*------------------------------------------------------------------------
@ -631,7 +799,7 @@ static const int stateNeutrals[][5] =
In/Out: Array of directional classes
Note: On input only these directional classes are expected
R, L, N, AN, EN and BN
R, L, NI, AN, EN and BN
W8 resolves a number of ENs to L
------------------------------------------------------------------------*/
@ -659,14 +827,14 @@ static void resolveNeutrals(int baselevel, WORD *pcls, const WORD *plevel, int c
continue;
}
ASSERT(pcls[ich] < 5); /* "Only N, L, R, AN, EN are allowed" */
ASSERT(pcls[ich] < 5); /* "Only NI, L, R, AN, EN are allowed" */
cls = pcls[ich];
action = actionNeutrals[state][cls];
/* resolve the directionality for deferred runs */
clsRun = GetDeferredNeutrals(action, level);
if (clsRun != N)
if (clsRun != NI)
{
SetDeferredRun(pcls, cchRun, ich, clsRun);
cchRun = 0;
@ -674,7 +842,7 @@ static void resolveNeutrals(int baselevel, WORD *pcls, const WORD *plevel, int c
/* resolve the directionality class at the current location */
clsNew = GetResolvedNeutrals(action);
if (clsNew != N)
if (clsNew != NI)
pcls[ich] = clsNew;
if (In & action)
@ -689,7 +857,7 @@ static void resolveNeutrals(int baselevel, WORD *pcls, const WORD *plevel, int c
/* resolve the directionality for deferred runs */
clsRun = GetDeferredNeutrals(actionNeutrals[state][cls], level);
if (clsRun != N)
if (clsRun != NI)
SetDeferredRun(pcls, cchRun, ich, clsRun);
}
@ -763,6 +931,7 @@ BOOL BIDI_DetermineLevels(
baselevel = s->uBidiLevel;
classify(lpString, chartype, uCount, c);
if (TRACE_ON(bidi)) dump_types("Start ", chartype, 0, uCount);
for (j = 0; j < uCount; ++j)
switch(chartype[j])
@ -770,12 +939,13 @@ BOOL BIDI_DetermineLevels(
case B:
case S:
case WS:
case ON: chartype[j] = N;
case ON: chartype[j] = NI;
default: continue;
}
/* resolve explicit */
resolveExplicit(baselevel, N, chartype, lpOutLevels, uCount, 0);
resolveExplicit(baselevel, chartype, lpOutLevels, uCount);
if (TRACE_ON(bidi)) dump_types("After Explicit", chartype, 0, uCount);
/* resolve weak */
resolveWeak(baselevel, chartype, lpOutLevels, uCount);