diff --git a/dlls/gdi32/bidi.c b/dlls/gdi32/bidi.c
index d21b23ba91d..c10dd43e65a 100644
--- a/dlls/gdi32/bidi.c
+++ b/dlls/gdi32/bidi.c
@@ -1,8 +1,8 @@
-
 /*
  * GDI BiDirectional handling
  *
  * Copyright 2003 Shachar Shemesh
+ * Copyright 2007 Maarten Lankhorst
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
@@ -17,6 +17,27 @@
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+ *
+ * Code derived from the modified reference implementation
+ * that was found in revision 17 of http://unicode.org/reports/tr9/
+ * "Unicode Standard Annex #9: THE BIDIRECTIONAL ALGORITHM"
+ *
+ * -- Copyright (C) 1999-2005, ASMUS, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of the Unicode data files and any associated documentation (the
+ * "Data Files") or Unicode software and any associated documentation (the
+ * "Software") to deal in the Data Files or Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, and/or sell copies of the Data Files or Software,
+ * and to permit persons to whom the Data Files or Software are furnished
+ * to do so, provided that (a) the above copyright notice(s) and this
+ * permission notice appear with all copies of the Data Files or Software,
+ * (b) both the above copyright notice(s) and this permission notice appear
+ * in associated documentation, and (c) there is clear notice in each
+ * modified Data File or in the Software as well as in the documentation
+ * associated with the Data File(s) or Software that the data or software
+ * has been modified.
  */
 
 #include "config.h"
@@ -30,6 +51,201 @@
 
 WINE_DEFAULT_DEBUG_CHANNEL(bidi);
 
+#define ASSERT(x) do { if (!(x)) FIXME("assert failed: %s\n", #x); } while(0)
+
+/* HELPER FUNCTIONS AND DECLARATIONS */
+
+#define odd(x) ((x) & 1)
+
+/*------------------------------------------------------------------------
+    Bidirectional Character Types
+
+    as defined by the Unicode Bidirectional Algorithm Table 3-7.
+
+    Note:
+
+      The list of bidirectional character types here is not grouped the
+      same way as the table 3-7, since the numeric values for the types
+      are chosen to keep the state and action tables compact.
+------------------------------------------------------------------------*/
+enum directions
+{
+    /* input types */
+             /* ON MUST be zero, code relies on ON = N = 0 */
+    ON = 0,  /* Other Neutral */
+    L,       /* Left Letter */
+    R,       /* Right Letter */
+    AN,      /* Arabic Number */
+    EN,      /* European Number */
+    AL,      /* Arabic Letter (Right-to-left) */
+    NSM,     /* Non-spacing Mark */
+    CS,      /* Common Separator */
+    ES,      /* European Separator */
+    ET,      /* European Terminator (post/prefix e.g. $ and %) */
+
+    /* resolved types */
+    BN,      /* Boundary neutral (type of RLE etc after explicit levels) */
+
+    /* input types, */
+    S,       /* Segment Separator (TAB)        // used only in L1 */
+    WS,      /* White space                    // used only in L1 */
+    B,       /* Paragraph Separator (aka as PS) */
+
+    /* types for explicit controls */
+    RLO,     /* these are used only in X1-X9 */
+    RLE,
+    LRO,
+    LRE,
+    PDF,
+
+    /* resolved types, also resolved directions */
+    N = ON,  /* alias, where ON, WS and S are treated the same */
+};
+
+/* HELPER FUNCTIONS */
+
+/* grep -r ';BN;' data.txt  | grep -v [0-9A-F][0-9A-F][0-9A-F][0-9A-F][0-9A-F] | sed -e s@\;.*@@ -e s/^..../0x\&,\ / | xargs echo */
+static const WCHAR BNs[] = {
+    0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008,
+    0x000E, 0x000F, 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016,
+    0x0017, 0x0018, 0x0019, 0x001A, 0x001B, 0x007F, 0x0080, 0x0081, 0x0082,
+    0x0083, 0x0084, 0x0086, 0x0087, 0x0088, 0x0089, 0x008A, 0x008B, 0x008C,
+    0x008D, 0x008E, 0x008F, 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095,
+    0x0096, 0x0097, 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E,
+    0x009F, 0x00AD, 0x070F, 0x200B, 0x200C, 0x200D, 0x2060, 0x2061, 0x2062,
+    0x2063, 0x206A, 0x206B, 0x206C, 0x206D, 0x206E, 0x206F, 0xFEFF
+};
+
+/* Idem, but with ';R;' instead of ';BN;' */
+static const WCHAR Rs[] = {
+    0x05BE, 0x05C0, 0x05C3, 0x05C6, 0x05D0, 0x05D1, 0x05D2, 0x05D3, 0x05D4,
+    0x05D5, 0x05D6, 0x05D7, 0x05D8, 0x05D9, 0x05DA, 0x05DB, 0x05DC, 0x05DD,
+    0x05DE, 0x05DF, 0x05E0, 0x05E1, 0x05E2, 0x05E3, 0x05E4, 0x05E5, 0x05E6,
+    0x05E7, 0x05E8, 0x05E9, 0x05EA, 0x05F0, 0x05F1, 0x05F2, 0x05F3, 0x05F4,
+    0x07C0, 0x07C1, 0x07C2, 0x07C3, 0x07C4, 0x07C5, 0x07C6, 0x07C7, 0x07C8,
+    0x07C9, 0x07CA, 0x07CB, 0x07CC, 0x07CD, 0x07CE, 0x07CF, 0x07D0, 0x07D1,
+    0x07D2, 0x07D3, 0x07D4, 0x07D5, 0x07D6, 0x07D7, 0x07D8, 0x07D9, 0x07DA,
+    0x07DB, 0x07DC, 0x07DD, 0x07DE, 0x07DF, 0x07E0, 0x07E1, 0x07E2, 0x07E3,
+    0x07E4, 0x07E5, 0x07E6, 0x07E7, 0x07E8, 0x07E9, 0x07EA, 0x07F4, 0x07F5,
+    0x07FA, 0x200F, 0xFB1D, 0xFB1F, 0xFB20, 0xFB21, 0xFB22, 0xFB23, 0xFB24,
+    0xFB25, 0xFB26, 0xFB27, 0xFB28, 0xFB2A, 0xFB2B, 0xFB2C, 0xFB2D, 0xFB2E,
+    0xFB2F, 0xFB30, 0xFB31, 0xFB32, 0xFB33, 0xFB34, 0xFB35, 0xFB36, 0xFB38,
+    0xFB39, 0xFB3A, 0xFB3B, 0xFB3C, 0xFB3E, 0xFB40, 0xFB41, 0xFB43, 0xFB44,
+    0xFB46, 0xFB47, 0xFB48, 0xFB49, 0xFB4A, 0xFB4B, 0xFB4C, 0xFB4D, 0xFB4E,
+    0xFB4F
+};
+
+/*
+ * Convert the incomplete win32 table to some slightly more useful data:
+ * fill chartype[0..uCount-1] with the 'enum directions' value of each
+ * character of lpString.
+ *
+ * GetStringTypeW(CT_CTYPE2) only reports the coarse C2_* classes, so the
+ * Rs and BNs tables above are consulted to split C2_RIGHTTOLEFT into R/AL
+ * and C2_NOTAPPLICABLE into BN/NSM, and the explicit embedding/override
+ * controls U+202A..U+202E are picked out of C2_OTHERNEUTRAL.
+ */
+static void classify(LPCWSTR lpString, WORD *chartype, DWORD uCount)
+{
+    unsigned i, j;
+    GetStringTypeW(CT_CTYPE2, lpString, uCount, chartype);
+    for (i = 0; i < uCount; ++i)
+        switch (chartype[i])
+        {
+            case C2_LEFTTORIGHT: chartype[i] = L; break;
+            case C2_RIGHTTOLEFT:
+                chartype[i] = AL;
+                for (j = 0; j < sizeof(Rs)/sizeof(Rs[0]); ++j)
+                    if (Rs[j] == lpString[i])
+                    {
+                        chartype[i] = R;
+                        break;
+                    }
+                break;
+
+            case C2_EUROPENUMBER: chartype[i] = EN; break;
+            case C2_EUROPESEPARATOR: chartype[i] = ES; break;
+            case C2_EUROPETERMINATOR: chartype[i] = ET; break;
+            case C2_ARABICNUMBER: chartype[i] = AN; break;
+            case C2_COMMONSEPARATOR: chartype[i] = CS; break;
+            case C2_BLOCKSEPARATOR: chartype[i] = B; break;
+            case C2_SEGMENTSEPARATOR: chartype[i] = S; break;
+            case C2_WHITESPACE: chartype[i] = WS; break;
+            case C2_OTHERNEUTRAL:
+                switch (lpString[i])
+                {
+                    case 0x202A: chartype[i] = LRE; break;
+                    case 0x202B: chartype[i] = RLE; break;
+                    case 0x202C: chartype[i] = PDF; break;
+                    case 0x202D: chartype[i] = LRO; break;
+                    case 0x202E: chartype[i] = RLO; break;
+                    default: chartype[i] = ON; break;
+                }
+                break;
+            case C2_NOTAPPLICABLE:
+                chartype[i] = NSM;
+                for (j = 0; j < sizeof(BNs)/sizeof(BNs[0]); ++j)
+                    if (BNs[j] == lpString[i])
+                    {
+                        chartype[i] = BN;
+                        break;
+                    }
+                break;
+
+            default:
+                /* According to BiDi spec, unassigned characters default to L */
+                FIXME("Unhandled character type: %04x\n", chartype[i]);
+                chartype[i] = L;
+                break;
+        }
+}
+
+/* reverse the first cch characters of psz in place */
+static void reverse(LPWSTR psz, int cch)
+{
+    WCHAR chTemp;
+    int ich = 0;
+    for (; ich < --cch; ich++)
+    {
+        chTemp = psz[ich];
+        psz[ich] = psz[cch];
+        psz[cch] = chTemp;
+    }
+}
+
+/* THE PARAGRAPH LEVEL */
+
+/*------------------------------------------------------------------------
+    Function: resolveParagraphs
+
+    Resolves the input strings into blocks over which the algorithm
+    is then applied.
+
+    Implements Rule P1 of the Unicode Bidi Algorithm
+
+    Input: Character types
+           Character count
+
+    Output: revised character count
+
+    Note:    This is a very simplistic function. In effect it restricts
+            the action of the algorithm to the first paragraph in the input
+            where a paragraph ends at the end of the first block separator
+            or at the end of the input text.
+
+------------------------------------------------------------------------*/
+
+static int resolveParagraphs(WORD *types, int cch)
+{
+    /* skip characters not of type B */
+    int ich = 0;
+    for(; ich < cch && types[ich] != B; ich++);
+    /* stop after first B, make it a BN for use in the next steps */
+    if (ich < cch && types[ich] == B)
+        types[ich++] = BN;
+    return ich;
+}
+
 /*************************************************************
  *    BIDI_Reorder
  */
@@ -43,7 +259,9 @@ BOOL BIDI_Reorder(
                 UINT *lpOrder /* [out] Logical -> Visual order map */
     )
 {
-    unsigned i;
+    WORD *levels;
+    WORD *chartype;
+    unsigned i, baselevel = 0, forcedir = 0, done;
     TRACE("%s, %d, 0x%08x lpOutString=%p, lpOrder=%p\n",
           debugstr_wn(lpString, uCount), uCount, dwFlags,
           lpOutString, lpOrder);
@@ -53,10 +271,101 @@ BOOL BIDI_Reorder(
         FIXME("Asked to reorder without reorder flag set\n");
         return FALSE;
     }
-    memcpy(lpOutString, lpString, uCount * sizeof(WCHAR));
-    if (lpOrder)
-        for (i = 0; i < uCount; ++i)
-            *(lpOrder++) = i;
+    if (uCountOut < uCount)
+    {
+        FIXME("lpOutString too small\n");
+        return FALSE;
+    }
+
+    /* one allocation: uCount character types followed by uCount levels */
+    chartype = HeapAlloc(GetProcessHeap(), 0, uCount * 2 * sizeof(WORD));
+    if (!chartype)
+    {
+        WARN("Out of memory\n");
+        return FALSE;
+    }
+    levels = chartype + uCount;
+
+    memcpy(lpOutString, lpString, uCount * sizeof(WCHAR));
+
+    switch (dwWineGCP_Flags&WINE_GCPW_DIR_MASK)
+    {
+        /* force means initial level is set directly,
+         * loose means initial level is determined by first character that has a direction */
+        case WINE_GCPW_FORCE_LTR: forcedir = L; break;
+        case WINE_GCPW_FORCE_RTL: forcedir = R; baselevel = 1; break;
+        default: break;
+    }
+
+    i = done = 0;
+    while (done < uCount)
+    {
+        unsigned j;
+        classify(lpOutString + done, chartype, uCount - done);
+        /* limit text to first block */
+        i = resolveParagraphs(chartype, uCount - done);
+        /* fold every neutral type of this paragraph into N */
+        for (j = 0; j < i; ++j)
+            switch(chartype[j])
+            {
+                case B:
+                case S:
+                case WS:
+                case ON: chartype[j] = N; /* fall through */
+                default: continue;
+            }
+
+        if (!forcedir)
+        {
+            if ((dwWineGCP_Flags&WINE_GCPW_DIR_MASK) == WINE_GCPW_LOOSE_RTL)
+                baselevel = 1;
+            else baselevel = 0;
+
+            /* the first strong L or R character decides the base level */
+            for (j = 0; j < i; ++j)
+            {
+                if (chartype[j] == L)
+                {
+                    baselevel = 0;
+                    break;
+                }
+                if (chartype[j] == R)
+                {
+                    baselevel = 1;
+                    break;
+                }
+            }
+        }
+
+        /* Temporary stub: Assume everything is in the direction we want.
+         * Assigned per element because memset() would copy baselevel into
+         * each byte of the WORD rather than set the WORD to baselevel. */
+        for (j = 0; j < i; ++j)
+            levels[j] = baselevel;
+        if (baselevel)
+            reverse(lpOutString + done, i);
+
+        if (lpOrder)
+        {
+            /* walk the runs of equal level; an odd (right-to-left) run maps
+             * its characters in mirrored order, an even run maps them as is */
+            unsigned k, start = 0;
+            for (j = 1; j <= i; ++j)
+            {
+                if (j < i && levels[j] == levels[start])
+                    continue;
+                /* the run is [start, j); counting k upwards avoids the
+                 * unsigned wrap-around of a "k >= 0" countdown */
+                for (k = start; k < j; ++k)
+                    lpOrder[done + k] = odd(levels[start]) ?
+                        done + start + (j - 1 - k) : done + k;
+                start = j;
+            }
+        }
+        done += i;
+    }
+
+    HeapFree(GetProcessHeap(), 0, chartype);
 
     return TRUE;
 }