Took all the Unicode code points listed in Plorkyeran's patch and put them in an array, to avoid having to encode the source file as UTF-8 or UTF-16. Updates #710; this seems to fix it, but someone please confirm.

Originally committed to SVN as r3067.
This commit is contained in:
Niels Martin Hansen 2009-06-16 23:29:56 +00:00
parent dbf286136e
commit 7654458b0f
1 changed files with 18 additions and 5 deletions

View File

@ -231,11 +231,24 @@ void GetWordBoundaries(const wxString text,IntPairVector &results,int start,int
bool isDelim;
// Delimiters
wxString delim(_T(" .,;:!?-(){}[]\"\\/"));
wxChar temp = 0xBF;
delim += temp;
temp = 0xA1;
delim += temp;
const wchar_t delim_chars[] = {
0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0028,
0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, 0x003a,
0x003b, 0x003d, 0x003f, 0x0040, 0x005b, 0x005c, 0x005d, 0x005e,
0x005f, 0x0060, 0x007b, 0x007c, 0x007d, 0x007e, 0x00a1, 0x00a2,
0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, 0x00a8, 0x00aa, 0x00ab,
0x00b0, 0x00b6, 0x00b7, 0x00ba, 0x00bb, 0x00bf, 0x02dc, 0x0e3f,
0x2010, 0x2013, 0x2014, 0x2015, 0x2018, 0x2019, 0x201c, 0x201d,
0x2020, 0x2021, 0x2022, 0x2025, 0x2026, 0x2026, 0x2030, 0x2031,
0x2032, 0x203b, 0x203b, 0x203d, 0x2042, 0x2044, 0x20a6, 0x20a9,
0x20aa, 0x20ac, 0x20ad, 0x2116, 0x2234, 0x2235, 0x2420, 0x2422,
0x2423, 0x2506, 0x25ca, 0x2605, 0x261e, 0x2e2e, 0x3000, 0x3001,
0x3002, 0x3008, 0x3009, 0x300a, 0x300b, 0x300c, 0x300d, 0x300e,
0x300f, 0x3010, 0x3011, 0x3014, 0x3015, 0x3016, 0x3017, 0x3018,
0x3019, 0x301a, 0x301b, 0x301c, 0x3030, 0x303d, 0x30fb, 0xff0a,
0xff5b, 0xff5d, 0xff5e, 0
};
wxString delim(delim_chars);
// Scan
for (int i=start;i<end+1;i++) {