diff --git a/build/hunspell/hunspell.vcxproj b/build/hunspell/hunspell.vcxproj index 4e9a00166..67ce1c543 100644 --- a/build/hunspell/hunspell.vcxproj +++ b/build/hunspell/hunspell.vcxproj @@ -4,6 +4,7 @@ {CC791693-6B28-40AC-879D-64A6C16468E3} hunspell + lib @@ -12,6 +13,7 @@ + @@ -20,6 +22,7 @@ false + @@ -35,13 +38,7 @@ - - - - - - @@ -55,15 +52,5 @@ - - - - - - - - - CompileAsCpp - diff --git a/build/hunspell/hunspell.vcxproj.filters b/build/hunspell/hunspell.vcxproj.filters index 8a3be6726..6ddb687b8 100644 --- a/build/hunspell/hunspell.vcxproj.filters +++ b/build/hunspell/hunspell.vcxproj.filters @@ -9,10 +9,6 @@ {93995380-89BD-4b04-88EB-625FBE52EBFB} h;hpp;hxx;hm;inl;inc;xsd - - {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} - rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms - @@ -51,27 +47,9 @@ Header Files - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - Header Files - - Header Files - Header Files @@ -101,27 +79,6 @@ Source Files - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - Source Files @@ -132,4 +89,4 @@ Source Files - + \ No newline at end of file diff --git a/src/spellchecker_hunspell.cpp b/src/spellchecker_hunspell.cpp index 08f628034..a294a51ac 100644 --- a/src/spellchecker_hunspell.cpp +++ b/src/spellchecker_hunspell.cpp @@ -13,11 +13,6 @@ // OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. // -/// @file spellchecker_hunspell.cpp -/// @brief Hunspell-based spell checker implementation -/// @ingroup spelling -/// - #ifdef WITH_HUNSPELL #include "spellchecker_hunspell.h" @@ -33,6 +28,9 @@ #include #include + +#define HUNSPELL_STATIC +#undef near #include HunspellSpellChecker::HunspellSpellChecker() diff --git a/vendor/hunspell/AUTHORS b/vendor/hunspell/AUTHORS index 3befa829f..f137fa26b 100644 --- a/vendor/hunspell/AUTHORS +++ b/vendor/hunspell/AUTHORS @@ -1,5 +1,5 @@ Author of Hunspell: -Németh László nemeth (at) OpenOffice.org +Németh László nemeth (at) numbertext.org Hunspell based on OpenOffice.org's Myspell. MySpell's author: Kevin Hendricks kevin.hendricks (at) sympatico.ca diff --git a/vendor/hunspell/ChangeLog b/vendor/hunspell/ChangeLog index b26991f9f..4b99a3f1b 100644 --- a/vendor/hunspell/ChangeLog +++ b/vendor/hunspell/ChangeLog @@ -1,3 +1,120 @@ +2014-06-02 Németh László : + * escape spaces in paths of ODF files + +2014-05-28 Németh László : + * add long path/Unicode path support in WIN32 environment: + - hunspell#233 (reported by mahak gark) and LibreOffice fdo#48017 + * flat ODF support, eg.: + hunspell doc.fodt + cat doc.fodt | hunspell -l -O + * new options: + - -X (XML) input format + - -O (ODF or flat ODF) input format + - --check-apostrophe: check and force Unicode apostrophe usage + (ASCII or Unicode apostrophe has to be in the + WORDCHARS section of the affix file) + * fix ODF support: + - break 1-line XML of ODT documents at , too, + not only at (limiting tokenization problems, when + fgets stops within an XML tag) + - show ODF file path on the UI instead of the temporary file + * fix XML support: + - ', ", &, < and > in replacements converted to XML entities + - recognize &apos at tokenization, depending from WORDCHARS + - ' in tokens converted to ' before spell checking and + in the output of the pipe interface + * better apostrophe usage: + - WORDCHARS only with one of the Unicode or ASCII apostrophe + results extended word tokenization: both of them will be part of + the words (if they are inside: eg. word's, but not words'). + - convert Unicode apostrophes to ASCII ones for 8-bit dictionaries + (eg. English dictionaries), or for UTF-8 dictionaries only + with ASCII apostrophe supports (eg. French dictionaries). + * updated manual: + - hunspell.4 renamed to hunspell.5, see + hunspell#241 reported by Cristopher Yeleighton + - updated translations + - note about long/Unicode paths in WIN32 (hunspell.3) + +2014-04-25 Németh László : + * OpenDocument support, eg. + hunspell *.odt + hunspell -l *.odt + * always load default personal dictionary (fix + filtering bad words - reduce this word list - using + it as a personal dictionary workflow) + * fix parsing/URL recognition problem (bad tokens + with aposthrophes) + +2013-07-25 pchang9@cs.wisc.edu + * moz#897255 Wasted work in line_uniq + * moz#897780 Wasted work in SuggestMgr::twowords + +2013-07-25 Caolán McNamara : + * hunspell#167 layout problems with long lines + - based on the original fix by xorho + adapted to HEAD + * rhbz#925562 upgrade config.guess for aarch64 + +2013-07-24 pchang9@cs.wisc.edu + * moz#896301 Wasted work in SfxEntry::checkword + * moz#896844 Wasted work in AffixMgr::defcpd_check + +2013-06-13 Konstantin Khlebniko + * #49 HashMgr::add_word computes wrong size for struct hentry + +2013-06-13 Ville Skyttä + * #53 Man page syntax fixes + +2013-04-19 John Thomson + * win_api: add remove() of Hunspell API (hun#3606435) + +2013-04-19 Rouslan Solomokhin + * fix crash in suggestions for 99-character long words + by extending arrays of SuggestMgr::forgotchar_* + (hun#3595024, also http://crbug.com/130128), + thanks to also PaweÅ‚ Hajdan to report the patch + +2013-04-01 Caolán McNamara : + * hunspell: -Werror=undef + +2013-03-13 Caolán McNamara : + * rhbz#918938 crash in interaction with danish thesaurus + +2012-09-18 Németh László : + * src/hunspell/affixmgr.*: - fix morphological analysis of + compound words (hun#3544994, reported by Dávid Nemeskey, fdo#55045) + +2012-06-29 Caolán McNamara : + * fix various coverity warnings + +2012-01-10 Ehsan Akhgari + * moz#710940 Firefox Crash [@ AffixMgr::parse_file(char const*, char + const*) ] + +2011-12-16 Jared Wein + * moz#710967 Incorrect argument passed to strncmp in + AffixMgr::parse_convtable + +2011-12-06 Caolán McNamara : + * rhbz#759647 fixed tempname of hunSPELL.bak collides with other users + when multiple edits in one dir + +2011-10-13 Caolán McNamara : + * moz#694002 crash in hunspell affixmgr on exit with bad .aff + * leak in hunspell affixmgr with bad .aff + +2011-09-19 Caolán McNamara : + * make libparsers.a not installed thanks to Tomáš Chvátal + +2011-06-23 Caolán McNamara : + * fix some windows compiler warnings + +2011-05-24 Németh László : + * src/hunspell/affixmgr.*: allow twofold suffixes in compounds + by extended version of Arno Teigseth's patch, see hun#3288562. + - new option for this feature: COMPOUNDMORESUFFIXES + 2011-02-16 Németh László : * src/*/Makefile.am: fix library versioning, the probem reported by Rene Engerhald and Simon Brouwer. diff --git a/vendor/hunspell/NEWS b/vendor/hunspell/NEWS index 957a70edc..b874a047c 100644 --- a/vendor/hunspell/NEWS +++ b/vendor/hunspell/NEWS @@ -1,3 +1,7 @@ +2014-06-02: Hunspell 1.3.3 release: + - OpenDocument (ODF and Flat ODF) support (ODF needs unzip program) + - various bug fixes + 2011-02-02: Hunspell 1.3.2 release: - fix library versioning - improved manual diff --git a/vendor/hunspell/README b/vendor/hunspell/README index ee34e264b..8289413f3 100644 --- a/vendor/hunspell/README +++ b/vendor/hunspell/README @@ -47,7 +47,7 @@ glibc-devel optional developer packages: -ncurses (need for --with-ui) +ncurses (need for --with-ui), eg. libncursesw5 for UTF-8 readline (for fancy input line editing, configure parameter: --with-readline) locale and gettext (but you can also use the @@ -118,7 +118,7 @@ Documentation ------------- features and dictionary format: -man 4 hunspell +man 5 hunspell man hunspell hunspell -h @@ -169,6 +169,9 @@ Dictionaries ------------ Myspell & Hunspell dictionaries: +http://extensions.libreoffice.org +http://cgit.freedesktop.org/libreoffice/dictionaries +http://extensions.openoffice.org http://wiki.services.openoffice.org/wiki/Dictionaries Aspell dictionaries (need some conversion): @@ -176,4 +179,4 @@ ftp://ftp.gnu.org/gnu/aspell/dict Conversion steps: see relevant feature request at http://hunspell.sf.net. László Németh -nemeth at OOo +nemeth at numbertext org diff --git a/vendor/hunspell/THANKS b/vendor/hunspell/THANKS index f6db77741..761fa7743 100644 --- a/vendor/hunspell/THANKS +++ b/vendor/hunspell/THANKS @@ -12,6 +12,7 @@ Ingo H. de Boer Simon Brouwer Jeppe Bundsgaard Ginn Chen +Tomáš Chvátal Aaron Digulla Dmitri Gabinski Dvornik László @@ -107,6 +108,9 @@ and others (see also AUTHORS.myspell) FSF.hu Foundation http://www.fsf.hu +LibreOffice community +http://www.libreoffice.org + MOKK Research Centre Budapest University of Technology and Economics Sociology and Communications Department @@ -129,4 +133,4 @@ UHU-Linux Kft. Thanks, Németh László -nemeth at OOo +nemeth at numbertext org diff --git a/vendor/hunspell/src/hunspell/affentry.cxx b/vendor/hunspell/src/hunspell/affentry.cxx index fef0cca5f..45c9ef58e 100644 --- a/vendor/hunspell/src/hunspell/affentry.cxx +++ b/vendor/hunspell/src/hunspell/affentry.cxx @@ -9,13 +9,17 @@ #include "affentry.hxx" #include "csutil.hxx" +#define MAXTEMPWORDLEN (MAXWORDUTF8LEN + 4) + PfxEntry::PfxEntry(AffixMgr* pmgr, affentry* dp) + // register affix manager + : pmyMgr(pmgr) + , next(NULL) + , nexteq(NULL) + , nextne(NULL) + , flgnxt(NULL) { - // register affix manager - pmyMgr = pmgr; - // set up its initial values - aflag = dp->aflag; // flag strip = dp->strip; // string to strip appnd = dp->appnd; // string to append @@ -28,9 +32,6 @@ PfxEntry::PfxEntry(AffixMgr* pmgr, affentry* dp) memcpy(c.conds, dp->c.l.conds1, MAXCONDLEN_1); c.l.conds2 = dp->c.l.conds2; } else memcpy(c.conds, dp->c.conds, MAXCONDLEN); - next = NULL; - nextne = NULL; - nexteq = NULL; morphcode = dp->morphcode; contclass = dp->contclass; contclasslen = dp->contclasslen; @@ -53,16 +54,17 @@ PfxEntry::~PfxEntry() // add prefix to this word assuming conditions hold char * PfxEntry::add(const char * word, int len) { - char tword[MAXWORDUTF8LEN + 4]; + char tword[MAXTEMPWORDLEN]; if ((len > stripl || (len == 0 && pmyMgr->get_fullstrip())) && (len >= numconds) && test_condition(word) && (!stripl || (strncmp(word, strip, stripl) == 0)) && - ((MAXWORDUTF8LEN + 4) > (len + appndl - stripl))) { + ((MAXTEMPWORDLEN) > (len + appndl - stripl))) { /* we have a match so add prefix */ char * pp = tword; if (appndl) { - strcpy(tword,appnd); + strncpy(tword, appnd, MAXTEMPWORDLEN-1); + tword[MAXTEMPWORDLEN-1] = '\0'; pp += appndl; } strcpy(pp, (word + stripl)); @@ -110,13 +112,15 @@ inline int PfxEntry::test_condition(const char * st) if (*st == '\0' && p) return 0; // word <= condition break; } - case '.': if (!pos) { // dots are not metacharacters in groups: [.] + case '.': + if (!pos) { // dots are not metacharacters in groups: [.] p = nextchar(p); // skip the next character for (st++; (opts & aeUTF8) && (*st & 0xc0) == 0x80; st++); if (*st == '\0' && p) return 0; // word <= condition break; } + /* FALLTHROUGH */ default: { if (*st == *p) { st++; @@ -133,11 +137,11 @@ inline int PfxEntry::test_condition(const char * st) } if (pos && st != pos) { ingroup = true; - while (p && *p != ']' && (p = nextchar(p))); + while (p && *p != ']' && ((p = nextchar(p)) != NULL)); } } else if (pos) { ingroup = true; - while (p && *p != ']' && (p = nextchar(p))); + while (p && *p != ']' && ((p = nextchar(p)) != NULL)); } } else if (pos) { // group p = nextchar(p); @@ -153,7 +157,7 @@ struct hentry * PfxEntry::checkword(const char * word, int len, char in_compound { int tmpl; // length of tmpword struct hentry * he; // hash entry of root word or NULL - char tmpword[MAXWORDUTF8LEN + 4]; + char tmpword[MAXTEMPWORDLEN]; // on entry prefix is 0 length or already matches the beginning of the word. // So if the remaining root word has positive length @@ -167,7 +171,10 @@ struct hentry * PfxEntry::checkword(const char * word, int len, char in_compound // generate new root word by removing prefix and adding // back any characters that would have been stripped - if (stripl) strcpy (tmpword, strip); + if (stripl) { + strncpy(tmpword, strip, MAXTEMPWORDLEN-1); + tmpword[MAXTEMPWORDLEN-1] = '\0'; + } strcpy ((tmpword + stripl), (word + appndl)); // now make sure all of the conditions on characters @@ -214,7 +221,7 @@ struct hentry * PfxEntry::check_twosfx(const char * word, int len, { int tmpl; // length of tmpword struct hentry * he; // hash entry of root word or NULL - char tmpword[MAXWORDUTF8LEN + 4]; + char tmpword[MAXTEMPWORDLEN]; // on entry prefix is 0 length or already matches the beginning of the word. // So if the remaining root word has positive length @@ -229,7 +236,10 @@ struct hentry * PfxEntry::check_twosfx(const char * word, int len, // generate new root word by removing prefix and adding // back any characters that would have been stripped - if (stripl) strcpy (tmpword, strip); + if (stripl) { + strncpy(tmpword, strip, MAXTEMPWORDLEN-1); + tmpword[MAXTEMPWORDLEN-1] = '\0'; + } strcpy ((tmpword + stripl), (word + appndl)); // now make sure all of the conditions on characters @@ -261,7 +271,7 @@ char * PfxEntry::check_twosfx_morph(const char * word, int len, char in_compound, const FLAG needflag) { int tmpl; // length of tmpword - char tmpword[MAXWORDUTF8LEN + 4]; + char tmpword[MAXTEMPWORDLEN]; // on entry prefix is 0 length or already matches the beginning of the word. // So if the remaining root word has positive length @@ -276,7 +286,10 @@ char * PfxEntry::check_twosfx_morph(const char * word, int len, // generate new root word by removing prefix and adding // back any characters that would have been stripped - if (stripl) strcpy (tmpword, strip); + if (stripl) { + strncpy(tmpword, strip, MAXTEMPWORDLEN-1); + tmpword[MAXTEMPWORDLEN-1] = '\0'; + } strcpy ((tmpword + stripl), (word + appndl)); // now make sure all of the conditions on characters @@ -308,7 +321,7 @@ char * PfxEntry::check_morph(const char * word, int len, char in_compound, const { int tmpl; // length of tmpword struct hentry * he; // hash entry of root word or NULL - char tmpword[MAXWORDUTF8LEN + 4]; + char tmpword[MAXTEMPWORDLEN]; char result[MAXLNLEN]; char * st; @@ -327,7 +340,10 @@ char * PfxEntry::check_morph(const char * word, int len, char in_compound, const // generate new root word by removing prefix and adding // back any characters that would have been stripped - if (stripl) strcpy (tmpword, strip); + if (stripl) { + strncpy(tmpword, strip, MAXTEMPWORDLEN-1); + tmpword[MAXTEMPWORDLEN-1] = '\0'; + } strcpy ((tmpword + stripl), (word + appndl)); // now make sure all of the conditions on characters @@ -395,10 +411,15 @@ char * PfxEntry::check_morph(const char * word, int len, char in_compound, const } SfxEntry::SfxEntry(AffixMgr * pmgr, affentry* dp) + : pmyMgr(pmgr) // register affix manager + , next(NULL) + , nexteq(NULL) + , nextne(NULL) + , flgnxt(NULL) + , l_morph(NULL) + , r_morph(NULL) + , eq_morph(NULL) { - // register affix manager - pmyMgr = pmgr; - // set up its initial values aflag = dp->aflag; // char flag strip = dp->strip; // string to strip @@ -413,7 +434,6 @@ SfxEntry::SfxEntry(AffixMgr * pmgr, affentry* dp) memcpy(c.l.conds1, dp->c.l.conds1, MAXCONDLEN_1); c.l.conds2 = dp->c.l.conds2; } else memcpy(c.conds, dp->c.conds, MAXCONDLEN); - rappnd = myrevstrdup(appnd); morphcode = dp->morphcode; contclass = dp->contclass; @@ -438,15 +458,16 @@ SfxEntry::~SfxEntry() // add suffix to this word assuming conditions hold char * SfxEntry::add(const char * word, int len) { - char tword[MAXWORDUTF8LEN + 4]; + char tword[MAXTEMPWORDLEN]; /* make sure all conditions match */ if ((len > stripl || (len == 0 && pmyMgr->get_fullstrip())) && (len >= numconds) && test_condition(word + len, word) && (!stripl || (strcmp(word + len - stripl, strip) == 0)) && - ((MAXWORDUTF8LEN + 4) > (len + appndl - stripl))) { + ((MAXTEMPWORDLEN) > (len + appndl - stripl))) { /* we have a match so add suffix */ - strcpy(tword,word); + strncpy(tword, word, MAXTEMPWORDLEN-1); + tword[MAXTEMPWORDLEN-1] = '\0'; if (appndl) { strcpy(tword + len - stripl, appnd); } else { @@ -481,24 +502,37 @@ inline int SfxEntry::test_condition(const char * st, const char * beg) int i = 1; while (1) { switch (*p) { - case '\0': return 1; - case '[': { p = nextchar(p); pos = st; break; } - case '^': { p = nextchar(p); neg = true; break; } - case ']': { if (!neg && !ingroup) return 0; - i++; - // skip the next character - if (!ingroup) { - for (; (opts & aeUTF8) && (st >= beg) && (*st & 0xc0) == 0x80; st--); - st--; - } - pos = NULL; - neg = false; - ingroup = false; - p = nextchar(p); - if (st < beg && p) return 0; // word <= condition - break; - } - case '.': if (!pos) { // dots are not metacharacters in groups: [.] + case '\0': + return 1; + case '[': + p = nextchar(p); + pos = st; + break; + case '^': + p = nextchar(p); + neg = true; + break; + case ']': + if (!neg && !ingroup) + return 0; + i++; + // skip the next character + if (!ingroup) + { + for (; (opts & aeUTF8) && (st >= beg) && (*st & 0xc0) == 0x80; st--); + st--; + } + pos = NULL; + neg = false; + ingroup = false; + p = nextchar(p); + if (st < beg && p) + return 0; // word <= condition + break; + case '.': + if (!pos) + { + // dots are not metacharacters in groups: [.] p = nextchar(p); // skip the next character for (st--; (opts & aeUTF8) && (st >= beg) && (*st & 0xc0) == 0x80; st--); @@ -513,6 +547,7 @@ inline int SfxEntry::test_condition(const char * st, const char * beg) } break; } + /* FALLTHROUGH */ default: { if (*st == *p) { p = nextchar(p); @@ -533,7 +568,7 @@ inline int SfxEntry::test_condition(const char * st, const char * beg) if (neg) return 0; else if (i == numconds) return 1; ingroup = true; - while (p && *p != ']' && (p = nextchar(p))); + while (p && *p != ']' && ((p = nextchar(p)) != NULL)); st--; } if (p && *p != ']') p = nextchar(p); @@ -541,7 +576,7 @@ inline int SfxEntry::test_condition(const char * st, const char * beg) if (neg) return 0; else if (i == numconds) return 1; ingroup = true; - while (p && *p != ']' && (p = nextchar(p))); + while (p && *p != ']' && ((p = nextchar(p)) != NULL)); // if (p && *p != ']') p = nextchar(p); st--; } @@ -567,7 +602,7 @@ struct hentry * SfxEntry::checkword(const char * word, int len, int optflags, int tmpl; // length of tmpword struct hentry * he; // hash entry pointer unsigned char * cp; - char tmpword[MAXWORDUTF8LEN + 4]; + char tmpword[MAXTEMPWORDLEN]; PfxEntry* ep = ppfx; // if this suffix is being cross checked with a prefix @@ -592,7 +627,8 @@ struct hentry * SfxEntry::checkword(const char * word, int len, int optflags, // back any characters that would have been stripped or // or null terminating the shorter string - strcpy (tmpword, word); + strncpy (tmpword, word, MAXTEMPWORDLEN-1); + tmpword[MAXTEMPWORDLEN-1] = '\0'; cp = (unsigned char *)(tmpword + tmpl); if (stripl) { strcpy ((char *)cp, strip); @@ -645,7 +681,10 @@ struct hentry * SfxEntry::checkword(const char * word, int len, int optflags, } else if (wlst && (*ns < maxSug)) { int cwrd = 1; for (int k=0; k < *ns; k++) - if (strcmp(tmpword, wlst[k]) == 0) cwrd = 0; + if (strcmp(tmpword, wlst[k]) == 0) { + cwrd = 0; + break; + } if (cwrd) { wlst[*ns] = mystrdup(tmpword); if (wlst[*ns] == NULL) { @@ -668,7 +707,7 @@ struct hentry * SfxEntry::check_twosfx(const char * word, int len, int optflags, int tmpl; // length of tmpword struct hentry * he; // hash entry pointer unsigned char * cp; - char tmpword[MAXWORDUTF8LEN + 4]; + char tmpword[MAXTEMPWORDLEN]; PfxEntry* ep = ppfx; @@ -692,7 +731,8 @@ struct hentry * SfxEntry::check_twosfx(const char * word, int len, int optflags, // back any characters that would have been stripped or // or null terminating the shorter string - strcpy (tmpword, word); + strncpy(tmpword, word, MAXTEMPWORDLEN-1); + tmpword[MAXTEMPWORDLEN-1] = '\0'; cp = (unsigned char *)(tmpword + tmpl); if (stripl) { strcpy ((char *)cp, strip); @@ -729,7 +769,7 @@ char * SfxEntry::check_twosfx_morph(const char * word, int len, int optflags, { int tmpl; // length of tmpword unsigned char * cp; - char tmpword[MAXWORDUTF8LEN + 4]; + char tmpword[MAXTEMPWORDLEN]; PfxEntry* ep = ppfx; char * st; @@ -757,7 +797,8 @@ char * SfxEntry::check_twosfx_morph(const char * word, int len, int optflags, // back any characters that would have been stripped or // or null terminating the shorter string - strcpy (tmpword, word); + strncpy(tmpword, word, MAXTEMPWORDLEN-1); + tmpword[MAXTEMPWORDLEN-1] = '\0'; cp = (unsigned char *)(tmpword + tmpl); if (stripl) { strcpy ((char *)cp, strip); diff --git a/vendor/hunspell/src/hunspell/affentry.hxx b/vendor/hunspell/src/hunspell/affentry.hxx index eaf361fcc..923ee5ffe 100644 --- a/vendor/hunspell/src/hunspell/affentry.hxx +++ b/vendor/hunspell/src/hunspell/affentry.hxx @@ -11,6 +11,10 @@ class LIBHUNSPELL_DLL_EXPORTED PfxEntry : protected AffEntry { +private: + PfxEntry(const PfxEntry&); + PfxEntry& operator = (const PfxEntry&); +private: AffixMgr* pmyMgr; PfxEntry * next; @@ -67,6 +71,10 @@ public: class LIBHUNSPELL_DLL_EXPORTED SfxEntry : protected AffEntry { +private: + SfxEntry(const SfxEntry&); + SfxEntry& operator = (const SfxEntry&); +private: AffixMgr* pmyMgr; char * rappnd; diff --git a/vendor/hunspell/src/hunspell/affixmgr.cxx b/vendor/hunspell/src/hunspell/affixmgr.cxx index b9108d45e..ee74ee47b 100644 --- a/vendor/hunspell/src/hunspell/affixmgr.cxx +++ b/vendor/hunspell/src/hunspell/affixmgr.cxx @@ -48,6 +48,7 @@ AffixMgr::AffixMgr(const char * affpath, HashMgr** ptr, int * md, const char * k compoundroot = FLAG_NULL; // compound word signing flag compoundpermitflag = FLAG_NULL; // compound permitting flag for suffixed word compoundforbidflag = FLAG_NULL; // compound fordidden flag for suffixed word + compoundmoresuffixes = 0; // allow more suffixes within compound words checkcompounddup = 0; // forbid double words in compounds checkcompoundrep = 0; // forbid bad compounds (may be non compound word with a REP substitution) checkcompoundcase = 0; // forbid upper and lowercase combinations at word bounds @@ -253,6 +254,14 @@ AffixMgr::~AffixMgr() #endif } +void AffixMgr::finishFileMgr(FileMgr *afflst) +{ + delete afflst; + + // convert affix trees to sorted list + process_pfx_tree_to_list(); + process_sfx_tree_to_list(); +} // read in aff file and build up prefix and suffix entry objects int AffixMgr::parse_file(const char * affpath, const char * key) @@ -279,7 +288,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key) // read in each line ignoring any that do not // start with a known line type indicator - while ((line = afflst->getline())) { + while ((line = afflst->getline()) != NULL) { mychomp(line); /* remove byte order mark */ @@ -294,7 +303,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key) /* parse in the keyboard string */ if (strncmp(line,"KEY",3) == 0) { if (parse_string(line, &keystring, afflst->getlinenum())) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -302,7 +311,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key) /* parse in the try string */ if (strncmp(line,"TRY",3) == 0) { if (parse_string(line, &trystring, afflst->getlinenum())) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -310,7 +319,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key) /* parse in the name of the character set used by the .dict and .aff */ if (strncmp(line,"SET",3) == 0) { if (parse_string(line, &encoding, afflst->getlinenum())) { - delete afflst; + finishFileMgr(afflst); return 1; } if (strcmp(encoding, "UTF-8") == 0) { @@ -330,7 +339,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key) /* parse in the flag used by the controlled compound words */ if (strncmp(line,"COMPOUNDFLAG",12) == 0) { if (parse_flag(line, &compoundflag, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -339,12 +348,12 @@ int AffixMgr::parse_file(const char * affpath, const char * key) if (strncmp(line,"COMPOUNDBEGIN",13) == 0) { if (complexprefixes) { if (parse_flag(line, &compoundend, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } else { if (parse_flag(line, &compoundbegin, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -353,7 +362,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key) /* parse in the flag used by compound words */ if (strncmp(line,"COMPOUNDMIDDLE",14) == 0) { if (parse_flag(line, &compoundmiddle, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -361,12 +370,12 @@ int AffixMgr::parse_file(const char * affpath, const char * key) if (strncmp(line,"COMPOUNDEND",11) == 0) { if (complexprefixes) { if (parse_flag(line, &compoundbegin, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } else { if (parse_flag(line, &compoundend, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -375,7 +384,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key) /* parse in the data used by compound_check() method */ if (strncmp(line,"COMPOUNDWORDMAX",15) == 0) { if (parse_num(line, &cpdwordmax, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -383,7 +392,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key) /* parse in the flag sign compounds in dictionary */ if (strncmp(line,"COMPOUNDROOT",12) == 0) { if (parse_flag(line, &compoundroot, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -391,7 +400,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key) /* parse in the flag used by compound_check() method */ if (strncmp(line,"COMPOUNDPERMITFLAG",18) == 0) { if (parse_flag(line, &compoundpermitflag, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -399,11 +408,15 @@ int AffixMgr::parse_file(const char * affpath, const char * key) /* parse in the flag used by compound_check() method */ if (strncmp(line,"COMPOUNDFORBIDFLAG",18) == 0) { if (parse_flag(line, &compoundforbidflag, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } + if (strncmp(line,"COMPOUNDMORESUFFIXES",20) == 0) { + compoundmoresuffixes = 1; + } + if (strncmp(line,"CHECKCOMPOUNDDUP",16) == 0) { checkcompounddup = 1; } @@ -426,14 +439,14 @@ int AffixMgr::parse_file(const char * affpath, const char * key) if (strncmp(line,"NOSUGGEST",9) == 0) { if (parse_flag(line, &nosuggest, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } if (strncmp(line,"NONGRAMSUGGEST",14) == 0) { if (parse_flag(line, &nongramsuggest, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -441,7 +454,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key) /* parse in the flag used by forbidden words */ if (strncmp(line,"FORBIDDENWORD",13) == 0) { if (parse_flag(line, &forbiddenword, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -449,7 +462,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key) /* parse in the flag used by forbidden words */ if (strncmp(line,"LEMMA_PRESENT",13) == 0) { if (parse_flag(line, &lemma_present, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -457,7 +470,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key) /* parse in the flag used by circumfixes */ if (strncmp(line,"CIRCUMFIX",9) == 0) { if (parse_flag(line, &circumfix, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -465,7 +478,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key) /* parse in the flag used by fogemorphemes */ if (strncmp(line,"ONLYINCOMPOUND",14) == 0) { if (parse_flag(line, &onlyincompound, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -473,7 +486,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key) /* parse in the flag used by `needaffixs' */ if (strncmp(line,"PSEUDOROOT",10) == 0) { if (parse_flag(line, &needaffix, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -481,7 +494,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key) /* parse in the flag used by `needaffixs' */ if (strncmp(line,"NEEDAFFIX",9) == 0) { if (parse_flag(line, &needaffix, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -489,7 +502,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key) /* parse in the minimal length for words in compounds */ if (strncmp(line,"COMPOUNDMIN",11) == 0) { if (parse_num(line, &cpdmin, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } if (cpdmin < 1) cpdmin = 1; @@ -498,7 +511,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key) /* parse in the max. words and syllables in compounds */ if (strncmp(line,"COMPOUNDSYLLABLE",16) == 0) { if (parse_cpdsyllable(line, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -506,7 +519,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key) /* parse in the flag used by compound_check() method */ if (strncmp(line,"SYLLABLENUM",11) == 0) { if (parse_string(line, &cpdsyllablenum, afflst->getlinenum())) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -519,7 +532,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key) /* parse in the extra word characters */ if (strncmp(line,"WORDCHARS",9) == 0) { if (parse_array(line, &wordchars, &wordchars_utf16, &wordchars_utf16_len, utf8, afflst->getlinenum())) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -527,7 +540,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key) /* parse in the ignored characters (for example, Arabic optional diacretics charachters */ if (strncmp(line,"IGNORE",6) == 0) { if (parse_array(line, &ignorechars, &ignorechars_utf16, &ignorechars_utf16_len, utf8, afflst->getlinenum())) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -535,7 +548,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key) /* parse in the typical fault correcting table */ if (strncmp(line,"REP",3) == 0) { if (parse_reptable(line, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -543,7 +556,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key) /* parse in the input conversion table */ if (strncmp(line,"ICONV",5) == 0) { if (parse_convtable(line, afflst, &iconvtable, "ICONV")) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -551,7 +564,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key) /* parse in the input conversion table */ if (strncmp(line,"OCONV",5) == 0) { if (parse_convtable(line, afflst, &oconvtable, "OCONV")) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -559,7 +572,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key) /* parse in the phonetic translation table */ if (strncmp(line,"PHONE",5) == 0) { if (parse_phonetable(line, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -567,7 +580,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key) /* parse in the checkcompoundpattern table */ if (strncmp(line,"CHECKCOMPOUNDPATTERN",20) == 0) { if (parse_checkcpdtable(line, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -575,7 +588,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key) /* parse in the defcompound table */ if (strncmp(line,"COMPOUNDRULE",12) == 0) { if (parse_defcpdtable(line, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -583,7 +596,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key) /* parse in the related character map table */ if (strncmp(line,"MAP",3) == 0) { if (parse_maptable(line, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -591,7 +604,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key) /* parse in the word breakpoints table */ if (strncmp(line,"BREAK",5) == 0) { if (parse_breaktable(line, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -599,7 +612,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key) /* parse in the language for language specific codes */ if (strncmp(line,"LANG",4) == 0) { if (parse_string(line, &lang, afflst->getlinenum())) { - delete afflst; + finishFileMgr(afflst); return 1; } langnum = get_lang_num(lang); @@ -612,7 +625,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key) if (strncmp(line,"MAXNGRAMSUGS",12) == 0) { if (parse_num(line, &maxngramsugs, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -622,14 +635,14 @@ int AffixMgr::parse_file(const char * affpath, const char * key) if (strncmp(line,"MAXDIFF",7) == 0) { if (parse_num(line, &maxdiff, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } if (strncmp(line,"MAXCPDSUGS",10) == 0) { if (parse_num(line, &maxcpdsugs, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -649,7 +662,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key) /* parse in the flag used by forbidden words */ if (strncmp(line,"KEEPCASE",8) == 0) { if (parse_flag(line, &keepcase, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -657,7 +670,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key) /* parse in the flag used by `forceucase' */ if (strncmp(line,"FORCEUCASE",10) == 0) { if (parse_flag(line, &forceucase, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -665,7 +678,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key) /* parse in the flag used by `warn' */ if (strncmp(line,"WARN",4) == 0) { if (parse_flag(line, &warn, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -677,7 +690,7 @@ int AffixMgr::parse_file(const char * affpath, const char * key) /* parse in the flag used by the affix generator */ if (strncmp(line,"SUBSTANDARD",11) == 0) { if (parse_flag(line, &substandard, afflst)) { - delete afflst; + finishFileMgr(afflst); return 1; } } @@ -696,19 +709,14 @@ int AffixMgr::parse_file(const char * affpath, const char * key) dupflags_ini = 0; } if (parse_affix(line, ft, afflst, dupflags)) { - delete afflst; - process_pfx_tree_to_list(); - process_sfx_tree_to_list(); + finishFileMgr(afflst); return 1; } } - } - delete afflst; - // convert affix trees to sorted list - process_pfx_tree_to_list(); - process_sfx_tree_to_list(); + finishFileMgr(afflst); + // affix trees are sorted now // now we can speed up performance greatly taking advantage of the // relationship between the affixes and the idea of "subsets". @@ -1319,7 +1327,7 @@ int AffixMgr::cpdrep_check(const char * word, int wl) } // forbid compoundings when there are special patterns at word bound -int AffixMgr::cpdpat_check(const char * word, int pos, hentry * r1, hentry * r2, const char affixed) +int AffixMgr::cpdpat_check(const char * word, int pos, hentry * r1, hentry * r2, const char /*affixed*/) { int len; for (int i = 0; i < numcheckcpd; i++) { @@ -1332,7 +1340,7 @@ int AffixMgr::cpdpat_check(const char * word, int pos, hentry * r1, hentry * r2, // zero pattern (0/flag) => unmodified stem (zero affixes allowed) (!*(checkcpdtable[i].pattern) || ( (*(checkcpdtable[i].pattern)=='0' && r1->blen <= pos && strncmp(word + pos - r1->blen, r1->word, r1->blen) == 0) || - (*(checkcpdtable[i].pattern)!='0' && (len = strlen(checkcpdtable[i].pattern)) && + (*(checkcpdtable[i].pattern)!='0' && ((len = strlen(checkcpdtable[i].pattern)) != 0) && strncmp(word + pos - len, checkcpdtable[i].pattern, len) == 0)))) { return 1; } @@ -1393,7 +1401,10 @@ int AffixMgr::defcpd_check(hentry *** words, short wnum, hentry * rv, hentry ** for (i = 0; i < numdefcpd; i++) { for (j = 0; j < defcpdtable[i].len; j++) { if (defcpdtable[i].def[j] != '*' && defcpdtable[i].def[j] != '?' && - TESTAFF(rv->astr, defcpdtable[i].def[j], rv->alen)) ok = 1; + TESTAFF(rv->astr, defcpdtable[i].def[j], rv->alen)) { + ok = 1; + break; + } } } if (ok == 0) { @@ -1544,7 +1555,7 @@ struct hentry * AffixMgr::compound_check(const char * word, int len, int oldlen = 0; int checkedstriple = 0; int onlycpdrule; - int affixed = 0; + char affixed = 0; hentry ** oldwords = words; int checked_prefix; @@ -1626,8 +1637,9 @@ struct hentry * AffixMgr::compound_check(const char * word, int len, if (onlycpdrule) break; if (compoundflag && !(rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundflag))) { - if ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, - FLAG_NULL, compoundflag, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) && !hu_mov_rule && + if (((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, + FLAG_NULL, compoundflag, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) || + (compoundmoresuffixes && (rv = suffix_check_twosfx(st, i, 0, NULL, compoundflag)))) && !hu_mov_rule && sfx->getCont() && ((compoundforbidflag && TESTAFF(sfx->getCont(), compoundforbidflag, sfx->getContLen())) || (compoundend && @@ -1640,9 +1652,11 @@ struct hentry * AffixMgr::compound_check(const char * word, int len, if (rv || (((wordnum == 0) && compoundbegin && ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundbegin, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) || + (compoundmoresuffixes && (rv = suffix_check_twosfx(st, i, 0, NULL, compoundbegin))) || // twofold suffixes + compound (rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundbegin)))) || ((wordnum > 0) && compoundmiddle && ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundmiddle, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) || + (compoundmoresuffixes && (rv = suffix_check_twosfx(st, i, 0, NULL, compoundmiddle))) || // twofold suffixes + compound (rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundmiddle))))) ) checked_prefix = 1; // else check forbiddenwords and needaffix @@ -2045,7 +2059,7 @@ int AffixMgr::compound_check_morph(const char * word, int len, int cmax; int onlycpdrule; - int affixed = 0; + char affixed = 0; hentry ** oldwords = words; setcminmax(&cmin, &cmax, word, len); @@ -2115,11 +2129,12 @@ int AffixMgr::compound_check_morph(const char * word, int len, } if (!rv) { - if (onlycpdrule) break; + if (onlycpdrule && strlen(*result) > MAXLNLEN/10) break; if (compoundflag && !(rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundflag))) { - if ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, - FLAG_NULL, compoundflag, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) && !hu_mov_rule && + if (((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, + FLAG_NULL, compoundflag, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) || + (compoundmoresuffixes && (rv = suffix_check_twosfx(st, i, 0, NULL, compoundflag)))) && !hu_mov_rule && sfx->getCont() && ((compoundforbidflag && TESTAFF(sfx->getCont(), compoundforbidflag, sfx->getContLen())) || (compoundend && @@ -2132,9 +2147,11 @@ int AffixMgr::compound_check_morph(const char * word, int len, if (rv || (((wordnum == 0) && compoundbegin && ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundbegin, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) || + (compoundmoresuffixes && (rv = suffix_check_twosfx(st, i, 0, NULL, compoundbegin))) || // twofold suffix+compound (rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundbegin)))) || ((wordnum > 0) && compoundmiddle && ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundmiddle, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) || + (compoundmoresuffixes && (rv = suffix_check_twosfx(st, i, 0, NULL, compoundmiddle))) || // twofold suffix+compound (rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundmiddle))))) ) { // char * p = prefix_check_morph(st, i, 0, compound); @@ -3554,7 +3571,7 @@ int AffixMgr::parse_reptable(char * line, FileMgr * af) /* now parse the numrep lines to read in the remainder of the table */ char * nl; for (int j=0; j < numrep; j++) { - if (!(nl = af->getline())) return 1; + if ((nl = af->getline()) == NULL) return 1; mychomp(nl); tp = nl; i = 0; @@ -3651,7 +3668,7 @@ int AffixMgr::parse_convtable(char * line, FileMgr * af, RepList ** rl, const c if (*piece != '\0') { switch(i) { case 0: { - if (strncmp(piece, keyword, sizeof(keyword)) != 0) { + if (strncmp(piece, keyword, strlen(keyword)) != 0) { HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum()); delete *rl; *rl = NULL; @@ -4258,7 +4275,7 @@ int AffixMgr::parse_affix(char * line, const char at, FileMgr * af, char * dupf std::vector::iterator start = affentries.begin(); std::vector::iterator end = affentries.end(); for (std::vector::iterator entry = start; entry != end; ++entry) { - if (!(nl = af->getline())) return 1; + if ((nl = af->getline()) == NULL) return 1; mychomp(nl); tp = nl; i = 0; diff --git a/vendor/hunspell/src/hunspell/affixmgr.hxx b/vendor/hunspell/src/hunspell/affixmgr.hxx index d9c625aed..736816f04 100644 --- a/vendor/hunspell/src/hunspell/affixmgr.hxx +++ b/vendor/hunspell/src/hunspell/affixmgr.hxx @@ -41,6 +41,7 @@ class LIBHUNSPELL_DLL_EXPORTED AffixMgr FLAG compoundroot; FLAG compoundforbidflag; FLAG compoundpermitflag; + int compoundmoresuffixes; int checkcompounddup; int checkcompoundrep; int checkcompoundcase; @@ -244,6 +245,7 @@ private: int process_sfx_tree_to_list(); int redundant_condition(char, char * strip, int stripl, const char * cond, int); + void finishFileMgr(FileMgr *afflst); }; #endif diff --git a/vendor/hunspell/src/hunspell/atypes.hxx b/vendor/hunspell/src/hunspell/atypes.hxx index df27c4d1c..61c59d5ff 100644 --- a/vendor/hunspell/src/hunspell/atypes.hxx +++ b/vendor/hunspell/src/hunspell/atypes.hxx @@ -57,7 +57,7 @@ static inline void HUNSPELL_WARNING(FILE *, const char *, ...) {} #define FLAG_NULL 0x00 #define FREE_FLAG(a) a = 0 -#define TESTAFF( a, b , c ) flag_bsearch((unsigned short *) a, (unsigned short) b, c) +#define TESTAFF( a, b , c ) (flag_bsearch((unsigned short *) a, (unsigned short) b, c)) struct affentry { diff --git a/vendor/hunspell/src/hunspell/baseaffix.hxx b/vendor/hunspell/src/hunspell/baseaffix.hxx index ed64f3d84..f417acaa4 100644 --- a/vendor/hunspell/src/hunspell/baseaffix.hxx +++ b/vendor/hunspell/src/hunspell/baseaffix.hxx @@ -5,7 +5,11 @@ class LIBHUNSPELL_DLL_EXPORTED AffEntry { +private: + AffEntry(const AffEntry&); + AffEntry& operator = (const AffEntry&); protected: + AffEntry() {} char * appnd; char * strip; unsigned char appndl; diff --git a/vendor/hunspell/src/hunspell/csutil.cxx b/vendor/hunspell/src/hunspell/csutil.cxx index dd89c1909..f877f2815 100644 --- a/vendor/hunspell/src/hunspell/csutil.cxx +++ b/vendor/hunspell/src/hunspell/csutil.cxx @@ -17,6 +17,11 @@ struct unicode_info { unsigned short clower; }; +#ifdef _WIN32 +#include +#include +#endif + #ifdef OPENOFFICEORG # include #else @@ -46,6 +51,21 @@ struct unicode_info2 { static struct unicode_info2 * utf_tbl = NULL; static int utf_tbl_count = 0; // utf_tbl can be used by multiple Hunspell instances +FILE * myfopen(const char * path, const char * mode) { +#ifdef _WIN32 +#define WIN32_LONG_PATH_PREFIX "\\\\?\\" + if (strncmp(path, WIN32_LONG_PATH_PREFIX, 4) == 0) { + int len = MultiByteToWideChar(CP_UTF8, 0, path, -1, NULL, 0); + wchar_t *buff = (wchar_t *) malloc(len * sizeof(wchar_t)); + MultiByteToWideChar(CP_UTF8, 0, path, -1, buff, len); + FILE * f = _wfopen(buff, (strcmp(mode, "r") == 0) ? L"r" : L"rb"); + free(buff); + return f; + } +#endif + return fopen(path, mode); +} + /* only UTF-16 (BMP) implementation */ char * u16_u8(char * dest, int size, const w_char * src, int srclen) { signed char * u8 = (signed char *)dest; @@ -342,7 +362,10 @@ char * line_uniq(char * text, char breakchar) { for ( i = 1; i < linenum; i++ ) { int dup = 0; for (int j = 0; j < i; j++) { - if (strcmp(lines[i], lines[j]) == 0) dup = 1; + if (strcmp(lines[i], lines[j]) == 0) { + dup = 1; + break; + } } if (!dup) { if ((i > 1) || (*(lines[0]) != '\0')) { @@ -5468,7 +5491,15 @@ struct cs_info * get_current_cs(const char * es) { // conversion tables static in this file, create them when needed // with help the mozilla backend. struct cs_info * get_current_cs(const char * es) { - struct cs_info *ccs; + struct cs_info *ccs = new cs_info[256]; + // Initialze the array with dummy data so that we wouldn't need + // to return null in case of failures. + for (int i = 0; i <= 0xff; ++i) { + ccs[i].ccase = false; + ccs[i].clower = i; + ccs[i].cupper = i; + } + nsCOMPtr encoder; nsCOMPtr decoder; @@ -5476,21 +5507,19 @@ struct cs_info * get_current_cs(const char * es) { nsresult rv; nsCOMPtr ccm = do_GetService(kCharsetConverterManagerCID, &rv); if (NS_FAILED(rv)) - return nsnull; + return ccs; rv = ccm->GetUnicodeEncoder(es, getter_AddRefs(encoder)); if (NS_FAILED(rv)) - return nsnull; + return ccs; encoder->SetOutputErrorBehavior(encoder->kOnError_Signal, nsnull, '?'); rv = ccm->GetUnicodeDecoder(es, getter_AddRefs(decoder)); if (NS_FAILED(rv)) - return nsnull; + return ccs; decoder->SetInputErrorBehavior(decoder->kOnError_Signal); if (NS_FAILED(rv)) - return nsnull; - - ccs = new cs_info[256]; + return ccs; for (unsigned int i = 0; i <= 0xff; ++i) { PRBool success = PR_FALSE; @@ -5653,7 +5682,7 @@ unsigned short unicodetoupper(unsigned short c, int langnum) if (c == 0x0069 && ((langnum == LANG_az) || (langnum == LANG_tr))) return 0x0130; #ifdef OPENOFFICEORG - return u_toupper(c); + return static_cast(u_toupper(c)); #else #ifdef MOZILLA_CLIENT return ToUpperCase((PRUnichar) c); @@ -5671,7 +5700,7 @@ unsigned short unicodetolower(unsigned short c, int langnum) if (c == 0x0049 && ((langnum == LANG_az) || (langnum == LANG_tr))) return 0x0131; #ifdef OPENOFFICEORG - return u_tolower(c); + return static_cast(u_tolower(c)); #else #ifdef MOZILLA_CLIENT return ToLowerCase((PRUnichar) c); diff --git a/vendor/hunspell/src/hunspell/csutil.hxx b/vendor/hunspell/src/hunspell/csutil.hxx index 7bd0b919b..e034b53fd 100644 --- a/vendor/hunspell/src/hunspell/csutil.hxx +++ b/vendor/hunspell/src/hunspell/csutil.hxx @@ -52,6 +52,9 @@ #define FORBIDDENWORD 65510 #define ONLYUPCASEFLAG 65511 +// fopen or optional _wfopen to fix long pathname problem of WIN32 +LIBHUNSPELL_DLL_EXPORTED FILE * myfopen(const char * path, const char * mode); + // convert UTF-16 characters to UTF-8 LIBHUNSPELL_DLL_EXPORTED char * u16_u8(char * dest, int size, const w_char * src, int srclen); diff --git a/vendor/hunspell/src/hunspell/dictmgr.cxx b/vendor/hunspell/src/hunspell/dictmgr.cxx index b4a15b1a5..a94429e59 100644 --- a/vendor/hunspell/src/hunspell/dictmgr.cxx +++ b/vendor/hunspell/src/hunspell/dictmgr.cxx @@ -5,6 +5,7 @@ #include #include "dictmgr.hxx" +#include "csutil.hxx" DictMgr::DictMgr(const char * dictpath, const char * etype) : numdict(0) { @@ -57,7 +58,7 @@ int DictMgr::parse_file(const char * dictpath, const char * etype) // open the dictionary list file FILE * dictlst; - dictlst = fopen(dictpath,"r"); + dictlst = myfopen(dictpath,"r"); if (!dictlst) { return 1; } @@ -100,7 +101,8 @@ int DictMgr::parse_file(const char * dictpath, const char * etype) case 3: free(pdict->region); pdict->region=NULL; - case 2: //deliberate fallthrough + /* FALLTHROUGH */ + case 2: free(pdict->lang); pdict->lang=NULL; default: diff --git a/vendor/hunspell/src/hunspell/dictmgr.hxx b/vendor/hunspell/src/hunspell/dictmgr.hxx index bb197f84f..692ed964c 100644 --- a/vendor/hunspell/src/hunspell/dictmgr.hxx +++ b/vendor/hunspell/src/hunspell/dictmgr.hxx @@ -15,7 +15,10 @@ struct dictentry { class LIBHUNSPELL_DLL_EXPORTED DictMgr { - +private: + DictMgr(const DictMgr&); + DictMgr& operator = (const DictMgr&); +private: int numdict; dictentry * pdentry; diff --git a/vendor/hunspell/src/hunspell/filemgr.cxx b/vendor/hunspell/src/hunspell/filemgr.cxx index 5fb82bcf8..e1fb80d92 100644 --- a/vendor/hunspell/src/hunspell/filemgr.cxx +++ b/vendor/hunspell/src/hunspell/filemgr.cxx @@ -6,16 +6,20 @@ #include #include "filemgr.hxx" +#include "csutil.hxx" int FileMgr::fail(const char * err, const char * par) { fprintf(stderr, err, par); return -1; } -FileMgr::FileMgr(const char * file, const char * key) { - linenum = 0; - hin = NULL; - fin = fopen(file, "r"); +FileMgr::FileMgr(const char * file, const char * key) + : hin(NULL) + , linenum(0) +{ + in[0] = '\0'; + + fin = myfopen(file, "r"); if (!fin) { // check hzipped file char * st = (char *) malloc(strlen(file) + strlen(HZIP_EXTENSION) + 1); @@ -39,7 +43,7 @@ char * FileMgr::getline() { const char * l; linenum++; if (fin) return fgets(in, BUFSIZE - 1, fin); - if (hin && (l = hin->getline())) return strcpy(in, l); + if (hin && ((l = hin->getline()) != NULL)) return strcpy(in, l); linenum--; return NULL; } diff --git a/vendor/hunspell/src/hunspell/filemgr.hxx b/vendor/hunspell/src/hunspell/filemgr.hxx index 94cb7233d..37b2ae9ea 100644 --- a/vendor/hunspell/src/hunspell/filemgr.hxx +++ b/vendor/hunspell/src/hunspell/filemgr.hxx @@ -9,6 +9,9 @@ class LIBHUNSPELL_DLL_EXPORTED FileMgr { +private: + FileMgr(const FileMgr&); + FileMgr& operator = (const FileMgr&); protected: FILE * fin; Hunzip * hin; diff --git a/vendor/hunspell/src/hunspell/hashmgr.cxx b/vendor/hunspell/src/hunspell/hashmgr.cxx index ea93b8787..12adf420d 100644 --- a/vendor/hunspell/src/hunspell/hashmgr.cxx +++ b/vendor/hunspell/src/hunspell/hashmgr.cxx @@ -5,6 +5,7 @@ #include #include #include +#include #include "hashmgr.hxx" #include "csutil.hxx" @@ -13,12 +14,19 @@ // build a hash table from a munched word list HashMgr::HashMgr(const char * tpath, const char * apath, const char * key) + : tablesize(0) + , tableptr(NULL) + , userword(0) + , flag_mode(FLAG_CHAR) + , complexprefixes(0) + , utf8(0) + , forbiddenword(FORBIDDENWORD) // forbidden word signing flag + , numaliasf(0) + , aliasf(NULL) + , aliasflen(0) + , numaliasm(0) + , aliasm(NULL) { - tablesize = 0; - tableptr = NULL; - flag_mode = FLAG_CHAR; - complexprefixes = 0; - utf8 = 0; langnum = 0; lang = NULL; enc = NULL; @@ -26,11 +34,6 @@ HashMgr::HashMgr(const char * tpath, const char * apath, const char * key) ignorechars = NULL; ignorechars_utf16 = NULL; ignorechars_utf16_len = 0; - numaliasf = 0; - aliasf = NULL; - numaliasm = 0; - aliasm = NULL; - forbiddenword = FORBIDDENWORD; // forbidden word signing flag load_config(apath, key); int ec = load_tables(tpath, key); if (ec) { @@ -116,7 +119,7 @@ int HashMgr::add_word(const char * word, int wbl, int wcl, unsigned short * aff, int al, const char * desc, bool onlyupcase) { bool upcasehomonym = false; - int descl = desc ? (aliasm ? sizeof(short) : strlen(desc) + 1) : 0; + int descl = desc ? (aliasm ? sizeof(char *) : strlen(desc) + 1) : 0; // variable-length hash record with word and optional fields struct hentry* hp = (struct hentry *) malloc (sizeof(struct hentry) + wbl + descl); @@ -210,18 +213,21 @@ int HashMgr::add_word(const char * word, int wbl, int wcl, unsigned short * aff, } int HashMgr::add_hidden_capitalized_word(char * word, int wbl, int wcl, - unsigned short * flags, int al, char * dp, int captype) + unsigned short * flags, int flagslen, char * dp, int captype) { + if (flags == NULL) + flagslen = 0; + // add inner capitalized forms to handle the following allcap forms: // Mixed caps: OpenOffice.org -> OPENOFFICE.ORG // Allcaps with suffixes: CIA's -> CIA'S if (((captype == HUHCAP) || (captype == HUHINITCAP) || - ((captype == ALLCAP) && (flags != NULL))) && - !((flags != NULL) && TESTAFF(flags, forbiddenword, al))) { - unsigned short * flags2 = (unsigned short *) malloc (sizeof(unsigned short) * (al+1)); + ((captype == ALLCAP) && (flagslen != 0))) && + !((flagslen != 0) && TESTAFF(flags, forbiddenword, flagslen))) { + unsigned short * flags2 = (unsigned short *) malloc (sizeof(unsigned short) * (flagslen+1)); if (!flags2) return 1; - if (al) memcpy(flags2, flags, al * sizeof(unsigned short)); - flags2[al] = ONLYUPCASEFLAG; + if (flagslen) memcpy(flags2, flags, flagslen * sizeof(unsigned short)); + flags2[flagslen] = ONLYUPCASEFLAG; if (utf8) { char st[BUFSIZE]; w_char w[BUFSIZE]; @@ -229,11 +235,11 @@ int HashMgr::add_hidden_capitalized_word(char * word, int wbl, int wcl, mkallsmall_utf(w, wlen, langnum); mkallcap_utf(w, 1, langnum); u16_u8(st, BUFSIZE, w, wlen); - return add_word(st,wbl,wcl,flags2,al+1,dp, true); + return add_word(st,wbl,wcl,flags2,flagslen+1,dp, true); } else { mkallsmall(word, csconv); mkinitcap(word, csconv); - return add_word(word,wbl,wcl,flags2,al+1,dp, true); + return add_word(word,wbl,wcl,flags2,flagslen+1,dp, true); } } return 0; @@ -363,8 +369,8 @@ int HashMgr::load_tables(const char * tpath, const char * key) if (dict == NULL) return 1; // first read the first line of file to get hash table size */ - if (!(ts = dict->getline())) { - HUNSPELL_WARNING(stderr, "error: empty dic file\n"); + if ((ts = dict->getline()) == NULL) { + HUNSPELL_WARNING(stderr, "error: empty dic file %s\n", tpath); delete dict; return 2; } @@ -377,30 +383,32 @@ int HashMgr::load_tables(const char * tpath, const char * key) } tablesize = atoi(ts); - if (tablesize == 0) { + + int nExtra = 5 + USERWORD; + + if (tablesize <= 0 || (tablesize >= (std::numeric_limits::max() - 1 - nExtra) / int(sizeof(struct hentry *)))) { HUNSPELL_WARNING(stderr, "error: line 1: missing or bad word count in the dic file\n"); delete dict; return 4; } - tablesize = tablesize + 5 + USERWORD; - if ((tablesize %2) == 0) tablesize++; + tablesize += nExtra; + if ((tablesize % 2) == 0) tablesize++; // allocate the hash table - tableptr = (struct hentry **) malloc(tablesize * sizeof(struct hentry *)); + tableptr = (struct hentry **) calloc(tablesize, sizeof(struct hentry *)); if (! tableptr) { delete dict; return 3; } - for (int i=0; igetline())) { + while ((ts = dict->getline()) != NULL) { mychomp(ts); // split each line into word and morphological description dp = ts; - while ((dp = strchr(dp, ':'))) { + while ((dp = strchr(dp, ':')) != NULL) { if ((dp > ts + 3) && (*(dp - 3) == ' ' || *(dp - 3) == '\t')) { for (dp -= 4; dp >= ts && (*dp == ' ' || *dp == '\t'); dp--); if (dp < ts) { // missing word @@ -616,7 +624,7 @@ int HashMgr::load_config(const char * affpath, const char * key) // read in each line ignoring any that do not // start with a known line type indicator - while ((line = afflst->getline())) { + while ((line = afflst->getline()) != NULL) { mychomp(line); /* remove byte order mark */ @@ -756,7 +764,7 @@ int HashMgr::parse_aliasf(char * line, FileMgr * af) /* now parse the numaliasf lines to read in the remainder of the table */ char * nl; for (int j=0; j < numaliasf; j++) { - if (!(nl = af->getline())) return 1; + if ((nl = af->getline()) == NULL) return 1; mychomp(nl); tp = nl; i = 0; @@ -863,7 +871,7 @@ int HashMgr::parse_aliasm(char * line, FileMgr * af) /* now parse the numaliasm lines to read in the remainder of the table */ char * nl = line; for (int j=0; j < numaliasm; j++) { - if (!(nl = af->getline())) return 1; + if ((nl = af->getline()) == NULL) return 1; mychomp(nl); tp = nl; i = 0; diff --git a/vendor/hunspell/src/hunspell/hunspell.cxx b/vendor/hunspell/src/hunspell/hunspell.cxx index a9b261a37..7dbcd2f91 100644 --- a/vendor/hunspell/src/hunspell/hunspell.cxx +++ b/vendor/hunspell/src/hunspell/hunspell.cxx @@ -12,6 +12,8 @@ #endif #include "csutil.hxx" +#include + Hunspell::Hunspell(const char * affpath, const char * dpath, const char * key) { encoding = NULL; @@ -328,6 +330,10 @@ int Hunspell::spell(const char * word, int * info, char ** root) char cw[MAXWORDUTF8LEN]; char wspace[MAXWORDUTF8LEN]; w_char unicw[MAXWORDLEN]; + + int info2 = 0; + if (!info) info = &info2; else *info = 0; + // Hunspell supports XML input of the simplified API (see manual) if (strcmp(word, SPELL_XML) == 0) return 1; int nc = strlen(word); @@ -346,7 +352,6 @@ int Hunspell::spell(const char * word, int * info, char ** root) if (rl && rl->conv(word, wspace)) wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv); else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv); - int info2 = 0; if (wl == 0 || maxdic == 0) return 1; if (root) *root = NULL; @@ -364,13 +369,14 @@ int Hunspell::spell(const char * word, int * info, char ** root) } else break; } if ((i == wl) && (nstate == NNUM)) return 1; - if (!info) info = &info2; else *info = 0; switch(captype) { case HUHCAP: + /* FALLTHROUGH */ case HUHINITCAP: *info += SPELL_ORIGCAP; - case NOCAP: { + /* FALLTHROUGH */ + case NOCAP: rv = checkword(cw, info, root); if ((abbv) && !(rv)) { memcpy(wspace,cw,wl); @@ -379,7 +385,6 @@ int Hunspell::spell(const char * word, int * info, char ** root) rv = checkword(wspace, info, root); } break; - } case ALLCAP: { *info += SPELL_ORIGCAP; rv = checkword(cw, info, root); @@ -403,7 +408,7 @@ int Hunspell::spell(const char * word, int * info, char ** root) *apostrophe = '\0'; wl2 = u8_u16(tmpword, MAXWORDLEN, cw); *apostrophe = '\''; - if (wl2 < nc) { + if (wl2 >= 0 && wl2 < nc) { mkinitcap2(apostrophe + 1, unicw + wl2 + 1, nc - wl2 - 1); rv = checkword(cw, info, root); if (rv) break; @@ -750,19 +755,28 @@ int Hunspell::suggest(char*** slst, const char * word) char * dot = strchr(cw, '.'); if (dot && (dot > cw)) { int captype_; - if (utf8) { + if (utf8) + { w_char w_[MAXWORDLEN]; int wl_ = u8_u16(w_, MAXWORDLEN, dot + 1); captype_ = get_captype_utf8(w_, wl_, langnum); } else captype_ = get_captype(dot+1, strlen(dot+1), csconv); - if (captype_ == INITCAP) { + if (captype_ == INITCAP) + { char * st = mystrdup(cw); - if (st) st = (char *) realloc(st, wl + 2); - if (st) { - st[(dot - cw) + 1] = ' '; - strcpy(st + (dot - cw) + 2, dot + 1); - ns = insert_sug(slst, st, ns); - free(st); + if (st) + { + char *newst = (char *) realloc(st, wl + 2); + if (newst == NULL) + free(st); + st = newst; + } + if (st) + { + st[(dot - cw) + 1] = ' '; + strcpy(st + (dot - cw) + 2, dot + 1); + ns = insert_sug(slst, st, ns); + free(st); } } } @@ -848,7 +862,7 @@ int Hunspell::suggest(char*** slst, const char * word) *pos = '\0'; strcpy(w, (*slst)[j]); strcat(w, pos + 1); - spell(w, &info, NULL); + (void)spell(w, &info, NULL); if ((info & SPELL_COMPOUND) && (info & SPELL_FORBIDDEN)) { *pos = ' '; } else *pos = '-'; @@ -1670,6 +1684,13 @@ int Hunspell::get_langnum() const return langnum; } +int Hunspell::input_conv(const char * word, char * dest) +{ + RepList * rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL; + return (rl && rl->conv(word, dest)); +} + + // return the beginning of the element (attr == NULL) or the attribute const char * Hunspell::get_xml_pos(const char * s, const char * attr) { @@ -1694,11 +1715,11 @@ int Hunspell::get_xml_list(char ***slst, char * list, const char * tag) { int n = 0; char * p; if (!list) return 0; - for (p = list; (p = strstr(p, tag)); p++) n++; + for (p = list; ((p = strstr(p, tag)) != NULL); p++) n++; if (n == 0) return 0; *slst = (char **) malloc(sizeof(char *) * n); if (!*slst) return 0; - for (p = list, n = 0; (p = strstr(p, tag)); p++, n++) { + for (p = list, n = 0; ((p = strstr(p, tag)) != NULL); p++, n++) { int l = strlen(p); (*slst)[n] = (char *) malloc(l + 1); if (!(*slst)[n]) return n; @@ -1710,6 +1731,19 @@ int Hunspell::get_xml_list(char ***slst, char * list, const char * tag) { return n; } +namespace +{ + void myrep(std::string& str, const std::string& search, const std::string& replace) + { + size_t pos = 0; + while ((pos = str.find(search, pos)) != std::string::npos) + { + str.replace(pos, search.length(), replace); + pos += replace.length(); + } + } +} + int Hunspell::spellml(char*** slst, const char * word) { char *q, *q2; @@ -1721,26 +1755,26 @@ int Hunspell::spellml(char*** slst, const char * word) q2 = strstr(q2, "'), MAXWORDUTF8LEN - 10)) n = analyze(slst, cw); if (n == 0) return 0; // convert the result to ana1ana2 format - for (int i = 0; i < n; i++) s+= strlen((*slst)[i]); - char * r = (char *) malloc(6 + 5 * s + 7 * n + 7 + 1); // XXX 5*s->&->& - if (!r) return 0; - strcpy(r, ""); + std::string r; + r.append(""); for (int i = 0; i < n; i++) { - int l = strlen(r); - strcpy(r + l, ""); - strcpy(r + l + 3, (*slst)[i]); - mystrrep(r + l + 3, "\t", " "); - mystrrep(r + l + 3, "<", "<"); - mystrrep(r + l + 3, "&", "&"); - strcat(r, ""); + r.append(""); + + std::string entry((*slst)[i]); free((*slst)[i]); + myrep(entry, "\t", " "); + myrep(entry, "&", "&"); + myrep(entry, "<", "<"); + r.append(entry); + + r.append(""); } - strcat(r, ""); - (*slst)[0] = r; + r.append(""); + (*slst)[0] = mystrdup(r.c_str()); return 1; } else if (check_xml_par(q, "type=", "stem")) { if (get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN - 1)) return stem(slst, cw); @@ -1753,9 +1787,9 @@ int Hunspell::spellml(char*** slst, const char * word) return generate(slst, cw, cw2); } } else { - if ((q2 = strstr(q2 + 1, "'), ""))) { + if ((n = get_xml_list(&slst2, strchr(q2, '>'), "")) != 0) { int n2 = generate(slst, cw, slst2, n); freelist(&slst2, n); return uniqlist(*slst, n2); diff --git a/vendor/hunspell/src/hunspell/hunspell.hxx b/vendor/hunspell/src/hunspell/hunspell.hxx index 261575788..1c119fd05 100644 --- a/vendor/hunspell/src/hunspell/hunspell.hxx +++ b/vendor/hunspell/src/hunspell/hunspell.hxx @@ -17,8 +17,12 @@ #ifndef _MYSPELLMGR_HXX_ #define _MYSPELLMGR_HXX_ -class Hunspell +class LIBHUNSPELL_DLL_EXPORTED Hunspell { +private: + Hunspell(const Hunspell&); + Hunspell& operator = (const Hunspell&); +private: AffixMgr* pAMgr; HashMgr* pHMgr[MAXDIC]; int maxdic; @@ -35,6 +39,11 @@ public: /* Hunspell(aff, dic) - constructor of Hunspell class * input: path of affix file and dictionary file + * + * In WIN32 environment, use UTF-8 encoded paths started with the long path + * prefix \\\\?\\ to handle system-independent character encoding and very + * long path names (without the long path prefix Hunspell will use fopen() + * with system-dependent character encoding instead of _wfopen()). */ Hunspell(const char * affpath, const char * dpath, const char * key = NULL); @@ -131,6 +140,9 @@ public: const char * get_version(); int get_langnum() const; + + /* need for putdic */ + int input_conv(const char * word, char * dest); /* experimental and deprecated functions */ diff --git a/vendor/hunspell/src/hunspell/hunvisapi.h b/vendor/hunspell/src/hunspell/hunvisapi.h index 98480b068..503c20f66 100644 --- a/vendor/hunspell/src/hunspell/hunvisapi.h +++ b/vendor/hunspell/src/hunspell/hunvisapi.h @@ -1,10 +1,6 @@ #ifndef _HUNSPELL_VISIBILITY_H_ #define _HUNSPELL_VISIBILITY_H_ -#ifndef HUNSPELL_STATIC -#define HUNSPELL_STATIC -#endif - #if defined(HUNSPELL_STATIC) # define LIBHUNSPELL_DLL_EXPORTED #elif defined(_MSC_VER) @@ -13,7 +9,7 @@ # else # define LIBHUNSPELL_DLL_EXPORTED __declspec(dllimport) # endif -#elif BUILDING_LIBHUNSPELL && 1 +#elif defined(BUILDING_LIBHUNSPELL) && 1 # define LIBHUNSPELL_DLL_EXPORTED __attribute__((__visibility__("default"))) #else # define LIBHUNSPELL_DLL_EXPORTED diff --git a/vendor/hunspell/src/hunspell/hunvisapi.h.in b/vendor/hunspell/src/hunspell/hunvisapi.h.in index 9c7f1b7b1..abf025ae9 100644 --- a/vendor/hunspell/src/hunspell/hunvisapi.h.in +++ b/vendor/hunspell/src/hunspell/hunvisapi.h.in @@ -9,7 +9,7 @@ # else # define LIBHUNSPELL_DLL_EXPORTED __declspec(dllimport) # endif -#elif BUILDING_LIBHUNSPELL && @HAVE_VISIBILITY@ +#elif defined(BUILDING_LIBHUNSPELL) && @HAVE_VISIBILITY@ # define LIBHUNSPELL_DLL_EXPORTED __attribute__((__visibility__("default"))) #else # define LIBHUNSPELL_DLL_EXPORTED diff --git a/vendor/hunspell/src/hunspell/hunzip.cxx b/vendor/hunspell/src/hunspell/hunzip.cxx index b50599fa8..db5a881af 100644 --- a/vendor/hunspell/src/hunspell/hunzip.cxx +++ b/vendor/hunspell/src/hunspell/hunzip.cxx @@ -3,6 +3,7 @@ #include #include "hunzip.hxx" +#include "csutil.hxx" #define CODELEN 65536 #define BASEBITREC 5000 @@ -17,15 +18,17 @@ int Hunzip::fail(const char * err, const char * par) { return -1; } -Hunzip::Hunzip(const char * file, const char * key) { - bufsiz = 0; - lastbit = 0; - inc = 0; - outc = 0; - dec = NULL; - fin = NULL; - filename = (char *) malloc(strlen(file) + 1); - if (filename) strcpy(filename, file); +Hunzip::Hunzip(const char * file, const char * key) + : fin(NULL) + , bufsiz(0) + , lastbit(0) + , inc(0) + , inbits(0) + , outc(0) + , dec(NULL) +{ + in[0] = out[0] = line[0] = '\0'; + filename = mystrdup(file); if (getcode(key) == -1) bufsiz = -1; else bufsiz = getbuf(); } @@ -38,7 +41,7 @@ int Hunzip::getcode(const char * key) { if (!filename) return -1; - fin = fopen(filename, "rb"); + fin = myfopen(filename, "rb"); if (!fin) return -1; // read magic number diff --git a/vendor/hunspell/src/hunspell/hunzip.hxx b/vendor/hunspell/src/hunspell/hunzip.hxx index b58e3ab1d..bd02fd8f1 100644 --- a/vendor/hunspell/src/hunspell/hunzip.hxx +++ b/vendor/hunspell/src/hunspell/hunzip.hxx @@ -23,7 +23,9 @@ struct bit { class LIBHUNSPELL_DLL_EXPORTED Hunzip { - +private: + Hunzip(const Hunzip&); + Hunzip& operator = (const Hunzip&); protected: char * filename; FILE * fin; diff --git a/vendor/hunspell/src/hunspell/phonet.cxx b/vendor/hunspell/src/hunspell/phonet.cxx index 144bd40d0..b33edeb02 100644 --- a/vendor/hunspell/src/hunspell/phonet.cxx +++ b/vendor/hunspell/src/hunspell/phonet.cxx @@ -87,7 +87,8 @@ int phonet (const char * inword, char * target, char word[MAXPHONETUTF8LEN + 1]; if (len == -1) len = strlen(inword); if (len > MAXPHONETUTF8LEN) return 0; - strcpy(word, inword); + strncpy(word, inword, MAXPHONETUTF8LEN); + word[MAXPHONETUTF8LEN] = '\0'; /** check word **/ i = j = z = 0; diff --git a/vendor/hunspell/src/hunspell/replist.hxx b/vendor/hunspell/src/hunspell/replist.hxx index da79ea9ad..2dbc0160b 100644 --- a/vendor/hunspell/src/hunspell/replist.hxx +++ b/vendor/hunspell/src/hunspell/replist.hxx @@ -6,9 +6,11 @@ #include "w_char.hxx" -#undef near class LIBHUNSPELL_DLL_EXPORTED RepList { +private: + RepList(const RepList&); + RepList& operator = (const RepList&); protected: replentry ** dat; int size; diff --git a/vendor/hunspell/src/hunspell/suggestmgr.cxx b/vendor/hunspell/src/hunspell/suggestmgr.cxx index ebf9bc0a4..f0e336c97 100644 --- a/vendor/hunspell/src/hunspell/suggestmgr.cxx +++ b/vendor/hunspell/src/hunspell/suggestmgr.cxx @@ -107,7 +107,10 @@ int SuggestMgr::testsug(char** wlst, const char * candidate, int wl, int ns, int int cwrd = 1; if (ns == maxSug) return maxSug; for (int k=0; k < ns; k++) { - if (strcmp(candidate,wlst[k]) == 0) cwrd = 0; + if (strcmp(candidate,wlst[k]) == 0) { + cwrd = 0; + break; + } } if ((cwrd) && checkword(candidate, wl, cpdsuggest, timer, timelimit)) { wlst[ns] = mystrdup(candidate); @@ -364,8 +367,12 @@ int SuggestMgr::map_related(const char * word, char * candidate, int wn, int cn, int cwrd = 1; *(candidate + cn) = '\0'; int wl = strlen(candidate); - for (int m=0; m < ns; m++) - if (strcmp(candidate, wlst[m]) == 0) cwrd = 0; + for (int m=0; m < ns; m++) { + if (strcmp(candidate, wlst[m]) == 0) { + cwrd = 0; + break; + } + } if ((cwrd) && checkword(candidate, wl, cpdsuggest, timer, timelimit)) { if (ns < maxSug) { wlst[ns] = mystrdup(candidate); @@ -678,7 +685,7 @@ int SuggestMgr::extrachar(char** wlst, const char * word, int ns, int cpdsuggest // error is missing a letter it needs int SuggestMgr::forgotchar(char ** wlst, const char * word, int ns, int cpdsuggest) { - char candidate[MAXSWUTF8L]; + char candidate[MAXSWUTF8L + 4]; char * p; clock_t timelimit = clock(); int timer = MINTIMER; @@ -700,8 +707,8 @@ int SuggestMgr::forgotchar(char ** wlst, const char * word, int ns, int cpdsugge // error is missing a letter it needs int SuggestMgr::forgotchar_utf(char ** wlst, const w_char * word, int wl, int ns, int cpdsuggest) { - w_char candidate_utf[MAXSWL]; - char candidate[MAXSWUTF8L]; + w_char candidate_utf[MAXSWL + 1]; + char candidate[MAXSWUTF8L + 4]; w_char * p; clock_t timelimit = clock(); int timer = MINTIMER; @@ -761,8 +768,12 @@ int SuggestMgr::twowords(char ** wlst, const char * word, int ns, int cpdsuggest ((c1 == 3) && (c2 >= 2)))) *p = '-'; cwrd = 1; - for (int k=0; k < ns; k++) - if (strcmp(candidate,wlst[k]) == 0) cwrd = 0; + for (int k=0; k < ns; k++) { + if (strcmp(candidate,wlst[k]) == 0) { + cwrd = 0; + break; + } + } if (ns < maxSug) { if (cwrd) { wlst[ns] = mystrdup(candidate); @@ -777,8 +788,12 @@ int SuggestMgr::twowords(char ** wlst, const char * word, int ns, int cpdsuggest mystrlen(p + 1) > 1 && mystrlen(candidate) - mystrlen(p) > 1) { *p = '-'; - for (int k=0; k < ns; k++) - if (strcmp(candidate,wlst[k]) == 0) cwrd = 0; + for (int k=0; k < ns; k++) { + if (strcmp(candidate,wlst[k]) == 0) { + cwrd = 0; + break; + } + } if (ns < maxSug) { if (cwrd) { wlst[ns] = mystrdup(candidate); @@ -1333,7 +1348,10 @@ int SuggestMgr::ngsuggest(char** wlst, char * w, int ns, HashMgr** pHMgr, int md if ((!guessorig[i] && strstr(guess[i], wlst[j])) || (guessorig[i] && strstr(guessorig[i], wlst[j])) || // check forbidden words - !checkword(guess[i], strlen(guess[i]), 0, NULL, NULL)) unique = 0; + !checkword(guess[i], strlen(guess[i]), 0, NULL, NULL)) { + unique = 0; + break; + } } if (unique) { wlst[ns++] = guess[i]; @@ -1361,7 +1379,10 @@ int SuggestMgr::ngsuggest(char** wlst, char * w, int ns, HashMgr** pHMgr, int md // don't suggest previous suggestions or a previous suggestion with prefixes or affixes if (strstr(rootsphon[i], wlst[j]) || // check forbidden words - !checkword(rootsphon[i], strlen(rootsphon[i]), 0, NULL, NULL)) unique = 0; + !checkword(rootsphon[i], strlen(rootsphon[i]), 0, NULL, NULL)) { + unique = 0; + break; + } } if (unique) { wlst[ns++] = mystrdup(rootsphon[i]); @@ -1855,6 +1876,10 @@ int SuggestMgr::commoncharacterpositions(char * s1, const char * s2, int * is_sw w_char su2[MAXSWL]; int l1 = u8_u16(su1, MAXSWL, s1); int l2 = u8_u16(su2, MAXSWL, s2); + + if (l1 <= 0 || l2 <= 0) + return 0; + // decapitalize dictionary word if (complexprefixes) { mkallsmall_utf(su2+l2-1, 1, langnum); diff --git a/vendor/hunspell/src/hunspell/suggestmgr.hxx b/vendor/hunspell/src/hunspell/suggestmgr.hxx index 5f043fdfd..8456b5b3e 100644 --- a/vendor/hunspell/src/hunspell/suggestmgr.hxx +++ b/vendor/hunspell/src/hunspell/suggestmgr.hxx @@ -32,6 +32,10 @@ enum { LCS_UP, LCS_LEFT, LCS_UPLEFT }; class LIBHUNSPELL_DLL_EXPORTED SuggestMgr { +private: + SuggestMgr(const SuggestMgr&); + SuggestMgr& operator = (const SuggestMgr&); +private: char * ckey; int ckeyl; w_char * ckey_utf; diff --git a/vendor/hunspell/src/parsers/firstparser.cxx b/vendor/hunspell/src/parsers/firstparser.cxx deleted file mode 100644 index 786ecea2e..000000000 --- a/vendor/hunspell/src/parsers/firstparser.cxx +++ /dev/null @@ -1,33 +0,0 @@ -#include -#include -#include -#include - -#include "../hunspell/csutil.hxx" -#include "firstparser.hxx" - -#ifndef W32 -using namespace std; -#endif - -FirstParser::FirstParser(const char * wordchars) -{ - init(wordchars); -} - -FirstParser::~FirstParser() -{ -} - -char * FirstParser::next_token() -{ - char * tabpos = strchr(line[actual],'\t'); - if ((tabpos) && (tabpos - line[actual]>token)) { - char * t = (char *) malloc(tabpos - line[actual] + 1); - t[tabpos - line[actual]] = '\0'; - token = tabpos - line[actual] +1; - if (t) return strncpy(t, line[actual], tabpos - line[actual]); - fprintf(stderr,"Error - Insufficient Memory\n"); - } - return NULL; -} diff --git a/vendor/hunspell/src/parsers/firstparser.hxx b/vendor/hunspell/src/parsers/firstparser.hxx deleted file mode 100644 index 1f792899c..000000000 --- a/vendor/hunspell/src/parsers/firstparser.hxx +++ /dev/null @@ -1,34 +0,0 @@ -/* - * parser classes of HunTools - * - * implemented: text, HTML, TeX, first word - * - * Copyright (C) 2003, Laszlo Nemeth - * - */ - -#ifndef _FIRSTPARSER_HXX_ -#define _FIRSTPARSER_HXX_ - -#include "textparser.hxx" - -/* - * Check first word of the input line - * - */ - -class FirstParser : public TextParser -{ - -public: - - - FirstParser(const char * wc); - virtual ~FirstParser(); - - virtual char * next_token(); - -}; - -#endif - diff --git a/vendor/hunspell/src/parsers/htmlparser.cxx b/vendor/hunspell/src/parsers/htmlparser.cxx deleted file mode 100644 index 341be4e89..000000000 --- a/vendor/hunspell/src/parsers/htmlparser.cxx +++ /dev/null @@ -1,151 +0,0 @@ -#include -#include -#include -#include - -#include "../hunspell/csutil.hxx" -#include "htmlparser.hxx" - - -#ifndef W32 -using namespace std; -#endif - -enum { ST_NON_WORD, ST_WORD, ST_TAG, ST_CHAR_ENTITY, ST_OTHER_TAG, ST_ATTRIB }; - -static const char * PATTERN[][2] = { - { "" }, - { "" }, - { "" }, - { "" }, - { "" }, - { "" }, - { "" }, - { "" }, - { "" }, - { "" }, - { "<[cdata[", "]]>" }, // XML comment - { "<", ">" } -}; - -#define PATTERN_LEN (sizeof(PATTERN) / (sizeof(char *) * 2)) - -static const char * PATTERN2[][2] = { - { " 0) && (line[actual][head] == '>')) { - state = ST_NON_WORD; - } else if (((i = look_pattern(PATTERN, PATTERN_LEN, 1)) != -1) && - (strcmp(PATTERN[i][1],PATTERN[pattern_num][1]) == 0)) { - state = ST_NON_WORD; - head += strlen(PATTERN[pattern_num][1]) - 1; - } else if ( (strcmp(PATTERN[pattern_num][0], "<") == 0) && - ((line[actual][head] == '"') || (line[actual][head] == '\''))) { - quotmark = line[actual][head]; - state = ST_ATTRIB; - } - break; - case ST_ATTRIB: // non word chars - prevstate = ST_ATTRIB; - if (line[actual][head] == quotmark) { - state = ST_TAG; - if (checkattr == 2) checkattr = 1; - // for IMG ALT - } else if (is_wordchar(line[actual] + head) && (checkattr == 2)) { - state = ST_WORD; - token = head; - } else if (line[actual][head] == '&') { - state = ST_CHAR_ENTITY; - } - break; - case ST_CHAR_ENTITY: // SGML element - if ((tolower(line[actual][head]) == ';')) { - state = prevstate; - head--; - } - } - if (next_char(line[actual], &head)) return NULL; - } -} diff --git a/vendor/hunspell/src/parsers/htmlparser.hxx b/vendor/hunspell/src/parsers/htmlparser.hxx deleted file mode 100644 index 9a0da7a79..000000000 --- a/vendor/hunspell/src/parsers/htmlparser.hxx +++ /dev/null @@ -1,44 +0,0 @@ -/* - * HTML parser class for MySpell - * - * implemented: text, HTML, TeX - * - * Copyright (C) 2002, Laszlo Nemeth - * - */ - -#ifndef _HTMLPARSER_HXX_ -#define _HTMLPARSER_HXX_ - - -#include "textparser.hxx" - -/* - * HTML Parser - * - */ - -class HTMLParser : public TextParser -{ -public: - - HTMLParser(const char * wc); - HTMLParser(unsigned short * wordchars, int len); - virtual ~HTMLParser(); - - virtual char * next_token(); - -private: - - int look_pattern(const char * p[][2], unsigned int len, int column); - int pattern_num; - int pattern2_num; - int prevstate; - int checkattr; - char quotmark; - -}; - - -#endif - diff --git a/vendor/hunspell/src/parsers/latexparser.cxx b/vendor/hunspell/src/parsers/latexparser.cxx deleted file mode 100644 index 5ffe3fd44..000000000 --- a/vendor/hunspell/src/parsers/latexparser.cxx +++ /dev/null @@ -1,223 +0,0 @@ -#include -#include -#include -#include - -#include "../hunspell/csutil.hxx" -#include "latexparser.hxx" - -#ifndef W32 -using namespace std; -#endif - -static struct { - const char * pat[2]; - int arg; -} PATTERN[] = { - { { "\\(", "\\)" } , 0 }, - { { "$$", "$$" } , 0 }, - { { "$", "$" } , 0 }, - { { "\\begin{math}", "\\end{math}" } , 0 }, - { { "\\[", "\\]" } , 0 }, - { { "\\begin{displaymath}", "\\end{displaymath}" } , 0 }, - { { "\\begin{equation}", "\\end{equation}" } , 0 }, - { { "\\begin{equation*}", "\\end{equation*}" } , 0 }, - { { "\\cite", NULL } , 1 }, - { { "\\nocite", NULL } , 1 }, - { { "\\index", NULL } , 1 }, - { { "\\label", NULL } , 1 }, - { { "\\ref", NULL } , 1 }, - { { "\\pageref", NULL } , 1 }, - { { "\\parbox", NULL } , 1 }, - { { "\\begin{verbatim}", "\\end{verbatim}" } , 0 }, - { { "\\verb+", "+" } , 0 }, - { { "\\verb|", "|" } , 0 }, - { { "\\verb#", "#" } , 0 }, - { { "\\verb*", "*" } , 0 }, - { { "\\documentstyle", "\\begin{document}" } , 0 }, - { { "\\documentclass", "\\begin{document}" } , 0 }, -// { { "\\documentclass", NULL } , 1 }, - { { "\\usepackage", NULL } , 1 }, - { { "\\includeonly", NULL } , 1 }, - { { "\\include", NULL } , 1 }, - { { "\\input", NULL } , 1 }, - { { "\\vspace", NULL } , 1 }, - { { "\\setlength", NULL } , 2 }, - { { "\\addtolength", NULL } , 2 }, - { { "\\settowidth", NULL } , 2 }, - { { "\\rule", NULL } , 2 }, - { { "\\hspace", NULL } , 1 } , - { { "\\vspace", NULL } , 1 } , - { { "\\\\[", "]" } , 0 }, - { { "\\pagebreak[", "]" } , 0 } , - { { "\\nopagebreak[", "]" } , 0 } , - { { "\\enlargethispage", NULL } , 1 } , - { { "\\begin{tabular}", NULL } , 1 } , - { { "\\addcontentsline", NULL } , 2 } , - { { "\\begin{thebibliography}", NULL } , 1 } , - { { "\\bibliography", NULL } , 1 } , - { { "\\bibliographystyle", NULL } , 1 } , - { { "\\bibitem", NULL } , 1 } , - { { "\\begin", NULL } , 1 } , - { { "\\end", NULL } , 1 } , - { { "\\pagestyle", NULL } , 1 } , - { { "\\pagenumbering", NULL } , 1 } , - { { "\\thispagestyle", NULL } , 1 } , - { { "\\newtheorem", NULL } , 2 }, - { { "\\newcommand", NULL } , 2 }, - { { "\\renewcommand", NULL } , 2 }, - { { "\\setcounter", NULL } , 2 }, - { { "\\addtocounter", NULL } , 1 }, - { { "\\stepcounter", NULL } , 1 }, - { { "\\selectlanguage", NULL } , 1 }, - { { "\\inputencoding", NULL } , 1 }, - { { "\\hyphenation", NULL } , 1 }, - { { "\\definecolor", NULL } , 3 }, - { { "\\color", NULL } , 1 }, - { { "\\textcolor", NULL } , 1 }, - { { "\\pagecolor", NULL } , 1 }, - { { "\\colorbox", NULL } , 2 }, - { { "\\fcolorbox", NULL } , 2 }, - { { "\\declaregraphicsextensions", NULL } , 1 }, - { { "\\psfig", NULL } , 1 }, - { { "\\url", NULL } , 1 }, - { { "\\eqref", NULL } , 1 }, - { { "\\vskip", NULL } , 1 }, - { { "\\vglue", NULL } , 1 }, - { { "\'\'", NULL } , 1 } -}; - -#define PATTERN_LEN (sizeof(PATTERN) / sizeof(PATTERN[0])) - -LaTeXParser::LaTeXParser(const char * wordchars) -{ - init(wordchars); -} - -LaTeXParser::LaTeXParser(unsigned short * wordchars, int len) -{ - init(wordchars, len); -} - -LaTeXParser::~LaTeXParser() -{ -} - -int LaTeXParser::look_pattern(int col) -{ - for (unsigned int i = 0; i < PATTERN_LEN; i++) { - char * j = line[actual] + head; - const char * k = PATTERN[i].pat[col]; - if (! k) continue; - while ((*k != '\0') && (tolower(*j) == *k)) { - j++; - k++; - } - if (*k == '\0') return i; - } - return -1; -} - -/* - * LaTeXParser - * - * state 0: not wordchar - * state 1: wordchar - * state 2: comments - * state 3: commands - * state 4: commands with arguments - * state 5: % comment - * - */ - - -char * LaTeXParser::next_token() -{ - int i; - int slash = 0; - int apostrophe; - for (;;) { - // fprintf(stderr,"depth: %d, state: %d, , arg: %d, token: %s\n",depth,state,arg,line[actual]+head); - - switch (state) - { - case 0: // non word chars - if ((pattern_num = look_pattern(0)) != -1) { - if (PATTERN[pattern_num].pat[1]) { - state = 2; - } else { - state = 4; - depth = 0; - arg = 0; - opt = 1; - } - head += strlen(PATTERN[pattern_num].pat[0]) - 1; - } else if ((line[actual][head] == '%')) { - state = 5; - } else if (is_wordchar(line[actual] + head)) { - state = 1; - token = head; - } else if (line[actual][head] == '\\') { - if (line[actual][head + 1] == '\\' || // \\ (linebreak) - (line[actual][head + 1] == '$') || // \$ (dollar sign) - (line[actual][head + 1] == '%')) { // \% (percent) - head++; - break; - } - state = 3; - } else if (line[actual][head] == '%') { - if ((head==0) || (line[actual][head - 1] != '\\')) state = 5; - } - break; - case 1: // wordchar - apostrophe = 0; - if (! is_wordchar(line[actual] + head) || - (line[actual][head] == '\'' && line[actual][head+1] == '\'' && ++apostrophe)) { - state = 0; - char * t = alloc_token(token, &head); - if (apostrophe) head += 2; - if (t) return t; - } - break; - case 2: // comment, labels, etc - if (((i = look_pattern(1)) != -1) && - (strcmp(PATTERN[i].pat[1],PATTERN[pattern_num].pat[1]) == 0)) { - state = 0; - head += strlen(PATTERN[pattern_num].pat[1]) - 1; - } - break; - case 3: // command - if ((tolower(line[actual][head]) < 'a') || (tolower(line[actual][head]) > 'z')) { - state = 0; - head--; - } - break; - case 4: // command with arguments - if (slash && (line[actual][head] != '\0')) { - slash = 0; - head++; - break; - } else if (line[actual][head]=='\\') { - slash = 1; - } else if ((line[actual][head] == '{') || - ((opt) && (line[actual][head] == '['))) { - depth++; - opt = 0; - } else if (line[actual][head] == '}') { - depth--; - if (depth == 0) { - opt = 1; - arg++; - } - if (((depth == 0) && (arg == PATTERN[pattern_num].arg)) || - (depth < 0) ) { - state = 0; // XXX not handles the last optional arg. - } - } else if (line[actual][head] == ']') depth--; - } // case - if (next_char(line[actual], &head)) { - if (state == 5) state = 0; - return NULL; - } - } -} diff --git a/vendor/hunspell/src/parsers/latexparser.hxx b/vendor/hunspell/src/parsers/latexparser.hxx deleted file mode 100644 index 851ecb9a4..000000000 --- a/vendor/hunspell/src/parsers/latexparser.hxx +++ /dev/null @@ -1,44 +0,0 @@ -/* - * parser classes for MySpell - * - * implemented: text, HTML, TeX - * - * Copyright (C) 2002, Laszlo Nemeth - * - */ - -#ifndef _LATEXPARSER_HXX_ -#define _LATEXPARSER_HXX_ - - -#include "textparser.hxx" - -/* - * HTML Parser - * - */ - -class LaTeXParser : public TextParser -{ - int pattern_num; // number of comment - int depth; // depth of blocks - int arg; // arguments's number - int opt; // optional argument attrib. - -public: - - LaTeXParser(const char * wc); - LaTeXParser(unsigned short * wordchars, int len); - virtual ~LaTeXParser(); - - virtual char * next_token(); - -private: - - int look_pattern(int col); - -}; - - -#endif - diff --git a/vendor/hunspell/src/parsers/manparser.cxx b/vendor/hunspell/src/parsers/manparser.cxx deleted file mode 100644 index 25858dad8..000000000 --- a/vendor/hunspell/src/parsers/manparser.cxx +++ /dev/null @@ -1,71 +0,0 @@ -#include -#include -#include -#include - -#include "../hunspell/csutil.hxx" -#include "manparser.hxx" - - -#ifndef W32 -using namespace std; -#endif - -ManParser::ManParser() { -} - -ManParser::ManParser(const char * wordchars) -{ - init(wordchars); -} - -ManParser::ManParser(unsigned short * wordchars, int len) -{ - init(wordchars, len); -} - -ManParser::~ManParser() -{ -} - -char * ManParser::next_token() -{ - for (;;) { - switch (state) - { - case 1: // command arguments - if (line[actual][head] == ' ') state = 2; - break; - case 0: // dot in begin of line - if (line[actual][0] == '.') { - state = 1; - break; - } else { - state = 2; - } - // no break - case 2: // non word chars - if (is_wordchar(line[actual] + head)) { - state = 3; - token = head; - } else if ((line[actual][head] == '\\') && - (line[actual][head + 1] == 'f') && - (line[actual][head + 2] != '\0')) { - head += 2; - } - break; - case 3: // wordchar - if (! is_wordchar(line[actual] + head)) { - state = 2; - char * t = alloc_token(token, &head); - if (t) return t; - } - break; - } - if (next_char(line[actual], &head)) { - state = 0; - return NULL; - } - } -} - diff --git a/vendor/hunspell/src/parsers/manparser.hxx b/vendor/hunspell/src/parsers/manparser.hxx deleted file mode 100644 index 6db37c539..000000000 --- a/vendor/hunspell/src/parsers/manparser.hxx +++ /dev/null @@ -1,38 +0,0 @@ -/* - * parser classes for MySpell - * - * implemented: text, HTML, TeX - * - * Copyright (C) 2002, Laszlo Nemeth - * - */ - -#ifndef _MANPARSER_HXX_ -#define _MANPARSER_HXX_ - -#include "textparser.hxx" - -/* - * Manparse Parser - * - */ - -class ManParser : public TextParser -{ - -protected: - - -public: - - ManParser(); - ManParser(const char * wc); - ManParser(unsigned short * wordchars, int len); - virtual ~ManParser(); - - virtual char * next_token(); - -}; - -#endif - diff --git a/vendor/hunspell/src/parsers/testparser.cxx b/vendor/hunspell/src/parsers/testparser.cxx deleted file mode 100644 index b257f12fb..000000000 --- a/vendor/hunspell/src/parsers/testparser.cxx +++ /dev/null @@ -1,53 +0,0 @@ -#include -#include -#include - -#include "textparser.hxx" -#include "htmlparser.hxx" -#include "latexparser.hxx" - -#ifndef W32 -using namespace std; -#endif - -int -main(int argc, char** argv) -{ - FILE * f; - /* first parse the command line options */ - - if (argc < 2) { - fprintf(stderr,"correct syntax is:\n"); - fprintf(stderr,"testparser file\n"); - fprintf(stderr,"example: testparser /dev/stdin\n"); - exit(1); - } - - /* open the words to check list */ - f = fopen(argv[1],"r"); - if (!f) { - fprintf(stderr,"Error - could not open file of words to check\n"); - exit(1); - } - - TextParser * p = new LaTeXParser("qwertzuiopasdfghjklyxcvbnméáúõûóüöíQWERTZUIOPASDFGHJKLYXCVBNMÍÉÁÕÚÖÜÓÛ"); - - char buf[MAXLNLEN]; - char * next; - - while(fgets(buf,MAXLNLEN,f)) { - fprintf(stdout,"---------------------------------------\n"); - p->put_line(buf); - fprintf(stderr, "x:%s\n", buf); - p->set_url_checking(1); - while ((next=p->next_token())) { - fprintf(stdout,"token: %s\n",next); - free(next); - } - } - - delete p; - fclose(f); - return 0; -} - diff --git a/vendor/hunspell/src/parsers/textparser.cxx b/vendor/hunspell/src/parsers/textparser.cxx deleted file mode 100644 index 033813680..000000000 --- a/vendor/hunspell/src/parsers/textparser.cxx +++ /dev/null @@ -1,291 +0,0 @@ -#include -#include -#include -#include - -#include "../hunspell/csutil.hxx" -#include "textparser.hxx" - -#ifndef W32 -using namespace std; -#endif - -// ISO-8859-1 HTML character entities - -static const char * LATIN1[] = { - "À", - "Ã", - "Å", - "Æ", - "È", - "Ê", - "Ì", - "Ï", - "Ð", - "Ñ", - "Ò", - "Ø", - "Ù", - "Þ", - "à", - "ã", - "å", - "æ", - "è", - "ê", - "ì", - "ï", - "ð", - "ñ", - "ò", - "ø", - "ù", - "þ", - "ÿ" -}; - -#define LATIN1_LEN (sizeof(LATIN1) / sizeof(char *)) - -TextParser::TextParser() { - init((char *) NULL); -} - -TextParser::TextParser(const char * wordchars) -{ - init(wordchars); -} - -TextParser::TextParser(unsigned short * wordchars, int len) -{ - init(wordchars, len); -} - -TextParser::~TextParser() -{ -} - -int TextParser::is_wordchar(char * w) -{ - if (*w == '\0') return 0; - if (utf8) { - w_char wc; - unsigned short idx; - u8_u16(&wc, 1, w); - idx = (wc.h << 8) + wc.l; - return (unicodeisalpha(idx) || (wordchars_utf16 && flag_bsearch(wordchars_utf16, *((unsigned short *) &wc), wclen))); - } else { - return wordcharacters[(*w + 256) % 256]; - } -} - -const char * TextParser::get_latin1(char * s) -{ - if (s[0] == '&') { - unsigned int i = 0; - while ((i < LATIN1_LEN) && - strncmp(LATIN1[i], s, strlen(LATIN1[i]))) i++; - if (i != LATIN1_LEN) return LATIN1[i]; - } - return NULL; -} - -void TextParser::init(const char * wordchars) -{ - for (int i = 0; i < MAXPREVLINE; i++) { - line[i][0] = '\0'; - } - actual = 0; - head = 0; - token = 0; - state = 0; - utf8 = 0; - checkurl = 0; - unsigned int j; - for (j = 0; j < 256; j++) { - wordcharacters[j] = 0; - } - if (!wordchars) wordchars = "qwertzuiopasdfghjklyxcvbnmQWERTZUIOPASDFGHJKLYXCVBNM"; - for (j = 0; j < strlen(wordchars); j++) { - wordcharacters[(wordchars[j] + 256) % 256] = 1; - } -} - -void TextParser::init(unsigned short * wc, int len) -{ - for (int i = 0; i < MAXPREVLINE; i++) { - line[i][0] = '\0'; - } - actual = 0; - head = 0; - token = 0; - state = 0; - utf8 = 1; - checkurl = 0; - wordchars_utf16 = wc; - wclen = len; -} - -int TextParser::next_char(char * line, int * pos) { - if (*(line + *pos) == '\0') return 1; - if (utf8) { - if (*(line + *pos) >> 7) { - // jump to next UTF-8 character - for((*pos)++; (*(line + *pos) & 0xc0) == 0x80; (*pos)++); - } else { - (*pos)++; - } - } else (*pos)++; - return 0; -} - -void TextParser::put_line(char * word) -{ - actual = (actual + 1) % MAXPREVLINE; - strcpy(line[actual], word); - token = 0; - head = 0; - check_urls(); -} - -char * TextParser::get_prevline(int n) -{ - return mystrdup(line[(actual + MAXPREVLINE - n) % MAXPREVLINE]); -} - -char * TextParser::get_line() -{ - return get_prevline(0); -} - -char * TextParser::next_token() -{ - const char * latin1; - - for (;;) { - switch (state) - { - case 0: // non word chars - if (is_wordchar(line[actual] + head)) { - state = 1; - token = head; - } else if ((latin1 = get_latin1(line[actual] + head))) { - state = 1; - token = head; - head += strlen(latin1); - } - break; - case 1: // wordchar - if ((latin1 = get_latin1(line[actual] + head))) { - head += strlen(latin1); - } else if (! is_wordchar(line[actual] + head)) { - state = 0; - char * t = alloc_token(token, &head); - if (t) return t; - } - break; - } - if (next_char(line[actual], &head)) return NULL; - } -} - -int TextParser::get_tokenpos() -{ - return token; -} - -int TextParser::change_token(const char * word) -{ - if (word) { - char * r = mystrdup(line[actual] + head); - strcpy(line[actual] + token, word); - strcat(line[actual], r); - head = token; - free(r); - return 1; - } - return 0; -} - -void TextParser::check_urls() -{ - int url_state = 0; - int url_head = 0; - int url_token = 0; - int url = 0; - for (;;) { - switch (url_state) - { - case 0: // non word chars - if (is_wordchar(line[actual] + url_head)) { - url_state = 1; - url_token = url_head; - // Unix path - } else if (*(line[actual] + url_head) == '/') { - url_state = 1; - url_token = url_head; - url = 1; - } - break; - case 1: // wordchar - char ch = *(line[actual] + url_head); - // e-mail address - if ((ch == '@') || - // MS-DOS, Windows path - (strncmp(line[actual] + url_head, ":\\", 2) == 0) || - // URL - (strncmp(line[actual] + url_head, "://", 3) == 0)) { - url = 1; - } else if (! (is_wordchar(line[actual] + url_head) || - (ch == '-') || (ch == '_') || (ch == '\\') || - (ch == '.') || (ch == ':') || (ch == '/') || - (ch == '~') || (ch == '%') || (ch == '*') || - (ch == '$') || (ch == '[') || (ch == ']') || - (ch == '?') || (ch == '!') || - ((ch >= '0') && (ch <= '9')))) { - url_state = 0; - if (url == 1) { - for (int i = url_token; i < url_head; i++) { - *(urlline + i) = 1; - } - } - url = 0; - } - break; - } - *(urlline + url_head) = 0; - if (next_char(line[actual], &url_head)) return; - } -} - -int TextParser::get_url(int token_pos, int * head) -{ - for (int i = *head; urlline[i] && *(line[actual]+i); i++, (*head)++); - return checkurl ? 0 : urlline[token_pos]; -} - -void TextParser::set_url_checking(int check) -{ - checkurl = check; -} - - -char * TextParser::alloc_token(int token, int * head) -{ - if (get_url(token, head)) return NULL; - char * t = (char *) malloc(*head - token + 1); - if (t) { - t[*head - token] = '\0'; - strncpy(t, line[actual] + token, *head - token); - // remove colon for Finnish and Swedish language - if (t[*head - token - 1] == ':') { - t[*head - token - 1] = '\0'; - if (!t[0]) { - free(t); - return NULL; - } - } - return t; - } - fprintf(stderr,"Error - Insufficient Memory\n"); - return NULL; -} diff --git a/vendor/hunspell/src/parsers/textparser.hxx b/vendor/hunspell/src/parsers/textparser.hxx deleted file mode 100644 index a6f472a6c..000000000 --- a/vendor/hunspell/src/parsers/textparser.hxx +++ /dev/null @@ -1,69 +0,0 @@ -/* - * parser classes for MySpell - * - * implemented: text, HTML, TeX - * - * Copyright (C) 2002, Laszlo Nemeth - * - */ - -#ifndef _TEXTPARSER_HXX_ -#define _TEXTPARSER_HXX_ - -// set sum of actual and previous lines -#define MAXPREVLINE 4 - -#ifndef MAXLNLEN -#define MAXLNLEN 8192 -#endif - -/* - * Base Text Parser - * - */ - -class TextParser -{ - -protected: - void init(const char *); - void init(unsigned short * wordchars, int len); - int wordcharacters[256]; // for detection of the word boundaries - char line[MAXPREVLINE][MAXLNLEN]; // parsed and previous lines - char urlline[MAXLNLEN]; // mask for url detection - int checkurl; - int actual; // actual line - int head; // head position - int token; // begin of token - int state; // state of automata - int utf8; // UTF-8 character encoding - int next_char(char * line, int * pos); - unsigned short * wordchars_utf16; - int wclen; - -public: - - TextParser(); - TextParser(unsigned short * wordchars, int len); - TextParser(const char * wc); - virtual ~TextParser(); - - void put_line(char * line); - char * get_line(); - char * get_prevline(int n); - virtual char * next_token(); - int change_token(const char * word); - void set_url_checking(int check); - - int get_tokenpos(); - int is_wordchar(char * w); - const char * get_latin1(char * s); - char * next_char(); - int tokenize_urls(); - void check_urls(); - int get_url(int token_pos, int * head); - char * alloc_token(int token, int * head); -}; - -#endif - diff --git a/vendor/hunspell/src/win_api/Hunspell.rc b/vendor/hunspell/src/win_api/Hunspell.rc deleted file mode 100644 index 64eea8ce1..000000000 --- a/vendor/hunspell/src/win_api/Hunspell.rc +++ /dev/null @@ -1,32 +0,0 @@ - -#include - -VS_VERSION_INFO VERSIONINFO -FILEVERSION 1,3,1,0 -PRODUCTVERSION 1,3,1,0 -FILEFLAGSMASK 0x17L -FILEFLAGS 0 -FILEOS VOS_NT_WINDOWS32 -FILETYPE VFT_APP -FILESUBTYPE VFT2_UNKNOWN -BEGIN - BLOCK "VarFileInfo" - BEGIN - VALUE "Translation", 0x409, 1200 - END - BLOCK "StringFileInfo" - BEGIN - BLOCK "040904b0" - BEGIN - VALUE "Comments", "Hunspell (http://hunspell.sourceforge.net/) by László Németh" - VALUE "CompanyName", "http://hunspell.sourceforge.net/" - VALUE "FileDescription", "libhunspell" - VALUE "FileVersion", "1.3.2" - VALUE "InternalName", "libhunspell" - VALUE "LegalCopyright", "Copyright (c) 2007-2011" - VALUE "OriginalFilename", "libhunspell.dll" - VALUE "ProductName", "Hunspell Dynamic Link Library" - VALUE "ProductVersion", "1.3.2" - END - END -END diff --git a/vendor/hunspell/src/win_api/config.h b/vendor/hunspell/src/win_api/config.h index 5596065bf..9a00faee8 100644 --- a/vendor/hunspell/src/win_api/config.h +++ b/vendor/hunspell/src/win_api/config.h @@ -204,5 +204,5 @@ #define PACKAGE_TARNAME /* Define to the version of this package. */ -#define PACKAGE_VERSION "1.3.2" -#define VERSION "1.3.2" +#define PACKAGE_VERSION "1.3.3" +#define VERSION "1.3.3" diff --git a/vendor/hunspell/src/win_api/hunspelldll.c b/vendor/hunspell/src/win_api/hunspelldll.c deleted file mode 100644 index 96c25f177..000000000 --- a/vendor/hunspell/src/win_api/hunspelldll.c +++ /dev/null @@ -1,126 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * Copyright (C) 2006 - * Miha Vrhovnik (http://simail.sf.net, http://xcollect.sf.net) - * All Rights Reserved. - * - * Contributor(s): - * - * - * Alternatively, the contents of this file may be used under the terms of - * either the GNU General Public License Version 2 or later (the "GPL"), or - * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** **/ -#include "hunspelldll.h" -#include - -#include -#include -#include -#include -#include - -LIBHUNSPELL_DLL_EXPORTED void * hunspell_initialize(char *aff_file, char *dict_file) -{ - Hunspell * pMS = new Hunspell(aff_file, dict_file); - return pMS; -} - -LIBHUNSPELL_DLL_EXPORTED void * hunspell_initialize_key(char *aff_file, char *dict_file, char * key) -{ - Hunspell * pMS = new Hunspell(aff_file, dict_file, key); - return pMS; -} - -LIBHUNSPELL_DLL_EXPORTED void hunspell_uninitialize(Hunspell *pMS) -{ - delete pMS; -} - -LIBHUNSPELL_DLL_EXPORTED int hunspell_spell(Hunspell *pMS, char *word) -{ - return pMS->spell(word); -} - -LIBHUNSPELL_DLL_EXPORTED int hunspell_suggest(Hunspell *pMS, char *word, char ***slst) -{ - return pMS->suggest(slst, word); -} - -#ifdef HUNSPELL_EXPERIMENTAL -LIBHUNSPELL_DLL_EXPORTED int hunspell_suggest_auto(Hunspell *pMS, char *word, char ***slst) -{ - return pMS->suggest_auto(slst, word); -} -#endif - -LIBHUNSPELL_DLL_EXPORTED void hunspell_free_list(Hunspell *pMS, char ***slst, int len) -{ - pMS->free_list(slst, len); -} - -// deprecated (use hunspell_free_list) -LIBHUNSPELL_DLL_EXPORTED void hunspell_suggest_free(Hunspell *pMS, char **slst, int len) -{ - for (int i = 0; i < len; i++) { - free(slst[i]); - } -} - -LIBHUNSPELL_DLL_EXPORTED char * hunspell_get_dic_encoding(Hunspell *pMS) -{ - return pMS->get_dic_encoding(); -} - -LIBHUNSPELL_DLL_EXPORTED int hunspell_add(Hunspell *pMS, char *word) -{ - return pMS->add(word); -} - -LIBHUNSPELL_DLL_EXPORTED int hunspell_add_with_affix(Hunspell *pMS, char *word, char *modelword) -{ - return pMS->add_with_affix(word, modelword); -} - -BOOL APIENTRY DllMain (HINSTANCE hInst /* Library instance handle. */ , - DWORD reason /* Reason this function is being called. */ , - LPVOID reserved /* Not used. */ ) -{ - switch (reason) - { - case DLL_PROCESS_ATTACH: - break; - - case DLL_PROCESS_DETACH: - break; - - case DLL_THREAD_ATTACH: - break; - - case DLL_THREAD_DETACH: - break; - } - - /* Returns TRUE on success, FALSE on failure */ - return TRUE; -} diff --git a/vendor/hunspell/src/win_api/hunspelldll.h b/vendor/hunspell/src/win_api/hunspelldll.h deleted file mode 100644 index a90e38a73..000000000 --- a/vendor/hunspell/src/win_api/hunspelldll.h +++ /dev/null @@ -1,68 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * Copyright (C) 2006 - * Miha Vrhovnik (http://simail.sf.net, http://xcollect.sf.net) - * All Rights Reserved. - * - * Contributor(s): - * - * - * Alternatively, the contents of this file may be used under the terms of - * either the GNU General Public License Version 2 or later (the "GPL"), or - * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** **/ -#include "hunspell.hxx" - -#ifndef _DLL_H_ -#define _DLL_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -//returns pointer to spell object, params are aff file name and dict file name -LIBHUNSPELL_DLL_EXPORTED void *hunspell_initialize(char *aff_file, char *dict_file); -//frees spell object -LIBHUNSPELL_DLL_EXPORTED void hunspell_uninitialize(Hunspell *pMS); -//spellcheck word, returns 1 if word ok otherwise 0 -LIBHUNSPELL_DLL_EXPORTED int hunspell_spell(Hunspell *pMS, char *word); -//suggest words for word, returns number of words in slst -// YOU NEED TO CALL hunspell_suggest_free after you've done with words -LIBHUNSPELL_DLL_EXPORTED int hunspell_suggest(Hunspell *pMS, char *word, char ***slst); -LIBHUNSPELL_DLL_EXPORTED int hunspell_suggest_auto(Hunspell *pMS, char *word, char ***slst); -//free slst array -LIBHUNSPELL_DLL_EXPORTED void hunspell_free_list(Hunspell *pMS, char ***slst, int len); -// deprecated (use hunspell_free_list) -LIBHUNSPELL_DLL_EXPORTED void hunspell_suggest_free(Hunspell *pMS, char **slst, int len); -//make local copy of returned string!! -LIBHUNSPELL_DLL_EXPORTED char * hunspell_get_dic_encoding(Hunspell *pMS); -//add word to dict (word is valid until spell object is not destroyed) -LIBHUNSPELL_DLL_EXPORTED int hunspell_add(Hunspell *pMS, char *word); -//add word to dict with affixes of the modelword (word is valid until spell object is not destroyed) -LIBHUNSPELL_DLL_EXPORTED int hunspell_add_with_affix(Hunspell *pMS, char *word, char *modelword); - -#ifdef __cplusplus -} -#endif - -#endif /* _DLL_H_ */