2000-06-13 06:34:41 +02:00
|
|
|
/*
|
|
|
|
* Wine Message Compiler lexical scanner
|
|
|
|
*
|
|
|
|
* Copyright 2000 Bertho A. Stultiens (BS)
|
|
|
|
*
|
2002-03-10 00:29:33 +01:00
|
|
|
* This library is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
|
|
* License as published by the Free Software Foundation; either
|
|
|
|
* version 2.1 of the License, or (at your option) any later version.
|
|
|
|
*
|
|
|
|
* This library is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
* Lesser General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
|
|
* License along with this library; if not, write to the Free Software
|
2006-05-18 14:49:52 +02:00
|
|
|
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
|
2000-06-13 06:34:41 +02:00
|
|
|
*/
|
2001-10-14 18:18:52 +02:00
|
|
|
|
|
|
|
#include "config.h"
|
|
|
|
|
2000-06-13 06:34:41 +02:00
|
|
|
#include <stdio.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <ctype.h>
|
|
|
|
#include <assert.h>
|
2001-03-21 21:30:46 +01:00
|
|
|
#include <string.h>
|
2000-06-13 06:34:41 +02:00
|
|
|
|
|
|
|
#include "utils.h"
|
|
|
|
#include "wmc.h"
|
|
|
|
#include "lang.h"
|
|
|
|
|
2005-09-28 20:17:08 +02:00
|
|
|
#include "mcy.tab.h"
|
2000-06-13 06:34:41 +02:00
|
|
|
|
|
|
|
/*
 * Keywords are case insensitive. All normal input is treated as
 * being in codepage iso-8859-1 for ascii input files (unicode
 * page 0) and as equivalent unicode if unicode input is selected.
 * All normal input, which is not part of a message text, is
 * enforced to be unicode page 0. Otherwise an error will be
 * generated. The normal file data should only be ASCII because
 * that is the basic definition of the grammar.
 *
 * Byteorder of unicode input is determined automatically by
 * reading the first 8 bytes and checking them against unicode
 * page 0 byteorder (hibyte must be 0).
 * -- FIXME --
 * Alternatively, the input is checked against a special byte
 * sequence to identify the file.
 * -- FIXME --
 *
 *
 * Keywords:
 *	Codepages
 *	Facility
 *	FacilityNames
 *	Language
 *	LanguageNames
 *	MessageId
 *	MessageIdTypedef
 *	OutputBase
 *	Severity
 *	SeverityNames
 *	SymbolicName
 *
 * Default added identifiers for classes:
 * SeverityNames:
 *	Success		= 0x0
 *	Informational	= 0x1
 *	Warning		= 0x2
 *	Error		= 0x3
 * FacilityNames:
 *	System		= 0x0FF
 *	Application	= 0xFFF
 *
 * The 'Codepages' keyword is a wmc extension.
 */
|
|
|
|
|
2006-01-24 14:00:32 +01:00
|
|
|
/*
 * Unicode spellings of the built-in keywords and of the default
 * severity, facility and language names. They are registered in the
 * token table the first time mcy_lex() runs.
 */
static const WCHAR ustr_application[]      = {'A','p','p','l','i','c','a','t','i','o','n',0};
static const WCHAR ustr_codepages[]        = {'C','o','d','e','p','a','g','e','s',0};
static const WCHAR ustr_english[]          = {'E','n','g','l','i','s','h',0};
static const WCHAR ustr_error[]            = {'E','r','r','o','r',0};
static const WCHAR ustr_facility[]         = {'F','a','c','i','l','i','t','y',0};
static const WCHAR ustr_facilitynames[]    = {'F','a','c','i','l','i','t','y','N','a','m','e','s',0};
static const WCHAR ustr_informational[]    = {'I','n','f','o','r','m','a','t','i','o','n','a','l',0};
static const WCHAR ustr_language[]         = {'L','a','n','g','u','a','g','e',0};
static const WCHAR ustr_languagenames[]    = {'L','a','n','g','u','a','g','e','N','a','m','e','s',0};
static const WCHAR ustr_messageid[]        = {'M','e','s','s','a','g','e','I','d',0};
static const WCHAR ustr_messageidtypedef[] = {'M','e','s','s','a','g','e','I','d','T','y','p','e','d','e','f',0};
static const WCHAR ustr_outputbase[]       = {'O','u','t','p','u','t','B','a','s','e',0};
static const WCHAR ustr_severity[]         = {'S','e','v','e','r','i','t','y',0};
static const WCHAR ustr_severitynames[]    = {'S','e','v','e','r','i','t','y','N','a','m','e','s',0};
static const WCHAR ustr_success[]          = {'S','u','c','c','e','s','s',0};
static const WCHAR ustr_symbolicname[]     = {'S','y','m','b','o','l','i','c','N','a','m','e',0};
static const WCHAR ustr_system[]           = {'S','y','s','t','e','m',0};
static const WCHAR ustr_warning[]          = {'W','a','r','n','i','n','g',0};
static const WCHAR ustr_msg00001[]         = {'m','s','g','0','0','0','0','1',0};
|
2000-06-13 06:34:41 +02:00
|
|
|
/*
 * This table is to beat any form of "expression building" to check for
 * correct filename characters. It is also used for ident checks.
 * FIXME: use it more consistently.
 *
 * Each entry is a bit mask of the CH_* flags below, indexed by an
 * iso-8859-1 character code (0..255).
 */

#define CH_SHORTNAME	0x01
#define CH_LONGNAME	0x02
#define CH_IDENT	0x04
#define CH_NUMBER	0x08
/*#define CH_WILDCARD	0x10*/
/*#define CH_DOT	0x20*/
#define CH_PUNCT	0x40
#define CH_INVALID	0x80

/*
 * The element type is unsigned char: several entries have bit 7 set
 * (0x80, 0xc0), which does not fit in a plain char where char is signed.
 */
static const unsigned char char_table[256] = {
	0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, /* 0x00 - 0x07 */
	0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, /* 0x08 - 0x0F */
	0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, /* 0x10 - 0x17 */
	0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, /* 0x18 - 0x1F */
	0x80, 0x03, 0x80, 0x03, 0x03, 0x03, 0x03, 0x03, /* 0x20 - 0x27 " !"#$%&'" */
	0x43, 0x43, 0x10, 0x80, 0x03, 0x03, 0x22, 0x80, /* 0x28 - 0x2F "()*+,-./" */
	0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, /* 0x30 - 0x37 "01234567" */
	0x0b, 0x0b, 0xc0, 0x80, 0x80, 0x80, 0x80, 0x10, /* 0x38 - 0x3F "89:;<=>?" */
	0x03, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, /* 0x40 - 0x47 "@ABCDEFG" */
	0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, /* 0x48 - 0x4F "HIJKLMNO" */
	0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, /* 0x50 - 0x57 "PQRSTUVW" */
	0x07, 0x07, 0x07, 0x80, 0x80, 0x80, 0x80, 0x07, /* 0x58 - 0x5F "XYZ[\]^_" */
	0x03, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, /* 0x60 - 0x67 "`abcdefg" */
	0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, /* 0x68 - 0x6F "hijklmno" */
	0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, /* 0x70 - 0x77 "pqrstuvw" */
	0x07, 0x07, 0x07, 0x03, 0x80, 0x03, 0x03, 0x80, /* 0x78 - 0x7F "xyz{|}~ " */
	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0x80 - 0x87 */
	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0x88 - 0x8F */
	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0x90 - 0x97 */
	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0x98 - 0x9F */
	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xA0 - 0xA7 */
	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xA8 - 0xAF */
	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xB0 - 0xB7 */
	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xB8 - 0xBF */
	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xC0 - 0xC7 */
	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xC8 - 0xCF */
	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xD0 - 0xD7 */
	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xD8 - 0xDF */
	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xE0 - 0xE7 */
	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xE8 - 0xEF */
	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xF0 - 0xF7 */
	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x80, /* 0xF8 - 0xFF */
};
|
|
|
|
|
|
|
|
/*
 * Return 1 when ch lies in the range 0..255 (no bits set above the
 * low byte), 0 otherwise. Negative values such as EOF yield 0.
 */
static int isisochar(int ch)
{
	return (ch & ~0xff) == 0;
}
|
|
|
|
|
|
|
|
/* Codepage used to translate non-unicode input lines to unicode */
static int codepage;

/*
 * Select the codepage for subsequent input translation.
 * The new value is stored first; an error is then reported through
 * xyyerror() if is_valid_codepage() does not accept it.
 */
void set_codepage(int cp)
{
	codepage = cp;
	if(is_valid_codepage( cp ))
		return;
	xyyerror("Codepage %d not found; cannot process\n", codepage);
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Input functions
|
|
|
|
*/
|
|
|
|
static int nungetstack = 0;
|
|
|
|
static int allocungetstack = 0;
|
|
|
|
static char *ungetstack = NULL;
|
|
|
|
static int ninputbuffer = 0;
|
|
|
|
static WCHAR *inputbuffer = NULL;
|
|
|
|
static char *xlatebuffer = NULL;
|
|
|
|
|
|
|
|
#define INPUTBUFFER_SIZE 2048 /* Must be larger than 4 and approx. large enough to hold a line */
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Fill the input buffer with *one* line of input.
|
|
|
|
* The line is '\n' terminated so that scanning
|
|
|
|
* messages with translation works as expected
|
|
|
|
* (otherwise we cannot pre-translate because the
|
|
|
|
* language is first known one line before the
|
|
|
|
* actual message).
|
|
|
|
*/
|
|
|
|
static int fill_inputbuffer(void)
|
|
|
|
{
|
|
|
|
int n;
|
2004-05-04 06:13:05 +02:00
|
|
|
static const char err_fatalread[] = "Fatal: reading input failed";
|
2000-06-13 06:34:41 +02:00
|
|
|
static int endian = -1;
|
|
|
|
|
|
|
|
if(!inputbuffer)
|
|
|
|
{
|
2007-02-08 16:41:22 +01:00
|
|
|
inputbuffer = xmalloc(INPUTBUFFER_SIZE*sizeof(WCHAR));
|
2000-06-13 06:34:41 +02:00
|
|
|
xlatebuffer = xmalloc(INPUTBUFFER_SIZE);
|
|
|
|
}
|
|
|
|
|
|
|
|
try_again:
|
|
|
|
if(!unicodein)
|
|
|
|
{
|
|
|
|
char *cptr;
|
|
|
|
cptr = fgets(xlatebuffer, INPUTBUFFER_SIZE, yyin);
|
|
|
|
if(!cptr && ferror(yyin))
|
|
|
|
xyyerror(err_fatalread);
|
|
|
|
else if(!cptr)
|
|
|
|
return 0;
|
2019-04-12 12:14:59 +02:00
|
|
|
n = wmc_mbstowcs(codepage, 0, xlatebuffer, strlen(xlatebuffer)+1, inputbuffer, INPUTBUFFER_SIZE);
|
2000-06-13 06:34:41 +02:00
|
|
|
if(n < 0)
|
2007-10-18 17:11:57 +02:00
|
|
|
internal_error(__FILE__, __LINE__, "Could not translate to unicode (%d)\n", n);
|
2000-06-13 06:34:41 +02:00
|
|
|
if(n <= 1)
|
2008-01-16 02:28:45 +01:00
|
|
|
goto try_again; /* Should not happen */
|
2000-06-13 06:34:41 +02:00
|
|
|
n--; /* Strip added conversion '\0' from input length */
|
|
|
|
/*
|
|
|
|
* FIXME:
|
|
|
|
* Detect UTF-8 in the first time we read some bytes by
|
|
|
|
* checking the special sequence "FE..." or something like
|
|
|
|
* that. I need to check www.unicode.org for details.
|
|
|
|
*/
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
if(endian == -1)
|
|
|
|
{
|
|
|
|
n = fread(inputbuffer, 1, 8, yyin);
|
|
|
|
if(n != 8)
|
|
|
|
{
|
|
|
|
if(!n && ferror(yyin))
|
|
|
|
xyyerror(err_fatalread);
|
|
|
|
else
|
2015-08-17 15:11:55 +02:00
|
|
|
xyyerror("Fatal: file too short to determine byteorder (should never happen)\n");
|
2000-06-13 06:34:41 +02:00
|
|
|
}
|
|
|
|
if(isisochar(inputbuffer[0]) &&
|
|
|
|
isisochar(inputbuffer[1]) &&
|
|
|
|
isisochar(inputbuffer[2]) &&
|
|
|
|
isisochar(inputbuffer[3]))
|
|
|
|
{
|
|
|
|
#ifdef WORDS_BIGENDIAN
|
|
|
|
endian = WMC_BO_BIG;
|
|
|
|
#else
|
|
|
|
endian = WMC_BO_LITTLE;
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
else if(isisochar(BYTESWAP_WORD(inputbuffer[0])) &&
|
|
|
|
isisochar(BYTESWAP_WORD(inputbuffer[1])) &&
|
|
|
|
isisochar(BYTESWAP_WORD(inputbuffer[2])) &&
|
|
|
|
isisochar(BYTESWAP_WORD(inputbuffer[3])))
|
|
|
|
{
|
|
|
|
#ifdef WORDS_BIGENDIAN
|
|
|
|
endian = WMC_BO_LITTLE;
|
|
|
|
#else
|
|
|
|
endian = WMC_BO_BIG;
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
else
|
2007-10-18 17:11:57 +02:00
|
|
|
xyyerror("Fatal: cannot determine file's byteorder\n");
|
2000-06-13 06:34:41 +02:00
|
|
|
/* FIXME:
|
|
|
|
* Determine the file-endian with the leader-bytes
|
|
|
|
* "FF FE..."; can't remember the exact sequence.
|
|
|
|
*/
|
|
|
|
n /= 2;
|
|
|
|
#ifdef WORDS_BIGENDIAN
|
|
|
|
if(endian == WMC_BO_LITTLE)
|
|
|
|
#else
|
|
|
|
if(endian == WMC_BO_BIG)
|
|
|
|
#endif
|
|
|
|
{
|
|
|
|
inputbuffer[0] = BYTESWAP_WORD(inputbuffer[0]);
|
|
|
|
inputbuffer[1] = BYTESWAP_WORD(inputbuffer[1]);
|
|
|
|
inputbuffer[2] = BYTESWAP_WORD(inputbuffer[2]);
|
|
|
|
inputbuffer[3] = BYTESWAP_WORD(inputbuffer[3]);
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
n = 0;
|
|
|
|
for(i = 0; i < INPUTBUFFER_SIZE; i++)
|
|
|
|
{
|
|
|
|
int t;
|
|
|
|
t = fread(&inputbuffer[i], 2, 1, yyin);
|
|
|
|
if(!t && ferror(yyin))
|
|
|
|
xyyerror(err_fatalread);
|
|
|
|
else if(!t && n)
|
|
|
|
break;
|
|
|
|
n++;
|
|
|
|
#ifdef WORDS_BIGENDIAN
|
|
|
|
if(endian == WMC_BO_LITTLE)
|
|
|
|
#else
|
|
|
|
if(endian == WMC_BO_BIG)
|
|
|
|
#endif
|
|
|
|
{
|
|
|
|
if((inputbuffer[i] = BYTESWAP_WORD(inputbuffer[i])) == '\n')
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
if(inputbuffer[i] == '\n')
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
if(!n)
|
|
|
|
{
|
2007-10-18 17:11:57 +02:00
|
|
|
mcy_warning("Re-read line (input was or converted to zilch)\n");
|
2000-06-13 06:34:41 +02:00
|
|
|
goto try_again; /* Should not happen, but could be due to stdin reading and a signal */
|
|
|
|
}
|
|
|
|
|
|
|
|
ninputbuffer += n;
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int get_unichar(void)
|
|
|
|
{
|
|
|
|
static WCHAR *b = NULL;
|
|
|
|
char_number++;
|
|
|
|
|
|
|
|
if(nungetstack)
|
|
|
|
return ungetstack[--nungetstack];
|
|
|
|
|
|
|
|
if(!ninputbuffer)
|
|
|
|
{
|
|
|
|
if(!fill_inputbuffer())
|
|
|
|
return EOF;
|
|
|
|
b = inputbuffer;
|
|
|
|
}
|
|
|
|
|
|
|
|
ninputbuffer--;
|
|
|
|
return (int)(*b++ & 0xffff);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void unget_unichar(int ch)
|
|
|
|
{
|
|
|
|
if(ch == EOF)
|
|
|
|
return;
|
|
|
|
|
|
|
|
char_number--;
|
|
|
|
|
|
|
|
if(nungetstack == allocungetstack)
|
|
|
|
{
|
|
|
|
allocungetstack += 32;
|
|
|
|
ungetstack = xrealloc(ungetstack, allocungetstack * sizeof(*ungetstack));
|
|
|
|
}
|
|
|
|
|
|
|
|
ungetstack[nungetstack++] = (WCHAR)ch;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * Normal character stack.
 * Used for number scanning.
 */
static int ncharstack = 0;	/* Number of characters currently stored */
static int alloccharstack = 0;	/* Allocated capacity of charstack */
static char *charstack = NULL;	/* Storage, grown on demand by push_char() */

/* Forget the stored characters; the allocated storage is kept. */
static void empty_char_stack(void)
{
	ncharstack = 0;
}
|
|
|
|
|
|
|
|
static void push_char(int ch)
|
|
|
|
{
|
|
|
|
if(ncharstack == alloccharstack)
|
|
|
|
{
|
|
|
|
alloccharstack += 32;
|
|
|
|
charstack = xrealloc(charstack, alloccharstack * sizeof(*charstack));
|
|
|
|
}
|
|
|
|
charstack[ncharstack++] = (char)ch;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int tos_char_stack(void)
|
|
|
|
{
|
|
|
|
if(!ncharstack)
|
|
|
|
return 0;
|
|
|
|
else
|
|
|
|
return (int)(charstack[ncharstack-1] & 0xff);
|
|
|
|
}
|
|
|
|
|
|
|
|
static char *get_char_stack(void)
|
|
|
|
{
|
|
|
|
return charstack;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Unicode character stack.
|
|
|
|
* Used for general scanner.
|
|
|
|
*/
|
|
|
|
static int nunicharstack = 0;
|
|
|
|
static int allocunicharstack = 0;
|
|
|
|
static WCHAR *unicharstack = NULL;
|
|
|
|
|
|
|
|
static void empty_unichar_stack(void)
|
|
|
|
{
|
|
|
|
nunicharstack = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void push_unichar(int ch)
|
|
|
|
{
|
|
|
|
if(nunicharstack == allocunicharstack)
|
|
|
|
{
|
|
|
|
allocunicharstack += 128;
|
|
|
|
unicharstack = xrealloc(unicharstack, allocunicharstack * sizeof(*unicharstack));
|
|
|
|
}
|
|
|
|
unicharstack[nunicharstack++] = (WCHAR)ch;
|
|
|
|
}
|
|
|
|
|
|
|
|
#if 0
/*
 * Return the character on top of the unicode character stack, or 0
 * when the stack is empty. Compiled out with #if 0.
 */
static int tos_unichar_stack(void)
{
	if(nunicharstack)
		return (int)(unicharstack[nunicharstack-1] & 0xffff);
	return 0;
}
#endif
|
|
|
|
|
|
|
|
static WCHAR *get_unichar_stack(void)
|
|
|
|
{
|
|
|
|
return unicharstack;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Number scanner
|
|
|
|
*
|
|
|
|
* state | ch | next state
|
|
|
|
* ------+-----------------+--------------------------
|
|
|
|
* 0 | [0] | 1
|
|
|
|
* 0 | [1-9] | 4
|
|
|
|
* 0 | . | error (should never occur)
|
|
|
|
* 1 | [xX] | 2
|
|
|
|
* 1 | [0-7] | 3
|
|
|
|
* 1 | [89a-wyzA-WYZ_] | error invalid digit
|
|
|
|
* 1 | . | return 0
|
|
|
|
* 2 | [0-9a-fA-F] | 2
|
|
|
|
* 2 | [g-zG-Z_] | error invalid hex digit
|
|
|
|
* 2 | . | return (hex-number) if TOS != [xX] else error
|
|
|
|
* 3 | [0-7] | 3
|
|
|
|
* 3 | [89a-zA-Z_] | error invalid octal digit
|
|
|
|
* 3 | . | return (octal-number)
|
|
|
|
* 4 | [0-9] | 4
|
|
|
|
* 4 | [a-zA-Z_] | error invalid decimal digit
|
|
|
|
* 4 | . | return (decimal-number)
|
|
|
|
*
|
|
|
|
* All non-identifier characters [^a-zA-Z_0-9] terminate the scan
|
|
|
|
* and return the value. This is not entirely correct, but close
|
|
|
|
* enough (should check punctuators as trailing context, but the
|
|
|
|
* char_table is not adapted to that and it is questionable whether
|
|
|
|
* it is worth the trouble).
|
|
|
|
* All non-iso-8859-1 characters are an error.
|
|
|
|
*/
|
|
|
|
static int scan_number(int ch)
|
|
|
|
{
|
|
|
|
int state = 0;
|
|
|
|
int base = 10;
|
|
|
|
empty_char_stack();
|
|
|
|
|
|
|
|
while(1)
|
|
|
|
{
|
|
|
|
if(!isisochar(ch))
|
2007-10-18 17:11:57 +02:00
|
|
|
xyyerror("Invalid digit\n");
|
2000-06-13 06:34:41 +02:00
|
|
|
|
|
|
|
switch(state)
|
|
|
|
{
|
|
|
|
case 0:
|
|
|
|
if(isdigit(ch))
|
|
|
|
{
|
|
|
|
push_char(ch);
|
|
|
|
if(ch == '0')
|
|
|
|
state = 1;
|
|
|
|
else
|
|
|
|
state = 4;
|
|
|
|
}
|
|
|
|
else
|
2007-10-18 17:11:57 +02:00
|
|
|
internal_error(__FILE__, __LINE__, "Non-digit in first number-scanner state\n");
|
2000-06-13 06:34:41 +02:00
|
|
|
break;
|
|
|
|
case 1:
|
|
|
|
if(ch == 'x' || ch == 'X')
|
|
|
|
{
|
|
|
|
push_char(ch);
|
|
|
|
state = 2;
|
|
|
|
}
|
|
|
|
else if(ch >= '0' && ch <= '7')
|
|
|
|
{
|
|
|
|
push_char(ch);
|
|
|
|
state = 3;
|
|
|
|
}
|
|
|
|
else if(isalpha(ch) || ch == '_')
|
2007-10-18 17:11:57 +02:00
|
|
|
xyyerror("Invalid number digit\n");
|
2000-06-13 06:34:41 +02:00
|
|
|
else
|
|
|
|
{
|
|
|
|
unget_unichar(ch);
|
2006-09-12 09:05:15 +02:00
|
|
|
mcy_lval.num = 0;
|
2000-06-13 06:34:41 +02:00
|
|
|
return tNUMBER;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case 2:
|
|
|
|
if(isxdigit(ch))
|
|
|
|
push_char(ch);
|
|
|
|
else if(isalpha(ch) || ch == '_' || !isxdigit(tos_char_stack()))
|
2007-10-18 17:11:57 +02:00
|
|
|
xyyerror("Invalid hex digit\n");
|
2000-06-13 06:34:41 +02:00
|
|
|
else
|
|
|
|
{
|
|
|
|
base = 16;
|
|
|
|
goto finish;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case 3:
|
|
|
|
if(ch >= '0' && ch <= '7')
|
|
|
|
push_char(ch);
|
|
|
|
else if(isalnum(ch) || ch == '_')
|
2007-10-18 17:11:57 +02:00
|
|
|
xyyerror("Invalid octal digit\n");
|
2000-06-13 06:34:41 +02:00
|
|
|
else
|
|
|
|
{
|
|
|
|
base = 8;
|
|
|
|
goto finish;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case 4:
|
|
|
|
if(isdigit(ch))
|
|
|
|
push_char(ch);
|
|
|
|
else if(isalnum(ch) || ch == '_')
|
2007-10-18 17:11:57 +02:00
|
|
|
xyyerror("Invalid decimal digit\n");
|
2000-06-13 06:34:41 +02:00
|
|
|
else
|
|
|
|
{
|
|
|
|
base = 10;
|
|
|
|
goto finish;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
default:
|
2007-10-18 17:11:57 +02:00
|
|
|
internal_error(__FILE__, __LINE__, "Invalid state in number-scanner\n");
|
2000-06-13 06:34:41 +02:00
|
|
|
}
|
|
|
|
ch = get_unichar();
|
|
|
|
}
|
|
|
|
finish:
|
|
|
|
unget_unichar(ch);
|
|
|
|
push_char(0);
|
2006-09-12 09:05:15 +02:00
|
|
|
mcy_lval.num = strtoul(get_char_stack(), NULL, base);
|
2000-06-13 06:34:41 +02:00
|
|
|
return tNUMBER;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void newline(void)
|
|
|
|
{
|
|
|
|
line_number++;
|
|
|
|
char_number = 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int unisort(const void *p1, const void *p2)
|
|
|
|
{
|
2004-12-06 21:43:55 +01:00
|
|
|
return unistricmp(((const token_t *)p1)->name, ((const token_t *)p2)->name);
|
2000-06-13 06:34:41 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
static token_t *tokentable = NULL;
|
|
|
|
static int ntokentable = 0;
|
|
|
|
|
|
|
|
token_t *lookup_token(const WCHAR *s)
|
|
|
|
{
|
|
|
|
token_t tok;
|
|
|
|
|
|
|
|
tok.name = s;
|
|
|
|
return (token_t *)bsearch(&tok, tokentable, ntokentable, sizeof(*tokentable), unisort);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Append a token to the token table and re-sort the table so that
 * lookup_token() can keep using a binary search.
 * The name and alias pointers are stored as given, not copied.
 */
void add_token(tok_e type, const WCHAR *name, int tok, int cp, const WCHAR *alias, int fix)
{
	token_t *entry;

	ntokentable++;
	tokentable = xrealloc(tokentable, ntokentable * sizeof(*tokentable));

	entry = &tokentable[ntokentable-1];
	entry->type	= type;
	entry->name	= name;
	entry->token	= tok;
	entry->codepage	= cp;
	entry->alias	= alias;
	entry->fixed	= fix;

	qsort(tokentable, ntokentable, sizeof(*tokentable), unisort);
}
|
|
|
|
|
|
|
|
/*
 * Hand out the token table: *tab receives the table itself (not a
 * copy) and *len the number of entries. Both pointers must be non-NULL.
 */
void get_tokentable(token_t **tab, int *len)
{
	assert(tab != NULL);
	assert(len != NULL);

	*len = ntokentable;
	*tab = tokentable;
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The scanner
|
|
|
|
*
|
|
|
|
*/
|
2006-09-12 09:05:15 +02:00
|
|
|
int mcy_lex(void)
|
2000-06-13 06:34:41 +02:00
|
|
|
{
|
2004-04-20 02:34:52 +02:00
|
|
|
static const WCHAR ustr_dot1[] = { '.', '\n', 0 };
|
|
|
|
static const WCHAR ustr_dot2[] = { '.', '\r', '\n', 0 };
|
2000-06-13 06:34:41 +02:00
|
|
|
static int isinit = 0;
|
|
|
|
int ch;
|
|
|
|
|
|
|
|
if(!isinit)
|
|
|
|
{
|
|
|
|
isinit++;
|
|
|
|
set_codepage(WMC_DEFAULT_CODEPAGE);
|
|
|
|
add_token(tok_keyword, ustr_codepages, tCODEPAGE, 0, NULL, 0);
|
|
|
|
add_token(tok_keyword, ustr_facility, tFACILITY, 0, NULL, 1);
|
|
|
|
add_token(tok_keyword, ustr_facilitynames, tFACNAMES, 0, NULL, 1);
|
|
|
|
add_token(tok_keyword, ustr_language, tLANGUAGE, 0, NULL, 1);
|
|
|
|
add_token(tok_keyword, ustr_languagenames, tLANNAMES, 0, NULL, 1);
|
|
|
|
add_token(tok_keyword, ustr_messageid, tMSGID, 0, NULL, 1);
|
|
|
|
add_token(tok_keyword, ustr_messageidtypedef, tTYPEDEF, 0, NULL, 1);
|
|
|
|
add_token(tok_keyword, ustr_outputbase, tBASE, 0, NULL, 1);
|
|
|
|
add_token(tok_keyword, ustr_severity, tSEVERITY, 0, NULL, 1);
|
|
|
|
add_token(tok_keyword, ustr_severitynames, tSEVNAMES, 0, NULL, 1);
|
|
|
|
add_token(tok_keyword, ustr_symbolicname, tSYMNAME, 0, NULL, 1);
|
|
|
|
add_token(tok_severity, ustr_error, 0x03, 0, NULL, 0);
|
|
|
|
add_token(tok_severity, ustr_warning, 0x02, 0, NULL, 0);
|
|
|
|
add_token(tok_severity, ustr_informational, 0x01, 0, NULL, 0);
|
|
|
|
add_token(tok_severity, ustr_success, 0x00, 0, NULL, 0);
|
|
|
|
add_token(tok_facility, ustr_application, 0xFFF, 0, NULL, 0);
|
|
|
|
add_token(tok_facility, ustr_system, 0x0FF, 0, NULL, 0);
|
|
|
|
add_token(tok_language, ustr_english, 0x409, 437, ustr_msg00001, 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
empty_unichar_stack();
|
|
|
|
|
|
|
|
while(1)
|
|
|
|
{
|
|
|
|
if(want_line)
|
|
|
|
{
|
|
|
|
while((ch = get_unichar()) != '\n')
|
|
|
|
{
|
|
|
|
if(ch == EOF)
|
2007-10-18 17:11:57 +02:00
|
|
|
xyyerror("Unexpected EOF\n");
|
2000-06-13 06:34:41 +02:00
|
|
|
push_unichar(ch);
|
|
|
|
}
|
|
|
|
newline();
|
|
|
|
push_unichar(ch);
|
|
|
|
push_unichar(0);
|
|
|
|
if(!unistrcmp(ustr_dot1, get_unichar_stack()) || !unistrcmp(ustr_dot2, get_unichar_stack()))
|
|
|
|
{
|
|
|
|
want_line = 0;
|
|
|
|
/* Reset the codepage to our default after each message */
|
|
|
|
set_codepage(WMC_DEFAULT_CODEPAGE);
|
|
|
|
return tMSGEND;
|
|
|
|
}
|
2006-09-12 09:05:15 +02:00
|
|
|
mcy_lval.str = xunistrdup(get_unichar_stack());
|
2000-06-13 06:34:41 +02:00
|
|
|
return tLINE;
|
|
|
|
}
|
|
|
|
|
|
|
|
ch = get_unichar();
|
|
|
|
|
|
|
|
if(ch == EOF)
|
|
|
|
return EOF;
|
|
|
|
|
|
|
|
if(ch == '\n')
|
|
|
|
{
|
|
|
|
newline();
|
|
|
|
if(want_nl)
|
|
|
|
{
|
|
|
|
want_nl = 0;
|
|
|
|
return tNL;
|
|
|
|
}
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
if(isisochar(ch))
|
|
|
|
{
|
|
|
|
if(want_file)
|
|
|
|
{
|
|
|
|
int n = 0;
|
|
|
|
while(n < 8 && isisochar(ch))
|
|
|
|
{
|
|
|
|
int t = char_table[ch];
|
|
|
|
if((t & CH_PUNCT) || !(t & CH_SHORTNAME))
|
|
|
|
break;
|
2002-06-01 01:06:46 +02:00
|
|
|
|
2000-06-13 06:34:41 +02:00
|
|
|
push_unichar(ch);
|
|
|
|
n++;
|
|
|
|
ch = get_unichar();
|
|
|
|
}
|
|
|
|
unget_unichar(ch);
|
|
|
|
push_unichar(0);
|
|
|
|
want_file = 0;
|
2006-09-12 09:05:15 +02:00
|
|
|
mcy_lval.str = xunistrdup(get_unichar_stack());
|
2000-06-13 06:34:41 +02:00
|
|
|
return tFILE;
|
|
|
|
}
|
|
|
|
|
|
|
|
if(char_table[ch] & CH_IDENT)
|
|
|
|
{
|
|
|
|
token_t *tok;
|
|
|
|
while(isisochar(ch) && (char_table[ch] & (CH_IDENT|CH_NUMBER)))
|
|
|
|
{
|
|
|
|
push_unichar(ch);
|
|
|
|
ch = get_unichar();
|
|
|
|
}
|
|
|
|
unget_unichar(ch);
|
|
|
|
push_unichar(0);
|
|
|
|
if(!(tok = lookup_token(get_unichar_stack())))
|
|
|
|
{
|
2006-09-12 09:05:15 +02:00
|
|
|
mcy_lval.str = xunistrdup(get_unichar_stack());
|
2000-06-13 06:34:41 +02:00
|
|
|
return tIDENT;
|
|
|
|
}
|
|
|
|
switch(tok->type)
|
|
|
|
{
|
|
|
|
case tok_keyword:
|
|
|
|
return tok->token;
|
|
|
|
|
|
|
|
case tok_language:
|
|
|
|
codepage = tok->codepage;
|
|
|
|
/* Fall through */
|
|
|
|
case tok_severity:
|
|
|
|
case tok_facility:
|
2006-09-12 09:05:15 +02:00
|
|
|
mcy_lval.tok = tok;
|
2000-06-13 06:34:41 +02:00
|
|
|
return tTOKEN;
|
|
|
|
|
|
|
|
default:
|
2007-10-18 17:11:57 +02:00
|
|
|
internal_error(__FILE__, __LINE__, "Invalid token type encountered\n");
|
2000-06-13 06:34:41 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if(isspace(ch)) /* Ignore space */
|
|
|
|
continue;
|
2002-06-01 01:06:46 +02:00
|
|
|
|
2000-06-13 06:34:41 +02:00
|
|
|
if(isdigit(ch))
|
|
|
|
return scan_number(ch);
|
|
|
|
}
|
|
|
|
|
|
|
|
switch(ch)
|
|
|
|
{
|
|
|
|
case ':':
|
|
|
|
case '=':
|
|
|
|
case '+':
|
|
|
|
case '(':
|
|
|
|
case ')':
|
|
|
|
return ch;
|
|
|
|
case ';':
|
|
|
|
while(ch != '\n' && ch != EOF)
|
|
|
|
{
|
|
|
|
push_unichar(ch);
|
|
|
|
ch = get_unichar();
|
|
|
|
}
|
|
|
|
newline();
|
|
|
|
push_unichar(ch); /* Include the newline */
|
|
|
|
push_unichar(0);
|
2006-09-12 09:05:15 +02:00
|
|
|
mcy_lval.str = xunistrdup(get_unichar_stack());
|
2000-06-13 06:34:41 +02:00
|
|
|
return tCOMMENT;
|
|
|
|
default:
|
2007-10-18 17:11:57 +02:00
|
|
|
xyyerror("Invalid character '%c' (0x%04x)\n", isisochar(ch) && isprint(ch) ? ch : '.', ch);
|
2000-06-13 06:34:41 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|