Started on grammar, but probably won't work

This commit is contained in:
Niles Rogoff 2017-03-25 15:37:11 -04:00
parent 3de17daa74
commit 9f88ed9d0d
No known key found for this signature in database
GPG Key ID: B78B908F23430F80
2 changed files with 98 additions and 1 deletions

View File

@ -26,6 +26,10 @@ relop = nfa.compile("+".join(["<", "<=", ">", ">=", "=", "<>", "!="]))
# Each matcher below is compiled from a pattern and then labelled with a
# `.type` string. NOTE(review): presumably this label becomes the type of the
# tokens the matcher produces -- confirm against the token-building code.
relop.type = "relop"
# Single-character punctuation matchers.
semicolon = nfa.compile(";")
semicolon.type = "semicolon"
colon = nfa.compile(":")
colon.type = "colon"
comma = nfa.compile(",")
comma.type = "comma"
# Matcher compiled from the `whitespace` pattern defined elsewhere in this file.
whitespace_nfa = nfa.compile(whitespace)
whitespace_nfa.type = "whitespace_nfa"
# Combines the single-character matchers for "(" and ")" through nfa.either.
parens = nfa.either(nfa.build_from_char("("), nfa.build_from_char(")"))
@ -68,7 +72,7 @@ def lex(data):
# process = subprocess.Popen(["gpp", "+c", "--", "\\n"], stdin = subprocess.PIPE, stdout = subprocess.PIPE)
# data = process.communicate(input=data.encode("utf-8"))[0].decode("utf-8")
# whichever of these is the first to match a substring of the text is used to create the token
priority_order = [whitespace_nfa, comment, integer, parens, bracket, brace, mathbinop, mathunop, unop, semicolon, keyword, assign, relop, string, identifier]
priority_order = [whitespace_nfa, comment, integer, parens, bracket, brace, mathbinop, mathunop, unop, semicolon, colon, comma, keyword, assign, relop, string, identifier]
done = []
data_ptr = 0
while data_ptr < len(data): # loop until we've read the whole input string

View File

@ -66,3 +66,96 @@ grammar = {
import sys

# Smoke test: build a parser from the grammar defined earlier in this file,
# parse one sample arithmetic expression and print whatever parse() returns.
p = parser(grammar)
parsed = p.parse("(10 + (99 * 44))*3 + 1231")
print(parsed)

# Exit with status 0 right after the smoke test; no statement placed after
# this call is ever executed.
sys.exit(0)
# Grammar for class definitions.
#
# Layout: each key names a nonterminal and maps to a list of alternative
# productions. A production is a sequence of two-element symbols:
#   ["nonterminal", "<name>"]            -- reference to another key (a string)
#   ["terminal", ["<token type>"]]       -- a token of the given type
#   ["terminal", ["<token type>", "x"]]  -- apparently a token of that type
#                                           whose text is "x"
#
# NOTE(review): "expr" is referenced below but has no rule in this dict; it
# must be supplied before this grammar can be used.
# NOTE(review): the terminals use the token type "braces"; confirm this is the
# exact type string the lexer assigns to "{" and "}".
grammar = {
    # One or more classes separated by semicolons.
    "program": [
        [
            ["nonterminal", "class"],
            ["terminal", ["semicolon"]],
            ["nonterminal", "program"],
        ],
        [
            ["nonterminal", "class"],
        ],
    ],
    # A class header, with or without an `inherits` clause, then a braced body.
    "class": [
        [
            ["terminal", ["keyword", "class"]],
            ["terminal", ["identifier"]],
            ["terminal", ["keyword", "inherits"]],
            ["terminal", ["identifier"]],
            ["terminal", ["braces", "{"]],
            ["nonterminal", "feature;+"],
            ["terminal", ["braces", "}"]],
        ],
        [
            ["terminal", ["keyword", "class"]],
            ["terminal", ["identifier"]],
            ["terminal", ["braces", "{"]],
            ["nonterminal", "feature;+"],
            ["terminal", ["braces", "}"]],
        ],
    ],
    # A semicolon-terminated list of features.
    # NOTE(review): the base case is a lone semicolon rather than
    # `feature ;` -- confirm this is the intended terminator.
    "feature;+": [
        [
            ["terminal", ["semicolon"]],
        ],
        [
            ["nonterminal", "feature"],
            ["terminal", ["semicolon"]],
            # Nonterminal names are plain strings, like every other reference
            # in this grammar (this one was previously wrapped in a list).
            ["nonterminal", "feature;+"],
        ],
    ],
    "feature": [
        # Method without formals: name ( ) : type { expr }
        [
            ["terminal", ["identifier"]],
            ["terminal", ["parens", "("]],
            ["terminal", ["parens", ")"]],
            ["terminal", ["colon"]],
            ["terminal", ["identifier"]],
            ["terminal", ["braces", "{"]],
            ["nonterminal", "expr"],
            ["terminal", ["braces", "}"]],
        ],
        # Method with formals: name ( formal, ... ) : type { expr }
        [
            ["terminal", ["identifier"]],
            ["terminal", ["parens", "("]],
            ["nonterminal", "formal,+"],
            ["terminal", ["parens", ")"]],
            ["terminal", ["colon"]],
            ["terminal", ["identifier"]],
            ["terminal", ["braces", "{"]],
            ["nonterminal", "expr"],
            ["terminal", ["braces", "}"]],
        ],
        # Attribute with initializer: name : type <assign> expr
        [
            ["terminal", ["identifier"]],
            ["terminal", ["colon"]],
            ["terminal", ["identifier"]],
            ["terminal", ["assign"]],
            ["nonterminal", "expr"],
        ],
        # Attribute without initializer: name : type
        [
            ["terminal", ["identifier"]],
            ["terminal", ["colon"]],
            ["terminal", ["identifier"]],
        ],
    ],
    # A single formal parameter: name : type
    "formal": [
        [
            ["terminal", ["identifier"]],
            ["terminal", ["colon"]],
            ["terminal", ["identifier"]],
        ],
    ],
    # One or more formals separated by commas.
    "formal,+": [
        [
            ["nonterminal", "formal"],
            ["terminal", ["comma"]],
            ["nonterminal", "formal,+"],
        ],
        [
            ["nonterminal", "formal"],
        ],
    ],
}