From 9f88ed9d0d2e9837648ec686b399bce5b8b753ca Mon Sep 17 00:00:00 2001
From: Niles Rogoff
Date: Sat, 25 Mar 2017 15:37:11 -0400
Subject: [PATCH] Started on grammar, but probably won't work

---
Reviewer notes (text in this area is not part of the commit message):

- What the patch does: lexer.py gains `colon` and `comma` token NFAs and
  lists them in `priority_order` right after `semicolon`; parser.py gains a
  second `grammar` dict with productions for "program", "class",
  "feature;+", "feature", "formal" and "formal,+".
- Grammar encoding, as used below: each key maps to a list of alternatives;
  each alternative is a list of symbols; a symbol is either
  ["nonterminal", <key string>] or ["terminal", [<token type>, <optional text>]].
- Fix applied here: the recursive reference in the second "feature;+"
  alternative was written ["nonterminal", ["feature;+"]]. Every other
  nonterminal reference uses a plain string matching a key of the dict, so
  it is now ["nonterminal", "feature;+"]. The line count of the hunk is
  unchanged; the post-image blob id on the parser.py `index` line predates
  this one-line change.
- The new `grammar` is assigned after `sys.exit(0)`, so it is never
  evaluated when parser.py runs as a script.
- NOTE(review): the grammar names the brace terminal "braces" while the
  lexer variable in `priority_order` is `brace`; the token's `.type` string
  is not visible in this patch -- confirm the two agree.
- NOTE(review): "expr" is referenced by "feature" but not defined in this
  dict.
- Leading whitespace was reconstructed with 4-space indents; if the target
  files indent differently, apply with `git apply --ignore-whitespace`.

 lexer.py  |  6 +++-
 parser.py | 93 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 98 insertions(+), 1 deletion(-)

diff --git a/lexer.py b/lexer.py
index bf13c53..f80d03c 100644
--- a/lexer.py
+++ b/lexer.py
@@ -26,6 +26,10 @@ relop = nfa.compile("+".join(["<", "<=", ">", ">=", "=", "<>", "!="]))
 relop.type = "relop"
 semicolon = nfa.compile(";")
 semicolon.type = "semicolon"
+colon = nfa.compile(":")
+colon.type = "colon"
+comma = nfa.compile(",")
+comma.type = "comma"
 whitespace_nfa = nfa.compile(whitespace)
 whitespace_nfa.type = "whitespace_nfa"
 parens = nfa.either(nfa.build_from_char("("), nfa.build_from_char(")"))
@@ -68,7 +72,7 @@ def lex(data):
 #    process = subprocess.Popen(["gpp", "+c", "--", "\\n"], stdin = subprocess.PIPE, stdout = subprocess.PIPE)
 #    data = process.communicate(input=data.encode("utf-8"))[0].decode("utf-8")
     # whichever of these is the first to match a substring of the text is used to create the token
-    priority_order = [whitespace_nfa, comment, integer, parens, bracket, brace, mathbinop, mathunop, unop, semicolon, keyword, assign, relop, string, identifier]
+    priority_order = [whitespace_nfa, comment, integer, parens, bracket, brace, mathbinop, mathunop, unop, semicolon, colon, comma, keyword, assign, relop, string, identifier]
     done = []
     data_ptr = 0
     while data_ptr < len(data): # loop until we've read the whole input string
diff --git a/parser.py b/parser.py
index c03b8ae..54e1633 100644
--- a/parser.py
+++ b/parser.py
@@ -66,3 +66,96 @@ grammar = {
 
 p = parser(grammar)
 print(p.parse("(10 + (99 * 44))*3 + 1231"))
+
+
+import sys
+sys.exit(0)
+
+
+
+grammar = {
+    "program": [
+        [["nonterminal", "class"], ["terminal", ["semicolon"]], ["nonterminal", "program"]],
+        [["nonterminal", "class"]],
+    ],
+    "class": [
+        [
+            ["terminal", ["keyword", "class"]],
+            ["terminal", ["identifier"]],
+            ["terminal", ["keyword", "inherits"]],
+            ["terminal", ["identifier"]],
+            ["terminal", ["braces", "{"]],
+            ["nonterminal", "feature;+"],
+            ["terminal", ["braces", "}"]]
+        ],
+        [
+            ["terminal", ["keyword", "class"]],
+            ["terminal", ["identifier"]],
+            ["terminal", ["braces", "{"]],
+            ["nonterminal", "feature;+"],
+            ["terminal", ["braces", "}"]]
+        ]
+    ],
+    "feature;+": [
+        [
+            ["terminal", ["semicolon"]]
+        ],
+        [
+            ["nonterminal", "feature"],
+            ["terminal", ["semicolon"]],
+            ["nonterminal", "feature;+"]
+        ]
+    ],
+    "feature": [
+        [
+            ["terminal", ["identifier"]],
+            ["terminal", ["parens", "("]],
+            ["terminal", ["parens", ")"]],
+            ["terminal", ["colon"]],
+            ["terminal", ["identifier"]],
+            ["terminal", ["braces", "{"]],
+            ["nonterminal", "expr"],
+            ["terminal", ["braces", "}"]]
+        ],
+        [
+            ["terminal", ["identifier"]],
+            ["terminal", ["parens", "("]],
+            ["nonterminal", "formal,+"],
+            ["terminal", ["parens", ")"]],
+            ["terminal", ["colon"]],
+            ["terminal", ["identifier"]],
+            ["terminal", ["braces", "{"]],
+            ["nonterminal", "expr"],
+            ["terminal", ["braces", "}"]]
+        ],
+        [
+            ["terminal", ["identifier"]],
+            ["terminal", ["colon"]],
+            ["terminal", ["identifier"]],
+            ["terminal", ["assign"]],
+            ["nonterminal", "expr"],
+        ],
+        [
+            ["terminal", ["identifier"]],
+            ["terminal", ["colon"]],
+            ["terminal", ["identifier"]],
+        ]
+    ],
+    "formal": [
+        [
+            ["terminal", ["identifier"]],
+            ["terminal", ["colon"]],
+            ["terminal", ["identifier"]],
+        ]
+    ],
+    "formal,+": [
+        [
+            ["nonterminal", "formal"],
+            ["terminal", ["comma"]],
+            ["nonterminal", "formal,+"]
+        ],
+        [
+            ["nonterminal", "formal"]
+        ]
+    ]
+}