From 837b302d0f57a10f9f423d0aeaea244f37adf231 Mon Sep 17 00:00:00 2001
From: Niles Rogoff
Date: Sat, 25 Mar 2017 14:55:36 -0400
Subject: [PATCH] Parser doesn't use global variables

---
 nfa.py    | 10 +++++--
 parser.py | 86 ++++++++++++++++++++++++++++++++-----------------------
 2 files changed, 58 insertions(+), 38 deletions(-)

diff --git a/nfa.py b/nfa.py
index b7b58a7..d20b9c0 100644
--- a/nfa.py
+++ b/nfa.py
@@ -126,7 +126,7 @@ def match(f, inp):
             return True
     return False
 
-
+# takes a list of fields and returns a new field with all of the list's fields concatenated together
 def list_to_field(l):
     if len(l) == 0: # this base case shouldn't be hit unless you have an empty regex or start your regex with a + or something
         # all it does is make a field with one terminal node in it
@@ -141,6 +141,11 @@ def list_to_field(l):
     for k in l[1:]:
         final = concatenate(final, k)
     return final
+
+# used for ?
+# start -> (passed field) --ε--> (node) <- that one is terminal
+#      \_______________ε____/
+# adds an ε move from the passed field's start node to the new end node, and also adds one from each of the passed field's terminal nodes
 def zero_or_one(f):
     f2 = field()
     f2.nodes = f.nodes
@@ -154,6 +159,7 @@ def zero_or_one(f):
         f2.nodes.add(n)
         f2.start.moves['ε'].add(n)
     return f2
+# takes a regex like "ab(cd*)+f" and makes an nfa field out of it
 def compile(regex):
     to_concat = [] # empty list of things to concatenate
     inparens = False # parenthesis parsing stuff
@@ -178,7 +184,7 @@ def compile(regex):
             ret = either(list_to_field(to_concat), compile(regex[i+1:]))
             ret.orig = regex
             return ret
-        elif regex[i] == "?": # COMPLETELY UNTESTED
+        elif regex[i] == "?":
             to_concat[-1] = zero_or_one(to_concat[-1])
         else: # if we just found a regular character, add it to the stuff to concatenate
             to_concat.append(build_from_char(regex[i]))
diff --git a/parser.py b/parser.py
index d4ccf24..651adf2 100644
--- a/parser.py
+++ b/parser.py
@@ -1,17 +1,54 @@
 import sys
 sys.path.append(".")
 import lexer
-def term(t_type, literal = False):
-    global tokens_ptr, tokens
-    this_token = tokens[tokens_ptr]
-    tokens_ptr += 1
-    print("attempting to match '" + str(literal) + "' ("+t_type+") to " + this_token.matched_string + " at position " + str(tokens_ptr - 1))
-    if t_type != this_token.type:
-        return False
-    if not literal:
-        return True
-    return literal == this_token.matched_string
+# I know I really shouldn't be using a class for this, but tokens and tokens_ptr used to be global variables and I didn't feel like rewriting everything to pass them around, so now they're technically instance variables
+
+class parser():
+    def __init__(self, grammar):
+        self.grammar = grammar
+    def parse(self, inp):
+        self.tokens = lexer.lex(inp)
+        self.tokens = [t for t in self.tokens if not t.type == "whitespace_nfa"]
+        self.tokens_ptr = 0
+        return self.match_nterm(self.grammar["start"])
+
+    def term(self, t_type, literal = False):
+        this_token = self.tokens[self.tokens_ptr]
+        self.tokens_ptr += 1
+        print("attempting to match '" + str(literal) + "' ("+t_type+") to " + this_token.matched_string + " at position " + str(self.tokens_ptr - 1))
+        if t_type != this_token.type:
+            return False
+        if not literal:
+            return True
+        return literal == this_token.matched_string
+
+    def match_syms(self, syms):
+        # return term(a) and term(b) and term(c)
+        for sym in syms:
+            if not self.match_sym(sym):
+                return False
+        return True
+
+    def match_sym(self, sym):
+        if sym[0] == "terminal":
+            return self.term(*(sym[1]))
+        return self.match_nterm(sym[1])
+
+    def match_nterm(self, nterm):
+        save = self.tokens_ptr
+        for f in self.grammar[nterm]:
+            self.tokens_ptr = save
+            if self.match_syms(f):
+                return True
+        return False
+
+# Our productions for this context-free grammar
+# E -> T + E
+#    | T
+# T -> int * T
+#    | int
+#    | ( E )
 
 grammar = {
     "e": [
         [["nonterminal", "t"], ["terminal", ["mathbinop", "+"]], ["nonterminal", "e"]],
@@ -22,31 +59,8 @@ grammar = {
         [["terminal", ["integer"]]],
         [["terminal", ["parens", "("]], ["nonterminal", "e"], ["terminal", ["parens", ")"]]],
     ],
-    "order": ["e", "t"]
+    "start": "e",
 }
 
-def match_syms(syms):
-    # return term(a) and term(b) and term(c)
-    for sym in syms:
-        if not match_sym(sym):
-            return False
-    return True
-
-def match_sym(sym):
-    if sym[0] == "terminal":
-        return term(*(sym[1]))
-    return match_nterm(sym[1])
-
-def match_nterm(nterm):
-    global tokens_ptr
-    save = tokens_ptr
-    for f in grammar[nterm]:
-        tokens_ptr = save
-        if match_syms(f):
-            return True
-    return False
-
-tokens = lexer.lex("(10+1)*3")
-tokens_ptr = 0
-print(match_nterm("e"))
-print(tokens_ptr)
+p = parser(grammar)
+print(p.parse("(10 + (99 * 44))*3 + 1231"))
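
Now that ? is handled in compile() (the patch drops the old COMPLETELY UNTESTED marker), a quick way to exercise zero_or_one is to compile a regex containing ? and match strings with and without the optional character. A minimal sketch, assuming nfa.py is importable from the repository root; the expected results are what the ε-bypass construction should give, not verified output:

    import nfa

    f = nfa.compile("colou?r")       # ? makes the preceding u optional via an ε bypass
    print(nfa.match(f, "color"))     # expected True
    print(nfa.match(f, "colour"))    # expected True
    print(nfa.match(f, "colouur"))   # expected False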
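
On the parser side, the only thing parse() and term() assume about lexer.lex() is that it returns token objects exposing .type and .matched_string, with types like integer, mathbinop, parens and whitespace_nfa (the names used in the grammar and in the whitespace filter). The real lexer.py is not part of this patch; for poking at the parser class in isolation, a rough regex-based stand-in could look like the sketch below. The tok class and this lex() are hypothetical, not the repository's lexer:

    # lexer_stub.py -- hypothetical stand-in, NOT the repository's NFA-based lexer.py
    import re

    class tok:
        # mirrors the two attributes parser.term() and parser.parse() read
        def __init__(self, type, matched_string):
            self.type = type
            self.matched_string = matched_string

    def lex(inp):
        # token type names taken from the grammar and the whitespace filter in parse()
        spec = [("integer", r"\d+"), ("mathbinop", r"[-+*]"),
                ("parens", r"[()]"), ("whitespace_nfa", r"\s+")]
        out = []
        i = 0
        while i < len(inp):
            for name, pattern in spec:
                m = re.match(pattern, inp[i:])
                if m:
                    out.append(tok(name, m.group(0)))
                    i += m.end()
                    break
            else:
                raise ValueError("no token matches at: " + inp[i:])
        return out

If the real lexer is unavailable, saving this as a module importable as lexer should be enough to let the parse call at the bottom of the patch run.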
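
The grammar itself is plain Python data: each nonterminal maps to a list of productions, each production is a list of ["terminal", [token_type, optional_literal]] or ["nonterminal", name] symbols, and match_nterm tries the productions in order. Extending the language is therefore a data edit rather than a code edit. A sketch, assuming the lexer also emits "-" as a mathbinop token (only "+" and "*" appear in the patch); it could be appended at the bottom of parser.py or run in a REPL after importing it:

    # hypothetical production E -> T - E, tried after T + E but before the bare T alternative
    grammar["e"].insert(1, [
        ["nonterminal", "t"],
        ["terminal", ["mathbinop", "-"]],
        ["nonterminal", "e"],
    ])

    p = parser(grammar)
    print(p.parse("(10 - 2) * 5"))

As with the example in the patch, parse() reports whether some prefix of the token stream derives from the start symbol; it does not require the whole input to be consumed.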