Parser doesn't use global variables
parent d913f0f62b
commit 837b302d0f
nfa.py (10 lines changed)
@@ -126,7 +126,7 @@ def match(f, inp):
             return True
     return False

-# takes a list of fields and returns a new field with all of the lists fields concatenated together
+# takes a list of fields and returns a new field with all of the list's fields concatenated together
 def list_to_field(l):
     if len(l) == 0: # this base case shouldn't be hit unless you have an empty regex or start your regex with a + or something
         # all it does is make a field with one terminal node in it
@@ -141,6 +141,11 @@ def list_to_field(l):
     for k in l[1:]:
         final = concatenate(final, k)
     return final
+
+# used for ?
+# start -> (passed field) --ε--> (node) <- that one is terminal
+#              \_______________ε____/
+# adds an ε move from the passed field's start node to the new end node, as well as one from each of the passed field's terminal nodes
 def zero_or_one(f):
     f2 = field()
     f2.nodes = f.nodes

@@ -154,6 +159,7 @@ def zero_or_one(f):
     f2.nodes.add(n)
     f2.start.moves['ε'].add(n)
     return f2
+# takes a regex like "ab(cd*)+f" and makes an nfa field out of it
 def compile(regex):
     to_concat = [] # empty list of things to concatenate
     inparens = False # parenthesis parsing stuff

@@ -178,7 +184,7 @@ def compile(regex):
             ret = either(list_to_field(to_concat), compile(regex[i+1:]))
             ret.orig = regex
             return ret
-        elif regex[i] == "?": # COMPLETELY UNTESTED
+        elif regex[i] == "?":
             to_concat[-1] = zero_or_one(to_concat[-1])
         else: # if we just found a regular character, add it to the stuff to concatenate
             to_concat.append(build_from_char(regex[i]))
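The comments added above zero_or_one describe the whole trick behind ?: create one new terminal node, give the field's start node an ε move to it (so the sub-pattern can be skipped), and give each of the field's old terminal nodes an ε move to it (so the sub-pattern can match exactly once). Below is a minimal, self-contained sketch of that construction; the node/field shapes and the matcher are assumptions for illustration, since the real definitions in nfa.py are not part of this diff.

# Minimal sketch of the ? construction; node/field are assumed shapes,
# not the real classes from nfa.py.
from collections import defaultdict

class node:
    def __init__(self, terminal=False):
        self.terminal = terminal
        self.moves = defaultdict(set)  # symbol (or 'ε') -> set of next nodes

class field:
    def __init__(self):
        self.start = node()
        self.nodes = {self.start}

def build_from_char(c):
    # start --c--> (terminal node)
    f = field()
    end = node(terminal=True)
    f.nodes.add(end)
    f.start.moves[c].add(end)
    return f

def zero_or_one(f):
    # start -> (passed field) --ε--> (node) <- that one is terminal
    #    \________________ε_____________/
    f2 = field()
    f2.start = f.start             # keep the passed field's start node
    f2.nodes = set(f.nodes)
    n = node(terminal=True)        # the new end node
    f2.nodes.add(n)
    f2.start.moves['ε'].add(n)     # skip edge: match the field zero times
    for old in f.nodes:
        if old.terminal:
            old.moves['ε'].add(n)  # fall-through edges: match exactly once
    return f2

def eps_closure(states):
    # all nodes reachable through ε moves alone
    seen, stack = set(states), list(states)
    while stack:
        for nxt in stack.pop().moves['ε']:
            if nxt not in seen:
                seen.add(nxt)
                stack.append(nxt)
    return seen

def matches(f, inp):
    current = eps_closure({f.start})
    for c in inp:
        current = eps_closure({n2 for n1 in current for n2 in n1.moves[c]})
    return any(s.terminal for s in current)

opt_a = zero_or_one(build_from_char('a'))
print(matches(opt_a, ""))    # True
print(matches(opt_a, "a"))   # True
print(matches(opt_a, "aa"))  # False

Run as-is this prints True, True, False: the ε edge from the start node is exactly what lets the empty string through, and nothing allows a second 'a'.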
parser.py (76 lines changed)
@@ -1,17 +1,54 @@
 import sys
 sys.path.append(".")
 import lexer
-def term(t_type, literal = False):
-    global tokens_ptr, tokens
-    this_token = tokens[tokens_ptr]
-    tokens_ptr += 1
-    print("attempting to match '" + str(literal) + "' ("+t_type+") to " + this_token.matched_string + " at position " + str(tokens_ptr - 1))
-    if t_type != this_token.type:
-        return False
-    if not literal:
-        return True
-    return literal == this_token.matched_string

+# I know I really shouldn't be using a class for this, but tokens and tokens_ptr were global variables and I didn't feel like rewriting the whole thing to not use global variables, so now they're technically instance variables
+class parser():
+    def __init__(self, grammar):
+        self.grammar = grammar
+
+    def parse(self, inp):
+        self.tokens = lexer.lex(inp)
+        self.tokens = [t for t in self.tokens if not t.type == "whitespace_nfa"]
+        self.tokens_ptr = 0
+        return self.match_nterm(self.grammar["start"])
+
+    def term(self, t_type, literal = False):
+        this_token = self.tokens[self.tokens_ptr]
+        self.tokens_ptr += 1
+        print("attempting to match '" + str(literal) + "' ("+t_type+") to " + this_token.matched_string + " at position " + str(self.tokens_ptr - 1))
+        if t_type != this_token.type:
+            return False
+        if not literal:
+            return True
+        return literal == this_token.matched_string
+
+    def match_syms(self, syms):
+        # return term(a) and term(b) and term(c)
+        for sym in syms:
+            if not self.match_sym(sym):
+                return False
+        return True
+
+    def match_sym(self, sym):
+        if sym[0] == "terminal":
+            return self.term(*(sym[1]))
+        return self.match_nterm(sym[1])
+
+    def match_nterm(self, nterm):
+        save = self.tokens_ptr
+        for f in self.grammar[nterm]:
+            self.tokens_ptr = save
+            if self.match_syms(f):
+                return True
+        return False

 # Our productions for this context-free grammar
 # E -> T + E
 #    | T
 # T -> int * T
 #    | int
 #    | ( E )
 grammar = {
     "e": [
         [["nonterminal", "t"], ["terminal", ["mathbinop", "+"]], ["nonterminal", "e"]],

@@ -22,31 +59,8 @@ grammar = {
         [["terminal", ["integer"]]],
         [["terminal", ["parens", "("]], ["nonterminal", "e"], ["terminal", ["parens", ")"]]],
     ],
-    "order": ["e", "t"]
+    "start": "e",
 }

-def match_syms(syms):
-    # return term(a) and term(b) and term(c)
-    for sym in syms:
-        if not match_sym(sym):
-            return False
-    return True
-
-def match_sym(sym):
-    if sym[0] == "terminal":
-        return term(*(sym[1]))
-    return match_nterm(sym[1])
-
-def match_nterm(nterm):
-    global tokens_ptr
-    save = tokens_ptr
-    for f in grammar[nterm]:
-        tokens_ptr = save
-        if match_syms(f):
-            return True
-    return False
-
-tokens = lexer.lex("(10+1)*3")
-tokens_ptr = 0
-print(match_nterm("e"))
-print(tokens_ptr)
+p = parser(grammar)
+print(p.parse("(10 + (99 * 44))*3 + 1231"))
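The heart of the refactor is unchanged: match_nterm saves tokens_ptr before trying a nonterminal's productions and rewinds to it before each alternative, which is what lets the parser try "T + E" first and fall back to plain "T" on failure. The condensed sketch below exercises that backtracking standalone; fake_lex and Token are hypothetical stand-ins for lexer.lex (lexer.py is not part of this diff, and the Token shape is inferred from how term uses .type and .matched_string). It also adds a bounds check in term that the committed version omits: without some end-of-input marker from the lexer, self.tokens[self.tokens_ptr] can index past the token list while match_nterm is still probing alternatives.

# Condensed, self-contained sketch of the refactored parser.
# fake_lex/Token are assumptions standing in for lexer.lex.
from collections import namedtuple

Token = namedtuple("Token", ["type", "matched_string"])

def fake_lex(inp):
    kinds = {"+": "mathbinop", "*": "mathbinop", "(": "parens", ")": "parens"}
    return [Token("integer", c) if c.isdigit() else Token(kinds[c], c)
            for c in inp if not c.isspace()]

class parser():
    def __init__(self, grammar):
        self.grammar = grammar

    def parse(self, inp):
        self.tokens = fake_lex(inp)
        self.tokens_ptr = 0
        # like the committed version, this accepts any input whose *prefix*
        # derives from the start symbol; a stricter parse would also require
        # self.tokens_ptr == len(self.tokens) afterwards
        return self.match_nterm(self.grammar["start"])

    def term(self, t_type, literal=False):
        if self.tokens_ptr >= len(self.tokens):  # guard the diff omits
            return False
        this_token = self.tokens[self.tokens_ptr]
        self.tokens_ptr += 1
        if t_type != this_token.type:
            return False
        return True if not literal else literal == this_token.matched_string

    def match_syms(self, syms):
        # every symbol of one production, in order
        for sym in syms:
            if not self.match_sym(sym):
                return False
        return True

    def match_sym(self, sym):
        if sym[0] == "terminal":
            return self.term(*sym[1])
        return self.match_nterm(sym[1])

    def match_nterm(self, nterm):
        save = self.tokens_ptr      # where this attempt started
        for production in self.grammar[nterm]:
            self.tokens_ptr = save  # backtrack before the next alternative
            if self.match_syms(production):
                return True
        return False

grammar = {
    "e": [  # E -> T + E | T
        [["nonterminal", "t"], ["terminal", ["mathbinop", "+"]], ["nonterminal", "e"]],
        [["nonterminal", "t"]],
    ],
    "t": [  # T -> int * T | int | ( E )
        [["terminal", ["integer"]], ["terminal", ["mathbinop", "*"]], ["nonterminal", "t"]],
        [["terminal", ["integer"]]],
        [["terminal", ["parens", "("]], ["nonterminal", "e"], ["terminal", ["parens", ")"]]],
    ],
    "start": "e",
}

print(parser(grammar).parse("1+2*3"))  # True
print(parser(grammar).parse("+1"))     # False: no production starts with "+"

On "1+2*3" the first e production commits to "T + E", recurses, has the trailing "int * T" attempt fail at end of input, rewinds, and succeeds via "T -> int"; that rewind-and-retry loop in match_nterm is the entire backtracking mechanism.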