updated lexer definitions
This commit is contained in:
parent
bda45492f8
commit
852c3a30b0
15
lexer.py
15
lexer.py
|
@ -5,11 +5,18 @@ digit = '+'.join("0123456789")
|
|||
letter = '+'.join(string.ascii_letters) + "+_"
|
||||
upper = '+'.join(string.ascii_uppercase)
|
||||
lower = '+'.join(string.ascii_lowercase)
|
||||
any_char = digit + "+" + letter
|
||||
whitespace = " +\n+\f+\r+\t+\v"
|
||||
any_char = digit + "+" + letter + "+" + whitespace
|
||||
any_string = "("+any_char+")*"
|
||||
|
||||
integer = nfa.compile("(" + digit + ")*")
|
||||
identifier = nfa.compile("(" + letter + ")(" + any_char + ")*") # letter followed by any number of letters or digits
|
||||
string = nfa.compile("(\"(" + any_char + ")*\")" + "('("+any_char+")*')")
|
||||
integer = nfa.compile("-?(" + digit + ")*") # may or may not be negative
|
||||
identifier = nfa.compile("(" + letter + ")(" + letter + "+" + digit + ")*") # letter followed by any number of letters or digits
|
||||
string = nfa.compile("(\"(" + any_string + ")\")+('("+any_string+")')")
|
||||
comment = nfa.compile("--(" + any_char + ")*")
|
||||
keyword = nfa.compile("+".join(["class", "else", "false", "fi", "if", "in", "inherits", "isvoid", "let", "loop", "pool", "then", "while", "case", "esac", "new", "of", "not", "true"]))
|
||||
assign = nfa.compile("<-")
|
||||
relop = nfa.compile("+".join(["<", "<=", ">", ">=", "=", "<>"]))
|
||||
|
||||
# print("Integer's regex was " + integer.orig)
|
||||
# print("string's regex was " + string.orig)
|
||||
#nfa.pmap(identifier)
|
||||
|
|
20
nfa.py
20
nfa.py
|
@ -139,6 +139,19 @@ def list_to_field(l):
|
|||
for k in l[1:]:
|
||||
final = concatenate(final, k)
|
||||
return final
|
||||
def zero_or_more(f):
    """Return a field that accepts either the empty input or whatever ``f`` accepts.

    A fresh terminal node is created; every node of ``f`` that was terminal is
    demoted and given an ε-move to the fresh node, and the start node also gets
    an ε-move to it so the whole pattern can be skipped.

    NOTE(review): despite the name, no edge is added back towards the start
    node here, so the wrapped pattern is not repeated by this construction.

    The returned field shares ``f``'s node collection and start node (they are
    assigned, not copied), so ``f`` itself is modified by this call.
    Assumes ``moves['ε']`` is a set-like object that supports ``add``.
    """
    optional = field()
    optional.nodes = f.nodes  # same collection object as f.nodes, not a copy
    optional.start = f.start

    accept = nfa()
    accept.terminal = True

    for node in f.nodes:
        if not node.terminal:
            continue
        # Former accepting states now only lead to the single new one.
        node.terminal = False
        node.moves['ε'].add(accept)

    # Register the new node only after the loop so the collection is not
    # changed while it is being iterated.
    optional.nodes.add(accept)
    # Skipping the pattern entirely: start jumps straight to the accept node.
    optional.start.moves['ε'].add(accept)
    return optional
|
||||
def compile(regex):
|
||||
to_concat = [] # empty list of things to concatenate
|
||||
inparens = False # parenthesis parsing stuff
|
||||
|
@ -161,9 +174,13 @@ def compile(regex):
|
|||
to_concat[-1] = iterate(to_concat[-1])
|
||||
elif regex[i] == "+": # kind of a hack and gives + the highest possible operator precedence
|
||||
return either(list_to_field(to_concat), compile(regex[i+1:]))
|
||||
elif regex[i] == "?": # COMPLETELY UNTESTED
|
||||
to_concat.append(zero_or_more(to_concat[-1]))
|
||||
else: # if we just found a regular character, add it to the stuff to concatenate
|
||||
to_concat.append(build_from_char(regex[i]))
|
||||
return list_to_field(to_concat)
|
||||
ret = list_to_field(to_concat)
|
||||
ret.orig = regex
|
||||
return ret
|
||||
|
||||
def addr(node):
    """Return the ``id`` attribute of ``node`` converted to a string.

    This used to be a hack that printed the node's memory address prefixed
    with ``_`` so graphviz didn't split it at the end of the number.
    """
    node_id = node.id
    return str(node_id)
|
||||
|
@ -206,6 +223,7 @@ def pmap(f): # prints out the passed field in a way that dot can compile to an s
|
|||
|
||||
#print(match(full2, "aab"))
|
||||
#pmap(full2)
|
||||
#pmap(zero_or_more(build_from_char("a")))
|
||||
|
||||
#pmap(compile("ab(c1+2d(e*f)d)*e"))
|
||||
#pmap(either(build_from_char('a'), build_from_char('b')))
|
||||
|
|
Loading…
Reference in New Issue