initial commit
This commit is contained in:
commit
26d83f08dc
|
@ -0,0 +1,198 @@
|
||||||
|
import random
|
||||||
|
# Symbol that stands for an epsilon (spontaneous) transition.
# NOTE(review): the rest of this file spells the literal 'ε' directly
# instead of referencing this name -- confirm whether it is still needed.
epsilon_literal = 'ε'
|
||||||
|
class field:
    """Container describing one automaton fragment.

    Instance attributes:
      nodes -- set of the state objects that belong to the fragment.
      start -- entry state of the fragment; False until one is assigned.
    """

    def __init__(self):
        # Placeholder value until a builder stores the real entry state.
        self.start = False
        # Each instance owns its own, initially empty, set of states.
        self.nodes = set()
|
||||||
|
class nfa:
    """A single automaton state.

    Instance attributes:
      terminal -- True when the state is accepting.
      moves    -- transition table; the 'ε' key always maps to a set of
                  successor states, any other key holds whatever the
                  builder stored for that symbol.
      id       -- integer label, distinct for every state created.
    """

    # Next label to hand out.  A running counter guarantees distinct
    # labels; labels drawn at random from a small range collide quickly,
    # which makes two different states indistinguishable when printed.
    _next_id = 0

    def __init__(self):
        self.terminal = False
        self.moves = {'ε': set()}
        self.id = nfa._next_id
        nfa._next_id += 1
|
||||||
|
|
||||||
|
def build_from_char(c):
    """Build the two-state fragment that accepts exactly the symbol ``c``.

    c -- the single symbol to match.  The epsilon symbol 'ε' is treated
         as the empty string: the end state is then reached without
         consuming any input.

    Returns a ``field`` whose start state leads to a fresh terminal state.
    """
    base = nfa()
    end = nfa()
    end.terminal = True
    if c == 'ε':
        # moves['ε'] has to stay a *set* of states: other routines call
        # .add() on it and iterate over it, so storing a bare state under
        # that key would break them.
        base.moves['ε'].add(end)
    else:
        base.moves[c] = end
    f = field()
    f.start = base
    f.nodes = {base, end}
    return f
|
||||||
|
def iterate(f):
    """Wrap the fragment ``f`` so that it may repeat zero or more times.

    The states of ``f`` are modified in place: every terminal state loses
    its terminal flag and gains epsilon moves to the new end state and
    back to the new start state.

    Returns a new ``field`` holding the states of ``f`` plus the two new
    outer states, with the new outer start state as its entry.
    """
    loop_entry = nfa()
    loop_exit = nfa()
    loop_exit.terminal = True
    # From the new entry one can step into the wrapped fragment or skip
    # it altogether.
    loop_entry.moves['ε'] = {f.start, loop_exit}

    for state in f.nodes:
        if not state.terminal:
            continue
        state.terminal = False
        state.moves['ε'].update((loop_exit, loop_entry))

    starred = field()
    starred.nodes.update(f.nodes)
    starred.nodes.add(loop_entry)
    starred.nodes.add(loop_exit)
    starred.start = loop_entry
    return starred
|
||||||
|
def either(a, b):
    """Build the fragment that accepts whatever ``a`` or ``b`` accepts.

    a, b -- the two alternative fragments.  Their states are modified in
            place (terminal states are demoted and linked to the new end
            state), but their ``nodes`` sets are left untouched.

    Returns a new ``field`` with a fresh start state that has epsilon
    moves into both alternatives and a fresh terminal end state.
    """
    new_start = nfa()
    new_start.moves['ε'].add(a.start)
    new_start.moves['ε'].add(b.start)
    new_end = nfa()
    new_end.terminal = True

    # Build a separate set: assigning ``a.nodes`` directly would alias
    # the caller's set and silently grow it with foreign states.
    inner = set(a.nodes).union(b.nodes)
    for node in inner:
        if node.terminal:
            node.terminal = False
            node.moves['ε'].add(new_end)

    f = field()
    f.start = new_start
    f.nodes = inner | {new_start, new_end}
    return f
|
||||||
|
def concatenate(a, b):
    """Build the fragment that accepts ``a`` followed by ``b``.

    Every terminal state of ``a`` is demoted in place and given an
    epsilon move to the start state of ``b``.  The returned ``field``
    starts where ``a`` starts and contains the states of both fragments.
    """
    joined = field()
    joined.start = a.start
    for state in a.nodes:
        if state.terminal:
            state.terminal = False
            state.moves['ε'].add(b.start)
    joined.nodes.update(a.nodes)
    joined.nodes.update(b.nodes)
    return joined
|
||||||
|
|
||||||
|
def union(a, b):
    """Return a new set holding every element of ``a`` and of ``b``.

    a, b -- any iterables of hashable elements; neither is modified.
    """
    return set(a).union(b)
|
||||||
|
|
||||||
|
# def add_epsilon_moves(s): # needs to be redone from scratch
|
||||||
|
# # new_s = set(s)
|
||||||
|
# new_s = set()
|
||||||
|
# for state in s:
|
||||||
|
# new_s.add(state)
|
||||||
|
# for possible_move in state.moves['ε']:
|
||||||
|
# #new_s.add(possible_move)
|
||||||
|
# #new_s = union(new_s, add_epsilon_moves(set([possible_move])))
|
||||||
|
# for m in add_epsilon_moves(set([possible_move])):
|
||||||
|
# new_s.add(m)
|
||||||
|
# return new_s
|
||||||
|
def add_epsilon_moves(s, ignore=None):
    """Return ``s`` extended with every state reachable via epsilon moves.

    s      -- iterable of starting states; each needs a ``moves`` dict
              whose 'ε' entry is an iterable of successor states.
    ignore -- optional collection of states that must not be entered.
              States listed here are neither added nor expanded (unless
              they are already part of ``s``).

    Returns a new set; ``s`` and ``ignore`` are not modified.
    """
    closure = set(s)
    blocked = set(ignore) if ignore else set()
    # Explicit worklist instead of recursion: every state is expanded at
    # most once, and long epsilon chains cannot exhaust the call stack.
    pending = list(closure)
    while pending:
        state = pending.pop()
        for target in state.moves['ε']:
            if target in closure or target in blocked:
                continue
            closure.add(target)
            pending.append(target)
    return closure
|
||||||
|
|
||||||
|
def compute(f, inp):
    """Return True when the automaton ``f`` accepts the whole string ``inp``.

    f   -- a ``field`` whose ``start`` attribute is the entry state.
    inp -- the input string, consumed one character at a time.

    The set of active states is closed under epsilon moves before the
    first character, after every character, and therefore also before
    the final acceptance check.
    """
    states = add_epsilon_moves({f.start})
    for c in inp:
        if c == 'ε':
            # The 'ε' key holds a *set* of spontaneous successors; it is
            # never a transition that consumes an input character.
            return False
        stepped = set()
        for state in states:
            target = state.moves.get(c)
            if target is not None:
                stepped.add(target)
        if not stepped:
            # No state can consume this character: the input is rejected.
            return False
        states = add_epsilon_moves(stepped)
    return any(state.terminal for state in states)
|
||||||
|
|
||||||
|
# Sample automata: ``full`` is built for the pattern "ab" and ``full2``
# for the pattern "a*b".
full = concatenate(build_from_char("a"), build_from_char("b"))
full2 = concatenate(iterate(build_from_char("a")), build_from_char("b"))
|
||||||
|
|
||||||
|
|
||||||
|
# strings = ["aa", "ab", "ba", "bb", "aba", "aab", "aaab", "aaba", "b"]
|
||||||
|
# for s in strings:
|
||||||
|
# print("String " + s + " " + ("matches" if compute(full, s) else "does not match") + " pattern " + "ab")
|
||||||
|
# print()
|
||||||
|
# for s in strings:
|
||||||
|
# print("String " + s + " " + ("matches" if compute(full2, s) else "does not match") + " pattern " + "a*b")
|
||||||
|
|
||||||
|
#print(compute(full2, "aab"))
|
||||||
|
def addr(node):
    """Return the text label of ``node``: its ``id`` attribute as a string."""
    # An earlier variant derived the label from the object's repr:
    # return str(node).split()[3].replace(">", "").replace("0x", "_")
    label = str(node.id)
    return label
|
||||||
|
def pmap(f):
    """Print the automaton ``f`` to stdout as a Graphviz ``digraph``.

    f -- a ``field``; every state in ``f.nodes`` is printed.

    Each transition becomes one edge.  Epsilon moves are labelled
    ``epsilon``; other moves are labelled with their symbol.  Terminal
    states and the start state are marked by labelled self-loops.
    """
    print("digraph test {")
    for node in f.nodes:
        src = addr(node)
        for char, move in node.moves.items():
            if char == 'ε':
                # The epsilon entry is a set of successor states.
                for m in move:
                    print(src + " -> " + addr(m) + " [label=epsilon]")
            else:
                # Quote the label: a bare symbol such as '.' or '-' is
                # not a valid DOT identifier and would break the output.
                escaped = char.replace("\\", "\\\\").replace('"', '\\"')
                print(src + " -> " + addr(move) + ' [label="' + escaped + '"]')
        if node.terminal:
            print(src + " -> " + src + " [label=terminal]")
    start = addr(f.start)
    print(start + " -> " + start + " [label=start]")
    print("}")
|
||||||
|
#pmap(full2)
|
||||||
|
#pmap(iterate(a))
|
||||||
|
|
||||||
|
def list_to_field(l):
    """Concatenate the fragments in ``l``, left to right, into one fragment.

    An empty list yields a fragment made of a single state that is both
    the start state and terminal.  A one-element list yields that element
    itself.
    """
    if len(l) != 0:
        chained = l[0]
        for following in l[1:]:
            chained = concatenate(chained, following)
        return chained

    # Nothing to concatenate: build the one-state fragment.
    empty = field()
    accept = nfa()
    accept.terminal = True
    empty.start = accept
    empty.nodes = {accept}
    return empty
|
||||||
|
def build_from_regex(regex):
    """Translate the pattern string ``regex`` into an automaton fragment.

    Supported syntax:
      (...)  -- grouping; the group body is translated recursively.
      x*     -- zero or more repetitions of the preceding item.
      l+r    -- alternation between everything to the left and everything
                to the right of the '+' at the current nesting level.
      other  -- any other character matches itself.  A ')' without a
                matching '(' is treated as such a plain character.

    Returns a ``field``.

    Raises ValueError when a '(' is never closed, and IndexError when a
    '*' has no preceding item to repeat.
    """
    to_concat = []
    depth = 0    # nesting level of the group currently being collected
    group = []   # characters of that group, without the outer parentheses

    for i, ch in enumerate(regex):
        if depth:
            if ch == ")":
                depth -= 1
                if depth == 0:
                    to_concat.append(build_from_regex("".join(group)))
                    continue
            elif ch == "(":
                depth += 1
            # Inner parentheses stay part of the text for the recursion.
            group.append(ch)
        elif ch == "(":
            depth = 1
            group = []
        elif ch == "*":
            to_concat[-1] = iterate(to_concat[-1])
        elif ch == "+":
            # Splitting the whole remaining pattern here makes '+' bind
            # more loosely than concatenation and '*'.
            return either(list_to_field(to_concat), build_from_regex(regex[i + 1:]))
        else:
            to_concat.append(build_from_char(ch))

    if depth:
        # Returning here would silently drop the unfinished group.
        raise ValueError("unbalanced parentheses in regex: missing ')'")
    return list_to_field(to_concat)
|
||||||
|
#pmap(build_from_regex("ab(c1+2d(e*f)d)*e"))
|
||||||
|
#pmap(either(build_from_char('a'), build_from_char('b')))
|
||||||
|
# x = build_from_regex("(1+0)*1")
|
||||||
|
# #pmap(x)
|
||||||
|
# for s in ["101", "111110", "11001", "1", "0"]:
|
||||||
|
# print(compute(x, s))
|
||||||
|
# Demo run: build the automaton for the pattern "a+b+c" and print it as
# a graph description.
x = build_from_regex("a+b+c")
pmap(x)
|
Loading…
Reference in New Issue