Skip to content

Commit 751b0aa

Browse files
committed
add nfa
1 parent 1be234e commit 751b0aa

File tree

3 files changed

+91
-2
lines changed

3 files changed

+91
-2
lines changed

README.md

+2-1
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,8 @@ Try to keep the interface and variable name consistent with the original book wh
8585
* [TrieST](algs4/trie_st.py)
8686
* [TST](algs4/tst.py)
8787
* [KMP](algs4/kmp.py)
88-
88+
* [NFA](algs4/nfa.py)
89+
8990
## License
9091

9192
This code is released under MIT.

algs4/bag.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ def __init__(self):
1919
self.n = 0
2020

2121
def __str__(self):
22-
return " ".join(i for i in self)
22+
return " ".join(str(i) for i in self)
2323

2424
def __iter__(self):
2525
return LinkIterator(self.first)

algs4/nfa.py

+88
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
"""
2+
* Execution: python nfa.py regexp text
3+
*
4+
* % python nfa.py "(A*B|AC)D" AAAABD
5+
* True
6+
*
7+
* % python nfa.py "(A*B|AC)D" AAAAC
8+
* False
9+
*
10+
* % python nfa.py "(a|(bc)*d)*" abcbcd
11+
* True
12+
*
13+
* % python nfa.py "(a|(bc)*d)*" abcbcbcdaaaabcbcdaaaddd
14+
* True
15+
*
16+
* Remarks
17+
* -----------
18+
* The following features are not supported:
19+
* - The + operator
20+
* - Multiway or
21+
* - Metacharacters in the text
22+
* - Character classes.
23+
*
24+
"""
25+
26+
from algs4.bag import Bag
27+
from algs4.digraph import Digraph
28+
from algs4.directed_dfs import DirectedDFS
29+
from algs4.stack import Stack
30+
31+
32+
class NFA:
    """Nondeterministic finite automaton built from a regular expression.

    State ``i`` corresponds to character ``i`` of the pattern, and state ``M``
    (the pattern length) is the accept state.  Epsilon transitions are stored
    as edges of the digraph ``G``.  Match transitions are implicit: a state
    holding an ordinary character moves to the next state on that character,
    and a state holding ``.`` moves to the next state on any character.

    Handled operators are concatenation, parentheses, two-way ``|`` and ``*``.
    The pattern is not validated; leftover operators on the stack (unbalanced
    parentheses) are not reported by this class.
    """

    # Pattern characters that only ever carry epsilon transitions.  There is
    # no escape syntax, so they never stand for a literal text character.
    _METACHARACTERS = frozenset("()|*")

    def __init__(self, regexp):
        """Build the epsilon-transition digraph for ``regexp``.

        Args:
            regexp: the pattern string.
        """
        ops = Stack()  # indices of pending "(" and "|" operators
        M = len(regexp)
        G = Digraph(M + 1)
        for i in range(M):
            # lp is the start of the sub-expression that ends at i; for a
            # single character that is the character itself.
            lp = i
            if regexp[i] == "(" or regexp[i] == "|":
                ops.push(i)
            elif regexp[i] == ")":
                op = ops.pop()
                if regexp[op] == "|":
                    # Two-way or: jump from "(" past the "|", and from the
                    # "|" to the closing ")".
                    lp = ops.pop()
                    G.add_edge(lp, op + 1)
                    G.add_edge(op, i)
                else:
                    lp = op
            # Closure, detected with a one-character lookahead.
            if i < M - 1 and regexp[i + 1] == "*":
                G.add_edge(lp, i + 1)
                G.add_edge(i + 1, lp)
            # Metacharacters other than "|" fall through to the next state.
            if regexp[i] in ("(", "*", ")"):
                G.add_edge(i, i + 1)
        self.M = M
        self.G = G
        self.re = regexp

    def _reachable(self, sources):
        """Return a Bag of the states marked by a DFS over ``G`` from ``sources``."""
        dfs = DirectedDFS(self.G, sources)
        states = Bag()
        for v in range(self.G.V):
            if dfs.marked(v):
                states.add(v)
        return states

    def recognizes(self, txt):
        """Return True if the whole of ``txt`` is matched by the pattern.

        Metacharacter states ("(", ")", "|", "*") never take a match
        transition, so a text character that happens to be one of those
        symbols can only be consumed by a "." in the pattern.  Previously
        such a character was compared against the metacharacter itself,
        which let, for example, the pattern "(A)" accept the text "(A".

        Args:
            txt: the text to test.

        Returns:
            True if the accept state is reachable after consuming every
            character of ``txt``, otherwise False.
        """
        # States reachable from the start state before reading any input.
        pc = self._reachable([0])
        for ch in txt:
            match = Bag()
            for v in pc:
                if v >= self.M:
                    # The accept state has no outgoing match transition.
                    continue
                symbol = self.re[v]
                if symbol in self._METACHARACTERS:
                    continue
                if symbol == ch or symbol == ".":
                    match.add(v + 1)
            # Follow epsilon transitions from every state reached on ch.
            pc = self._reachable(match)
        return any(v == self.M for v in pc)

    def char_at(self, s, d):
        """Return the integer code point of the character at index ``d`` of ``s``."""
        return ord(s[d])
83+
84+
if __name__ == "__main__":
    # Command-line driver: python nfa.py regexp text
    import sys

    pattern = sys.argv[1]
    txt = sys.argv[2]
    nfa = NFA(pattern)
    # Prints the boolean result of matching the whole text.
    print(nfa.recognizes(txt))

0 commit comments

Comments
 (0)