|
| 1 | +""" |
| 2 | + * Execution: python nfa.py regexp text |
| 3 | + * |
| 4 | + * % python nfa.py "(A*B|AC)D" AAAABD |
| 5 | + * True |
| 6 | + * |
| 7 | + * % python nfa.py "(A*B|AC)D" AAAAC |
| 8 | + * False |
| 9 | + * |
| 10 | + * % python nfa.py "(a|(bc)*d)*" abcbcd |
| 11 | + * True |
| 12 | + * |
| 13 | + * % python nfa.py "(a|(bc)*d)*" abcbcbcdaaaabcbcdaaaddd |
| 14 | + * True |
| 15 | + * |
| 16 | + * Remarks |
| 17 | + * ----------- |
| 18 | + * The following features are not supported: |
| 19 | + * - The + operator |
| 20 | + * - Multiway or |
| 21 | + * - Metacharacters in the text |
| 22 | + * - Character classes. |
| 23 | + * |
| 24 | +""" |
| 25 | + |
| 26 | +from algs4.bag import Bag |
| 27 | +from algs4.digraph import Digraph |
| 28 | +from algs4.directed_dfs import DirectedDFS |
| 29 | +from algs4.stack import Stack |
| 30 | + |
| 31 | + |
class NFA:
    """Nondeterministic finite automaton built from a regular expression.

    State ``i`` corresponds to the character ``regexp[i]``; state
    ``M == len(regexp)`` is the accept state.  Epsilon transitions are stored
    as edges of the digraph ``G``.  Match transitions are implicit: a state
    holding a literal character (or the wildcard ``.``) moves to the next
    state when it matches the current text character.

    The recognized operators are ``(``, ``)``, ``|``, ``*`` and ``.``.
    Alternation is only wired up when it is enclosed in parentheses.

    Attributes:
        M: number of characters in the regular expression.
        G: epsilon-transition digraph with ``M + 1`` vertices.
        re: the regular expression string.
    """

    # Pattern operators.  Their states carry epsilon transitions only, so they
    # must never be compared against characters of the text.
    _METACHARS = "(|)*"

    def __init__(self, regexp):
        """Build the epsilon-transition digraph for ``regexp``."""
        ops = Stack()  # indices of pending "(" and "|" operators
        M = len(regexp)
        G = Digraph(M + 1)
        for i, ch in enumerate(regexp):
            lp = i  # start of the sub-expression that ends at position i
            if ch in ("(", "|"):
                ops.push(i)
            elif ch == ")":
                op = ops.pop()
                if regexp[op] == "|":
                    # Two-way alternation: "(" can skip to the right operand,
                    # and "|" jumps to the closing parenthesis.
                    lp = ops.pop()
                    G.add_edge(lp, op + 1)
                    G.add_edge(op, i)
                else:
                    lp = op
            # Closure: one-character lookahead for "*".
            if i < M - 1 and regexp[i + 1] == "*":
                G.add_edge(lp, i + 1)
                G.add_edge(i + 1, lp)
            # Operators other than "|" fall through to the next state.
            if ch in ("(", "*", ")"):
                G.add_edge(i, i + 1)
        self.M = M
        self.G = G
        self.re = regexp

    def _reachable(self, sources):
        """Return the states reachable from ``sources`` via epsilon edges."""
        dfs = DirectedDFS(self.G, sources)
        return [v for v in range(self.G.V) if dfs.marked(v)]

    def recognizes(self, txt):
        """Return True if the whole of ``txt`` is matched by the pattern.

        A text character can only be consumed by a literal state or by the
        wildcard ``.``.  Metacharacter states are skipped, so a ``(``, ``)``,
        ``|`` or ``*`` occurring in the text is never matched against the
        pattern's own operators.
        """
        states = self._reachable([0])
        for ch in txt:
            matched = [
                v + 1
                for v in states
                if v < self.M
                and self.re[v] not in self._METACHARS
                and self.re[v] in (ch, ".")
            ]
            if not matched:
                # No live state remains, so the accept state is unreachable.
                return False
            states = self._reachable(matched)
        return self.M in states

    def char_at(self, s, d):
        """Return the code point of the character at index ``d`` of ``s``."""
        return ord(s[d])
| 82 | + |
| 83 | + |
if __name__ == "__main__":
    import sys

    # Fail with a usage message instead of a bare IndexError traceback.
    if len(sys.argv) < 3:
        sys.exit("usage: python nfa.py regexp text")
    # Wrap the pattern in parentheses so that a top-level alternation such as
    # "A|B" is enclosed and gets its epsilon transitions; the NFA only wires
    # up "|" when it meets the matching ")".
    pattern, txt = "(" + sys.argv[1] + ")", sys.argv[2]
    nfa = NFA(pattern)
    print(nfa.recognizes(txt))
0 commit comments