|
| 1 | +""" |
| 2 | + * Execution: python kmp.py pattern text |
| 3 | + * |
| 4 | + * Reads in two strings, the pattern and the input text, and |
| 5 | + * searches for the pattern in the input text using the |
| 6 | + * KMP algorithm. |
| 7 | + * |
| 8 | + * % python kmp.py abracadabra abacadabrabracabracadabrabrabracad |
| 9 | + * text: abacadabrabracabracadabrabrabracad |
| 10 | + * pattern: abracadabra |
| 11 | + * |
| 12 | + * % python kmp.py rab abacadabrabracabracadabrabrabracad |
| 13 | + * text: abacadabrabracabracadabrabrabracad |
| 14 | + * pattern: rab |
| 15 | + * |
| 16 | + * % python kmp.py bcara abacadabrabracabracadabrabrabracad |
| 17 | + * text: abacadabrabracabracadabrabrabracad |
| 18 | + * pattern: bcara |
| 19 | + * |
| 20 | + * % python kmp.py rabrabracad abacadabrabracabracadabrabrabracad |
| 21 | + * text: abacadabrabracabracadabrabrabracad |
| 22 | + * pattern: rabrabracad |
| 23 | + * |
| 24 | + * % python kmp.py abacad abacadabrabracabracadabrabrabracad |
| 25 | + * text: abacadabrabracabracadabrabrabracad |
| 26 | + * pattern: abacad |
| 27 | + * |
| 28 | +""" |
| 29 | + |
| 30 | + |
class KMP:
    """Knuth-Morris-Pratt substring search driven by a precomputed DFA.

    The constructor compiles ``pattern`` into a deterministic finite
    automaton; ``search`` then scans a text once, never backing up, and
    reports where the pattern first occurs.

    Attributes:
        pattern: The pattern string this matcher was built for.
        R: Alphabet size, i.e. the number of rows in ``dfa``. It is 256
            unless the pattern contains a character with a larger code
            point, in which case it grows just enough to cover it.
        dfa: Transition table indexed as ``dfa[char_code][state]``; the
            value is the state reached after reading that character.
    """

    def __init__(self, pattern):
        """Build the DFA for ``pattern``.

        Args:
            pattern: The string to search for. May be empty, in which case
                every search matches at offset 0.
        """
        self.pattern = pattern
        M = len(pattern)
        codes = [self.char_at(pattern, j) for j in range(M)]
        # Keep the historical 256-symbol alphabet, but widen it when the
        # pattern itself uses larger code points so indexing cannot overflow.
        self.R = max(256, max(codes, default=-1) + 1)
        self.dfa = [[0] * M for _ in range(self.R)]
        if M == 0:
            # No states to wire up; search() reports a match at offset 0.
            return
        self.dfa[codes[0]][0] = 1
        # X is the restart state: where the automaton would be after reading
        # pattern[1..j-1] from state 0.
        X = 0
        for j in range(1, M):
            for c in range(self.R):
                # Mismatch transitions are copied from the restart state.
                self.dfa[c][j] = self.dfa[c][X]
            # The matching character advances to the next state.
            self.dfa[codes[j]][j] = j + 1
            X = self.dfa[codes[j]][X]

    def search(self, txt):
        """Return the index of the first occurrence of the pattern in ``txt``.

        Args:
            txt: The text to scan.

        Returns:
            The offset of the first match, or ``len(txt)`` when the pattern
            does not occur in ``txt``.
        """
        N, M = len(txt), len(self.pattern)
        R = self.R
        i, j = 0, 0
        while i < N and j < M:
            c = self.char_at(txt, i)
            # A character outside the table's alphabet cannot appear in the
            # pattern, so it always sends the automaton back to state 0.
            j = self.dfa[c][j] if c < R else 0
            i += 1
        # Found (hit end of pattern)
        if j == M:
            return i - M
        # Not Found (hit end of text)
        return N

    def char_at(self, s, d):
        """Return the integer code point of the character ``s[d]``."""
        return ord(s[d])
| 60 | + |
| 61 | + |
if __name__ == "__main__":
    import sys

    # Both the pattern and the text are required; fail with a usage message
    # instead of an IndexError traceback when either is missing.
    if len(sys.argv) < 3:
        sys.exit("usage: python kmp.py pattern text")
    pat, txt = sys.argv[1], sys.argv[2]

    kmp = KMP(pat)
    offset = kmp.search(txt)

    # The two labels are padded to the same width so that indenting the
    # pattern by `offset` places it directly beneath its match in the text
    # (or just past the end of the text when there is no match).
    print("text:    " + txt)
    print("pattern: " + " " * offset + pat)
0 commit comments