diff --git a/strings/rabin_karp.py b/strings/rabin_karp.py index 04a849266ead..9fae48abd6fd 100644 --- a/strings/rabin_karp.py +++ b/strings/rabin_karp.py @@ -1,6 +1,11 @@ +# Numbers of alphabet which we call base +alphabet_size = 256 +# Modulus to hash a string +modulus = 1000003 + + def rabin_karp(pattern, text): """ - The Rabin-Karp Algorithm for finding a pattern within a piece of text with complexity O(nm), most efficient when it is used with multiple patterns as it is able to check if any of a set of patterns match a section of text in o(1) given the precomputed hashes. @@ -12,22 +17,38 @@ def rabin_karp(pattern, text): 2) Step through the text one character at a time passing a window with the same length as the pattern calculating the hash of the text within the window compare it with the hash of the pattern. Only testing equality if the hashes match - """ p_len = len(pattern) - p_hash = hash(pattern) + t_len = len(text) + if p_len > t_len: + return False + + p_hash = 0 + text_hash = 0 + modulus_power = 1 - for i in range(0, len(text) - (p_len - 1)): + # Calculating the hash of pattern and substring of text + for i in range(p_len): + p_hash = (ord(pattern[i]) + p_hash * alphabet_size) % modulus + text_hash = (ord(text[i]) + text_hash * alphabet_size) % modulus + if i == p_len - 1: + continue + modulus_power = (modulus_power * alphabet_size) % modulus - # written like this t - text_hash = hash(text[i:i + p_len]) - if text_hash == p_hash and \ - text[i:i + p_len] == pattern: + for i in range(0, t_len - p_len + 1): + if text_hash == p_hash and text[i : i + p_len] == pattern: return True + if i == t_len - p_len: + continue + # Calculating the ruling hash + text_hash = ( + (text_hash - ord(text[i]) * modulus_power) * alphabet_size + + ord(text[i + p_len]) + ) % modulus return False -if __name__ == '__main__': +if __name__ == "__main__": # Test 1) pattern = "abc1abc12" text1 = "alskfjaldsabc1abc1abc12k23adsfabcabc"