-
Notifications
You must be signed in to change notification settings - Fork 39
/
Copy pathrabin_karp.py
55 lines (47 loc) · 1.51 KB
/
rabin_karp.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
#!/usr/bin/python
# Date: 2020-08-14
#
# Description:
# Rabin karp algo to find all occurrences of a pattern string from a large text
# string.
#
# Approach:
# Idea behind rabin karp is a rolling hash is used which can be used to
# compute new hash from existing hash when a new character is added at end and
# an existing character is removed from beginning.
#
# Reference:
# http://www-igm.univ-mlv.fr/~lecroq/string/node5.html#SECTION0050
#
# Complexity:
# Space: O(1)
# Time: Average case - O(n + m), Worst case - O(m*(n-m))
def rehash(prev_hash, first, last, d):
"""Returns new hash if first char removed and last char is added."""
return ((prev_hash - ord(first) * d) << 1) + ord(last)
def rabin_karp(txt, pat):
"""Returns list of indices in text string which matches given pattern."""
matches = []
hy = 0 # Hash of running text frame
hx = 0 # Hash of pattern
n = len(txt)
m = len(pat)
if m > n:
return []
d = 1 << m - 1
for i in range(m):
hx = (hx << 1) + ord(pat[i])
hy = (hy << 1) + ord(txt[i])
# Search
j = 0
while j <= n - m:
if hx == hy and pat == txt[j:j + m]:
matches.append(j)
if j < n - m:
hy = rehash(hy, txt[j], txt[j + m], d)
j += 1
return matches
assert rabin_karp('THIS IS A TEST TEXT', 'TEST') == [10]
assert rabin_karp('AABAACAADAABAABA', 'AABA') == [0, 9, 12]
assert rabin_karp('AAAAABAAABA', 'AAAA') == [0, 1]
assert rabin_karp('ABABDABACDABABCABAB', 'ABABCABAB') == [10]