KMP Algorithm — O(n+m) Pattern Matching with Failure Function
Advertisement
KMP Overview
Failure function lps[i] = length of longest proper prefix of pattern[0..i] that is also a suffix.
During search: when mismatch at pattern[j], jump to pattern[lps[j-1]] — don't reset to 0.
Solutions
Python
def kmp_search(text, pattern):
def build_lps(p):
lps = [0]*len(p)
length = 0
i = 1
while i < len(p):
if p[i] == p[length]:
length += 1
lps[i] = length
i += 1
elif length:
length = lps[length-1]
else:
lps[i] = 0
i += 1
return lps
lps = build_lps(pattern)
results = []
i = j = 0
while i < len(text):
if text[i] == pattern[j]:
i += 1; j += 1
if j == len(pattern):
results.append(i-j)
j = lps[j-1]
elif i < len(text) and text[i] != pattern[j]:
if j: j = lps[j-1]
else: i += 1
return results
JavaScript
function kmpSearch(text, pattern) {
const buildLPS=(p)=>{const lps=new Array(p.length).fill(0);let len=0,i=1;while(i<p.length){if(p[i]===p[len]){lps[i++]=++len;}else if(len){len=lps[len-1];}else lps[i++]=0;}return lps;};
const lps=buildLPS(pattern); const res=[]; let i=0,j=0;
while(i<text.length){
if(text[i]===pattern[j]){i++;j++;}
if(j===pattern.length){res.push(i-j);j=lps[j-1];}
else if(i<text.length&&text[i]!==pattern[j]){if(j)j=lps[j-1];else i++;}
}
return res;
}
Java
public List<Integer> kmpSearch(String text, String pat) {
int n=text.length(),m=pat.length();
int[] lps=new int[m]; int len=0,i=1;
while(i<m){if(pat.charAt(i)==pat.charAt(len))lps[i++]=++len;else if(len>0)len=lps[len-1];else lps[i++]=0;}
List<Integer>res=new ArrayList<>(); int j=0; i=0;
while(i<n){if(text.charAt(i)==pat.charAt(j)){i++;j++;}
if(j==m){res.add(i-j);j=lps[j-1];}
else if(i<n&&text.charAt(i)!=pat.charAt(j)){if(j>0)j=lps[j-1];else i++;}}
return res;
}
C++
#include <vector>
#include <string>
using namespace std;
vector<int> kmpSearch(string text,string pat){
int m=pat.size(); vector<int>lps(m,0); int len=0,i=1;
while(i<m){if(pat[i]==pat[len])lps[i++]=++len;else if(len)len=lps[len-1];else lps[i++]=0;}
vector<int>res; int j=0; i=0;
while(i<(int)text.size()){if(text[i]==pat[j]){i++;j++;}
if(j==m){res.push_back(i-j);j=lps[j-1];}
else if(i<(int)text.size()&&text[i]!=pat[j]){if(j)j=lps[j-1];else i++;}}
return res;
}
C
#include <string.h>
#include <stdlib.h>
void kmpSearch(char* text, char* pat, int* res, int* cnt) {
int n=strlen(text),m=strlen(pat);
int* lps=calloc(m,sizeof(int)); int len=0,i=1;
while(i<m){if(pat[i]==pat[len])lps[i++]=++len;else if(len)len=lps[len-1];else lps[i++]=0;}
int j=0; i=0; *cnt=0;
while(i<n){if(text[i]==pat[j]){i++;j++;}
if(j==m){res[(*cnt)++]=i-j;j=lps[j-1];}
else if(i<n&&text[i]!=pat[j]){if(j)j=lps[j-1];else i++;}}
free(lps);
}
Application: Repeated Substring Pattern
def repeatedSubstringPattern(s):
lps = [0]*len(s)
length = 0; i = 1
while i < len(s):
if s[i] == s[length]: length += 1; lps[i] = length; i += 1
elif length: length = lps[length-1]
else: lps[i] = 0; i += 1
n = len(s); l = lps[-1]
return l > 0 and n % (n-l) == 0
Advertisement