KMP Algorithm — O(n+m) Pattern Matching with Failure Function

Sanjeev SharmaSanjeev Sharma
2 min read

Advertisement

KMP Overview

Failure function lps[i] = length of longest proper prefix of pattern[0..i] that is also a suffix.

During search: when mismatch at pattern[j], jump to pattern[lps[j-1]] — don't reset to 0.


Solutions

Python

def kmp_search(text, pattern):
    def build_lps(p):
        lps = [0]*len(p)
        length = 0
        i = 1
        while i < len(p):
            if p[i] == p[length]:
                length += 1
                lps[i] = length
                i += 1
            elif length:
                length = lps[length-1]
            else:
                lps[i] = 0
                i += 1
        return lps

    lps = build_lps(pattern)
    results = []
    i = j = 0
    while i < len(text):
        if text[i] == pattern[j]:
            i += 1; j += 1
        if j == len(pattern):
            results.append(i-j)
            j = lps[j-1]
        elif i < len(text) and text[i] != pattern[j]:
            if j: j = lps[j-1]
            else: i += 1
    return results

JavaScript

function kmpSearch(text, pattern) {
    const buildLPS=(p)=>{const lps=new Array(p.length).fill(0);let len=0,i=1;while(i<p.length){if(p[i]===p[len]){lps[i++]=++len;}else if(len){len=lps[len-1];}else lps[i++]=0;}return lps;};
    const lps=buildLPS(pattern); const res=[]; let i=0,j=0;
    while(i<text.length){
        if(text[i]===pattern[j]){i++;j++;}
        if(j===pattern.length){res.push(i-j);j=lps[j-1];}
        else if(i<text.length&&text[i]!==pattern[j]){if(j)j=lps[j-1];else i++;}
    }
    return res;
}

Java

public List<Integer> kmpSearch(String text, String pat) {
    int n=text.length(),m=pat.length();
    int[] lps=new int[m]; int len=0,i=1;
    while(i<m){if(pat.charAt(i)==pat.charAt(len))lps[i++]=++len;else if(len>0)len=lps[len-1];else lps[i++]=0;}
    List<Integer>res=new ArrayList<>(); int j=0; i=0;
    while(i<n){if(text.charAt(i)==pat.charAt(j)){i++;j++;}
        if(j==m){res.add(i-j);j=lps[j-1];}
        else if(i<n&&text.charAt(i)!=pat.charAt(j)){if(j>0)j=lps[j-1];else i++;}}
    return res;
}

C++

#include <vector>
#include <string>
using namespace std;
vector<int> kmpSearch(string text,string pat){
    int m=pat.size(); vector<int>lps(m,0); int len=0,i=1;
    while(i<m){if(pat[i]==pat[len])lps[i++]=++len;else if(len)len=lps[len-1];else lps[i++]=0;}
    vector<int>res; int j=0; i=0;
    while(i<(int)text.size()){if(text[i]==pat[j]){i++;j++;}
        if(j==m){res.push_back(i-j);j=lps[j-1];}
        else if(i<(int)text.size()&&text[i]!=pat[j]){if(j)j=lps[j-1];else i++;}}
    return res;
}

C

#include <string.h>
#include <stdlib.h>
void kmpSearch(char* text, char* pat, int* res, int* cnt) {
    int n=strlen(text),m=strlen(pat);
    int* lps=calloc(m,sizeof(int)); int len=0,i=1;
    while(i<m){if(pat[i]==pat[len])lps[i++]=++len;else if(len)len=lps[len-1];else lps[i++]=0;}
    int j=0; i=0; *cnt=0;
    while(i<n){if(text[i]==pat[j]){i++;j++;}
        if(j==m){res[(*cnt)++]=i-j;j=lps[j-1];}
        else if(i<n&&text[i]!=pat[j]){if(j)j=lps[j-1];else i++;}}
    free(lps);
}

Application: Repeated Substring Pattern

def repeatedSubstringPattern(s):
    lps = [0]*len(s)
    length = 0; i = 1
    while i < len(s):
        if s[i] == s[length]: length += 1; lps[i] = length; i += 1
        elif length: length = lps[length-1]
        else: lps[i] = 0; i += 1
    n = len(s); l = lps[-1]
    return l > 0 and n % (n-l) == 0

Advertisement

Sanjeev Sharma

Written by

Sanjeev Sharma

Full Stack Engineer · E-mopro