Longest Common Substring — DP and Rolling Hash Approaches

Sanjeev SharmaSanjeev Sharma
2 min read

Advertisement

Problem

Find the longest string that appears as a contiguous substring in both s1 and s2.


Solutions

Python — DP O(nm)

def longestCommonSubstring(s1, s2):
    m, n = len(s1), len(s2)
    dp = [[0]*(n+1) for _ in range(m+1)]
    best = start = 0
    for i in range(1, m+1):
        for j in range(1, n+1):
            if s1[i-1] == s2[j-1]:
                dp[i][j] = dp[i-1][j-1] + 1
                if dp[i][j] > best:
                    best = dp[i][j]
                    start = i-best
    return s1[start:start+best]

Python — Binary Search + Hash O((n+m) log(n+m))

def longestCommonSubstring(s1, s2):
    MOD = (1<<61)-1; BASE = 131

    def get_hashes(s, length):
        h = pw = 0
        hset = set()
        for i,c in enumerate(s):
            h = (h*BASE + ord(c)) % MOD
            if i < length-1: pw = (pw*BASE+1)%MOD if pw else 1
            if i >= length:
                h = (h - ord(s[i-length])*pow(BASE,length,MOD)) % MOD
            if i >= length-1: hset.add(h)
        return hset

    lo, hi = 0, min(len(s1), len(s2))
    res = ""
    while lo <= hi:
        mid = (lo+hi)//2
        h1 = get_hashes(s1, mid)
        h2 = get_hashes(s2, mid)
        if h1 & h2:
            res_len = mid; lo = mid+1
        else: hi = mid-1
    return res

JavaScript

function longestCommonSubstring(s1,s2){
    const m=s1.length,n=s2.length,dp=Array.from({length:m+1},()=>new Array(n+1).fill(0));
    let best=0,start=0;
    for(let i=1;i<=m;i++)for(let j=1;j<=n;j++)if(s1[i-1]===s2[j-1]){dp[i][j]=dp[i-1][j-1]+1;if(dp[i][j]>best){best=dp[i][j];start=i-best;}}
    return s1.slice(start,start+best);
}

Java

public String longestCommonSubstring(String s1,String s2){
    int m=s1.length(),n=s2.length(); int[][]dp=new int[m+1][n+1]; int best=0,start=0;
    for(int i=1;i<=m;i++)for(int j=1;j<=n;j++)if(s1.charAt(i-1)==s2.charAt(j-1)){dp[i][j]=dp[i-1][j-1]+1;if(dp[i][j]>best){best=dp[i][j];start=i-best;}}
    return s1.substring(start,start+best);
}

C++

#include <string>
#include <vector>
using namespace std;
string longestCommonSubstring(string s1,string s2){
    int m=s1.size(),n=s2.size(); vector<vector<int>>dp(m+1,vector<int>(n+1,0)); int best=0,start=0;
    for(int i=1;i<=m;i++)for(int j=1;j<=n;j++)if(s1[i-1]==s2[j-1]){dp[i][j]=dp[i-1][j-1]+1;if(dp[i][j]>best){best=dp[i][j];start=i-best;}}
    return s1.substr(start,best);
}

C

/* C: 2D DP array, O(nm) time and space */

Advertisement

Sanjeev Sharma

Written by

Sanjeev Sharma

Full Stack Engineer · E-mopro