String Hashing — Polynomial Hash for O(1) Substring Comparison
Advertisement
Polynomial Hash
h[i] = s[0]*p^(i-1) + s[1]*p^(i-2) + ... + s[i-1]*p^0
Substring s[l..r]:
hash(l,r) = (h[r+1] - h[l]*p^(r-l+1)) mod MOD
Solutions
Python
class StringHash:
MOD = (1<<61)-1
BASE = 131
def __init__(self, s):
n = len(s)
self.h = [0]*(n+1)
self.pw = [1]*(n+1)
for i in range(n):
self.h[i+1] = (self.h[i]*self.BASE + ord(s[i])) % self.MOD
self.pw[i+1] = self.pw[i]*self.BASE % self.MOD
def get(self, l, r):
return (self.h[r+1] - self.h[l]*self.pw[r-l+1]) % self.MOD
JavaScript
class StringHash {
constructor(s,BASE=131,MOD=1e9+7){
this.MOD=MOD; this.BASE=BASE;
const n=s.length; this.h=new Array(n+1).fill(0); this.pw=new Array(n+1).fill(1);
for(let i=0;i<n;i++){this.h[i+1]=(this.h[i]*BASE+s.charCodeAt(i))%MOD;this.pw[i+1]=this.pw[i]*BASE%MOD;}
}
get(l,r){return((this.h[r+1]-this.h[l]*this.pw[r-l+1])%this.MOD+this.MOD)%this.MOD;}
}
Java
class StringHash {
long[] h,pw; long MOD=1_000_000_007L,BASE=131;
StringHash(String s){int n=s.length();h=new long[n+1];pw=new long[n+1];pw[0]=1;for(int i=0;i<n;i++){h[i+1]=(h[i]*BASE+s.charAt(i))%MOD;pw[i+1]=pw[i]*BASE%MOD;}}
long get(int l,int r){return((h[r+1]-h[l]*pw[r-l+1])%MOD+MOD)%MOD;}
}
C++
#include <vector>
#include <string>
using namespace std;
struct StringHash {
vector<long long>h,pw; long long MOD=1e9+7,BASE=131;
StringHash(string s):h(s.size()+1,0),pw(s.size()+1,1){
for(int i=0;i<(int)s.size();i++){h[i+1]=(h[i]*BASE+s[i])%MOD;pw[i+1]=pw[i]*BASE%MOD;}
}
long long get(int l,int r){return((h[r+1]-h[l]*pw[r-l+1])%MOD+MOD)%MOD;}
};
C
long long h[100001],pw[100001]; long long MOD=1000000007LL,BASE=131;
void build(char*s,int n){pw[0]=1;for(int i=0;i<n;i++){h[i+1]=(h[i]*BASE+s[i])%MOD;pw[i+1]=pw[i]*BASE%MOD;}}
long long get_hash(int l,int r){return((h[r+1]-h[l]*pw[r-l+1])%MOD+MOD)%MOD;}
Application: Longest Duplicate Substring (Binary Search + Hash)
def longestDupSubstring(s):
n = len(s)
h = StringHash(s)
def has_dup(length):
seen = set()
for i in range(n-length+1):
hv = h.get(i, i+length-1)
if hv in seen: return i
seen.add(hv)
return -1
lo, hi, res = 1, n-1, 0
while lo <= hi:
mid = (lo+hi)//2
if has_dup(mid) >= 0: res = mid; lo = mid+1
else: hi = mid-1
i = has_dup(res)
return s[i:i+res] if i >= 0 else ""
Advertisement