Suffix Array — O(n log n) Construction and Applications
Advertisement
Suffix Array
Sort all suffixes of a string lexicographically. sa[i] = starting index of the i-th lexicographically smallest suffix.
s = "banana"
suffixes: banana, anana, nana, ana, na, a
sorted: a, ana, anana, banana, na, nana
sa: [5, 3, 1, 0, 4, 2]
O(n log²n) Construction (Doubling)
def build_suffix_array(s):
n = len(s)
sa = list(range(n))
rank = [ord(c) for c in s]
tmp = [0]*n
k = 1
while k < n:
def key(i):
return (rank[i], rank[i+k] if i+k < n else -1)
sa.sort(key=key)
tmp[sa[0]] = 0
for i in range(1, n):
tmp[sa[i]] = tmp[sa[i-1]] + (key(sa[i]) != key(sa[i-1]))
rank = tmp[:]
if rank[sa[-1]] == n-1: break
k *= 2
return sa
Python
def build_suffix_array(s):
n = len(s)
sa = sorted(range(n), key=lambda i: s[i:])
return sa
def build_lcp(s, sa):
n = len(s)
rank = [0]*n
for i, suf in enumerate(sa): rank[suf] = i
lcp = [0]*n
h = 0
for i in range(n):
if rank[i] > 0:
j = sa[rank[i]-1]
while i+h < n and j+h < n and s[i+h] == s[j+h]:
h += 1
lcp[rank[i]] = h
if h: h -= 1
return lcp
JavaScript
function buildSuffixArray(s){
const n=s.length;
return Array.from({length:n},(_,i)=>i).sort((a,b)=>s.slice(a)<s.slice(b)?-1:1);
}
Java
public int[] buildSuffixArray(String s) {
int n=s.length();
Integer[]sa=new Integer[n]; for(int i=0;i<n;i++)sa[i]=i;
Arrays.sort(sa,(a,b)->s.substring(a).compareTo(s.substring(b)));
return Arrays.stream(sa).mapToInt(Integer::intValue).toArray();
}
C++
#include <vector>
#include <string>
#include <algorithm>
using namespace std;
vector<int> buildSuffixArray(string s){
int n=s.size(); vector<int>sa(n);
iota(sa.begin(),sa.end(),0);
sort(sa.begin(),sa.end(),[&](int a,int b){return s.substr(a)<s.substr(b);});
return sa;
}
C
/* C: qsort with strcmp on suffix pointers */
Application: Number of Distinct Substrings
def countDistinctSubstrings(s):
n = len(s)
sa = build_suffix_array(s)
lcp = build_lcp(s, sa)
total = n*(n+1)//2
return total - sum(lcp)
Advertisement