Randomized Algorithms: Reservoir Sampling and Quickselect
Advertisement
Randomized Algorithms
Reservoir Sampling
Select k items uniformly at random from a stream of unknown size n.
Algorithm: Keep first k items. For item i (i > k), include it with probability k/i; if included, replace a random item from the reservoir.
import random
def reservoir_sample(stream, k):
reservoir = []
for i, item in enumerate(stream):
if i < k:
reservoir.append(item)
else:
j = random.randint(0, i)
if j < k:
reservoir[j] = item
return reservoir
# LeetCode 382: Linked List Random Node
class Solution:
def __init__(self, head):
self.head = head
def getRandom(self):
result = self.head.val
node = self.head.next
i = 2
while node:
j = random.randint(1, i)
if j == 1:
result = node.val
node = node.next
i += 1
return result
Quickselect (kth Smallest/Largest)
Expected O(n) using random pivot selection:
import random
def quickselect(arr, k):
if len(arr) == 1: return arr[0]
pivot = random.choice(arr)
low = [x for x in arr if x < pivot]
mid = [x for x in arr if x == pivot]
high = [x for x in arr if x > pivot]
if k <= len(low):
return quickselect(low, k)
elif k <= len(low) + len(mid):
return pivot
else:
return quickselect(high, k - len(low) - len(mid))
def kth_largest(arr, k):
return quickselect(arr, len(arr) - k + 1)
C++ Implementation (Quickselect with Hoare partition)
#include <bits/stdc++.h>
using namespace std;
int quickselect(vector<int>& arr, int l, int r, int k) {
if (l == r) return arr[l];
int pivot_idx = l + rand() % (r - l + 1);
swap(arr[pivot_idx], arr[r]);
int pivot = arr[r];
int i = l - 1;
for (int j = l; j < r; j++) {
if (arr[j] <= pivot) swap(arr[++i], arr[j]);
}
swap(arr[++i], arr[r]);
if (i == k) return arr[i];
if (i < k) return quickselect(arr, i+1, r, k);
return quickselect(arr, l, i-1, k);
}
int kthSmallest(vector<int> arr, int k) {
return quickselect(arr, 0, arr.size()-1, k-1);
}
Fisher-Yates Shuffle (Unbiased)
def shuffle(arr):
n = len(arr)
for i in range(n - 1, 0, -1):
j = random.randint(0, i)
arr[i], arr[j] = arr[j], arr[i]
return arr
Bloom Filters (Probabilistic)
import hashlib
class BloomFilter:
def __init__(self, size=1000, num_hashes=3):
self.size = size
self.bits = [False] * size
self.seeds = list(range(num_hashes))
def _hashes(self, item):
for seed in self.seeds:
h = int(hashlib.md5(f"{seed}{item}".encode()).hexdigest(), 16)
yield h % self.size
def add(self, item):
for h in self._hashes(item):
self.bits[h] = True
def might_contain(self, item):
return all(self.bits[h] for h in self._hashes(item))
LeetCode Problems
- 215. Kth Largest Element — quickselect
- 382. Linked List Random Node — reservoir sampling
- 398. Random Pick Index — reservoir sampling
- 384. Shuffle an Array — Fisher-Yates
Advertisement