Randomized Algorithms: Reservoir Sampling and Quickselect

Sanjeev SharmaSanjeev Sharma
3 min read

Advertisement

Randomized Algorithms

Reservoir Sampling

Select k items uniformly at random from a stream of unknown size n.

Algorithm: Keep first k items. For item i (i > k), include it with probability k/i; if included, replace a random item from the reservoir.

import random

def reservoir_sample(stream, k):
    reservoir = []
    for i, item in enumerate(stream):
        if i < k:
            reservoir.append(item)
        else:
            j = random.randint(0, i)
            if j < k:
                reservoir[j] = item
    return reservoir

# LeetCode 382: Linked List Random Node
class Solution:
    def __init__(self, head):
        self.head = head

    def getRandom(self):
        result = self.head.val
        node = self.head.next
        i = 2
        while node:
            j = random.randint(1, i)
            if j == 1:
                result = node.val
            node = node.next
            i += 1
        return result

Quickselect (kth Smallest/Largest)

Expected O(n) using random pivot selection:

import random

def quickselect(arr, k):
    if len(arr) == 1: return arr[0]
    pivot = random.choice(arr)
    low = [x for x in arr if x < pivot]
    mid = [x for x in arr if x == pivot]
    high = [x for x in arr if x > pivot]
    if k <= len(low):
        return quickselect(low, k)
    elif k <= len(low) + len(mid):
        return pivot
    else:
        return quickselect(high, k - len(low) - len(mid))

def kth_largest(arr, k):
    return quickselect(arr, len(arr) - k + 1)

C++ Implementation (Quickselect with Hoare partition)

#include <bits/stdc++.h>
using namespace std;

int quickselect(vector<int>& arr, int l, int r, int k) {
    if (l == r) return arr[l];
    int pivot_idx = l + rand() % (r - l + 1);
    swap(arr[pivot_idx], arr[r]);
    int pivot = arr[r];
    int i = l - 1;
    for (int j = l; j < r; j++) {
        if (arr[j] <= pivot) swap(arr[++i], arr[j]);
    }
    swap(arr[++i], arr[r]);
    if (i == k) return arr[i];
    if (i < k) return quickselect(arr, i+1, r, k);
    return quickselect(arr, l, i-1, k);
}

int kthSmallest(vector<int> arr, int k) {
    return quickselect(arr, 0, arr.size()-1, k-1);
}

Fisher-Yates Shuffle (Unbiased)

def shuffle(arr):
    n = len(arr)
    for i in range(n - 1, 0, -1):
        j = random.randint(0, i)
        arr[i], arr[j] = arr[j], arr[i]
    return arr

Bloom Filters (Probabilistic)

import hashlib

class BloomFilter:
    def __init__(self, size=1000, num_hashes=3):
        self.size = size
        self.bits = [False] * size
        self.seeds = list(range(num_hashes))

    def _hashes(self, item):
        for seed in self.seeds:
            h = int(hashlib.md5(f"{seed}{item}".encode()).hexdigest(), 16)
            yield h % self.size

    def add(self, item):
        for h in self._hashes(item):
            self.bits[h] = True

    def might_contain(self, item):
        return all(self.bits[h] for h in self._hashes(item))

LeetCode Problems

  • 215. Kth Largest Element — quickselect
  • 382. Linked List Random Node — reservoir sampling
  • 398. Random Pick Index — reservoir sampling
  • 384. Shuffle an Array — Fisher-Yates

Advertisement

Sanjeev Sharma

Written by

Sanjeev Sharma

Full Stack Engineer · E-mopro