Redis Caching Complete Guide 2026: Improve API Performance 10x
Advertisement
Redis 2026: Cache Everything That Matters
A cache miss costs roughly 100ms (a database query); a cache hit costs about 1ms (Redis). Cache the right things and your API becomes 10x faster.
- Setup: Redis Client in Node.js
- Cache-Aside Pattern (Most Common)
- Write-Through Caching
- Rate Limiting with Redis
- Session Storage
- Pub/Sub: Real-Time Events
- Sorted Sets: Leaderboards
- Upstash Redis for Vercel/Edge
Setup: Redis Client in Node.js
npm install ioredis
// lib/redis.ts
import Redis from 'ioredis'
// Single shared client for long-lived Node processes (TCP connection).
const redis = new Redis(process.env.REDIS_URL || 'redis://localhost:6379', {
maxRetriesPerRequest: 3,
enableReadyCheck: true,
lazyConnect: false,
connectTimeout: 5000,
keepAlive: 30000,
})
// Without an 'error' listener ioredis errors become unhandled events.
redis.on('error', (err) => console.error('Redis error:', err))
redis.on('connect', () => console.log('Redis connected'))
export { redis }
// For serverless (Vercel, AWS Lambda) — use Upstash
// (REST-based, so no persistent TCP connection per invocation).
import { Redis as UpstashRedis } from '@upstash/redis'
export const upstashRedis = new UpstashRedis({
url: process.env.UPSTASH_REDIS_REST_URL!,
token: process.env.UPSTASH_REDIS_REST_TOKEN!,
})
Cache-Aside Pattern (Most Common)
// lib/cache.ts
export class Cache {
  /**
   * JSON cache on top of an ioredis client.
   *
   * @param redis      ioredis client used for all operations
   * @param defaultTTL default expiry in seconds (0 means "no expiry")
   */
  constructor(private redis: Redis, private defaultTTL: number = 300) {}

  /** Read a JSON value; returns null on a miss or a corrupt payload. */
  async get<T>(key: string): Promise<T | null> {
    const cached = await this.redis.get(key)
    if (!cached) return null
    try {
      return JSON.parse(cached) as T
    } catch {
      // Corrupt entry: delete it and treat as a miss rather than throwing
      // a SyntaxError into every caller.
      await this.redis.del(key)
      return null
    }
  }

  /** Store a JSON-serializable value. ttl (seconds) overrides defaultTTL; 0 disables expiry. */
  async set(key: string, value: unknown, ttl?: number): Promise<void> {
    const serialized = JSON.stringify(value)
    const effectiveTTL = ttl ?? this.defaultTTL
    if (effectiveTTL) {
      await this.redis.setex(key, effectiveTTL, serialized)
    } else {
      await this.redis.set(key, serialized)
    }
  }

  /** Delete one key or a batch of keys. */
  async del(key: string | string[]): Promise<void> {
    await this.redis.del(...(Array.isArray(key) ? key : [key]))
  }

  /**
   * Delete every key matching a glob pattern.
   * Uses SCAN, not KEYS: KEYS is O(N) over the whole keyspace and blocks the
   * Redis event loop, which is unsafe on a busy production instance.
   */
  async invalidatePattern(pattern: string): Promise<void> {
    let cursor = '0'
    do {
      const [nextCursor, keys] = await this.redis.scan(
        cursor, 'MATCH', pattern, 'COUNT', 100
      )
      cursor = nextCursor
      if (keys.length > 0) await this.redis.del(...keys)
    } while (cursor !== '0')
  }

  // Cache-aside read helper: on a miss, run `fetcher`, store the result, and
  // return it. (Note: this is NOT stale-while-revalidate — a miss waits for
  // the fresh value instead of serving stale data.)
  async getOrSet<T>(
    key: string,
    fetcher: () => Promise<T>,
    ttl = this.defaultTTL
  ): Promise<T> {
    const cached = await this.get<T>(key)
    if (cached !== null) return cached
    const fresh = await fetcher()
    await this.set(key, fresh, ttl)
    return fresh
  }
}
// Shared cache instance backed by the ioredis client above.
const cache = new Cache(redis)

// Usage in route handler: cache-aside lookup for a post by slug.
async function getPost(slug: string) {
  const loadFromDb = () =>
    prisma.post.findUnique({ where: { slug }, include: { author: true } })
  return await cache.getOrSet(`post:${slug}`, loadFromDb, 600) // 10 minutes
}
Write-Through Caching
// Keeps cache in sync by writing to both DB and cache
class PostService {
  /** Write-through update: persist first, then refresh every cache entry. */
  async update(id: string, data: UpdatePostInput) {
    // Database is the source of truth — write it first.
    const post = await prisma.post.update({ where: { id }, data })
    // Refresh both lookup keys so readers see the new record immediately.
    await cache.set(`post:${id}`, post, 600)
    await cache.set(`post:slug:${post.slug}`, post, 600)
    // List pages may include this post — drop them wholesale.
    await cache.invalidatePattern('posts:list:*')
    return post
  }

  /** Delete the row and purge every cache key that referenced it. */
  async delete(id: string) {
    const removed = await prisma.post.delete({ where: { id } })
    const staleKeys = [`post:${id}`, `post:slug:${removed.slug}`]
    await cache.del(staleKeys)
    await cache.invalidatePattern('posts:list:*')
    return removed
  }
}
Rate Limiting with Redis
// Sliding window rate limiter
class RateLimiter {
  constructor(private redis: Redis) {}

  /**
   * Check (and record) one request for `identifier`.
   *
   * @param identifier    rate-limit bucket key (user id, IP, API key…)
   * @param limit         max requests allowed inside the window
   * @param windowSeconds sliding window length in seconds
   * @returns allowed flag, remaining quota, and the window-end timestamp (ms)
   * @throws if the Redis pipeline fails
   */
  async check(
    identifier: string,
    limit: number,
    windowSeconds: number
  ): Promise<{ allowed: boolean; remaining: number; resetAt: number }> {
    const key = `ratelimit:${identifier}`
    const now = Date.now()
    const windowMs = windowSeconds * 1000
    // Member must be unique per request: using the bare timestamp would make
    // two requests in the same millisecond collapse into one ZSET entry
    // (ZADD overwrites an existing member), undercounting traffic.
    const member = `${now}:${crypto.randomUUID()}`
    const pipeline = this.redis.pipeline()
    pipeline.zremrangebyscore(key, 0, now - windowMs) // Remove entries outside the window
    pipeline.zadd(key, now, member)                   // Record the current request
    pipeline.zcard(key)                               // Count requests in the window
    pipeline.expire(key, windowSeconds)               // GC the key once idle
    const results = await pipeline.exec()
    // exec() returns null on failure and [err, result] pairs per command —
    // check both instead of blindly indexing with a non-null assertion.
    if (!results) throw new Error('Rate limiter: Redis pipeline failed')
    const [zcardErr, zcardResult] = results[2]
    if (zcardErr) throw zcardErr
    const count = zcardResult as number
    return {
      allowed: count <= limit,
      remaining: Math.max(0, limit - count),
      resetAt: now + windowMs,
    }
  }
}
// Use in middleware
export async function rateLimitMiddleware(
req: Request,
identifier: string
): Promise<Response | null> {
const limiter = new RateLimiter(redis)
const result = await limiter.check(identifier, 100, 60)
if (!result.allowed) {
return new Response(JSON.stringify({ error: 'Rate limit exceeded' }), {
status: 429,
headers: {
'X-RateLimit-Limit': '100',
'X-RateLimit-Remaining': '0',
'X-RateLimit-Reset': result.resetAt.toString(),
},
})
}
return null // Allowed
}
Session Storage
// Store sessions in Redis instead of DB for performance
class SessionStore {
  private readonly SESSION_TTL = 60 * 60 * 24 * 7 // 7 days, in seconds

  /** Create a session and register it in the user's session index. */
  async create(userId: string, metadata: object): Promise<string> {
    const sessionId = crypto.randomUUID()
    const session = {
      userId,
      ...metadata,
      createdAt: Date.now(),
      lastAccessedAt: Date.now(),
    }
    await redis.setex(
      `session:${sessionId}`,
      this.SESSION_TTL,
      JSON.stringify(session)
    )
    // Track user sessions for logout all devices
    await redis.sadd(`user:sessions:${userId}`, sessionId)
    await redis.expire(`user:sessions:${userId}`, this.SESSION_TTL)
    return sessionId
  }

  /** Fetch a session, sliding its expiry forward on each access. */
  async get(sessionId: string): Promise<Session | null> {
    const data = await redis.get(`session:${sessionId}`)
    if (!data) return null
    const session = JSON.parse(data)
    // Refresh TTL on access — on BOTH keys. Refreshing only the session key
    // (as a naive implementation would) lets the user:sessions index expire
    // while its sessions are still alive, silently breaking invalidateAll().
    await redis.expire(`session:${sessionId}`, this.SESSION_TTL)
    if (session.userId) {
      await redis.expire(`user:sessions:${session.userId}`, this.SESSION_TTL)
    }
    return session
  }

  /** Log a user out of every device by deleting all tracked sessions. */
  async invalidateAll(userId: string): Promise<void> {
    const sessionIds = await redis.smembers(`user:sessions:${userId}`)
    if (sessionIds.length > 0) {
      // Expired session ids may linger in the set; deleting them is a no-op.
      await redis.del(...sessionIds.map(id => `session:${id}`))
      await redis.del(`user:sessions:${userId}`)
    }
  }
}
Pub/Sub: Real-Time Events
// Publisher: fan out a JSON event on the user's channel.
async function notifyUserUpdate(userId: string, event: object) {
  const channel = `user:${userId}`
  const payload = JSON.stringify(event)
  await redis.publish(channel, payload)
}
// Subscriber (separate connection — Redis requirement)
const subscriber = redis.duplicate()
// Glob patterns require PSUBSCRIBE. Plain SUBSCRIBE treats 'user:*' as a
// literal channel name, so the 'pmessage' handler below would never fire.
await subscriber.psubscribe('user:*')
subscriber.on('pmessage', (pattern, channel, message) => {
  const userId = channel.split(':')[1]
  const event = JSON.parse(message)
  console.log(`User ${userId} event:`, event)
  // Broadcast to WebSocket clients
})
Sorted Sets: Leaderboards
// Sorted-set leaderboard: ZADD/ZREVRANK are O(log n).
class Leaderboard {
  /** Record (or overwrite) a user's score. */
  async addScore(userId: string, score: number): Promise<void> {
    await redis.zadd('leaderboard', score, userId)
  }

  /** Top n members, highest score first. */
  async getTop(n: number): Promise<Array<{ userId: string; score: number }>> {
    // Reply is a flat array: [member, score, member, score, ...]
    const flat = await redis.zrevrangebyscore(
      'leaderboard', '+inf', '-inf',
      'WITHSCORES', 'LIMIT', 0, n
    )
    return Array.from({ length: flat.length / 2 }, (_, i) => ({
      userId: flat[2 * i],
      score: parseFloat(flat[2 * i + 1]),
    }))
  }

  /** 1-based rank by descending score, or null if the user isn't ranked. */
  async getRank(userId: string): Promise<number | null> {
    const zeroBased = await redis.zrevrank('leaderboard', userId)
    if (zeroBased === null) return null
    return zeroBased + 1
  }
}
Upstash Redis for Vercel/Edge
// Compatible with Edge Runtime (no TCP connections)
import { Redis } from '@upstash/redis'
import { Ratelimit } from '@upstash/ratelimit'
// REST-based client, so it works where raw sockets are unavailable.
const redis = new Redis({ url: process.env.UPSTASH_URL!, token: process.env.UPSTASH_TOKEN! })
// 10 requests per 10-second sliding window, per identifier.
const ratelimit = new Ratelimit({
redis,
limiter: Ratelimit.slidingWindow(10, '10 s'),
analytics: true, // NOTE(review): presumably enables Upstash usage analytics — confirm against docs
})
/**
 * Edge middleware: rate-limit by caller IP.
 * Falls back to 127.0.0.1 when the platform doesn't populate req.ip
 * (e.g. local development).
 */
export async function middleware(req: NextRequest) {
  const ip = req.ip ?? '127.0.0.1'
  const { success, remaining } = await ratelimit.limit(ip)
  if (!success) {
    // Expose remaining quota instead of leaving the destructured value
    // unused — mirrors the X-RateLimit-* headers set elsewhere.
    return NextResponse.json(
      { error: 'Rate limited' },
      { status: 429, headers: { 'X-RateLimit-Remaining': remaining.toString() } }
    )
  }
  return NextResponse.next()
}
The rule of thumb: if your API reads the same data more than once per second, cache it. Redis gives you millisecond latency with minimal operational overhead — especially when you use a managed service.
Advertisement