Published on

Webhook Reliability — Delivery Guarantees, Retry Logic, and Signature Verification

Authors

Introduction

Webhooks are how applications talk to each other asynchronously: an order system tells a billing system about a new order; GitHub notifies your CI/CD pipeline on a code push. But webhooks are unreliable by design: the receiver might be down, the network might fail, or the same message might be delivered more than once.

Production webhook systems must provide three guarantees: authenticity (verify the sender), idempotency (safely handle duplicate deliveries), and reliability (guarantee eventual delivery, or alert when delivery fails).

This guide implements a complete production webhook infrastructure.

HMAC-SHA256 Signature Verification

Every webhook must be signed. The sender includes an HMAC signature; the receiver verifies using the shared secret.

// lib/webhook.ts
import crypto from 'crypto';

/** JSON body carried by every webhook request. */
export interface WebhookPayload {
  /** Unique identifier of this webhook; senders also expose it as `X-Webhook-ID`. */
  id: string;
  /** Event name the receiver dispatches on. */
  event: string;
  /** Creation time as produced by `Date.now()` on the sender. */
  timestamp: number;
  /** Event-specific content. */
  data: Record<string, any>;
}

/**
 * Signs a webhook payload with HMAC-SHA256.
 *
 * The signature covers `JSON.stringify(payload)`, so the sender must transmit
 * exactly that serialization for the receiver's check to succeed.
 *
 * @param payload - Payload to sign.
 * @param secret - Secret shared between sender and receiver.
 * @returns The signature as a lowercase hex string.
 */
export function signWebhookPayload(
  payload: WebhookPayload,
  secret: string
): string {
  const serialized = JSON.stringify(payload);

  // Keyed hash: only holders of the shared secret can produce a matching value.
  const hmac = crypto.createHmac('sha256', secret);
  hmac.update(serialized);

  return hmac.digest('hex');
}

/**
 * Checks a hex-encoded HMAC-SHA256 signature against the received body.
 *
 * Malformed signatures (wrong length, non-hex characters, not a string) are
 * reported as `false` rather than thrown: `crypto.timingSafeEqual` raises a
 * RangeError when its inputs differ in length, which would otherwise turn a
 * bad signature into a server error instead of a rejection.
 *
 * @param payloadJson - The exact body text the signature was computed over.
 * @param signature - Hex signature supplied by the sender.
 * @param secret - Secret shared between sender and receiver.
 * @returns `true` only when the signature matches.
 */
export function verifyWebhookSignature(
  payloadJson: string,
  signature: string,
  secret: string
): boolean {
  // The expected format is public (64 hex chars), so rejecting on shape leaks
  // nothing. It also stops Buffer.from(..., 'hex') from silently dropping
  // trailing junk and accepting "<valid signature>garbage".
  if (typeof signature !== 'string' || !/^[0-9a-f]{64}$/i.test(signature)) {
    return false;
  }

  const expected = crypto
    .createHmac('sha256', secret)
    .update(payloadJson)
    .digest();
  const provided = Buffer.from(signature, 'hex');

  if (provided.length !== expected.length) {
    return false;
  }

  // Constant-time comparison (prevents timing attacks)
  return crypto.timingSafeEqual(provided, expected);
}

// Webhook sender (in billing service)
/**
 * Builds, signs and POSTs a single webhook.
 *
 * @param event - Event name, sent in the body and as `X-Webhook-Event`.
 * @param data - Event-specific content.
 * @param recipientUrl - Endpoint that receives the POST.
 * @param secret - Shared secret used to sign the payload.
 * @returns Whether the recipient answered with a 2xx status.
 */
export async function sendWebhook(
  event: string,
  data: Record<string, any>,
  recipientUrl: string,
  secret: string
) {
  const id = crypto.randomUUID();
  const timestamp = Date.now();
  const payload: WebhookPayload = { id, event, timestamp, data };

  const headers = {
    'Content-Type': 'application/json',
    'X-Webhook-Signature': signWebhookPayload(payload, secret),
    'X-Webhook-Event': event,
    'X-Webhook-Timestamp': timestamp.toString(),
    'X-Webhook-ID': id, // For idempotency
  };

  const { ok } = await fetch(recipientUrl, {
    method: 'POST',
    headers,
    body: JSON.stringify(payload),
  });

  return ok;
}

Receiver-side verification:

// pages/api/webhooks/billing.ts
import { NextApiRequest, NextApiResponse } from 'next';
import { verifyWebhookSignature } from '@/lib/webhook';

// The signature covers the exact bytes the sender transmitted, so Next.js must
// not parse the body for this route; it is read from the stream instead.
export const config = {
  api: { bodyParser: false },
};

// With the built-in parser disabled there is no size limit, so enforce one here.
const MAX_WEBHOOK_BODY_BYTES = 1024 * 1024;

/** Reads the unparsed request body as UTF-8 text, rejecting oversized bodies. */
async function readRawBody(req: NextApiRequest): Promise<string> {
  const chunks: Buffer[] = [];
  let total = 0;

  for await (const chunk of req) {
    const buf = typeof chunk === 'string' ? Buffer.from(chunk) : (chunk as Buffer);
    total += buf.length;
    if (total > MAX_WEBHOOK_BODY_BYTES) {
      throw new Error('Webhook body too large');
    }
    chunks.push(buf);
  }

  return Buffer.concat(chunks).toString('utf8');
}

/** Returns a header as a single string; repeated headers collapse to the first value. */
function headerValue(value: string | string[] | undefined): string | undefined {
  return Array.isArray(value) ? value[0] : value;
}

/**
 * Billing webhook endpoint: authenticates the request, then hands the event to
 * `handleWebhookEvent`.
 *
 * Responses: 405 for non-POST, 401 for a missing or invalid signature, 400 for
 * an unreadable body or missing ID/event headers, 500 when the secret is not
 * configured or processing fails, 200 on success.
 */
export default async function handler(
  req: NextApiRequest,
  res: NextApiResponse
) {
  if (req.method !== 'POST') {
    return res.status(405).json({ error: 'Method not allowed' });
  }

  const signature = headerValue(req.headers['x-webhook-signature']);
  if (!signature) {
    return res.status(401).json({ error: 'Missing signature' });
  }

  const secret = process.env.WEBHOOK_SECRET;
  if (!secret) {
    console.error('WEBHOOK_SECRET is not configured');
    return res.status(500).json({ error: 'Processing failed' });
  }

  // Verify signature using raw body (not parsed JSON): re-serializing a parsed
  // body can change whitespace, escapes or number formatting and break the HMAC.
  let rawBody: string;
  try {
    rawBody = await readRawBody(req);
  } catch {
    return res.status(400).json({ error: 'Invalid request body' });
  }

  let verified = false;
  try {
    verified = verifyWebhookSignature(rawBody, signature, secret);
  } catch {
    // A malformed signature must be rejected, not surfaced as a server error.
    verified = false;
  }
  if (!verified) {
    return res.status(401).json({ error: 'Invalid signature' });
  }

  let body: unknown;
  try {
    body = JSON.parse(rawBody);
  } catch {
    return res.status(400).json({ error: 'Invalid JSON body' });
  }

  // NOTE(review): these headers are not covered by the signature; only the body
  // is. Consider taking the ID and event from the signed body, and checking the
  // timestamp to limit replay.
  const webhookId = headerValue(req.headers['x-webhook-id']);
  const event = headerValue(req.headers['x-webhook-event']);
  if (!webhookId || !event) {
    return res.status(400).json({ error: 'Missing webhook ID or event' });
  }

  try {
    await handleWebhookEvent(webhookId, event, body);
    return res.status(200).json({ received: true });
  } catch (err) {
    console.error(`Webhook ${webhookId} processing failed:`, err);
    return res.status(500).json({ error: 'Processing failed' });
  }
}

Idempotency with Event IDs

Webhooks may be delivered multiple times. Idempotency keys prevent processing the same event twice.

// lib/idempotency.ts
import { PrismaClient } from '@prisma/client';
import Redis from 'ioredis';

// Prisma client holding the durable record of processed webhooks.
const db = new PrismaClient();
// Redis connection used as the fast-path cache in front of the database lookup.
// NOTE(review): REDIS_URL is passed through unchecked — confirm it is always set.
const redis = new Redis(process.env.REDIS_URL);

/** Record of a webhook whose handler has completed. */
export interface ProcessedEvent {
  /** Identifier of this record (distinct from the webhook's own ID). */
  id: string;
  /** ID of the webhook; the key used to detect duplicates. */
  webhookId: string;
  /** Event name of the processed webhook. */
  event: string;
  /** When the record was created. */
  processedAt: Date;
  /** Value returned by the handler, if any. */
  result?: Record<string, any>;
}

/**
 * Runs `handler` for a webhook at most once and returns the stored outcome on
 * repeat deliveries.
 *
 * Lookup order: Redis cache, then database. If neither has a record, a
 * short-lived Redis lock is taken before the handler runs, so two concurrent
 * deliveries of the same webhook cannot both execute it.
 *
 * @param webhookId - Unique ID of the webhook (the idempotency key).
 * @param event - Event name, stored with the result.
 * @param payload - Value passed to `handler`.
 * @param handler - Business logic to run on first delivery.
 * @returns The processed-event record, or `null` when another worker is
 *   currently processing the same webhook and nothing was done here.
 * @throws Whatever `handler` or the database write throws; nothing is recorded
 *   in that case, so a later delivery is processed again.
 */
export async function processWebhookIdempotent(
  webhookId: string,
  event: string,
  payload: any,
  handler: (payload: any) => Promise<any>
): Promise<ProcessedEvent | null> {
  const cacheTtlSeconds = 86400; // 24 hours
  // Upper bound on how long a crashed worker can block reprocessing.
  const lockTtlSeconds = 300;

  const idempotencyKey = `webhook:${webhookId}`;
  const lockKey = `${idempotencyKey}:lock`;

  // Check Redis cache first (fast path)
  const cached = await redis.get(idempotencyKey);
  if (cached) {
    console.log(`Webhook ${webhookId} already processed, returning cached result`);
    const parsed = JSON.parse(cached) as ProcessedEvent;
    // JSON turned the Date into a string; restore it to honour the return type.
    return { ...parsed, processedAt: new Date(parsed.processedAt) };
  }

  // Check database for older entries
  const existing = await db.processedEvent.findUnique({
    where: { webhookId },
  });

  if (existing) {
    // Cache the result for fast subsequent lookups
    await redis.setex(idempotencyKey, cacheTtlSeconds, JSON.stringify(existing));
    return existing;
  }

  // Claim the webhook atomically: SET ... NX succeeds for exactly one caller.
  const claimed = await redis.set(lockKey, '1', 'EX', lockTtlSeconds, 'NX');
  if (claimed !== 'OK') {
    console.log(`Webhook ${webhookId} is already being processed, skipping`);
    return null;
  }

  try {
    // Another worker may have finished between the lookup above and the claim.
    const finished = await db.processedEvent.findUnique({
      where: { webhookId },
    });
    if (finished) {
      return finished;
    }

    // First time seeing this webhook; process it
    const result = await handler(payload);

    const processed: ProcessedEvent = {
      id: crypto.randomUUID(),
      webhookId,
      event,
      processedAt: new Date(),
      result,
    };

    // Persist to database
    await db.processedEvent.create({
      data: processed,
    });

    // Cache in Redis. Best effort: the database already holds the record, so a
    // cache failure must not make a completed webhook look failed.
    try {
      await redis.setex(idempotencyKey, cacheTtlSeconds, JSON.stringify(processed));
    } catch (cacheErr) {
      console.error(`Failed to cache result for webhook ${webhookId}:`, cacheErr);
    }

    return processed;
  } finally {
    // Release on success and on failure; failures are not cached, so a retry
    // can claim the webhook again.
    // NOTE(review): a plain DEL can remove another worker's lock if this run
    // outlived lockTtlSeconds; use a token-checked release if handlers can be that slow.
    await redis.del(lockKey);
  }
}

// Schema
// model ProcessedEvent {
//   id        String   @id @default(cuid())
//   webhookId String   @unique
//   event     String
//   result    Json?
//   processedAt DateTime @default(now())
// }

Exponential Retry With Jitter

Failed webhooks are retried with exponential backoff plus random jitter, so that many failed deliveries do not all retry at the same instant (the "thundering herd" problem).

// lib/webhookRetry.ts
import Bull, { Queue } from 'bull';

// Retry policy for outbound webhook delivery.
const MAX_DELIVERY_ATTEMPTS = 5;
const RETRY_BASE_DELAY_MS = 2000; // First retry waits about 2 seconds
const RETRY_MAX_JITTER_MS = 1000; // Up to 1 second of random spread per retry
const DELIVERY_TIMEOUT_MS = 30000; // 30 second timeout per attempt

// Name under which the custom backoff strategy is registered with Bull.
const JITTERED_BACKOFF = 'exponentialJitter';

/**
 * Delay before the next retry: exponential growth plus random jitter so that
 * jobs which failed together do not retry together.
 *
 * @param attemptsMade - Number of attempts that have already failed (1 after the first failure).
 */
function jitteredBackoffDelay(attemptsMade: number): number {
  const exponential = (Math.pow(2, attemptsMade) - 1) * RETRY_BASE_DELAY_MS;
  const jitter = Math.random() * RETRY_MAX_JITTER_MS;
  return Math.round(exponential + jitter);
}

const webhookQueue: Queue = new Bull('webhooks', {
  redis: {
    host: process.env.REDIS_HOST!,
    port: parseInt(process.env.REDIS_PORT!, 10),
  },
  settings: {
    // Bull's built-in 'exponential' backoff has no jitter, so register our own.
    backoffStrategies: {
      [JITTERED_BACKOFF]: jitteredBackoffDelay,
    },
  },
});

/** Everything needed to deliver one webhook. */
export interface WebhookJob {
  /** Webhook ID, sent as `X-Webhook-ID`. */
  id: string;
  /** Event name, sent as `X-Webhook-Event`. */
  event: string;
  /** Endpoint that receives the POST. */
  recipientUrl: string;
  /** Body to serialize and sign. */
  payload: any;
  // NOTE(review): the signing secret is stored with the job in Redis; consider
  // storing a reference and resolving the secret at delivery time.
  secret: string;
}

/**
 * Queues a webhook for delivery with up to MAX_DELIVERY_ATTEMPTS attempts and
 * jittered exponential backoff between them.
 */
export async function enqueueWebhook(job: WebhookJob) {
  await webhookQueue.add(job, {
    attempts: MAX_DELIVERY_ATTEMPTS,
    backoff: {
      type: JITTERED_BACKOFF,
    },
    removeOnComplete: true,
  });
}

// Process webhooks with retry logic.
// Throwing from the processor fails the attempt; Bull schedules the next one
// using the backoff configured in enqueueWebhook unless the job was discarded.
webhookQueue.process(async (job) => {
  const { recipientUrl, payload, secret, id, event } = job.data as WebhookJob;

  // attemptsMade counts attempts that already failed, so the current one is +1.
  const attemptNumber = job.attemptsMade + 1;
  const maxAttempts = job.opts.attempts ?? 1;
  let retryable = true;

  try {
    const response = await fetch(recipientUrl, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'X-Webhook-Signature': signWebhookPayload(payload, secret),
        'X-Webhook-Event': event,
        'X-Webhook-ID': id,
      },
      body: JSON.stringify(payload),
      // fetch has no `timeout` option; an abort signal enforces the limit.
      signal: AbortSignal.timeout(DELIVERY_TIMEOUT_MS),
    });

    if (!response.ok) {
      const { status } = response;
      // 408 and 429 are transient even though they are 4xx.
      const isClientError =
        status >= 400 && status < 500 && status !== 408 && status !== 429;

      // 4xx errors: don't retry (likely misconfiguration)
      if (isClientError) {
        retryable = false;
        // Without discard() Bull would retry despite the error message.
        await job.discard();
        throw new Error(`Recipient returned ${status}; not retrying`);
      }

      // 5xx errors: retry
      throw new Error(`Recipient returned ${status}`);
    }

    return { success: true };
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);

    if (retryable && attemptNumber < maxAttempts) {
      console.log(
        `Webhook ${id} failed on attempt ${attemptNumber}/${maxAttempts} (${reason}), will be retried`
      );
    } else {
      // Final failure: no retries remain.
      console.error(
        `Webhook ${id} failed permanently after ${attemptNumber} attempt(s): ${reason}`
      );
    }

    throw err;
  }
});

Dead-Letter Queue for Failed Webhooks

Webhooks that fail after all retries go to a DLQ for manual investigation.

// lib/dlq.ts
// Separate Bull queue that stores webhooks which could not be delivered.
// It uses the same Redis host/port environment variables as the delivery queue.
const dlqQueue: Queue = new Bull('webhooks-dlq', {
  redis: {
    host: process.env.REDIS_HOST!,
    port: parseInt(process.env.REDIS_PORT!),
  },
});

// Copy permanently failed deliveries to the DLQ and alert on critical events.
webhookQueue.on('failed', async (job, err) => {
  // Bull emits 'failed' for every failed attempt, including ones it will retry.
  // Only act once the job is out of attempts or sits in the failed set
  // (which also covers jobs that were discarded).
  const maxAttempts = job.opts.attempts ?? 1;
  const isTerminal =
    job.attemptsMade >= maxAttempts || (await job.isFailed());
  if (!isTerminal) {
    return;
  }

  try {
    // Move failed job to DLQ
    const dlqJob = {
      originalJobId: job.id,
      webhook: job.data,
      error: err.message,
      failedAt: new Date(),
      attemptCount: job.attemptsMade,
    };

    await dlqQueue.add(dlqJob, {
      removeOnComplete: false, // Keep for audit trail
    });

    // Alert on critical webhooks
    if (job.data.event === 'payment.completed') {
      await notifyOps({
        severity: 'critical',
        message: `Payment webhook ${job.data.id} failed after ${job.attemptsMade} attempts`,
        jobId: job.id,
      });
    }
  } catch (dlqErr) {
    // A rejection from an event listener has no caller to handle it, so log it here.
    console.error(`Failed to move webhook job ${job.id} to DLQ:`, dlqErr);
  }
});

// Manual retry from DLQ
/**
 * Re-queues a webhook stored in the DLQ and deletes its DLQ entry.
 *
 * @param dlqJobId - ID of the job in the DLQ queue.
 * @throws Error when no DLQ job has that ID.
 */
export async function retryFromDLQ(dlqJobId: string) {
  const dlqEntry = await dlqQueue.getJob(dlqJobId);

  if (dlqEntry) {
    // Enqueue first, remove second: the entry is only dropped once the
    // original webhook is back on the delivery queue.
    await enqueueWebhook(dlqEntry.data.webhook);
    await dlqEntry.remove();
    return;
  }

  throw new Error('DLQ job not found');
}

// DLQ dashboard query
/**
 * Lists DLQ entries for the dashboard, optionally filtered.
 *
 * @param filters.event - Keep only entries whose original webhook has this event.
 * @param filters.startDate - Keep only entries that failed at or after this time.
 * @param filters.endDate - Keep only entries that failed at or before this time.
 * @returns The matching DLQ jobs.
 */
export async function getDLQWebhooks(filters?: {
  event?: string;
  startDate?: Date;
  endDate?: Date;
}) {
  // DLQ entries are never processed, but ask for every state explicitly so
  // nothing is missed.
  const allJobs = await dlqQueue.getJobs([
    'waiting',
    'active',
    'delayed',
    'paused',
    'completed',
    'failed',
  ]);

  const from = filters?.startDate?.getTime();
  const until = filters?.endDate?.getTime();

  return allJobs.filter((job) => {
    if (filters?.event && job.data.webhook.event !== filters.event) {
      return false;
    }

    if (from !== undefined || until !== undefined) {
      // failedAt is stored as a Date and comes back from Redis as a string.
      const failedAt = new Date(job.data.failedAt).getTime();
      if (Number.isNaN(failedAt)) {
        return false;
      }
      if (from !== undefined && failedAt < from) {
        return false;
      }
      if (until !== undefined && failedAt > until) {
        return false;
      }
    }

    return true;
  });
}

Webhook Receiver as Queue Producer

When webhooks arrive, don't process synchronously. Enqueue immediately and process async.

// pages/api/webhooks/events.ts
import { NextApiRequest, NextApiResponse } from 'next';
import { verifyWebhookSignature } from '@/lib/webhook';
import { PrismaClient } from '@prisma/client';

// Prisma client used to record received webhooks and their processing status.
const db = new PrismaClient();

// The signature covers the exact bytes the sender transmitted, so Next.js must
// not parse the body for this route; it is read from the stream instead.
export const config = {
  api: { bodyParser: false },
};

// With the built-in parser disabled there is no size limit, so enforce one here.
const MAX_WEBHOOK_BODY_BYTES = 1024 * 1024;

/** Reads the unparsed request body as UTF-8 text, rejecting oversized bodies. */
async function readRawBody(req: NextApiRequest): Promise<string> {
  const chunks: Buffer[] = [];
  let total = 0;

  for await (const chunk of req) {
    const buf = typeof chunk === 'string' ? Buffer.from(chunk) : (chunk as Buffer);
    total += buf.length;
    if (total > MAX_WEBHOOK_BODY_BYTES) {
      throw new Error('Webhook body too large');
    }
    chunks.push(buf);
  }

  return Buffer.concat(chunks).toString('utf8');
}

/** Returns a header as a single string; repeated headers collapse to the first value. */
function headerValue(value: string | string[] | undefined): string | undefined {
  return Array.isArray(value) ? value[0] : value;
}

/**
 * Webhook intake endpoint: authenticates the request, records it, queues it
 * for background processing, and answers 202 without waiting for the work.
 *
 * Redelivery of the same webhook ID is safe: the record is upserted and the
 * queue ignores a job whose ID it already holds.
 */
export default async function handler(
  req: NextApiRequest,
  res: NextApiResponse
) {
  if (req.method !== 'POST') {
    return res.status(405).json({ error: 'Method not allowed' });
  }

  // Verify webhook signature
  const signature = headerValue(req.headers['x-webhook-signature']);
  if (!signature) {
    return res.status(401).json({ error: 'Missing signature' });
  }

  const secret = process.env.WEBHOOK_SECRET;
  if (!secret) {
    console.error('WEBHOOK_SECRET is not configured');
    return res.status(500).json({ error: 'Failed to process webhook' });
  }

  let rawBody: string;
  try {
    rawBody = await readRawBody(req);
  } catch {
    return res.status(400).json({ error: 'Invalid request body' });
  }

  let verified = false;
  try {
    verified = verifyWebhookSignature(rawBody, signature, secret);
  } catch {
    // A malformed signature must be rejected, not surfaced as a server error.
    verified = false;
  }
  if (!verified) {
    return res.status(401).json({ error: 'Invalid signature' });
  }

  const webhookId = headerValue(req.headers['x-webhook-id']);
  const event = headerValue(req.headers['x-webhook-event']);
  if (!webhookId || !event) {
    return res.status(400).json({ error: 'Missing webhook ID or event' });
  }

  let payload;
  try {
    payload = JSON.parse(rawBody);
  } catch {
    return res.status(400).json({ error: 'Invalid JSON body' });
  }

  try {
    // Quick database write (webhook received). Upsert rather than create: a
    // redelivered webhook would otherwise hit the unique webhookId and fail
    // with a 500 on every retry.
    await db.webhookReceived.upsert({
      where: { webhookId },
      update: {},
      create: {
        webhookId,
        event,
        payload,
        receivedAt: new Date(),
      },
    });

    // Immediately enqueue for processing (don't block on process).
    // Uses the module-level queue instead of opening a new Redis connection
    // per request.
    await processingQueue.add(
      {
        webhookId,
        event,
        payload,
      },
      {
        jobId: webhookId, // Idempotency: same jobId won't duplicate
        // Without `attempts` a failed job is never retried.
        attempts: 5,
        backoff: { type: 'exponential', delay: 2000 },
      }
    );

    // Return 202 Accepted immediately
    return res.status(202).json({
      received: true,
      webhookId,
    });
  } catch (err) {
    console.error('Webhook enqueue failed:', err);
    return res.status(500).json({ error: 'Failed to process webhook' });
  }
}

// Process webhooks from queue
// Queue that carries received webhooks from the HTTP handler to the worker.
const processingQueue = new Queue('webhook-processing');

// Worker: dispatches each webhook to its event handler and records the outcome
// on the matching webhookReceived row.
processingQueue.process(async (job) => {
  const { webhookId, event, payload } = job.data;

  try {
    switch (event) {
      case 'order.created':
        await handleOrderCreated(payload);
        break;
      case 'payment.completed':
        await handlePaymentCompleted(payload);
        break;
      default:
        // Unknown events are logged and then recorded as processed below.
        console.warn(`Unknown webhook event: ${event}`);
    }

    // Mark as processed
    // NOTE(review): if this update fails the job is retried and the event
    // handler runs again; handlers should tolerate that.
    await db.webhookReceived.update({
      where: { webhookId },
      data: {
        processedAt: new Date(),
        status: 'processed',
      },
    });
  } catch (err) {
    // Caught values are not guaranteed to be Error instances.
    const message = err instanceof Error ? err.message : String(err);

    // Mark as failed (retry later). If recording the failure itself fails, log
    // it and still surface the original error.
    try {
      await db.webhookReceived.update({
        where: { webhookId },
        data: {
          status: 'failed',
          error: message,
        },
      });
    } catch (statusErr) {
      console.error(
        `Could not record failure for webhook ${webhookId}:`,
        statusErr
      );
    }

    throw err; // Re-throw to trigger retry
  }
});

Delivery Status Tracking

Track webhook delivery status for debugging and customer support.

// schema.prisma
// One row per webhook, tracking its delivery state.
model WebhookDelivery {
  id              String    @id @default(cuid())
  // Unique so a delivery can be looked up directly by webhook ID.
  webhookId       String    @unique
  event           String
  recipientUrl    String

  status          String    // 'pending' | 'delivered' | 'failed' | 'dlq'
  // Defaults to the insert time only; writers must set it on each status change.
  statusUpdatedAt DateTime  @default(now())

  deliveredAt     DateTime?
  failedAt        DateTime?

  attemptCount    Int       @default(0)
  lastError       String?

  receivedAt      DateTime  @default(now())
  createdAt       DateTime  @default(now())

  // Indexes on the columns declared below.
  @@index([event])
  @@index([status])
  @@index([recipientUrl])
  @@index([createdAt])
}

// Query delivery status
/**
 * Looks up the delivery record for a webhook and returns a status summary.
 *
 * @param webhookId - ID of the webhook to look up.
 * @returns The requested ID plus status fields; every field except `webhookId`
 *   is `undefined` when no delivery record exists.
 */
export async function getWebhookStatus(webhookId: string) {
  const where = { webhookId };
  const record = await db.webhookDelivery.findUnique({ where });

  const status = record?.status;
  const attemptCount = record?.attemptCount;
  const deliveredAt = record?.deliveredAt;
  const lastError = record?.lastError;

  return { webhookId, status, attemptCount, deliveredAt, lastError };
}

Testing Webhooks Locally

Use ngrok or Smee to expose your local server to webhook senders.

# ngrok: expose localhost:3000 to internet
ngrok http 3000
# https://1234-56-78-90.ngrok.io

# Set in environment:
# WEBHOOK_CALLBACK_URL=https://1234-56-78-90.ngrok.io/api/webhooks/events

# Send test webhook
# The signature is the hex HMAC-SHA256 of the exact request body, keyed with
# the same WEBHOOK_SECRET the receiver uses. Keep the body compact (no extra
# whitespace) and send it byte-for-byte as signed.
BODY='{"event":"test.event","data":{"test":true}}'
SIGNATURE=$(printf '%s' "$BODY" | openssl dgst -sha256 -hmac "$WEBHOOK_SECRET" | awk '{print $NF}')

curl -X POST https://1234-56-78-90.ngrok.io/api/webhooks/events \
  -H "Content-Type: application/json" \
  -H "X-Webhook-Signature: $SIGNATURE" \
  -H "X-Webhook-ID: test-123" \
  -H "X-Webhook-Event: test.event" \
  -d "$BODY"

Smee for request inspection:

# Create relay URL at https://smee.io
# Forward all requests to the local webhook route
# (the target must be a concrete route such as /api/webhooks/events)
smee -u https://smee.io/abc123 -t http://localhost:3000/api/webhooks/events

# Monitor deliveries at https://smee.io/abc123

Conclusion

Reliable webhooks require cryptographic signatures, idempotency keys, exponential backoff with jitter, and dead-letter queues. Decouple receipt from processing with job queues. Monitor delivery status and alert on critical event failures.

Build webhook systems that don't lose data, don't duplicate processing, and don't wake you up at 3am.