Published on

AI Cost Monitoring — Tracking Every Dollar Spent on LLM APIs

Authors

Introduction

Your LLM API bill arrives: $50k for the month. You have no idea which features caused it, which users are most expensive, or where to cut costs. Cost visibility is the fastest path to profitability.

This post covers per-request cost tracking, attribution by user and feature, anomaly detection, and budget forecasting.

Per-Request Cost Calculation

Every LLM call has a cost based on input and output tokens. Calculate it immediately after the response:

/**
 * Pricing terms for one LLM model, used to convert token counts into USD.
 */
interface ModelPricing {
  /** Identifier of the model these prices apply to. */
  modelName: string;
  /** USD charged per one million input tokens. */
  inputPricePerMillion: number; // e.g., 3.00 for Sonnet
  /** USD charged per one million output tokens. */
  outputPricePerMillion: number; // e.g., 15.00 for Sonnet
  /** Flat USD amount added on top of the token charges for each call. */
  costPerRequest: number; // Fixed cost per API call, if any
}

/**
 * Cost breakdown of a single LLM request.
 */
interface RequestCost {
  /** Caller-supplied identifier of the request being priced. */
  requestId: string;
  /** Model whose pricing was applied. */
  modelName: string;
  /** Number of input tokens billed. */
  inputTokens: number;
  /** Number of output tokens billed. */
  outputTokens: number;
  /** USD charged for the input tokens. */
  inputCostUSD: number;
  /** USD charged for the output tokens. */
  outputCostUSD: number;
  /** Input cost + output cost + the model's fixed per-request cost. */
  totalCostUSD: number;
  /** When the cost record was created. */
  timestamp: Date;
}

/**
 * Pricing table keyed by model identifier.
 *
 * Prices are USD per one million tokens; `costPerRequest` is a flat per-call
 * surcharge (zero for both models listed here).
 */
const MODEL_PRICING: Record<string, ModelPricing> = {
  "claude-3-5-sonnet-20241022": {
    modelName: "claude-3-5-sonnet-20241022",
    inputPricePerMillion: 3.0,
    outputPricePerMillion: 15.0,
    costPerRequest: 0,
  },
  "claude-3-5-haiku-20241022": {
    modelName: "claude-3-5-haiku-20241022",
    inputPricePerMillion: 0.8,
    outputPricePerMillion: 4.0,
    costPerRequest: 0,
  },
};

/**
 * Computes the USD cost of one LLM call from its token usage.
 *
 * @param requestId - Identifier copied verbatim into the result.
 * @param modelName - Key into `MODEL_PRICING`.
 * @param inputTokens - Input tokens consumed; must be finite and >= 0.
 * @param outputTokens - Output tokens produced; must be finite and >= 0.
 * @returns The per-direction and total cost, timestamped at call time.
 * @throws Error if `modelName` has no entry of its own in `MODEL_PRICING`.
 * @throws RangeError if a token count is negative, NaN, or infinite.
 */
function calculateRequestCost(
  requestId: string,
  modelName: string,
  inputTokens: number,
  outputTokens: number
): RequestCost {
  // Only accept keys defined on the table itself. A plain index lookup also
  // resolves inherited names such as "toString" or "__proto__" to truthy
  // values, which would bypass the unknown-model guard and yield NaN costs.
  const pricing = Object.prototype.hasOwnProperty.call(MODEL_PRICING, modelName)
    ? MODEL_PRICING[modelName]
    : undefined;
  if (!pricing) {
    throw new Error(`Unknown model: ${modelName}`);
  }

  // Reject counts that would produce negative or NaN costs and silently
  // corrupt every aggregate built on top of this record.
  const tokenCounts: Array<[string, number]> = [
    ["inputTokens", inputTokens],
    ["outputTokens", outputTokens],
  ];
  for (const [label, count] of tokenCounts) {
    if (!Number.isFinite(count) || count < 0) {
      throw new RangeError(
        `${label} must be a finite, non-negative number (got ${count})`
      );
    }
  }

  const inputCostUSD = (inputTokens * pricing.inputPricePerMillion) / 1000000;
  const outputCostUSD =
    (outputTokens * pricing.outputPricePerMillion) / 1000000;
  const totalCostUSD =
    inputCostUSD + outputCostUSD + pricing.costPerRequest;

  return {
    requestId,
    modelName,
    inputTokens,
    outputTokens,
    inputCostUSD,
    outputCostUSD,
    totalCostUSD,
    timestamp: new Date(),
  };
}

export { calculateRequestCost, RequestCost, ModelPricing };

Cost Attribution by Feature, User, and Team

Track which feature or user generated each cost:

/**
 * One request's cost, tagged with who and what incurred it.
 */
interface CostAttribution {
  /** Identifier of the request the cost belongs to. */
  requestId: string;
  /** Cost of the request in USD. */
  costUSD: number;
  /** User the cost is attributed to. */
  userId: string;
  /** Team the cost is attributed to. */
  teamId: string;
  /** Product feature the cost is attributed to. */
  featureId: string; // e.g., "search-summarization", "code-generation"
  /** Endpoint recorded for the request. */
  endpoint: string;
  /** When the attribution record was created. */
  timestamp: Date;
}

/**
 * Filters accepted by attribution queries: at most one-or-more dimension
 * (user, team, feature) plus a mandatory date range.
 *
 * `startDate`/`endDate` are not fields of `CostAttribution`, so the query
 * shape needs its own type rather than `Partial<CostAttribution>`.
 */
type AttributionQuery = Partial<
  Pick<CostAttribution, "userId" | "teamId" | "featureId">
> & {
  startDate: Date;
  endDate: Date;
};

/**
 * Records per-request costs against a user/team/feature and reports totals
 * per dimension over a date range.
 */
class CostAttributor {
  /**
   * Builds an attribution record for a request and persists it.
   *
   * @param requestId - Identifier of the request.
   * @param cost - Cost of the request in USD.
   * @param userId - User the cost belongs to.
   * @param context - Optional dimensions; missing or empty values fall back
   *   to "default" (team) and "unknown" (feature, endpoint).
   * @returns The record that was persisted.
   */
  async attributeRequestCost(
    requestId: string,
    cost: number,
    userId: string,
    context: {
      featureId?: string;
      endpoint?: string;
      teamId?: string;
    } = {}
  ): Promise<CostAttribution> {
    const attribution: CostAttribution = {
      requestId,
      costUSD: cost,
      userId,
      // `||` (not `??`) so empty strings also fall back to the placeholder.
      teamId: context.teamId || "default",
      featureId: context.featureId || "unknown",
      endpoint: context.endpoint || "unknown",
      timestamp: new Date(),
    };

    // Persist to database
    await this.persistAttribution(attribution);

    return attribution;
  }

  /** Total USD attributed to `userId` for the given date range. */
  async getUserCost(
    userId: string,
    startDate: Date,
    endDate: Date
  ): Promise<number> {
    return this.sumCosts({ userId, startDate, endDate });
  }

  /** Total USD attributed to `featureId` for the given date range. */
  async getFeatureCost(
    featureId: string,
    startDate: Date,
    endDate: Date
  ): Promise<number> {
    return this.sumCosts({ featureId, startDate, endDate });
  }

  /** Total USD attributed to `teamId` for the given date range. */
  async getTeamCost(
    teamId: string,
    startDate: Date,
    endDate: Date
  ): Promise<number> {
    return this.sumCosts({ teamId, startDate, endDate });
  }

  /** Runs the query and adds up `costUSD` over every matching record. */
  private async sumCosts(query: AttributionQuery): Promise<number> {
    const attributions = await this.queryAttributions(query);
    return attributions.reduce((sum, a) => sum + a.costUSD, 0);
  }

  /** Storage hook for a single record; placeholder with no implementation. */
  private async persistAttribution(
    attribution: CostAttribution
  ): Promise<void> {
    // Store in PostgreSQL, DynamoDB, etc.
  }

  /** Storage hook for lookups; placeholder that always returns no records. */
  private async queryAttributions(
    filters: AttributionQuery
  ): Promise<CostAttribution[]> {
    // Query database
    return [];
  }
}

export { CostAttributor, CostAttribution };

Redis Cost Accumulator for Real-Time Tracking

For sub-second cost tracking without hitting the database on every request, use Redis:

import Redis from "ioredis";

/**
 * Snapshot of a user's running totals for the current hour and current day.
 */
interface CostAccumulator {
  /** Requests counted in the current hourly bucket. */
  requests_1h: number;
  /** USD accumulated in the current hourly bucket. */
  cost_1h: number;
  /** Requests counted in the current daily bucket. */
  requests_1d: number;
  /** USD accumulated in the current daily bucket. */
  cost_1d: number;
  /** Tokens counted in the current hourly bucket. */
  tokens_1h: number;
  /** Tokens counted in the current daily bucket. */
  tokens_1d: number;
}

/**
 * Keeps per-user request, cost, and token counters in Redis, bucketed by UTC
 * hour and UTC day, so running totals can be read without a database query.
 */
class RedisCostAccumulator {
  /** Counters are kept for 30 days after their last update. */
  private static readonly RETENTION_SECONDS = 30 * 24 * 60 * 60;

  private readonly redis: Redis;

  /**
   * @param redisUrl - Connection URL passed to the ioredis client.
   */
  constructor(redisUrl: string = "redis://localhost:6379") {
    this.redis = new Redis(redisUrl);
  }

  /**
   * Adds one request to the user's current hourly and daily counters.
   *
   * @param userId - User whose counters are incremented.
   * @param featureId - Accepted for call-site compatibility; it is not part
   *   of the key, so counters are per user only.
   * @param costUSD - Cost of the request in USD.
   * @param tokens - Token count to add; sent via INCRBY, so it must be an
   *   integer.
   */
  async recordCost(
    userId: string,
    featureId: string,
    costUSD: number,
    tokens: number
  ): Promise<void> {
    const { hourKey, dayKey } = this.bucketKeys(userId, new Date());
    const counterKeys = [hourKey, dayKey].flatMap((bucket) => [
      `${bucket}:requests`,
      `${bucket}:cost`,
      `${bucket}:tokens`,
    ]);

    await Promise.all([
      // Increment 1-hour and 1-day accumulators
      this.redis.incrby(`${hourKey}:requests`, 1),
      this.redis.incrbyfloat(`${hourKey}:cost`, costUSD),
      this.redis.incrby(`${hourKey}:tokens`, tokens),
      this.redis.incrby(`${dayKey}:requests`, 1),
      this.redis.incrbyfloat(`${dayKey}:cost`, costUSD),
      this.redis.incrby(`${dayKey}:tokens`, tokens),
      // Expire the counter keys themselves. The bare bucket prefix is never
      // written, so an EXPIRE on it is a no-op and the counters would never
      // be evicted. Issued after the increments so each key exists when its
      // EXPIRE is processed (assumes the client sends commands in call
      // order); a missed TTL is re-applied on the next call regardless.
      ...counterKeys.map((key) =>
        this.redis.expire(key, RedisCostAccumulator.RETENTION_SECONDS)
      ),
    ]);
  }

  /**
   * Reads the user's counters for the current UTC hour and day.
   *
   * @returns Totals for both buckets; a counter that has no key yet reads 0.
   */
  async getUserCostAccumulator(userId: string): Promise<CostAccumulator> {
    const { hourKey, dayKey } = this.bucketKeys(userId, new Date());

    const [
      requests1h,
      cost1h,
      tokens1h,
      requests1d,
      cost1d,
      tokens1d,
    ] = await Promise.all([
      this.redis.get(`${hourKey}:requests`),
      this.redis.get(`${hourKey}:cost`),
      this.redis.get(`${hourKey}:tokens`),
      this.redis.get(`${dayKey}:requests`),
      this.redis.get(`${dayKey}:cost`),
      this.redis.get(`${dayKey}:tokens`),
    ]);

    return {
      requests_1h: parseInt(requests1h || "0", 10),
      cost_1h: parseFloat(cost1h || "0"),
      requests_1d: parseInt(requests1d || "0", 10),
      cost_1d: parseFloat(cost1d || "0"),
      tokens_1h: parseInt(tokens1h || "0", 10),
      tokens_1d: parseInt(tokens1d || "0", 10),
    };
  }

  /** Closes the Redis connection. */
  async close(): Promise<void> {
    await this.redis.quit();
  }

  /**
   * Builds the key prefixes for the hour and day containing `at`.
   * `toISOString()` is UTC, so buckets roll over on UTC boundaries:
   * the first 13 chars are `YYYY-MM-DDTHH`, the first 10 are `YYYY-MM-DD`.
   */
  private bucketKeys(
    userId: string,
    at: Date
  ): { hourKey: string; dayKey: string } {
    const iso = at.toISOString();
    return {
      hourKey: `cost:1h:${userId}:${iso.slice(0, 13)}`,
      dayKey: `cost:1d:${userId}:${iso.slice(0, 10)}`,
    };
  }
}

export { RedisCostAccumulator, CostAccumulator };

Monthly Budget Projection

Use current spend to forecast month-end costs:

/**
 * Month-end spend forecast derived from month-to-date spend.
 */
interface BudgetProjection {
  /** Month-to-date spend in USD. */
  currentSpendUSD: number;
  /** Current spend plus the daily average applied to the remaining days. */
  projectedMonthEndUSD: number;
  /** Days left in the month after the elapsed days are subtracted. */
  daysRemaining: number;
  /** Month-to-date spend divided by the number of elapsed days. */
  dailyAverageUSD: number;
  /** True when the projected month-end spend is above the budget. */
  willExceedBudget: boolean;
  /**
   * Whole days until spend reaches the budget at the current daily average;
   * null when the projection stays within budget.
   */
  daysUntilBudgetExceeded: number | null;
}

/**
 * Forecasts month-end spend by extrapolating the month-to-date daily average.
 */
class BudgetProjector {
  /**
   * Projects the user's spend to the end of the current calendar month.
   *
   * @param userId - User whose month-to-date spend is projected.
   * @param monthlyBudgetUSD - Budget the projection is compared against.
   * @param now - Reference instant; defaults to the current time. Calendar
   *   fields are read in the local time zone.
   * @returns The projection. `daysUntilBudgetExceeded` is null when the
   *   budget is not projected to be exceeded and 0 when it already has been.
   */
  async projectMonthlyBudget(
    userId: string,
    monthlyBudgetUSD: number,
    now: Date = new Date()
  ): Promise<BudgetProjection> {
    // Day-of-month counts today as elapsed. Unlike a millisecond difference
    // it is never 0 (midnight on the 1st) and is unaffected by 23/25-hour
    // days around daylight-saving changes.
    const daysElapsed = now.getDate();
    // Day 0 of next month is the last day of this month.
    const daysInMonth = new Date(
      now.getFullYear(),
      now.getMonth() + 1,
      0
    ).getDate();
    const daysRemaining = daysInMonth - daysElapsed;

    // Query actual spend from database
    const currentSpendUSD = await this.getMonthlySpend(userId);

    const dailyAverageUSD = currentSpendUSD / daysElapsed;
    const projectedMonthEndUSD =
      currentSpendUSD + dailyAverageUSD * daysRemaining;

    const willExceedBudget = projectedMonthEndUSD > monthlyBudgetUSD;

    let daysUntilBudgetExceeded: number | null = null;
    if (willExceedBudget) {
      const budgetLeftUSD = monthlyBudgetUSD - currentSpendUSD;
      // Already over budget (or no positive burn rate to divide by): report
      // 0 rather than a negative or non-finite day count.
      daysUntilBudgetExceeded =
        budgetLeftUSD > 0 && dailyAverageUSD > 0
          ? Math.floor(budgetLeftUSD / dailyAverageUSD)
          : 0;
    }

    return {
      currentSpendUSD,
      projectedMonthEndUSD,
      daysRemaining,
      dailyAverageUSD,
      willExceedBudget,
      daysUntilBudgetExceeded,
    };
  }

  /** Month-to-date spend lookup; placeholder returning a fixed sample value. */
  private async getMonthlySpend(userId: string): Promise<number> {
    // Query database for month-to-date spend
    return 1234.56;
  }
}

export { BudgetProjector, BudgetProjection };

Anomaly Detection: 10x Spend Alert

Detect unusual spikes in spending:

/**
 * Result of comparing today's spend against the recent daily baseline.
 */
interface SpendAnomaly {
  /** True when the multiplier is above the detector's anomaly threshold. */
  detected: boolean;
  /** Spend so far today, in USD. */
  currentDailySpendUSD: number;
  /** Average daily spend over the baseline window, in USD. */
  baselineDailySpendUSD: number;
  /** Today's spend divided by the baseline daily spend. */
  multiplier: number;
  /** Suggested action; empty when spend is not elevated. */
  recommendation: string;
}

/**
 * Flags users whose spend today is a large multiple of their recent daily
 * average.
 */
class CostAnomalyDetector {
  private readonly ANOMALY_THRESHOLD = 10; // Alert if 10x normal
  private readonly ELEVATED_THRESHOLD = 5; // Warn (without alerting) above 5x

  /**
   * Compares today's spend with the average of the recent daily history.
   *
   * @param userId - User to evaluate.
   * @returns The comparison. `multiplier` is 0 when both baseline and today's
   *   spend are 0, and `Infinity` when there is spend today but the baseline
   *   is 0 (which counts as detected).
   */
  async detectAnomalies(userId: string): Promise<SpendAnomaly> {
    const [history, today] = await Promise.all([
      this.getLast7DaysSpend(userId),
      this.getTodaysSpend(userId),
    ]);

    // Average over the days actually returned; dividing by a fixed 7 would
    // understate the baseline (and overstate the multiplier) whenever fewer
    // days of history exist.
    const baselineDaily =
      history.length > 0
        ? history.reduce((a, b) => a + b, 0) / history.length
        : 0;

    // Avoid 0/0 = NaN when there is neither history nor spend today.
    let multiplier: number;
    if (baselineDaily > 0) {
      multiplier = today / baselineDaily;
    } else {
      multiplier = today > 0 ? Infinity : 0;
    }

    const detected = multiplier > this.ANOMALY_THRESHOLD;

    let recommendation = "";
    if (detected) {
      recommendation = `CRITICAL: ${this.ANOMALY_THRESHOLD}x normal spend. Consider limiting LLM usage.`;
    } else if (multiplier > this.ELEVATED_THRESHOLD) {
      recommendation =
        "Elevated spend detected. Review recent feature changes.";
    }

    return {
      detected,
      currentDailySpendUSD: today,
      baselineDailySpendUSD: baselineDaily,
      multiplier,
      recommendation,
    };
  }

  /** Recent daily spend lookup; placeholder returning fixed sample values. */
  private async getLast7DaysSpend(userId: string): Promise<number[]> {
    // Query database for last 7 days
    return [100, 95, 102, 98, 101, 99, 97];
  }

  /** Today's spend lookup; placeholder returning a fixed sample value. */
  private async getTodaysSpend(userId: string): Promise<number> {
    // Query database for today's spend
    return 1050;
  }
}

export { CostAnomalyDetector, SpendAnomaly };

Conclusion

Cost visibility transforms LLM profitability. Track per-request costs, attribute them to features and users, use Redis for real-time aggregation, project monthly budgets, and alert on anomalies.

With this infrastructure, you'll spot cost problems within hours, not after the bill arrives. More importantly, you'll know exactly which features are worth investing in based on ROI.