- Published on
AI Cost Monitoring — Tracking Every Dollar Spent on LLM APIs
- Authors

- Name
- Sanjeev Sharma
- @webcoderspeed1
Introduction
Your LLM API bill arrives: $50k for the month. You have no idea which features caused it, which users are most expensive, or where to cut costs. Cost visibility is the fastest path to profitability.
This post covers per-request cost tracking, attribution by user and feature, anomaly detection, and budget forecasting.
- Per-Request Cost Calculation
- Cost Attribution by Feature, User, and Team
- Redis Cost Accumulator for Real-Time Tracking
- Monthly Budget Projection
- Anomaly Detection: 10x Spend Alert
- Conclusion
Per-Request Cost Calculation
Every LLM call has a cost based on input and output tokens. Calculate it immediately after the response:
/** Pricing for a single model. All prices are in USD. */
interface ModelPricing {
  modelName: string;
  /** USD per one million input tokens, e.g. 3.00 for Sonnet. */
  inputPricePerMillion: number;
  /** USD per one million output tokens, e.g. 15.00 for Sonnet. */
  outputPricePerMillion: number;
  /** Fixed cost per API call, if any. */
  costPerRequest: number;
}

/** Cost breakdown of a single LLM request. */
interface RequestCost {
  requestId: string;
  modelName: string;
  inputTokens: number;
  outputTokens: number;
  inputCostUSD: number;
  outputCostUSD: number;
  /** inputCostUSD + outputCostUSD + the model's fixed per-request cost. */
  totalCostUSD: number;
  /** When the cost was calculated (not when the request was sent). */
  timestamp: Date;
}

/** Price table keyed by model identifier. */
const MODEL_PRICING: Record<string, ModelPricing> = {
  "claude-3-5-sonnet-20241022": {
    modelName: "claude-3-5-sonnet-20241022",
    inputPricePerMillion: 3.0,
    outputPricePerMillion: 15.0,
    costPerRequest: 0,
  },
  "claude-3-5-haiku-20241022": {
    modelName: "claude-3-5-haiku-20241022",
    inputPricePerMillion: 0.8,
    outputPricePerMillion: 4.0,
    costPerRequest: 0,
  },
};

/**
 * Computes the USD cost of one LLM call from its token counts.
 *
 * @param requestId - Caller-supplied identifier, copied into the result.
 * @param modelName - Key into `MODEL_PRICING`.
 * @param inputTokens - Number of prompt tokens billed.
 * @param outputTokens - Number of completion tokens billed.
 * @returns The per-direction and total cost, stamped with the current time.
 * @throws Error if `modelName` has no entry in `MODEL_PRICING`.
 */
function calculateRequestCost(
  requestId: string,
  modelName: string,
  inputTokens: number,
  outputTokens: number
): RequestCost {
  // Own-property check: a plain `MODEL_PRICING[modelName]` lookup resolves
  // inherited keys such as "constructor", "toString" or "__proto__" to
  // Object.prototype members, which are truthy, bypass the guard below and
  // silently produce NaN costs.
  const pricing = Object.prototype.hasOwnProperty.call(MODEL_PRICING, modelName)
    ? MODEL_PRICING[modelName]
    : undefined;
  if (!pricing) {
    throw new Error(`Unknown model: ${modelName}`);
  }

  const tokensPerMillion = 1000000;
  const inputCostUSD =
    (inputTokens * pricing.inputPricePerMillion) / tokensPerMillion;
  const outputCostUSD =
    (outputTokens * pricing.outputPricePerMillion) / tokensPerMillion;
  const totalCostUSD = inputCostUSD + outputCostUSD + pricing.costPerRequest;

  return {
    requestId,
    modelName,
    inputTokens,
    outputTokens,
    inputCostUSD,
    outputCostUSD,
    totalCostUSD,
    timestamp: new Date(),
  };
}
export { calculateRequestCost, RequestCost, ModelPricing };
Cost Attribution by Feature, User, and Team
Track which feature or user generated each cost:
/** One request's cost, tagged with who and what caused it. */
interface CostAttribution {
  requestId: string;
  costUSD: number;
  userId: string;
  teamId: string;
  featureId: string; // e.g., "search-summarization", "code-generation"
  endpoint: string;
  timestamp: Date;
}

/**
 * Filters accepted by attribution queries: any subset of the attribution
 * fields plus an optional date range. The range is declared separately
 * because `startDate`/`endDate` are not fields of `CostAttribution`, so a
 * bare `Partial<CostAttribution>` rejects them at compile time.
 */
type CostAttributionFilters = Partial<CostAttribution> & {
  startDate?: Date;
  endDate?: Date;
};

/** Records per-request cost attributions and sums them by user, feature, or team. */
class CostAttributor {
  /**
   * Builds an attribution record for a request and persists it.
   *
   * @param requestId - Identifier of the LLM request.
   * @param cost - Cost of the request in USD.
   * @param userId - User the request is billed to.
   * @param context - Optional tags. A missing (or empty) `teamId` becomes
   *   "default"; a missing (or empty) `featureId`/`endpoint` becomes "unknown".
   * @returns The stored attribution, stamped with the current time.
   */
  async attributeRequestCost(
    requestId: string,
    cost: number,
    userId: string,
    context: {
      featureId?: string;
      endpoint?: string;
      teamId?: string;
    }
  ): Promise<CostAttribution> {
    const attribution: CostAttribution = {
      requestId,
      costUSD: cost,
      userId,
      // `||` rather than `??` is deliberate: empty-string tags also fall back.
      teamId: context.teamId || "default",
      featureId: context.featureId || "unknown",
      endpoint: context.endpoint || "unknown",
      timestamp: new Date(),
    };
    // Persist to database
    await this.persistAttribution(attribution);
    return attribution;
  }

  /** Total USD attributed to `userId` for the given date range. */
  async getUserCost(
    userId: string,
    startDate: Date,
    endDate: Date
  ): Promise<number> {
    return this.sumCosts({ userId, startDate, endDate });
  }

  /** Total USD attributed to `featureId` for the given date range. */
  async getFeatureCost(
    featureId: string,
    startDate: Date,
    endDate: Date
  ): Promise<number> {
    return this.sumCosts({ featureId, startDate, endDate });
  }

  /** Total USD attributed to `teamId` for the given date range. */
  async getTeamCost(
    teamId: string,
    startDate: Date,
    endDate: Date
  ): Promise<number> {
    return this.sumCosts({ teamId, startDate, endDate });
  }

  /** Runs the query for `filters` and adds up `costUSD`; 0 when nothing matches. */
  private async sumCosts(filters: CostAttributionFilters): Promise<number> {
    const attributions = await this.queryAttributions(filters);
    return attributions.reduce((sum, a) => sum + a.costUSD, 0);
  }

  /** Storage hook; currently a no-op placeholder. */
  private async persistAttribution(
    attribution: CostAttribution
  ): Promise<void> {
    // Store in PostgreSQL, DynamoDB, etc.
  }

  /** Query hook; currently a placeholder that matches nothing. */
  private async queryAttributions(
    filters: CostAttributionFilters
  ): Promise<CostAttribution[]> {
    // Query database
    return [];
  }
}
export { CostAttributor, CostAttribution };
Redis Cost Accumulator for Real-Time Tracking
For sub-second cost tracking without hitting the database on every request, use Redis:
import Redis from "ioredis";
/** Snapshot of a user's counters for the current hour and the current day. */
interface CostAccumulator {
  requests_1h: number;
  cost_1h: number;
  requests_1d: number;
  cost_1d: number;
  tokens_1h: number;
  tokens_1d: number;
}

/**
 * Keeps per-user request, cost, and token counters in Redis, bucketed by
 * hour and by day. Bucket names are derived from `Date#toISOString`, so the
 * hour and day boundaries are in UTC.
 */
class RedisCostAccumulator {
  /** Counters are kept for 30 days after their last update. */
  private static readonly KEY_TTL_SECONDS = 30 * 24 * 60 * 60;
  private static readonly COUNTER_SUFFIXES = ["requests", "cost", "tokens"];

  private readonly redis: Redis;

  constructor(redisUrl: string = "redis://localhost:6379") {
    this.redis = new Redis(redisUrl);
  }

  /**
   * Adds one request with the given cost and token count to the user's
   * current hour and day buckets.
   *
   * @param userId - User whose counters are updated.
   * @param featureId - Accepted for interface compatibility; not currently
   *   part of the key, so counters are per-user only.
   * @param costUSD - Cost of the request in USD.
   * @param tokens - Tokens consumed; passed to `INCRBY`.
   *   NOTE(review): presumably must be an integer — confirm callers.
   */
  async recordCost(
    userId: string,
    featureId: string,
    costUSD: number,
    tokens: number
  ): Promise<void> {
    const { hourKey, dayKey } = this.bucketKeys(userId, new Date());
    const ttl = RedisCostAccumulator.KEY_TTL_SECONDS;

    const ops: Promise<unknown>[] = [];
    for (const prefix of [hourKey, dayKey]) {
      // Increment the three counters of this bucket
      ops.push(
        this.redis.incrby(`${prefix}:requests`, 1),
        this.redis.incrbyfloat(`${prefix}:cost`, costUSD),
        this.redis.incrby(`${prefix}:tokens`, tokens)
      );
      // The TTL must be set on the concrete counter keys. Expiring the bare
      // prefix targets a key that is never written, so nothing would ever
      // expire and the counters would accumulate forever.
      for (const suffix of RedisCostAccumulator.COUNTER_SUFFIXES) {
        ops.push(this.redis.expire(`${prefix}:${suffix}`, ttl));
      }
    }
    await Promise.all(ops);
  }

  /**
   * Reads the user's counters for the current hour and day.
   * Counters that do not exist are reported as 0.
   */
  async getUserCostAccumulator(userId: string): Promise<CostAccumulator> {
    const { hourKey, dayKey } = this.bucketKeys(userId, new Date());
    const [
      requests1h,
      cost1h,
      tokens1h,
      requests1d,
      cost1d,
      tokens1d,
    ] = await Promise.all([
      this.redis.get(`${hourKey}:requests`),
      this.redis.get(`${hourKey}:cost`),
      this.redis.get(`${hourKey}:tokens`),
      this.redis.get(`${dayKey}:requests`),
      this.redis.get(`${dayKey}:cost`),
      this.redis.get(`${dayKey}:tokens`),
    ]);
    return {
      requests_1h: parseInt(requests1h || "0", 10),
      cost_1h: parseFloat(cost1h || "0"),
      requests_1d: parseInt(requests1d || "0", 10),
      cost_1d: parseFloat(cost1d || "0"),
      tokens_1h: parseInt(tokens1h || "0", 10),
      tokens_1d: parseInt(tokens1d || "0", 10),
    };
  }

  /** Closes the Redis connection. */
  async close(): Promise<void> {
    await this.redis.quit();
  }

  /**
   * Key prefixes for the hour and day buckets containing `at`.
   * `slice(0, 13)` keeps `YYYY-MM-DDTHH`; `slice(0, 10)` keeps `YYYY-MM-DD`.
   */
  private bucketKeys(
    userId: string,
    at: Date
  ): { hourKey: string; dayKey: string } {
    const iso = at.toISOString();
    return {
      hourKey: `cost:1h:${userId}:${iso.slice(0, 13)}`,
      dayKey: `cost:1d:${userId}:${iso.slice(0, 10)}`,
    };
  }
}
export { RedisCostAccumulator, CostAccumulator };
Monthly Budget Projection
Use current spend to forecast month-end costs:
/** Month-end spend forecast derived from month-to-date spend. */
interface BudgetProjection {
  currentSpendUSD: number;
  projectedMonthEndUSD: number;
  /** Calendar days left in the month after today. */
  daysRemaining: number;
  /** Month-to-date spend divided by the days elapsed so far (today included). */
  dailyAverageUSD: number;
  willExceedBudget: boolean;
  /**
   * Whole days until the budget is used up at the current daily average;
   * 0 if it is already used up; null if the budget is not projected to be exceeded.
   */
  daysUntilBudgetExceeded: number | null;
}

/** Linearly extrapolates month-to-date spend to the end of the month. */
class BudgetProjector {
  /**
   * Projects month-end spend for a user and compares it with the budget.
   *
   * @param userId - User whose month-to-date spend is projected.
   * @param monthlyBudgetUSD - Budget for the whole calendar month.
   * @param now - Reference instant; defaults to the current time. Calendar
   *   fields are read in the local time zone.
   */
  async projectMonthlyBudget(
    userId: string,
    monthlyBudgetUSD: number,
    now: Date = new Date()
  ): Promise<BudgetProjection> {
    // Day of month, counting today as elapsed. Unlike a millisecond
    // difference rounded up, this is never 0 (no division by zero at
    // midnight on the 1st) and never exceeds the month length when a DST
    // change makes the month an hour longer.
    const daysElapsed = now.getDate();
    // Day 0 of the next month is the last day of this month.
    const daysInMonth = new Date(
      now.getFullYear(),
      now.getMonth() + 1,
      0
    ).getDate();
    const daysRemaining = daysInMonth - daysElapsed;

    // Query actual spend from database
    const currentSpendUSD = await this.getMonthlySpend(userId);
    const dailyAverageUSD = currentSpendUSD / daysElapsed;
    const projectedMonthEndUSD =
      currentSpendUSD + dailyAverageUSD * daysRemaining;
    const willExceedBudget = projectedMonthEndUSD > monthlyBudgetUSD;

    let daysUntilBudgetExceeded: number | null = null;
    if (willExceedBudget) {
      const headroomUSD = monthlyBudgetUSD - currentSpendUSD;
      // Already over budget (or no positive burn rate to divide by): report
      // 0 rather than a negative or non-finite day count.
      daysUntilBudgetExceeded =
        headroomUSD <= 0 || dailyAverageUSD <= 0
          ? 0
          : Math.floor(headroomUSD / dailyAverageUSD);
    }

    return {
      currentSpendUSD,
      projectedMonthEndUSD,
      daysRemaining,
      dailyAverageUSD,
      willExceedBudget,
      daysUntilBudgetExceeded,
    };
  }

  /** Spend lookup hook; currently returns a fixed placeholder amount. */
  private async getMonthlySpend(userId: string): Promise<number> {
    // Query database for month-to-date spend
    return 1234.56;
  }
}
export { BudgetProjector, BudgetProjection };
Anomaly Detection: 10x Spend Alert
Detect unusual spikes in spending:
/** Result of comparing today's spend with the recent daily baseline. */
interface SpendAnomaly {
  detected: boolean;
  currentDailySpendUSD: number;
  baselineDailySpendUSD: number;
  /**
   * Today's spend divided by the baseline. 0 when both are zero;
   * Infinity when there is spend today but the baseline is zero.
   */
  multiplier: number;
  /** Human-readable advice; empty when spend is not elevated. */
  recommendation: string;
}

/** Flags a user whose spend today is a large multiple of their recent daily average. */
class CostAnomalyDetector {
  private readonly ANOMALY_THRESHOLD = 10; // Alert if 10x normal
  private readonly ELEVATED_THRESHOLD = 5; // Warn (without alerting) above 5x normal

  /**
   * Compares today's spend with the average of the recent daily history.
   *
   * @param userId - User whose spend is checked.
   * @returns The baseline, today's spend, their ratio, whether the ratio
   *   exceeds the anomaly threshold, and a recommendation.
   */
  async detectAnomalies(userId: string): Promise<SpendAnomaly> {
    const [history, today] = await Promise.all([
      this.getLast7DaysSpend(userId),
      this.getTodaysSpend(userId),
    ]);

    // Average over the days actually returned: dividing by a fixed 7 would
    // understate the baseline (and overstate the multiplier) whenever fewer
    // days of history exist.
    const baselineDaily =
      history.length > 0
        ? history.reduce((a, b) => a + b, 0) / history.length
        : 0;

    // Avoid 0/0 = NaN when there is neither history nor spend today.
    let multiplier: number;
    if (baselineDaily > 0) {
      multiplier = today / baselineDaily;
    } else {
      multiplier = today > 0 ? Infinity : 0;
    }

    const detected = multiplier > this.ANOMALY_THRESHOLD;

    let recommendation = "";
    if (detected) {
      recommendation =
        "CRITICAL: 10x normal spend. Consider limiting LLM usage.";
    } else if (multiplier > this.ELEVATED_THRESHOLD) {
      recommendation =
        "Elevated spend detected. Review recent feature changes.";
    }

    return {
      detected,
      currentDailySpendUSD: today,
      baselineDailySpendUSD: baselineDaily,
      multiplier,
      recommendation,
    };
  }

  /** History lookup hook; currently returns fixed placeholder values. */
  private async getLast7DaysSpend(userId: string): Promise<number[]> {
    // Query database for last 7 days
    return [100, 95, 102, 98, 101, 99, 97];
  }

  /** Today's-spend lookup hook; currently returns a fixed placeholder value. */
  private async getTodaysSpend(userId: string): Promise<number> {
    // Query database for today's spend
    return 1050;
  }
}
export { CostAnomalyDetector, SpendAnomaly };
Conclusion
Cost visibility transforms LLM profitability. Track per-request costs, attribute them to features and users, use Redis for real-time aggregation, project monthly budgets, and alert on anomalies.
With this infrastructure, you'll spot cost problems within hours, not after the bill arrives. More importantly, you'll know exactly which features are worth investing in based on ROI.