Published on

Building a Conversational AI Backend — Context Management, Memory, and Multi-Turn Handling

Authors

Introduction

Conversational AI requires careful state management across multiple turns. This guide covers session storage, context windowing, intent detection, and handoff to human agents in production systems.

Conversation State Storage with Redis

Store conversation state with automatic expiration:

/** A single chat turn stored in the session transcript. */
interface Message {
  role: 'user' | 'assistant' | 'system';
  content: string;
  // Unix epoch milliseconds (set via Date.now()).
  timestamp: number;
  // Optional NLU annotations: classified intent, plus entities encoded as
  // "type:value" strings (see processTurn).
  metadata?: { intent?: string; entities?: string[] };
}

/** Full conversation state, persisted in Redis under `conversation:<sessionId>`. */
interface ConversationSession {
  sessionId: string;
  userId: string;
  // Ordered transcript, oldest first.
  messages: Message[];
  createdAt: number;
  lastUpdatedAt: number;
  metadata: {
    topic?: string;
    sentiment?: 'positive' | 'negative' | 'neutral';
    // True once the conversation has been handed off to a human agent.
    escalated?: boolean;
  };
}

/**
 * Persist a conversation session (placeholder — production code would call a
 * real Redis client, e.g. `redis.setex(key, ttlSeconds, payload)`).
 *
 * @param session    Conversation to serialize and store.
 * @param ttlSeconds Expiry for the key; defaults to 24 hours.
 */
async function saveConversationToRedis(
  session: ConversationSession,
  ttlSeconds: number = 86400 // 24 hours
): Promise<void> {
  const key = `conversation:${session.sessionId}`;
  const payload = JSON.stringify(session);

  console.log(`Saved session ${session.sessionId} with TTL ${ttlSeconds}s`);
}

/**
 * Fetch a conversation session by id. Placeholder — production code would be:
 *   const raw = await redis.get(`conversation:${sessionId}`);
 *   return raw ? (JSON.parse(raw) as ConversationSession) : null;
 *
 * @returns The stored session, or null when missing/expired.
 */
async function loadConversationFromRedis(
  sessionId: string
): Promise<ConversationSession | null> {
  return null; // Placeholder until a Redis client is wired in
}

/**
 * Append one message to a stored conversation (load–modify–save).
 * If the session has expired (nothing in Redis), the message is silently
 * dropped — same contract as before.
 */
async function addMessageToSession(
  sessionId: string,
  message: Message
): Promise<void> {
  const stored = await loadConversationFromRedis(sessionId);
  if (stored === null) {
    return;
  }

  stored.messages.push(message);
  stored.lastUpdatedAt = Date.now();

  // Persisting also refreshes the default TTL.
  await saveConversationToRedis(stored);
}

// TTL strategy:
// Active session: 24 hours
// Idle >1 hour: 4 hours
// Escalated to human: 7 days for reference

Context Window Management

LLMs have finite context. Summarize old messages intelligently:

/** A token-budgeted view over a conversation, ready to send to the LLM. */
interface ContextWindow {
  systemPrompt: string;
  // Newest messages that fit within the budget, in chronological order.
  recentMessages: Message[];
  // Present only when older messages were excluded and summarized.
  summary?: string;
  maxTokens: number;
  // Estimated tokens consumed by systemPrompt + recentMessages.
  currentTokens: number;
}

/**
 * Cheap token-count heuristic: roughly one token per four characters.
 * Production code should use a real tokenizer (e.g. tiktoken).
 */
function estimateTokens(text: string): number {
  const CHARS_PER_TOKEN = 4;
  return Math.ceil(text.length / CHARS_PER_TOKEN);
}

function buildContextWindow(
  session: ConversationSession,
  maxTokens: number = 4000
): ContextWindow {
  const systemPrompt = `You are a helpful customer support assistant.
Be concise, accurate, and professional.
If you don''t know something, say so.`;

  let tokensUsed = estimateTokens(systemPrompt);
  const recentMessages: Message[] = [];

  // Work backwards from most recent
  for (let i = session.messages.length - 1; i &gt;= 0; i--) {
    const msg = session.messages[i];
    const msgTokens = estimateTokens(msg.content);

    if (tokensUsed + msgTokens &lt; maxTokens) {
      recentMessages.unshift(msg);
      tokensUsed += msgTokens;
    } else {
      break;
    }
  }

  // If too many messages excluded, summarize older ones
  let summary: string | undefined;
  const excludedMessages = session.messages.slice(
    0,
    session.messages.length - recentMessages.length
  );

  if (excludedMessages.length &gt; 0) {
    summary = summarizeMessages(excludedMessages);
  }

  return {
    systemPrompt,
    recentMessages,
    summary,
    maxTokens,
    currentTokens: tokensUsed
  };
}

/**
 * Collapse a list of (older, excluded) messages into a one-line summary of
 * the intents they covered, in order of first appearance.
 *
 * Fix: removed the dead `resolution` local that was declared but never used.
 */
function summarizeMessages(messages: Message[]): string {
  const topics = new Set<string>();

  for (const msg of messages) {
    // Only messages annotated with an intent contribute to the summary.
    if (msg.metadata?.intent) {
      topics.add(msg.metadata.intent);
    }
  }

  return `Previous conversation covered: ${Array.from(topics).join(', ')}`;
}

/**
 * Assemble the final prompt: system instructions first, then an optional
 * history summary (as a second system message), then the verbatim recent
 * messages. Also computes the completion-token budget.
 */
async function buildPromptWithContext(
  context: ContextWindow
): Promise<{ messages: Message[]; tokenBudget: number }> {
  const now = Date.now();

  const assembled: Message[] = [
    { role: 'system', content: context.systemPrompt, timestamp: now }
  ];

  if (context.summary) {
    assembled.push({
      role: 'system',
      content: `Summary of previous discussion: ${context.summary}`,
      timestamp: now
    });
  }

  assembled.push(...context.recentMessages);

  // Whatever the window did not consume is left for the completion; always
  // reserve at least 100 tokens for the model's response.
  const leftover = context.maxTokens - context.currentTokens;

  return { messages: assembled, tokenBudget: Math.max(100, leftover) };
}

// Result: Fits all recent messages + summary of old context
// Allows 100+ turn conversations

Session Management and User Identity

/** Per-session identity/auth record, stored separately from the transcript. */
interface SessionMetadata {
  sessionId: string;
  userId: string;
  userTier: 'free' | 'pro' | 'enterprise';
  // Bearer token compared by validateSessionToken; may be absent.
  authToken?: string;
  ipAddress: string;
  userAgent: string;
  // Unix epoch ms; sessions older than 24h fail validateSessionToken.
  createdAt: number;
}

/**
 * Create a new session record for a user and persist its metadata.
 *
 * Fix: the tier was previously forced in with `as any`, letting arbitrary
 * strings flow into SessionMetadata. Unknown values now fall back to the
 * most restrictive tier ('free').
 *
 * @returns The freshly generated session id.
 */
async function createSession(
  userId: string,
  userTier: string,
  ipAddress: string,
  userAgent: string
): Promise<string> {
  const sessionId = generateSessionId();

  const validTiers: SessionMetadata['userTier'][] = ['free', 'pro', 'enterprise'];
  const tier = (validTiers as string[]).includes(userTier)
    ? (userTier as SessionMetadata['userTier'])
    : 'free';

  const metadata: SessionMetadata = {
    sessionId,
    userId,
    userTier: tier,
    ipAddress,
    userAgent,
    createdAt: Date.now()
  };

  // Store in Redis with 24h TTL
  await saveSessionMetadata(sessionId, metadata);

  return sessionId;
}

/**
 * Validate a session token: the stored token must be present, match the
 * supplied one, and the session must be younger than 24 hours.
 *
 * Fix: a session whose authToken was absent or empty could previously
 * validate against an equally absent/empty token (e.g. '' === '').
 */
async function validateSessionToken(
  sessionId: string,
  token: string
): Promise<boolean> {
  const metadata = await loadSessionMetadata(sessionId);
  if (!metadata) return false;

  // Sessions without a stored token must never validate.
  if (!metadata.authToken) return false;

  const age = Date.now() - metadata.createdAt;
  const maxAge = 86400000; // 24 hours in ms

  return metadata.authToken === token && age < maxAge;
}

/**
 * Resolve the user identity attached to a session.
 *
 * Fix: the preferences lookup was bound to an unused local; the call is kept
 * (it may warm a cache) but no longer pretends to produce a value we use.
 *
 * @throws Error when the session does not exist.
 */
async function enrichSessionWithUser(
  sessionId: string
): Promise<{ userId: string; userTier: string }> {
  const metadata = await loadSessionMetadata(sessionId);
  if (!metadata) throw new Error('Session not found');

  // NOTE(review): preferences are fetched but not returned — confirm whether
  // callers expect them here or whether this call can be dropped entirely.
  await getUserPreferences(metadata.userId);

  return {
    userId: metadata.userId,
    userTier: metadata.userTier
  };
}

/**
 * Generate a loosely-unique session id of the form `sess_<ms>_<rand>`.
 *
 * Fix: String#substr is deprecated — slice(2, 11) takes the same 9 chars.
 * NOTE(review): Math.random() is not cryptographically secure; if these ids
 * act as bearer credentials, switch to crypto.randomUUID().
 */
function generateSessionId(): string {
  return `sess_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`;
}

/**
 * Persist session identity metadata. Placeholder — a production build would
 * serialize to Redis with a 24h TTL as shown in the comment below.
 */
async function saveSessionMetadata(
  sessionId: string,
  metadata: SessionMetadata
): Promise<void> {
  // Redis: redis.setex(`session:${sessionId}`, 86400, JSON.stringify(metadata));
}

/**
 * Fetch session metadata by id. Placeholder — production code would be:
 *   const raw = await redis.get(`session:${sessionId}`);
 *   return raw ? (JSON.parse(raw) as SessionMetadata) : null;
 *
 * @returns The stored metadata, or null when missing/expired.
 */
async function loadSessionMetadata(
  sessionId: string
): Promise<SessionMetadata | null> {
  return null; // Placeholder until a Redis client is wired in
}

/**
 * Load per-user preferences. Placeholder returning fixed defaults; a real
 * implementation would query the user database.
 */
async function getUserPreferences(
  userId: string
): Promise<{ language: string; timezone: string }> {
  const defaults = { language: 'en', timezone: 'UTC' };
  return defaults;
}

Multi-Turn Context Injection Strategy

Inject relevant context at each turn to improve coherence:

/** Input bundle for a single user turn, consumed by processTurn. */
interface TurnContext {
  sessionId: string;
  turnNumber: number;
  userMessage: string;
  previousResponses: Message[];
  // Intent classified upstream for the incoming user message.
  detectedIntent: string;
  // Entities extracted upstream; flattened to "type:value" when stored.
  entities: Array<{ type: string; value: string }>;
}

/**
 * Run one conversational turn: rebuild the context window for the session,
 * call the chat model, classify the assistant's intent, and persist both the
 * user and assistant messages back to the session.
 *
 * Fix: the completion response is now checked for HTTP success before
 * `data.choices[0]` is read, so API errors surface as clear exceptions
 * instead of "cannot read property of undefined".
 *
 * @throws Error when the session has expired or the completion call fails.
 */
async function processTurn(
  context: TurnContext
): Promise<{ response: string; intent: string; entities: string[] }> {
  const session = await loadConversationFromRedis(context.sessionId);
  if (!session) throw new Error('Session expired');

  const contextWindow = buildContextWindow(session);
  const { messages: prompts, tokenBudget } = await buildPromptWithContext(
    contextWindow
  );

  // Append the incoming user message, carrying upstream NLU annotations.
  const userMsg: Message = {
    role: 'user',
    content: context.userMessage,
    timestamp: Date.now(),
    metadata: {
      intent: context.detectedIntent,
      entities: context.entities.map(e => `${e.type}:${e.value}`)
    }
  };

  prompts.push(userMsg);

  // Request the completion within the remaining token budget.
  const response = await fetch('https://api.openai.com/v1/chat/completions', {
    method: 'POST',
    headers: {
      'Authorization': `Bearer ${process.env.OPENAI_API_KEY}`,
      'Content-Type': 'application/json'
    },
    body: JSON.stringify({
      model: 'gpt-4-turbo',
      messages: prompts,
      temperature: 0.7,
      max_tokens: tokenBudget,
      top_p: 0.95
    })
  });

  if (!response.ok) {
    throw new Error(`Chat completion failed: HTTP ${response.status}`);
  }

  const data = await response.json();
  const responseText = data.choices[0].message.content;

  // Classify the assistant's reply so analytics can track it too.
  const responseIntent = await detectIntent(responseText);

  // Persist both sides of the exchange.
  await addMessageToSession(context.sessionId, userMsg);
  await addMessageToSession(context.sessionId, {
    role: 'assistant',
    content: responseText,
    timestamp: Date.now(),
    metadata: { intent: responseIntent }
  });

  return {
    response: responseText,
    intent: responseIntent,
    entities: context.entities.map(e => e.value)
  };
}

/**
 * Classify a text's intent into a small fixed label set using a cheap model;
 * returns the trimmed, lowercased label text.
 *
 * Fix: HTTP errors are now detected before reading `choices`.
 *
 * @throws Error when the classification request fails at the HTTP level.
 */
async function detectIntent(text: string): Promise<string> {
  const response = await fetch('https://api.openai.com/v1/chat/completions', {
    method: 'POST',
    headers: {
      'Authorization': `Bearer ${process.env.OPENAI_API_KEY}`,
      'Content-Type': 'application/json'
    },
    body: JSON.stringify({
      model: 'gpt-3.5-turbo',
      messages: [
        {
          role: 'user',
          content: `Classify intent: greeting, question, request, complaint, other. Text: "${text}"`
        }
      ],
      temperature: 0, // deterministic labels
      max_tokens: 10
    })
  });

  if (!response.ok) {
    throw new Error(`Intent classification failed: HTTP ${response.status}`);
  }

  const data = await response.json();
  return data.choices[0].message.content.trim().toLowerCase();
}

Topic Tracking Across Turns

Maintain conversation topic across multiple turns:

/** A conversation topic spanning a range of turns. */
interface Topic {
  name: string;
  // Keywords extracted from the message that opened the topic.
  keywords: string[];
  startTurn: number;
  endTurn?: number;
  resolved: boolean;
}

/** Stack-like tracker: currently open topics plus those already closed. */
interface TopicTracker {
  currentTopics: Topic[];
  completedTopics: Topic[];
}

/**
 * Ask the model whether the user's message continues an open topic, starts a
 * new one, or ends the current one.
 *
 * Fix: HTTP errors are now detected before reading `choices`.
 * NOTE(review): JSON.parse on raw model output is unvalidated — the model is
 * not guaranteed to return well-formed JSON; consider a schema check or a
 * structured-output API here.
 *
 * @throws Error on HTTP failure; SyntaxError if the model reply is not JSON.
 */
async function trackTopicTransition(
  userMessage: string,
  currentTopics: Topic[]
): Promise<{
  continuingTopic?: string;
  newTopic?: string;
  topicEnded?: boolean;
}> {
  const response = await fetch('https://api.openai.com/v1/chat/completions', {
    method: 'POST',
    headers: {
      'Authorization': `Bearer ${process.env.OPENAI_API_KEY}`,
      'Content-Type': 'application/json'
    },
    body: JSON.stringify({
      model: 'gpt-4',
      messages: [
        {
          role: 'user',
          content: `Current topics: ${currentTopics.map(t => t.name).join(', ')}.
User: "${userMessage}"
Does user continue a topic, start new one, or end current?
Response format: { continuing: string or null, new: string or null, ended: boolean }`
        }
      ],
      temperature: 0
    })
  });

  if (!response.ok) {
    throw new Error(`Topic-transition request failed: HTTP ${response.status}`);
  }

  const data = await response.json();
  return JSON.parse(data.choices[0].message.content);
}

function updateTopicTracker(
  tracker: TopicTracker,
  userMessage: string,
  transition: {
    continuingTopic?: string;
    newTopic?: string;
    topicEnded?: boolean;
  }
): TopicTracker {
  if (transition.topicEnded && tracker.currentTopics.length &gt; 0) {
    const lastTopic = tracker.currentTopics.pop()!;
    lastTopic.resolved = true;
    tracker.completedTopics.push(lastTopic);
  }

  if (transition.newTopic) {
    tracker.currentTopics.push({
      name: transition.newTopic,
      keywords: extractKeywords(userMessage),
      startTurn: 0, // Would be actual turn number
      resolved: false
    });
  }

  return tracker;
}

function extractKeywords(text: string): string[] {
  // Simplified: split and filter common words
  return text
    .toLowerCase()
    .split(/\s+/)
    .filter(w => w.length &gt; 3);
}

// Tracks: Billing issue → Shipping delay → Back to billing
// Recognizes topic transitions
// Can summarize each topic independently

Intent Detection Per Message

/** Result of per-message intent classification. */
interface IntentClassification {
  primaryIntent: string;
  // Model-reported confidence in [0, 1].
  confidence: number;
  secondaryIntents: Array<{ intent: string; confidence: number }>;
  // True when the classifier recommends handing off to a human.
  requiresEscalation: boolean;
}

/**
 * Classify a message's intent and urgency, optionally conditioning on the
 * last three turns of conversation history.
 *
 * Fix: HTTP errors are now detected before reading `choices`.
 * NOTE(review): JSON.parse on raw model output is unvalidated — consider a
 * schema check before trusting the parsed fields.
 *
 * @throws Error on HTTP failure; SyntaxError if the model reply is not JSON.
 */
async function classifyMessageIntent(
  text: string,
  conversationHistory?: Message[]
): Promise<IntentClassification> {
  const historyContext = conversationHistory
    ? `Previous context: ${conversationHistory
        .slice(-3)
        .map(m => `${m.role}: ${m.content}`)
        .join('\n')}`
    : '';

  const response = await fetch('https://api.openai.com/v1/chat/completions', {
    method: 'POST',
    headers: {
      'Authorization': `Bearer ${process.env.OPENAI_API_KEY}`,
      'Content-Type': 'application/json'
    },
    body: JSON.stringify({
      model: 'gpt-4',
      messages: [
        {
          role: 'user',
          content: `${historyContext}
Classify intent and urgency.
Message: "${text}"
Response: { primary: string, confidence: 0-1, secondary: [{intent, confidence}], escalate: boolean }`
        }
      ],
      temperature: 0
    })
  });

  if (!response.ok) {
    throw new Error(`Intent classification failed: HTTP ${response.status}`);
  }

  const data = await response.json();
  const parsed = JSON.parse(data.choices[0].message.content);

  // Fill safe defaults for optional fields the model may omit.
  return {
    primaryIntent: parsed.primary,
    confidence: parsed.confidence,
    secondaryIntents: parsed.secondary || [],
    requiresEscalation: parsed.escalate || false
  };
}

// Intents:
// - greeting, farewell
// - question, clarification
// - complaint, praise
// - request for action
// - escalation request

Slot Filling for Structured Conversations

Extract required information across multiple turns:

/** One piece of information to collect during a structured dialogue. */
interface Slot {
  name: string;
  type: 'text' | 'number' | 'date' | 'email' | 'phone' | 'choice';
  required: boolean;
  // Set once a value has been extracted from a user message.
  value?: string;
  // Unix epoch ms of when the value was captured.
  extractedAt?: number;
}

/** A slot-filling flow (e.g. a booking) tracked across multiple turns. */
interface DialogueFlow {
  name: string;
  slots: Slot[];
  // Names of slots already filled; kept in sync with Slot.value by fillSlots.
  filledSlots: Set<string>;
  completedAt?: number;
}

/**
 * Attempt to extract values for the flow's unfilled slots from one user
 * message; mutates the matching Slot objects and the flow's filledSlots set.
 *
 * Fixes: HTTP errors are now detected before reading `choices`, and the
 * extracted-slot callback is typed instead of `any`.
 * NOTE(review): JSON.parse on raw model output is unvalidated — consider a
 * schema check before trusting `extracted.slots`.
 *
 * @returns Slots filled by this message, plus the next slot still to ask for.
 * @throws Error on HTTP failure; SyntaxError if the model reply is not JSON.
 */
async function fillSlots(
  userMessage: string,
  flow: DialogueFlow
): Promise<{ filledSlots: Slot[]; nextSlot?: Slot }> {
  // Only ask the model about slots that are still missing.
  const unfilledSlots = flow.slots.filter(
    s => !flow.filledSlots.has(s.name)
  );

  const response = await fetch('https://api.openai.com/v1/chat/completions', {
    method: 'POST',
    headers: {
      'Authorization': `Bearer ${process.env.OPENAI_API_KEY}`,
      'Content-Type': 'application/json'
    },
    body: JSON.stringify({
      model: 'gpt-3.5-turbo',
      messages: [
        {
          role: 'user',
          content: `Extract slot values from user message.
Slots: ${unfilledSlots.map(s => `${s.name} (${s.type})`).join(', ')}
Message: "${userMessage}"
Response: { slots: [{name, value}], confidence: number }`
        }
      ],
      temperature: 0
    })
  });

  if (!response.ok) {
    throw new Error(`Slot extraction failed: HTTP ${response.status}`);
  }

  const data = await response.json();
  const extracted = JSON.parse(data.choices[0].message.content);

  const filledSlots: Slot[] = [];

  extracted.slots?.forEach((slot: { name: string; value: string }) => {
    const slotDef = flow.slots.find(s => s.name === slot.name);
    if (slotDef) {
      slotDef.value = slot.value;
      slotDef.extractedAt = Date.now();
      flow.filledSlots.add(slot.name);
      filledSlots.push(slotDef);
    }
  });

  // First slot still missing after this extraction pass (filledSlots was
  // just updated, so freshly-filled slots are skipped).
  const nextSlot = unfilledSlots.find(s => !flow.filledSlots.has(s.name));

  return { filledSlots, nextSlot };
}

// Example: Booking flow
// Slots: [date, time, partySize, email, phone]
// Asks for each slot, validates format
// Completes when all filled

Handoff to Human Agent

/** Everything a human agent needs to take over a conversation. */
interface HandoffRequest {
  sessionId: string;
  reason: string;
  priority: 'low' | 'medium' | 'high';
  context: {
    // LLM-generated 2-3 sentence summary of the transcript.
    messagesSummary: string;
    userTier: string;
    sentiment: string;
  };
}

async function initiateHandoff(
  session: ConversationSession,
  reason: string
): Promise<void> {
  // Save full context for agent
  const summary = await summarizeConversation(session);

  const handoff: HandoffRequest = {
    sessionId: session.sessionId,
    reason,
    priority: determinePriority(reason, session),
    context: {
      messagesSummary: summary,
      userTier: 'pro', // From session metadata
      sentiment: analyzeSentiment(session)
    }
  };

  // Queue for agent assignment
  await enqueueForAgent(handoff);

  // Notify user
  const message: Message = {
    role: 'assistant',
    content: 'Connecting you with a human agent. Please hold...',
    timestamp: Date.now()
  };

  await addMessageToSession(session.sessionId, message);
}

/**
 * Map a handoff reason and session state to a queue priority: explicit
 * escalation language wins, then negative sentiment, otherwise low.
 */
function determinePriority(reason: string, session: ConversationSession): 'low' | 'medium' | 'high' {
  const urgentMarkers = ['angry', 'escalation'];
  if (urgentMarkers.some(marker => reason.includes(marker))) {
    return 'high';
  }

  return session.metadata.sentiment === 'negative' ? 'medium' : 'low';
}

/**
 * Keyword-based sentiment over the last three messages (recent tone matters
 * most). Negative markers take precedence over positive ones; a production
 * system would use a real sentiment model.
 */
function analyzeSentiment(session: ConversationSession): string {
  const windowText = session.messages
    .slice(-3)
    .map(m => m.content)
    .join(' ');

  const negativeMarkers = ['frustrated', 'angry'];
  const positiveMarkers = ['happy', 'great'];

  if (negativeMarkers.some(word => windowText.includes(word))) {
    return 'negative';
  }
  if (positiveMarkers.some(word => windowText.includes(word))) {
    return 'positive';
  }
  return 'neutral';
}

/**
 * Produce a 2-3 sentence transcript summary for agent handoff via a cheap
 * model.
 *
 * Fix: HTTP errors are now detected before reading `choices`.
 *
 * @throws Error when the summarization request fails at the HTTP level.
 */
async function summarizeConversation(
  session: ConversationSession
): Promise<string> {
  const response = await fetch('https://api.openai.com/v1/chat/completions', {
    method: 'POST',
    headers: {
      'Authorization': `Bearer ${process.env.OPENAI_API_KEY}`,
      'Content-Type': 'application/json'
    },
    body: JSON.stringify({
      model: 'gpt-3.5-turbo',
      messages: [
        {
          role: 'user',
          content: `Summarize this conversation in 2-3 sentences for agent handoff.
${session.messages.map(m => `${m.role}: ${m.content}`).join('\n')}`
        }
      ],
      temperature: 0.5
    })
  });

  if (!response.ok) {
    throw new Error(`Conversation summarization failed: HTTP ${response.status}`);
  }

  const data = await response.json();
  return data.choices[0].message.content;
}

/**
 * Queue a handoff request for a human agent. Placeholder for a real queue
 * backend (SQS, Redis stream, database table); a separate agent-assignment
 * service would consume these entries.
 */
async function enqueueForAgent(request: HandoffRequest): Promise<void> {
  console.log(`Queued for agent: ${request.sessionId} (${request.priority})`);
}

Conversation Analytics

/** Aggregate metrics computed from a conversation by analyzeConversation. */
interface ConversationAnalytics {
  totalTurns: number;
  averageTurnTime: number; // ms
  // Up to five most frequent message intents, descending by count.
  topIntents: Array<{ intent: string; count: number }>;
  escalationRate: number;
  resolutionRate: number;
  userSatisfaction: number; // 1-5
}

async function analyzeConversation(
  session: ConversationSession
): Promise<ConversationAnalytics> {
  const intents: { [key: string]: number } = {};

  session.messages.forEach(msg => {
    if (msg.metadata?.intent) {
      intents[msg.metadata.intent] =
        (intents[msg.metadata.intent] || 0) + 1;
    }
  });

  const topIntents = Object.entries(intents)
    .map(([intent, count]) => ({ intent, count }))
    .sort((a, b) => b.count - a.count)
    .slice(0, 5);

  // Calculate turn times
  let totalTurnTime = 0;
  for (let i = 1; i &lt; session.messages.length; i++) {
    totalTurnTime +=
      session.messages[i].timestamp - session.messages[i - 1].timestamp;
  }

  const avgTurnTime = totalTurnTime / (session.messages.length - 1);

  return {
    totalTurns: session.messages.length,
    averageTurnTime: Math.round(avgTurnTime),
    topIntents,
    escalationRate: session.metadata.escalated ? 1 : 0,
    resolutionRate: 0.95, // Would measure from feedback
    userSatisfaction: 4.2 // Would get from survey
  };
}

Checklist

  • Implemented Redis-based session storage with 24h TTL
  • Built context window manager that summarizes old messages
  • Created session authentication and user enrichment
  • Developed multi-turn context injection strategy
  • Implemented topic tracking across conversation turns
  • Built intent classifier for each user message
  • Created slot-filling system for structured conversations
  • Set up escalation criteria and handoff queue
  • Built conversation summarization for agent handoff
  • Implemented sentiment analysis for prioritization
  • Created analytics dashboard for conversation metrics

Conclusion

Production conversational AI requires stateful architecture with Redis caching, careful context management, and seamless human handoff. Track topics, intents, and sentiment across turns. Target <500ms response time, 95%+ first-contact resolution, and smooth escalation when needed.