Published on

Reliable Structured Output From LLMs — JSON Mode, Zod Validation, and Retry Logic

Authors

Introduction

LLM outputs are probabilistic. They hallucinate, truncate JSON, and violate schemas. This guide covers OpenAI's JSON mode, Anthropic's structured output, Zod validation, and retry strategies that actually work.

OpenAI JSON Mode vs Response Format Schema

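OpenAI offers two JSON response formats: `json_object` (JSON mode), which guarantees syntactically valid JSON but not any particular shape, and `json_schema` with `strict: true` (Structured Outputs), which constrains generation so the output also conforms to the JSON Schema you supply.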

import Anthropic from '@anthropic-ai/sdk';
import OpenAI from 'openai';

/** Shape of the user profile that the extraction handlers below promise to return. */
interface UserProfile {
  name: string;
  age: number;
  email: string;
  /** Interests as plain strings. */
  interests: string[];
}

/**
 * Extracts a `UserProfile` from free text through the OpenAI Chat Completions
 * API, using either schema-constrained output (`json_schema` + `strict`) or
 * the legacy `json_object` mode.
 *
 * Both paths share one response reader that rejects truncated, refused, empty
 * or wrongly-shaped responses instead of returning unchecked `JSON.parse`
 * output typed as `UserProfile`.
 */
class OpenAIJSONModeHandler {
  private openai = new OpenAI();

  /**
   * Mode 1: Structured Outputs (`json_schema` with `strict: true`).
   *
   * @param text Free text to extract the profile from.
   * @returns The extracted profile.
   * @throws Error when the response is missing, truncated, refused, or does
   *   not have the `UserProfile` shape; `SyntaxError` when it is not JSON.
   */
  async extractWithStrictMode(text: string): Promise<UserProfile> {
    const response = await this.openai.chat.completions.create({
      model: 'gpt-4o',
      messages: [
        {
          role: 'user',
          content: `Extract user profile from this text: ${text}`
        }
      ],
      response_format: {
        type: 'json_schema',
        json_schema: {
          name: 'UserProfile',
          schema: {
            type: 'object',
            properties: {
              name: { type: 'string' },
              age: { type: 'number' },
              email: { type: 'string', format: 'email' },
              interests: { type: 'array', items: { type: 'string' } }
            },
            required: ['name', 'age', 'email', 'interests'],
            additionalProperties: false
          },
          strict: true
        }
      }
    });

    return this.readProfile(response.choices[0]);
  }

  /**
   * Mode 2: Legacy JSON mode (`json_object`). Only JSON syntax is enforced by
   * the API, so the prompt names the expected keys and the result is
   * shape-checked before it is returned.
   *
   * @param text Free text to extract the profile from.
   * @returns The extracted profile.
   * @throws Error when the response is missing, truncated, refused, or does
   *   not have the `UserProfile` shape; `SyntaxError` when it is not JSON.
   */
  async extractWithJSONMode(text: string): Promise<UserProfile> {
    const response = await this.openai.chat.completions.create({
      model: 'gpt-4o',
      messages: [
        {
          role: 'user',
          content:
            `Extract user profile as JSON: ${text}\n\n` +
            'Return a JSON object with exactly these keys: ' +
            'name (string), age (number), email (string), interests (array of strings).'
        }
      ],
      response_format: { type: 'json_object' }
    });

    return this.readProfile(response.choices[0]);
  }

  /** Turns one completion choice into a validated `UserProfile`, or throws. */
  private readProfile(
    choice:
      | { finish_reason: string; message: { content: string | null; refusal?: string | null } }
      | undefined
  ): UserProfile {
    if (!choice) throw new Error('No response from API');

    // A response cut off by the token limit is incomplete JSON; fail with a
    // clear reason rather than an opaque SyntaxError from JSON.parse.
    if (choice.finish_reason === 'length') {
      throw new Error('Response was truncated before the JSON was complete');
    }
    if (choice.message.refusal) {
      throw new Error(`Model refused the request: ${choice.message.refusal}`);
    }

    const content = choice.message.content;
    if (!content) throw new Error('No response from API');

    const parsed: unknown = JSON.parse(content);
    if (!OpenAIJSONModeHandler.isUserProfile(parsed)) {
      throw new Error('Response JSON does not match the UserProfile shape');
    }
    return parsed;
  }

  /** Runtime type guard mirroring the `UserProfile` interface. */
  private static isUserProfile(value: unknown): value is UserProfile {
    if (typeof value !== 'object' || value === null) return false;
    const candidate = value as Record<string, unknown>;
    return (
      typeof candidate.name === 'string' &&
      typeof candidate.age === 'number' &&
      typeof candidate.email === 'string' &&
      Array.isArray(candidate.interests) &&
      candidate.interests.every(item => typeof item === 'string')
    );
  }
}

/**
 * Extracts a `UserProfile` with the Anthropic Messages API.
 *
 * The request forces a single tool call whose `input_schema` describes the
 * profile, so the model answers with a structured `tool_use` block instead of
 * free text that may or may not be JSON.
 */
class AnthropicStructuredOutputHandler {
  private client = new Anthropic();

  /**
   * @param text Free text to extract the profile from.
   * @returns The tool-call input produced by the model, typed as `UserProfile`.
   * @throws Error('Unexpected response type') when the response contains no
   *   `tool_use` block.
   */
  async extractWithStructuredOutput(text: string): Promise<UserProfile> {
    const toolName = 'record_user_profile';

    const response = await this.client.messages.create({
      model: 'claude-3-5-sonnet-20241022',
      max_tokens: 1024,
      tools: [
        {
          name: toolName,
          description: 'Record the user profile extracted from the text.',
          input_schema: {
            type: 'object',
            properties: {
              name: { type: 'string' },
              age: { type: 'number' },
              email: { type: 'string' },
              interests: { type: 'array', items: { type: 'string' } }
            },
            required: ['name', 'age', 'email', 'interests']
          }
        }
      ],
      // Force the model to call the tool rather than reply in prose.
      tool_choice: { type: 'tool', name: toolName },
      messages: [
        {
          role: 'user',
          content: `Extract user profile: ${text}`
        }
      ]
    });

    // The structured payload is the input of the tool_use block.
    const toolUse = response.content.find(block => block.type === 'tool_use');
    if (!toolUse || toolUse.type !== 'tool_use') throw new Error('Unexpected response type');

    // NOTE(review): `input` is typed `unknown` by the SDK and is not validated
    // here; run it through a runtime schema before trusting field values.
    return toolUse.input as UserProfile;
  }
}

Zod Schema Validation

Define runtime-checkable schemas for type safety and validation.

import { z } from 'zod';

// Define Zod schema
// Runtime contract for a user profile:
//   - name: 1 to 100 characters
//   - age: integer between 0 and 150 inclusive
//   - email: must pass Zod's email check
//   - interests: between 1 and 10 strings
const UserProfileSchema = z.object({
  name: z.string().min(1).max(100),
  age: z.number().int().min(0).max(150),
  email: z.string().email(),
  interests: z.array(z.string()).min(1).max(10)
});

// Static type derived from the schema so the two cannot drift apart.
// NOTE(review): if this snippet shares a file with the earlier
// `interface UserProfile`, the two declarations collide — keep only one.
type UserProfile = z.infer<typeof UserProfileSchema>;

/**
 * Helpers for validating untrusted data against a Zod schema, including a
 * one-shot "ask the LLM to repair malformed JSON" fallback.
 */
class ZodValidator {
  /**
   * Validates `data` against `schema` without throwing.
   *
   * @param data Value to validate.
   * @param schema Zod schema describing the expected shape.
   * @returns `{ valid: true, data }` on success, otherwise
   *   `{ valid: false, errors }` with one `"path: message"` string per issue.
   */
  validate<T>(data: unknown, schema: z.ZodSchema<T>): { valid: boolean; data?: T; errors?: string[] } {
    try {
      const result = schema.safeParse(data);
      if (result.success) {
        return { valid: true, data: result.data };
      }

      const errors = result.error.issues.map(issue => `${issue.path.join('.')}: ${issue.message}`);
      return { valid: false, errors };
    } catch (error) {
      // safeParse reports validation problems through its return value; this
      // branch only catches something else throwing (for example inside a
      // custom refinement), and keeps the no-throw contract.
      return { valid: false, errors: [error instanceof Error ? error.message : String(error)] };
    }
  }

  /**
   * Parses `data` as JSON and validates it. If `data` is not valid JSON, the
   * model is asked once to repair it before validation.
   *
   * @param data Raw (possibly malformed) JSON text.
   * @param schema Zod schema the parsed value must satisfy.
   * @param client OpenAI client used for the repair request.
   * @returns The validated value.
   * @throws Error when the repair request returns no content or when
   *   validation fails; `SyntaxError` when the repaired text is still not JSON.
   */
  async parseWithLLM<T>(
    data: string,
    schema: z.ZodSchema<T>,
    client: OpenAI
  ): Promise<T> {
    let parsed: unknown;

    try {
      // First try direct JSON parse
      parsed = JSON.parse(data);
    } catch {
      // If invalid JSON, ask LLM to fix it
      const fixPrompt = `This JSON is malformed: ${data}\n\nFix it and return valid JSON only.`;
      const fixResponse = await client.chat.completions.create({
        model: 'gpt-4o',
        messages: [{ role: 'user', content: fixPrompt }],
        response_format: { type: 'json_object' }
      });

      // Fail loudly on an empty repair instead of validating a made-up `{}`.
      const repaired = fixResponse.choices[0]?.message.content;
      if (!repaired) throw new Error('No response from API while repairing JSON');

      parsed = JSON.parse(repaired);
    }

    // Validate against schema
    const result = this.validate(parsed, schema);
    if (!result.valid) {
      throw new Error(`Validation failed: ${result.errors?.join('; ')}`);
    }

    // `valid` is true here, so `data` holds the parsed value.
    return result.data as T;
  }
}

Retry on Parse Failure with Error Feedback

When validation fails, feed the error back to the LLM to self-correct.

/** Tuning knobs for the retry loop in `StructuredOutputRetrier`. */
interface RetryConfig {
  /** Total number of attempts, including the first one. */
  max_attempts: number;
  /** Wait before the first retry, in milliseconds. */
  initial_delay_ms: number;
  /** Multiplier applied to the delay after each further failed attempt. */
  backoff_factor: number;
}

/**
 * Requests JSON from the model and retries with exponential backoff, feeding
 * the previous failure (and the rejected output) back into the prompt.
 */
class StructuredOutputRetrier {
  /**
   * @param prompt Base prompt sent on every attempt.
   * @param schema Zod schema the parsed response must satisfy.
   * @param client OpenAI client used for the requests.
   * @param config Attempt count and backoff settings.
   * @returns The first response that parses and validates.
   * @throws Error once every attempt has failed; the message carries the last
   *   failure reason.
   */
  async extractWithRetry<T>(
    prompt: string,
    schema: z.ZodSchema<T>,
    client: OpenAI,
    config: RetryConfig = { max_attempts: 3, initial_delay_ms: 500, backoff_factor: 2 }
  ): Promise<T> {
    let lastError: Error | null = null;
    let lastOutput: string | null = null;

    for (let attempt = 0; attempt < config.max_attempts; attempt++) {
      // Back off before every retry: initial_delay * factor^(failed attempt index).
      if (attempt > 0) {
        await this.backoff(config, attempt - 1);
      }

      const content = this.buildPrompt(prompt, lastError, lastOutput);
      // Forget the old output so a failure before a new response arrives does
      // not pair a stale output with an unrelated error on the next retry.
      lastOutput = null;

      try {
        const response = await client.chat.completions.create({
          model: 'gpt-4o',
          messages: [{ role: 'user', content }],
          response_format: { type: 'json_object' }
        });

        const reply = response.choices[0]?.message.content;
        if (!reply) throw new Error('Empty response');
        lastOutput = reply;

        const result = schema.safeParse(JSON.parse(reply));
        if (result.success) {
          return result.data;
        }

        lastError = new Error(
          `Validation error: ${result.error.issues.map(e => `${e.path.join('.')}: ${e.message}`).join('; ')}`
        );
      } catch (error) {
        // Anything can be thrown in JavaScript; normalize before reading `.message`.
        lastError = error instanceof Error ? error : new Error(String(error));
      }
    }

    throw new Error(
      `Failed to extract structured output after ${config.max_attempts} attempts: ${lastError?.message ?? 'no attempts were made'}`
    );
  }

  /** Builds the user message: the bare prompt first, then prompt plus failure feedback. */
  private buildPrompt(prompt: string, lastError: Error | null, lastOutput: string | null): string {
    if (lastError === null) return prompt;

    const parts = [prompt, `Previous attempt failed with error: ${lastError.message}`];
    if (lastOutput !== null) {
      // Show the rejected output so the model has something concrete to correct.
      parts.push(`Previous output:\n${lastOutput}`);
    }
    parts.push('Please fix and try again.');
    return parts.join('\n\n');
  }

  /** Sleeps for the exponential-backoff delay that follows the given failed attempt. */
  private async backoff(config: RetryConfig, failedAttempt: number): Promise<void> {
    const delay = config.initial_delay_ms * Math.pow(config.backoff_factor, failedAttempt);
    await new Promise(resolve => setTimeout(resolve, delay));
  }
}

Instructor Library Pattern

Use the instructor pattern for seamless structured output with retries.

/** A unit of work to be extracted from text; `TaskSchema` is its runtime counterpart. */
interface Task {
  id: string;
  title: string;
  priority: 'low' | 'medium' | 'high';
  estimated_hours: number;
  completed: boolean;
}

// Runtime validation for a task. Stricter than the static `Task` interface:
// `id` must be a UUID and `estimated_hours` must be greater than zero.
const TaskSchema = z.object({
  id: z.string().uuid(),
  title: z.string(),
  priority: z.enum(['low', 'medium', 'high']),
  estimated_hours: z.number().positive(),
  completed: z.boolean()
});

/**
 * Instructor-style extraction: ask for JSON, validate it, and on failure
 * continue the same conversation with the rejected answer and the concrete
 * validation errors so the model can correct itself.
 */
class InstructorPattern {
  /**
   * @param model Chat model name passed through to the API.
   * @param prompt User prompt describing what to extract.
   * @param schema Zod schema the parsed response must satisfy.
   * @param client OpenAI client used for the requests.
   * @returns The first response that parses and validates.
   * @throws Error after three failed attempts; the message carries the last
   *   failure reason. Errors raised by the API call itself propagate unchanged.
   */
  async extractWithRetry<T>(
    model: string,
    prompt: string,
    schema: z.ZodSchema<T>,
    client: OpenAI
  ): Promise<T> {
    type ChatMessage = { role: 'system' | 'user' | 'assistant'; content: string };

    const systemPrompt = `You are an expert at extracting structured information from text.
Always respond with valid JSON matching the requested schema.
If the extraction is impossible, set fields to reasonable defaults.`;

    const maxAttempts = 3;
    const messages: ChatMessage[] = [
      { role: 'system', content: systemPrompt },
      { role: 'user', content: prompt }
    ];
    let lastFailure = 'no attempt produced content';

    for (let attempt = 0; attempt < maxAttempts; attempt++) {
      const response = await client.chat.completions.create({
        model,
        messages,
        response_format: { type: 'json_object' },
        temperature: 0 // Deterministic for structured output
      });

      const content = response.choices[0]?.message.content;
      if (!content) {
        lastFailure = 'empty response';
        continue;
      }

      try {
        const result = schema.safeParse(JSON.parse(content));
        if (result.success) {
          return result.data;
        }

        lastFailure = result.error.issues
          .map(issue => `${issue.path.join('.')}: ${issue.message}`)
          .join('; ');
      } catch (error) {
        lastFailure = `invalid JSON: ${error instanceof Error ? error.message : String(error)}`;
      }

      // With temperature 0, resending the same conversation would return the
      // same answer. Append the rejected answer and the reason it was rejected
      // so the next attempt actually differs.
      messages.push(
        { role: 'assistant', content },
        {
          role: 'user',
          content: `Your previous response was rejected: ${lastFailure}. Please ensure the JSON is valid and matches the schema exactly.`
        }
      );
    }

    throw new Error(`Failed to extract structured output: ${lastFailure}`);
  }
}

Discriminated Union Outputs

Handle multiple output types with discriminated unions for type safety.

// Result of a sentiment analysis, discriminated on the `type` field:
//   - 'success': full data (sentiment, confidence in [0, 1], key phrases)
//   - 'error':   machine-readable code, message, and an optional retry delay
//   - 'partial': any subset of the success data plus the names of the
//                fields that could not be filled in
const AnalysisResultSchema = z.discriminatedUnion('type', [
  z.object({
    type: z.literal('success'),
    data: z.object({
      sentiment: z.enum(['positive', 'negative', 'neutral']),
      confidence: z.number().min(0).max(1),
      key_phrases: z.array(z.string())
    })
  }),
  z.object({
    type: z.literal('error'),
    error_code: z.string(),
    message: z.string(),
    retry_after_ms: z.number().optional()
  }),
  z.object({
    type: z.literal('partial'),
    data: z.object({
      sentiment: z.enum(['positive', 'negative', 'neutral']).optional(),
      // Unlike the 'success' variant, confidence is not range-limited here.
      confidence: z.number().optional(),
      key_phrases: z.array(z.string()).optional()
    }),
    incomplete_fields: z.array(z.string())
  })
]);

// Static union type inferred from the schema; narrow it by checking `type`.
type AnalysisResult = z.infer<typeof AnalysisResultSchema>;

/**
 * Requests a sentiment analysis shaped like `AnalysisResultSchema` and
 * dispatches on the `type` discriminant.
 */
class DiscriminatedUnionHandler {
  /**
   * @param text Text whose sentiment should be analyzed.
   * @param client OpenAI client used for the request.
   * @returns The validated analysis result.
   * @throws Error when the response is empty or fails schema validation;
   *   `SyntaxError` when the response is not JSON.
   */
  async analyze(text: string, client: OpenAI): Promise<AnalysisResult> {
    // JSON mode only guarantees syntax, so spell out the exact field names of
    // each variant; otherwise the model has to guess them and validation fails.
    const prompt = [
      `Analyze sentiment of: "${text}". Return JSON with type (success/error/partial) and appropriate fields:`,
      '- success: {"type":"success","data":{"sentiment":"positive"|"negative"|"neutral","confidence":<number between 0 and 1>,"key_phrases":[<string>, ...]}}',
      '- error: {"type":"error","error_code":<string>,"message":<string>,"retry_after_ms":<optional number>}',
      '- partial: {"type":"partial","data":{<any subset of the success data fields>},"incomplete_fields":[<string>, ...]}'
    ].join('\n');

    const response = await client.chat.completions.create({
      model: 'gpt-4o',
      messages: [{ role: 'user', content: prompt }],
      response_format: { type: 'json_object' }
    });

    const content = response.choices[0]?.message.content;
    if (!content) throw new Error('No response');

    const result = AnalysisResultSchema.safeParse(JSON.parse(content));
    if (!result.success) {
      throw new Error(`Invalid response: ${result.error.message}`);
    }

    return result.data;
  }

  /**
   * Logs a human-readable summary of the result to the console.
   *
   * @param result A validated analysis result.
   */
  handle(result: AnalysisResult): void {
    switch (result.type) {
      case 'success':
        console.log(`Sentiment: ${result.data.sentiment} (${(result.data.confidence * 100).toFixed(1)}%)`);
        break;

      case 'error':
        console.error(`Error: ${result.message} (code: ${result.error_code})`);
        // Compare against undefined so a delay of 0 ms is still reported.
        if (result.retry_after_ms !== undefined) {
          console.log(`Retry after ${result.retry_after_ms}ms`);
        }
        break;

      case 'partial':
        console.log('Partial analysis:', result.data);
        console.log('Incomplete fields:', result.incomplete_fields);
        break;

      default: {
        // Compile-time exhaustiveness check: adding a variant to the schema
        // without handling it here becomes a type error.
        const unhandled: never = result;
        throw new Error(`Unhandled analysis result: ${JSON.stringify(unhandled)}`);
      }
    }
  }
}

Nested Object Extraction

Handle complex nested structures with proper validation.

// Runtime contract for an extracted company record. Validation covers every
// nested department, location and the financial block.
const CompanySchema = z.object({
  name: z.string(),
  founded_year: z.number().int(),
  departments: z.array(
    z.object({
      name: z.string(),
      head: z.string(),
      employees: z.number().int(),
      // Must be strictly greater than zero.
      budget_usd: z.number().positive()
    })
  ),
  locations: z.array(
    z.object({
      city: z.string(),
      country: z.string(),
      office_count: z.number().int().optional()
    })
  ),
  financial: z.object({
    annual_revenue_usd: z.number().positive(),
    // Constrained to [0, 1], so presumably a fraction rather than a
    // percentage — TODO confirm. Negative margins (losses) are rejected.
    profit_margin: z.number().min(0).max(1),
    debt_ratio: z.number().min(0).optional()
  })
});

// Static type inferred from the schema.
type Company = z.infer<typeof CompanySchema>;

/**
 * Extracts a nested `Company` record from text and validates it, reporting
 * validation failures with the dotted path of each offending field.
 */
class NestedObjectExtractor {
  /**
   * @param text Source text describing the company.
   * @param client OpenAI client used for the request.
   * @returns The validated company record.
   * @throws Error when the response is empty or fails validation (the message
   *   lists `path`, `message` and Zod issue `code` per problem);
   *   `SyntaxError` when the response is not JSON.
   */
  async extract(text: string, client: OpenAI): Promise<Company> {
    // NOTE(review): the prompt does not name the fields CompanySchema expects
    // (e.g. `budget_usd`, `annual_revenue_usd`), so the model has to guess
    // them; consider embedding the expected shape in the prompt.
    const prompt = `Extract company information from this text:\n\n${text}\n\nInclude all departments, locations, and financial data in the JSON.`;

    const response = await client.chat.completions.create({
      model: 'gpt-4o',
      messages: [{ role: 'user', content: prompt }],
      response_format: { type: 'json_object' }
    });

    const content = response.choices[0]?.message.content;
    if (!content) throw new Error('No response');

    const result = CompanySchema.safeParse(JSON.parse(content));

    if (!result.success) {
      const errors = result.error.issues.map(issue => ({
        path: issue.path.join('.'),
        message: issue.message,
        // This is the Zod issue code (e.g. "invalid_type"), not the received
        // value, so it is labelled `code` rather than `received`.
        code: issue.code
      }));

      throw new Error(`Nested validation failed:\n${JSON.stringify(errors, null, 2)}`);
    }

    return result.data;
  }

  /**
   * Validates a possibly incomplete company record.
   *
   * Only top-level keys become optional: `.partial()` is shallow, so a
   * department, location or financial block that is present must still be
   * complete.
   *
   * @param partial Candidate value to validate.
   * @returns The validated partial record.
   * @throws Error when validation fails.
   */
  validatePartial(partial: unknown): Partial<Company> {
    // Allow partial extraction
    const PartialCompanySchema = CompanySchema.partial();
    const result = PartialCompanySchema.safeParse(partial);

    if (!result.success) {
      throw new Error(`Partial validation failed: ${result.error.message}`);
    }

    return result.data;
  }
}

Hallucination in Structured Fields

Detect and mitigate hallucinations in LLM outputs.

/**
 * Heuristic sanity checks for LLM-produced objects: contradictory or
 * unparseable dates, out-of-range numbers, filler text and duplicated array
 * entries. These checks complement schema validation; they do not replace it.
 */
class HallucinationDetector {
  /** Substrings that suggest placeholder text rather than real content. */
  private static readonly FILLER_PATTERN = /lorem ipsum|placeholder|dummy|test data|fake|example123/i;

  /**
   * Detect inconsistencies and impossible values.
   *
   * @param data Parsed model output; anything that is not a non-null object
   *   yields no issues.
   * @param schema Currently unused; kept so existing callers keep compiling.
   * @returns Human-readable descriptions of every problem found (empty when clean).
   */
  detectHallucinations(data: any, schema: z.ZodSchema): string[] {
    const issues: string[] = [];

    // Nothing to inspect on null/undefined/primitives; returning early also
    // avoids a TypeError on property access.
    if (data === null || typeof data !== 'object') {
      return issues;
    }

    // Check for inconsistent dates
    if (data.created_at && data.updated_at) {
      const created = new Date(data.created_at);
      const updated = new Date(data.updated_at);
      if (Number.isNaN(created.getTime()) || Number.isNaN(updated.getTime())) {
        // Comparisons against an Invalid Date are always false, so without
        // this branch unparseable dates would pass silently.
        issues.push('created_at or updated_at is not a valid date');
      } else if (updated < created) {
        issues.push('updated_at is before created_at');
      }
    }

    // Check for impossible numerical values
    if (data.confidence !== undefined && (data.confidence < 0 || data.confidence > 1)) {
      issues.push('Confidence outside 0-1 range');
    }

    if (data.percentage !== undefined && (data.percentage < 0 || data.percentage > 100)) {
      issues.push('Percentage outside 0-100 range');
    }

    // Check for Lorem Ipsum or filler text (own top-level string fields only)
    for (const [key, value] of Object.entries(data)) {
      if (typeof value === 'string' && HallucinationDetector.FILLER_PATTERN.test(value)) {
        issues.push(`Field ${key} contains filler text`);
      }
    }

    // Check for inconsistent arrays
    if (Array.isArray(data.items)) {
      // Wrap JSON.stringify so map's index/array arguments are not passed on
      // as its `replacer` and `space` parameters.
      const serialized = data.items.map((item: unknown) => JSON.stringify(item));
      if (new Set(serialized).size !== data.items.length) {
        issues.push('Array contains duplicate items');
      }
    }

    return issues;
  }

  /**
   * Returns `data` validated against `schema` when no issues are detected;
   * otherwise asks the model once more, listing the detected issues, and
   * validates the new answer.
   *
   * @param data Parsed model output to check.
   * @param schema Zod schema the final value must satisfy.
   * @param client OpenAI client used for the retry request.
   * @param originalPrompt Prompt that produced `data`; reused for the retry.
   * @returns The validated value (original or regenerated).
   * @throws Error when the retry returns no content; `SyntaxError` when it is
   *   not JSON; a Zod error when validation fails.
   */
  async detectAndRetry<T>(
    data: any,
    schema: z.ZodSchema<T>,
    client: OpenAI,
    originalPrompt: string
  ): Promise<T> {
    const issues = this.detectHallucinations(data, schema);

    if (issues.length === 0) {
      return schema.parse(data);
    }

    console.warn(`Detected potential hallucinations: ${issues.join('; ')}`);

    // Retry with warning
    const retryPrompt = `${originalPrompt}\n\nPlease ensure the output is realistic and doesn't contain: ${issues.join(', ')}`;

    const response = await client.chat.completions.create({
      model: 'gpt-4o',
      messages: [{ role: 'user', content: retryPrompt }],
      response_format: { type: 'json_object' },
      temperature: 0
    });

    // Fail loudly on an empty response instead of validating a made-up `{}`.
    const content = response.choices[0]?.message.content;
    if (!content) throw new Error('No response from API');

    const newData: unknown = JSON.parse(content);

    // Surface issues that survived the retry; the result is still returned
    // as long as it validates.
    const remaining = this.detectHallucinations(newData, schema);
    if (remaining.length > 0) {
      console.warn(`Potential hallucinations remain after retry: ${remaining.join('; ')}`);
    }

    return schema.parse(newData);
  }
}

Checklist

  • Use OpenAI's json_schema with strict: true for guaranteed schema adherence
  • Define Zod schemas as single source of truth for validation
  • Implement retry logic with exponential backoff on validation failure
  • Feed validation errors back to LLM in retry attempts
  • Use discriminated unions for multiple output types
  • Always validate nested objects recursively
  • Detect hallucinations (impossible dates, out-of-range percentages, filler text)
  • Set temperature to 0 for structured output to improve consistency
  • Test schemas with edge cases and malformed inputs
  • Log failed extractions with full error context for debugging

Conclusion

Structured output reliability depends on schema clarity, rigorous validation, and intelligent retries. Combine JSON mode with Zod validation and error-aware retries, and you've got a system that extracts data reliably at scale.