Published on

LLM Function Calling in Production — Tool Design, Parallel Calls, and Error Recovery

Authors

Introduction

Function calling (also known as tool use) turns LLMs into agents that can take actions. But careless tool design leads to hallucinated calls, infinite loops, and cascading failures. This guide covers production patterns for building reliable tool-calling systems.

Function/Tool Schema Design

Clear descriptions prevent hallucinations. Vague schemas cause wrong tool invocations.

/**
 * Describes a tool the model may call: its name, a natural-language
 * description, and an object-typed parameter schema.
 */
interface ToolSchema {
  /** Identifier of the tool (e.g. 'search_documents'). */
  name: string;
  /** Human-readable explanation of what the tool does. */
  description: string;
  /** Parameter schema; the top level is always an object with named properties. */
  parameters: {
    type: 'object';
    /** Per-parameter schema entries, keyed by parameter name. */
    properties: Record<string, any>;
    /** Names of the parameters that must be present in a call. */
    required: string[];
  };
}

/**
 * Factory for the tool schemas exposed to the model.
 *
 * Every builder returns a fresh ToolSchema. Constraints that the
 * descriptions promise in words (numeric ranges, channel format, message
 * length) are also expressed as schema keywords so they can be enforced
 * mechanically.
 */
class ToolSchemaBuilder {
  /**
   * Schema for searching company documents.
   * @returns A schema whose only required parameter is `query`.
   */
  buildSearchTool(): ToolSchema {
    return {
      name: 'search_documents',
      description: 'Search company documents for information about specific topics. Returns relevant sections and their relevance scores.',
      parameters: {
        type: 'object',
        properties: {
          query: {
            type: 'string',
            description: 'The search query (3-50 words). Be specific: instead of "pricing", use "pricing for enterprise plans".'
          },
          document_type: {
            type: 'string',
            enum: ['technical_docs', 'pricing', 'api_reference', 'blog'],
            description: 'Restrict search to specific document types for faster results'
          },
          max_results: {
            type: 'number',
            description: 'Maximum number of results to return (1-10). Default 5.',
            // Encode the advertised 1-10 range so it is not just prose.
            minimum: 1,
            maximum: 10,
            default: 5
          }
        },
        required: ['query']
      }
    };
  }

  /**
   * Schema for running read-only SQL queries.
   * @returns A schema whose only required parameter is `query`.
   */
  buildDatabaseQueryTool(): ToolSchema {
    return {
      name: 'query_database',
      description: 'Execute read-only SQL SELECT queries. Cannot INSERT, UPDATE, or DELETE.',
      parameters: {
        type: 'object',
        properties: {
          query: {
            type: 'string',
            description: 'Valid SQL SELECT query. Must include table and column names exactly.'
          },
          timeout_seconds: {
            type: 'number',
            description: 'Query timeout in seconds (5-60). Slow queries auto-terminate.',
            // Encode the advertised 5-60 range so it is not just prose.
            minimum: 5,
            maximum: 60,
            default: 30
          }
        },
        required: ['query']
      }
    };
  }

  /**
   * Schema for posting a message to a Slack channel.
   * @returns A schema requiring `channel` and `message`; `thread_ts` is optional.
   */
  buildSlackTool(): ToolSchema {
    return {
      name: 'post_to_slack',
      description: 'Post a message to a Slack channel. Only post actionable summaries, not raw data dumps.',
      parameters: {
        type: 'object',
        properties: {
          channel: {
            type: 'string',
            description: 'Target channel (e.g., #engineering, #alerts). Must exist.',
            // Channel names may contain lowercase letters, digits, hyphens and
            // underscores (max 80 chars); the previous pattern rejected names
            // such as #alerts-2 or #ops_oncall.
            pattern: '^#[a-z0-9_-]{1,80}$'
          },
          message: {
            type: 'string',
            description: 'Message content (max 2000 chars). Use plain text or markdown, not raw JSON.',
            maxLength: 2000
          },
          thread_ts: {
            type: 'string',
            description: 'Optional: post as thread reply using parent message timestamp'
          }
        },
        required: ['channel', 'message']
      }
    };
  }
}

Parallel Tool Calls

Execute independent tools simultaneously to reduce latency.

/** A single tool invocation requested by the model. */
interface ToolCall {
  /** Identifier of this call; copied onto the matching result as `call_id`. */
  id: string;
  /** Name of the tool to run. */
  name: string;
  /** Arguments passed to the tool, keyed by parameter name. */
  arguments: Record<string, any>;
}

/** Outcome of executing one ToolCall. */
interface ToolResult {
  /** The `id` of the ToolCall this result answers. */
  call_id: string;
  /** Name of the tool that was run. */
  name: string;
  /** Value returned by the tool, or an `{ error }` object when execution failed. */
  result: any;
  /** Elapsed time of the call in milliseconds. */
  duration_ms: number;
}

/**
 * Runs tool calls concurrently and reports each outcome as a ToolResult.
 *
 * A failing tool never rejects the batch: its failure is captured as
 * `result: { error: message }` on the corresponding ToolResult.
 */
class ParallelToolExecutor {
  private tools: Record<string, (args: any) => Promise<any>> = {
    search_documents: async (args) => {
      // Mock implementation
      return { results: [], count: 0 };
    },
    query_database: async (args) => {
      // Mock implementation
      return { rows: [] };
    }
  };

  /**
   * Executes all calls concurrently.
   * @param calls Tool calls to run; an empty array yields an empty result.
   * @returns One ToolResult per call, in the same order as `calls`.
   */
  async executeParallel(calls: ToolCall[]): Promise<ToolResult[]> {
    return Promise.all(calls.map((call) => this.executeOne(call)));
  }

  /**
   * Executes all calls concurrently but gives up after `timeoutMs`.
   * @param calls Tool calls to run.
   * @param timeoutMs Maximum time to wait for the whole batch.
   * @returns The results when every call settles in time.
   * @throws Error when the batch does not finish within `timeoutMs`.
   */
  async executeWithTimeout(
    calls: ToolCall[],
    timeoutMs: number = 30000
  ): Promise<ToolResult[]> {
    let timer: ReturnType<typeof setTimeout> | undefined;
    const deadline = new Promise<never>((_, reject) => {
      timer = setTimeout(
        () => reject(new Error(`Tool execution timed out after ${timeoutMs}ms`)),
        timeoutMs
      );
    });

    try {
      return await Promise.race([this.executeParallel(calls), deadline]);
    } finally {
      // Without this the pending timer keeps the event loop alive for the
      // full timeout even after the tools have already finished.
      clearTimeout(timer);
    }
  }

  /** Runs a single call, converting any failure into an `{ error }` result. */
  private async executeOne(call: ToolCall): Promise<ToolResult> {
    const startTime = Date.now();

    try {
      // Own-property check: a model-supplied name such as "toString" or
      // "constructor" must not resolve to a function inherited from Object.
      const isRegistered = Object.prototype.hasOwnProperty.call(this.tools, call.name);
      const tool = isRegistered ? this.tools[call.name] : undefined;
      if (!tool) throw new Error(`Unknown tool: ${call.name}`);

      const result = await tool(call.arguments);

      return {
        call_id: call.id,
        name: call.name,
        result,
        duration_ms: Date.now() - startTime
      };
    } catch (error) {
      // Tools may throw non-Error values; always produce a string message.
      const message = error instanceof Error ? error.message : String(error);
      return {
        call_id: call.id,
        name: call.name,
        result: { error: message },
        duration_ms: Date.now() - startTime
      };
    }
  }
}

Tool Call Result Injection

Feed tool results back to the LLM for agentic loops.

/** One entry in the conversation history exchanged with the model. */
interface Message {
  /** Who produced the message. */
  role: 'user' | 'assistant' | 'tool';
  /** Text content of the message, when there is any. */
  content?: string;
  /** For `tool` messages: id of the tool call this message answers. */
  tool_call_id?: string;
  // NOTE(review): not referenced by the code visible in this file — confirm whether it is still needed.
  tool_use_id?: string;
  /** For `tool` messages: name of the tool that produced the content. */
  name?: string;
}

/**
 * Turns tool results into conversation content the model can read.
 */
class ToolResultInjector {
  /**
   * Appends one `tool` message per result to a copy of the conversation.
   * @param originalMessages Existing conversation; not mutated.
   * @param toolResults Results to append, in order.
   * @returns A new array containing the original messages followed by the tool messages.
   */
  injectResults(
    originalMessages: Message[],
    toolResults: ToolResult[]
  ): Message[] {
    const toolMessages = toolResults.map((result): Message => ({
      role: 'tool',
      tool_call_id: result.call_id,
      name: result.name,
      content: this.serialize(result.result)
    }));

    return [...originalMessages, ...toolMessages];
  }

  /**
   * Renders results as a human-readable text block.
   * @param results Results to render.
   * @returns Sections separated by `---`, or an empty string when there are no results.
   */
  formatResultsForLLM(results: ToolResult[]): string {
    return results
      .map(
        r =>
          `Tool: ${r.name}\nDuration: ${r.duration_ms}ms\nResult:\n${this.serialize(r.result, 2)}`
      )
      .join('\n\n---\n\n');
  }

  /**
   * JSON-encodes a tool result and always returns a string.
   *
   * `JSON.stringify` returns `undefined` for `undefined`/functions and throws
   * on circular structures or BigInt; either would leave a tool call without
   * a usable answer, so both cases are mapped to valid JSON text.
   */
  private serialize(value: unknown, indent?: number): string {
    try {
      return JSON.stringify(value, null, indent) ?? 'null';
    } catch (error) {
      const reason = error instanceof Error ? error.message : String(error);
      return JSON.stringify({ error: `Unserializable tool result: ${reason}` }, null, indent);
    }
  }
}

Recursive Tool Use Loop

Implement agentic loops: LLM calls tools → results fed back → repeat until done.

/** Limits and diagnostics settings for an agent loop run. */
interface AgentLoopConfig {
  /** Maximum number of model round-trips before the loop gives up with an error. */
  max_iterations: number;
  /** Overall time budget in milliseconds, checked at the start of each iteration. */
  timeout_ms: number;
  /** When true, progress is written to the console. */
  verbose: boolean;
}

/**
 * Drives the agent loop: ask the model, run the tools it requests, feed the
 * results back, and repeat until the model answers without tool calls.
 */
class RecursiveToolUseLoop {
  private executor = new ParallelToolExecutor();
  private injector = new ToolResultInjector();

  /**
   * Runs the loop until the model produces a final answer.
   *
   * @param systemPrompt Instructions sent as the leading system message of every request.
   * @param userQuery The user's question; becomes the first conversation message.
   * @param tools Tool schemas offered to the model; may be empty.
   * @param client Chat-completions style client exposing `chat.completions.create`.
   * @param config Iteration/time limits and verbosity.
   * @returns The text of the model's final answer (empty string when it has none).
   * @throws Error when the time budget or `max_iterations` is exceeded, or
   *   when the model response contains no choices.
   */
  async runLoop(
    systemPrompt: string,
    userQuery: string,
    tools: ToolSchema[],
    client: any,
    config: AgentLoopConfig = { max_iterations: 10, timeout_ms: 60000, verbose: false }
  ): Promise<string> {
    const startTime = Date.now();
    const messages: Message[] = [
      { role: 'user', content: userQuery }
    ];

    for (let iteration = 1; iteration <= config.max_iterations; iteration++) {
      if (Date.now() - startTime > config.timeout_ms) {
        throw new Error('Agent loop timed out');
      }

      if (config.verbose) {
        console.log(`Iteration ${iteration}`);
      }

      // Get next action from LLM. The system prompt travels as a system
      // message: the chat-completions request has no top-level `system` field.
      const request: Record<string, unknown> = {
        model: 'gpt-4o',
        messages: [{ role: 'system', content: systemPrompt }, ...messages]
      };
      if (tools.length > 0) {
        // Only advertise tools when there are any; an empty list is rejected.
        request.tools = tools.map(t => ({ type: 'function', function: t }));
        request.tool_choice = 'auto';
      }
      const response = await client.chat.completions.create(request);

      const choice = response?.choices?.[0];
      if (!choice) {
        throw new Error('LLM response contained no choices');
      }

      const requestedCalls: any[] = Array.isArray(choice.message.tool_calls)
        ? choice.message.tool_calls
        : [];

      // Check if done (an empty tool_calls array also means "nothing to run").
      if (choice.finish_reason === 'end_turn' || requestedCalls.length === 0) {
        const finalText: string = choice.message.content || '';
        messages.push({ role: 'assistant', content: finalText });
        return finalText;
      }

      // Keep the assistant's tool_calls in the history: every `tool` message
      // must answer a tool call that appears in a preceding assistant message.
      const assistantTurn: Message & { tool_calls: unknown[] } = {
        role: 'assistant',
        tool_calls: requestedCalls,
        ...(typeof choice.message.content === 'string'
          ? { content: choice.message.content }
          : {})
      };
      messages.push(assistantTurn);

      // Split calls into runnable ones and ones whose arguments are unusable.
      const runnable: ToolCall[] = [];
      const rejected: ToolResult[] = [];
      for (const raw of requestedCalls) {
        const name: string = raw?.function?.name;
        try {
          runnable.push({
            id: raw.id,
            name,
            arguments: this.parseToolArguments(raw?.function?.arguments)
          });
        } catch (error) {
          // Report the problem back to the model so it can correct itself,
          // instead of letting one malformed call abort the whole run.
          const reason = error instanceof Error ? error.message : String(error);
          rejected.push({
            call_id: raw?.id,
            name,
            result: { error: `Invalid tool arguments: ${reason}` },
            duration_ms: 0
          });
        }
      }

      const executed = runnable.length > 0
        ? await this.executor.executeParallel(runnable)
        : [];
      messages.push(...this.injector.injectResults([], [...executed, ...rejected]));

      if (config.verbose) {
        console.log(`Executed ${runnable.length} tools, received results`);
      }
    }

    throw new Error(`Agent loop exceeded max iterations (${config.max_iterations})`);
  }

  /**
   * Decodes the JSON argument string attached to a tool call.
   * @throws Error when the value is not a string, is not valid JSON, or does
   *   not decode to a plain object.
   */
  private parseToolArguments(raw: unknown): Record<string, any> {
    if (typeof raw !== 'string') {
      throw new Error('arguments were not a JSON string');
    }
    if (raw.trim() === '') {
      return {};
    }
    const parsed: unknown = JSON.parse(raw);
    if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
      throw new Error('arguments must be a JSON object');
    }
    return parsed as Record<string, any>;
  }
}

Error Handling When Tool Fails

Degrade gracefully when a tool fails instead of crashing the agent.

/** Per-tool rules describing what to do when the tool fails. */
interface ToolFailurePolicy {
  /** Number of retries allowed before the failure is treated as final. */
  retry_count: number;
  /** Optional text included in the failure result as `fallback`. */
  fallback_response?: string;
  /** When true, a final failure is thrown as an Error instead of being returned. */
  escalate: boolean;
}

/**
 * Applies per-tool failure policies: retry with exponential backoff, then
 * either escalate (throw) or return an error result with an optional fallback.
 */
class ToolErrorHandler {
  private policies: Record<string, ToolFailurePolicy> = {
    search_documents: { retry_count: 1, escalate: false },
    query_database: { retry_count: 0, escalate: true },
    post_to_slack: { retry_count: 1, fallback_response: 'Could not post to Slack', escalate: false }
  };

  /**
   * @param retryBaseDelayMs Delay before the first retry; doubled for each
   *   further retry. Defaults to 1000 ms.
   */
  constructor(private readonly retryBaseDelayMs: number = 1000) {}

  /**
   * Executes one call and applies the tool's failure policy when it fails.
   *
   * @param call The tool call to run.
   * @param executor Executor used to run the call.
   * @param retryCount Number of retries already performed (callers normally omit this).
   * @returns The successful result, or an error result carrying the call's own
   *   id when the failure is final. The `null` in the return type is kept only
   *   for compatibility with existing callers; it is no longer produced.
   * @throws Error when the tool's policy has `escalate: true` and retries are exhausted.
   */
  async executeWithErrorHandling(
    call: ToolCall,
    executor: ParallelToolExecutor,
    retryCount: number = 0
  ): Promise<ToolResult | null> {
    let failure: string;

    try {
      const results = await executor.executeParallel([call]);
      const result = results[0];

      if (!result) {
        failure = 'Executor returned no result';
      } else {
        const reported = this.readError(result.result);
        if (reported === undefined) {
          return result;
        }
        failure = reported;
      }
    } catch (error) {
      failure = error instanceof Error ? error.message : String(error);
    }

    return this.handleToolError(call, executor, failure, retryCount);
  }

  /**
   * Extracts the error message from a tool payload, or `undefined` when the
   * payload does not report an error. Null/primitive payloads are valid
   * successes and must not be dereferenced.
   */
  private readError(payload: unknown): string | undefined {
    if (payload === null || typeof payload !== 'object') {
      return undefined;
    }
    const reported = (payload as { error?: unknown }).error;
    if (!reported) {
      return undefined;
    }
    return typeof reported === 'string' ? reported : String(reported);
  }

  /** Decides between retrying, escalating, and returning an error result. */
  private async handleToolError(
    call: ToolCall,
    executor: ParallelToolExecutor,
    error: string,
    retryCount: number
  ): Promise<ToolResult | null> {
    const toolName = call.name;
    const policy = Object.prototype.hasOwnProperty.call(this.policies, toolName)
      ? this.policies[toolName]
      : undefined;

    if (!policy) {
      // No policy configured: still answer the call so the failure is visible
      // to the model rather than silently dropped.
      return { call_id: call.id, name: toolName, result: { error }, duration_ms: 0 };
    }

    if (retryCount < policy.retry_count) {
      console.log(`Retrying ${toolName} (attempt ${retryCount + 1})`);
      // Exponential backoff: base, 2x base, 4x base, ...
      const delayMs = this.retryBaseDelayMs * 2 ** retryCount;
      await new Promise<void>(resolve => setTimeout(resolve, delayMs));
      return this.executeWithErrorHandling(call, executor, retryCount + 1);
    }

    if (policy.escalate) {
      throw new Error(`Tool ${toolName} failed: ${error}`);
    }

    return {
      // Reuse the real call id so the result can be matched to its tool call.
      call_id: call.id,
      name: toolName,
      result: { error, fallback: policy.fallback_response },
      duration_ms: 0
    };
  }
}

Tool Call Validation

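Validate every tool call against its schema before execution: reject missing, unknown, or mistyped arguments so that malformed or malicious input never reaches the tool.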

/**
 * Checks a model-produced tool call against the tool's schema before it is
 * executed.
 */
class ToolCallValidator {
  /**
   * Validates required parameters, unknown parameters, and per-parameter
   * constraints (type, enum, pattern, length and numeric range).
   *
   * @param call The tool call to check.
   * @param schema The schema of the tool being called.
   * @returns `valid` is true only when `errors` is empty; `errors` lists every
   *   problem found, one message per violation.
   */
  validateCall(call: ToolCall, schema: ToolSchema): { valid: boolean; errors: string[] } {
    const rawArgs: unknown = call.arguments;
    if (rawArgs === null || typeof rawArgs !== 'object' || Array.isArray(rawArgs)) {
      return { valid: false, errors: ['Tool arguments must be an object'] };
    }

    const args = rawArgs as Record<string, unknown>;
    const { properties, required } = schema.parameters;
    const errors: string[] = [];

    // Check required parameters. Own-property checks only: names such as
    // "toString" exist on every object via the prototype chain.
    for (const name of required) {
      if (!this.hasOwn(args, name)) {
        errors.push(`Missing required parameter: ${name}`);
      }
    }

    // Validate each supplied parameter against its schema entry.
    for (const [param, value] of Object.entries(args)) {
      const paramSchema = this.hasOwn(properties, param) ? properties[param] : undefined;
      if (!paramSchema) {
        errors.push(`Unknown parameter: ${param}`);
        continue;
      }
      errors.push(...this.checkValue(param, value, paramSchema));
    }

    return {
      valid: errors.length === 0,
      errors
    };
  }

  /** True when `key` is a property of `target` itself, not inherited. */
  private hasOwn(target: object, key: string): boolean {
    return Object.prototype.hasOwnProperty.call(target, key);
  }

  /** Returns the constraint violations of a single parameter value. */
  private checkValue(param: string, value: unknown, paramSchema: any): string[] {
    const problems: string[] = [];

    if (paramSchema.type === 'string' && typeof value !== 'string') {
      problems.push(`Parameter ${param} must be a string`);
    }

    if (paramSchema.type === 'number' && typeof value !== 'number') {
      problems.push(`Parameter ${param} must be a number`);
    }

    if (paramSchema.type === 'integer' && !Number.isInteger(value)) {
      problems.push(`Parameter ${param} must be an integer`);
    }

    if (paramSchema.type === 'boolean' && typeof value !== 'boolean') {
      problems.push(`Parameter ${param} must be a boolean`);
    }

    // Validate enums
    if (Array.isArray(paramSchema.enum) && !paramSchema.enum.includes(value)) {
      problems.push(`Parameter ${param} must be one of: ${paramSchema.enum.join(', ')}`);
    }

    if (typeof value === 'string') {
      // Validate regex patterns (strings only, to avoid testing a coerced value)
      if (paramSchema.pattern && !new RegExp(paramSchema.pattern).test(value)) {
        problems.push(`Parameter ${param} does not match pattern: ${paramSchema.pattern}`);
      }
      if (typeof paramSchema.maxLength === 'number' && value.length > paramSchema.maxLength) {
        problems.push(`Parameter ${param} must be at most ${paramSchema.maxLength} characters`);
      }
      if (typeof paramSchema.minLength === 'number' && value.length < paramSchema.minLength) {
        problems.push(`Parameter ${param} must be at least ${paramSchema.minLength} characters`);
      }
    }

    if (typeof value === 'number') {
      if (typeof paramSchema.minimum === 'number' && value < paramSchema.minimum) {
        problems.push(`Parameter ${param} must be >= ${paramSchema.minimum}`);
      }
      if (typeof paramSchema.maximum === 'number' && value > paramSchema.maximum) {
        problems.push(`Parameter ${param} must be <= ${paramSchema.maximum}`);
      }
    }

    return problems;
  }
}

Preventing Tool Abuse

Rate-limit and monitor tool usage so that a runaway or malicious loop cannot hammer a tool.

/** Usage counters tracked for a single tool. */
interface ToolUsageMetrics {
  /** Name of the tool these counters belong to. */
  tool_name: string;
  /** Number of calls recorded for the tool. */
  call_count: number;
  /** Number of recorded calls that were reported as unsuccessful. */
  error_count: number;
  /** Time at which the most recent call was recorded. */
  last_call_time: Date;
}

/**
 * Rate-limits tool calls with a 60-second sliding window and tracks
 * per-tool call/error totals for abuse detection.
 *
 * Window bookkeeping is kept separate from the totals in `metrics`, so
 * `error_count / call_count` is always a ratio over the same set of calls.
 */
class ToolAbusePreventionManager {
  /** Length of the rate-limit window in milliseconds. */
  private static readonly WINDOW_MS = 60000;

  private metrics: Map<string, ToolUsageMetrics> = new Map();
  /** Timestamps (ms) of calls recorded within the current window, per tool. */
  private recentCalls: Map<string, number[]> = new Map();
  private rateLimits: Record<string, { calls_per_minute: number }> = {
    query_database: { calls_per_minute: 10 },
    search_documents: { calls_per_minute: 30 },
    post_to_slack: { calls_per_minute: 5 }
  };

  /**
   * @param now Clock returning the current time in milliseconds; defaults to
   *   `Date.now`. Injectable so the window logic can be tested deterministically.
   */
  constructor(private readonly now: () => number = Date.now) {}

  /**
   * Reports whether another call to `toolName` fits in the current window.
   * @returns `true` for tools without a configured limit, or when fewer than
   *   `calls_per_minute` calls were recorded in the last 60 seconds.
   */
  canExecuteTool(toolName: string): boolean {
    const limit = Object.prototype.hasOwnProperty.call(this.rateLimits, toolName)
      ? this.rateLimits[toolName]
      : undefined;
    if (!limit) return true;

    if (this.callsInWindow(toolName).length >= limit.calls_per_minute) {
      console.warn(`Tool ${toolName} rate limit exceeded`);
      return false;
    }

    return true;
  }

  /**
   * Records one executed call.
   * @param toolName Tool that was called.
   * @param success Whether the call succeeded; failures increase `error_count`.
   */
  recordCall(toolName: string, success: boolean): void {
    const timestamp = this.now();
    const metrics = this.metrics.get(toolName) ?? {
      tool_name: toolName,
      call_count: 0,
      error_count: 0,
      last_call_time: new Date(timestamp)
    };

    metrics.call_count++;
    if (!success) metrics.error_count++;
    metrics.last_call_time = new Date(timestamp);
    this.metrics.set(toolName, metrics);

    const window = this.callsInWindow(toolName);
    window.push(timestamp);
    this.recentCalls.set(toolName, window);
  }

  /**
   * Flags a tool whose recorded calls mostly fail.
   * @returns `true` when more than 5 calls were recorded and over half of them failed.
   */
  detectAbusePattern(toolName: string): boolean {
    const metrics = this.metrics.get(toolName);
    if (!metrics) return false;

    // Flag if error rate > 50%
    return metrics.call_count > 5 && metrics.error_count / metrics.call_count > 0.5;
  }

  /** Drops timestamps older than the window and returns the remaining ones. */
  private callsInWindow(toolName: string): number[] {
    const cutoff = this.now() - ToolAbusePreventionManager.WINDOW_MS;
    const kept = (this.recentCalls.get(toolName) ?? []).filter(t => t > cutoff);

    if (kept.length > 0) {
      this.recentCalls.set(toolName, kept);
    } else {
      this.recentCalls.delete(toolName);
    }
    return kept;
  }
}

Tool Call Logging for Debugging

Comprehensive logging for debugging agent behavior.

/** One recorded tool call. */
interface ToolCallLog {
  /** When the entry was recorded. */
  timestamp: Date;
  /** Name of the tool that was called. */
  tool_name: string;
  /** Arguments the tool was called with. */
  arguments: Record<string, any>;
  /** Value the tool produced. */
  result: any;
  /** Duration of the call in milliseconds. */
  duration_ms: number;
  /** Error message, present only when one was supplied for the call. */
  error?: string;
  /** Iteration number supplied by the caller when the call was logged. */
  iteration: number;
}

/**
 * In-memory log of tool calls with export and per-tool summary helpers.
 */
class ToolCallLogger {
  private logs: ToolCallLog[] = [];

  /**
   * Appends one entry, stamped with the current time.
   * @param toolName Tool that was called.
   * @param args Arguments the tool received.
   * @param result Value the tool produced.
   * @param durationMs Duration of the call in milliseconds.
   * @param iteration Iteration number to store with the entry.
   * @param error Optional error message for failed calls.
   */
  logCall(
    toolName: string,
    args: Record<string, any>,
    result: any,
    durationMs: number,
    iteration: number,
    error?: string
  ): void {
    const entry: ToolCallLog = {
      timestamp: new Date(),
      tool_name: toolName,
      arguments: args,
      result,
      duration_ms: durationMs,
      error,
      iteration
    };
    this.logs.push(entry);
  }

  /** Serializes every entry as pretty-printed JSON (2-space indent). */
  exportForAnalysis(): string {
    const indentWidth = 2;
    return JSON.stringify(this.logs, null, indentWidth);
  }

  /**
   * Aggregates the log per tool.
   * @returns A map from tool name to `count`, `errors`, `avg_duration_ms`
   *   and `total_duration_ms`.
   */
  summarizeToolUsage(): Record<string, any> {
    const perTool: Record<string, any> = {};

    // First pass: accumulate counts and total durations.
    this.logs.forEach((entry) => {
      const key = entry.tool_name;
      let bucket = perTool[key];
      if (!bucket) {
        bucket = {
          count: 0,
          errors: 0,
          avg_duration_ms: 0,
          total_duration_ms: 0
        };
        perTool[key] = bucket;
      }

      bucket.count += 1;
      bucket.total_duration_ms += entry.duration_ms;
      if (entry.error) {
        bucket.errors += 1;
      }
    });

    // Second pass: derive the averages from the totals.
    for (const bucket of Object.values(perTool)) {
      bucket.avg_duration_ms = bucket.total_duration_ms / bucket.count;
    }

    return perTool;
  }
}

Checklist

  • Write detailed tool descriptions (50+ words) to reduce hallucinations
  • Include examples and anti-patterns in schema descriptions
  • Execute independent tools in parallel using Promise.all()
  • Implement retries with exponential backoff for transient failures
  • Always validate tool arguments before execution
  • Feed tool results back as tool messages, not system messages
  • Implement max iteration limits to prevent infinite loops
  • Rate limit tool calls to prevent abuse
  • Log all tool calls with arguments and results for debugging
  • Monitor error rates per tool and escalate or fallback gracefully

Conclusion

Tool calling transforms LLMs from text generators into action takers. Success depends on clear schemas, robust error handling, and proper result injection. Build the scaffolding once, and autonomous agents follow naturally.