Anthropic Claude API Complete Guide 2026: Build with Claude Opus, Sonnet & Haiku
Anthropic Claude API 2026: The Developer Guide
Claude is Anthropic's family of AI models — Opus for the most complex tasks, Sonnet for the best balance of speed and intelligence, and Haiku for fast, cost-effective applications.
- Setup and Installation
- Basic Text Generation
- Streaming Responses
- Vision: Analyzing Images
- Tool Use (Function Calling)
- Prompt Caching (Save Costs)
- Extended Thinking
- Model Selection Guide
- TypeScript / JavaScript SDK
- Production Best Practices
Setup and Installation
pip install anthropic
import anthropic
client = anthropic.Anthropic(api_key="your-api-key")
# Or set ANTHROPIC_API_KEY environment variable
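The client constructor also accepts connection options such as max_retries and timeout; a minimal sketch with illustrative values:
# Optional client configuration (values here are illustrative)
client = anthropic.Anthropic(
    max_retries=2,   # automatic retries on connection errors and certain HTTP errors
    timeout=60.0,    # per-request timeout in seconds
)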
Basic Text Generation
# Simple message
message = client.messages.create(
    model="claude-opus-4-6",  # or claude-sonnet-4-6, claude-haiku-4-5-20251001
    max_tokens=1024,
    messages=[
        {"role": "user", "content": "Explain quantum computing in simple terms"}
    ]
)
print(message.content[0].text)

# With system prompt
message = client.messages.create(
    model="claude-sonnet-4-6",
    max_tokens=2048,
    system="You are an expert Python developer. Always include working code examples.",
    messages=[
        {"role": "user", "content": "How do I implement a binary search tree in Python?"}
    ]
)

# Multi-turn conversation
messages = [
    {"role": "user", "content": "What is a REST API?"},
    {"role": "assistant", "content": "A REST API is an architectural style for distributed hypermedia systems..."},
    {"role": "user", "content": "Show me a Python example of calling one"},
]
response = client.messages.create(
    model="claude-sonnet-4-6",
    max_tokens=1024,
    messages=messages,
)
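To keep a conversation going, append each assistant reply back onto the messages list before the next call. A minimal sketch; the helper name is ours:
def continue_conversation(messages: list, user_text: str) -> str:
    # Hypothetical helper: add the user turn, call the API, record the reply
    messages.append({"role": "user", "content": user_text})
    response = client.messages.create(
        model="claude-sonnet-4-6",
        max_tokens=1024,
        messages=messages,
    )
    reply = response.content[0].text
    messages.append({"role": "assistant", "content": reply})
    return reply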
Streaming Responses
# Stream for real-time output
with client.messages.stream(
    model="claude-sonnet-4-6",
    max_tokens=1024,
    messages=[{"role": "user", "content": "Write a detailed explanation of neural networks"}],
) as stream:
    for text in stream.text_stream:
        print(text, end="", flush=True)
print()  # New line after stream
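Once the stream is consumed, the SDK can hand back the fully assembled Message, which is handy for logging token usage. A short sketch (the prompt is illustrative):
with client.messages.stream(
    model="claude-sonnet-4-6",
    max_tokens=1024,
    messages=[{"role": "user", "content": "Summarize the CAP theorem"}],
) as stream:
    for text in stream.text_stream:
        print(text, end="", flush=True)
    final = stream.get_final_message()  # the complete Message object
print()
print(final.usage)  # token counts for the streamed request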
# Async streaming with FastAPI
from anthropic import AsyncAnthropic
from fastapi import FastAPI
from fastapi.responses import StreamingResponse

async_client = AsyncAnthropic()
app = FastAPI()

@app.post("/chat")
async def chat(question: str):
    async def generate():
        async with async_client.messages.stream(
            model="claude-sonnet-4-6",
            max_tokens=1024,
            messages=[{"role": "user", "content": question}],
        ) as stream:
            async for text in stream.text_stream:
                yield f"data: {text}\n\n"
    return StreamingResponse(generate(), media_type="text/event-stream")
Vision: Analyzing Images
import base64
# From URL
message = client.messages.create(
    model="claude-sonnet-4-6",
    max_tokens=1024,
    messages=[{
        "role": "user",
        "content": [
            {
                "type": "image",
                "source": {
                    "type": "url",
                    "url": "https://example.com/chart.png",
                },
            },
            {"type": "text", "text": "Describe this chart and extract all data points"},
        ],
    }],
)
# From a local file
def analyze_local_image(image_path: str, prompt: str) -> str:
    with open(image_path, "rb") as f:
        image_data = base64.standard_b64encode(f.read()).decode("utf-8")
    ext = image_path.split(".")[-1].lower()
    media_type_map = {"jpg": "image/jpeg", "jpeg": "image/jpeg", "png": "image/png", "gif": "image/gif", "webp": "image/webp"}
    message = client.messages.create(
        model="claude-sonnet-4-6",
        max_tokens=1024,
        messages=[{
            "role": "user",
            "content": [
                {
                    "type": "image",
                    "source": {
                        "type": "base64",
                        "media_type": media_type_map.get(ext, "image/jpeg"),
                        "data": image_data,
                    },
                },
                {"type": "text", "text": prompt},
            ],
        }],
    )
    return message.content[0].text

# Analyze a local screenshot
result = analyze_local_image("screenshot.png", "What bugs do you see in this code?")
print(result)
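You can also stack several image blocks ahead of the text prompt to compare images in a single request. A sketch; the helper is hypothetical and assumes PNG files:
def compare_images(paths: list[str], prompt: str) -> str:
    # Hypothetical helper: send several images in one request
    content = []
    for path in paths:
        with open(path, "rb") as f:
            data = base64.standard_b64encode(f.read()).decode("utf-8")
        content.append({
            "type": "image",
            "source": {"type": "base64", "media_type": "image/png", "data": data},
        })
    content.append({"type": "text", "text": prompt})
    message = client.messages.create(
        model="claude-sonnet-4-6",
        max_tokens=1024,
        messages=[{"role": "user", "content": content}],
    )
    return message.content[0].text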
Tool Use (Function Calling)
import json
# Define tools
tools = [
    {
        "name": "get_weather",
        "description": "Get current weather for a location",
        "input_schema": {
            "type": "object",
            "properties": {
                "location": {"type": "string", "description": "City and country"},
                "units": {"type": "string", "enum": ["celsius", "fahrenheit"], "default": "celsius"},
            },
            "required": ["location"],
        },
    },
    {
        "name": "search_web",
        "description": "Search the web for current information",
        "input_schema": {
            "type": "object",
            "properties": {
                "query": {"type": "string", "description": "Search query"},
            },
            "required": ["query"],
        },
    },
]
def get_weather(location: str, units: str = "celsius") -> dict:
    # Your weather API call here
    return {"location": location, "temp": 22, "condition": "sunny", "units": units}

def search_web(query: str) -> dict:
    # Your search implementation
    return {"results": [f"Result for: {query}"]}

TOOL_FUNCTIONS = {"get_weather": get_weather, "search_web": search_web}

def run_with_tools(user_message: str) -> str:
    messages = [{"role": "user", "content": user_message}]
    while True:
        response = client.messages.create(
            model="claude-sonnet-4-6",
            max_tokens=1024,
            tools=tools,
            messages=messages,
        )
        # If no tool use, we're done
        if response.stop_reason != "tool_use":
            text_blocks = [b.text for b in response.content if b.type == "text"]
            return " ".join(text_blocks)
        # Process tool calls
        messages.append({"role": "assistant", "content": response.content})
        tool_results = []
        for block in response.content:
            if block.type == "tool_use":
                fn = TOOL_FUNCTIONS.get(block.name)
                result = fn(**block.input) if fn else {"error": f"Unknown tool: {block.name}"}
                tool_results.append({
                    "type": "tool_result",
                    "tool_use_id": block.id,
                    "content": json.dumps(result),
                })
        messages.append({"role": "user", "content": tool_results})
answer = run_with_tools("What's the weather like in Mumbai right now?")
print(answer)
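By default Claude decides for itself whether to call a tool. You can force a specific tool with the tool_choice parameter:
# Force Claude to call get_weather instead of answering directly
response = client.messages.create(
    model="claude-sonnet-4-6",
    max_tokens=1024,
    tools=tools,
    tool_choice={"type": "tool", "name": "get_weather"},
    messages=[{"role": "user", "content": "Weather in Mumbai?"}],
)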
Prompt Caching (Save Costs)
# Cache large system prompts or documents
# Saves money when the same context is reused
long_document = "..." * 5000 # Large document
response = client.messages.create(
    model="claude-sonnet-4-6",
    max_tokens=1024,
    system=[
        {
            "type": "text",
            "text": "You are a helpful assistant analyzing the following document.",
        },
        {
            "type": "text",
            "text": long_document,
            "cache_control": {"type": "ephemeral"},  # Cache this block
        },
    ],
    messages=[{"role": "user", "content": "What are the main points of this document?"}],
)
# Usage stats show cache hit/miss
print(response.usage)
# cache_creation_input_tokens: 12000 (first call, paid to cache)
# cache_read_input_tokens: 12000 (subsequent calls, 90% cheaper)
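The cache pays off when several requests share the same prefix. A sketch that reuses the cached document across questions (the question list is illustrative):
cached_system = [
    {"type": "text", "text": "You are a helpful assistant analyzing the following document."},
    {"type": "text", "text": long_document, "cache_control": {"type": "ephemeral"}},
]
for question in ["Summarize section 1", "List every date mentioned"]:
    r = client.messages.create(
        model="claude-sonnet-4-6",
        max_tokens=512,
        system=cached_system,
        messages=[{"role": "user", "content": question}],
    )
    print(r.usage.cache_read_input_tokens, "tokens read from cache")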
Extended Thinking
# For complex reasoning tasks
response = client.messages.create(
    model="claude-sonnet-4-6",
    max_tokens=8000,
    thinking={
        "type": "enabled",
        "budget_tokens": 5000,  # How much to "think" before answering
    },
    messages=[{
        "role": "user",
        "content": "Solve: A train leaves Chicago at 60mph. Another leaves NYC at 80mph. They're 790 miles apart. When do they meet?"
    }],
)
for block in response.content:
    if block.type == "thinking":
        print(f"Thinking: {block.thinking[:200]}...")
    elif block.type == "text":
        print(f"Answer: {block.text}")
Model Selection Guide
| Use Case | Model | Why |
|---|---|---|
| Complex analysis, writing | claude-opus-4-6 | Best intelligence |
| Coding, general tasks | claude-sonnet-4-6 | Best balance |
| High-volume, fast response | claude-haiku-4-5-20251001 | Fast + cheap |
| Mathematical reasoning | claude-sonnet-4-6 + thinking | Extended reasoning |
| Simple classification | claude-haiku-4-5-20251001 | Very cheap |
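In code, model selection often reduces to a small router keyed on task type. The mapping below simply mirrors the table:
# Hypothetical router mirroring the table above
MODEL_BY_TASK = {
    "analysis": "claude-opus-4-6",
    "coding": "claude-sonnet-4-6",
    "classification": "claude-haiku-4-5-20251001",
}

def pick_model(task: str) -> str:
    return MODEL_BY_TASK.get(task, "claude-sonnet-4-6")  # Sonnet as the safe default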
TypeScript / JavaScript SDK
import Anthropic from "@anthropic-ai/sdk";
const client = new Anthropic({ apiKey: process.env.ANTHROPIC_API_KEY });
// Basic message
const message = await client.messages.create({
  model: "claude-sonnet-4-6",
  max_tokens: 1024,
  messages: [{ role: "user", content: "Hello, Claude!" }],
});
const first = message.content[0];
console.log(first.type === "text" ? first.text : "");
// Streaming in Node.js (messages.stream returns the stream directly, no await needed)
const stream = client.messages.stream({
  model: "claude-sonnet-4-6",
  max_tokens: 1024,
  messages: [{ role: "user", content: "Tell me a story" }],
});
for await (const event of stream) {
  if (event.type === "content_block_delta" && event.delta.type === "text_delta") {
    process.stdout.write(event.delta.text);
  }
}
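When you need the complete response after streaming (for logging or storage), the stream helper resolves to the assembled message:
// After the loop: collect the assembled message and its token usage
const finalMessage = await stream.finalMessage();
console.log(finalMessage.usage);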
Production Best Practices
import anthropic
from tenacity import retry, stop_after_attempt, wait_exponential

@retry(
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=1, min=4, max=10),
    reraise=True,
)
def robust_claude_call(messages: list, system: str = "", model: str = "claude-sonnet-4-6") -> str:
    response = client.messages.create(
        model=model,
        max_tokens=2048,
        system=system,
        messages=messages,
    )
    return response.content[0].text

# Cost tracking
def track_usage(response: anthropic.types.Message, model: str) -> dict:
    COSTS = {
        "claude-opus-4-6": {"input": 15.00, "output": 75.00},  # per 1M tokens
        "claude-sonnet-4-6": {"input": 3.00, "output": 15.00},
        "claude-haiku-4-5-20251001": {"input": 0.25, "output": 1.25},
    }
    rates = COSTS.get(model, COSTS["claude-sonnet-4-6"])
    input_cost = (response.usage.input_tokens / 1_000_000) * rates["input"]
    output_cost = (response.usage.output_tokens / 1_000_000) * rates["output"]
    return {
        "input_tokens": response.usage.input_tokens,
        "output_tokens": response.usage.output_tokens,
        "cost_usd": round(input_cost + output_cost, 6),
    }
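The SDK also raises typed exceptions, so rate limits can be handled differently from hard failures. A sketch; the wrapper name is ours:
def safe_call(messages: list) -> str | None:
    # Hypothetical wrapper showing the SDK's typed exceptions
    try:
        response = client.messages.create(
            model="claude-sonnet-4-6",
            max_tokens=1024,
            messages=messages,
        )
        return response.content[0].text
    except anthropic.RateLimitError:
        # 429: back off and retry later
        return None
    except anthropic.APIStatusError as e:
        # Any other non-2xx response
        print(f"API error {e.status_code}: {e}")
        return None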
Claude's API is one of the best in the industry for reasoning, code generation, and following complex instructions. Start with Sonnet for most tasks — upgrade to Opus when you need the best, switch to Haiku when cost matters most.