Anthropic Claude API Complete Guide 2026: Build with Claude Opus, Sonnet & Haiku
Anthropic Claude API 2026: The Developer Guide
Claude is Anthropic's family of AI models — Opus for the most complex tasks, Sonnet for the best balance of speed and intelligence, and Haiku for fast, cost-effective applications.
- Setup and Installation
- Basic Text Generation
- Streaming Responses
- Vision: Analyzing Images
- Tool Use (Function Calling)
- Prompt Caching (Save Costs)
- Extended Thinking
- Model Selection Guide
- TypeScript / JavaScript SDK
- Production Best Practices
Setup and Installation
pip install anthropic
import anthropic
client = anthropic.Anthropic(api_key="your-api-key")
# Or set ANTHROPIC_API_KEY environment variable
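The client constructor also accepts connection options such as max_retries and timeout; a minimal sketch with illustrative values:
# Optional client configuration (values here are illustrative)
client = anthropic.Anthropic(
    max_retries=2,   # automatic retries on connection errors and certain HTTP errors
    timeout=60.0,    # per-request timeout in seconds
)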
Basic Text Generation
# Simple message
message = client.messages.create(
    model="claude-opus-4-6",  # or claude-sonnet-4-6, claude-haiku-4-5-20251001
    max_tokens=1024,
    messages=[
        {"role": "user", "content": "Explain quantum computing in simple terms"}
    ]
)
print(message.content[0].text)

# With system prompt
message = client.messages.create(
    model="claude-sonnet-4-6",
    max_tokens=2048,
    system="You are an expert Python developer. Always include working code examples.",
    messages=[
        {"role": "user", "content": "How do I implement a binary search tree in Python?"}
    ]
)

# Multi-turn conversation
messages = [
    {"role": "user", "content": "What is a REST API?"},
    {"role": "assistant", "content": "A REST API is an architectural style for distributed hypermedia systems..."},
    {"role": "user", "content": "Show me a Python example of calling one"},
]
response = client.messages.create(
    model="claude-sonnet-4-6",
    max_tokens=1024,
    messages=messages,
)
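To keep a conversation going, append each assistant reply back onto the messages list before the next call. A minimal sketch; the helper name is ours:
def continue_conversation(messages: list, user_text: str) -> str:
    # Hypothetical helper: add the user turn, call the API, record the reply
    messages.append({"role": "user", "content": user_text})
    response = client.messages.create(
        model="claude-sonnet-4-6",
        max_tokens=1024,
        messages=messages,
    )
    reply = response.content[0].text
    messages.append({"role": "assistant", "content": reply})
    return reply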
Streaming Responses
# Stream for real-time output
with client.messages.stream(
    model="claude-sonnet-4-6",
    max_tokens=1024,
    messages=[{"role": "user", "content": "Write a detailed explanation of neural networks"}],
) as stream:
    for text in stream.text_stream:
        print(text, end="", flush=True)
print()  # New line after stream
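Once the stream is consumed, the SDK can hand back the fully assembled Message, which is handy for logging token usage. A short sketch (the prompt is illustrative):
with client.messages.stream(
    model="claude-sonnet-4-6",
    max_tokens=1024,
    messages=[{"role": "user", "content": "Summarize the CAP theorem"}],
) as stream:
    for text in stream.text_stream:
        print(text, end="", flush=True)
    final = stream.get_final_message()  # the complete Message object
print()
print(final.usage)  # token counts for the streamed request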
# Async streaming with FastAPI
from anthropic import AsyncAnthropic
from fastapi import FastAPI
from fastapi.responses import StreamingResponse

async_client = AsyncAnthropic()
app = FastAPI()

@app.post("/chat")
async def chat(question: str):
    async def generate():
        async with async_client.messages.stream(
            model="claude-sonnet-4-6",
            max_tokens=1024,
            messages=[{"role": "user", "content": question}],
        ) as stream:
            async for text in stream.text_stream:
                yield f"data: {text}\n\n"
    return StreamingResponse(generate(), media_type="text/event-stream")
Vision: Analyzing Images
import base64
# From URL
message = client.messages.create(
    model="claude-sonnet-4-6",
    max_tokens=1024,
    messages=[{
        "role": "user",
        "content": [
            {
                "type": "image",
                "source": {
                    "type": "url",
                    "url": "https://example.com/chart.png",
                },
            },
            {"type": "text", "text": "Describe this chart and extract all data points"},
        ],
    }],
)
# From a local file
def analyze_local_image(image_path: str, prompt: str) -> str:
    with open(image_path, "rb") as f:
        image_data = base64.standard_b64encode(f.read()).decode("utf-8")
    ext = image_path.split(".")[-1].lower()
    media_type_map = {"jpg": "image/jpeg", "jpeg": "image/jpeg", "png": "image/png", "gif": "image/gif", "webp": "image/webp"}
    message = client.messages.create(
        model="claude-sonnet-4-6",
        max_tokens=1024,
        messages=[{
            "role": "user",
            "content": [
                {
                    "type": "image",
                    "source": {
                        "type": "base64",
                        "media_type": media_type_map.get(ext, "image/jpeg"),
                        "data": image_data,
                    },
                },
                {"type": "text", "text": prompt},
            ],
        }],
    )
    return message.content[0].text

# Analyze a local screenshot
result = analyze_local_image("screenshot.png", "What bugs do you see in this code?")
print(result)
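You can also stack several image blocks ahead of the text prompt to compare images in a single request. A sketch; the helper is hypothetical and assumes PNG files:
def compare_images(paths: list[str], prompt: str) -> str:
    # Hypothetical helper: send several images in one request
    content = []
    for path in paths:
        with open(path, "rb") as f:
            data = base64.standard_b64encode(f.read()).decode("utf-8")
        content.append({
            "type": "image",
            "source": {"type": "base64", "media_type": "image/png", "data": data},
        })
    content.append({"type": "text", "text": prompt})
    message = client.messages.create(
        model="claude-sonnet-4-6",
        max_tokens=1024,
        messages=[{"role": "user", "content": content}],
    )
    return message.content[0].text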
Tool Use (Function Calling)
import json
# Define tools
tools = [
    {
        "name": "get_weather",
        "description": "Get current weather for a location",
        "input_schema": {
            "type": "object",
            "properties": {
                "location": {"type": "string", "description": "City and country"},
                "units": {"type": "string", "enum": ["celsius", "fahrenheit"], "default": "celsius"},
            },
            "required": ["location"],
        },
    },
    {
        "name": "search_web",
        "description": "Search the web for current information",
        "input_schema": {
            "type": "object",
            "properties": {
                "query": {"type": "string", "description": "Search query"},
            },
            "required": ["query"],
        },
    },
]
def get_weather(location: str, units: str = "celsius") -> dict:
    # Your weather API call here
    return {"location": location, "temp": 22, "condition": "sunny", "units": units}

def search_web(query: str) -> dict:
    # Your search implementation
    return {"results": [f"Result for: {query}"]}

TOOL_FUNCTIONS = {"get_weather": get_weather, "search_web": search_web}

def run_with_tools(user_message: str) -> str:
    messages = [{"role": "user", "content": user_message}]
    while True:
        response = client.messages.create(
            model="claude-sonnet-4-6",
            max_tokens=1024,
            tools=tools,
            messages=messages,
        )
        # If no tool use, we're done
        if response.stop_reason != "tool_use":
            text_blocks = [b.text for b in response.content if b.type == "text"]
            return " ".join(text_blocks)
        # Process tool calls
        messages.append({"role": "assistant", "content": response.content})
        tool_results = []
        for block in response.content:
            if block.type == "tool_use":
                fn = TOOL_FUNCTIONS.get(block.name)
                result = fn(**block.input) if fn else {"error": f"Unknown tool: {block.name}"}
                tool_results.append({
                    "type": "tool_result",
                    "tool_use_id": block.id,
                    "content": json.dumps(result),
                })
        messages.append({"role": "user", "content": tool_results})
answer = run_with_tools("What's the weather like in Mumbai right now?")
print(answer)
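By default Claude decides for itself whether to call a tool. You can force a specific tool with the tool_choice parameter:
# Force Claude to call get_weather instead of answering directly
response = client.messages.create(
    model="claude-sonnet-4-6",
    max_tokens=1024,
    tools=tools,
    tool_choice={"type": "tool", "name": "get_weather"},
    messages=[{"role": "user", "content": "Weather in Mumbai?"}],
)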
Prompt Caching (Save Costs)
# Cache large system prompts or documents
# Saves money when the same context is reused
long_document = "..." * 5000 # Large document
response = client.messages.create(
    model="claude-sonnet-4-6",
    max_tokens=1024,
    system=[
        {
            "type": "text",
            "text": "You are a helpful assistant analyzing the following document.",
        },
        {
            "type": "text",
            "text": long_document,
            "cache_control": {"type": "ephemeral"},  # Cache this block
        },
    ],
    messages=[{"role": "user", "content": "What are the main points of this document?"}],
)
# Usage stats show cache hit/miss
print(response.usage)
# cache_creation_input_tokens: 12000 (first call, paid to cache)
# cache_read_input_tokens: 12000 (subsequent calls, 90% cheaper)
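The cache pays off when several requests share the same prefix. A sketch that reuses the cached document across questions (the question list is illustrative):
cached_system = [
    {"type": "text", "text": "You are a helpful assistant analyzing the following document."},
    {"type": "text", "text": long_document, "cache_control": {"type": "ephemeral"}},
]
for question in ["Summarize section 1", "List every date mentioned"]:
    r = client.messages.create(
        model="claude-sonnet-4-6",
        max_tokens=512,
        system=cached_system,
        messages=[{"role": "user", "content": question}],
    )
    print(r.usage.cache_read_input_tokens, "tokens read from cache")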
Extended Thinking
# For complex reasoning tasks
response = client.messages.create(
    model="claude-sonnet-4-6",
    max_tokens=8000,
    thinking={
        "type": "enabled",
        "budget_tokens": 5000,  # How much to "think" before answering
    },
    messages=[{
        "role": "user",
        "content": "Solve: A train leaves Chicago at 60mph. Another leaves NYC at 80mph. They're 790 miles apart. When do they meet?"
    }],
)
for block in response.content:
    if block.type == "thinking":
        print(f"Thinking: {block.thinking[:200]}...")
    elif block.type == "text":
        print(f"Answer: {block.text}")
Model Selection Guide
| Use Case | Model | Why |
|---|---|---|
| Complex analysis, writing | claude-opus-4-6 | Best intelligence |
| Coding, general tasks | claude-sonnet-4-6 | Best balance |
| High-volume, fast response | claude-haiku-4-5-20251001 | Fast + cheap |
| Mathematical reasoning | claude-sonnet-4-6 + thinking | Extended reasoning |
| Simple classification | claude-haiku-4-5-20251001 | Very cheap |
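In code, model selection often reduces to a small router keyed on task type. The mapping below simply mirrors the table:
# Hypothetical router mirroring the table above
MODEL_BY_TASK = {
    "analysis": "claude-opus-4-6",
    "coding": "claude-sonnet-4-6",
    "classification": "claude-haiku-4-5-20251001",
}

def pick_model(task: str) -> str:
    return MODEL_BY_TASK.get(task, "claude-sonnet-4-6")  # Sonnet as the safe default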
TypeScript / JavaScript SDK
import Anthropic from "@anthropic-ai/sdk";
const client = new Anthropic({ apiKey: process.env.ANTHROPIC_API_KEY });
// Basic message
const message = await client.messages.create({
  model: "claude-sonnet-4-6",
  max_tokens: 1024,
  messages: [{ role: "user", content: "Hello, Claude!" }],
});
const first = message.content[0];
console.log(first.type === "text" ? first.text : "");
// Streaming in Node.js (messages.stream returns the stream directly, no await needed)
const stream = client.messages.stream({
  model: "claude-sonnet-4-6",
  max_tokens: 1024,
  messages: [{ role: "user", content: "Tell me a story" }],
});
for await (const event of stream) {
  if (event.type === "content_block_delta" && event.delta.type === "text_delta") {
    process.stdout.write(event.delta.text);
  }
}
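When you need the complete response after streaming (for logging or storage), the stream helper resolves to the assembled message:
// After the loop: collect the assembled message and its token usage
const finalMessage = await stream.finalMessage();
console.log(finalMessage.usage);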
Production Best Practices
import anthropic
from tenacity import retry, stop_after_attempt, wait_exponential

@retry(
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=1, min=4, max=10),
    reraise=True,
)
def robust_claude_call(messages: list, system: str = "", model: str = "claude-sonnet-4-6") -> str:
    response = client.messages.create(
        model=model,
        max_tokens=2048,
        system=system,
        messages=messages,
    )
    return response.content[0].text

# Cost tracking
def track_usage(response: anthropic.types.Message, model: str) -> dict:
    COSTS = {
        "claude-opus-4-6": {"input": 15.00, "output": 75.00},  # per 1M tokens
        "claude-sonnet-4-6": {"input": 3.00, "output": 15.00},
        "claude-haiku-4-5-20251001": {"input": 0.25, "output": 1.25},
    }
    rates = COSTS.get(model, COSTS["claude-sonnet-4-6"])
    input_cost = (response.usage.input_tokens / 1_000_000) * rates["input"]
    output_cost = (response.usage.output_tokens / 1_000_000) * rates["output"]
    return {
        "input_tokens": response.usage.input_tokens,
        "output_tokens": response.usage.output_tokens,
        "cost_usd": round(input_cost + output_cost, 6),
    }
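The SDK also raises typed exceptions, so rate limits can be handled differently from hard failures. A sketch; the wrapper name is ours:
def safe_call(messages: list) -> str | None:
    # Hypothetical wrapper showing the SDK's typed exceptions
    try:
        response = client.messages.create(
            model="claude-sonnet-4-6",
            max_tokens=1024,
            messages=messages,
        )
        return response.content[0].text
    except anthropic.RateLimitError:
        # 429: back off and retry later
        return None
    except anthropic.APIStatusError as e:
        # Any other non-2xx response
        print(f"API error {e.status_code}: {e}")
        return None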
Claude's API is one of the best in the industry for reasoning, code generation, and following complex instructions. Start with Sonnet for most tasks — upgrade to Opus when you need the best, switch to Haiku when cost matters most.