OpenAI API Complete Guide 2026: GPT-4o, Assistants, Vision, Function Calling
Advertisement
OpenAI API Complete Guide 2026
The OpenAI API powers millions of AI applications. This guide covers every major feature with production-ready examples.
- Setup
- Chat Completions (Core API)
- Vision: Analyze Images
- Function Calling (Tool Use)
- Embeddings
- Structured Outputs (Pydantic)
- Rate Limiting and Retries
- Cost Optimization Tips
Setup
pip install openai
from openai import OpenAI
client = OpenAI(api_key="sk-...") # or set OPENAI_API_KEY env var
Chat Completions (Core API)
# Basic completion: one-shot request/response.
# A system message sets assistant behavior; the user message is the query.
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[
        {"role": "system", "content": "You are a helpful coding assistant."},
        {"role": "user", "content": "Explain async/await in Python"},
    ],
    max_tokens=500,   # cap on generated (output) tokens only
    temperature=0.7,  # moderate sampling randomness; lower = more deterministic
)
# choices[0] is the first (and, with default n=1, only) candidate answer.
print(response.choices[0].message.content)
# usage.total_tokens = prompt tokens + completion tokens (what you are billed for).
print(f"Tokens used: {response.usage.total_tokens}")
# Streaming response.
# BUG FIX: the original called client.chat.completions.stream(...) and read
# stream.text_stream — that is the Anthropic SDK's streaming API. The OpenAI
# SDK streams by passing stream=True to create() and iterating delta chunks.
stream = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Write a quicksort in Go"}],
    stream=True,
)
for chunk in stream:
    # Some chunks carry no content (e.g. the initial role chunk) — skip them.
    if chunk.choices and chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="", flush=True)
Vision: Analyze Images
import base64

# Analyze local image: binary image data must be base64-encoded and wrapped
# in a data: URL before being sent as an image_url content part.
with open("screenshot.png", "rb") as f:
    image_data = base64.b64encode(f.read()).decode("utf-8")

response = client.chat.completions.create(
    model="gpt-4o",
    messages=[{
        "role": "user",
        # Multimodal content: a list mixing text parts and image parts.
        "content": [
            {"type": "text", "text": "Describe any bugs or issues you see in this UI screenshot"},
            {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_data}"}},
        ],
    }],
)
# Analyze image from URL: a plain https URL is passed through as-is,
# no base64 encoding needed.
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[{
        "role": "user",
        "content": [
            {"type": "text", "text": "What programming language is this code written in?"},
            {"type": "image_url", "image_url": {"url": "https://example.com/code-screenshot.png"}},
        ],
    }],
)
Function Calling (Tool Use)
Function calling lets the LLM decide when to call your functions:
import json

# Define tools: JSON Schema descriptions the model reads to decide when and
# how to call our functions. "required" lists the mandatory arguments; the
# model fills in arguments as a JSON string we must parse ourselves.
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get current weather for a location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {"type": "string", "description": "City name, e.g. 'Delhi'"},
                    "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
                },
                "required": ["location"],  # unit is optional
            },
        },
    },
    {
        "type": "function",
        "function": {
            "name": "search_database",
            "description": "Search the product database",
            "parameters": {
                "type": "object",
                "properties": {
                    "query": {"type": "string"},
                    "limit": {"type": "integer", "default": 5},
                },
                "required": ["query"],  # limit is optional
            },
        },
    },
]
def get_weather(location: str, unit: str = "celsius") -> dict:
    """Stub weather lookup — swap in a real weather-service call here."""
    report = {"temperature": 28, "condition": "sunny"}
    report["location"] = location
    return report
def search_database(query: str, limit: int = 5) -> list:
    """Stub product search — always yields a single canned result row."""
    sample_row = dict(id=1, name="Example Product", price=99.99)
    return [sample_row]
# Agentic loop: keep calling the model, executing any tools it requests,
# until it answers with plain text.
messages = [{"role": "user", "content": "What's the weather in Mumbai and search for monsoon gear?"}]
while True:
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=messages,
        tools=tools,
        tool_choice="auto",  # let the model decide whether to call a tool
    )
    message = response.choices[0].message
    messages.append(message)  # the assistant turn must stay in the history
    if response.choices[0].finish_reason == "tool_calls":
        for tool_call in message.tool_calls:
            fn_name = tool_call.function.name
            fn_args = json.loads(tool_call.function.arguments)
            if fn_name == "get_weather":
                result = get_weather(**fn_args)
            elif fn_name == "search_database":
                result = search_database(**fn_args)
            else:
                # BUG FIX: an unrecognized tool name previously left `result`
                # unbound (NameError) or stale from the prior iteration.
                # Report the problem back to the model instead.
                result = {"error": f"Unknown tool: {fn_name}"}
            # Every tool_call must be answered by a matching "tool" message
            # carrying the same tool_call_id.
            messages.append({
                "role": "tool",
                "tool_call_id": tool_call.id,
                "content": json.dumps(result),
            })
    else:
        # No tool requested — this is the final answer.
        print(message.content)
        break
Embeddings
# Single embedding
response = client.embeddings.create(
    model="text-embedding-3-large",
    input="The quick brown fox jumps over the lazy dog",
)
vector = response.data[0].embedding  # 3072-dimensional list of floats

# Batch embeddings: pass a list of strings to embed them in one request.
texts = ["Hello world", "How are you?", "Python is great"]
response = client.embeddings.create(model="text-embedding-3-large", input=texts)
# One data item per input string; vectors[i] corresponds to texts[i].
vectors = [item.embedding for item in response.data]
# Semantic search
import numpy as np
def cosine_similarity(a, b):
    """Cosine of the angle between vectors a and b (1.0 = same direction)."""
    numerator = np.dot(a, b)
    denominator = np.linalg.norm(a) * np.linalg.norm(b)
    return numerator / denominator
# Embed the query, then rank the corpus by cosine similarity.
query_vec = client.embeddings.create(model="text-embedding-3-large", input="python tutorial").data[0].embedding
scores = [(cosine_similarity(query_vec, v), t) for v, t in zip(vectors, texts)]
scores.sort(reverse=True)  # tuples sort by score first; highest similarity on top
print("Most similar:", scores[0][1])
Structured Outputs (Pydantic)
from pydantic import BaseModel
from typing import Optional
class CodeReview(BaseModel):
    """Schema the model must conform to when producing a structured review."""

    overall_quality: str  # "good" | "needs_work" | "critical"
    bugs_found: int  # count of distinct bugs identified
    suggestions: list[str]  # free-form improvement suggestions
    refactored_code: Optional[str] = None  # optional full rewrite of the code
# parse() constrains generation to the schema and validates the result,
# returning a typed object instead of raw JSON text.
response = client.beta.chat.completions.parse(
    model="gpt-4o",
    messages=[{
        "role": "user",
        "content": "Review this Python code:\n\ndef add(a, b):\n return a + b"
    }],
    response_format=CodeReview,
)
review = response.choices[0].message.parsed  # a validated CodeReview instance
print(f"Quality: {review.overall_quality}")
print(f"Bugs: {review.bugs_found}")
for s in review.suggestions:
    print(f" - {s}")
Rate Limiting and Retries
from openai import RateLimitError, APIError
import time
def call_with_retry(prompt: str, max_retries: int = 3) -> str:
    """Send *prompt* to gpt-4o, retrying rate-limit errors with exponential backoff.

    Args:
        prompt: User message content.
        max_retries: Total number of attempts before giving up.

    Returns:
        The assistant's reply text.

    Raises:
        APIError: re-raised immediately (not retryable here).
        Exception: "Max retries exceeded", chained to the last RateLimitError.
    """
    last_error = None
    for attempt in range(max_retries):
        try:
            response = client.chat.completions.create(
                model="gpt-4o",
                messages=[{"role": "user", "content": prompt}],
            )
            return response.choices[0].message.content
        except RateLimitError as e:
            last_error = e
            # BUG FIX: don't sleep after the final failed attempt — there is
            # no retry left to wait for.
            if attempt == max_retries - 1:
                break
            wait = 2 ** attempt  # Exponential backoff: 1s, 2s, 4s
            print(f"Rate limited. Waiting {wait}s...")
            time.sleep(wait)
        except APIError as e:
            print(f"API error: {e}")
            raise
    # BUG FIX: chain the causal RateLimitError instead of discarding it.
    raise Exception("Max retries exceeded") from last_error
Cost Optimization Tips
# 1. Use gpt-4o-mini for simple tasks (90% cheaper)
response = client.chat.completions.create(
    model="gpt-4o-mini",  # vs "gpt-4o"
    # NOTE(review): long_text must be defined earlier in your program.
    messages=[{"role": "user", "content": "Summarize in 1 sentence: " + long_text}],
    max_tokens=50,  # limit output tokens
)
# 2. Cache frequent prompts (same input = same output at temp=0)
import hashlib, functools


@functools.lru_cache(maxsize=1000)
def cached_completion(prompt_hash: str, prompt: str) -> str:
    """Memoized wrapper around call_with_retry.

    NOTE(review): lru_cache already keys on the full (prompt_hash, prompt)
    argument tuple, so the md5 digest adds no correctness here — the prompt
    alone would be a sufficient cache key.
    """
    return call_with_retry(prompt)


def smart_complete(prompt: str) -> str:
    """Complete *prompt*, serving repeated prompts from the in-process cache."""
    h = hashlib.md5(prompt.encode()).hexdigest()  # md5 used as a key, not for security
    return cached_completion(h, prompt)
Advertisement