Google Gemini API Guide 2026: Build AI Apps with Gemini 2.0 Flash and Pro
Advertisement
Google Gemini API Guide 2026
Google Gemini 2.0 Flash is the most cost-efficient high-quality model available in 2026. At $0.075 per 1M input tokens, with a 1M-token context window, it's a game-changer for document processing and multimodal apps.
- Setup
- Basic Text Generation
- Multi-turn Chat
- Vision: Analyze Images and Video
- Process PDFs and Documents (1M Token Context)
- Code Execution (Sandboxed Python)
- Grounding with Google Search
- Streaming Responses
- Gemini vs OpenAI: The Practical Comparison
- JavaScript/Node.js SDK
Setup
pip install google-generativeai
# or for the newer SDK:
pip install google-genai
# Configure the legacy google-generativeai SDK with your API key.
# NOTE(review): the newer `google-genai` package uses a different entry point
# (`from google import genai; client = genai.Client(...)`) — these snippets
# target the google-generativeai import shown here.
import google.generativeai as genai
genai.configure(api_key="YOUR_GEMINI_API_KEY")
Basic Text Generation
# Create a Gemini 2.0 Flash handle for plain text generation.
model = genai.GenerativeModel("gemini-2.0-flash")

# One-shot prompt using the model's default sampling settings.
response = model.generate_content("Explain gradient descent in simple terms")
print(response.text)

# Same call with explicit decoding parameters.
config = genai.types.GenerationConfig(
    temperature=0.2,  # low temperature -> more deterministic output for code
    max_output_tokens=1000,
    top_p=0.95,
)
response = model.generate_content(
    "Write a Python web scraper",
    generation_config=config,
)
Multi-turn Chat
# Chat-capable model with a system instruction applied to every turn.
model = genai.GenerativeModel(
    "gemini-2.0-flash",
    system_instruction="You are a Python tutor. Be concise and practical.",
)
chat = model.start_chat(history=[])

# Minimal REPL: read a line, send it, print the reply; "quit" exits.
while (user_input := input("You: ")).lower() != "quit":
    response = chat.send_message(user_input)
    print(f"Gemini: {response.text}\n")

# Replay the accumulated conversation, truncating each turn to a preview.
for message in chat.history:
    print(f"{message.role}: {message.parts[0].text[:100]}...")
Vision: Analyze Images and Video
import PIL.Image

# --- Analyze a local image ---
img = PIL.Image.open("diagram.png")
response = model.generate_content([
    "Explain what this system architecture diagram shows",
    img,
])
print(response.text)

# --- Analyze an image fetched over HTTP ---
# BUG FIX: httpx.get() has no `stream=True` keyword and its Response has no
# `.raw` attribute — those are `requests` idioms. Download the bytes and wrap
# them in a BytesIO for PIL instead.
import io
import httpx

image_url = "https://example.com/chart.png"
image_bytes = httpx.get(image_url).content
image = PIL.Image.open(io.BytesIO(image_bytes))
response = model.generate_content([
    "What trend does this chart show? Extract the key data points.",
    image,
])

# --- Analyze a video (Gemini 2.0 Pro) ---
import time

video_file = genai.upload_file("demo.mp4")
# Uploaded videos are processed asynchronously by the File API; wait until the
# file leaves the PROCESSING state before referencing it in a prompt, or the
# generate call will fail.
while video_file.state.name == "PROCESSING":
    time.sleep(2)
    video_file = genai.get_file(video_file.name)
response = model.generate_content([
    "Summarize what happens in this video. List the main topics covered.",
    video_file,
])
Process PDFs and Documents (1M Token Context)
# Upload a large PDF once, then run a batch of questions against it —
# the 1M-token context lets the model see the whole document.
pdf_file = genai.upload_file("annual_report_2025.pdf", mime_type="application/pdf")

model = genai.GenerativeModel("gemini-2.0-flash")

questions = (
    "What was the total revenue for 2025?",
    "What are the three biggest risk factors mentioned?",
    "Summarize the CEO's letter to shareholders",
)
for question in questions:
    answer = model.generate_content([pdf_file, question])
    print(f"Q: {question}")
    print(f"A: {answer.text}\n")
Code Execution (Sandboxed Python)
# Enable Gemini's built-in sandboxed Python interpreter.
model = genai.GenerativeModel(
    "gemini-2.0-flash",
    tools="code_execution"  # Enable built-in Python executor
)
response = model.generate_content(
    """
I have a list of stock prices: [150, 152, 148, 155, 160, 158, 162]
Calculate the moving average (window=3), find the max drawdown,
and plot the results.
"""
)
# Gemini writes AND executes the code, returning results + charts.
# BUG FIX: response parts are proto-backed objects on which every field
# attribute exists, so hasattr(part, 'executable_code') is always True and the
# original dispatch always took the first branch. Test field truthiness
# instead, as the official SDK examples do.
for part in response.candidates[0].content.parts:
    if part.executable_code:
        print("Executed code:", part.executable_code.code)
    elif part.code_execution_result:
        print("Output:", part.code_execution_result.output)
    else:
        print("Text:", part.text)
Grounding with Google Search
model = genai.GenerativeModel("gemini-2.0-flash")

# Ground the answer in live Google Search results.
# NOTE(review): the {"google_search": {}} tool dict is the Gemini 2.0 /
# google-genai convention; the legacy google-generativeai SDK used
# "google_search_retrieval" — confirm against the installed SDK version.
response = model.generate_content(
    "What are the latest developments in quantum computing this week?",
    tools=[{"google_search": {}}]
)

# Print the grounded answer text. Parts are proto-backed, so every field
# attribute exists — test truthiness, not hasattr() (always True).
for part in response.candidates[0].content.parts:
    if part.text:
        print(part.text)

# BUG FIX: cited sources live in grounding_metadata.grounding_chunks (each
# chunk's .web carries .uri/.title); search_entry_point is a single rendered
# search-widget object, not an iterable of sources.
metadata = response.candidates[0].grounding_metadata
if metadata:
    for chunk in metadata.grounding_chunks:
        print(f"Source: {chunk.web.uri}")
Streaming Responses
# Stream tokens to stdout as they arrive instead of waiting for the full reply.
stream = model.generate_content("Write a short story about AI", stream=True)
for chunk in stream:
    print(chunk.text, end="", flush=True)
Gemini vs OpenAI: The Practical Comparison
# Same task with both APIs — compare quality vs cost
# NOTE(review): `openai_client` and `long_document` are placeholders assumed
# to be defined elsewhere — this snippet is illustrative, not runnable as-is.
# OpenAI: $5.00/1M input tokens
openai_response = openai_client.chat.completions.create(
model="gpt-4o",
messages=[{"role": "user", "content": long_document + "\n\nSummarize this."}]
)
# Gemini 2.0 Flash: $0.075/1M input tokens (66x cheaper!)
gemini_response = genai.GenerativeModel("gemini-2.0-flash").generate_content(
long_document + "\n\nSummarize this."
)
For high-volume document processing, Gemini 2.0 Flash is the clear winner on cost.
JavaScript/Node.js SDK
// Node.js setup for the official @google/generative-ai SDK.
import { GoogleGenerativeAI } from '@google/generative-ai';
const genAI = new GoogleGenerativeAI(process.env.GEMINI_API_KEY);
const model = genAI.getGenerativeModel({ model: 'gemini-2.0-flash' });
// Chat with streaming: write each chunk to stdout as it arrives.
// NOTE(review): top-level `await` requires an ES module context.
const chat = model.startChat();
const result = await chat.sendMessageStream('Explain React hooks');
for await (const chunk of result.stream) {
process.stdout.write(chunk.text());
}
// With Next.js App Router
// NOTE(review): the `req: Request` annotation is TypeScript — this handler
// belongs in a .ts route file (e.g. app/api/chat/route.ts).
export async function POST(req: Request) {
const { message } = await req.json();
const result = await model.generateContent(message);
return Response.json({ reply: result.response.text() });
}
Advertisement