Google Gemini API Guide 2026: Build AI Apps with Gemini 2.0 Flash and Pro

Sanjeev Sharma



Google Gemini 2.0 Flash is one of the most cost-efficient high-quality models available in 2026. At $0.075 per 1M input tokens and a 1M-token context window, it is a game-changer for document processing and multimodal apps.
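As a quick sanity check on those numbers, input cost scales linearly with token count. A minimal sketch (the helper name and its default rate are mine, taken from the price quoted above, not part of the SDK):

```python
# Estimate input cost from a per-1M-token rate (the 2.0 Flash rate quoted above).
def input_cost_usd(tokens: int, rate_per_million: float = 0.075) -> float:
    return tokens / 1_000_000 * rate_per_million

# Filling the entire 1M-token context window costs under a dime of input:
print(f"${input_cost_usd(1_000_000):.3f}")  # $0.075
```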

Setup

# Install the SDK (shell)
pip install google-generativeai
# or for the newer SDK:
pip install google-genai

# Configure in Python
import google.generativeai as genai
genai.configure(api_key="YOUR_GEMINI_API_KEY")
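In production, it is safer to pull the key from the environment than to hardcode it. A minimal sketch (GEMINI_API_KEY is an assumed variable name; match whatever your deployment sets):

```python
import os
import google.generativeai as genai

# Read the key from the environment instead of committing it to source control.
genai.configure(api_key=os.environ["GEMINI_API_KEY"])
```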

Basic Text Generation

model = genai.GenerativeModel("gemini-2.0-flash")

# Simple generation
response = model.generate_content("Explain gradient descent in simple terms")
print(response.text)

# With configuration
response = model.generate_content(
    "Write a Python web scraper",
    generation_config=genai.types.GenerationConfig(
        temperature=0.2,
        max_output_tokens=1000,
        top_p=0.95,
    )
)

Multi-turn Chat

model = genai.GenerativeModel(
    "gemini-2.0-flash",
    system_instruction="You are a Python tutor. Be concise and practical."
)

chat = model.start_chat(history=[])

while True:
    user_input = input("You: ")
    if user_input.lower() == "quit":
        break

    response = chat.send_message(user_input)
    print(f"Gemini: {response.text}\n")

# Access history
for message in chat.history:
    print(f"{message.role}: {message.parts[0].text[:100]}...")

Vision: Analyze Images and Video

import PIL.Image

# Analyze a local image
img = PIL.Image.open("diagram.png")
response = model.generate_content([
    "Explain what this system architecture diagram shows",
    img
])
print(response.text)

# Analyze image from URL
import io
import httpx
image_url = "https://example.com/chart.png"
image = PIL.Image.open(io.BytesIO(httpx.get(image_url).content))

response = model.generate_content([
    "What trend does this chart show? Extract the key data points.",
    image
])

# Analyze video (Gemini 2.0 Pro)
import time

video_file = genai.upload_file("demo.mp4")
# Uploaded files must finish server-side processing before they can be used
while video_file.state.name == "PROCESSING":
    time.sleep(5)
    video_file = genai.get_file(video_file.name)

response = model.generate_content([
    "Summarize what happens in this video. List the main topics covered.",
    video_file
])

Process PDFs and Documents (1M Token Context)

# Upload a large PDF for analysis
pdf_file = genai.upload_file("annual_report_2025.pdf", mime_type="application/pdf")

# Ask questions about the entire document
model = genai.GenerativeModel("gemini-2.0-flash")

questions = [
    "What was the total revenue for 2025?",
    "What are the three biggest risk factors mentioned?",
    "Summarize the CEO's letter to shareholders",
]

for q in questions:
    response = model.generate_content([pdf_file, q])
    print(f"Q: {q}")
    print(f"A: {response.text}\n")

Code Execution (Sandboxed Python)

model = genai.GenerativeModel(
    "gemini-2.0-flash",
    tools="code_execution"  # Enable built-in Python executor
)

response = model.generate_content(
    """
    I have a list of stock prices: [150, 152, 148, 155, 160, 158, 162]
    Calculate the moving average (window=3), find the max drawdown,
    and plot the results.
    """
)

# Gemini will write and EXECUTE the code, return results + charts
for part in response.candidates[0].content.parts:
    if hasattr(part, 'executable_code'):
        print("Executed code:", part.executable_code.code)
    elif hasattr(part, 'code_execution_result'):
        print("Output:", part.code_execution_result.output)
    else:
        print("Text:", part.text)

Grounding with Google Search

model = genai.GenerativeModel("gemini-2.0-flash")

# Gemini will search Google for up-to-date info
response = model.generate_content(
    "What are the latest developments in quantum computing this week?",
    tools=[{"google_search": {}}]
)

# View search queries used
for part in response.candidates[0].content.parts:
    if hasattr(part, 'text'):
        print(part.text)

# Check grounding metadata for the web sources used
if response.candidates[0].grounding_metadata:
    for chunk in response.candidates[0].grounding_metadata.grounding_chunks:
        print(f"Source: {chunk.web.uri}")

Streaming Responses

for chunk in model.generate_content("Write a short story about AI", stream=True):
    print(chunk.text, end="", flush=True)

Gemini vs OpenAI: The Practical Comparison

# Same task with both APIs — compare quality vs cost
# (assumes an OpenAI client has already been created: openai_client = OpenAI())

# OpenAI GPT-4o: $5.00/1M input tokens
openai_response = openai_client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": long_document + "\n\nSummarize this."}]
)

# Gemini 2.0 Flash: $0.075/1M input tokens (66x cheaper!)
gemini_response = genai.GenerativeModel("gemini-2.0-flash").generate_content(
    long_document + "\n\nSummarize this."
)

For high-volume document processing, Gemini 2.0 Flash is the clear winner on cost.
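To put that in concrete terms, here is a back-of-envelope batch calculation using the input prices quoted above (the document count and size are made-up assumptions, and output-token cost is ignored for simplicity):

```python
# 10,000 documents at ~50k input tokens each = 500M input tokens total
docs, tokens_per_doc = 10_000, 50_000
total_tokens = docs * tokens_per_doc

gpt4o_cost = total_tokens / 1_000_000 * 5.00    # $5.00 per 1M input tokens
flash_cost = total_tokens / 1_000_000 * 0.075   # $0.075 per 1M input tokens

print(f"GPT-4o: ${gpt4o_cost:,.2f}  vs  Gemini 2.0 Flash: ${flash_cost:,.2f}")
# GPT-4o: $2,500.00  vs  Gemini 2.0 Flash: $37.50
```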


JavaScript/Node.js SDK

import { GoogleGenerativeAI } from '@google/generative-ai';

const genAI = new GoogleGenerativeAI(process.env.GEMINI_API_KEY);
const model = genAI.getGenerativeModel({ model: 'gemini-2.0-flash' });

// Chat with streaming
const chat = model.startChat();
const result = await chat.sendMessageStream('Explain React hooks');

for await (const chunk of result.stream) {
  process.stdout.write(chunk.text());
}

// With Next.js App Router
export async function POST(req: Request) {
  const { message } = await req.json();
  const result = await model.generateContent(message);
  return Response.json({ reply: result.response.text() });
}


Written by Sanjeev Sharma
Full Stack Engineer · E-mopro