PostgreSQL Complete Guide 2026: Performance, JSON, Full-Text Search, and Scaling
Advertisement
PostgreSQL 2026: The Database That Does Everything
PostgreSQL is one of the most widely used databases for production web apps. It handles relational data, JSON, time-series, full-text search, and geospatial queries — all in one system.
- JSONB: Flexible Schemas
- Full-Text Search
- Window Functions
- CTEs (Common Table Expressions)
- Index Optimization
- EXPLAIN ANALYZE: Find Slow Queries
- Connection Pooling with PgBouncer
- Partitioning for Scale
JSONB: Flexible Schemas
-- Create a table with JSONB columns for schema-flexible attributes
CREATE TABLE products (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    name TEXT NOT NULL,
    price DECIMAL(10,2) NOT NULL,        -- exact numeric for money; never FLOAT
    metadata JSONB,                      -- flexible attributes (color, tags, flags)
    specs JSONB,                         -- hardware specs (ram, storage, cpu)
    created_at TIMESTAMPTZ DEFAULT NOW()
);

-- Insert with JSON literals
INSERT INTO products (name, price, metadata, specs) VALUES
    ('MacBook Pro', 2499.00,
     '{"color": "space-gray", "in_stock": true, "tags": ["laptop", "apple"]}',
     '{"ram": 16, "storage": 512, "cpu": "M3 Pro"}');

-- Query JSON fields.
-- Use @> containment for the boolean flag: ->>'in_stock' = 'true' would
-- compare the value as text and could not use the GIN index below, while
-- @> keeps the JSON type and is GIN-indexable.
SELECT name, metadata->>'color' AS color
FROM products
WHERE metadata @> '{"in_stock": true}'
  AND (specs->>'ram')::int >= 16;

-- Expression index for equality on one extracted scalar field...
CREATE INDEX idx_products_color ON products ((metadata->>'color'));
-- ...and a GIN index for containment (@>) queries over the whole document
CREATE INDEX idx_products_metadata ON products USING GIN(metadata);

-- Array containment: does the tags array contain "laptop"?
SELECT id, name, price FROM products
WHERE metadata->'tags' @> '["laptop"]'::jsonb;

-- Merge a new key into the document (|| is a shallow, top-level merge)
UPDATE products
SET metadata = metadata || '{"sale_price": 1999.00}'::jsonb
WHERE id = '...';  -- '...' is a placeholder UUID; always filter UPDATEs
Full-Text Search
-- Built-in full-text search: a tsvector column kept current by a trigger
ALTER TABLE posts ADD COLUMN search_vector tsvector;

-- Trigger function: rebuild the vector from title, content, and tags on
-- every write; coalesce() guards against NULL columns
CREATE FUNCTION update_search_vector()
RETURNS trigger AS $$
BEGIN
    NEW.search_vector := to_tsvector('english',
        coalesce(NEW.title, '') || ' ' ||
        coalesce(NEW.content, '') || ' ' ||
        coalesce(array_to_string(NEW.tags, ' '), '')
    );
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

-- GIN index makes @@ matching fast
CREATE INDEX idx_posts_search ON posts USING GIN(search_vector);

-- Fire BEFORE the row is written so the vector lands in the same row version
CREATE TRIGGER posts_search_update
    BEFORE INSERT OR UPDATE ON posts
    FOR EACH ROW EXECUTE FUNCTION update_search_vector();

-- Search, ranked by relevance, with a highlighted excerpt
SELECT
    id,
    title,
    ts_rank(search_vector, q) AS rank,
    ts_headline('english', content, q, 'MaxWords=50,MinWords=25') AS excerpt
FROM posts,
     to_tsquery('english', 'nextjs & typescript') AS q
WHERE search_vector @@ q
ORDER BY rank DESC
LIMIT 10;
Window Functions
-- Running totals per signup day.
-- NOTE(review): the ROWS frame below counts *rows*, not calendar days —
-- if a day has zero signups (no row after GROUP BY), the "7-day" average
-- is really a "last 7 days-with-signups" average. Densify with
-- generate_series() if exact calendar windows are required.
SELECT
date_trunc('day', created_at) as day,
COUNT(*) as daily_signups,
-- aggregate-inside-window: COUNT(*) is computed per group first, then
-- SUM(...) OVER accumulates across the grouped rows ordered by day
SUM(COUNT(*)) OVER (ORDER BY date_trunc('day', created_at)) as cumulative_signups,
-- explicit ROWS frame: 6 preceding rows + current row = 7-row moving average
AVG(COUNT(*)) OVER (ORDER BY date_trunc('day', created_at) ROWS BETWEEN 6 PRECEDING AND CURRENT ROW) as rolling_7day_avg
FROM users
GROUP BY date_trunc('day', created_at)
ORDER BY day;
-- Rank users by post count.
-- RANK() leaves gaps after ties, DENSE_RANK() does not,
-- NTILE(4) splits the ordered set into quartile buckets.
SELECT
u.name,
COUNT(p.id) as post_count,
RANK() OVER (ORDER BY COUNT(p.id) DESC) as rank,
DENSE_RANK() OVER (ORDER BY COUNT(p.id) DESC) as dense_rank,
NTILE(4) OVER (ORDER BY COUNT(p.id) DESC) as quartile
FROM users u
-- LEFT JOIN keeps users with zero posts; COUNT(p.id) ignores NULLs, so
-- they get post_count = 0 rather than dropping out
LEFT JOIN posts p ON p.author_id = u.id
GROUP BY u.id, u.name;
-- Previous/next values: LAG/LEAD read the neighboring rows in created_at
-- order; the first/last row gets NULL. '...' is a placeholder author id.
SELECT
title,
created_at,
LAG(title) OVER (ORDER BY created_at) as previous_post,
LEAD(title) OVER (ORDER BY created_at) as next_post
FROM posts
WHERE author_id = '...'
ORDER BY created_at;
CTEs (Common Table Expressions)
-- Readable complex queries: each CTE is one named logical step
WITH
active_users AS (
    -- users who logged in within the last 30 days
    SELECT id, name, email
    FROM users
    WHERE last_login > NOW() - INTERVAL '30 days'
),
user_stats AS (
    -- post counts and total views per active user.
    -- COALESCE matters: SUM() over zero joined rows yields NULL, and in
    -- PostgreSQL NULLs sort FIRST under ORDER BY ... DESC, so without it
    -- users with no published posts would float to the top of the list.
    SELECT
        u.id,
        u.name,
        COUNT(p.id) AS post_count,
        COALESCE(SUM(p.views), 0) AS total_views
    FROM active_users u
    LEFT JOIN posts p ON p.author_id = u.id AND p.published = true
    GROUP BY u.id, u.name
)
SELECT
    name,
    post_count,
    total_views,
    -- NULLIF makes the division yield NULL (not an error) for zero posts
    ROUND(total_views::numeric / NULLIF(post_count, 0), 1) AS views_per_post
FROM user_stats
ORDER BY total_views DESC
LIMIT 20;

-- Recursive CTE: walk the organization hierarchy top-down
WITH RECURSIVE org_tree AS (
    -- base case: top-level managers (no manager of their own)
    SELECT id, name, manager_id, 0 AS level, ARRAY[id] AS path
    FROM employees
    WHERE manager_id IS NULL
    UNION ALL
    -- recursive case: each employee attaches below their manager's row
    SELECT e.id, e.name, e.manager_id, t.level + 1, t.path || e.id
    FROM employees e
    JOIN org_tree t ON e.manager_id = t.id
    WHERE NOT e.id = ANY(t.path) -- prevent infinite loops on cyclic data
)
SELECT level, repeat(' ', level) || name AS hierarchy
FROM org_tree
ORDER BY path;
Index Optimization
-- Partial index: only index the subset of rows the hot query touches
CREATE INDEX idx_posts_published_recent
ON posts (created_at DESC)
WHERE published = true;

-- Covering index: the equality column (published) must lead, then the
-- sort key, so that WHERE published = ? ORDER BY created_at DESC is a
-- single ordered range scan; INCLUDE columns make it index-only.
CREATE INDEX idx_posts_list_covering
ON posts (published, created_at DESC)
INCLUDE (id, title, slug, author_id);
-- SELECT id, title, slug → uses index, no heap fetch needed

-- Expression index: keeps a function-wrapped predicate index-friendly
-- (a plain index on email cannot serve LOWER(email) = ...)
CREATE INDEX idx_users_email_lower
ON users (LOWER(email));
-- Supports: WHERE LOWER(email) = 'test@example.com'

-- Composite index: column order matters!
CREATE INDEX idx_posts_author_date
ON posts (author_id, created_at DESC);
-- Supports: WHERE author_id = ? ORDER BY created_at DESC
-- Also supports: WHERE author_id = ?
-- Does NOT support: WHERE created_at > ?  (not the leading column)
EXPLAIN ANALYZE: Find Slow Queries
-- Find slow queries in production.
-- Requires the pg_stat_statements extension: add it to
-- shared_preload_libraries (restart needed) and run
-- CREATE EXTENSION pg_stat_statements; in the target database.
SELECT
query,
calls,
mean_exec_time::numeric(10,2) as avg_ms,
total_exec_time::numeric(10,2) as total_ms,
rows
FROM pg_stat_statements
WHERE mean_exec_time > 100 -- queries averaging over 100 ms
ORDER BY mean_exec_time DESC
LIMIT 20;
-- Analyze a specific query.
-- Caution: EXPLAIN ANALYZE actually *executes* the statement — wrap
-- data-modifying statements in BEGIN ... ROLLBACK when analyzing them.
-- BUFFERS adds shared-buffer hit/read counts to spot I/O-bound plans.
EXPLAIN (ANALYZE, BUFFERS, FORMAT TEXT)
SELECT p.*, u.name as author_name
FROM posts p
JOIN users u ON u.id = p.author_id
WHERE p.published = true
ORDER BY p.created_at DESC
LIMIT 20;
-- Look for: Seq Scan on large tables, high cost estimates
-- Want: Index Scan, Index Only Scan
Connection Pooling with PgBouncer
; pgbouncer.ini — sits between the app and PostgreSQL, multiplexing many
; client connections onto a small pool of real server connections
[databases]
myapp = host=localhost port=5432 dbname=myapp
[pgbouncer]
; transaction pooling lends a server connection per transaction.
; Caveat: session state (SET, server-side prepared statements, advisory
; locks, LISTEN/NOTIFY) does not carry across transactions in this mode.
pool_mode = transaction ; Best for web apps
max_client_conn = 1000 ; Total client connections
default_pool_size = 20 ; Per-database pool
min_pool_size = 5 ; keep at least this many server connections warm
reserve_pool_size = 5 ; extra connections allowed during bursts
listen_port = 6432 ; apps connect here instead of 5432
Connecting directly: 200 client connections × ~5MB per PostgreSQL backend ≈ 1GB of RAM on the database server.
With PgBouncer: 1,000 clients share just 20 real server connections ≈ 100MB of RAM.
Partitioning for Scale
-- Partition posts by year for massive tables.
-- On a partitioned table, any PRIMARY KEY or UNIQUE constraint must
-- include the partition key — hence the composite key (id, created_at).
CREATE TABLE posts (
    id UUID NOT NULL,
    title TEXT NOT NULL,
    created_at TIMESTAMPTZ NOT NULL,
    PRIMARY KEY (id, created_at)
) PARTITION BY RANGE (created_at);

-- One partition per year; ranges are half-open: [FROM, TO)
CREATE TABLE posts_2024 PARTITION OF posts
    FOR VALUES FROM ('2024-01-01') TO ('2025-01-01');
CREATE TABLE posts_2025 PARTITION OF posts
    FOR VALUES FROM ('2025-01-01') TO ('2026-01-01');
CREATE TABLE posts_2026 PARTITION OF posts
    FOR VALUES FROM ('2026-01-01') TO ('2027-01-01');

-- Catch-all: without a DEFAULT partition, an insert outside the defined
-- ranges fails with an error
CREATE TABLE posts_default PARTITION OF posts DEFAULT;

-- Partition pruning: the planner skips partitions whose range cannot match
SELECT id, title, created_at
FROM posts
WHERE created_at >= '2026-01-01';
-- Scans posts_2026 (plus the default partition, which may hold 2027+ rows)
PostgreSQL handles billions of rows with the right indexes, partitioning, and connection pooling. Don't migrate to a different database — optimize the one you have.
Advertisement