PostgreSQL Complete Guide 2026: Performance, JSON, Full-Text Search, and Scaling

Sanjeev Sharma
5 min read

Advertisement

PostgreSQL 2026: The Database That Does Everything

PostgreSQL is one of the most popular databases for production web apps. It handles relational data, JSON, time-series, full-text search, and geospatial queries — all in one system.

JSONB: Flexible Schemas

-- Create table with JSONB column
-- Relational columns hold the stable, always-present attributes;
-- the JSONB columns hold attributes that vary per product.
CREATE TABLE products (
  id UUID PRIMARY KEY DEFAULT gen_random_uuid(),  -- gen_random_uuid() is built in since PG 13; earlier versions need pgcrypto
  name TEXT NOT NULL,
  price DECIMAL(10,2) NOT NULL,  -- exact decimal type: correct for money (never FLOAT)
  metadata JSONB,          -- Flexible attributes
  specs JSONB,             -- hardware specs; JSONB (not JSON) so it can be indexed and queried efficiently
  created_at TIMESTAMPTZ DEFAULT NOW()  -- timestamptz: stores an absolute instant, rendered in the session time zone
);

-- Insert a row whose JSONB columns carry ad-hoc attributes and hardware specs.
-- JSON values are written as ordinary string literals; PostgreSQL casts them
-- to jsonb on assignment.
INSERT INTO products (name, specs, metadata, price)
VALUES (
  'MacBook Pro',
  '{"ram": 16, "storage": 512, "cpu": "M3 Pro"}',
  '{"color": "space-gray", "in_stock": true, "tags": ["laptop", "apple"]}',
  2499.00
);

-- Query JSON fields.
-- The boolean filter uses @> containment rather than a text comparison
-- (metadata->>'in_stock' = 'true'): containment is served by the GIN index
-- on metadata, while the ->> text extraction is not.
SELECT
  name,
  metadata->>'color' AS color   -- ->> extracts as text; -> would return jsonb
FROM products
WHERE metadata @> '{"in_stock": true}'::jsonb
  AND (specs->>'ram')::int >= 16;  -- ->> yields text, so cast before the numeric comparison

-- Create index on JSONB field
-- Expression (B-tree) index: serves equality/range/sort on the extracted text value.
CREATE INDEX idx_products_color ON products ((metadata->>'color'));
-- GIN index over the whole document: serves the @>, ?, ?| and ?& operators.
CREATE INDEX idx_products_metadata ON products USING GIN(metadata);

-- Array containment.
-- Written as whole-document containment so the GIN index on metadata
-- (idx_products_metadata) can serve the query. The equivalent form
--   metadata->'tags' @> '["laptop"]'::jsonb
-- is an expression, which that index cannot serve — it would need its own
-- expression index on (metadata->'tags').
SELECT * FROM products
WHERE metadata @> '{"tags": ["laptop"]}'::jsonb;

-- Update JSON field
-- || merges at the top level only (shallow merge): existing keys are
-- overwritten, and nested objects are replaced wholesale, not deep-merged.
UPDATE products
SET metadata = metadata || '{"sale_price": 1999.00}'::jsonb
WHERE id = '...';  -- placeholder: bind the real UUID as a query parameter, never string-interpolate

-- Built-in full-text search
-- NOTE(review): on PostgreSQL 12+ a stored generated column
-- (search_vector tsvector GENERATED ALWAYS AS (...) STORED) can replace
-- the trigger-based maintenance shown below — consider it for new schemas.
ALTER TABLE posts ADD COLUMN search_vector tsvector;

-- Automatic update with trigger
-- Rebuilds search_vector from title, content, and tags on every write.
-- The 'english' configuration is passed explicitly so results do not depend
-- on the session's default_text_search_config.
CREATE FUNCTION update_search_vector()
RETURNS trigger AS $$
BEGIN
  NEW.search_vector := to_tsvector('english',
    coalesce(NEW.title, '') || ' ' ||   -- coalesce: a NULL operand would nullify the whole concatenation
    coalesce(NEW.content, '') || ' ' ||
    coalesce(array_to_string(NEW.tags, ' '), '')  -- assumes tags is a text[] column — TODO confirm against the posts schema
  );
  RETURN NEW;
END;
$$ LANGUAGE plpgsql;

-- BEFORE trigger: the function mutates NEW, so it must run before the row
-- is written for the populated search_vector to be stored.
CREATE TRIGGER posts_search_update
BEFORE INSERT OR UPDATE ON posts
FOR EACH ROW EXECUTE FUNCTION update_search_vector();

-- Create GIN index for fast full-text search
-- GIN is the standard index type for tsvector; it serves the @@ match operator.
CREATE INDEX idx_posts_search ON posts USING GIN(search_vector);

-- Search query: rank matches and show a highlighted excerpt.
-- The implicit comma join is replaced with an explicit CROSS JOIN — same
-- result (to_tsquery produces exactly one row), but the join is visible
-- instead of hidden in the FROM list.
SELECT
  id,
  title,
  ts_rank(search_vector, query) AS rank,
  ts_headline('english', content, query, 'MaxWords=50,MinWords=25') AS excerpt
FROM posts
CROSS JOIN to_tsquery('english', 'nextjs & typescript') AS query
WHERE search_vector @@ query   -- served by the GIN index on search_vector
ORDER BY rank DESC
LIMIT 10;

Window Functions

-- Running totals
SELECT
  date_trunc('day', created_at) as day,
  COUNT(*) as daily_signups,
  -- Window over the aggregate: day is unique after GROUP BY, so the default
  -- RANGE frame behaves as a plain running total here.
  SUM(COUNT(*)) OVER (ORDER BY date_trunc('day', created_at)) as cumulative_signups,
  -- NOTE(review): "6 PRECEDING" counts ROWS, not days. Days with zero signups
  -- produce no row, so with gaps this is a last-7-rows average, not a true
  -- 7-calendar-day average — confirm whether that matters for reporting.
  AVG(COUNT(*)) OVER (ORDER BY date_trunc('day', created_at) ROWS BETWEEN 6 PRECEDING AND CURRENT ROW) as rolling_7day_avg
FROM users
GROUP BY date_trunc('day', created_at)
ORDER BY day;

-- Rank users by posts
-- LEFT JOIN keeps users with zero posts (COUNT(p.id) counts non-null ids, so they get 0).
SELECT
  u.name,
  COUNT(p.id) as post_count,
  RANK() OVER (ORDER BY COUNT(p.id) DESC) as rank,              -- ties share a rank, leaving gaps (1,1,3,...)
  DENSE_RANK() OVER (ORDER BY COUNT(p.id) DESC) as dense_rank,  -- ties share a rank, no gaps (1,1,2,...)
  NTILE(4) OVER (ORDER BY COUNT(p.id) DESC) as quartile         -- splits the result into 4 near-equal buckets
FROM users u
LEFT JOIN posts p ON p.author_id = u.id
GROUP BY u.id, u.name;

-- Previous/next values
-- LAG/LEAD look one row backward/forward within the window ordering;
-- they return NULL at the first/last row respectively.
SELECT
  title,
  created_at,
  LAG(title) OVER (ORDER BY created_at) as previous_post,
  LEAD(title) OVER (ORDER BY created_at) as next_post
FROM posts
WHERE author_id = '...'  -- placeholder: bind the real author UUID as a parameter
ORDER BY created_at;

CTEs (Common Table Expressions)

-- Readable complex queries
WITH
  -- Users who logged in within the last 30 days.
  active_users AS (
    SELECT id, name, email
    FROM users
    WHERE last_login > NOW() - INTERVAL '30 days'
  ),
  -- Per-user publication stats; LEFT JOIN keeps users with zero posts,
  -- and the published filter lives in ON so it does not discard them.
  user_stats AS (
    SELECT
      u.id,
      u.name,
      COUNT(p.id) as post_count,
      -- COALESCE fix: SUM over zero rows is NULL, and NULLs sort FIRST
      -- under ORDER BY ... DESC in PostgreSQL — post-less users would
      -- otherwise crowd out the real top 20 below.
      COALESCE(SUM(p.views), 0) as total_views
    FROM active_users u
    LEFT JOIN posts p ON p.author_id = u.id AND p.published = true
    GROUP BY u.id, u.name
  )
SELECT
  name,
  post_count,
  total_views,
  ROUND(total_views::numeric / NULLIF(post_count, 0), 1) as views_per_post  -- NULLIF guards division by zero
FROM user_stats
ORDER BY total_views DESC
LIMIT 20;

-- Recursive CTE: organization hierarchy
-- path accumulates the ids from the root down; it doubles as the cycle guard
-- and as a sort key that orders the output depth-first.
WITH RECURSIVE org_tree AS (
  -- Base: top-level managers
  SELECT id, name, manager_id, 0 as level, ARRAY[id] as path
  FROM employees
  WHERE manager_id IS NULL

  UNION ALL

  -- Recursive: employees with managers
  SELECT e.id, e.name, e.manager_id, t.level + 1, t.path || e.id
  FROM employees e
  JOIN org_tree t ON e.manager_id = t.id
  WHERE NOT e.id = ANY(t.path)  -- Prevent cycles (bad data would otherwise loop forever)
)
SELECT level, repeat('  ', level) || name as hierarchy  -- indent two spaces per level for display
FROM org_tree
ORDER BY path;

Index Optimization

-- Partial index (only index subset of rows)
-- Smaller and cheaper to maintain than a full index; used only when the
-- query's WHERE clause implies published = true.
CREATE INDEX idx_posts_published_recent
ON posts (created_at DESC)
WHERE published = true;

-- Covering index (includes all needed columns)
-- Key order fixed: the equality column (published) leads, then the sort
-- column, so  WHERE published = true ORDER BY created_at DESC  can seek to
-- the published entries and read them in index order. With the previous
-- order (created_at DESC, published) the filter could not seek.
-- INCLUDE stores payload columns in the leaf pages without making them keys.
CREATE INDEX idx_posts_list_covering
ON posts (published, created_at DESC)
INCLUDE (id, title, slug, author_id);
-- SELECT id, title, slug → Index Only Scan, no heap fetch needed

-- Expression index
-- Indexes the result of LOWER(email): the query must use the exact same
-- expression for the planner to match it.
CREATE INDEX idx_users_email_lower
ON users (LOWER(email));
-- Supports: WHERE LOWER(email) = 'test@example.com'

-- Composite index: order matters!
-- B-tree indexes are usable only via a leftmost prefix of the key columns.
CREATE INDEX idx_posts_author_date
ON posts (author_id, created_at DESC);
-- Supports: WHERE author_id = ? ORDER BY created_at DESC
-- Also supports: WHERE author_id = ?
-- Does NOT support: WHERE created_at > ?   (skips the leading column)

EXPLAIN ANALYZE: Find Slow Queries

-- Find slow queries in production
-- Requires the pg_stat_statements extension (shared_preload_libraries +
-- CREATE EXTENSION pg_stat_statements) — it is not enabled by default.
SELECT
  query,
  calls,
  mean_exec_time::numeric(10,2) as avg_ms,    -- average latency per call, ms
  total_exec_time::numeric(10,2) as total_ms, -- cumulative time: high total with low avg = hot path
  rows
FROM pg_stat_statements
WHERE mean_exec_time > 100   -- threshold in ms; tune to the app's latency budget
ORDER BY mean_exec_time DESC
LIMIT 20;

-- Analyze specific query
-- ANALYZE actually executes the query (beware on writes); BUFFERS adds
-- shared-buffer hit/read counts to spot I/O-bound steps.
EXPLAIN (ANALYZE, BUFFERS, FORMAT TEXT)
SELECT p.*, u.name as author_name
FROM posts p
JOIN users u ON u.id = p.author_id
WHERE p.published = true
ORDER BY p.created_at DESC
LIMIT 20;

-- Look for: Seq Scan on large tables, high cost estimates
-- Want: Index Scan, Index Only Scan

Connection Pooling with PgBouncer

; pgbouncer.ini
; Connection definitions: one line per logical database PgBouncer fronts.
[databases]
myapp = host=localhost port=5432 dbname=myapp

[pgbouncer]
pool_mode = transaction      ; Best for web apps; note: breaks session features (prepared statements, advisory locks)
max_client_conn = 1000       ; Total client connections
default_pool_size = 20       ; Per-database pool of real server connections
min_pool_size = 5            ; keep warm connections open even when idle
reserve_pool_size = 5        ; extra connections allowed under burst load
listen_port = 6432           ; clients connect here instead of 5432
Direct connections: 200 clients × ~5 MB per backend ≈ 1 GB of RAM consumed on the PostgreSQL server.
With PgBouncer: 1,000 clients multiplexed onto 20 real server connections ≈ 100 MB of RAM.

Partitioning for Scale

-- Partition posts by year for massive tables
CREATE TABLE posts (
  id UUID NOT NULL,
  title TEXT NOT NULL,
  created_at TIMESTAMPTZ NOT NULL,
  -- Fix: the original table had no primary key. On a partitioned table the
  -- PK must include the partition key, so it is (id, created_at), not id
  -- alone — uniqueness of id by itself cannot be enforced across partitions.
  PRIMARY KEY (id, created_at)
) PARTITION BY RANGE (created_at);

-- Create partitions
-- Range bounds are half-open: FROM is inclusive, TO is exclusive, so
-- consecutive years share a boundary without overlap or gaps.
CREATE TABLE posts_2024 PARTITION OF posts
  FOR VALUES FROM ('2024-01-01') TO ('2025-01-01');

CREATE TABLE posts_2025 PARTITION OF posts
  FOR VALUES FROM ('2025-01-01') TO ('2026-01-01');

CREATE TABLE posts_2026 PARTITION OF posts
  FOR VALUES FROM ('2026-01-01') TO ('2027-01-01');

-- Queries automatically hit only relevant partitions
-- Partition pruning requires the partition key (created_at) in the WHERE
-- clause as a plain comparison — wrapping it in a function defeats pruning.
SELECT * FROM posts WHERE created_at >= '2026-01-01';
-- Only scans posts_2026 partition!

PostgreSQL handles billions of rows with the right indexes, partitioning, and connection pooling. Don't migrate to a different database — optimize the one you have.

Advertisement

Sanjeev Sharma

Written by

Sanjeev Sharma

Full Stack Engineer · E-mopro