MLOps Guide 2026: Deploy ML Models to Production at Scale
MLOps 2026: The Gap Between Research and Production
Training a model is 10% of the work. Getting it to production reliably, monitoring it, retraining it, and scaling it — that's the other 90%. This is MLOps.
- The MLOps Stack in 2026
- Model Versioning with MLflow
- Model Serving with FastAPI
- Containerization with Docker
- CI/CD Pipeline for ML (GitHub Actions)
- Model Monitoring
The MLOps Stack in 2026
Data Pipeline (dbt, Spark) → Training (PyTorch) → Evaluation (Weights & Biases) → Registry (MLflow) → Serving (FastAPI) → Monitoring (Prometheus)
Model Versioning with MLflow
import mlflow
import mlflow.sklearn
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score

mlflow.set_tracking_uri("http://localhost:5000")
mlflow.set_experiment("iris-classifier")

X, y = load_iris(return_X_y=True, as_frame=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Log everything about this training run
with mlflow.start_run(run_name="rf-v3") as run:
    params = {"n_estimators": 100, "max_depth": 5, "random_state": 42}
    model = RandomForestClassifier(**params)
    model.fit(X_train, y_train)

    preds = model.predict(X_test)
    acc = accuracy_score(y_test, preds)
    f1 = f1_score(y_test, preds, average="weighted")

    # Log params, metrics, artifacts
    mlflow.log_params(params)
    mlflow.log_metrics({"accuracy": acc, "f1_score": f1})

    # Save the training split and attach it to the run as an artifact
    X_train.assign(target=y_train).to_csv("train.csv", index=False)
    mlflow.log_artifact("train.csv")

    # Log the model and register it in the model registry
    mlflow.sklearn.log_model(model, "model", registered_model_name="iris-rf")

    print(f"Run ID: {run.info.run_id}")
    print(f"Accuracy: {acc:.4f}")
Model Serving with FastAPI
import mlflow.sklearn
import numpy as np
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel

app = FastAPI(title="ML Model API", version="1.0")

# Load model from MLflow registry
model_uri = "models:/iris-rf/Production"
model = mlflow.sklearn.load_model(model_uri)

class PredictRequest(BaseModel):
    sepal_length: float
    sepal_width: float
    petal_length: float
    petal_width: float

class PredictResponse(BaseModel):
    prediction: int
    confidence: float
    class_name: str

CLASS_NAMES = ["setosa", "versicolor", "virginica"]

@app.post("/predict", response_model=PredictResponse)
async def predict(req: PredictRequest):
    features = np.array([[req.sepal_length, req.sepal_width,
                          req.petal_length, req.petal_width]])
    try:
        prediction = model.predict(features)[0]
        proba = model.predict_proba(features)[0]
        confidence = float(proba[prediction])
        return PredictResponse(
            prediction=int(prediction),
            confidence=confidence,
            class_name=CLASS_NAMES[prediction],
        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

@app.get("/health")
async def health():
    return {"status": "healthy", "model": "iris-rf/Production"}
Containerization with Docker
# Dockerfile
FROM python:3.12-slim
WORKDIR /app
# Install dependencies first (layer caching)
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
# Copy application
COPY . .
# Non-root user for security
RUN useradd -m appuser && chown -R appuser /app
USER appuser
EXPOSE 8000
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000", "--workers", "2"]
# docker-compose.yml
version: '3.8'
services:
  model-api:
    build: .
    ports:
      - "8000:8000"
    environment:
      - MLFLOW_TRACKING_URI=http://mlflow:5000
    depends_on:
      - mlflow

  mlflow:
    image: ghcr.io/mlflow/mlflow
    ports:
      - "5000:5000"
    command: mlflow server --backend-store-uri sqlite:///mlflow.db --host 0.0.0.0
    volumes:
      - mlflow_data:/mlflow

volumes:
  mlflow_data:
CI/CD Pipeline for ML (GitHub Actions)
# .github/workflows/ml-pipeline.yml
name: ML Training and Deployment

on:
  push:
    paths:
      - 'data/**'
      - 'models/**'
      - 'train.py'

jobs:
  train-and-deploy:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.12'

      - name: Install dependencies
        run: pip install -r requirements.txt

      - name: Run data validation
        run: python validate_data.py

      - name: Train model
        run: python train.py
        env:
          MLFLOW_TRACKING_URI: ${{ secrets.MLFLOW_URI }}

      - name: Evaluate model (fail if worse than baseline)
        run: python evaluate.py --min-accuracy 0.90

      # Assumes the runner is already authenticated against the registry
      - name: Build and push Docker image
        run: |
          docker build -t my-registry/my-model-api:${{ github.sha }} .
          docker push my-registry/my-model-api:${{ github.sha }}

      - name: Deploy to Kubernetes
        run: |
          kubectl set image deployment/model-api model-api=my-registry/my-model-api:${{ github.sha }}
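The evaluation gate above assumes an evaluate.py that exits non-zero when the candidate model underperforms. A hedged sketch of what that script might look like; the model URI default and the re-used iris split are assumptions, not part of the original pipeline:

# evaluate.py (hypothetical sketch; the guide does not show this file)
import argparse
import sys

import mlflow.sklearn
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

parser = argparse.ArgumentParser()
parser.add_argument("--min-accuracy", type=float, required=True)
parser.add_argument("--model-uri", default="models:/iris-rf/latest",
                    help="Registry URI of the candidate model")
args = parser.parse_args()

# For illustration, re-create the iris hold-out split; a real pipeline would
# evaluate on a dedicated, versioned evaluation set
X, y = load_iris(return_X_y=True, as_frame=True)
_, X_test, _, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = mlflow.sklearn.load_model(args.model_uri)
acc = accuracy_score(y_test, model.predict(X_test))
print(f"accuracy={acc:.4f} (minimum required: {args.min_accuracy})")

# A non-zero exit code fails the GitHub Actions step and blocks deployment
if acc < args.min_accuracy:
    sys.exit(1)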
Model Monitoring
# Detect data drift and model degradation
import pandas as pd
from scipy import stats

class ModelMonitor:
    def __init__(self, baseline_data: pd.DataFrame):
        self.baseline = baseline_data
        self.alerts = []

    def check_data_drift(self, new_data: pd.DataFrame, threshold: float = 0.05):
        """KS test for distribution drift on each feature."""
        drifted_features = []
        for col in self.baseline.columns:
            ks_stat, p_value = stats.ks_2samp(
                self.baseline[col], new_data[col]
            )
            if p_value < threshold:
                drifted_features.append({"feature": col, "p_value": p_value})
        if drifted_features:
            self.alerts.append({
                "type": "data_drift",
                "drifted_features": drifted_features
            })
        return drifted_features

    def check_prediction_drift(self, old_preds: list, new_preds: list):
        """Check if the mean prediction has shifted by more than 10%."""
        old_mean = sum(old_preds) / len(old_preds)
        new_mean = sum(new_preds) / len(new_preds)
        # Guard against a zero baseline before computing the relative shift
        if old_mean and abs(new_mean - old_mean) / abs(old_mean) > 0.1:
            self.alerts.append({"type": "prediction_drift", "shift": new_mean - old_mean})
            return True
        return False