MLOps Guide 2026: Deploy ML Models to Production at Scale
MLOps 2026: The Gap Between Research and Production
Training a model is 10% of the work. Getting it to production reliably, monitoring it, retraining it, and scaling it — that's the other 90%. This is MLOps.
- The MLOps Stack in 2026
- Model Versioning with MLflow
- Model Serving with FastAPI
- Containerization with Docker
- CI/CD Pipeline for ML (GitHub Actions)
- Model Monitoring
The MLOps Stack in 2026
Data Pipeline (dbt, Spark) → Training (PyTorch) → Evaluation (Weights & Biases) → Registry (MLflow) → Serving (FastAPI) → Monitoring (Prometheus)
Model Versioning with MLflow
import mlflow
import mlflow.sklearn
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score

mlflow.set_tracking_uri("http://localhost:5000")
mlflow.set_experiment("iris-classifier")

X, y = load_iris(return_X_y=True, as_frame=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Log everything about this training run
with mlflow.start_run(run_name="rf-v3") as run:
    params = {"n_estimators": 100, "max_depth": 5, "random_state": 42}
    model = RandomForestClassifier(**params)
    model.fit(X_train, y_train)

    preds = model.predict(X_test)
    acc = accuracy_score(y_test, preds)
    f1 = f1_score(y_test, preds, average="weighted")

    # Log params, metrics, artifacts
    mlflow.log_params(params)
    mlflow.log_metrics({"accuracy": acc, "f1_score": f1})

    # Save the training split and attach it to the run as an artifact
    X_train.assign(target=y_train).to_csv("train.csv", index=False)
    mlflow.log_artifact("train.csv")

    # Log the model and register it in the model registry
    mlflow.sklearn.log_model(model, "model", registered_model_name="iris-rf")

    print(f"Run ID: {run.info.run_id}")
    print(f"Accuracy: {acc:.4f}")
Model Serving with FastAPI
import mlflow.sklearn
import numpy as np
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel

app = FastAPI(title="ML Model API", version="1.0")

# Load model from MLflow registry
model_uri = "models:/iris-rf/Production"
model = mlflow.sklearn.load_model(model_uri)

class PredictRequest(BaseModel):
    sepal_length: float
    sepal_width: float
    petal_length: float
    petal_width: float

class PredictResponse(BaseModel):
    prediction: int
    confidence: float
    class_name: str

CLASS_NAMES = ["setosa", "versicolor", "virginica"]

@app.post("/predict", response_model=PredictResponse)
async def predict(req: PredictRequest):
    features = np.array([[req.sepal_length, req.sepal_width,
                          req.petal_length, req.petal_width]])
    try:
        prediction = model.predict(features)[0]
        proba = model.predict_proba(features)[0]
        confidence = float(proba[prediction])
        return PredictResponse(
            prediction=int(prediction),
            confidence=confidence,
            class_name=CLASS_NAMES[prediction],
        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

@app.get("/health")
async def health():
    return {"status": "healthy", "model": "iris-rf/Production"}
Containerization with Docker
# Dockerfile
FROM python:3.12-slim
WORKDIR /app
# Install dependencies first (layer caching)
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
# Copy application
COPY . .
# Non-root user for security
RUN useradd -m appuser && chown -R appuser /app
USER appuser
EXPOSE 8000
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000", "--workers", "2"]
# docker-compose.yml
version: '3.8'
services:
  model-api:
    build: .
    ports:
      - "8000:8000"
    environment:
      - MLFLOW_TRACKING_URI=http://mlflow:5000
    depends_on:
      - mlflow

  mlflow:
    image: ghcr.io/mlflow/mlflow
    ports:
      - "5000:5000"
    command: mlflow server --backend-store-uri sqlite:///mlflow.db --host 0.0.0.0
    volumes:
      - mlflow_data:/mlflow

volumes:
  mlflow_data:
CI/CD Pipeline for ML (GitHub Actions)
# .github/workflows/ml-pipeline.yml
name: ML Training and Deployment

on:
  push:
    paths:
      - 'data/**'
      - 'models/**'
      - 'train.py'

jobs:
  train-and-deploy:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.12'

      - name: Install dependencies
        run: pip install -r requirements.txt

      - name: Run data validation
        run: python validate_data.py

      - name: Train model
        run: python train.py
        env:
          MLFLOW_TRACKING_URI: ${{ secrets.MLFLOW_URI }}

      - name: Evaluate model (fail if worse than baseline)
        run: python evaluate.py --min-accuracy 0.90

      # Assumes the runner is already authenticated against the registry
      - name: Build and push Docker image
        run: |
          docker build -t my-registry/my-model-api:${{ github.sha }} .
          docker push my-registry/my-model-api:${{ github.sha }}

      - name: Deploy to Kubernetes
        run: |
          kubectl set image deployment/model-api model-api=my-registry/my-model-api:${{ github.sha }}
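The evaluation gate above assumes an evaluate.py that exits non-zero when the candidate model underperforms. A hedged sketch of what that script might look like; the model URI default and the re-used iris split are assumptions, not part of the original pipeline:

# evaluate.py (hypothetical sketch; the guide does not show this file)
import argparse
import sys

import mlflow.sklearn
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

parser = argparse.ArgumentParser()
parser.add_argument("--min-accuracy", type=float, required=True)
parser.add_argument("--model-uri", default="models:/iris-rf/latest",
                    help="Registry URI of the candidate model")
args = parser.parse_args()

# For illustration, re-create the iris hold-out split; a real pipeline would
# evaluate on a dedicated, versioned evaluation set
X, y = load_iris(return_X_y=True, as_frame=True)
_, X_test, _, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = mlflow.sklearn.load_model(args.model_uri)
acc = accuracy_score(y_test, model.predict(X_test))
print(f"accuracy={acc:.4f} (minimum required: {args.min_accuracy})")

# A non-zero exit code fails the GitHub Actions step and blocks deployment
if acc < args.min_accuracy:
    sys.exit(1)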
Model Monitoring
# Detect data drift and model degradation
import pandas as pd
from scipy import stats

class ModelMonitor:
    def __init__(self, baseline_data: pd.DataFrame):
        self.baseline = baseline_data
        self.alerts = []

    def check_data_drift(self, new_data: pd.DataFrame, threshold: float = 0.05):
        """KS test for distribution drift on each feature."""
        drifted_features = []
        for col in self.baseline.columns:
            ks_stat, p_value = stats.ks_2samp(
                self.baseline[col], new_data[col]
            )
            if p_value < threshold:
                drifted_features.append({"feature": col, "p_value": p_value})
        if drifted_features:
            self.alerts.append({
                "type": "data_drift",
                "drifted_features": drifted_features
            })
        return drifted_features

    def check_prediction_drift(self, old_preds: list, new_preds: list):
        """Check if the mean prediction has shifted by more than 10%."""
        old_mean = sum(old_preds) / len(old_preds)
        new_mean = sum(new_preds) / len(new_preds)
        # Guard against a zero baseline before computing the relative shift
        if old_mean and abs(new_mean - old_mean) / abs(old_mean) > 0.1:
            self.alerts.append({"type": "prediction_drift", "shift": new_mean - old_mean})
            return True
        return False