Grafana Loki Log Aggregation 2026: The Prometheus-Native Logging Stack

Sanjeev Sharma
6 min read

Advertisement

Grafana Loki 2026: Logs That Work Like Metrics

Elasticsearch is expensive and operationally complex. Loki indexes only log labels (not full text), which typically makes it an order of magnitude cheaper to operate. Combined with Grafana, you get logs, metrics, and traces in one UI — the modern observability trifecta.

The PLG Stack: Promtail + Loki + Grafana

Application logs
Promtail (log collector) — runs on every node
Loki (log aggregation) — indexes labels only
Grafana (visualization + alerting)
# docker-compose.yml — Full PLG stack
# NOTE: `version` is informational on Compose v2+; kept for older tooling.
version: '3.8'

services:
  loki:
    image: grafana/loki:2.9.4
    ports:
      - "3100:3100"
    command: -config.file=/etc/loki/local-config.yaml
    volumes:
      - ./loki-config.yml:/etc/loki/local-config.yaml
      - loki-data:/loki

  promtail:
    image: grafana/promtail:2.9.4
    volumes:
      - /var/log:/var/log:ro
      - /var/lib/docker/containers:/var/lib/docker/containers:ro
      # Required by promtail's docker_sd_configs (host: unix:///var/run/docker.sock);
      # without this mount, Docker service discovery finds no containers.
      - /var/run/docker.sock:/var/run/docker.sock:ro
      - ./promtail-config.yml:/etc/promtail/config.yml
    # -config.expand-env=true lets the promtail config reference ${ENV_VARS}
    command: -config.file=/etc/promtail/config.yml -config.expand-env=true

  grafana:
    image: grafana/grafana:10.3.3
    ports:
      - "3000:3000"
    environment:
      # Demo-only credentials — override via env/secret in any real deployment
      GF_SECURITY_ADMIN_PASSWORD: admin
      GF_FEATURE_TOGGLES_ENABLE: traceqlEditor
    volumes:
      - grafana-data:/var/lib/grafana
      - ./grafana/provisioning:/etc/grafana/provisioning

volumes:
  loki-data:
  grafana-data:

Loki Configuration

# loki-config.yml — single-binary ("monolithic") Loki for local development
auth_enabled: false  # no multi-tenancy; all logs land in a single default tenant

server:
  http_listen_port: 3100
  grpc_listen_port: 9096

common:
  instance_addr: 127.0.0.1
  path_prefix: /loki
  storage:
    filesystem:
      chunks_directory: /loki/chunks
      rules_directory: /loki/rules
  replication_factor: 1  # single node — no replication
  ring:
    kvstore:
      store: inmemory  # fine for one instance; use memberlist/consul when clustering

# For production: use S3 storage
# storage_config:
#   aws:
#     s3: s3://us-east-1/loki-chunks-bucket
#     s3forcepathstyle: true

query_range:
  results_cache:
    cache:
      embedded_cache:
        enabled: true
        max_size_mb: 100  # in-process cache; no external memcached/redis needed

schema_config:
  configs:
    - from: 2024-01-01
      store: tsdb  # TSDB index type
      object_store: filesystem  # matches common.storage above
      schema: v13
      index:
        prefix: index_
        period: 24h  # one index table per day

ruler:
  # NOTE(review): assumes an "alertmanager" host is reachable — confirm it
  # exists in the same network/compose project.
  alertmanager_url: http://alertmanager:9093

# Limits
limits_config:
  reject_old_samples: true
  reject_old_samples_max_age: 168h  # 7 days
  ingestion_rate_mb: 16
  ingestion_burst_size_mb: 32
  max_query_length: 721h  # ~30 days (+1h headroom)
  max_entries_limit_per_query: 50000

Promtail: Shipping Logs to Loki

# promtail-config.yml
server:
  http_listen_port: 9080
  grpc_listen_port: 0  # disable the gRPC listener

positions:
  # Tracks per-file read offsets so restarts resume instead of re-shipping.
  # /tmp is ephemeral — point at a persistent path in real deployments.
  filename: /tmp/positions.yaml

clients:
  - url: http://loki:3100/loki/api/v1/push

scrape_configs:
  # Docker container logs (requires /var/run/docker.sock mounted into promtail)
  - job_name: docker
    docker_sd_configs:
      - host: unix:///var/run/docker.sock
        refresh_interval: 5s
    relabel_configs:
      - source_labels: ['__meta_docker_container_name']
        regex: '/(.*)'  # strip the leading "/" Docker puts on container names
        target_label: 'container'
      - source_labels: ['__meta_docker_container_log_stream']
        target_label: 'stream'
      - source_labels: ['__meta_docker_container_label_com_docker_compose_service']
        target_label: 'service'
    pipeline_stages:
      - json:
          expressions:
            level: level
            message: message
            timestamp: time
      - labels:
          level:
      - timestamp:
          source: timestamp
          format: RFC3339Nano

  # Application log files
  - job_name: application
    static_configs:
      - targets: [localhost]
        labels:
          job: myapp
          environment: production
          __path__: /var/log/app/*.log

    pipeline_stages:
      # Parse JSON logs
      - json:
          expressions:
            level: level
            msg: msg
            err: err
            duration: duration
            path: path
            method: method
            status: status
      # CAUTION: promoting method/status to labels multiplies stream
      # cardinality; keep the label set small.
      - labels:
          level:
          method:
          status:
      - metrics:
          http_requests_total:
            type: Counter
            description: HTTP requests
            source: status
            config:
              action: inc

  # System logs
  - job_name: syslog
    static_configs:
      - targets: [localhost]
        labels:
          job: syslog
          # Promtail does not evaluate Go templates in static labels; use
          # env expansion instead (requires -config.expand-env=true on the
          # promtail command line).
          host: ${HOSTNAME}
          __path__: /var/log/syslog

Structured Logging: Make Logs Queryable

// logger.ts — Structured JSON logging with Pino
import pino from 'pino'

// Shared application logger. Every record carries the `base` fields, so
// Loki/LogQL queries can select on service/environment/version.
export const logger = pino({
  level: process.env.LOG_LEVEL ?? 'info',
  formatters: {
    level: (label) => ({ level: label }),  // Use string level not number
  },
  timestamp: pino.stdTimeFunctions.isoTime,  // ISO-8601 "time" field
  // Fields merged into every log line
  base: {
    service: 'myapp-api',
    environment: process.env.NODE_ENV,
    version: process.env.APP_VERSION,
  },
  // Mask sensitive values before serialization
  redact: {
    paths: ['req.headers.authorization', '*.password', '*.token'],
    censor: '[REDACTED]',
  },
})

// Express middleware: emits one structured log record per completed response.
export function requestLogger(req: Request, res: Response, next: NextFunction) {
  const startedAt = Date.now()

  // Fires once the response has been fully handed to the OS.
  const logCompletedRequest = () => {
    const entry = {
      msg: 'HTTP request',
      method: req.method,
      path: req.path,
      status: res.statusCode,
      duration: Date.now() - startedAt,
      userAgent: req.headers['user-agent'],
      ip: req.ip,
      userId: req.user?.id,
    }
    logger.info(entry)
  }

  res.on('finish', logCompletedRequest)
  next()
}

// Serializes an Error (name/message/stack) plus optional extra context
// into a single structured log record.
export function logError(error: Error, context?: Record<string, unknown>) {
  const { name, message, stack } = error
  logger.error({
    msg: message,
    err: { name, message, stack },
    ...context,
  })
}
Example JSON log output:
{"level":"info","time":"2026-03-26T10:30:00.000Z","service":"myapp-api","environment":"production","version":"1.2.3","msg":"HTTP request","method":"GET","path":"/api/users","status":200,"duration":45,"userId":"usr_123"}

LogQL: Querying Logs Like Metrics

LogQL is Loki's query language — it combines log filtering with metric extraction:

# Basic log stream selector
{service="myapp-api", environment="production"}

# Filter by log content
{service="myapp-api"} |= "ERROR"
{service="myapp-api"} != "healthcheck"

# JSON parsing + filter
{service="myapp-api"} | json | level="error"

# Filter by parsed field value
{service="myapp-api"} | json | status >= 500

# Pattern matching
{service="myapp-api"} | json | path =~ "/api/.*"

# Multiple filters
{service="myapp-api"}
  | json
  | level="error"
  | duration > 1000

# Count errors per minute (metric query)
sum(rate({service="myapp-api"} | json | level="error" [1m])) by (path)

# P95 response time (requires duration field)
quantile_over_time(0.95, {service="myapp-api"} | json | unwrap duration [5m]) by (path)

# Top slow endpoints
topk(10, avg_over_time({service="myapp-api"} | json | unwrap duration [5m]) by (path))

# Error rate percentage
(
  sum(rate({service="myapp-api"} | json | status >= 500 [5m]))
  /
  sum(rate({service="myapp-api"} | json [5m]))
) * 100

# Count log lines that carry a userId (NOTE: count_over_time counts matching
# lines, not distinct users — LogQL has no exact distinct-count aggregation)
count_over_time({service="myapp-api"} | json | userId != "" [1h])

Kubernetes Log Collection

# kubernetes/promtail-daemonset.yml — one promtail pod per node
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: promtail
  namespace: monitoring
spec:
  selector:
    matchLabels:
      app: promtail
  template:
    metadata:
      labels:
        app: promtail
    spec:
      # NOTE(review): assumes a "promtail" ServiceAccount with RBAC to
      # list/watch pods exists — confirm it is created alongside this manifest.
      serviceAccountName: promtail
      containers:
        - name: promtail
          image: grafana/promtail:2.9.4
          args:
            - -config.file=/etc/promtail/config.yml
          env:
            # Expose the node name to the config (e.g. for a host label)
            - name: HOSTNAME
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
          volumeMounts:
            - name: config
              mountPath: /etc/promtail
            # Node log directory (includes /var/log/pods)
            - name: varlog
              mountPath: /var/log
              readOnly: true
            # Raw container logs when the runtime is dockerd
            - name: varlibdockercontainers
              mountPath: /var/lib/docker/containers
              readOnly: true

      volumes:
        - name: config
          configMap:
            name: promtail-config
        - name: varlog
          hostPath:
            path: /var/log
        - name: varlibdockercontainers
          hostPath:
            path: /var/lib/docker/containers
# promtail kubernetes config
scrape_configs:
  - job_name: kubernetes-pods
    kubernetes_sd_configs:
      - role: pod
    pipeline_stages:
      - cri: {}  # Parse CRI log format
      - json:
          expressions:
            level: level
            msg: msg
    relabel_configs:
      # Only scrape pods annotated promtail.io/scrape: "true"
      # (keep first so later relabels only run for retained targets)
      - source_labels: [__meta_kubernetes_pod_annotation_promtail_io_scrape]
        action: keep
        regex: "true"
      - source_labels: [__meta_kubernetes_namespace]
        target_label: namespace
      - source_labels: [__meta_kubernetes_pod_name]
        target_label: pod
      - source_labels: [__meta_kubernetes_pod_container_name]
        target_label: container
      - source_labels: [__meta_kubernetes_pod_label_app]
        target_label: app
      # Required: map each discovered target to its log files on the node.
      # Without a __path__ relabel, kubernetes_sd targets match no files and
      # promtail tails nothing (the DaemonSet's /var/log mount covers this path).
      - source_labels: [__meta_kubernetes_pod_uid, __meta_kubernetes_pod_container_name]
        separator: /
        target_label: __path__
        replacement: /var/log/pods/*$1/*.log

Grafana Dashboards and Alerting

# grafana/provisioning/datasources/loki.yml
apiVersion: 1

datasources:
  - name: Loki
    type: loki
    url: http://loki:3100
    access: proxy  # Grafana's backend proxies queries to Loki
    jsonData:
      maxLines: 1000  # cap on log lines returned per query
      derivedFields:
        # Extract trace IDs from logs and link to Tempo
        # NOTE(review): assumes a Tempo datasource with uid "tempo" is
        # provisioned — confirm.
        - datasourceUid: tempo
          matcherRegex: '"traceId":"(\w+)"'
          name: TraceID
          # "$$" escapes "$" under env-variable interpolation, yielding
          # ${__value.raw}; if this file is consumed without an interpolation
          # pass, a single "$" would be needed — verify the rendered value.
          url: '$${__value.raw}'
# Loki alert rules (evaluated by the Loki ruler, Prometheus-style)
groups:
  - name: application-alerts
    rules:
      # Fire when more than 5% of log lines over 5m are level=error
      - alert: HighErrorRate
        expr: |
          (
            sum(rate({service="myapp-api"} | json | level="error" [5m]))
            /
            sum(rate({service="myapp-api"} | json [5m]))
          ) > 0.05
        for: 2m
        labels:
          severity: critical
        annotations:
          summary: "High error rate on myapp-api"
          description: "Error rate is {{ $value | humanizePercentage }} over last 5m"

      # P95 of the unwrapped `duration` field (milliseconds per the app logger)
      - alert: SlowResponses
        expr: |
          quantile_over_time(0.95,
            {service="myapp-api"} | json | unwrap duration [5m]
          ) > 2000
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "P95 response time above 2s"

      - alert: AppDown
        # `sum(rate(...)) == 0` can never fire here: when the stream
        # disappears entirely the result vector is empty, and comparing an
        # empty vector yields no samples. absent_over_time returns 1 exactly
        # when no log lines were seen in the range.
        expr: |
          absent_over_time({service="myapp-api"} [1m])
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "No logs received from myapp-api"

Loki vs Alternatives

| Feature             | Loki        | Elasticsearch | Datadog Logs |
|---------------------|-------------|---------------|--------------|
| Cost (self-hosted)  | Very low    | High          | N/A          |
| Cost (managed)      | Low         | High          | High         |
| Query language      | LogQL       | Lucene/KQL    | DQL          |
| Full-text search    | No          | Yes           | Yes          |
| Label-based search  | Yes         | Yes           | Yes          |
| Setup complexity    | Low         | High          | None         |
| Grafana integration | Native      | Plugin        | Plugin       |
| Metrics from logs   | Yes (LogQL) | Via pipeline  | Yes          |

Loki's sweet spot: teams already using Prometheus and Grafana who want unified observability without Elasticsearch's operational overhead. For full-text search across large volumes, Elasticsearch still wins — but for most microservices use cases, LogQL is enough.

Advertisement

Sanjeev Sharma

Written by

Sanjeev Sharma

Full Stack Engineer · E-mopro