Grafana Loki 2026: Logs That Work Like Metrics
Elasticsearch is expensive and operationally complex. Loki indexes only log labels, not the full text of every line (the lines themselves are stored as compressed chunks), which makes it roughly an order of magnitude cheaper to operate. Combined with Grafana, you get logs, metrics, and traces in one UI: the modern observability trifecta.
- The PLG Stack: Promtail + Loki + Grafana
- Loki Configuration
- Promtail: Shipping Logs to Loki
- Structured Logging: Make Logs Queryable
- LogQL: Querying Logs Like Metrics
- Kubernetes Log Collection
- Grafana Dashboards and Alerting
- Loki vs Alternatives
The PLG Stack: Promtail + Loki + Grafana
Application logs
↓
Promtail (log collector) — runs on every node
↓
Loki (log aggregation) — indexes labels only
↓
Grafana (visualization + alerting)
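To make that split concrete, this is roughly what a push to Loki looks like on the wire. The endpoint and payload shape come from Loki's push API; the labels and log line here are invented for illustration. Only the small stream label set is indexed, while the values entries are stored as compressed chunks.

// push-example.ts - sketch of a Loki push API payload (illustrative labels and line)
const payload = {
  streams: [
    {
      // Indexed: only these labels. Keep the set small and low-cardinality.
      stream: { service: 'myapp-api', environment: 'production', level: 'info' },
      // Not indexed: [nanosecond-timestamp, raw line] pairs, stored as chunks
      values: [
        [`${Date.now()}000000`, '{"msg":"HTTP request","path":"/api/users","status":200}'],
      ],
    },
  ],
}

await fetch('http://localhost:3100/loki/api/v1/push', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify(payload),
})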
# docker-compose.yml — Full PLG stack
version: '3.8'
services:
  loki:
    image: grafana/loki:2.9.4
    ports:
      - "3100:3100"
    command: -config.file=/etc/loki/local-config.yaml
    volumes:
      - ./loki-config.yml:/etc/loki/local-config.yaml
      - loki-data:/loki

  promtail:
    image: grafana/promtail:2.9.4
    volumes:
      - /var/log:/var/log:ro
      - /var/lib/docker/containers:/var/lib/docker/containers:ro
      - /var/run/docker.sock:/var/run/docker.sock  # needed by docker_sd_configs below
      - ./promtail-config.yml:/etc/promtail/config.yml
    command: -config.file=/etc/promtail/config.yml -config.expand-env=true

  grafana:
    image: grafana/grafana:10.3.3
    ports:
      - "3000:3000"
    environment:
      GF_SECURITY_ADMIN_PASSWORD: admin
      GF_FEATURE_TOGGLES_ENABLE: traceqlEditor
    volumes:
      - grafana-data:/var/lib/grafana
      - ./grafana/provisioning:/etc/grafana/provisioning

volumes:
  loki-data:
  grafana-data:
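After docker compose up -d, Loki's /ready endpoint is the quickest health signal. Below is a minimal sketch that polls it until the stack is usable (assumes Node 18+ for the built-in fetch):

// smoke-test.ts - wait for Loki to report ready
async function waitForLoki(url = 'http://localhost:3100/ready', attempts = 30): Promise<void> {
  for (let i = 0; i < attempts; i++) {
    try {
      const res = await fetch(url)
      if (res.ok) {
        console.log('Loki is ready')
        return
      }
    } catch {
      // Loki not listening yet; keep retrying
    }
    await new Promise((resolve) => setTimeout(resolve, 1000))
  }
  throw new Error('Loki did not become ready in time')
}

await waitForLoki()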
Loki Configuration
# loki-config.yml
auth_enabled: false

server:
  http_listen_port: 3100
  grpc_listen_port: 9096

common:
  instance_addr: 127.0.0.1
  path_prefix: /loki
  storage:
    filesystem:
      chunks_directory: /loki/chunks
      rules_directory: /loki/rules
  replication_factor: 1
  ring:
    kvstore:
      store: inmemory

# For production: use S3 storage
# storage_config:
#   aws:
#     s3: s3://us-east-1/loki-chunks-bucket
#     s3forcepathstyle: true

query_range:
  results_cache:
    cache:
      embedded_cache:
        enabled: true
        max_size_mb: 100

schema_config:
  configs:
    - from: 2024-01-01
      store: tsdb
      object_store: filesystem
      schema: v13
      index:
        prefix: index_
        period: 24h

ruler:
  alertmanager_url: http://alertmanager:9093

# Limits
limits_config:
  reject_old_samples: true
  reject_old_samples_max_age: 168h  # 7 days
  ingestion_rate_mb: 16
  ingestion_burst_size_mb: 32
  max_query_length: 721h
  max_entries_limit_per_query: 50000
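Loki serves its effective runtime configuration at /config, which is handy for confirming the limits above were actually loaded. A small sketch, assuming the compose stack from earlier:

// config-check.ts - print the limit values Loki actually loaded
const res = await fetch('http://localhost:3100/config')
const yaml = await res.text()

const interesting = yaml
  .split('\n')
  .filter((line) => /ingestion_rate|reject_old_samples|max_query_length/.test(line))
console.log(interesting.join('\n'))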
Promtail: Shipping Logs to Loki
# promtail-config.yml
server:
  http_listen_port: 9080
  grpc_listen_port: 0

positions:
  filename: /tmp/positions.yaml  # Tracks file read positions across restarts

clients:
  - url: http://loki:3100/loki/api/v1/push

scrape_configs:
  # Docker container logs
  - job_name: docker
    docker_sd_configs:
      - host: unix:///var/run/docker.sock
        refresh_interval: 5s
    relabel_configs:
      - source_labels: ['__meta_docker_container_name']
        regex: '/(.*)'
        target_label: 'container'
      - source_labels: ['__meta_docker_container_log_stream']
        target_label: 'stream'
      - source_labels: ['__meta_docker_container_label_com_docker_compose_service']
        target_label: 'service'
    pipeline_stages:
      - json:
          expressions:
            level: level
            message: message
            timestamp: time
      - labels:
          level:
      - timestamp:
          source: timestamp
          format: RFC3339Nano

  # Application log files
  - job_name: application
    static_configs:
      - targets: [localhost]
        labels:
          job: myapp
          environment: production
          __path__: /var/log/app/*.log
    pipeline_stages:
      # Parse JSON logs
      - json:
          expressions:
            level: level
            msg: msg
            err: err
            duration: duration
            path: path
            method: method
            status: status
      - labels:
          level:
          method:
          status:
      - metrics:
          http_requests_total:
            type: Counter
            description: HTTP requests
            source: status
            config:
              action: inc

  # System logs
  - job_name: syslog
    static_configs:
      - targets: [localhost]
        labels:
          job: syslog
          host: ${HOSTNAME}  # expanded because Promtail runs with -config.expand-env=true
          __path__: /var/log/syslog
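Note that the metrics pipeline stage above sends nothing to Loki; it exposes a Prometheus counter on Promtail's own HTTP port (9080 in this config), prefixed with promtail_custom_. A quick sketch to verify it is counting:

// promtail-metrics-check.ts - confirm the metrics stage is incrementing
const res = await fetch('http://localhost:9080/metrics')
const text = await res.text()

// Custom metrics from the pipeline are exported as promtail_custom_<name>
const counters = text
  .split('\n')
  .filter((line) => line.startsWith('promtail_custom_http_requests_total'))
console.log(counters.join('\n') || 'counter not found yet (no matching log lines scraped)')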
Structured Logging: Make Logs Queryable
// logger.ts — Structured JSON logging with Pino
import pino from 'pino'
import type { Request, Response, NextFunction } from 'express'

export const logger = pino({
  level: process.env.LOG_LEVEL ?? 'info',
  formatters: {
    level: (label) => ({ level: label }), // Use string levels ("info"), not Pino's numeric defaults
  },
  timestamp: pino.stdTimeFunctions.isoTime,
  base: {
    service: 'myapp-api',
    environment: process.env.NODE_ENV,
    version: process.env.APP_VERSION,
  },
  redact: {
    paths: ['req.headers.authorization', '*.password', '*.token'],
    censor: '[REDACTED]',
  },
})

// Request logging middleware
export function requestLogger(req: Request, res: Response, next: NextFunction) {
  const start = Date.now()
  res.on('finish', () => {
    logger.info({
      msg: 'HTTP request',
      method: req.method,
      path: req.path,
      status: res.statusCode,
      duration: Date.now() - start,
      userAgent: req.headers['user-agent'],
      ip: req.ip,
      userId: (req as Request & { user?: { id: string } }).user?.id, // assumes an auth middleware sets req.user
    })
  })
  next()
}

// Error logging
export function logError(error: Error, context?: Record<string, unknown>) {
  logger.error({
    msg: error.message,
    err: {
      name: error.name,
      message: error.message,
      stack: error.stack,
    },
    ...context,
  })
}
Example JSON log output:
{"level":"info","time":"2026-03-26T10:30:00.000Z","service":"myapp-api","environment":"production","version":"1.2.3","msg":"HTTP request","method":"GET","path":"/api/users","status":200,"duration":45,"userId":"usr_123"}
LogQL: Querying Logs Like Metrics
LogQL is Loki's query language — it combines log filtering with metric extraction:
# Basic log stream selector
{service="myapp-api", environment="production"}
# Filter by log content
{service="myapp-api"} |= "ERROR"
{service="myapp-api"} != "healthcheck"
# JSON parsing + filter
{service="myapp-api"} | json | level="error"
# Filter by parsed field value
{service="myapp-api"} | json | status >= 500
# Pattern matching
{service="myapp-api"} | json | path =~ "/api/.*"
# Multiple filters
{service="myapp-api"}
| json
| level="error"
| duration > 1000
# Count errors per minute (metric query)
sum(rate({service="myapp-api"} | json | level="error" [1m])) by (path)
# P95 response time (requires duration field)
quantile_over_time(0.95, {service="myapp-api"} | json | unwrap duration [5m]) by (path)
# Top slow endpoints
topk(10, avg_over_time({service="myapp-api"} | json | unwrap duration [5m]) by (path))
# Error rate percentage
(
sum(rate({service="myapp-api"} | json | status >= 500 [5m]))
/
sum(rate({service="myapp-api"} | json [5m]))
) * 100
# Count distinct users over the last hour (can be expensive at high cardinality)
count(sum by (userId) (count_over_time({service="myapp-api"} | json | userId != "" [1h])))
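The same queries run outside Grafana via Loki's HTTP API (/loki/api/v1/query_range). Here is a sketch that pulls the last hour of errors; timestamps are passed as RFC3339 strings, which Loki accepts alongside nanosecond epochs:

// query-example.ts - run a LogQL query over HTTP
const params = new URLSearchParams({
  query: '{service="myapp-api"} | json | level="error"',
  start: new Date(Date.now() - 60 * 60 * 1000).toISOString(),
  end: new Date().toISOString(),
  limit: '100',
})

const res = await fetch(`http://localhost:3100/loki/api/v1/query_range?${params}`)
const body = await res.json()

// data.result is a list of streams: a label set plus [timestamp, line] pairs
for (const stream of body.data.result) {
  console.log(stream.stream, `${stream.values.length} lines`)
}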
Kubernetes Log Collection
# kubernetes/promtail-daemonset.yml
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: promtail
  namespace: monitoring
spec:
  selector:
    matchLabels:
      app: promtail
  template:
    metadata:
      labels:
        app: promtail
    spec:
      serviceAccountName: promtail
      containers:
        - name: promtail
          image: grafana/promtail:2.9.4
          args:
            - -config.file=/etc/promtail/config.yml
          env:
            - name: HOSTNAME
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
          volumeMounts:
            - name: config
              mountPath: /etc/promtail
            - name: varlog
              mountPath: /var/log
              readOnly: true
            - name: varlibdockercontainers
              mountPath: /var/lib/docker/containers
              readOnly: true
      volumes:
        - name: config
          configMap:
            name: promtail-config
        - name: varlog
          hostPath:
            path: /var/log
        - name: varlibdockercontainers
          hostPath:
            path: /var/lib/docker/containers
# promtail kubernetes config (ConfigMap excerpt)
scrape_configs:
  - job_name: kubernetes-pods
    kubernetes_sd_configs:
      - role: pod
    pipeline_stages:
      - cri: {}  # Parse CRI log format (timestamp, stream, flags, content)
      - json:
          expressions:
            level: level
            msg: msg
    relabel_configs:
      # Only scrape pods with this annotation
      - source_labels: [__meta_kubernetes_pod_annotation_promtail_io_scrape]
        action: keep
        regex: "true"
      - source_labels: [__meta_kubernetes_namespace]
        target_label: namespace
      - source_labels: [__meta_kubernetes_pod_name]
        target_label: pod
      - source_labels: [__meta_kubernetes_pod_container_name]
        target_label: container
      - source_labels: [__meta_kubernetes_pod_label_app]
        target_label: app
      # Required: map each discovered pod to its log files on the node
      - source_labels: [__meta_kubernetes_pod_uid, __meta_kubernetes_pod_container_name]
        separator: /
        target_label: __path__
        replacement: /var/log/pods/*$1/*.log
Grafana Dashboards and Alerting
# grafana/provisioning/datasources/loki.yml
apiVersion: 1
datasources:
  - name: Loki
    type: loki
    url: http://loki:3100
    access: proxy
    jsonData:
      maxLines: 1000
      derivedFields:
        # Extract trace IDs from logs and link to Tempo
        - datasourceUid: tempo
          matcherRegex: '"traceId":"(\w+)"'
          name: TraceID
          url: '$${__value.raw}'
# Loki alert rules (loaded by the ruler from its rules directory)
groups:
  - name: application-alerts
    rules:
      - alert: HighErrorRate
        expr: |
          (
            sum(rate({service="myapp-api"} | json | level="error" [5m]))
            /
            sum(rate({service="myapp-api"} | json [5m]))
          ) > 0.05
        for: 2m
        labels:
          severity: critical
        annotations:
          summary: "High error rate on myapp-api"
          description: "Error rate is {{ $value | humanizePercentage }} over the last 5m"

      - alert: SlowResponses
        expr: |
          quantile_over_time(0.95,
            {service="myapp-api"} | json | unwrap duration [5m]
          ) > 2000
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "P95 response time above 2s"

      - alert: AppDown
        # rate() over an empty stream returns no samples rather than 0, so `== 0`
        # would never fire; absent_over_time fires when no logs arrive at all
        expr: |
          absent_over_time({service="myapp-api"} [1m])
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "No logs received from myapp-api"
Loki vs Alternatives
| Feature | Loki | Elasticsearch | Datadog Logs |
|---|---|---|---|
| Cost (self-hosted) | Very low | High | N/A |
| Cost (managed) | Low | High | High |
| Query language | LogQL | Lucene/KQL | Datadog search syntax |
| Full-text search | Query-time grep only | Indexed | Indexed |
| Label-based search | Yes | Yes | Yes |
| Setup complexity | Low | High | None |
| Grafana integration | Native | Plugin | Plugin |
| Metrics from logs | Yes (LogQL) | Via pipeline | Yes |
Loki's sweet spot: teams already using Prometheus and Grafana who want unified observability without Elasticsearch's operational overhead. For full-text search across large volumes, Elasticsearch still wins — but for most microservices use cases, LogQL is enough.