Files
dbis_core/monitoring/grafana/dashboards/system-health.json
2026-03-02 12:14:07 -08:00

148 lines
3.9 KiB
JSON

{
"dashboard": {
"title": "DBIS Core - System Health",
"tags": ["system", "health", "overview"],
"timezone": "browser",
"schemaVersion": 27,
"version": 1,
"refresh": "30s",
"panels": [
{
  "id": 1,
  "title": "Service Health Status",
  "type": "stat",
  "targets": [
    {
      "expr": "up{job=\"dbis-core\"}",
      "legendFormat": "{{instance}}"
    }
  ],
  "fieldConfig": {
    "defaults": {
      "thresholds": {
        "mode": "absolute",
        "steps": [
          {"value": null, "color": "red"},
          {"value": 1, "color": "green"}
        ]
      }
    }
  },
  "gridPos": {"h": 4, "w": 6, "x": 0, "y": 0}
},
{
  "id": 2,
  "title": "Overall System Status",
  "type": "stat",
  "targets": [
    {
      "expr": "avg(up{job=\"dbis-core\"}) * 100",
      "legendFormat": "Health %"
    }
  ],
  "fieldConfig": {
    "defaults": {
      "unit": "percent"
    }
  },
  "gridPos": {"h": 4, "w": 6, "x": 6, "y": 0}
},
{
  "id": 3,
  "title": "Total Error Rate",
  "type": "graph",
  "targets": [
    {
      "expr": "sum(rate(http_requests_total{job=\"dbis-core\",status=~\"5..\"}[5m]))",
      "legendFormat": "5xx Errors/sec"
    },
    {
      "expr": "sum(rate(http_requests_total{job=\"dbis-core\",status=~\"4..\"}[5m]))",
      "legendFormat": "4xx Errors/sec"
    }
  ],
  "gridPos": {"h": 8, "w": 12, "x": 12, "y": 0}
},
{
  "id": 4,
  "type": "graph",
  "title": "CPU Usage by Service",
  "gridPos": {
    "x": 0,
    "y": 8,
    "w": 12,
    "h": 8
  },
  "targets": [
    {
      "legendFormat": "{{instance}} - {{service}}",
      "expr": "rate(process_cpu_seconds_total{job=\"dbis-core\"}[5m]) * 100"
    }
  ]
},
{
  "id": 5,
  "type": "graph",
  "title": "Memory Usage by Service",
  "gridPos": {
    "x": 12,
    "y": 8,
    "w": 12,
    "h": 8
  },
  "targets": [
    {
      "legendFormat": "{{instance}} - {{service}} (MB)",
      "expr": "process_resident_memory_bytes{job=\"dbis-core\"} / 1024 / 1024"
    }
  ]
},
{
  "id": 6,
  "type": "graph",
  "title": "Database Connection Pool",
  "gridPos": {
    "x": 0,
    "y": 16,
    "w": 12,
    "h": 8
  },
  "targets": [
    {
      "legendFormat": "Pool Size",
      "expr": "db_pool_size{job=\"dbis-core\"}"
    },
    {
      "legendFormat": "Active Connections",
      "expr": "db_pool_active{job=\"dbis-core\"}"
    },
    {
      "legendFormat": "Idle Connections",
      "expr": "db_pool_idle{job=\"dbis-core\"}"
    }
  ]
},
{
  "id": 7,
  "type": "graph",
  "title": "Request Rate by Service",
  "gridPos": {
    "x": 12,
    "y": 16,
    "w": 12,
    "h": 8
  },
  "targets": [
    {
      "legendFormat": "{{service}}",
      "expr": "sum(rate(http_requests_total{job=\"dbis-core\"}[5m])) by (service)"
    }
  ]
},
{
  "id": 8,
  "type": "stat",
  "title": "Active Sessions",
  "gridPos": {
    "x": 0,
    "y": 24,
    "w": 6,
    "h": 4
  },
  "targets": [
    {
      "legendFormat": "Active",
      "expr": "dbis_sessions_active"
    }
  ]
},
{
  "id": 9,
  "type": "graph",
  "title": "Queue Length",
  "gridPos": {
    "x": 6,
    "y": 24,
    "w": 18,
    "h": 8
  },
  "targets": [
    {
      "legendFormat": "Outbox Queue",
      "expr": "dbis_queue_length{queue=\"dual_ledger_outbox\"}"
    },
    {
      "legendFormat": "Settlement Queue",
      "expr": "dbis_queue_length{queue=\"settlement\"}"
    }
  ]
}
]
}
}