{
|
|
"dashboard": {
|
|
"title": "DBIS Core - System Health",
|
|
"tags": ["system", "health", "overview"],
|
|
"timezone": "browser",
|
|
"schemaVersion": 27,
|
|
"version": 1,
|
|
"refresh": "30s",
|
|
"panels": [
|
|
{
|
|
"id": 1,
|
|
"title": "Service Health Status",
|
|
"type": "stat",
|
|
"targets": [
|
|
{
|
|
"expr": "up{job=\"dbis-core\"}",
|
|
"legendFormat": "{{instance}}"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{"value": 0, "color": "red"},
|
|
{"value": 1, "color": "green"}
|
|
]
|
|
}
|
|
}
|
|
},
|
|
"gridPos": {"h": 4, "w": 6, "x": 0, "y": 0}
|
|
},
|
|
{
|
|
"id": 2,
|
|
"title": "Overall System Status",
|
|
"type": "stat",
|
|
"targets": [
|
|
{
|
|
"expr": "avg(up{job=\"dbis-core\"}) * 100",
|
|
"legendFormat": "Health %"
|
|
}
|
|
],
|
|
"gridPos": {"h": 4, "w": 6, "x": 6, "y": 0}
|
|
},
|
|
{
|
|
"id": 3,
|
|
"title": "Total Error Rate",
|
|
"type": "graph",
|
|
"targets": [
|
|
{
|
|
"expr": "sum(rate(http_requests_total{status=~\"5..\"}[5m]))",
|
|
"legendFormat": "5xx Errors/sec"
|
|
},
|
|
{
|
|
"expr": "sum(rate(http_requests_total{status=~\"4..\"}[5m]))",
|
|
"legendFormat": "4xx Errors/sec"
|
|
}
|
|
],
|
|
"gridPos": {"h": 8, "w": 12, "x": 12, "y": 0}
|
|
},
|
|
{
|
|
"id": 4,
|
|
"title": "CPU Usage by Service",
|
|
"type": "graph",
|
|
"targets": [
|
|
{
|
|
"expr": "rate(process_cpu_seconds_total{job=\"dbis-core\"}[5m]) * 100",
|
|
"legendFormat": "{{instance}} - {{service}}"
|
|
}
|
|
],
|
|
"gridPos": {"h": 8, "w": 12, "x": 0, "y": 8}
|
|
},
|
|
{
|
|
"id": 5,
|
|
"title": "Memory Usage by Service",
|
|
"type": "graph",
|
|
"targets": [
|
|
{
|
|
"expr": "process_resident_memory_bytes{job=\"dbis-core\"} / 1024 / 1024",
|
|
"legendFormat": "{{instance}} - {{service}} (MiB)"
|
|
}
|
|
],
|
|
"gridPos": {"h": 8, "w": 12, "x": 12, "y": 8}
|
|
},
|
|
{
|
|
"id": 6,
|
|
"title": "Database Connection Pool",
|
|
"type": "graph",
|
|
"targets": [
|
|
{
|
|
"expr": "db_pool_size{job=\"dbis-core\"}",
|
|
"legendFormat": "Pool Size"
|
|
},
|
|
{
|
|
"expr": "db_pool_active{job=\"dbis-core\"}",
|
|
"legendFormat": "Active Connections"
|
|
},
|
|
{
|
|
"expr": "db_pool_idle{job=\"dbis-core\"}",
|
|
"legendFormat": "Idle Connections"
|
|
}
|
|
],
|
|
"gridPos": {"h": 8, "w": 12, "x": 0, "y": 16}
|
|
},
|
|
{
|
|
"id": 7,
|
|
"title": "Request Rate by Service",
|
|
"type": "graph",
|
|
"targets": [
|
|
{
|
|
"expr": "sum(rate(http_requests_total{job=\"dbis-core\"}[5m])) by (service)",
|
|
"legendFormat": "{{service}}"
|
|
}
|
|
],
|
|
"gridPos": {"h": 8, "w": 12, "x": 12, "y": 16}
|
|
},
|
|
{
|
|
"id": 8,
|
|
"title": "Active Sessions",
|
|
"type": "stat",
|
|
"targets": [
|
|
{
|
|
"expr": "dbis_sessions_active",
|
|
"legendFormat": "Active"
|
|
}
|
|
],
|
|
"gridPos": {"h": 4, "w": 6, "x": 0, "y": 24}
|
|
},
|
|
{
|
|
"id": 9,
|
|
"title": "Queue Length",
|
|
"type": "graph",
|
|
"targets": [
|
|
{
|
|
"expr": "dbis_queue_length{queue=\"dual_ledger_outbox\"}",
|
|
"legendFormat": "Outbox Queue"
|
|
},
|
|
{
|
|
"expr": "dbis_queue_length{queue=\"settlement\"}",
|
|
"legendFormat": "Settlement Queue"
|
|
}
|
|
],
|
|
"gridPos": {"h": 8, "w": 18, "x": 6, "y": 24}
|
|
}
|
|
]
|
|
}
|
|
}
|