---
# Prometheus Alerting Rules for AS4 Settlement
#
# A single rule group whose rules are evaluated every 30s. An alert fires once
# its expression has held continuously for that rule's `for` duration.

groups:
  - name: as4_settlement
    interval: 30s
    rules:
      # High Latency Alert
      # Warning when as4_message_latency_p99 stays above 5 for 5m.
      # The annotation prints the value as seconds; confirm the metric's unit.
      - alert: AS4HighLatency
        expr: as4_message_latency_p99 > 5
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "AS4 message processing latency is high"
          description: "P99 latency is {{ $value }}s (threshold: 5s)"

      # High Failure Rate Alert
      # Critical when the 5m rate of as4_instructions_failed stays above 0.01
      # for 5m. rate() yields a per-second value, not a percentage, so the
      # description states the threshold in the expression's own unit.
      # NOTE(review): if a 1% failure *ratio* was intended, divide by the rate
      # of a total-instructions counter; no such metric is visible in this file.
      - alert: AS4HighFailureRate
        expr: rate(as4_instructions_failed[5m]) > 0.01
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "AS4 instruction failure rate is high"
          description: "Failure rate is {{ $value }} failures/s (threshold: 0.01/s)"

      # Certificate Expiring Alert
      # Warning when as4_certificate_days_until_expiry stays below 30 for 1h.
      - alert: AS4CertificateExpiring
        expr: as4_certificate_days_until_expiry < 30
        for: 1h
        labels:
          severity: warning
        annotations:
          summary: "AS4 certificate expiring soon"
          description: "Certificate expires in {{ $value }} days"

      # System Unavailable Alert
      # Critical when the as4-settlement job is down for 1m. `up == 0` only
      # matches targets that still have an `up` series; absent() also covers
      # the case where no `up` series exists for the job at all.
      - alert: AS4SystemUnavailable
        expr: >-
          up{job="as4-settlement"} == 0
          or absent(up{job="as4-settlement"})
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "AS4 Settlement system is down"
          description: "AS4 service is not responding"

      # Database Connection Alert
      # Critical when as4_database_connection_status equals 0 for 1m.
      - alert: AS4DatabaseConnectionFailed
        expr: as4_database_connection_status == 0
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "AS4 database connection failed"
          description: "Cannot connect to database"

      # Redis Connection Alert
      # Warning when as4_redis_connection_status equals 0 for 1m.
      - alert: AS4RedisConnectionFailed
        expr: as4_redis_connection_status == 0
        for: 1m
        labels:
          severity: warning
        annotations:
          summary: "AS4 Redis connection failed"
          description: "Cannot connect to Redis (nonce tracking may be affected)"

      # High Memory Usage Alert
      # Warning when as4_memory_usage_percent stays above 80 for 5m.
      - alert: AS4HighMemoryUsage
        expr: as4_memory_usage_percent > 80
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "AS4 system memory usage is high"
          description: "Memory usage is {{ $value }}%"

      # Queue Backlog Alert
      # Warning when as4_instruction_queue_length stays above 1000 for 5m.
      - alert: AS4QueueBacklog
        expr: as4_instruction_queue_length > 1000
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "AS4 instruction queue backlog"
          description: "Queue length is {{ $value }} instructions"