2026-03-02 11:37:34 -08:00
|
|
|
#!/usr/bin/env bash
|
2025-12-21 22:32:09 -08:00
|
|
|
# Simple Alert Script
# Sends alerts when Besu containers or services are down, or when a container's disk is nearly full
# Alerts are logged and, when ALERT_EMAIL is set, e-mailed; can be extended to Slack, etc.
|
|
|
|
|
|
|
|
|
|
# Strict mode: abort on command errors, unset variables, and failed pipeline stages.
set -o errexit -o nounset -o pipefail

# Directory holding this script, and the project root two levels above it.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"

# Configuration
# ALERT_EMAIL: recipient address taken from the environment; empty by default.
ALERT_EMAIL="${ALERT_EMAIL:-}"
# ALERT_LOG: file that receives one line per alert.
ALERT_LOG="${PROJECT_ROOT}/logs/alerts.log"
# ALERT_SENT_LOG: file of alert keys that have already been announced.
ALERT_SENT_LOG="${PROJECT_ROOT}/logs/alerts-sent.log"

# Ensure log directory exists
mkdir -p "${ALERT_LOG%/*}"
|
|
|
|
|
|
|
|
|
|
#######################################
# Record an alert and announce it once.
# Every call appends a timestamped line to ALERT_LOG. The first time a given
# message is seen (tracked by its MD5 digest in ALERT_SENT_LOG) it is also
# e-mailed, when ALERT_EMAIL is non-empty and `mail` is installed, and printed
# to stdout. Repeats of the same message are logged but not announced again.
# Globals:
#   ALERT_LOG      (read) file that receives every alert
#   ALERT_SENT_LOG (read) file of already-announced alert keys
#   ALERT_EMAIL    (read) recipient address; empty or unset disables e-mail
# Arguments:
#   $1 - alert message text
# Outputs:
#   "ALERT: <message>" on stdout for alerts that are announced
#######################################
log_alert() {
  local message="$1"
  # Declared separately from the assignments so the exit status of a command
  # substitution is not masked by `local`.
  local timestamp alert_key

  timestamp=$(date '+%Y-%m-%d %H:%M:%S')
  printf '[%s] ALERT: %s\n' "$timestamp" "$message" >> "$ALERT_LOG"

  # Check if we've already sent this alert (avoid spam).
  # The trailing newline is hashed too, so keys stay identical to those
  # produced by `echo "$message" | md5sum`.
  alert_key=$(printf '%s\n' "$message" | md5sum | cut -d' ' -f1) || alert_key=""

  # An empty key must never be used as a grep pattern: it matches every line
  # and would suppress the alert. Without a key, skip de-duplication and
  # announce the alert anyway.
  if [[ -n "$alert_key" ]]; then
    # -F: the key is a literal string. A missing sent-log just means nothing
    # has been announced yet, hence the silenced stderr.
    if grep -qF -- "$alert_key" "$ALERT_SENT_LOG" 2>/dev/null; then
      return 0
    fi
    printf '[%s] %s\n' "$timestamp" "$alert_key" >> "$ALERT_SENT_LOG"
  fi

  # Send email if configured (best effort: mail failures are ignored).
  if [[ -n "${ALERT_EMAIL:-}" ]] && command -v mail >/dev/null 2>&1; then
    printf '%s\n' "$message" \
      | mail -s "Besu Alert: Container Issue" "$ALERT_EMAIL" 2>/dev/null || true
  fi

  # Log to console
  printf 'ALERT: %s\n' "$message"
}
|
|
|
|
|
|
|
|
|
|
# Check all containers: each one must be running and its Besu service active.
for vmid in 1000 1001 1002 1003 1004 1500 1501 1502 1503 2500 2501 2502; do
  # A stopped container gets its own alert; the service check is skipped.
  if ! pct status "$vmid" 2>/dev/null | grep -q running; then
    log_alert "Container $vmid is not running"
    continue
  fi

  # Map the container ID range to the systemd unit checked inside it.
  case "$vmid" in
    100[0-4]) service_name="besu-validator" ;;
    150[0-3]) service_name="besu-sentry" ;;
    250[0-2]) service_name="besu-rpc" ;;
    *) service_name="" ;;
  esac

  # Nothing to check for an ID outside the known ranges.
  [[ -n "$service_name" ]] || continue

  # Ask systemd inside the container whether the unit is active.
  if ! pct exec "$vmid" -- systemctl is-active --quiet "$service_name" 2>/dev/null; then
    log_alert "Service $service_name on container $vmid is not running"
  fi
done
|
|
|
|
|
|
|
|
|
|
# Check disk space (alert if < 10% free, i.e. root filesystem more than 90% used)
for vmid in 1000 1001 1002 1003 1004 1500 1501 1502 1503 2500 2501 2502; do
  # Only running containers are inspected.
  if ! pct status "$vmid" 2>/dev/null | grep -q running; then
    continue
  fi

  # `df -P` prints exactly one line per filesystem, so row 2 is the root
  # filesystem and column 5 its use percentage ("NN%"). stderr of the whole
  # pipeline is silenced, and any failure leaves the value empty.
  disk_usage=$(
    {
      pct exec "$vmid" -- df -P / \
        | awk 'NR==2 { sub(/%/, "", $5); print $5 }'
    } 2>/dev/null
  ) || disk_usage=""

  # Skip anything that is not a plain number rather than feeding it to an
  # arithmetic comparison, where it would be evaluated as a variable name.
  [[ "$disk_usage" =~ ^[0-9]+$ ]] || continue

  # 10# forces base 10 so a value with a leading zero is not read as octal.
  if (( 10#$disk_usage > 90 )); then
    log_alert "Container $vmid disk usage is at ${disk_usage}%"
  fi
done
|