Files
proxmox/scripts/monitoring/simple-alert.sh

74 lines
2.4 KiB
Bash
Executable File

#!/bin/bash
# Simple Alert Script
# Sends alerts when Besu services are down
# Can be extended to send email, Slack, etc.
#
# Environment:
#   ALERT_EMAIL - optional recipient address; when empty, no email is sent.
set -euo pipefail

# Absolute directory holding this script. The output of `cd` is discarded
# because, when CDPATH is set and the script is invoked through a relative
# path, `cd` prints the directory it resolved; without the redirect that text
# would be captured in front of the `pwd` result.
SCRIPT_DIR="$(cd -- "$(dirname "${BASH_SOURCE[0]}")" >/dev/null && pwd)"
# Two directory levels above the script directory.
PROJECT_ROOT="$(cd -- "$SCRIPT_DIR/../.." >/dev/null && pwd)"

# Configuration
ALERT_EMAIL="${ALERT_EMAIL:-}"
# Every alert is appended here.
ALERT_LOG="$PROJECT_ROOT/logs/alerts.log"
# De-duplication keys of alerts that have already been notified.
ALERT_SENT_LOG="$PROJECT_ROOT/logs/alerts-sent.log"

# Ensure log directory exists
mkdir -p "$(dirname "$ALERT_LOG")"
#######################################
# Record an alert and notify about it once per distinct message.
#
# Every call appends a timestamped line to ALERT_LOG. The MD5 digest of the
# message is used as a de-duplication key: the first time a key is seen it is
# appended to ALERT_SENT_LOG, the message is mailed (only when ALERT_EMAIL is
# non-empty and a `mail` command exists) and echoed to stdout. Later calls
# with the same message only write to ALERT_LOG.
#
# NOTE(review): keys are never removed from ALERT_SENT_LOG by this function,
# so an incident that recurs with identical text stays silent until that file
# is pruned -- confirm this is intended.
#
# Globals:   ALERT_LOG, ALERT_SENT_LOG, ALERT_EMAIL (read)
# Arguments: $1 - alert text
# Outputs:   "ALERT: <text>" on stdout for first-time alerts
# Returns:   0 when the alert was a duplicate or was handled
#######################################
log_alert() {
  local message="$1"
  # Declared separately from the assignments so that `local` does not mask
  # the exit status of the command substitutions.
  local timestamp alert_key

  timestamp=$(date '+%Y-%m-%d %H:%M:%S')
  printf '[%s] ALERT: %s\n' "$timestamp" "$message" >> "$ALERT_LOG"

  # printf rather than echo: echo would treat messages such as "-n" or "-e"
  # as options and hash an empty string. For ordinary messages the digest is
  # identical to the one produced by `echo "$message" | md5sum`, so existing
  # ALERT_SENT_LOG entries remain valid.
  alert_key=$(printf '%s\n' "$message" | md5sum | cut -d' ' -f1) || alert_key=""

  if [[ -n "$alert_key" ]]; then
    # Check if we've already sent this alert (avoid spam)
    if grep -qF -- "$alert_key" "$ALERT_SENT_LOG" 2>/dev/null; then
      return 0
    fi
    printf '[%s] %s\n' "$timestamp" "$alert_key" >> "$ALERT_SENT_LOG"
  fi
  # An empty key means hashing failed; the alert is then always delivered,
  # because an empty grep pattern would match every line and hide it.

  # Send email if configured; delivery is best-effort and must not abort
  # the monitoring run.
  if [[ -n "${ALERT_EMAIL:-}" ]] && command -v mail >/dev/null 2>&1; then
    printf '%s\n' "$message" \
      | mail -s "Besu Alert: Container Issue" "$ALERT_EMAIL" 2>/dev/null || true
  fi

  # Log to console
  printf 'ALERT: %s\n' "$message"
}
#######################################
# Map a container VMID to the Besu systemd unit checked inside it.
# Arguments: $1 - VMID
# Outputs:   unit name on stdout (besu-validator for 1000-1004, besu-sentry
#            for 1500-1503, besu-rpc for 2500-2502); nothing for other VMIDs
# Returns:   0 always
#######################################
besu_service_for_vmid() {
  case "${1:-}" in
    100[0-4]) printf '%s\n' "besu-validator" ;;
    150[0-3]) printf '%s\n' "besu-sentry" ;;
    250[0-2]) printf '%s\n' "besu-rpc" ;;
  esac
}

# Check all containers
for vmid in 1000 1001 1002 1003 1004 1500 1501 1502 1503 2500 2501 2502; do
  # Check if container is running; a container that is not running is
  # reported and its service check is skipped.
  if ! pct status "$vmid" 2>/dev/null | grep -q running; then
    log_alert "Container $vmid is not running"
    continue
  fi

  # Determine service name
  service_name=$(besu_service_for_vmid "$vmid")
  if [[ -z "$service_name" ]]; then
    continue
  fi

  # Check service status
  if ! pct exec "$vmid" -- systemctl is-active --quiet "$service_name" 2>/dev/null; then
    log_alert "Service $service_name on container $vmid is not running"
  fi
done
#######################################
# Extract the use percentage from `df -P` output read on stdin.
# Takes field 5 of the second line, strips a trailing '%', and prints the
# value only when it is a plain non-negative integer.
# Outputs: the percentage as an integer; nothing when it is missing or
#          non-numeric (for example "-")
#######################################
root_usage_from_df() {
  awk 'NR == 2 {
    use = $5
    sub(/%$/, "", use)
    if (use ~ /^[0-9]+$/) print use + 0
  }'
}

# Check disk space (alert if < 10% free, i.e. usage above 90%)
for vmid in 1000 1001 1002 1003 1004 1500 1501 1502 1503 2500 2501 2502; do
  if ! pct status "$vmid" 2>/dev/null | grep -q running; then
    continue
  fi

  # df -P keeps each filesystem on a single line in the POSIX column layout.
  # stderr of the command run in the container is silenced here; a failed or
  # unparsable reading is reported below instead of being treated as 0%.
  disk_usage=$(pct exec "$vmid" -- df -P / 2>/dev/null | root_usage_from_df) || disk_usage=""

  if [[ -z "$disk_usage" ]]; then
    printf 'WARNING: could not determine disk usage for container %s\n' "$vmid" >&2
    continue
  fi

  if (( disk_usage > 90 )); then
    log_alert "Container $vmid disk usage is at ${disk_usage}%"
  fi
done