Files
proxmox/scripts/monitoring/monitor-block-production.sh

88 lines
3.0 KiB
Bash
Raw Permalink Normal View History

#!/usr/bin/env bash
# Block Production Monitor
# Continuously monitors block production and alerts on stalls
#
# Inputs (from the environment or the sourced IP configuration):
#   RPC_URL_138  Full RPC endpoint URL; used as-is when set and non-empty.
#   RPC_CORE_1   RPC host; used to build http://<host>:8545 when RPC_URL_138
#                is unset or empty.
set -euo pipefail

# Directory containing this script, and the directory one level above it.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"

# Load IP configuration. This is deliberately best-effort: a missing or
# unreadable file is tolerated here because RPC_URL_138 may already be
# provided by the environment.
# NOTE(review): presumably this file is what defines RPC_CORE_1, and
# PROJECT_ROOT is only one level above the script directory -- confirm that
# config/ip-addresses.conf really lives there.
# shellcheck source=/dev/null
source "${PROJECT_ROOT}/config/ip-addresses.conf" 2>/dev/null || true

# Under `set -u` an unset RPC_CORE_1 would abort with a bare "unbound
# variable" error; fail with an actionable message instead.
if [[ -z "${RPC_URL_138:-}" && -z "${RPC_CORE_1:-}" ]]; then
  printf '%s\n' \
    "ERROR: no RPC endpoint configured: set RPC_URL_138 or RPC_CORE_1 (checked ${PROJECT_ROOT}/config/ip-addresses.conf)" >&2
  exit 1
fi
RPC_URL="${RPC_URL_138:-http://${RPC_CORE_1}:8545}"

CHECK_INTERVAL=30 # Check every 30 seconds
STALL_THRESHOLD=60 # Alert if no blocks for 60 seconds
ALERT_SCRIPT="${SCRIPT_DIR}/alert-block-stall.sh"

# Colors (escape sequences are expanded later by `echo -e` in the log helpers)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'
# Print an informational line to stdout: a BLUE "[INFO]" tag, then the text.
# Globals:   BLUE, NC (read)
# Arguments: $1 - message text (backslash escapes are expanded by echo -e)
log_info() {
  local message=$1
  local tag="${BLUE}[INFO]${NC}"
  echo -e "${tag} ${message}"
}
# Print a success line to stdout: a GREEN "[✓]" tag, then the text.
# Globals:   GREEN, NC (read)
# Arguments: $1 - message text (backslash escapes are expanded by echo -e)
log_success() {
  local message=$1
  local tag="${GREEN}[✓]${NC}"
  echo -e "${tag} ${message}"
}
# Print a warning line to stdout: a YELLOW "[⚠]" tag, then the text.
# Globals:   YELLOW, NC (read)
# Arguments: $1 - message text (backslash escapes are expanded by echo -e)
log_warn() {
  local message=$1
  local tag="${YELLOW}[⚠]${NC}"
  echo -e "${tag} ${message}"
}
# Print an error line to stdout: a RED "[✗]" tag, then the text.
# Globals:   RED, NC (read)
# Arguments: $1 - message text (backslash escapes are expanded by echo -e)
log_error() {
  local message=$1
  local tag="${RED}[✗]${NC}"
  echo -e "${tag} ${message}"
}
#######################################
# Poll the chain height forever and report stalls in block production.
#
# Each cycle asks `cast block-number` for the current height, compares it
# with the previous successful reading, logs the outcome, and then sleeps
# for CHECK_INTERVAL seconds. Once the height has been unchanged for at
# least STALL_THRESHOLD seconds the stall is logged and, if ALERT_SCRIPT
# exists as a file, it is run once per stall with the block number and the
# stall duration (seconds) as its two arguments.
#
# Globals:   RPC_URL, CHECK_INTERVAL, STALL_THRESHOLD, ALERT_SCRIPT (read)
# Arguments: none are read
# Outputs:   status lines via log_info / log_success / log_warn / log_error
# Returns:   never returns normally; loops until the process is terminated
#######################################
monitor_blocks() {
  # Empty until the first successful reading so that any valid height
  # (including 0) is recorded as the baseline.
  local last_block=""
  local last_block_time
  last_block_time=$(date +%s)
  local stall_detected=false
  local current_block current_time time_since_last time_stalled

  log_info "Starting block production monitor..."
  log_info "RPC URL: $RPC_URL"
  log_info "Check interval: ${CHECK_INTERVAL}s"
  log_info "Stall threshold: ${STALL_THRESHOLD}s"
  echo ""

  while true; do
    # Detect RPC failure from cast's exit status and require a purely
    # numeric answer, rather than overloading "0" as a failure sentinel.
    if current_block=$(cast block-number --rpc-url "$RPC_URL" 2>/dev/null) \
      && [[ "$current_block" =~ ^[0-9]+$ ]]; then
      current_time=$(date +%s)

      if [[ "$current_block" != "$last_block" ]]; then
        # Block advanced
        if [[ "$stall_detected" == true ]]; then
          log_success "Block production RESUMED! Block: $current_block"
          stall_detected=false
        fi
        time_since_last=$((current_time - last_block_time))
        log_success "Block: $current_block (advanced in ${time_since_last}s)"
        last_block=$current_block
        last_block_time=$current_time
      else
        # Block not advancing
        time_stalled=$((current_time - last_block_time))
        if ((time_stalled >= STALL_THRESHOLD)); then
          if [[ "$stall_detected" == false ]]; then
            log_error "BLOCK PRODUCTION STALLED! Block: $current_block (stalled for ${time_stalled}s)"
            stall_detected=true
            # Trigger alert. The script runs under `set -e`, so an unguarded
            # non-zero exit from the alert script would kill the monitor at
            # exactly the moment it is needed; report the failure and go on.
            if [[ -f "$ALERT_SCRIPT" ]]; then
              bash "$ALERT_SCRIPT" "$current_block" "$time_stalled" \
                || log_warn "Alert script failed (exit $?): $ALERT_SCRIPT"
            fi
          else
            log_error "Still stalled... (${time_stalled}s)"
          fi
        else
          log_warn "Block not advancing (${time_stalled}s, threshold: ${STALL_THRESHOLD}s)"
        fi
      fi
    else
      log_error "Cannot get block number from RPC"
    fi

    sleep "$CHECK_INTERVAL"
  done
}
# Entry point: start the monitoring loop. The script's command-line arguments
# are forwarded, although monitor_blocks does not read any of them.
monitor_blocks "$@"