Some checks failed
Deploy to Phoenix / deploy (push) Has been cancelled
- Config, docs, scripts, and backup manifests - Submodule refs unchanged (m = modified content in submodules) Made-with: Cursor
93 lines
3.1 KiB
Bash
93 lines
3.1 KiB
Bash
#!/usr/bin/env bash
# Check health of RPC node VMs only (container status + besu-rpc service + RPC block).
# Uses SSH to Proxmox hosts. The script changes into the project root itself
# (two directories above this file), so it can be invoked from any directory.
# Usage: ./scripts/health/check-rpc-vms-health.sh

set -euo pipefail

# Resolve the project root relative to this file's own location.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
cd "$PROJECT_ROOT"

# Optional host overrides (e.g. PROXMOX_HOST_* variables). Loading is
# deliberately best-effort: a missing or failing config falls back to the
# defaults used later in the script.
if [[ -f config/ip-addresses.conf ]]; then
  # shellcheck source=/dev/null
  source config/ip-addresses.conf 2>/dev/null || true
fi

# SSH user for shell access. PROXMOX_USER in .env may be an API-style value
# such as root@pam; anything containing "@" is replaced by plain "root".
# Precedence: PROXMOX_SSH_USER, then PROXMOX_USER, then "root".
PROXMOX_SSH_USER="${PROXMOX_SSH_USER:-${PROXMOX_USER:-root}}"
if [[ "$PROXMOX_SSH_USER" == *"@"* ]]; then
  PROXMOX_SSH_USER="root"
fi
# The rest of the script builds SSH targets from PROXMOX_USER.
PROXMOX_USER="${PROXMOX_SSH_USER}"
# Proxmox host addresses; each can be overridden through the matching
# PROXMOX_HOST_* variable.
R630_01="${PROXMOX_HOST_R630_01:-192.168.11.11}"
R630_02="${PROXMOX_HOST_R630_02:-192.168.11.12}"
ML110="${PROXMOX_HOST_ML110:-192.168.11.10}"

# VMID:host:service (same mapping as review-sentry-and-rpc-nodes.sh)
# Each entry is split on ":" into the container ID, the Proxmox host to SSH
# into, and the systemd unit checked inside the container.
RPC_NODES=(
  "2101:$R630_01:besu-rpc"
  "2201:$R630_02:besu-rpc"
  "2301:$ML110:besu-rpc"
  "2303:$R630_02:besu-rpc"
  "2304:$ML110:besu-rpc"
  "2305:$ML110:besu-rpc"
  "2306:$ML110:besu-rpc"
  "2307:$ML110:besu-rpc"
  "2308:$ML110:besu-rpc"
  "2400:$ML110:besu-rpc"
  "2401:$R630_02:besu-rpc"
  "2402:$ML110:besu-rpc"
  "2403:$ML110:besu-rpc"
)

# ANSI colour escapes, stored unexpanded; they are interpreted when printed
# with `echo -e`.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
CYAN='\033[0;36m'
NC='\033[0m'

# Options passed to every ssh invocation, kept as one space-separated string.
# NOTE(review): StrictHostKeyChecking=no accepts unknown host keys without
# prompting — confirm this is acceptable for the network these hosts live on.
SSH_OPTS="-o ConnectTimeout=5 -o StrictHostKeyChecking=no"

# Probe every entry of RPC_NODES in three stages and print one line per node:
#   1. container state via `pct status` on the Proxmox host,
#   2. service state via `systemctl is-active` inside the container,
#   3. current block via an eth_blockNumber JSON-RPC call to port 8545.
# A node counts as healthy when stages 1 and 2 pass; stage 3 only annotates
# the output line. The script exits with the number of unhealthy nodes
# (capped at 255 so the status can never wrap around to 0).
echo -e "${CYAN}=== RPC Node VMs Health ===${NC}"
echo ""

# Block height below which a node is reported as "behind".
# Override with RPC_MIN_BLOCK; the default matches the previous fixed value.
min_block="${RPC_MIN_BLOCK:-2050000}"
if [[ ! "$min_block" =~ ^(0|[1-9][0-9]*)$ ]]; then
  echo "warning: RPC_MIN_BLOCK='$min_block' is not a decimal integer; using 2050000" >&2
  min_block=2050000
fi

# SSH_OPTS is a space-separated string; split it once into an array so every
# ssh call can expand it safely quoted.
read -r -a ssh_opts <<< "$SSH_OPTS"

ok=0
fail=0
for entry in "${RPC_NODES[@]}"; do
  IFS=: read -r vmid host service <<< "$entry"
  ssh_target="${PROXMOX_USER}@${host}"

  # Stage 1: container state. `pct status` prints "status: <state>".
  # ssh -n keeps the probe from consuming the script's own stdin.
  ct_status=$(ssh -n "${ssh_opts[@]}" "$ssh_target" "pct status $vmid 2>/dev/null" \
    | awk '{print $2}') || true
  [[ -n "$ct_status" ]] || ct_status="unknown"
  if [[ "$ct_status" != "running" ]]; then
    echo -e " VMID $vmid: container ${RED}$ct_status${NC} (host $host)"
    fail=$((fail + 1))
    continue
  fi

  # Stage 2: service state. `systemctl is-active` prints the state AND exits
  # non-zero for anything but "active", so keep whatever it printed and only
  # fall back to "unknown" when nothing came back at all.
  service_status=$(ssh -n "${ssh_opts[@]}" "$ssh_target" \
    "pct exec $vmid -- systemctl is-active $service 2>/dev/null") || true
  [[ -n "$service_status" ]] || service_status="unknown"
  if [[ "$service_status" != "active" ]]; then
    echo -e " VMID $vmid: container running, ${YELLOW}$service $service_status${NC} (host $host)"
    fail=$((fail + 1))
    continue
  fi

  # Stage 3 (informational): first address reported by the container, then
  # a direct JSON-RPC query from this machine.
  ip=$(ssh -n "${ssh_opts[@]}" "$ssh_target" \
    "pct exec $vmid -- hostname -I 2>/dev/null | awk '{print \$1}'" 2>/dev/null) || ip=""
  block_info=""
  if [[ -n "$ip" ]]; then
    resp=$(curl -s -m 3 -X POST -H "Content-Type: application/json" \
      -d '{"jsonrpc":"2.0","method":"eth_blockNumber","params":[],"id":1}' \
      "http://$ip:8545" 2>/dev/null) || resp=""
    # A missing jq or malformed JSON must not abort the whole run.
    block_hex=$(jq -r '.result // empty' <<< "$resp" 2>/dev/null) || block_hex=""
    # Only evaluate a strictly validated hex literal: the value comes from the
    # network, and arithmetic expansion of arbitrary text is unsafe. The
    # 15-digit cap keeps the value inside a signed 64-bit integer.
    if [[ "$block_hex" =~ ^0x[0-9a-fA-F]{1,15}$ ]]; then
      block_dec=$((block_hex))
      if (( block_dec < min_block )); then
        block_info=" → block ${YELLOW}$block_dec (behind)${NC}"
      else
        block_info=" → block ${GREEN}$block_dec${NC}"
      fi
    else
      # Covers an empty reply, a JSON-RPC error object and an unusable result.
      block_info=" → ${YELLOW}RPC no response${NC}"
    fi
  fi

  echo -e " VMID $vmid: container running, ${GREEN}$service active${NC} ($host)$block_info"
  ok=$((ok + 1))
done

echo ""
echo -e "${CYAN}Summary: ${GREEN}$ok healthy${NC}, ${RED}$fail with issues${NC} (total ${#RPC_NODES[@]} RPC nodes)"
exit "$(( fail > 255 ? 255 : fail ))"