#!/bin/bash
# Vault Cluster Health Check Script
# Monitors cluster health and node status
#
# For every entry in VAULT_NODES the script checks, in order:
#   1. the Proxmox container is running   (pct status, over ssh)
#   2. the vault systemd unit is active    (systemctl is-active, via pct exec)
#   3. Vault reports itself as unsealed    (vault status -format=json)
#   4. the HTTP API answers on port 8200   (curl /v1/sys/health)
# A failure in step 1 or 2 skips the remaining steps for that node.
# When VAULT_TOKEN is set, the raft peer list is fetched from the first node
# and the number of peers is compared with the number of configured nodes.
#
# Environment:
#   PROXMOX_HOST_1, PROXMOX_HOST_2  Proxmox hosts that carry the containers
#   VAULT_TOKEN                     optional; enables the raft peer check
#   SSH_CONNECT_TIMEOUT             seconds for ssh to connect (default 10)
#   CURL_MAX_TIME                   seconds for the API probe (default 10)
#
# Exit status: 0 when every check passed, 1 when at least one check failed.
# Warnings (API probe, missing token, unreadable peer list) do not fail the run.

set -euo pipefail

# Colors
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'

# %b expands the colour escapes; the message itself is printed verbatim.
log_info()    { printf '%b[INFO]%b %s\n' "$BLUE" "$NC" "$1"; }
log_success() { printf '%b[✓]%b %s\n' "$GREEN" "$NC" "$1"; }
log_warn()    { printf '%b[⚠]%b %s\n' "$YELLOW" "$NC" "$1"; }
log_error()   { printf '%b[✗]%b %s\n' "$RED" "$NC" "$1"; }

#######################################
# Print the scalar value stored under a key in a JSON document.
# Tolerates whitespace around the colon and both quoted and bare values.
# This is a regex match, not a JSON parser: the first occurrence of the key
# wins and nested objects/arrays are not understood.
# Arguments: $1 - JSON text, $2 - key name
# Outputs:   the value on stdout; nothing when the key is absent
# Returns:   0 always, so a missing key cannot abort the script under set -e
#######################################
json_field() {
  local json=$1 key=$2
  local pattern="\"${key}\"[[:space:]]*:[[:space:]]*\"?([^\",}[:space:]]*)"
  if [[ "$json" =~ $pattern ]]; then
    printf '%s\n' "${BASH_REMATCH[1]}"
  fi
}

# Configuration
PROXMOX_HOST_1="${PROXMOX_HOST_1:-192.168.11.11}"
PROXMOX_HOST_2="${PROXMOX_HOST_2:-192.168.11.12}"

# Entry format: vmid:proxmox_host:container_ip
# The host part uses the PROXMOX_HOST_* variables so that overriding them
# affects the per-node checks as well as the cluster check.
VAULT_NODES=(
  "8640:${PROXMOX_HOST_1}:192.168.11.200"
  "8641:${PROXMOX_HOST_2}:192.168.11.215"
  "8642:${PROXMOX_HOST_1}:192.168.11.202"
)

VAULT_TOKEN="${VAULT_TOKEN:-}"
EXPECTED_PEERS=${#VAULT_NODES[@]}

# Bound every network call so an unreachable host cannot hang the run.
SSH_OPTS=(-o "ConnectTimeout=${SSH_CONNECT_TIMEOUT:-10}")
CURL_MAX_TIME="${CURL_MAX_TIME:-10}"

EXIT_CODE=0

echo "═══════════════════════════════════════════════════════════"
echo " Vault Cluster Health Check"
echo "═══════════════════════════════════════════════════════════"
echo ""

# Check each node
for node_info in "${VAULT_NODES[@]}"; do
  IFS=':' read -r vmid host ip <<< "$node_info"

  log_info "Checking Node $vmid ($ip)..."

  # Check container status
  CONTAINER_STATUS=$(ssh "${SSH_OPTS[@]}" root@"$host" "pct status $vmid" 2>/dev/null) || true
  if grep -q "running" <<< "$CONTAINER_STATUS"; then
    log_success "Container $vmid is running"
  else
    log_error "Container $vmid is not running"
    EXIT_CODE=1
    continue
  fi

  # Check Vault service. The state must be exactly "active": a substring
  # match would also accept "inactive" and "activating".
  SERVICE_STATE=$(ssh "${SSH_OPTS[@]}" root@"$host" \
    "pct exec $vmid -- systemctl is-active vault" 2>/dev/null) || true
  if grep -qx "active" <<< "$SERVICE_STATE"; then
    log_success "Vault service is active"
  else
    log_error "Vault service is not active on $vmid"
    EXIT_CODE=1
    continue
  fi

  # Check Vault status. `vault status` exits non-zero for a sealed Vault while
  # still printing its JSON, so the exit code is ignored and the output is
  # inspected instead.
  VAULT_STATUS=$(ssh "${SSH_OPTS[@]}" root@"$host" \
    "pct exec $vmid -- bash -c 'export VAULT_ADDR=http://127.0.0.1:8200 && vault status -format=json 2>/dev/null'" \
    2>/dev/null) || true
  SEALED=$(json_field "$VAULT_STATUS" sealed)

  if [[ -z "$SEALED" ]]; then
    # No output at all, or output without a "sealed" field.
    log_error "Failed to get Vault status from $vmid"
    EXIT_CODE=1
  else
    HA_MODE=$(json_field "$VAULT_STATUS" ha_mode)
    HA_MODE=${HA_MODE:-unknown}

    if [[ "$SEALED" == "false" ]]; then
      log_success "Vault is unsealed"
    else
      log_error "Vault is sealed on $vmid"
      EXIT_CODE=1
    fi
    log_info " HA Mode: $HA_MODE"
  fi

  # Check API endpoint. By default /v1/sys/health answers 429 on a standby
  # node, which `curl -f` treats as a failure; standbyok/perfstandbyok make
  # healthy standbys answer like the active node.
  if curl -s -f --max-time "$CURL_MAX_TIME" \
    "http://$ip:8200/v1/sys/health?standbyok=true&perfstandbyok=true" > /dev/null 2>&1; then
    log_success "API endpoint is accessible"
  else
    log_warn "API endpoint may not be accessible"
  fi

  echo ""
done

# Check cluster status if token provided
if [[ -n "$VAULT_TOKEN" ]]; then
  log_info "Checking cluster status..."

  # The peer list is queried through the first configured node.
  IFS=':' read -r peer_vmid peer_host _ <<< "${VAULT_NODES[0]}"

  # NOTE(review): the token is interpolated into the remote command line, so
  # it is visible in process listings on both machines and must not contain
  # shell metacharacters. Consider handing it over via stdin instead.
  CLUSTER_PEERS=$(ssh "${SSH_OPTS[@]}" root@"$peer_host" \
    "pct exec $peer_vmid -- bash -c 'export VAULT_ADDR=http://127.0.0.1:8200 && export VAULT_TOKEN=$VAULT_TOKEN && vault operator raft list-peers 2>/dev/null'" \
    2>/dev/null) || true

  if [[ -n "$CLUSTER_PEERS" ]]; then
    # grep -c prints 0 AND exits 1 when nothing matches; `|| true` keeps that
    # single "0" instead of appending a second one.
    PEER_COUNT=$(grep -c "vault-phoenix" <<< "$CLUSTER_PEERS" || true)
    if (( PEER_COUNT == EXPECTED_PEERS )); then
      log_success "All $EXPECTED_PEERS nodes in cluster"
      echo "$CLUSTER_PEERS"
    else
      log_warn "Only $PEER_COUNT nodes in cluster (expected $EXPECTED_PEERS)"
      EXIT_CODE=1
    fi
  else
    log_warn "Could not retrieve cluster peer list"
  fi
else
  log_warn "VAULT_TOKEN not provided, skipping cluster status check"
fi

echo ""

# Summary
if (( EXIT_CODE == 0 )); then
  log_success "✅ All health checks passed"
else
  log_error "✗ Some health checks failed"
fi

exit "$EXIT_CODE"