Files
proxmox/scripts/vault-health-check.sh

127 lines
4.2 KiB
Bash
Raw Permalink Normal View History

#!/usr/bin/env bash
# Vault Cluster Health Check Script
# Monitors cluster health and node status
# Strict mode: stop on command failure, on use of an unset variable, and when
# any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Absolute directory holding this script, and its parent directory.
SCRIPT_DIR="$(
  cd "$(dirname "${BASH_SOURCE[0]}")" && pwd
)"
PROJECT_ROOT="$(
  cd "${SCRIPT_DIR}/.." && pwd
)"

# Load IP configuration. The file is optional: a missing or failing config is
# deliberately ignored and its error output is discarded.
. "${PROJECT_ROOT}/config/ip-addresses.conf" 2>/dev/null || :
# Colors
# ANSI colour sequences kept as literal backslash escapes (not raw ESC bytes);
# they are turned into real escape codes when the log_* helpers print them.
_sgr='\033['
RED="${_sgr}0;31m"
GREEN="${_sgr}0;32m"
YELLOW="${_sgr}1;33m"
BLUE="${_sgr}0;34m"
NC="${_sgr}0m" # reset ("no colour")
unset _sgr
# Print message $1 to stdout behind a blue "[INFO]" tag.
# Backslash escapes in the colour variables and in the message are expanded.
log_info() {
  printf '%b\n' "${BLUE}[INFO]${NC} $1"
}
# Print message $1 to stdout behind a green "[✓]" tag.
# Backslash escapes in the colour variables and in the message are expanded.
log_success() {
  printf '%b\n' "${GREEN}[✓]${NC} $1"
}
# Print message $1 to stdout behind a yellow "[⚠]" tag.
# Backslash escapes in the colour variables and in the message are expanded.
log_warn() {
  printf '%b\n' "${YELLOW}[⚠]${NC} $1"
}
# Print message $1 to stdout behind a red "[✗]" tag.
# Backslash escapes in the colour variables and in the message are expanded.
log_error() {
  printf '%b\n' "${RED}[✗]${NC} $1"
}
# Configuration
# Every value may be overridden from the environment or the sourced config.
PROXMOX_HOST_1="${PROXMOX_HOST_1:-192.168.11.11}"
PROXMOX_HOST_2="${PROXMOX_HOST_2:-192.168.11.12}"

# The node list addresses the hosts by their R630 names. Those names had no
# default, so with `set -u` the script aborted with "unbound variable" whenever
# the optional config file was not loaded. Fall back to the generic host
# defaults above.
# NOTE(review): assumes R630_01/R630_02 correspond to PROXMOX_HOST_1/2 (the
# cluster check reaches container 8640 through PROXMOX_HOST_1) — confirm.
PROXMOX_HOST_R630_01="${PROXMOX_HOST_R630_01:-${PROXMOX_HOST_1}}"
PROXMOX_HOST_R630_02="${PROXMOX_HOST_R630_02:-${PROXMOX_HOST_2}}"

# One entry per Vault node: "<container id>:<Proxmox host>:<node IP>".
# NOTE(review): the "5" after the IP_SERVICE_21 expansion is kept so the
# effective address is unchanged (192.168.11.215 by default); it looks like a
# search-and-replace artefact — confirm the intended address for node 8641.
VAULT_NODES=(
  "8640:${PROXMOX_HOST_R630_01}:${IP_SERVICE_200:-192.168.11.200}"
  "8641:${PROXMOX_HOST_R630_02}:${IP_SERVICE_21:-192.168.11.21}5"
  "8642:${PROXMOX_HOST_R630_01}:${IP_SERVICE_202:-192.168.11.202}"
)

# Optional token; the cluster peer check is skipped when it is empty.
VAULT_TOKEN="${VAULT_TOKEN:-}"

# Overall result: set to 1 by any failed check and used as the exit status.
EXIT_CODE=0
# Report header: a title framed by two rules, followed by a blank line.
printf '%s\n' \
  "═══════════════════════════════════════════════════════════" \
  " Vault Cluster Health Check" \
  "═══════════════════════════════════════════════════════════" \
  ""
# Print the value of key $2 found in the JSON text $1, with surrounding double
# quotes removed. Returns non-zero (printing nothing) when the key is absent.
# Whitespace around the colon is accepted, so both compact and pretty-printed
# JSON work. This is a lightweight matcher for simple scalar fields, not a
# general JSON parser.
_vault_json_field() {
  local json=$1 key=$2 value
  local pattern="\"${key}\"[[:space:]]*:[[:space:]]*(\"[^\"]*\"|[^,}[:space:]]+)"
  [[ "$json" =~ $pattern ]] || return 1
  value=${BASH_REMATCH[1]}
  if [[ "$value" == \"*\" ]]; then
    value=${value:1:${#value}-2}
  fi
  printf '%s\n' "$value"
}

# Check each node
# For every "<vmid>:<host>:<ip>" entry: container state, Vault service state,
# seal status, then reachability of the HTTP API. A failed container or
# service check skips the remaining checks for that node.
for node_info in "${VAULT_NODES[@]}"; do
  IFS=':' read -r vmid host ip <<< "$node_info"
  log_info "Checking Node $vmid ($ip)..."

  # Check container status
  # An ssh/pct failure is treated the same as "not running".
  container_state=$(ssh root@"$host" "pct status $vmid" 2>/dev/null) || container_state=""
  if [[ "$container_state" == *running* ]]; then
    log_success "Container $vmid is running"
  else
    log_error "Container $vmid is not running"
    EXIT_CODE=1
    continue
  fi

  # Check Vault service
  # Compare the whole word: a substring match on "active" would also accept
  # "inactive" and "activating".
  service_state=$(ssh root@"$host" "pct exec $vmid -- systemctl is-active vault" 2>/dev/null) || service_state=""
  service_state=${service_state//[[:space:]]/}
  if [[ "$service_state" == "active" ]]; then
    log_success "Vault service is active"
  else
    log_error "Vault service is not active on $vmid"
    EXIT_CODE=1
    continue
  fi

  # Check Vault status
  # `vault status` exits non-zero for a sealed node while still printing its
  # JSON, so the exit status is ignored and the output itself decides.
  VAULT_STATUS=$(ssh root@"$host" "pct exec $vmid -- bash -c 'export VAULT_ADDR=http://127.0.0.1:8200 && vault status -format=json 2>/dev/null'" 2>/dev/null) || true
  if SEALED=$(_vault_json_field "$VAULT_STATUS" sealed); then
    HA_MODE=$(_vault_json_field "$VAULT_STATUS" ha_mode) || HA_MODE="unknown"
    if [[ "$SEALED" == "false" ]]; then
      log_success "Vault is unsealed"
    else
      log_error "Vault is sealed on $vmid"
      EXIT_CODE=1
    fi
    log_info " HA Mode: $HA_MODE"
  else
    # No output, or output without a "sealed" field.
    log_error "Failed to get Vault status from $vmid"
    EXIT_CODE=1
  fi

  # Check API endpoint
  # Standby nodes answer /sys/health with a non-2xx code by default, which
  # `curl -f` reports as failure; the *ok parameters make them answer like the
  # active node. Timeouts keep an unreachable node from stalling the run.
  if curl -s -f --connect-timeout 5 --max-time 10 \
    "http://$ip:8200/v1/sys/health?standbyok=true&perfstandbyok=true" > /dev/null 2>&1; then
    log_success "API endpoint is accessible"
  else
    log_warn "API endpoint may not be accessible"
  fi
  echo ""
done
# Print how many lines of the peer listing $1 mention "vault-phoenix".
# `grep -c` prints the count even when it is 0 but then exits 1; adding an
# `|| echo 0` fallback in that case produced the two-line value "0\n0", so the
# exit status is ignored instead.
_count_vault_peers() {
  local listing=$1 count
  count=$(grep -c "vault-phoenix" <<< "$listing") || true
  printf '%s\n' "${count:-0}"
}

# Check cluster status if token provided
if [[ -n "$VAULT_TOKEN" ]]; then
  log_info "Checking cluster status..."
  # NOTE(review): the token is interpolated into the remote command string, so
  # it must not contain quotes or whitespace, and it travels as part of the
  # command line — consider passing it another way.
  # A failing ssh/vault call leaves the listing empty, which is reported below.
  CLUSTER_PEERS=$(ssh root@"$PROXMOX_HOST_1" "pct exec 8640 -- bash -c 'export VAULT_ADDR=http://127.0.0.1:8200 && export VAULT_TOKEN=$VAULT_TOKEN && vault operator raft list-peers 2>/dev/null'" 2>/dev/null) || true
  if [[ -n "$CLUSTER_PEERS" ]]; then
    PEER_COUNT=$(_count_vault_peers "$CLUSTER_PEERS")
    if [[ "$PEER_COUNT" -eq 3 ]]; then
      log_success "All 3 nodes in cluster"
      echo "$CLUSTER_PEERS"
    else
      log_warn "Only $PEER_COUNT nodes in cluster (expected 3)"
      EXIT_CODE=1
    fi
  else
    # An unavailable peer list is only a warning; it does not fail the run.
    log_warn "Could not retrieve cluster peer list"
  fi
else
  log_warn "VAULT_TOKEN not provided, skipping cluster status check"
fi
# Blank line between the checks and the verdict.
printf '\n'

# Summary
# EXIT_CODE is 0 unless a check above set it to 1; it is both the verdict
# printed here and the script's exit status.
if [[ "$EXIT_CODE" -ne 0 ]]; then
  log_error "✗ Some health checks failed"
else
  log_success "✅ All health checks passed"
fi
exit "$EXIT_CODE"