Some checks failed
Deploy to Phoenix / deploy (push) Has been cancelled
- Config, docs, scripts, and backup manifests - Submodule refs unchanged (m = modified content in submodules) Made-with: Cursor
127 lines
4.2 KiB
Bash
Executable File
127 lines
4.2 KiB
Bash
Executable File
#!/usr/bin/env bash
# Vault Cluster Health Check Script
# Monitors cluster health and node status

set -euo pipefail

# Load IP configuration
# SCRIPT_DIR is the directory containing this script; PROJECT_ROOT is its
# parent. The configuration file is optional: when it is missing, the
# defaults further down in this script are used instead.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
IP_CONFIG_FILE="${PROJECT_ROOT}/config/ip-addresses.conf"
if [[ -r "$IP_CONFIG_FILE" ]]; then
  # A failed load stays non-fatal (best effort), but is reported on stderr
  # instead of being silently discarded.
  # shellcheck source=/dev/null
  source "$IP_CONFIG_FILE" \
    || echo "[WARN] Could not load ${IP_CONFIG_FILE}; falling back to defaults" >&2
fi

# Colors
# ANSI colour sequences stored as literal backslash strings; they are turned
# into real escape characters at output time by the log_* helpers.
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly BLUE='\033[0;34m'
readonly NC='\033[0m' # reset ("no colour")

# Print an informational message on stdout, prefixed with a blue [INFO] tag.
# Globals:   BLUE, NC (read)
# Arguments: $1 - message text; printed literally (backslashes are not
#                 interpreted), empty when omitted
log_info() { printf '%b[INFO]%b %s\n' "$BLUE" "$NC" "${1:-}"; }
# Print a success message on stdout, prefixed with a green [✓] tag.
# Globals:   GREEN, NC (read)
# Arguments: $1 - message text; printed literally (backslashes are not
#                 interpreted), empty when omitted
log_success() { printf '%b[✓]%b %s\n' "$GREEN" "$NC" "${1:-}"; }
# Print a warning message on stdout, prefixed with a yellow [⚠] tag.
# Globals:   YELLOW, NC (read)
# Arguments: $1 - message text; printed literally (backslashes are not
#                 interpreted), empty when omitted
log_warn() { printf '%b[⚠]%b %s\n' "$YELLOW" "$NC" "${1:-}"; }
# Print an error message on stdout, prefixed with a red [✗] tag.
# Globals:   RED, NC (read)
# Arguments: $1 - message text; printed literally (backslashes are not
#                 interpreted), empty when omitted
log_error() { printf '%b[✗]%b %s\n' "$RED" "$NC" "${1:-}"; }

# Configuration
# Every value can be overridden from the environment or from the optional
# IP configuration file; the literals below are the built-in defaults.
PROXMOX_HOST_1="${PROXMOX_HOST_1:-192.168.11.11}"
PROXMOX_HOST_2="${PROXMOX_HOST_2:-192.168.11.12}"

# One entry per Vault node, formatted "<vmid>:<proxmox host>:<node ip>".
# PROXMOX_HOST_R630_01/_02 fall back to PROXMOX_HOST_1/_2 so that the script
# does not abort with "unbound variable" under `set -u` when the optional
# config file was not loaded.
# NOTE(review): assumes R630_01 == PROXMOX_HOST_1 and R630_02 == PROXMOX_HOST_2
# (the cluster check runs `pct exec 8640` on PROXMOX_HOST_1) - confirm.
# NOTE(review): the literal "5" appended to IP_SERVICE_21 on the 8641 entry
# looks like the leftover of an automated search/replace; it is kept so the
# resulting address (192.168.11.215 by default) is unchanged - confirm the
# intended variable/address.
VAULT_NODES=(
  "8640:${PROXMOX_HOST_R630_01:-${PROXMOX_HOST_1}}:${IP_SERVICE_200:-192.168.11.200}"
  "8641:${PROXMOX_HOST_R630_02:-${PROXMOX_HOST_2}}:${IP_SERVICE_21:-192.168.11.21}5"
  "8642:${PROXMOX_HOST_R630_01:-${PROXMOX_HOST_1}}:${IP_SERVICE_202:-192.168.11.202}"
)
# Optional; the cluster peer check is skipped when this is empty.
VAULT_TOKEN="${VAULT_TOKEN:-}"

# Overall result of the run; set to 1 by any failing check and used as the
# script's exit status.
EXIT_CODE=0

# Report banner
echo "═══════════════════════════════════════════════════════════"
echo " Vault Cluster Health Check"
echo "═══════════════════════════════════════════════════════════"
echo ""

# Check each node

#######################################
# Run all health checks for one Vault node and print the results.
# Checks, in order: LXC container state, the vault systemd unit, seal status
# from `vault status`, and reachability of the HTTP health endpoint.
# Globals:   none (uses log_info/log_success/log_warn/log_error)
# Arguments: $1 - container VMID
#            $2 - Proxmox host to ssh into (as root)
#            $3 - IP address of the Vault node
# Outputs:   progress lines on stdout
# Returns:   0 when every mandatory check passed, 1 otherwise. An unreachable
#            API endpoint only produces a warning.
#######################################
check_vault_node() {
  local vmid=$1 host=$2 ip=$3
  local rc=0
  local ct_state svc_state status_json sealed ha_mode
  # Tolerate both compact and pretty-printed JSON ("key": value).
  local sealed_re='"sealed"[[:space:]]*:[[:space:]]*(true|false)'
  local ha_re='"ha_mode"[[:space:]]*:[[:space:]]*"([^"]*)"'

  log_info "Checking Node $vmid ($ip)..."

  # Check container status
  # Output is captured first instead of piping into `grep -q`, which under
  # `pipefail` can fail spuriously when grep exits before ssh has finished.
  if ct_state=$(ssh root@"$host" "pct status $vmid" 2>/dev/null) \
    && [[ "$ct_state" == *running* ]]; then
    log_success "Container $vmid is running"
  else
    log_error "Container $vmid is not running"
    return 1
  fi

  # Check Vault service
  # Require a line that is exactly "active": a substring match would also
  # accept "inactive".
  if svc_state=$(ssh root@"$host" "pct exec $vmid -- systemctl is-active vault" 2>/dev/null) \
    && grep -qxF 'active' <<< "$svc_state"; then
    log_success "Vault service is active"
  else
    log_error "Vault service is not active on $vmid"
    return 1
  fi

  # Check Vault status
  # The exit status is ignored on purpose: `vault status` exits 2 for a
  # sealed Vault while still printing the JSON that is parsed below.
  status_json=$(ssh root@"$host" "pct exec $vmid -- bash -c 'export VAULT_ADDR=http://127.0.0.1:8200 && vault status -format=json 2>/dev/null'" 2>/dev/null) || true

  if [[ "$status_json" =~ $sealed_re ]]; then
    sealed=${BASH_REMATCH[1]}
    ha_mode="unknown"
    if [[ "$status_json" =~ $ha_re ]]; then
      ha_mode=${BASH_REMATCH[1]}
    fi

    if [[ "$sealed" == "false" ]]; then
      log_success "Vault is unsealed"
    else
      log_error "Vault is sealed on $vmid"
      rc=1
    fi

    log_info " HA Mode: $ha_mode"
  else
    # No output, or output without a parsable "sealed" field.
    log_error "Failed to get Vault status from $vmid"
    rc=1
  fi

  # Check API endpoint
  # standbyok/perfstandbyok make standby nodes answer 200 instead of 429, so
  # `curl -f` does not flag healthy standbys. Timeouts keep an unreachable
  # node from stalling the whole run.
  if curl -s -f --connect-timeout 5 --max-time 10 \
    "http://$ip:8200/v1/sys/health?standbyok=true&perfstandbyok=true" \
    > /dev/null 2>&1; then
    log_success "API endpoint is accessible"
  else
    log_warn "API endpoint may not be accessible"
  fi

  echo ""
  return "$rc"
}

for node_info in "${VAULT_NODES[@]}"; do
  # Entries have the form "<vmid>:<host>:<ip>".
  IFS=':' read -r vmid host ip <<< "$node_info"
  check_vault_node "$vmid" "$host" "$ip" || EXIT_CODE=1
done

# Check cluster status if token provided

# Count the lines of a `vault operator raft list-peers` listing that mention
# "vault-phoenix".
# Arguments: $1 - peer listing text
# Outputs:   the count as a single integer on stdout
count_raft_peers() {
  # grep -c already prints "0" when nothing matches and merely exits 1;
  # `|| true` neutralises that status without emitting a second "0".
  grep -c "vault-phoenix" <<< "$1" || true
}

if [ -n "${VAULT_TOKEN:-}" ]; then
  log_info "Checking cluster status..."
  # NOTE(review): the token is interpolated into the remote command line, so
  # it is visible in process listings on both hosts and is not shell-escaped.
  # Consider handing it over via stdin or a protected file instead.
  CLUSTER_PEERS=$(ssh root@"$PROXMOX_HOST_1" "pct exec 8640 -- bash -c 'export VAULT_ADDR=http://127.0.0.1:8200 && export VAULT_TOKEN=$VAULT_TOKEN && vault operator raft list-peers 2>/dev/null'" 2>/dev/null) || true

  if [ -n "$CLUSTER_PEERS" ]; then
    PEER_COUNT=$(count_raft_peers "$CLUSTER_PEERS")
    if [ "$PEER_COUNT" -eq 3 ]; then
      log_success "All 3 nodes in cluster"
      echo "$CLUSTER_PEERS"
    else
      log_warn "Only $PEER_COUNT nodes in cluster (expected 3)"
      EXIT_CODE=1
    fi
  else
    # Not treated as a failure: the exit code is left untouched.
    log_warn "Could not retrieve cluster peer list"
  fi
else
  log_warn "VAULT_TOKEN not provided, skipping cluster status check"
fi

echo ""

# Summary
# EXIT_CODE is 0 only when no check above recorded a failure; it doubles as
# the script's exit status.
if [[ "$EXIT_CODE" -eq 0 ]]; then
  log_success "✅ All health checks passed"
else
  log_error "✗ Some health checks failed"
fi

exit "$EXIT_CODE"