#!/bin/bash
# Fix Proxmox VE SSL and cluster issues on pve and pve2
# Usage: ./scripts/fix-proxmox-ssl-cluster.sh [pve|pve2|both]
#
# Optional environment:
#   PVE_PASSWORD    root password for pve   (default: "password")
#   PVE2_PASSWORD   root password for pve2  (default: "password")
#
# Exit status:
#   0  every requested host was processed without a hard failure
#   1  invalid target, missing sshpass, or at least one host failed

set -euo pipefail

# shellcheck disable=SC2034  # SCRIPT_DIR/PROJECT_ROOT are currently unused in this script
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# shellcheck disable=SC2034
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"

# Colors
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'

log_info() { echo -e "${BLUE}[INFO]${NC} $1"; }
log_success() { echo -e "${GREEN}[✓]${NC} $1"; }
log_warn() { echo -e "${YELLOW}[⚠]${NC} $1"; }
log_error() { echo -e "${RED}[✗]${NC} $1"; }

# Host configuration, stored as "ip:password".
# The password is everything after the FIRST colon, so it may itself contain
# colons. It can be supplied through the environment instead of editing this file.
declare -A HOSTS
HOSTS[pve]="192.168.11.11:${PVE_PASSWORD:-password}"
HOSTS[pve2]="192.168.11.12:${PVE2_PASSWORD:-password}"

#######################################
# Run the SSL/cluster repair sequence on one Proxmox host over SSH.
# Globals:   HOSTS (read)
# Arguments: $1 - key into HOSTS (e.g. "pve")
# Outputs:   progress and remote command output on stdout
# Returns:   0 if the remote script ran to completion,
#            1 if the host is unknown, unreachable, or the SSH session failed
#######################################
fix_host() {
  local hostname="$1"
  local entry="${HOSTS[$hostname]:-}"

  # Guard against an unknown key; with `set -u` a bare lookup would abort.
  if [[ -z "$entry" ]]; then
    log_error "Unknown host: ${hostname}"
    return 1
  fi

  local ip="${entry%%:*}"
  local password="${entry#*:}"

  log_info "=== Fixing ${hostname} (${ip}) ==="
  echo ""

  # Test connectivity
  if ! ping -c 2 -W 2 "$ip" >/dev/null 2>&1; then
    log_error "Host is NOT reachable"
    return 1
  fi

  log_info "Running fixes via SSH..."
  echo ""

  # The password is handed to sshpass through the SSHPASS environment variable
  # (-e) rather than -p, so it does not show up in the process argument list.
  # NOTE(review): StrictHostKeyChecking=no disables host key verification;
  # kept from the original, confirm this is acceptable for this network.
  local ssh_rc=0
  SSHPASS="$password" sshpass -e ssh \
    -o StrictHostKeyChecking=no \
    -o ConnectTimeout=10 \
    root@"$ip" bash <<'ENDSSH' || ssh_rc=$?
set -e

echo "=== Step 1: Checking current SSL certificate status ==="
ls -la /etc/pve/local/ 2>/dev/null || echo "Directory exists but may have issues"
echo ""

echo "=== Step 2: Stopping Proxmox services ==="
systemctl stop pveproxy pvedaemon pvestatd pve-cluster 2>/dev/null || true
sleep 2
echo ""

echo "=== Step 3: Checking pve-cluster service ==="
systemctl status pve-cluster --no-pager -l | head -20 || true
echo ""

# pve-cluster is started BEFORE certificate regeneration: the certificates
# live under /etc/pve, which is only mounted while pve-cluster is running.
echo "=== Step 4: Starting pve-cluster service ==="
systemctl start pve-cluster || {
  echo "pve-cluster failed to start. Checking logs..."
  # Best effort: a journalctl failure must not abort the recovery attempt.
  journalctl -u pve-cluster --no-pager -n 30 || true
  echo ""
  echo "Attempting to fix cluster filesystem..."
  systemctl stop pve-cluster 2>/dev/null || true
  # Ask pmxcfs to exit with SIGTERM first; force-kill only what is left.
  killall pmxcfs 2>/dev/null || true
  sleep 2
  killall -9 pmxcfs 2>/dev/null || true
  sleep 2
  systemctl start pve-cluster || echo "Still failing - may need manual intervention"
}
sleep 3
echo ""

echo "=== Step 5: Checking /etc/pve mount ==="
mount | grep /etc/pve || echo "/etc/pve is not mounted (cluster filesystem issue)"
echo ""

echo "=== Step 6: Attempting to regenerate SSL certificates ==="
if ! command -v pvecm >/dev/null 2>&1; then
  echo "pvecm not found - skipping certificate regeneration"
elif ! grep -qs ' /etc/pve ' /proc/mounts; then
  echo "/etc/pve is not mounted - skipping certificate regeneration"
else
  echo "Regenerating local certificates..."
  pvecm updatecerts --force 2>&1 || echo "pvecm updatecerts failed (may be expected if not in cluster)"
fi
echo ""

echo "=== Step 7: Verifying /etc/pve is accessible ==="
if [ -d /etc/pve ] && [ -f /etc/pve/local/pve-ssl.key ]; then
  echo "SSL key file exists"
  ls -la /etc/pve/local/pve-ssl.key
else
  echo "WARNING: SSL key file missing or /etc/pve not accessible"
  echo "This may require manual certificate regeneration"
fi
echo ""

echo "=== Step 8: Starting Proxmox services ==="
systemctl start pvestatd || echo "pvestatd failed to start"
sleep 1
systemctl start pvedaemon || echo "pvedaemon failed to start"
sleep 1
systemctl start pveproxy || echo "pveproxy failed to start"
sleep 3
echo ""

echo "=== Step 9: Checking service status ==="
systemctl status pve-cluster --no-pager -l | head -15 || true
echo ""
systemctl status pveproxy --no-pager -l | head -15 || true
echo ""

echo "=== Step 10: Testing web interface ==="
curl -k -s -o /dev/null -w "HTTP Status: %{http_code}\n" https://localhost:8006/ || echo "Web interface test failed"
echo ""

echo "=== Step 11: Checking for worker exits ==="
journalctl -u pveproxy --no-pager -n 10 | grep -E "worker exit|failed to load" || echo "No recent worker exit errors"
echo ""
ENDSSH

  if (( ssh_rc != 0 )); then
    log_error "Remote fix script exited with status ${ssh_rc}"
  fi

  echo ""
  log_info "Testing web interface from remote..."
  sleep 2

  # curl prints "000" and exits non-zero when it cannot connect; the status
  # is captured either way and matched exactly.
  local http_code
  http_code="$(curl -k -s -o /dev/null -w "%{http_code}" --connect-timeout 5 "https://${ip}:8006/" || true)"
  case "$http_code" in
    200 | 401 | 302)
      log_success "Web interface is now accessible!"
      ;;
    *)
      log_warn "Web interface may still not be accessible"
      log_info "You may need to:"
      log_info "  1. Check if pve-cluster service is running"
      log_info "  2. Manually regenerate SSL certificates"
      log_info "  3. Check cluster configuration if in a cluster"
      ;;
  esac

  echo ""
  local rc=0
  if (( ssh_rc != 0 )); then
    log_warn "Fix attempt for ${hostname} finished with errors"
    rc=1
  else
    log_success "Fix attempt complete for ${hostname}"
  fi
  echo ""
  echo "----------------------------------------"
  echo ""
  return "$rc"
}

#######################################
# Parse the target argument and run fix_host for each selected host.
# Arguments: $1 - "pve", "pve2" or "both" (default: "both")
# Returns:   exits 1 on invalid target, missing sshpass, or any host failure
#######################################
main() {
  local target="${1:-both}"
  local -a targets

  # Determine which hosts to fix
  case "$target" in
    both) targets=(pve pve2) ;;
    pve | pve2) targets=("$target") ;;
    *)
      log_error "Invalid target: $target"
      echo "Usage: $0 [pve|pve2|both]"
      exit 1
      ;;
  esac

  if ! command -v sshpass >/dev/null 2>&1; then
    log_error "sshpass is required but was not found in PATH"
    exit 1
  fi

  # Run fixes. A failure on one host must not prevent the next host from
  # being attempted, so failures are counted instead of aborting via set -e.
  local host
  local failed=0
  for host in "${targets[@]}"; do
    if ! fix_host "$host"; then
      failed=$((failed + 1))
    fi
  done

  if (( failed > 0 )); then
    log_error "${failed} host(s) could not be fixed - review the output above"
    exit 1
  fi

  log_success "All fix attempts complete!"
  log_info "Please review the output above and verify services are running correctly."
}

main "$@"