#!/bin/bash
# Complete fix for r630-04: hostname resolution, SSL, cluster, and pveproxy
# Usage: ./scripts/fix-r630-04-complete.sh [password]
#
# The root password for the host is taken from the first argument or, when no
# argument is given, from the SSHPASS environment variable. If neither is set,
# a short list of known passwords is probed over SSH.
# Requires: sshpass, ssh, ping.

set -euo pipefail

# Not referenced by the steps visible in this script; retained unchanged.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# shellcheck disable=SC2034
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"

# Colors
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'

# Logging helpers. Each takes the message as $1. %b expands the backslash
# escapes stored in the colour variables. Warnings and errors are diagnostics,
# so they are written to stderr.
log_info() { printf '%b\n' "${BLUE}[INFO]${NC} $1"; }
log_success() { printf '%b\n' "${GREEN}[✓]${NC} $1"; }
log_warn() { printf '%b\n' "${YELLOW}[⚠]${NC} $1" >&2; }
log_error() { printf '%b\n' "${RED}[✗]${NC} $1" >&2; }

# Host configuration
R630_04_IP="192.168.11.14"
# Password: first argument wins; SSHPASS is accepted as a fallback so the
# secret does not have to appear on this script's command line.
R630_04_PASSWORD="${1:-${SSHPASS:-}}"
R630_04_HOSTNAME="r630-04"

# Without sshpass every probe below fails silently and the script would
# misreport the problem as a wrong password.
if ! command -v sshpass >/dev/null 2>&1; then
  log_error "sshpass is required but not installed"
  exit 1
fi

# Try common passwords if not provided
# NOTE(review): hard-coded credentials in source are a security risk; consider
# moving them to a protected file or a secrets store.
if [[ -z "$R630_04_PASSWORD" ]]; then
  log_warn "Password not provided. Trying common passwords..."
  for candidate in "L@kers2010" "password" "L@kers2010!"; do
    # sshpass -e reads the password from SSHPASS, keeping it out of the
    # process list (unlike -p).
    if SSHPASS="$candidate" sshpass -e ssh -o StrictHostKeyChecking=no \
      -o ConnectTimeout=3 root@"$R630_04_IP" "echo 'OK'" >/dev/null 2>&1; then
      R630_04_PASSWORD="$candidate"
      log_success "Found working password"
      break
    fi
  done
fi

if [[ -z "$R630_04_PASSWORD" ]]; then
  log_error "Could not determine password. Please provide as first argument:"
  log_error " $0 <password>"
  log_error ""
  log_error "Or ensure one of these works: L@kers2010, password, L@kers2010!"
  exit 1
fi

log_info "========================================="
log_info "Complete Fix for r630-04"
log_info "========================================="
echo ""

# Test connectivity
log_info "1. Testing connectivity to ${R630_04_IP}..."
if ping -c 2 -W 2 "$R630_04_IP" >/dev/null 2>&1; then
  log_success "Host is reachable"
else
  log_error "Host is NOT reachable"
  exit 1
fi
echo ""

# Test SSH
log_info "2. Testing SSH access..."
# sshpass -e reads the password from SSHPASS, keeping it out of the process
# list (unlike -p).
if SSHPASS="$R630_04_PASSWORD" sshpass -e ssh -o StrictHostKeyChecking=no \
  -o ConnectTimeout=5 root@"$R630_04_IP" "echo 'SSH OK'" >/dev/null 2>&1; then
  log_success "SSH access works"
else
  log_error "SSH access failed - password may be incorrect"
  exit 1
fi
echo ""

# Run complete fix
log_info "3. Running complete fix sequence..."
echo ""

# The remote script is fed to bash on the host through an unquoted
# here-document: unescaped expansions are filled in locally, while everything
# written with a leading backslash is expanded on the remote host.
#
# NOTE(review): the here-document opener and the start of the remote script
# (everything before the "Updated /etc/hosts:" line) were missing from the
# file. The HOSTNAME/IP assignments and Step 1 below are a reconstruction
# inferred from the variables the later steps use -- confirm against the
# intended original. Stale /etc/hosts entries that map the hostname to another
# address are NOT removed here; Step 2 reports a mismatch if one wins.
remote_rc=0
SSHPASS="$R630_04_PASSWORD" sshpass -e ssh -o StrictHostKeyChecking=no \
  root@"$R630_04_IP" bash <<ENDSSH || remote_rc=$?
HOSTNAME="${R630_04_HOSTNAME}"
IP="${R630_04_IP}"

echo "=== Step 1: Updating /etc/hosts ==="
# Append the mapping only when it is not already present, so reruns do not
# pile up duplicate lines.
if ! awk -v ip="\${IP}" -v host="\${HOSTNAME}" '\$1 == ip { for (i = 2; i <= NF; i++) if (\$i == host) found = 1 } END { exit !found }' /etc/hosts; then
  echo "\${IP} \${HOSTNAME}" >> /etc/hosts
fi
echo "Updated /etc/hosts:"
grep -E "127.0.0.1|\${HOSTNAME}|r630-04" /etc/hosts
echo ""

echo "=== Step 2: Testing hostname resolution ==="
resolved_ip=\$(getent hosts "\${HOSTNAME}" | awk '{print \$1}')
if [[ "\$resolved_ip" == "\${IP}" ]]; then
  echo "SUCCESS: Hostname resolves to correct IP (\${IP})"
else
  echo "WARNING: Hostname resolves to: \$resolved_ip (expected \${IP})"
fi
echo ""

echo "=== Step 3: Stopping Proxmox services ==="
# Best effort: some of these services may already be stopped.
systemctl stop pveproxy pvedaemon pvestatd pve-cluster 2>/dev/null || true
sleep 2
echo ""

echo "=== Step 4: Checking pve-cluster service ==="
systemctl status pve-cluster --no-pager -l | head -20 || true
echo ""

echo "=== Step 5: Starting pve-cluster service ==="
systemctl start pve-cluster || {
  echo "pve-cluster failed to start. Checking logs..."
  journalctl -u pve-cluster --no-pager -n 30
  echo ""
  echo "Attempting to fix cluster filesystem..."
  systemctl stop pve-cluster 2>/dev/null || true
  # Force-kill any pmxcfs process left over after the service stop above.
  killall -9 pmxcfs 2>/dev/null || true
  sleep 2
  systemctl start pve-cluster || echo "Still failing - may need manual intervention"
}
sleep 3
echo ""

echo "=== Step 6: Verifying /etc/pve is accessible ==="
if [[ -d /etc/pve && -f /etc/pve/local/pve-ssl.key ]]; then
  echo "SSL key file exists"
  ls -la /etc/pve/local/pve-ssl.key
else
  echo "WARNING: SSL key file missing or /etc/pve not accessible"
  echo "Attempting certificate regeneration..."
  pvecm updatecerts --force 2>&1 || echo "Certificate regeneration failed (may be expected if not in cluster)"
fi
echo ""

echo "=== Step 7: Starting Proxmox services ==="
systemctl start pvestatd || echo "pvestatd failed to start"
sleep 1
systemctl start pvedaemon || echo "pvedaemon failed to start"
sleep 1
systemctl start pveproxy || echo "pveproxy failed to start"
sleep 3
echo ""

echo "=== Step 8: Checking service status ==="
for service in pve-cluster pvestatd pvedaemon pveproxy; do
  echo "--- \${service} ---"
  systemctl status "\${service}" --no-pager -l | head -15 || true
  echo ""
done

echo "=== Step 9: Testing web interface ==="
curl -k -s -o /dev/null -w "HTTP Status: %{http_code}\n" https://localhost:8006/ || echo "Web interface test failed"
echo ""

echo "=== Step 10: Checking for worker exits ==="
journalctl -u pveproxy --no-pager -n 20 | grep -E "worker exit|failed to load" || echo "No recent worker exit errors"
echo ""

echo "=== Step 11: Port 8006 status ==="
ss -tlnp | grep 8006 || echo "Port 8006 is NOT listening"
echo ""

echo "=== Step 12: Cluster status ==="
if command -v pvecm >/dev/null 2>&1; then
  pvecm status 2>&1 || echo "Not in cluster or cluster check failed"
else
  echo "pvecm command not found"
fi
echo ""
ENDSSH

# Report why the script stops instead of letting set -e abort silently; the
# exit status of the ssh/remote shell is passed through unchanged.
if ((remote_rc != 0)); then
  log_error "Remote fix sequence failed (exit status ${remote_rc})"
  exit "$remote_rc"
fi

echo ""
log_info "4. Testing web interface from remote..."
sleep 2

# curl prints 000 as the status code when the connection fails; "|| true"
# keeps set -e from aborting so the failure branch below can explain it.
http_code=$(curl -k -s -o /dev/null -w "%{http_code}" --connect-timeout 5 \
  "https://${R630_04_IP}:8006/" || true)
case "$http_code" in
  200 | 401 | 302)
    log_success "Web interface is now accessible!"
    ;;
  *)
    log_warn "Web interface may still not be accessible"
    log_info "You may need to:"
    log_info " 1. Check if pve-cluster service is running"
    log_info " 2. Manually regenerate SSL certificates"
    log_info " 3. Check cluster configuration if in a cluster"
    ;;
esac

echo ""
log_success "Fix complete for r630-04"
echo ""
log_info "Next steps:"
log_info " 1. Verify cluster membership: ssh root@${R630_04_IP} 'pvecm status'"
log_info " 2. If not in cluster, join to cluster 'h'"
log_info " 3. Verify storage configuration"
echo ""