Files
proxmox/scripts/fix-r630-04-complete.sh
defiQUG cb47cce074 Complete markdown files cleanup and organization
- Organized 252 files across project
- Root directory: 187 → 2 files (98.9% reduction)
- Moved configuration guides to docs/04-configuration/
- Moved troubleshooting guides to docs/09-troubleshooting/
- Moved quick start guides to docs/01-getting-started/
- Moved reports to reports/ directory
- Archived temporary files
- Generated comprehensive reports and documentation
- Created maintenance scripts and guides

All files organized according to established standards.
2026-01-06 01:46:25 -08:00

199 lines
6.5 KiB
Bash
Executable File

#!/bin/bash
#
# fix-r630-04-complete.sh
#
# One-shot repair for the Proxmox node r630-04: hostname resolution
# (/etc/hosts), SSL key material, the cluster filesystem service, and pveproxy.
#
# Usage: ./scripts/fix-r630-04-complete.sh [root-password]
#
# Strict mode: abort on any unhandled failure, on use of an unset variable,
# and when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Absolute path of the directory containing this script, and of the directory
# one level above it.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
# Colors
# Stored as backslash escape sequences (not raw ESC bytes); the log helpers
# expand them with printf's %b.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'

#######################################
# Logging helpers: print a colored tag followed by the message.
# Globals:   RED, GREEN, YELLOW, BLUE, NC (read)
# Arguments: the message; multiple arguments are joined with spaces
# Outputs:   log_info / log_success write to stdout,
#            log_warn / log_error write to stderr
#
# Only the tag goes through %b. The message is printed with %s so that
# backslashes in it (paths, regexes, remote output) are shown verbatim
# instead of being interpreted as escape sequences.
# "$*" (rather than "$1") also keeps the helpers safe under `set -u` when
# they are called without arguments.
#######################################
log_info() { printf '%b %s\n' "${BLUE}[INFO]${NC}" "$*"; }
log_success() { printf '%b %s\n' "${GREEN}[✓]${NC}" "$*"; }
log_warn() { printf '%b %s\n' "${YELLOW}[⚠]${NC}" "$*" >&2; }
log_error() { printf '%b %s\n' "${RED}[✗]${NC}" "$*" >&2; }
# Host configuration
R630_04_IP="192.168.11.14"
# Password source, in order of precedence:
#   1. first command-line argument
#   2. R630_04_PASSWORD from the environment (keeps the secret off this
#      script's own command line)
R630_04_PASSWORD="${1:-${R630_04_PASSWORD:-}}"
R630_04_HOSTNAME="r630-04"

# The SSH probes below discard all output, so a missing sshpass binary would
# otherwise be reported as "Could not determine password". Fail early with
# the real cause instead.
if ! command -v sshpass >/dev/null 2>&1; then
  log_error "sshpass is required but was not found in PATH"
  exit 1
fi

# Try common passwords if not provided
# NOTE(security): these fallback credentials are hard-coded in the repository.
# They are kept so that running the script without arguments behaves as
# before, but they should be rotated and removed from source control.
if [[ -z "$R630_04_PASSWORD" ]]; then
  log_warn "Password not provided. Trying common passwords..."
  for candidate in "L@kers2010" "password" "L@kers2010!"; do
    # SSHPASS + `sshpass -e` passes the password through the environment, so
    # it does not show up in the process list the way `sshpass -p` does.
    if SSHPASS="$candidate" sshpass -e ssh \
      -o StrictHostKeyChecking=no -o ConnectTimeout=3 \
      root@"$R630_04_IP" "echo 'OK'" >/dev/null 2>&1; then
      R630_04_PASSWORD="$candidate"
      log_success "Found working password"
      break
    fi
  done
fi

# Nothing supplied and no fallback worked: explain how to provide one.
if [[ -z "$R630_04_PASSWORD" ]]; then
  log_error "Could not determine password. Please provide as first argument:"
  log_error " $0 <password>"
  log_error ""
  log_error "Or ensure one of these works: L@kers2010, password, L@kers2010!"
  exit 1
fi
log_info "========================================="
log_info "Complete Fix for r630-04"
log_info "========================================="
echo ""

# Test connectivity
# Two ICMP echo requests with a 2-second wait each; give up before attempting
# SSH if the host does not answer.
log_info "1. Testing connectivity to ${R630_04_IP}..."
if ! ping -c 2 -W 2 "$R630_04_IP" >/dev/null 2>&1; then
  log_error "Host is NOT reachable"
  exit 1
fi
log_success "Host is reachable"
echo ""

# Test SSH
# The password is handed to sshpass through the SSHPASS environment variable
# (-e) rather than -p, so it is not visible in the process list.
log_info "2. Testing SSH access..."
if ! SSHPASS="$R630_04_PASSWORD" sshpass -e ssh \
  -o StrictHostKeyChecking=no -o ConnectTimeout=5 \
  root@"$R630_04_IP" "echo 'SSH OK'" >/dev/null 2>&1; then
  log_error "SSH access failed - password may be incorrect"
  exit 1
fi
log_success "SSH access works"
echo ""
# Run complete fix
log_info "3. Running complete fix sequence..."
echo ""

# The script below is streamed to `bash` on the target host over SSH.
# The heredoc delimiter is quoted, so nothing in it is expanded locally:
# every $variable and $(command) is evaluated on the remote side.
# The remote script runs under `set -e`; if it aborts, ssh returns non-zero
# and this script stops as well.
# The password is passed via SSHPASS (-e) to keep it out of the process list.
SSHPASS="$R630_04_PASSWORD" sshpass -e ssh -o StrictHostKeyChecking=no root@"$R630_04_IP" bash <<'ENDSSH'
set -e

echo "=== Step 1: Fixing /etc/hosts ==="
node_name=$(hostname)
# First address reported by `hostname -I`.
# NOTE(review): assumes the first address is the management IP - confirm on
# multi-homed hosts.
node_ip=$(hostname -I | awk '{print $1}')

# Without an address the cleanup below has nothing valid to match on and the
# new entry would be malformed; refuse to touch /etc/hosts in that case.
if [[ -z "$node_ip" ]]; then
  echo "ERROR: could not determine a non-loopback IP address; /etc/hosts left unchanged" >&2
  exit 1
fi

# Backup
cp /etc/hosts "/etc/hosts.backup.$(date +%Y%m%d_%H%M%S)"

# Remove any existing entries for this hostname and IP.
# Matching is done on whole fields (exact address, exact name, or name
# followed by a domain) so that e.g. 192.168.11.140 or a node called
# "r630-041" is not removed by accident. Comment lines are preserved.
hosts_tmp=$(mktemp)
awk -v ip="$node_ip" -v host="$node_name" '
  /^[[:space:]]*#/ { print; next }
  $1 == ip { next }
  {
    for (i = 2; i <= NF; i++) {
      if ($i == host || index($i, host ".") == 1 ||
          $i == "r630-04" || index($i, "r630-04.") == 1) {
        next
      }
    }
    print
  }
' /etc/hosts > "$hosts_tmp"
# Overwrite in place (keeps the file's owner and mode) and clean up.
cat "$hosts_tmp" > /etc/hosts
rm -f "$hosts_tmp"

# Add correct entry - hostname must resolve to non-loopback IP.
# The r630-04 aliases are only appended when they differ from the real
# hostname, so the line never lists the same name twice.
names=("$node_name" "${node_name}.sankofa.nexus")
if [[ "$node_name" != "r630-04" ]]; then
  names+=("r630-04" "r630-04.sankofa.nexus")
fi
echo "${node_ip} ${names[*]}" >> /etc/hosts

echo "Updated /etc/hosts:"
grep -F -e "127.0.0.1" -e "$node_name" -e "r630-04" /etc/hosts
echo ""

echo "=== Step 2: Testing hostname resolution ==="
resolved_ip=$(getent hosts "$node_name" | awk 'NR == 1 {print $1}')
if [[ "$resolved_ip" == "$node_ip" ]]; then
  echo "SUCCESS: Hostname resolves to correct IP (${node_ip})"
else
  echo "WARNING: Hostname resolves to: $resolved_ip (expected ${node_ip})"
fi
echo ""

echo "=== Step 3: Stopping Proxmox services ==="
# Best effort: services that are already stopped or missing are not an error.
systemctl stop pveproxy pvedaemon pvestatd pve-cluster 2>/dev/null || true
sleep 2
echo ""

echo "=== Step 4: Checking pve-cluster service ==="
# `systemctl status` returns non-zero for a stopped unit; this is diagnostic only.
systemctl status pve-cluster --no-pager -l | head -20 || true
echo ""

echo "=== Step 5: Starting pve-cluster service ==="
systemctl start pve-cluster || {
  echo "pve-cluster failed to start. Checking logs..."
  journalctl -u pve-cluster --no-pager -n 30
  echo ""
  echo "Attempting to fix cluster filesystem..."
  systemctl stop pve-cluster 2>/dev/null || true
  # The unit has just been asked to stop; SIGKILL only removes a pmxcfs
  # process that is still around after that.
  killall -9 pmxcfs 2>/dev/null || true
  sleep 2
  systemctl start pve-cluster || echo "Still failing - may need manual intervention"
}
sleep 3
echo ""

echo "=== Step 6: Verifying /etc/pve is accessible ==="
if [ -d /etc/pve ] && [ -f /etc/pve/local/pve-ssl.key ]; then
  echo "SSL key file exists"
  ls -la /etc/pve/local/pve-ssl.key
else
  echo "WARNING: SSL key file missing or /etc/pve not accessible"
  echo "Attempting certificate regeneration..."
  pvecm updatecerts --force 2>&1 || echo "Certificate regeneration failed (may be expected if not in cluster)"
fi
echo ""

echo "=== Step 7: Starting Proxmox services ==="
# Each start is allowed to fail so that the remaining diagnostics still run.
systemctl start pvestatd || echo "pvestatd failed to start"
sleep 1
systemctl start pvedaemon || echo "pvedaemon failed to start"
sleep 1
systemctl start pveproxy || echo "pveproxy failed to start"
sleep 3
echo ""

echo "=== Step 8: Checking service status ==="
for service in pve-cluster pvestatd pvedaemon pveproxy; do
  echo "--- ${service} ---"
  systemctl status "$service" --no-pager -l | head -15 || true
  echo ""
done

echo "=== Step 9: Testing web interface ==="
# --max-time bounds the whole request so a listening but unresponsive
# pveproxy cannot hang the run.
curl -k -s -o /dev/null --max-time 10 -w "HTTP Status: %{http_code}\n" https://localhost:8006/ || echo "Web interface test failed"
echo ""

echo "=== Step 10: Checking for worker exits ==="
journalctl -u pveproxy --no-pager -n 20 | grep -E "worker exit|failed to load" || echo "No recent worker exit errors"
echo ""

echo "=== Step 11: Port 8006 status ==="
ss -tlnp | grep 8006 || echo "Port 8006 is NOT listening"
echo ""

echo "=== Step 12: Cluster status ==="
if command -v pvecm >/dev/null 2>&1; then
  pvecm status 2>&1 || echo "Not in cluster or cluster check failed"
else
  echo "pvecm command not found"
fi
echo ""
ENDSSH
echo ""
log_info "4. Testing web interface from remote..."
sleep 2

# Probe the Proxmox web UI from this machine and keep only the HTTP status.
# --max-time bounds the whole request (not just the TCP connect) so that a
# listening but unresponsive pveproxy cannot hang the script.
# `|| true`: a failed request must fall through to the warning branch below
# instead of aborting under `set -e`.
http_code=$(curl -k -s -o /dev/null -w "%{http_code}" \
  --connect-timeout 5 --max-time 15 "https://${R630_04_IP}:8006/" || true)

case "$http_code" in
  200 | 401 | 302)
    web_ok=1
    log_success "Web interface is now accessible!"
    ;;
  *)
    web_ok=0
    log_warn "Web interface may still not be accessible"
    log_info "You may need to:"
    log_info " 1. Check if pve-cluster service is running"
    log_info " 2. Manually regenerate SSL certificates"
    log_info " 3. Check cluster configuration if in a cluster"
    ;;
esac
echo ""

# Only report success when the check above actually passed. The exit status
# stays 0 in both cases, as before.
if ((web_ok)); then
  log_success "Fix complete for r630-04"
else
  log_warn "Fix sequence finished for r630-04, but the web interface check did not pass"
fi
echo ""
log_info "Next steps:"
log_info " 1. Verify cluster membership: ssh root@${R630_04_IP} 'pvecm status'"
log_info " 2. If not in cluster, join to cluster 'h'"
log_info " 3. Verify storage configuration"
echo ""