- Organized 252 files across project
- Root directory: 187 → 2 files (98.9% reduction)
- Moved configuration guides to docs/04-configuration/
- Moved troubleshooting guides to docs/09-troubleshooting/
- Moved quick start guides to docs/01-getting-started/
- Moved reports to reports/ directory
- Archived temporary files
- Generated comprehensive reports and documentation
- Created maintenance scripts and guides

All files organized according to established standards.
199 lines
6.5 KiB
Bash
Executable File
199 lines
6.5 KiB
Bash
Executable File
#!/bin/bash
# Complete fix for r630-04: hostname resolution, SSL, cluster, and pveproxy
#
# Usage: ./scripts/fix-r630-04-complete.sh [password]
#   password  Optional root password for the host. When omitted, the script
#             tries a short list of known passwords before giving up.

# Abort on command failure, unset variables, and failures inside pipelines.
set -euo pipefail

# Absolute directory containing this script, and its parent directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"

# Colors (ANSI escape sequences, stored unexpanded; printf's %b expands them)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'

# Logging helpers: print a colored tag followed by the message in $1.
# The tag goes through %b so the color escapes are interpreted, while the
# message goes through %s so backslashes in it are printed literally.
# ${1:-} keeps a call with no argument from aborting under `set -u`.
log_info()    { printf '%b %s\n' "${BLUE}[INFO]${NC}" "${1:-}"; }
log_success() { printf '%b %s\n' "${GREEN}[✓]${NC}" "${1:-}"; }
log_warn()    { printf '%b %s\n' "${YELLOW}[⚠]${NC}" "${1:-}"; }
log_error()   { printf '%b %s\n' "${RED}[✗]${NC}" "${1:-}"; }

# Host configuration
R630_04_IP="192.168.11.14"
R630_04_PASSWORD="${1:-}"  # Allow password as first argument
R630_04_HOSTNAME="r630-04"

# Without sshpass every probe below fails and the script would wrongly
# report a password problem, so check for it up front.
if ! command -v sshpass >/dev/null 2>&1; then
    log_error "sshpass is required but not installed"
    exit 1
fi

# Try common passwords if not provided.
# NOTE(review): these are hard-coded credential guesses kept for backward
# compatibility; consider moving them out of the repository.
# NOTE(review): StrictHostKeyChecking=no skips host key verification.
if [[ -z "$R630_04_PASSWORD" ]]; then
    log_warn "Password not provided. Trying common passwords..."
    for candidate in "L@kers2010" "password" "L@kers2010!"; do
        # The password is handed to sshpass through the SSHPASS environment
        # variable (-e) so it does not appear in the process list.
        if SSHPASS="$candidate" sshpass -e ssh -o StrictHostKeyChecking=no -o ConnectTimeout=3 root@"$R630_04_IP" "echo 'OK'" >/dev/null 2>&1; then
            R630_04_PASSWORD="$candidate"
            log_success "Found working password"
            break
        fi
    done
    unset candidate
fi

# No argument given and none of the candidates worked: explain and stop.
if [[ -z "$R630_04_PASSWORD" ]]; then
    log_error "Could not determine password. Please provide as first argument:"
    log_error "  $0 <password>"
    log_error ""
    log_error "Or ensure one of these works: L@kers2010, password, L@kers2010!"
    exit 1
fi

log_info "========================================="
log_info "Complete Fix for r630-04"
log_info "========================================="
echo ""

# Test connectivity: two ICMP probes with a 2-second wait each.
log_info "1. Testing connectivity to ${R630_04_IP}..."
if ping -c 2 -W 2 "$R630_04_IP" >/dev/null 2>&1; then
    log_success "Host is reachable"
else
    log_error "Host is NOT reachable"
    exit 1
fi
echo ""

# Test SSH: run a trivial remote command as root with the chosen password.
# The password is passed via the SSHPASS environment variable (sshpass -e)
# so it does not appear in the process list.
log_info "2. Testing SSH access..."
if SSHPASS="$R630_04_PASSWORD" sshpass -e ssh -o StrictHostKeyChecking=no -o ConnectTimeout=5 root@"$R630_04_IP" "echo 'SSH OK'" >/dev/null 2>&1; then
    log_success "SSH access works"
else
    log_error "SSH access failed - password may be incorrect"
    exit 1
fi
echo ""

# Run complete fix
log_info "3. Running complete fix sequence..."
echo ""

# Everything between the ENDSSH markers is executed by bash on the remote
# host. The delimiter is quoted, so nothing is expanded locally and the
# remote script can use plain $var syntax. The password is passed via the
# SSHPASS environment variable (sshpass -e) to keep it out of the process list.
SSHPASS="$R630_04_PASSWORD" sshpass -e ssh -o StrictHostKeyChecking=no root@"$R630_04_IP" bash <<'ENDSSH'
set -e

echo "=== Step 1: Fixing /etc/hosts ==="
HOSTNAME=$(hostname)
IP=$(hostname -I | awk '{print $1}')

# An empty IP or hostname would turn the delete patterns below into
# match-everything expressions and wipe /etc/hosts, so stop first.
if [[ -z "$IP" || -z "$HOSTNAME" ]]; then
    echo "ERROR: Could not determine hostname or primary IP; /etc/hosts left untouched" >&2
    exit 1
fi

# Backup
cp /etc/hosts "/etc/hosts.backup.$(date +%Y%m%d_%H%M%S)"

# Remove any existing entries for this hostname and IP.
# Dots are escaped and the patterns are anchored so that only the exact
# address (not e.g. 192.168.11.140) and whole host names (bare or followed
# by a domain) are matched.
ip_re=${IP//./\\.}
sed -i -E "/^${ip_re}([[:space:]]|\$)/d" /etc/hosts
for name in "$HOSTNAME" "r630-04"; do
    name_re=${name//./\\.}
    sed -i -E "/(^|[[:space:]])${name_re}(\.|[[:space:]]|\$)/d" /etc/hosts
done

# Add correct entry - hostname must resolve to non-loopback IP
if [[ "$HOSTNAME" == "r630-04" ]]; then
    # Avoid listing the same names twice on one line.
    echo "${IP} r630-04 r630-04.sankofa.nexus" >> /etc/hosts
else
    echo "${IP} ${HOSTNAME} ${HOSTNAME}.sankofa.nexus r630-04 r630-04.sankofa.nexus" >> /etc/hosts
fi

echo "Updated /etc/hosts:"
grep -E "127.0.0.1|${HOSTNAME}|r630-04" /etc/hosts
echo ""

echo "=== Step 2: Testing hostname resolution ==="
resolved_ip=$(getent hosts "${HOSTNAME}" | awk '{print $1}')
if [[ "$resolved_ip" == "${IP}" ]]; then
    echo "SUCCESS: Hostname resolves to correct IP (${IP})"
else
    echo "WARNING: Hostname resolves to: $resolved_ip (expected ${IP})"
fi
echo ""

echo "=== Step 3: Stopping Proxmox services ==="
# Best effort: services that are already stopped or missing are ignored.
systemctl stop pveproxy pvedaemon pvestatd pve-cluster 2>/dev/null || true
sleep 2
echo ""

echo "=== Step 4: Checking pve-cluster service ==="
systemctl status pve-cluster --no-pager -l | head -20 || true
echo ""

echo "=== Step 5: Starting pve-cluster service ==="
systemctl start pve-cluster || {
    echo "pve-cluster failed to start. Checking logs..."
    journalctl -u pve-cluster --no-pager -n 30
    echo ""
    echo "Attempting to fix cluster filesystem..."
    systemctl stop pve-cluster 2>/dev/null || true
    # Force-kill any pmxcfs process left behind after the stop request.
    killall -9 pmxcfs 2>/dev/null || true
    sleep 2
    systemctl start pve-cluster || echo "Still failing - may need manual intervention"
}
sleep 3
echo ""

echo "=== Step 6: Verifying /etc/pve is accessible ==="
if [ -d /etc/pve ] && [ -f /etc/pve/local/pve-ssl.key ]; then
    echo "SSL key file exists"
    ls -la /etc/pve/local/pve-ssl.key
else
    echo "WARNING: SSL key file missing or /etc/pve not accessible"
    echo "Attempting certificate regeneration..."
    pvecm updatecerts --force 2>&1 || echo "Certificate regeneration failed (may be expected if not in cluster)"
fi
echo ""

echo "=== Step 7: Starting Proxmox services ==="
# Start failures are reported but do not abort the remaining diagnostics.
systemctl start pvestatd || echo "pvestatd failed to start"
sleep 1
systemctl start pvedaemon || echo "pvedaemon failed to start"
sleep 1
systemctl start pveproxy || echo "pveproxy failed to start"
sleep 3
echo ""

echo "=== Step 8: Checking service status ==="
for service in pve-cluster pvestatd pvedaemon pveproxy; do
    echo "--- ${service} ---"
    systemctl status "${service}" --no-pager -l | head -15 || true
    echo ""
done

echo "=== Step 9: Testing web interface ==="
curl -k -s -o /dev/null -w "HTTP Status: %{http_code}\n" https://localhost:8006/ || echo "Web interface test failed"
echo ""

echo "=== Step 10: Checking for worker exits ==="
journalctl -u pveproxy --no-pager -n 20 | grep -E "worker exit|failed to load" || echo "No recent worker exit errors"
echo ""

echo "=== Step 11: Port 8006 status ==="
ss -tlnp | grep 8006 || echo "Port 8006 is NOT listening"
echo ""

echo "=== Step 12: Cluster status ==="
if command -v pvecm >/dev/null 2>&1; then
    pvecm status 2>&1 || echo "Not in cluster or cluster check failed"
else
    echo "pvecm command not found"
fi
echo ""
ENDSSH

echo ""
log_info "4. Testing web interface from remote..."
sleep 2

# Fetch only the HTTP status code from the web UI on port 8006.
# --max-time bounds the whole request so a stalled connection cannot hang
# the script; `|| true` keeps a curl failure (which still prints 000) from
# aborting under `set -e`.
http_code="$(curl -k -s -o /dev/null -w "%{http_code}" --connect-timeout 5 --max-time 15 "https://${R630_04_IP}:8006/" || true)"
case "$http_code" in
    200|401|302)
        log_success "Web interface is now accessible!"
        ;;
    *)
        log_warn "Web interface may still not be accessible"
        log_info "You may need to:"
        log_info "  1. Check if pve-cluster service is running"
        log_info "  2. Manually regenerate SSL certificates"
        log_info "  3. Check cluster configuration if in a cluster"
        ;;
esac
echo ""

log_success "Fix complete for r630-04"
echo ""
log_info "Next steps:"
log_info "  1. Verify cluster membership: ssh root@${R630_04_IP} 'pvecm status'"
log_info "  2. If not in cluster, join to cluster 'h'"
log_info "  3. Verify storage configuration"
echo ""