Files
proxmox/scripts/fix-r630-04-via-cluster.sh
defiQUG cb47cce074 Complete markdown files cleanup and organization
- Organized 252 files across project
- Root directory: 187 → 2 files (98.9% reduction)
- Moved configuration guides to docs/04-configuration/
- Moved troubleshooting guides to docs/09-troubleshooting/
- Moved quick start guides to docs/01-getting-started/
- Moved reports to reports/ directory
- Archived temporary files
- Generated comprehensive reports and documentation
- Created maintenance scripts and guides

All files organized according to established standards.
2026-01-06 01:46:25 -08:00

165 lines
5.5 KiB
Bash
Executable File

#!/bin/bash
#
# fix-r630-04-via-cluster.sh
#
# Repairs the Proxmox node r630-04 by reaching it through another cluster
# node (ml110 or any other member) that acts as an SSH jump host.
#
# Usage: ./scripts/fix-r630-04-via-cluster.sh

# Strict mode: abort on errors, on unset variables and on failed pipeline stages.
set -o errexit -o nounset -o pipefail

# Absolute directory of this script, and the project root one level above it.
SCRIPT_DIR="$(dirname "${BASH_SOURCE[0]}")"
SCRIPT_DIR="$(cd "$SCRIPT_DIR" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"

# ANSI colour escapes, stored as literal backslash sequences; they are only
# turned into real escape characters when a log helper prints them.
RED="\033[0;31m"
GREEN="\033[0;32m"
YELLOW="\033[1;33m"
BLUE="\033[0;34m"
NC="\033[0m" # reset / "no colour"
# Print an informational message ($1) on stdout, prefixed with a blue [INFO] tag.
# Backslash escapes in the colour variables and in the message are interpreted.
log_info() {
  printf '%b\n' "${BLUE}[INFO]${NC} $1"
}
# Print a success message ($1) on stdout, prefixed with a green check-mark tag.
# Backslash escapes in the colour variables and in the message are interpreted.
log_success() {
  printf '%b\n' "${GREEN}[✓]${NC} $1"
}
# Print a warning message ($1) on stdout, prefixed with a yellow warning tag.
# Backslash escapes in the colour variables and in the message are interpreted.
log_warn() {
  printf '%b\n' "${YELLOW}[⚠]${NC} $1"
}
# Print an error message ($1) on stdout, prefixed with a red cross tag.
# Backslash escapes in the colour variables and in the message are interpreted.
log_error() {
  printf '%b\n' "${RED}[✗]${NC} $1"
}
# Connection settings.
#   JUMP_HOST      cluster node used as the SSH jump host
#   JUMP_PASSWORD  root password for the jump host
#   R630_04_IP     address of r630-04 on the cluster network
#
# Each value may be overridden from the environment, for example:
#   JUMP_PASSWORD='...' ./scripts/fix-r630-04-via-cluster.sh
# When a variable is unset or empty the previous hard-coded value is used, so
# existing invocations behave exactly as before.
#
# NOTE(review): the default below is a root password committed to the
# repository. Prefer supplying JUMP_PASSWORD from the environment (or switching
# to SSH keys) and rotating this credential.
JUMP_HOST="${JUMP_HOST:-192.168.11.10}"
JUMP_PASSWORD="${JUMP_PASSWORD:-L@kers2010}"
R630_04_IP="${R630_04_IP:-192.168.11.14}"
# Banner
for banner_line in \
  "=========================================" \
  "Fixing r630-04 via Cluster Access" \
  "========================================="; do
  log_info "$banner_line"
done
echo

# Step 1: the jump host must answer ping (2 probes, 2 s timeout each) from this
# machine; nothing else in the script can work without it.
log_info "1. Testing jump host (${JUMP_HOST}) connectivity..."
if ! ping -c 2 -W 2 "$JUMP_HOST" &>/dev/null; then
  log_error "Jump host is NOT reachable"
  exit 1
fi
log_success "Jump host is reachable"
echo
# Step 2: ask the jump host to ping r630-04 on the cluster network.
log_info "2. Testing r630-04 connectivity from jump host..."

# Every remote call in this script goes through sshpass. Without this check a
# missing binary would make the probe below fail and be misreported as
# "r630-04 is NOT reachable".
if ! command -v sshpass >/dev/null 2>&1; then
  log_error "sshpass is not installed on this machine; it is required for password-based SSH"
  exit 1
fi

# The remote command always prints "yes" or "no" and exits 0, so a non-zero
# status here means the SSH session to the jump host itself failed (wrong
# password, sshd down, ...). That case is reported separately instead of being
# blamed on r630-04.
probe_rc=0
R630_04_REACHABLE=$(sshpass -p "$JUMP_PASSWORD" ssh -o StrictHostKeyChecking=no root@"$JUMP_HOST" "if ping -c 2 -W 2 $R630_04_IP >/dev/null 2>&1; then echo yes; else echo no; fi" 2>/dev/null) || probe_rc=$?

if (( probe_rc != 0 )); then
  log_error "Could not run the connectivity probe on jump host ${JUMP_HOST} (ssh/sshpass exit status ${probe_rc})"
  log_info "Check the jump host password and that root SSH login is allowed"
  exit 1
fi

if [[ "$R630_04_REACHABLE" == "yes" ]]; then
  log_success "r630-04 is reachable from cluster network"
else
  log_error "r630-04 is NOT reachable from cluster network"
  log_info "This suggests a network issue on r630-04"
  exit 1
fi
echo ""
# Step 3: find a root password that r630-04 accepts, trying each candidate
# from the jump host (the jump host must have sshpass installed as well).
log_info "3. Attempting to access r630-04 via jump host..."
echo ""

# Candidate root passwords, tried in order.
# NOTE(review): these are credentials committed to the repository; consider
# loading them from a protected file or the environment instead.
PASSWORDS=("L@kers2010" "password" "L@kers2010!" "L@kers2010@" "L@kers2010#")
WORKING_PASSWORD=""

attempt=0
for candidate in "${PASSWORDS[@]}"; do
  attempt=$((attempt + 1))
  # Log only the position in the list; never echo any part of a password.
  log_info "Trying password candidate ${attempt}/${#PASSWORDS[@]}"

  # Shell-quote the candidate so any character (including a single quote)
  # survives being embedded in the command line executed on the jump host.
  printf -v quoted_candidate '%q' "$candidate"

  # Succeeds only when the nested login works and 'echo SSH_OK' runs on r630-04.
  if sshpass -p "$JUMP_PASSWORD" ssh -o StrictHostKeyChecking=no root@"$JUMP_HOST" "sshpass -p ${quoted_candidate} ssh -o StrictHostKeyChecking=no -o ConnectTimeout=3 root@$R630_04_IP 'echo SSH_OK' 2>/dev/null" 2>/dev/null; then
    WORKING_PASSWORD="$candidate"
    log_success "Found working password!"
    break
  fi
done

if [[ -z "$WORKING_PASSWORD" ]]; then
  log_error "Could not find working password"
  log_info "r630-04 requires console access to reset password"
  log_info "See: R630-04-CONSOLE-ACCESS-GUIDE.md"
  exit 1
fi
log_info "4. Running fix script on r630-04 via jump host..."
echo ""

# Step 4: run the repair script on r630-04 through the jump host.
#
# Quoting layers of the nested here-documents:
#   * The outer here-document (ENDSSH) is unquoted, so THIS shell expands
#     ${remote_pw_q} and $R630_04_IP before the text is sent to the jump host.
#     Every escaped dollar (backslash + dollar) inside it is passed on as a
#     plain dollar and is therefore evaluated on r630-04, not here.
#   * The inner here-document ('R630_04_FIX') is quoted, so bash on the jump
#     host forwards the repair script verbatim to bash on r630-04.
#
# The password is shell-quoted with %q so that characters such as quotes,
# dollars or backticks cannot be re-interpreted by bash on the jump host.
printf -v remote_pw_q '%q' "$WORKING_PASSWORD"

# Capture the exit status instead of letting 'set -e' kill the script silently.
fix_rc=0
sshpass -p "$JUMP_PASSWORD" ssh -o StrictHostKeyChecking=no root@"$JUMP_HOST" bash <<ENDSSH || fix_rc=$?
sshpass -p ${remote_pw_q} ssh -o StrictHostKeyChecking=no root@$R630_04_IP bash <<'R630_04_FIX'
set -e
echo "=== Fixing r630-04 ==="
echo ""
HOSTNAME=\$(hostname)
IP=\$(hostname -I | awk '{print \$1}')
# Refuse to edit /etc/hosts when either value is empty: an empty match
# pattern would remove every line of the file.
if [ -z "\${IP}" ] || [ -z "\${HOSTNAME}" ]; then
  echo "ERROR: could not determine IP or hostname; /etc/hosts left untouched" >&2
  exit 1
fi
echo "=== Step 1: Fixing /etc/hosts ==="
cp /etc/hosts /etc/hosts.backup.\$(date +%Y%m%d_%H%M%S)
# Build the new file in a temporary location and install it in one write.
# Lines are dropped when their first field is exactly this IP (so that
# 192.168.11.14 does not also remove 192.168.11.140), or when they mention
# the hostname or r630-04 as literal text (not as a regular expression).
HOSTS_TMP=\$(mktemp)
{
  awk -v ip="\${IP}" -v host="\${HOSTNAME}" '
    \$1 == ip              { next }
    index(\$0, host)       { next }
    index(\$0, "r630-04")  { next }
    { print }
  ' /etc/hosts
  echo "\${IP} \${HOSTNAME} \${HOSTNAME}.sankofa.nexus r630-04 r630-04.sankofa.nexus"
} > "\${HOSTS_TMP}"
cat "\${HOSTS_TMP}" > /etc/hosts
rm -f "\${HOSTS_TMP}"
echo "Updated /etc/hosts"
echo ""
echo "=== Step 2: Stopping Proxmox services ==="
# Best effort: services that are already stopped must not abort the script.
systemctl stop pveproxy pvedaemon pvestatd pve-cluster 2>/dev/null || true
sleep 2
echo ""
echo "=== Step 3: Starting pve-cluster service ==="
# If the first start fails, force-kill a possibly stuck pmxcfs and retry once.
systemctl start pve-cluster || {
  systemctl stop pve-cluster 2>/dev/null || true
  killall -9 pmxcfs 2>/dev/null || true
  sleep 2
  systemctl start pve-cluster || echo "pve-cluster may need manual intervention"
}
sleep 3
echo ""
echo "=== Step 4: Regenerating certificates ==="
pvecm updatecerts --force 2>&1 || echo "Certificate regeneration failed (may be expected)"
echo ""
echo "=== Step 5: Starting Proxmox services ==="
systemctl start pvestatd || echo "pvestatd failed"
sleep 1
systemctl start pvedaemon || echo "pvedaemon failed"
sleep 1
systemctl start pveproxy || echo "pveproxy failed"
sleep 3
echo ""
echo "=== Step 6: Service status ==="
for service in pve-cluster pvestatd pvedaemon pveproxy; do
  if systemctl is-active --quiet "\$service" 2>/dev/null; then
    echo "✓ \$service: Active"
  else
    echo "✗ \$service: Inactive"
  fi
done
echo ""
echo "=== Step 7: Port 8006 status ==="
ss -tlnp | grep 8006 || echo "Port 8006 not listening"
echo ""
echo "=== Step 8: Web interface test ==="
curl -k -s -o /dev/null -w "HTTP Status: %{http_code}\n" https://localhost:8006/ 2>&1 || echo "Web interface test failed"
echo ""
echo "=== Step 9: Cluster status ==="
if command -v pvecm >/dev/null 2>&1; then
  # Capture the status of pvecm itself; piping straight into head would
  # report the status of head and the fallback message could never appear.
  CLUSTER_RC=0
  CLUSTER_STATUS=\$(pvecm status 2>&1) || CLUSTER_RC=\$?
  printf '%s\n' "\${CLUSTER_STATUS}" | head -15
  if [ "\${CLUSTER_RC}" -ne 0 ]; then
    echo "Not in cluster"
  fi
fi
echo ""
R630_04_FIX
ENDSSH

if (( fix_rc != 0 )); then
  log_error "Remote fix script exited with status ${fix_rc}; r630-04 may be only partially repaired"
  exit "$fix_rc"
fi
# Closing summary: remind the operator what to verify by hand.
echo
log_success "Fix attempt complete for r630-04"
echo
log_info "Verification:"
for checklist_item in \
  " - Check service status above" \
  " - Test web interface: https://${R630_04_IP}:8006" \
  " - Verify cluster membership if applicable"; do
  log_info "$checklist_item"
done
echo