- Organized 252 files across project
- Root directory: 187 → 2 files (98.9% reduction)
- Moved configuration guides to docs/04-configuration/
- Moved troubleshooting guides to docs/09-troubleshooting/
- Moved quick start guides to docs/01-getting-started/
- Moved reports to reports/ directory
- Archived temporary files
- Generated comprehensive reports and documentation
- Created maintenance scripts and guides

All files organized according to established standards.
165 lines
5.5 KiB
Bash
Executable File
165 lines
5.5 KiB
Bash
Executable File
#!/bin/bash
# Fix r630-04 via cluster access (from ml110 or other cluster node)
# This script attempts to fix r630-04 by accessing it from within the cluster network
# Usage: ./scripts/fix-r630-04-via-cluster.sh
#
# Requirements: ping, ssh and sshpass on the machine running this script,
# and sshpass on the jump host (it is used there for the second SSH hop).

set -euo pipefail

# Resolve the script's own directory. CDPATH is cleared for the cd so that a
# relative dirname result (e.g. "scripts") cannot be resolved against some
# other directory listed in the caller's CDPATH, and so cd prints nothing.
SCRIPT_DIR="$(CDPATH='' cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(CDPATH='' cd -- "$SCRIPT_DIR/.." && pwd)"

# Colors (ANSI escape sequences, stored with a literal backslash; they are
# turned into real escape bytes by printf's %b in the helpers below)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'

# Logging helpers.
# Arguments: $1 - message text (optional; defaults to an empty string so a
#                 bare call does not abort the script under `set -u`)
# Outputs:   one line consisting of a coloured tag followed by the message.
#            The message is printed verbatim via %s, so backslashes in it
#            (paths, regexes, ...) are not interpreted as escape sequences.
#            log_info/log_success write to stdout; log_warn/log_error are
#            diagnostics and write to stderr.
log_info() { printf '%b[INFO]%b %s\n' "$BLUE" "$NC" "${1-}"; }
log_success() { printf '%b[✓]%b %s\n' "$GREEN" "$NC" "${1-}"; }
log_warn() { printf '%b[⚠]%b %s\n' "$YELLOW" "$NC" "${1-}" >&2; }
log_error() { printf '%b[✗]%b %s\n' "$RED" "$NC" "${1-}" >&2; }

# Cluster node to use as jump host.
# Each value can be overridden from the environment, e.g.
#   JUMP_PASSWORD='...' ./scripts/fix-r630-04-via-cluster.sh
# and falls back to the previous hard-coded value when unset or empty.
JUMP_HOST="${JUMP_HOST:-192.168.11.10}"
# SECURITY: the fallback below is a plain-text root password kept only for
# backward compatibility. Prefer supplying JUMP_PASSWORD via the environment
# (or switch to SSH keys) and rotate this credential.
JUMP_PASSWORD="${JUMP_PASSWORD:-L@kers2010}"
# Address of the node being repaired, as seen from the cluster network.
R630_04_IP="${R630_04_IP:-192.168.11.14}"

log_info "========================================="
log_info "Fixing r630-04 via Cluster Access"
log_info "========================================="
echo ""

# Test jump host connectivity.
# Two ICMP probes with a 2-second reply timeout; everything after this step
# is run through the jump host, so the script stops here if it is unreachable.
log_info "1. Testing jump host (${JUMP_HOST}) connectivity..."
if ping -c 2 -W 2 "$JUMP_HOST" >/dev/null 2>&1; then
  log_success "Jump host is reachable"
else
  log_error "Jump host is NOT reachable"
  exit 1
fi
echo ""

# Test r630-04 from jump host
log_info "2. Testing r630-04 connectivity from jump host..."

# The remote command always exits 0 and prints "yes" or "no", so a non-zero
# status from the local command means the SSH hop itself failed (sshpass not
# installed, wrong jump-host credentials, connection refused or timed out),
# not the ping. That case is reported separately instead of being blamed on
# r630-04.
# SSHPASS + "sshpass -e" keeps the jump-host password out of the local
# process list, where "sshpass -p" would expose it in argv.
if ! R630_04_REACHABLE=$(SSHPASS="$JUMP_PASSWORD" sshpass -e ssh -o StrictHostKeyChecking=no -o ConnectTimeout=10 root@"$JUMP_HOST" "ping -c 2 -W 2 $R630_04_IP >/dev/null 2>&1 && echo 'yes' || echo 'no'" 2>/dev/null); then
  R630_04_REACHABLE="ssh-failed"
fi

case "$R630_04_REACHABLE" in
  yes)
    log_success "r630-04 is reachable from cluster network"
    ;;
  ssh-failed)
    log_error "Could not run commands on jump host ${JUMP_HOST} via SSH"
    log_info "Check that sshpass is installed locally and the jump host credentials are correct"
    exit 1
    ;;
  *)
    log_error "r630-04 is NOT reachable from cluster network"
    log_info "This suggests a network issue on r630-04"
    exit 1
    ;;
esac
echo ""

# Try to access r630-04 via jump host
log_info "3. Attempting to access r630-04 via jump host..."
echo ""

# Candidate root passwords for r630-04, tried in order from the jump host.
# SECURITY: these are plain-text credentials kept in the script; move them
# to a protected file or a secret store and rotate them when possible.
PASSWORDS=("L@kers2010" "password" "L@kers2010!" "L@kers2010@" "L@kers2010#")

# Set to the first candidate that allows an SSH login; stays empty if none do.
WORKING_PASSWORD=""
attempt=0
for candidate in "${PASSWORDS[@]}"; do
  attempt=$((attempt + 1))
  # Log only the position of the candidate, never any part of its value.
  log_info "Trying password candidate ${attempt} of ${#PASSWORDS[@]}"
  # The candidate is re-parsed by the shell on the jump host, so it is
  # shell-quoted with %q instead of being wrapped in '...' (which breaks as
  # soon as a password contains a single quote).
  printf -v candidate_q '%q' "$candidate"
  if SSHPASS="$JUMP_PASSWORD" sshpass -e ssh -o StrictHostKeyChecking=no root@"$JUMP_HOST" "sshpass -p $candidate_q ssh -o StrictHostKeyChecking=no -o ConnectTimeout=3 root@$R630_04_IP 'echo SSH_OK' 2>/dev/null" 2>/dev/null; then
    WORKING_PASSWORD="$candidate"
    log_success "Found working password!"
    break
  fi
done

if [[ -z "$WORKING_PASSWORD" ]]; then
  log_error "Could not find working password"
  log_info "r630-04 requires console access to reset password"
  log_info "See: R630-04-CONSOLE-ACCESS-GUIDE.md"
  exit 1
fi

log_info "4. Running fix script on r630-04 via jump host..."
echo ""

# Execute fix script remotely via jump host.
#
# Two nested here-documents are used:
#   * ENDSSH is unquoted, so it is expanded on THIS machine before being sent
#     to bash on the jump host. Only the r630-04 password and address are
#     meant to expand here; every other "$" in the body is written as "\$"
#     so that it reaches r630-04 untouched.
#   * R630_04_FIX is quoted, so the jump host forwards its body verbatim to
#     bash on r630-04.
#
# The r630-04 password is shell-quoted with %q because the jump host re-parses
# it, and it is handed to sshpass through the SSHPASS environment variable so
# it does not show up in the jump host's process list.
printf -v WORKING_PASSWORD_Q '%q' "$WORKING_PASSWORD"

SSHPASS="$JUMP_PASSWORD" sshpass -e ssh -o StrictHostKeyChecking=no root@"$JUMP_HOST" bash <<ENDSSH
SSHPASS=${WORKING_PASSWORD_Q} sshpass -e ssh -o StrictHostKeyChecking=no root@$R630_04_IP bash <<'R630_04_FIX'
set -e

echo "=== Fixing r630-04 ==="
echo ""

HOSTNAME=\$(hostname)
IP=\$(hostname -I | awk '{print \$1}')

# Both values are used to select which /etc/hosts lines get removed.
# An empty value would select far too much, so stop before touching the file.
if [ -z "\$HOSTNAME" ] || [ -z "\$IP" ]; then
    echo "Could not determine hostname or IP address; /etc/hosts left untouched" >&2
    exit 1
fi

echo "=== Step 1: Fixing /etc/hosts ==="
HOSTS_BACKUP=/etc/hosts.backup.\$(date +%Y%m%d_%H%M%S)
cp /etc/hosts "\$HOSTS_BACKUP"
# Rebuild the file from the backup. A line is dropped only when its address
# field equals this node IP exactly, or when one of its name fields equals one
# of the names written below exactly. Comment lines are kept, and name
# matching stops at an inline comment. Unrelated entries that merely share a
# prefix or substring are preserved.
HOSTS_TMP=\$(mktemp)
awk -v ip="\$IP" -v hn="\$HOSTNAME" '
    \$1 ~ /^#/ { print; next }
    {
        stale = (\$1 == ip)
        for (i = 2; i <= NF; i++) {
            if (\$i ~ /^#/) break
            if (\$i == hn || \$i == hn ".sankofa.nexus" || \$i == "r630-04" || \$i == "r630-04.sankofa.nexus") {
                stale = 1
            }
        }
        if (!stale) print
    }
' "\$HOSTS_BACKUP" > "\$HOSTS_TMP"
echo "\${IP} \${HOSTNAME} \${HOSTNAME}.sankofa.nexus r630-04 r630-04.sankofa.nexus" >> "\$HOSTS_TMP"
# Write through the existing file so its ownership and mode are kept.
cat "\$HOSTS_TMP" > /etc/hosts
rm -f "\$HOSTS_TMP"
echo "Updated /etc/hosts"
echo ""

echo "=== Step 2: Stopping Proxmox services ==="
# Best effort: services that are already stopped or missing are not an error.
systemctl stop pveproxy pvedaemon pvestatd pve-cluster 2>/dev/null || true
sleep 2
echo ""

echo "=== Step 3: Starting pve-cluster service ==="
# If the first start fails, stop the unit, kill any leftover pmxcfs process
# and retry once; a second failure is reported but does not abort the run.
systemctl start pve-cluster || {
    systemctl stop pve-cluster 2>/dev/null || true
    killall -9 pmxcfs 2>/dev/null || true
    sleep 2
    systemctl start pve-cluster || echo "pve-cluster may need manual intervention"
}
sleep 3
echo ""

echo "=== Step 4: Regenerating certificates ==="
pvecm updatecerts --force 2>&1 || echo "Certificate regeneration failed (may be expected)"
echo ""

echo "=== Step 5: Starting Proxmox services ==="
systemctl start pvestatd || echo "pvestatd failed"
sleep 1
systemctl start pvedaemon || echo "pvedaemon failed"
sleep 1
systemctl start pveproxy || echo "pveproxy failed"
sleep 3
echo ""

echo "=== Step 6: Service status ==="
for service in pve-cluster pvestatd pvedaemon pveproxy; do
    if systemctl is-active --quiet "\$service" 2>/dev/null; then
        echo "✓ \$service: Active"
    else
        echo "✗ \$service: Inactive"
    fi
done
echo ""

echo "=== Step 7: Port 8006 status ==="
# Match the port column itself, not any occurrence of the digits 8006
# (which could also appear in a PID or in a longer port number).
ss -tlnp | grep ':8006[[:space:]]' || echo "Port 8006 not listening"
echo ""

echo "=== Step 8: Web interface test ==="
# The time limit keeps a hung pveproxy from stalling the whole run.
curl -k -s --max-time 10 -o /dev/null -w "HTTP Status: %{http_code}\n" https://localhost:8006/ 2>&1 || echo "Web interface test failed"
echo ""

echo "=== Step 9: Cluster status ==="
if command -v pvecm >/dev/null 2>&1; then
    # Decide on the exit status of pvecm itself. In a pipeline ending in head
    # the status of head would be used, and the fallback could never trigger.
    if CLUSTER_STATUS=\$(pvecm status 2>&1); then
        printf '%s\n' "\$CLUSTER_STATUS" | head -15
    else
        echo "Not in cluster"
    fi
fi
echo ""
R630_04_FIX
ENDSSH

echo ""
log_success "Fix attempt complete for r630-04"
echo ""
log_info "Verification:"
log_info "  - Check service status above"
log_info "  - Test web interface: https://${R630_04_IP}:8006"
log_info "  - Verify cluster membership if applicable"
echo ""