Files
loc_az_hci/scripts/deploy/complete-all-next-steps.sh
defiQUG c39465c2bd
Some checks failed
Test / test (push) Has been cancelled
Initial commit: loc_az_hci (smom-dbis-138 excluded via .gitignore)
Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-08 09:04:46 -08:00

286 lines
8.4 KiB
Bash
Executable File

#!/bin/bash
# NOTE(review): ~/.bashrc is sourced before strict mode is switched on below,
# so a failing command or unset variable inside it does not abort this script.
source ~/.bashrc
# Master Orchestration Script - Complete All Next Steps
# Executes all deployment steps in recommended order
#
# Strict mode: exit on unhandled errors, on unset variables, and when any
# stage of a pipeline fails.
set -euo pipefail
# Absolute directory of this script, and the project root two levels above it.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
# Load environment variables from $PROJECT_ROOT/.env, if that file exists.
# `set -a` marks every variable assigned while sourcing for export.
# The filter pipeline drops lines that start with '#', drops empty lines,
# removes everything from the first '#' to the end of each remaining line,
# and keeps only lines that contain '='.
# NOTE(review): the sed expression also cuts a '#' that is part of a value
# (for example inside a password) - confirm no .env value contains '#'.
if [ -f "$PROJECT_ROOT/.env" ]; then
set -a
source <(grep -v '^#' "$PROJECT_ROOT/.env" | grep -v '^$' | sed 's/#.*$//' | grep '=')
set +a
fi
# ANSI colour codes for log output.
# Each value is stored as the literal backslash sequence; it only becomes a
# real escape character when printed with escape interpretation enabled.
NC='\033[0m'          # reset / "no colour"
BLUE='\033[0;34m'
YELLOW='\033[1;33m'
GREEN='\033[0;32m'
RED='\033[0;31m'
# Print an informational message to stdout, prefixed with a "[INFO]" tag
# wrapped in the GREEN / NC colour codes.
# Arguments: $1 - message text (backslash escapes in it are interpreted)
log_info() {
    printf '%b\n' "${GREEN}[INFO]${NC} $1"
}
# Print a warning to stderr, prefixed with a "[WARN]" tag wrapped in the
# YELLOW / NC colour codes.
#
# Warnings are diagnostics, so they go to stderr: they stay visible when
# stdout is redirected, and a warning emitted inside a command substitution
# cannot end up in the captured value.
# Arguments: $1 - message text (optional; backslash escapes are interpreted)
log_warn() {
    # ${1-} keeps a call without arguments from aborting under `set -u`.
    printf '%b\n' "${YELLOW}[WARN]${NC} ${1-}" >&2
}
# Print an error message to stderr, prefixed with an "[ERROR]" tag wrapped in
# the RED / NC colour codes.
#
# Errors are diagnostics, so they go to stderr: they stay visible when stdout
# is redirected or captured by a caller.
# Arguments: $1 - message text (optional; backslash escapes are interpreted)
log_error() {
    # ${1-} keeps a call without arguments from aborting under `set -u`,
    # which would hide the error that was being reported.
    printf '%b\n' "${RED}[ERROR]${NC} ${1-}" >&2
}
# Print a section banner to stdout: a blank line, a rule, the title, a second
# rule and a trailing blank line. Rule and title are wrapped in BLUE / NC.
# Arguments: $1 - section title (backslash escapes in it are interpreted)
log_step() {
    local rule="${BLUE}========================================${NC}"

    printf '\n'
    printf '%b\n' "$rule"
    printf '%b\n' "${BLUE}$1${NC}"
    printf '%b\n' "$rule"
    printf '\n'
}
# Private key passed to every ssh call in this script. A non-empty SSH_KEY
# that is already set (environment or .env) wins over the default path.
: "${SSH_KEY:=$HOME/.ssh/id_ed25519_proxmox}"
# Check prerequisites
#
# Confirms that the local files the later steps depend on are present.
# Globals:  SSH_KEY, PROJECT_ROOT (read)
# Exits:    status 1 when the SSH key file or the VM helper library is missing
check_prerequisites() {
    log_step "Checking Prerequisites"

    local helper_lib="$PROJECT_ROOT/scripts/lib/proxmox_vm_helpers.sh"

    [[ -f "$SSH_KEY" ]] || {
        log_error "SSH key not found: $SSH_KEY"
        exit 1
    }

    [[ -f "$helper_lib" ]] || {
        log_error "Helper library not found"
        exit 1
    }

    log_info "Prerequisites check passed"
}
# Step 1: Manual SSH Fix
#
# Runs scripts/fix/fix-vm-ssh-via-console.sh, waits for the operator to press
# Enter, then probes key-based SSH login (user "ubuntu") on each VM address.
# Globals:  PROJECT_ROOT, SSH_KEY (read)
# Input:    one line from stdin (the operator's confirmation)
# Exits:    status 1 when at least one VM cannot be reached over SSH
step1_ssh_fix() {
    local -a vm_ips=(192.168.1.60 192.168.1.188 192.168.1.121 192.168.1.82)
    local ip            # local, so the loop variable does not leak to callers
    local all_ok=true

    log_step "Step 1: Fix SSH Access to VMs (MANUAL)"
    log_warn "This step requires manual intervention via Proxmox Console"
    echo ""
    log_info "Running SSH fix instructions script..."
    "$PROJECT_ROOT/scripts/fix/fix-vm-ssh-via-console.sh"
    echo ""
    log_info "After fixing SSH manually, press Enter to continue..."
    # `read` returns non-zero at EOF (stdin closed or not a terminal). Under
    # `set -e` that would kill the script here without any message, so the
    # status is ignored and the probe below decides whether to continue.
    read -r _ || true

    # Test SSH access
    # NOTE(review): host key verification is disabled for these probes.
    log_info "Testing SSH access..."
    for ip in "${vm_ips[@]}"; do
        if ssh -i "$SSH_KEY" -o ConnectTimeout=5 -o StrictHostKeyChecking=no \
            "ubuntu@$ip" "echo 'SSH OK'" &>/dev/null; then
            log_info " $ip: ✓ SSH working"
        else
            log_error " $ip: ✗ SSH not working"
            all_ok=false
        fi
    done

    if [[ "$all_ok" == false ]]; then
        log_error "SSH access not working for all VMs. Please fix SSH access first."
        exit 1
    fi
    log_info "✓ SSH access verified for all VMs"
}
# Step 2: Install QEMU Guest Agent
#
# Runs scripts/infrastructure/install-qemu-guest-agent.sh from the project.
# Globals:  PROJECT_ROOT (read)
# Returns:  1 when the installation script does not exist
step2_install_qga() {
    log_step "Step 2: Install QEMU Guest Agent"

    local qga_installer="$PROJECT_ROOT/scripts/infrastructure/install-qemu-guest-agent.sh"

    if [[ -f "$qga_installer" ]]; then
        "$qga_installer"
        log_info "✓ QEMU Guest Agent installation complete"
    else
        log_error "QGA installation script not found"
        return 1
    fi
}
# Step 3: Deploy Services
#
# Runs the Gitea and observability deployment scripts when they exist (a
# missing script is only a warning), then checks K3s by running
# `sudo kubectl get nodes` over SSH on the address the helper library
# reports for VM 101.
# Globals:  PROJECT_ROOT, SSH_KEY (read)
step3_deploy_services() {
    log_step "Step 3: Deploy Services"

    # 3.1 Gitea
    local gitea_deployer="$PROJECT_ROOT/scripts/deploy/deploy-gitea.sh"
    log_info "3.1 Deploying Gitea (VM 102)..."
    if [[ ! -f "$gitea_deployer" ]]; then
        log_warn "Gitea deployment script not found, skipping"
    else
        "$gitea_deployer"
    fi
    echo ""

    # 3.2 Observability stack
    local observability_deployer="$PROJECT_ROOT/scripts/deploy/deploy-observability.sh"
    log_info "3.2 Deploying Observability Stack (VM 103)..."
    if [[ ! -f "$observability_deployer" ]]; then
        log_warn "Observability deployment script not found, skipping"
    else
        "$observability_deployer"
    fi
    echo ""

    # 3.3 K3s check
    log_info "3.3 Verifying K3s (VM 101)..."
    source "$PROJECT_ROOT/scripts/lib/proxmox_vm_helpers.sh"
    local k3s_ip
    # A failing lookup is tolerated: an empty address just skips the probe.
    k3s_ip="$(get_vm_ip_or_warn 101 "k3s-master" || true)"
    if [[ -n "$k3s_ip" ]]; then
        local -a k3s_probe=(
            ssh -i "$SSH_KEY" -o StrictHostKeyChecking=no
            "ubuntu@$k3s_ip" "sudo kubectl get nodes"
        )
        # First run is a silent health check; the second shows the node list.
        if "${k3s_probe[@]}" &>/dev/null; then
            log_info "✓ K3s is running"
            "${k3s_probe[@]}"
        else
            log_warn "K3s may not be fully configured"
        fi
    fi

    log_info "✓ Service deployment complete"
}
# Step 4: Join R630 to Cluster
#
# When root SSH to 192.168.1.49 works, feeds a short script to the remote
# shell that exports the cluster settings and runs
# infrastructure/proxmox/cluster-setup.sh, then prints `pvecm status`.
# When SSH does not work, prints manual instructions and carries on.
# Globals:  SSH_KEY (read), PVE_ROOT_PASS (read, optional)
step4_join_r630() {
    local r630_host="root@192.168.1.49"
    local quoted_root_pass

    log_step "Step 4: Join R630 to Cluster"
    log_info "Checking SSH access to R630..."

    if ssh -i "$SSH_KEY" -o ConnectTimeout=5 "$r630_host" "echo 'SSH OK'" &>/dev/null; then
        log_info "✓ SSH to R630 is working"
        log_info "Joining R630 to cluster..."

        # The here-doc is expanded locally and then parsed by the remote
        # shell. Shell-quote the password so spaces, quotes, `$`, `;` etc.
        # reach ROOT_PASSWORD verbatim instead of being split or executed.
        # An empty or unset password becomes ''.
        # NOTE(review): %q may emit $'...' for control characters, which
        # assumes the remote login shell is bash - confirm for this host.
        printf -v quoted_root_pass '%q' "${PVE_ROOT_PASS:-}"

        ssh -i "$SSH_KEY" "$r630_host" <<EOF
cd /home/intlc/projects/loc_az_hci
export CLUSTER_NAME=hc-cluster
export NODE_ROLE=join
export CLUSTER_NODE_IP=192.168.1.206
export ROOT_PASSWORD=$quoted_root_pass
./infrastructure/proxmox/cluster-setup.sh
EOF

        log_info "Verifying cluster status..."
        ssh -i "$SSH_KEY" "$r630_host" "pvecm status"
        log_info "✓ R630 joined to cluster"
    else
        log_warn "SSH to R630 not working. Please:"
        log_info " 1. Enable SSH on R630: https://192.168.1.49:8006 → System → Services → ssh"
        log_info " 2. Add SSH key: ssh-copy-id -i $SSH_KEY.pub root@192.168.1.49"
        log_info " 3. Re-run this script"
    fi
}
# Step 5: Configure NFS Storage
#
# Pings the NFS server once; when it answers, runs
# infrastructure/proxmox/nfs-storage.sh on ML110 (192.168.1.206) and, if root
# SSH works, on R630 (192.168.1.49), then lists the resulting storage entry.
# An unreachable NFS server only produces a warning.
# Globals:  SSH_KEY (read), NFS_SERVER (read, defaults to 10.10.10.1)
step5_configure_nfs() {
    log_step "Step 5: Configure NFS Storage"

    local nfs_server="${NFS_SERVER:-10.10.10.1}"
    local ml110_host="root@192.168.1.206"
    local r630_host="root@192.168.1.49"

    log_info "Checking NFS server reachability: $nfs_server"
    if ! ping -c 1 -W 2 "$nfs_server" &>/dev/null; then
        log_warn "NFS server ($nfs_server) is not reachable. Skipping NFS configuration."
        return
    fi
    log_info "✓ NFS server is reachable"

    # Same script text for both hosts; it is fed to the remote shell on stdin.
    local remote_script="cd /home/intlc/projects/loc_az_hci
export NFS_SERVER=$nfs_server
export NFS_PATH=/mnt/storage
export STORAGE_NAME=router-storage
./infrastructure/proxmox/nfs-storage.sh"

    # Configure on ML110
    log_info "Configuring NFS on ML110..."
    ssh -i "$SSH_KEY" "$ml110_host" <<<"$remote_script"

    # Configure on R630 (only if SSH works)
    if ssh -i "$SSH_KEY" -o ConnectTimeout=5 "$r630_host" "echo 'SSH OK'" &>/dev/null; then
        log_info "Configuring NFS on R630..."
        ssh -i "$SSH_KEY" "$r630_host" <<<"$remote_script"
    fi

    log_info "Verifying NFS storage..."
    ssh -i "$SSH_KEY" "$ml110_host" "pvesm status | grep router-storage || echo 'NFS storage not found'"
    log_info "✓ NFS storage configured"
}
# Step 6: Configure VLAN Bridges on R630
#
# When root SSH to 192.168.1.49 works, runs
# infrastructure/network/configure-proxmox-vlans.sh there, restarts the
# networking service and lists interface lines that match vmbr<N>.
# When SSH does not work, only a warning is printed.
# Globals:  SSH_KEY (read)
step6_configure_vlans() {
    local r630_host="root@192.168.1.49"

    log_step "Step 6: Configure VLAN Bridges on R630"

    if ! ssh -i "$SSH_KEY" -o ConnectTimeout=5 "$r630_host" "echo 'SSH OK'" &>/dev/null; then
        log_warn "SSH to R630 not working. Skipping VLAN configuration."
        return
    fi

    log_info "Configuring VLAN bridges on R630..."
    # The script text has no expansions, so the delimiter is quoted.
    ssh -i "$SSH_KEY" "$r630_host" <<'REMOTE'
cd /home/intlc/projects/loc_az_hci
./infrastructure/network/configure-proxmox-vlans.sh
systemctl restart networking
REMOTE

    log_info "Verifying VLAN bridges..."
    ssh -i "$SSH_KEY" "$r630_host" "ip addr show | grep -E 'vmbr[0-9]+'"
    log_info "✓ VLAN bridges configured"
}
# Final status report
#
# Prints `pvecm status` from 192.168.1.206 (a failure is only a warning),
# asks the helper library for the guest-agent IP of VMs 100-103, and lists
# the service URLs.
# Globals:  SSH_KEY, PROJECT_ROOT (read)
final_status() {
    log_step "Final Status Report"

    log_info "Checking cluster status..."
    if ! ssh -i "$SSH_KEY" root@192.168.1.206 "pvecm status" 2>/dev/null; then
        log_warn "Could not get cluster status"
    fi
    printf '\n'

    log_info "Checking VM status..."
    source "$PROJECT_ROOT/scripts/lib/proxmox_vm_helpers.sh"
    local vm_ip
    for vmid in 100 101 102 103; do
        # Lookup errors are swallowed; an empty result is reported below.
        vm_ip="$(get_vm_ip_from_guest_agent "$vmid" 2>/dev/null || true)"
        if [[ -z "$vm_ip" ]]; then
            log_warn " VM $vmid: Could not get IP"
        else
            log_info " VM $vmid: ✓ Running (IP: $vm_ip)"
        fi
    done
    printf '\n'

    log_info "Service URLs:"
    log_info " Gitea: http://192.168.1.121:3000"
    log_info " Prometheus: http://192.168.1.82:9090"
    log_info " Grafana: http://192.168.1.82:3000 (admin/admin)"
    printf '\n'

    log_info "✓ Deployment complete!"
    log_info "Next steps: Configure services (Gitea, Grafana, Cloudflare Tunnel)"
}
# Run the whole deployment: print the banner, then call each stage function
# in its fixed order. Arguments passed to main are not used.
main() {
    local stage

    log_step "Complete Deployment - All Next Steps"

    for stage in \
        check_prerequisites \
        step1_ssh_fix \
        step2_install_qga \
        step3_deploy_services \
        step4_join_r630 \
        step5_configure_nfs \
        step6_configure_vlans \
        final_status; do
        "$stage"
    done
}
# Script entry point: forward the command-line arguments to main (which does
# not currently read them).
main "$@"