Files
proxmox/scripts/maintenance/fix-keycloak-relay-via-ssh.sh
defiQUG dbd517b279 Sync workspace: config, docs, scripts, CI, operator rules, and submodule pointers.
- Update dbis_core, cross-chain-pmm-lps, explorer-monorepo, metamask-integration, pr-workspace/chains
- Omit embedded publish git dirs and empty placeholders from index

Made-with: Cursor
2026-04-12 06:12:20 -07:00

119 lines
4.8 KiB
Bash

#!/usr/bin/env bash
# Repair keycloak.sankofa.nexus after duplicate-IP / stale-neighbor regressions.
# Current durable path is the direct upstream:
# keycloak.sankofa.nexus -> 192.168.11.52:8080
#
# This script:
# 1. Removes the stray 192.168.11.52 alias from CT 10232 if present
# 2. Removes the guest-side reboot job that reintroduces the bad alias
# 3. Flushes stale neighbor state in the primary NPMplus CT
# 4. Forces NPMplus proxy host 60 back to 192.168.11.52:8080
# 5. Disables temporary relay services if they exist
#
# Usage: ./scripts/maintenance/fix-keycloak-relay-via-ssh.sh [--dry-run]
# Strict mode: abort on command failure, unset variables and failed pipeline stages.
set -euo pipefail

# Resolve this script's directory and the project root two levels above it.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"

# Optional project environment: sourced when present. Its stderr is discarded and
# a failure while sourcing is deliberately tolerated (best effort).
if [[ -f "${PROJECT_ROOT}/scripts/lib/load-project-env.sh" ]]; then
  # shellcheck disable=SC1091
  source "${PROJECT_ROOT}/scripts/lib/load-project-env.sh" 2>/dev/null || true
fi

# Print the command-line synopsis to stdout.
usage() {
  printf 'Usage: %s [--dry-run]\n' "${0##*/}"
}

#######################################
# Parse command-line arguments.
# Globals:   DRY_RUN (written: "true" or "false")
# Arguments: the script's positional parameters
# Returns:   exits 0 after printing usage for -h/--help;
#            exits 2 on any unrecognised argument, so that a typo such as
#            "--dryrun" can never fall through to a live run.
#######################################
parse_args() {
  DRY_RUN=false
  local arg
  for arg in "$@"; do
    case "$arg" in
      --dry-run) DRY_RUN=true ;;
      -h | --help)
        usage
        exit 0
        ;;
      '') ;; # tolerate an empty argument, as the original parser did
      *)
        printf 'Unknown argument: %s\n' "$arg" >&2
        usage >&2
        exit 2
        ;;
    esac
  done
}
parse_args "$@"

# Targets; each can be overridden through the environment variable named on its line.
PROXMOX_HOST="${PROXMOX_HOST_R630_01:-192.168.11.11}"
KEYCLOAK_IP="${IP_KEYCLOAK:-192.168.11.52}"
NPM_CID="${NPMPLUS_PRIMARY_VMID:-10233}"
CONFLICT_CID="${KEYCLOAK_CONFLICT_VMID:-10232}"
PROXY_HOST_ID="${KEYCLOAK_NPM_PROXY_HOST_ID:-60}"
# Logging helpers: print a colour-tagged, single-line message to stdout.
# Arguments: $1 - message text (optional; defaults to empty)
# The message is passed through '%s', so backslashes and '%' in it are printed
# literally rather than being interpreted as escape sequences.
log_info() { printf '\033[0;34m[INFO]\033[0m %s\n' "${1:-}"; }
log_ok() { printf '\033[0;32m[✓]\033[0m %s\n' "${1:-}"; }
log_warn() { printf '\033[0;33m[⚠]\033[0m %s\n' "${1:-}"; }
#######################################
# Run a command on the Proxmox host as root over SSH.
# Globals:   PROXMOX_HOST (read)
# Arguments: the remote command line, forwarded verbatim to ssh
# Returns:   the exit status of ssh
# Note: uses a 10-second connect timeout and disables host key checking.
#######################################
run_ssh() {
  local -a ssh_opts=(-o ConnectTimeout=10 -o StrictHostKeyChecking=no)
  local target="root@${PROXMOX_HOST}"
  ssh "${ssh_opts[@]}" "$target" "$@"
}
# Banner: printed once for both modes; DRY_RUN is always the literal "true" or "false".
printf '\n=== Fix Keycloak direct routing via SSH ===\n Host: %s dry-run=%s\n\n' \
  "$PROXMOX_HOST" "$DRY_RUN"

# In dry-run mode, list the planned actions and stop before touching anything.
if [[ "$DRY_RUN" == true ]]; then
  dry_run_plan=(
    "Would remove stray ${KEYCLOAK_IP}/24 from CT ${CONFLICT_CID}"
    "Would remove CT ${CONFLICT_CID} reboot hooks that re-add ${KEYCLOAK_IP}/24"
    "Would flush neighbor cache for ${KEYCLOAK_IP} in NPMplus CT ${NPM_CID}"
    "Would update NPMplus proxy host ${PROXY_HOST_ID} to ${KEYCLOAK_IP}:8080"
    "Would disable temporary keycloak relay services if present"
  )
  for planned_step in "${dry_run_plan[@]}"; do
    log_info "$planned_step"
  done
  echo ""
  exit 0
fi
# Step: drop the Keycloak address from the conflicting container's eth0 (a no-op
# when the alias is absent), then print the container's addresses for the operator.
log_info "Removing any stray ${KEYCLOAK_IP}/24 alias from CT ${CONFLICT_CID}"
strip_alias_script="ip addr del ${KEYCLOAK_IP}/24 dev eth0 2>/dev/null || true
ip -br addr"
run_ssh "pct exec ${CONFLICT_CID} -- bash --norc -c '
${strip_alias_script}
'"
log_ok "Conflict CT ${CONFLICT_CID} no longer carries ${KEYCLOAK_IP}"
# Step: stop the conflicting container from re-adding the Keycloak address on reboot.
# The guest-side script, run via "pct exec" on the Proxmox host:
#   1. filters every crontab line mentioning /usr/local/bin/configure-network.sh
#      out of the current crontab and installs the result (skipped when there is
#      no crontab to read);
#   2. if configure-network.sh exists, backs it up with a timestamp suffix and
#      rewrites it so it removes the Keycloak address and configures eth0 with
#      192.168.11.56/24 and a default route via 192.168.11.11;
#   3. applies the same address/route correction immediately and prints the
#      resulting interface, default route and crontab.
# Scratch files live in a private "mktemp -d" directory that an EXIT trap removes,
# so no predictable /tmp path is written as root and nothing is left behind when
# "set -e" aborts the guest script midway.
# NOTE: the guest script is wrapped in single quotes for the remote shell; it must
# not contain a single-quote character.
log_info "Removing guest-side reboot hooks that reintroduce ${KEYCLOAK_IP}/24 in CT ${CONFLICT_CID}"
run_ssh "pct exec ${CONFLICT_CID} -- bash --norc -c '
set -e
WORK_DIR=\$(mktemp -d)
cleanup() { rm -rf -- \"\${WORK_DIR:?}\"; }
trap cleanup EXIT
CRON_CUR=\"\$WORK_DIR/crontab.current\"
CRON_NEW=\"\$WORK_DIR/crontab.new\"
if crontab -l >\"\$CRON_CUR\" 2>/dev/null; then
  grep -vF \"/usr/local/bin/configure-network.sh\" \"\$CRON_CUR\" >\"\$CRON_NEW\" || true
  crontab \"\$CRON_NEW\"
fi
if [[ -f /usr/local/bin/configure-network.sh ]]; then
  cp /usr/local/bin/configure-network.sh /usr/local/bin/configure-network.sh.bak.\$(date +%Y%m%d%H%M%S)
  cat > /usr/local/bin/configure-network.sh <<\"EOF\"
#!/bin/bash
set -euo pipefail
ip link set eth0 up 2>/dev/null || true
ip addr del ${KEYCLOAK_IP}/24 dev eth0 2>/dev/null || true
ip addr flush dev eth0 scope global 2>/dev/null || true
ip addr add 192.168.11.56/24 dev eth0
ip route replace default via 192.168.11.11 dev eth0
EOF
  chmod 0755 /usr/local/bin/configure-network.sh
fi
ip addr del ${KEYCLOAK_IP}/24 dev eth0 2>/dev/null || true
ip route del default via 192.168.11.1 dev eth0 2>/dev/null || true
ip route replace default via 192.168.11.11 dev eth0
ip -br addr show dev eth0
ip route show default
crontab -l 2>/dev/null || true
'"
log_ok "Conflict CT ${CONFLICT_CID} no longer re-adds ${KEYCLOAK_IP} on reboot"
# Step: best-effort teardown of the temporary relays. Each command tolerates
# failure, so a relay that does not exist is not treated as an error.
log_info "Disabling temporary relay services"
relay_teardown_script='systemctl disable --now keycloak-host-relay.service 2>/dev/null || true
pct exec 7802 -- systemctl disable --now keycloak-ct-relay.service 2>/dev/null || true
pct exec 7804 -- pkill -f /tmp/keycloak_gov_relay.py 2>/dev/null || true'
run_ssh "bash --norc -c '
${relay_teardown_script}
'"
log_ok "Temporary relays disabled"
# Step: drop the cached neighbor entry for the Keycloak address inside the NPMplus
# container, then probe the upstream directly and print "<status> <redirect>".
# The probe is bounded by connect/total timeouts so an unreachable upstream cannot
# hang the script, and a failed probe is reported explicitly instead of aborting
# silently under "set -e".
log_info "Flushing neighbor state for ${KEYCLOAK_IP} in NPMplus CT ${NPM_CID}"
if ! run_ssh "pct exec ${NPM_CID} -- bash --norc -c '
ip neigh del ${KEYCLOAK_IP} dev eth0 2>/dev/null || true
curl -s -o /dev/null --connect-timeout 5 --max-time 15 -w \"%{http_code} %{redirect_url}\n\" -H \"Host: keycloak.sankofa.nexus\" http://${KEYCLOAK_IP}:8080/
'"; then
  log_warn "Keycloak upstream ${KEYCLOAK_IP}:8080 did not respond from NPMplus CT ${NPM_CID}"
  exit 1
fi
log_ok "Direct Keycloak upstream responds from NPMplus CT ${NPM_CID}"
# Step: re-run the project's NPMplus proxy-host sync script, then fetch the first
# response header lines for keycloak.sankofa.nexus from inside the NPMplus container.
# The sync output goes to SYNC_LOG; on failure its tail is shown so the operator
# sees why the run stopped. The remote check enables pipefail so a failing curl is
# not masked by the trailing sed, and uses timeouts so it cannot hang.
log_info "Re-applying canonical NPMplus proxy host mapping for Keycloak"
SYNC_LOG=/tmp/keycloak-npmplus-sync.log
if ! bash "${PROJECT_ROOT}/scripts/nginx-proxy-manager/update-npmplus-proxy-hosts-api.sh" >"$SYNC_LOG" 2>&1; then
  log_warn "NPMplus proxy host sync failed; last lines of ${SYNC_LOG}:"
  tail -n 20 -- "$SYNC_LOG" >&2 || true
  exit 1
fi
if ! run_ssh "pct exec ${NPM_CID} -- bash --norc -c '
set -o pipefail
curl -k -I -s --connect-timeout 5 --max-time 15 -H \"Host: keycloak.sankofa.nexus\" https://127.0.0.1 | sed -n \"1,10p\"
'"; then
  log_warn "NPMplus CT ${NPM_CID} did not answer for keycloak.sankofa.nexus after the sync"
  exit 1
fi
log_ok "NPMplus proxy host ${PROXY_HOST_ID} restored to direct upstream"
echo ""