feat(it-ops): live inventory, drift API, Keycloak IT role, portal sync hint

- Add scripts/it-ops (Proxmox collector, IPAM drift, export orchestrator)
- Add sankofa-it-read-api stub with optional CORS and refresh
- Add systemd examples for read API, weekly inventory export, timer
- Add live-inventory-drift GitHub workflow (dispatch + weekly)
- Add IT controller spec, runbooks, Keycloak ensure-it-admin-role script
- Note IT_READ_API env on portal sync completion output

Made-with: Cursor
This commit is contained in:
defiQUG
2026-04-09 01:20:00 -07:00
parent 4eead3e53f
commit 61841b8291
14 changed files with 1384 additions and 0 deletions

View File

@@ -0,0 +1,120 @@
#!/usr/bin/env bash
# Create Keycloak realm role sankofa-it-admin if missing (IT operations portal /it gate).
# Runs Admin API against http://127.0.0.1:8080 inside the Keycloak CT (same pattern as
# keycloak-sankofa-ensure-client-redirects-via-proxmox-pct.sh).
#
# After the role exists, assign it to IT staff in Keycloak Admin (Users → Role mapping)
# or map it to a group and add a token mapper if you rely on group claims.
#
# Env: KEYCLOAK_ADMIN_PASSWORD in repo .env; optional KEYCLOAK_REALM (default master),
# KEYCLOAK_CT_VMID (7802), PROXMOX_HOST.
#
# Usage:
# ./scripts/deployment/keycloak-sankofa-ensure-it-admin-role.sh [--dry-run]
set -euo pipefail
# Resolve the repository root relative to this script so it runs from any CWD.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
# shellcheck source=/dev/null
source "${PROJECT_ROOT}/config/ip-addresses.conf" 2>/dev/null || true
# Source .env with nounset relaxed (+u) and allexport (-a) so plain KEY=VALUE
# lines become exported variables without aborting on unset references.
if [ -f "$PROJECT_ROOT/.env" ]; then
    set +u
    set -a
    # shellcheck source=/dev/null
    source "$PROJECT_ROOT/.env" 2>/dev/null || true
    set +a
    set -u
fi
# Connection and role parameters; every one is overridable via environment.
PROXMOX_HOST="${PROXMOX_HOST:-${PROXMOX_HOST_R630_01:-192.168.11.11}}"
KEYCLOAK_CT_VMID="${KEYCLOAK_CT_VMID:-${SANKOFA_KEYCLOAK_VMID:-7802}}"
REALM="${KEYCLOAK_REALM:-master}"
ADMIN_USER="${KEYCLOAK_ADMIN:-admin}"
ADMIN_PASS="${KEYCLOAK_ADMIN_PASSWORD:-}"
ROLE_NAME="${SANKOFA_IT_ADMIN_ROLE_NAME:-sankofa-it-admin}"
SSH_OPTS=(-o BatchMode=yes -o StrictHostKeyChecking=accept-new -o ConnectTimeout=15)
DRY=0
[[ "${1:-}" == "--dry-run" ]] && DRY=1
if [ -z "$ADMIN_PASS" ]; then
    echo "KEYCLOAK_ADMIN_PASSWORD is not set in .env" >&2
    exit 1
fi
if [ "$DRY" = 1 ]; then
    echo "[dry-run] Would ssh root@${PROXMOX_HOST} pct exec ${KEYCLOAK_CT_VMID} -- python3 (ensure realm role ${ROLE_NAME} in realm ${REALM})"
    exit 0
fi
# The Python worker below is piped over SSH stdin into `pct exec ... python3 -u -`
# inside the Keycloak CT, so it reaches the Admin API on localhost:8080 directly.
# NOTE(review): ADMIN_PASS is interpolated into the remote command line and is
# briefly visible in `ps` output on the Proxmox host — presumably acceptable on
# a trusted hypervisor, but confirm; an env-file or stdin handoff would avoid it.
ssh "${SSH_OPTS[@]}" "root@${PROXMOX_HOST}" \
    "pct exec ${KEYCLOAK_CT_VMID} -- env KC_PASS=\"${ADMIN_PASS}\" ADMUSER=\"${ADMIN_USER}\" REALM=\"${REALM}\" ROLE_NAME=\"${ROLE_NAME}\" python3 -u -" <<'PY'
import json
import os
import urllib.error
import urllib.parse
import urllib.request
base = "http://127.0.0.1:8080"
realm = os.environ["REALM"]
role_name = os.environ["ROLE_NAME"]
admin_user = os.environ["ADMUSER"]
password = os.environ["KC_PASS"]
def post_form(url: str, data: dict) -> dict:
    """POST urlencoded form *data* to *url* and return the parsed JSON reply."""
    body = urllib.parse.urlencode(data).encode()
    req = urllib.request.Request(url, data=body, method="POST")
    with urllib.request.urlopen(req, timeout=60) as resp:
        return json.loads(resp.read().decode())
# Password-grant admin token from the master realm's built-in admin-cli client.
tok = post_form(
    f"{base}/realms/master/protocol/openid-connect/token",
    {
        "grant_type": "password",
        "client_id": "admin-cli",
        "username": admin_user,
        "password": password,
    },
)
access = tok.get("access_token")
if not access:
    raise SystemExit(f"token failed: {tok}")
headers = {"Authorization": f"Bearer {access}"}
# Idempotency check: GET the role by name first — 200 means it already exists,
# 404 falls through to creation, anything else is a hard failure.
role_url = f"{base}/admin/realms/{realm}/roles/{urllib.parse.quote(role_name, safe='')}"
req_get = urllib.request.Request(role_url, headers=headers)
try:
    with urllib.request.urlopen(req_get, timeout=60) as resp:
        if resp.getcode() in (200, 204):
            print(f"Realm role {role_name!r} already exists in {realm!r}.", flush=True)
            raise SystemExit(0)
except urllib.error.HTTPError as e:
    if e.code != 404:
        err = e.read().decode() if e.fp else str(e)
        raise SystemExit(f"GET role failed HTTP {e.code}: {err}") from e
# Create the realm-level (not client-level) role.
payload = json.dumps(
    {
        "name": role_name,
        "description": "Sankofa IT operations (portal /it, inventory read API consumers)",
        "clientRole": False,
    }
).encode()
req_post = urllib.request.Request(
    f"{base}/admin/realms/{realm}/roles",
    data=payload,
    headers={**headers, "Content-Type": "application/json"},
    method="POST",
)
try:
    with urllib.request.urlopen(req_post, timeout=120) as resp:
        if resp.getcode() not in (200, 201):
            raise SystemExit(f"create role unexpected HTTP {resp.getcode()}")
except urllib.error.HTTPError as e:
    err = e.read().decode() if e.fp else str(e)
    raise SystemExit(f"POST role failed HTTP {e.code}: {err}") from e
print(f"Created realm role {role_name!r} in realm {realm!r}. Assign it to IT users in Admin Console.", flush=True)
PY

View File

@@ -106,5 +106,6 @@ echo "✅ Done. Verify:"
echo " curl -sS http://${IP_SANKOFA_PORTAL:-192.168.11.51}:3000/ | head -c 120"
echo " curl -sSI https://portal.sankofa.nexus/api/auth/signin | head -n 15"
echo " https://portal.sankofa.nexus/ (via NPM; corporate apex is sankofa.nexus → IP_SANKOFA_PUBLIC_WEB)"
echo " IT /it console: set IT_READ_API_URL (and optional IT_READ_API_KEY) in CT ${CT_APP_DIR}/.env — see portal/.env.example"
echo ""
echo "Legacy apex auth URL only if needed: SANKOFA_PORTAL_NEXTAUTH_URL=https://sankofa.nexus $0"

View File

@@ -0,0 +1,203 @@
#!/usr/bin/env python3
"""Merge live JSON with config/ip-addresses.conf; write live_inventory.json + drift.json."""
from __future__ import annotations
import argparse
import json
import re
import sys
from pathlib import Path
IPV4_RE = re.compile(
r"(?<![0-9.])(?:[0-9]{1,3}\.){3}[0-9]{1,3}(?![0-9.])"
)
# VMID | IP | ... (optional ** markdown bold around cells)
MD_VMID_IP_ROW = re.compile(
r"^\|\s*\*{0,2}(\d+)\*{0,2}\s*\|\s*\*{0,2}((?:[0-9]{1,3}\.){3}[0-9]{1,3})\*{0,2}\s*\|"
)
def is_lan_11(ip: str) -> bool:
    """Return True when *ip* lies on the 192.168.11.0/24 management LAN."""
    lan_prefix = "192.168.11."
    return ip[: len(lan_prefix)] == lan_prefix
def parse_all_vmids_markdown(path: Path) -> tuple[set[str], dict[str, str]]:
    """Extract declared LAN IPs and vmid->ip from ALL_VMIDS pipe tables.

    Rows look like ``| 101 | 192.168.11.5 | ...`` with optional ``**`` bold
    markers around cells; anything else is ignored. Missing file -> empty.
    """
    # | VMID | IP | ... with optional ** markdown bold around either cell.
    row_pattern = re.compile(
        r"^\|\s*\*{0,2}(\d+)\*{0,2}\s*\|\s*\*{0,2}((?:[0-9]{1,3}\.){3}[0-9]{1,3})\*{0,2}\s*\|"
    )
    lan_ips: set[str] = set()
    ip_by_vmid: dict[str, str] = {}
    if not path.is_file():
        return lan_ips, ip_by_vmid
    text = path.read_text(encoding="utf-8", errors="replace")
    for raw in text.splitlines():
        hit = row_pattern.match(raw.strip())
        if hit is None:
            continue
        vmid, ip = hit.groups()
        if ip.startswith("192.168.11."):
            lan_ips.add(ip)
        ip_by_vmid[vmid] = ip
    return lan_ips, ip_by_vmid
def parse_ip_addresses_conf(path: Path) -> tuple[dict[str, str], set[str]]:
    """Parse shell-style ``KEY=VALUE`` lines from *path*.

    Returns (variable map with one layer of matching quotes stripped,
    set of every IPv4 literal seen in any value). Missing file -> empty.
    """
    # Dotted quads not embedded in a longer dotted/numeric run.
    ipv4_pattern = re.compile(r"(?<![0-9.])(?:[0-9]{1,3}\.){3}[0-9]{1,3}(?![0-9.])")
    variables: dict[str, str] = {}
    seen_ips: set[str] = set()
    if not path.is_file():
        return variables, seen_ips
    for raw in path.read_text(encoding="utf-8", errors="replace").splitlines():
        stripped = raw.strip()
        # Skip blanks, comments, and lines that are not assignments.
        if not stripped or stripped.startswith("#") or "=" not in stripped:
            continue
        name, _, value = stripped.partition("=")
        name = name.strip()
        value = value.strip()
        # Strip one layer of matching quotes, mimicking shell sourcing.
        for quote in ('"', "'"):
            if value.startswith(quote) and value.endswith(quote):
                value = value[1:-1]
                break
        variables[name] = value
        seen_ips.update(ipv4_pattern.findall(value))
    return variables, seen_ips
def hypervisor_related_keys(var_map: dict[str, str]) -> set[str]:
    """Return the keys of *var_map* that name hypervisor / network-infra hosts.

    Matching is case-insensitive substring search against a fixed marker list;
    these keys hold IPs that are never guest IPs and get excluded from drift.
    """
    markers = (
        "PROXMOX_HOST",
        "PROXMOX_ML110",
        "PROXMOX_R630",
        "PROXMOX_R750",
        "WAN_AGGREGATOR",
        "NETWORK_GATEWAY",
        "UDM_PRO",
        "PUBLIC_IP_GATEWAY",
        "PUBLIC_IP_ER605",
    )
    return {
        key
        for key in var_map
        if any(marker in key.upper() for marker in markers)
    }
def main() -> None:
    """CLI entry: diff live guest inventory against declared IP sources.

    Reads collector JSON from --live (or stdin), compares against
    config/ip-addresses.conf and the optional ALL_VMIDS markdown tables, and
    writes live_inventory.json + drift.json into --out-dir. Exit codes:
    0 clean, 1 bad input JSON, 2 duplicate guest IPs (hard failure).
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--live", type=Path, help="live JSON file (default stdin)")
    ap.add_argument(
        "--ip-conf",
        type=Path,
        default=Path("config/ip-addresses.conf"),
        help="path to ip-addresses.conf",
    )
    ap.add_argument("--out-dir", type=Path, required=True)
    ap.add_argument(
        "--all-vmids-md",
        type=Path,
        default=None,
        help="optional ALL_VMIDS_ENDPOINTS.md for declared VMID/IP tables",
    )
    args = ap.parse_args()
    # Collector output arrives either as a file (--live) or piped on stdin.
    if args.live:
        live_raw = args.live.read_text(encoding="utf-8")
    else:
        live_raw = sys.stdin.read()
    try:
        live = json.loads(live_raw)
    except json.JSONDecodeError as e:
        print(f"Invalid live JSON: {e}", file=sys.stderr)
        sys.exit(1)
    guests = live.get("guests") or []
    # Declared sources: conf variables plus the optional markdown tables.
    var_map, conf_ips = parse_ip_addresses_conf(args.ip_conf)
    doc_ips: set[str] = set()
    vmid_to_ip_doc: dict[str, str] = {}
    if args.all_vmids_md:
        doc_ips, vmid_to_ip_doc = parse_all_vmids_markdown(args.all_vmids_md)
    declared_union = conf_ips | doc_ips
    # Hypervisor/infra IPs are never guest IPs — collect them so they can be
    # excluded from the guest-vs-declared set differences below.
    hyp_keys = hypervisor_related_keys(var_map)
    hyp_ips: set[str] = set()
    for k in hyp_keys:
        if k not in var_map:
            continue
        for m in IPV4_RE.findall(var_map[k]):
            hyp_ips.add(m)
    # Index live guests by IP (to detect duplicates) and by VMID.
    ip_to_vmids: dict[str, list[str]] = {}
    vmid_to_ip_live: dict[str, str] = {}
    for g in guests:
        ip = (g.get("ip") or "").strip()
        vmid = str(g.get("vmid", "")).strip()
        if ip:
            ip_to_vmids.setdefault(ip, []).append(vmid or "?")
        if vmid and ip:
            vmid_to_ip_live[vmid] = ip
    duplicate_ips = {ip: vms for ip, vms in ip_to_vmids.items() if len(vms) > 1}
    guest_ip_set = set(ip_to_vmids.keys())
    # Set differences in both directions: declared-but-not-live and vice versa.
    conf_only = sorted(conf_ips - guest_ip_set - hyp_ips)
    live_only_legacy = sorted(guest_ip_set - conf_ips)
    declared_lan11 = {ip for ip in declared_union if is_lan_11(ip)}
    guest_lan11 = {ip for ip in guest_ip_set if is_lan_11(ip)}
    guest_lan_not_declared = sorted(
        guest_lan11 - declared_union - hyp_ips
    )
    declared_lan11_not_on_guests = sorted(
        declared_lan11 - guest_ip_set - hyp_ips
    )
    # VMIDs whose live IP disagrees with the ALL_VMIDS doc table.
    vmid_ip_mismatch: list[dict[str, str]] = []
    for vmid, doc_ip in vmid_to_ip_doc.items():
        lip = vmid_to_ip_live.get(vmid)
        if lip and doc_ip and lip != doc_ip:
            vmid_ip_mismatch.append(
                {"vmid": vmid, "live_ip": lip, "all_vmids_doc_ip": doc_ip}
            )
    drift = {
        "collected_at": live.get("collected_at"),
        "guest_count": len(guests),
        "duplicate_ips": duplicate_ips,
        "guest_ips_not_in_ip_addresses_conf": live_only_legacy,
        "ip_addresses_conf_ips_not_on_guests": conf_only,
        "guest_lan_ips_not_in_declared_sources": guest_lan_not_declared,
        "declared_lan11_ips_not_on_live_guests": declared_lan11_not_on_guests,
        "vmid_ip_mismatch_live_vs_all_vmids_doc": vmid_ip_mismatch,
        "hypervisor_and_infra_ips_excluded_from_guest_match": sorted(hyp_ips),
        "declared_sources": {
            "ip_addresses_conf_ipv4_count": len(conf_ips),
            "all_vmids_md_lan11_count": len(doc_ips),
        },
        "notes": [],
    }
    # Surface a collector-side failure (e.g. seed_unreachable) in the report.
    if live.get("error"):
        drift["notes"].append(str(live["error"]))
    inv_out = {
        "collected_at": live.get("collected_at"),
        "source": "proxmox_cluster_pvesh_plus_config",
        "guests": guests,
    }
    args.out_dir.mkdir(parents=True, exist_ok=True)
    (args.out_dir / "live_inventory.json").write_text(
        json.dumps(inv_out, indent=2), encoding="utf-8"
    )
    (args.out_dir / "drift.json").write_text(
        json.dumps(drift, indent=2), encoding="utf-8"
    )
    print(f"Wrote {args.out_dir / 'live_inventory.json'}")
    print(f"Wrote {args.out_dir / 'drift.json'}")
    # Exit 2 only for duplicate guest IPs (hard failure). VMID vs ALL_VMIDS doc
    # mismatches are informational — documentation often lags live `pct set`.
    sys.exit(2 if duplicate_ips else 0)


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,51 @@
#!/usr/bin/env bash
# Live Proxmox guest inventory + drift vs config/ip-addresses.conf.
# Usage: bash scripts/it-ops/export-live-inventory-and-drift.sh
# Requires: SSH key root@SEED, python3 locally and on PVE.
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
# shellcheck source=/dev/null
source "${PROJECT_ROOT}/config/ip-addresses.conf" 2>/dev/null || true
SEED="${SEED_HOST:-${PROXMOX_HOST_R630_01:-192.168.11.11}}"
OUT_DIR="${OUT_DIR:-${PROJECT_ROOT}/reports/status}"
TS="$(date +%Y%m%d_%H%M%S)"
TMP="${TMPDIR:-/tmp}/live_inv_${TS}.json"
PY="${SCRIPT_DIR}/lib/collect_inventory_remote.py"
mkdir -p "$OUT_DIR"
stub_unreachable() {
python3 - <<'PY'
import json
from datetime import datetime, timezone
print(json.dumps({
"collected_at": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
"error": "seed_unreachable",
"guests": [],
}, indent=2))
PY
}
if ! ping -c1 -W2 "$SEED" >/dev/null 2>&1; then
stub_unreachable >"$TMP"
else
if ! ssh -o BatchMode=yes -o ConnectTimeout=15 -o StrictHostKeyChecking=no \
"root@${SEED}" "python3 -" <"$PY" >"$TMP" 2>/dev/null; then
stub_unreachable >"$TMP"
fi
fi
set +e
python3 "${SCRIPT_DIR}/compute_ipam_drift.py" --live "$TMP" \
--ip-conf "${PROJECT_ROOT}/config/ip-addresses.conf" \
--all-vmids-md "${PROJECT_ROOT}/docs/04-configuration/ALL_VMIDS_ENDPOINTS.md" \
--out-dir "$OUT_DIR"
DRIFT_RC=$?
set -e
cp -f "$OUT_DIR/live_inventory.json" "${OUT_DIR}/live_inventory_${TS}.json" 2>/dev/null || true
cp -f "$OUT_DIR/drift.json" "${OUT_DIR}/drift_${TS}.json" 2>/dev/null || true
rm -f "$TMP"
echo "Latest: ${OUT_DIR}/live_inventory.json , ${OUT_DIR}/drift.json"
exit "${DRIFT_RC}"

View File

@@ -0,0 +1,109 @@
#!/usr/bin/env python3
"""Run ON a Proxmox cluster node (as root). Stdout: JSON live guest inventory."""
from __future__ import annotations
import json
import re
import subprocess
import sys
from datetime import datetime, timezone
def _run(cmd: list[str]) -> str:
return subprocess.check_output(cmd, text=True, stderr=subprocess.DEVNULL)
def _extract_ip_from_net_line(line: str) -> str | None:
m = re.search(r"ip=([0-9.]+)", line)
return m.group(1) if m else None
def _read_config(path: str) -> str:
try:
with open(path, encoding="utf-8", errors="replace") as f:
return f.read()
except OSError:
return ""
def _guest_ip(body: str, guest_type: str) -> str:
    """Return the first static IP declared in a guest config *body*.

    Scans ``net0:`` lines first (LXC and QEMU); for QEMU guests it then falls
    back to the cloud-init ``ipconfig0:`` line. Returns "" when nothing matches.
    """
    prefixes = ["net0:"]
    if guest_type == "qemu":
        prefixes.append("ipconfig0:")
    for prefix in prefixes:
        for line in body.splitlines():
            if line.startswith(prefix):
                got = _extract_ip_from_net_line(line)
                if got:
                    return got
    return ""


def main() -> None:
    """Collect LXC/QEMU guest inventory via pvesh and emit JSON on stdout.

    Output shape: {"collected_at": <ISO-8601 UTC>, "guests": [...]} sorted by
    numeric VMID; on any pvesh failure a stub document with an "error" key and
    an empty guest list is emitted instead, so downstream tooling still runs.
    """
    collected_at = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    try:
        raw = _run(
            ["pvesh", "get", "/cluster/resources", "--output-format", "json"]
        )
        resources = json.loads(raw)
    # FileNotFoundError: pvesh binary absent (not a PVE node) — previously
    # this escaped as a traceback instead of the stub error document.
    except (subprocess.CalledProcessError, json.JSONDecodeError, FileNotFoundError) as e:
        json.dump(
            {
                "collected_at": collected_at,
                "error": f"pvesh_cluster_resources_failed: {e}",
                "guests": [],
            },
            sys.stdout,
            indent=2,
        )
        return
    guests: list[dict] = []
    for r in resources:
        t = r.get("type")
        if t not in ("lxc", "qemu"):
            continue
        vmid = r.get("vmid")
        node = r.get("node")
        if vmid is None or not node:
            continue
        vmid_s = str(vmid)
        # Guest configs live under the owning node's directory in /etc/pve.
        if t == "lxc":
            cfg_path = f"/etc/pve/nodes/{node}/lxc/{vmid_s}.conf"
        else:
            cfg_path = f"/etc/pve/nodes/{node}/qemu-server/{vmid_s}.conf"
        guests.append(
            {
                "vmid": vmid_s,
                "type": t,
                "node": str(node),
                "name": r.get("name") or "",
                "status": r.get("status") or "",
                "ip": _guest_ip(_read_config(cfg_path), t),
                "config_path": cfg_path,
            }
        )
    out = {
        "collected_at": collected_at,
        "guests": sorted(guests, key=lambda g: int(g["vmid"])),
    }
    json.dump(out, sys.stdout, indent=2)


if __name__ == "__main__":
    main()